Example #1
    def _init_env(self, replay_config, sac_config, model_root_path):
        if self.config['build_path'] is None or self.config['build_path'] == '':
            self.env = UnityEnvironment()
        else:
            self.env = UnityEnvironment(file_name=self.config['build_path'],
                                        no_graphics=True,
                                        base_port=self.config['build_port'],
                                        args=['--scene', self.config['scene']])

        self.logger.info(f'{self.config["build_path"]} initialized')

        self.default_brain_name = self.env.brain_names[0]

        brain_params = self.env.brains[self.default_brain_name]
        state_dim = brain_params.vector_observation_space_size * brain_params.num_stacked_vector_observations
        action_dim = brain_params.vector_action_space_size[0]

        custom_sac_model = importlib.import_module(self.config['sac'])
        shutil.copyfile(f'{self.config["sac"]}.py',
                        f'{model_root_path}/{self.config["sac"]}.py')

        self.sac = SAC_DS_with_Replay_Base(
            state_dim=state_dim,
            action_dim=action_dim,
            model_root_path=model_root_path,
            model=custom_sac_model,
            use_rnn=self.config['use_rnn'],
            replay_config=replay_config,
            burn_in_step=self.config['burn_in_step'],
            n_step=self.config['n_step'],
            **sac_config)
Example #2
def _start_env(file_name="", seed=0, worker_id=0):

    log2logger("Starting Environment: " + str(worker_id))

    if file_name == "":
        env = UnityEnvironment(file_name=None, seed=seed)
    else:
        env = UnityEnvironment(file_name=file_name,
                               seed=seed,
                               worker_id=worker_id)

    return env
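A minimal usage sketch for the helper above (the build path is hypothetical, and log2logger must already be defined in the caller's module):

env = _start_env(file_name="builds/3DBall", seed=42, worker_id=1)  # hypothetical build path
env.reset(train_mode=True)
env.close()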
Example #3
    def _init_env(self):
        if self.config['build_path'] is None or self.config['build_path'] == '':
            self.env = UnityEnvironment()
        else:
            self.env = UnityEnvironment(file_name=self.config['build_path'],
                                        no_graphics=self._train_mode,
                                        base_port=self.config['build_port'],
                                        args=['--scene', self.config['scene']])

        self.logger.info(f'{self.config["build_path"]} initialized')

        self.default_brain_name = self.env.brain_names[0]
Example #4
def test_step(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.step()
    brain_info = env.step([0] * brain.vector_action_space_size[0] *
                          len(brain_info["RealFakeBrain"].agents))
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] *
                          len(brain_info["RealFakeBrain"].agents))
    env.close()
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
    assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
    assert isinstance(brain_info["RealFakeBrain"].vector_observations,
                      np.ndarray)
    assert (len(brain_info["RealFakeBrain"].visual_observations) ==
            brain.number_visual_observations)
    assert len(brain_info["RealFakeBrain"].vector_observations) == len(
        brain_info["RealFakeBrain"].agents)
    assert (len(brain_info["RealFakeBrain"].vector_observations[0]) ==
            brain.vector_observation_space_size *
            brain.num_stacked_vector_observations)

    print("\n\n\n\n\n\n\n" + str(brain_info["RealFakeBrain"].local_done))
    assert not brain_info["RealFakeBrain"].local_done[0]
    assert brain_info["RealFakeBrain"].local_done[2]
Example #5
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(" ")
            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.action_masks: np.ones([2, 2]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #6
def test_step(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    spec = env.get_agent_group_spec("RealFakeBrain")
    env.step()
    batched_step_result = env.get_step_result("RealFakeBrain")
    n_agents = batched_step_result.n_agents()
    env.set_actions("RealFakeBrain",
                    np.zeros((n_agents, spec.action_size), dtype=np.float32))
    env.step()
    with pytest.raises(UnityActionException):
        env.set_actions(
            "RealFakeBrain",
            np.zeros((n_agents - 1, spec.action_size), dtype=np.float32),
        )
    batched_step_result = env.get_step_result("RealFakeBrain")
    n_agents = batched_step_result.n_agents()
    env.set_actions(
        "RealFakeBrain", -1 * np.ones(
            (n_agents, spec.action_size), dtype=np.float32))
    env.step()

    env.close()
    assert isinstance(batched_step_result, BatchedStepResult)
    assert len(spec.observation_shapes) == len(batched_step_result.obs)
    for shape, obs in zip(spec.observation_shapes, batched_step_result.obs):
        assert (n_agents, ) + shape == obs.shape
    assert not batched_step_result.done[0]
    assert batched_step_result.done[2]
Example #7
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(" ")
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True,
                             m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
                model.memory_out,
            ]
            feed_dict = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #8
def test_ppo_get_value_estimates(mock_communicator, mock_launcher,
                                 dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.external_brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.external_brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, env.brains[env.external_brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.get_value_estimates(brain_info, 0, done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    env.close()
Example #9
def init_unity_env(env_path, show_visuals=True):
    worker_id = 0
    done = False

    while not done:
        if worker_id > 64:
            sys.exit()
        try:
            env = UnityEnvironment(env_path,
                                   worker_id=worker_id,
                                   no_graphics=not show_visuals)
            done = True
        except mlagents.envs.exception.UnityWorkerInUseException:
            worker_id += 1

    env.reset(train_mode=True)
    brain_name = list(env.brains.keys())[0]

    state_space = env.brains[brain_name].vector_observation_space_size
    action_space = env.brains[brain_name].vector_action_space_size

    n_agents = env._n_agents[brain_name]

    multiagent = True if n_agents > 1 else False

    return env, state_space, action_space, n_agents, multiagent, brain_name
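An illustrative call of the helper above, unpacking the tuple it returns (the build path is hypothetical):

env, state_space, action_space, n_agents, multiagent, brain_name = init_unity_env(
    "builds/Walker", show_visuals=False)
print("{}: {} agent(s), obs dim {}, action space {}".format(
    brain_name, n_agents, state_space, action_space))
env.close()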
Example #10
def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(" ")
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
Example #11
    def setup_connection_with_unity(self, build_scene):
        # Connect to Unity and get environment
        self.env = UnityEnvironment(file_name=build_scene, worker_id=0, seed=1)

        # Reset the environment
        self.env_info = self.env.reset(train_mode=True)

        # Set the default brain to work with
        self.default_brain = "Robot"
Example #12
def run(train_mode, load_model, env_name):
    env = UnityEnvironment(file_name=env_name)
    default_brain = env.brain_names[0]

    agent = DDPGAgent(state_size, action_size, train_mode, load_model)
    rewards = deque(maxlen=print_interval)
    success_cnt = 0
    step = 0

    for episode in range(run_episode + test_episode):
        if episode == run_episode:
            train_mode = False

        env_info = env.reset(train_mode=train_mode)[default_brain]
        state = env_info.vector_observations[0]
        episode_rewards = 0
        done = False

        while not done:
            step += 1

            action = agent.get_action([state])[0]
            #print(action)
            env_info = env.step(action)[default_brain]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]

            episode_rewards += reward

            if train_mode:
                agent.append_sample(state, action, reward, next_state, done)

            state = next_state

            if episode > start_train_episode and train_mode:
                agent.train_model()

        success_cnt = success_cnt + 1 if reward == 1 else success_cnt
        rewards.append(episode_rewards)
        agent.save_samples(episode)

        if episode % print_interval == 0 and episode != 0:
            print("step: {} / episode: {} / reward: {:.3f} / success_cnt: {}".
                  format(step, episode, np.mean(rewards), success_cnt))
            agent.Write_Summray(np.mean(rewards), success_cnt, episode)
            success_cnt = 0

        if train_mode and episode % save_interval == 0 and episode != 0:
            print("model saved")
            agent.save_model()

    env.close()
Example #13
def get_unity_envs():
    # check the python environment
    print("Python version: ", sys.version)
    if (sys.version_info[0] < 3):
        raise Exception("ERROR: ML-Agents Toolkit requires Python 3")

    # set the unity environment
    env = UnityEnvironment(file_name=UNITY_PATH, base_port=5005)
    brain = env.brain_names[0]
    env.reset(train_mode=True)[brain]

    return env, brain
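A sketch of a random-action loop built on get_unity_envs(), assuming UNITY_PATH points at a valid build, a continuous action space, and the older BrainInfo API (ml-agents <= 0.10) that these examples target:

import numpy as np

env, brain = get_unity_envs()
action_dim = env.brains[brain].vector_action_space_size[0]

env_info = env.reset(train_mode=True)[brain]
for _ in range(10):
    # one uniform random action per agent
    actions = np.random.uniform(-1, 1, size=(len(env_info.agents), action_dim))
    env_info = env.step(actions)[brain]
    if all(env_info.local_done):
        env_info = env.reset(train_mode=True)[brain]
env.close()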
Example #14
    def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0, docker_training=False, no_graphics=False,
                 timeout_wait=30, train_mode=True, **kwargs):
        """
        Args:
            file_name (Optional[str]): Name of Unity environment binary.
            base_port (int): Port number to connect to Unity environment. `worker_id` increments on top of this.
            worker_id (int): Number to add to `base_port`. Used for asynchronous agent scenarios.
            docker_training (bool): Informs this class whether the process is being run within a container.
                Default: False.
            no_graphics (bool): Whether to run the Unity simulator in no-graphics mode. Default: False.
            timeout_wait (int): Time (in seconds) to wait for connection from environment.
            train_mode (bool): Whether to run in training mode, speeding up the simulation. Default: True.
        """
        # First create the UnityMLAgentsEnvironment to get state and action spaces, then create RLgraph Environment
        # instance.
        self.mlagents_env = UnityEnvironment(
            file_name, worker_id, base_port, seed, docker_training, no_graphics
        )
        all_brain_info = self.mlagents_env.reset()
        # Get all possible information from AllBrainInfo.
        # TODO: Which scene do we pick?
        self.scene_key = next(iter(all_brain_info))
        first_brain_info = all_brain_info[self.scene_key]
        num_environments = len(first_brain_info.agents)

        state_space = {}
        if len(first_brain_info.vector_observations[0]) > 0:
            state_space["vector"] = get_space_from_op(first_brain_info.vector_observations[0])
            # TODO: This is a hack.
            if state_space["vector"].dtype == np.float64:
                state_space["vector"].dtype = np.float32
        if len(first_brain_info.visual_observations) > 0:
            state_space["visual"] = get_space_from_op(first_brain_info.visual_observations[0])
        if first_brain_info.text_observations[0]:
            state_space["text"] = get_space_from_op(first_brain_info.text_observations[0])

        if len(state_space) == 1:
            self.state_key = next(iter(state_space))
            state_space = state_space[self.state_key]
        else:
            self.state_key = None
            state_space = Dict(state_space)
        action_space = get_space_from_op(first_brain_info.action_masks[0])
        if action_space.dtype == np.float64:
            action_space.dtype = np.float32

        super(MLAgentsEnv, self).__init__(
            num_environments=num_environments, state_space=state_space, action_space=action_space, **kwargs
        )

        # Caches the last observation we made (after stepping or resetting).
        self.last_state = []
Example #15
    def create_unity_environment(worker_id: int) -> UnityEnvironment:
        env_seed = seed
        if not env_seed:
            env_seed = seed_pool[worker_id % len(seed_pool)]
        return UnityEnvironment(
            file_name=env_path,
            worker_id=worker_id,
            seed=env_seed,
            docker_training=docker_training,
            no_graphics=no_graphics,
            base_port=start_port,
            args=env_args,
        )
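create_unity_environment is a closure over names such as env_path, seed_pool, and start_port from its enclosing scope; because worker_id is added to base_port, distinct worker ids can run side by side. An illustrative call from that enclosing scope (num_envs is assumed to be defined there):

    # One Unity process per worker, each listening on start_port + worker_id.
    envs = [create_unity_environment(worker_id) for worker_id in range(num_envs)]
    for env in envs:
        env.close()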
Example #16
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    spec = env.get_agent_group_spec("RealFakeBrain")
    env.reset()
    batched_step_result = env.get_step_result("RealFakeBrain")
    env.close()
    assert isinstance(batched_step_result, BatchedStepResult)
    assert len(spec.observation_shapes) == len(batched_step_result.obs)
    n_agents = batched_step_result.n_agents()
    for shape, obs in zip(spec.observation_shapes, batched_step_result.obs):
        assert (n_agents, ) + shape == obs.shape
Example #17
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.external_brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.external_brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, env.brains[env.external_brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out["action"].shape == (3, 2)
    env.close()
Example #18
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.reset()
    env.close()
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
    assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
    assert isinstance(brain_info["RealFakeBrain"].vector_observations,
                      np.ndarray)
    assert (len(brain_info["RealFakeBrain"].visual_observations) ==
            brain.number_visual_observations)
    assert len(brain_info["RealFakeBrain"].vector_observations) == len(
        brain_info["RealFakeBrain"].agents)
    assert (len(brain_info["RealFakeBrain"].vector_observations[0]) ==
            brain.vector_observation_space_size)
Example #19
    def __init__(self,
                 env_file='data/Reacher.exe',
                 no_graphics=True,
                 mlagents=False):
        if mlagents:
            from mlagents.envs.environment import UnityEnvironment
        else:
            from unityagents import UnityEnvironment
        self.env = UnityEnvironment(file_name=env_file,
                                    no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        if type(self.action_size) != int:
            self.action_size = self.action_size[0]
        env_info = self.env.reset(train_mode=True)[self.brain_name]
        self.state_size = env_info.vector_observations.shape[1]
        self.num_agents = len(env_info.agents)
Example #20
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    env.reset()
    brain_name = env.get_agent_groups()[0]
    brain_info = step_result_to_brain_info(
        env.get_step_result(brain_name), env.get_agent_group_spec(brain_name))
    brain_params = group_spec_to_brain_parameters(
        brain_name, env.get_agent_group_spec(brain_name))

    trainer_parameters = dummy_config
    model_path = brain_name
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, brain_params, trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out["action"].shape == (3, 2)
    env.close()
Example #21
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0
            )
            env = UnityEnvironment(" ")
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example #22
def init_unity_env(env_path, show_visuals=True):

    # Find a worker_id < 64 that's not in use
    worker_id = 0
    done = False

    while not done:
        if worker_id > 64:
            sys.exit()
        try:
            env = UnityEnvironment(env_path,
                                   worker_id=worker_id,
                                   no_graphics=not show_visuals)
            done = True
        except mlagents.envs.exception.UnityWorkerInUseException:
            worker_id += 1

    # Get state and action space, as well as multiagent and multibrain info from environment
    env.reset(train_mode=not show_visuals)
    # brain_name = list(env.brains.keys())[0]
    brain_names = list(env.brains.keys())

    if len(brain_names) > 1:
        multibrain = True
        n_agents = env._n_agents[brain_names[0]] + env._n_agents[
            brain_names[1]]
    else:
        multibrain = False
        n_agents = env._n_agents[brain_names[0]]

    # WalkerVis is a version of the Walker environment with one brain 'WalkerVis'
    # having visual observations, whereas 'Walker' brain does not.
    # The visual observations are used for recording episodes
    state_space = env.brains[brain_names[0]].vector_observation_space_size
    action_space = env.brains[brain_names[0]].vector_action_space_size

    multiagent = True if n_agents > 1 else False

    return env, state_space, action_space, n_agents, multiagent, brain_names, multibrain
Example #23
opt = parser.parse_args()
env_name = opt.env_path  # e.g. ./Build-linux/VehicleX on Linux
train_mode = opt.train_mode  # Whether to run the environment in training or inference mode

print("Python version:")
print(sys.version)

# check Python version
if (sys.version_info[0] < 3):
    raise Exception("ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")
if (not os.path.exists("./Background_imgs") and train_mode == False):
    raise Exception("The inference mode requre background images")

# env = UnityEnvironment(file_name=None)
env = UnityEnvironment(file_name=opt.env_path)  # use file_name=None when running from the Unity Editor
# Set the default brain to work with

default_brain = env.brain_names[0]
brain = env.brains[default_brain]
distance_bias = 12.11

print ("Begin generation")

doc = Document()
TrainingImages = doc.createElement('TrainingImages')
TrainingImages.setAttribute("Version", "1.0")  
doc.appendChild(TrainingImages)
Items = doc.createElement('Items')
Items.setAttribute("number", "-")  
TrainingImages.appendChild(Items)
Example #24
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

env_name = "./Build-win/VehicleX"  # is ./Build-linux/VehicleX if linux is used
train_mode = opt.train_mode  # Whether to run the environment in training or inference mode

print("Python version:")
print(sys.version)

# check Python version
if (sys.version_info[0] < 3):
    raise Exception(
        "ERROR: ML-Agents Toolkit (v0.3 onwards) requires Python 3")

# env = UnityEnvironment(file_name=env_name)
env = UnityEnvironment(file_name=None)  # file_name=None connects to the Unity Editor

# Set the default brain to work with
default_brain = env.brain_names[0]
brain = env.brains[default_brain]
distance_bias = 12.11


def ancestral_sampler_1(pi=[0.5, 0.5], mu=[0, 180], sigma=[20, 20], size=1):
    sigma = [20 for i in range(6)]
    pi = [0.16 for i in range(6)]
    sample = []
    z_list = np.random.uniform(size=size)
    low = 0  # low bound of a pi interval
    high = 0  # high bound of a pi interval
Example #25
import copy
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import namedtuple, deque
import random
import numpy as np
import matplotlib.pyplot as plt
from torch.distributions import Normal, Categorical
from mlagents.envs.environment import UnityEnvironment
#from DDPG import MADDPG
from DDPG import DDPG

#Transition = namedtuple('Transition',['state', 'action', 'reward', 'a_log_prob', 'next_state'])
env = UnityEnvironment(file_name=env_name, worker_id=1, seed=1)
default_brain = env.brain_names[0]
brain = env.brains[default_brain]
env_info = env.reset(train_mode=True)[default_brain]
max_step = 1000
#maddpg = MADDPG()
ddpg = DDPG()
rewards = []

for eps in range(1):
    env_info = env.reset(train_mode=True)[default_brain]
    done = False
    eps_reward = 0
    state = env_info.vector_observations
    #state = torch.from_numpy(state).float()
    score = 0
Example #26
    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     no_graphics=no_graphics)

        # Take a single step so that the brain information will be sent over
        if not self._env.brains:
            self._env.step()

        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        self.game_over = (
            False
        )  # Hidden flag used by Atari environments to determine if the game is over
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        if len(self._env.external_brain_names) <= 0:
            raise UnityGymException(
                "There are not any external brain in the UnityEnvironment")

        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment.")
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        # uint8_visual only has an effect when visual observations are in use.
        self.uint8_visual = uint8_visual
        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual` was set to true, but visual observations are not in use. "
                "This setting will not have any effect.")

        if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to received them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(
                    brain.vector_action_space_size[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(
                        brain.vector_action_space_size)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(
                        brain.vector_action_space_size)

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            shape = (
                brain.camera_resolutions[0].height,
                brain.camera_resolutions[0].width,
                brain.camera_resolutions[0].num_channels,
            )
            if uint8_visual:
                self._observation_space = spaces.Box(0,
                                                     255,
                                                     dtype=np.uint8,
                                                     shape=shape)
            else:
                self._observation_space = spaces.Box(0,
                                                     1,
                                                     dtype=np.float32,
                                                     shape=shape)

        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
Example #27
            if i==1:
                if j == 0:
                    A_[i][j] = 1
                    A_[j][i] = 1
                if 1 + n < j and j < 1 + 1 + n + n:
                    A_[i][j] = 1
                    A_[j][i] = 1
                if 1 + n + n + n < j and j < 1 + 1 + n + n + n + n:
                    A_[i][j] = 1
                    A_[j][i] = 1
    for i in range(1+1+n+n+n+n):
        A_[i][i] = 1
    for a in range(NUM_AGENT):
        A.append(preprocess_adj(A_))


    return np.array(A), np.array(X)



if __name__ == '__main__':
    env = UnityEnvironment()
    obs = env.reset(train_mode=True)
    brain_name = env.brain_names[0]
    obs = obs[brain_name].vector_observations
    a , x = preprocess_observation_n(obs)
    print(a)
    print(x)
    env.close()

Example #28
NETWORK = networks.Network3
LEARNING_RATE = 0.005
DISCOUNT_RATE = 0.95
EXPLORATION_RATE = 1
EXPLORATION_RATE_DECAY = 1
TARGET_NETWORK_UPDATE_INTERVAL = 100
REPLAY_MEMORY_SIZE = 100
MINIBATCH_SIZE = 10

# progress tracking and saving
COLLECT_DATA = True
RANDOM_STATES = 10
CHECKPOINT_EPOCHS = 5

# initialize simulation
env = UnityEnvironment(ENVIRONMENT)
bi = env.reset()
BRAIN_NAME = env.external_brain_names[0]
brain_parameters = env.external_brains[BRAIN_NAME]
STATE_SPACE_SIZE = brain_parameters.vector_observation_space_size
ACTION_SPACE_SIZE = brain_parameters.vector_action_space_size[0]

# sample states
random_states = utils.sample_states(env, BRAIN_NAME, ACTION_SPACE_SIZE,
                                    RANDOM_STATES)

# initialize policy network, target network, and optimizer
qnet = NETWORK(STATE_SPACE_SIZE, ACTION_SPACE_SIZE)
qnet.load_state_dict(torch.load("qnet_parameters.pt"))
optimizer = torch.optim.SGD(qnet.parameters(), LEARNING_RATE)
tnet = NETWORK(STATE_SPACE_SIZE, ACTION_SPACE_SIZE)
Example #29
from mlagents.envs.environment import UnityEnvironment
import Utils
from agents import get_agents

game = "AirCombat/Aircombat"
#game = "AirHockey/AirHockey"
env_name = 'Games/{}'.format(game)

if __name__ == '__main__':
    args = Utils.get_config('A2C')
    env = UnityEnvironment(file_name=env_name)

    default_brain = env.brain_names[0]
    agent = get_agents(env, args)

    if agent is not None:
        agent.train()

    env.close()
Example #30
    def __init__(self,
                 environment_filename=None,
                 docker_training=False,
                 worker_id=0,
                 retro=True,
                 timeout_wait=30,
                 realtime_mode=False,
                 config=None,
                 greyscale=False):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          docker_training: Whether this is running within a docker environment and should use a virtual
            frame buffer (xvfb).
          worker_id: The index of the worker in the case where multiple environments are running.  Each
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resize visual observation to 84x84 (int8) and flattens action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.
        """
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     docker_training=docker_training,
                                     timeout_wait=timeout_wait)

        split_name = self._env.academy_name.split('-v')
        if len(split_name) == 2 and split_name[0] == "ObstacleTower":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" +
                self.version +
                " but only the following versions are compatible with this gym: "
                + str(self.ALLOWED_VERSIONS))

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        if config != None:
            self.config = config
        else:
            self.config = None

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException(
                "Environment provides no visual observations.")

        self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1:
            logger.warning(
                "The environment contains more than one visual observation. "
                "Please note that only the first will be provided in the observation."
            )

        # Check for number of agents in scene.
        initial_info = self._env.reset(
            train_mode=not self.realtime_mode)[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(
                brain.vector_action_space_size[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(
                    brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(
                    brain.vector_action_space_size)

        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = brain.camera_resolutions[0]["height"]
        camera_width = brain.camera_resolutions[0]["width"]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(0,
                                 image_space_max,
                                 dtype=image_space_dtype,
                                 shape=(camera_height, camera_width, depth))
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0,
                                              high=max_float,
                                              shape=(1, ),
                                              dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space))