Example 1
def test_multi_agent(mock_env):
    mock_brain = create_mock_group_spec()
    mock_braininfo = create_mock_vector_step_result(num_agents=2)
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    with pytest.raises(UnityGymException):
        UnityEnv(" ", multiagent=False)

    env = UnityEnv(" ", use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
Example 2
def main(env_name):
    """
    Run the gym test using the specified environment
    :param env_name: Name of the Unity environment binary to launch
    """
    env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)

    try:
        # Examine environment parameters
        print(str(env))

        # Reset the environment
        initial_observations = env.reset()

        if len(env.observation_space.shape) == 1:
            # Examine the initial vector observation
            print("Agent observations look like: \n{}".format(
                initial_observations))

        for _episode in range(10):
            env.reset()
            done = False
            episode_rewards = 0
            while not done:
                actions = env.action_space.sample()
                obs, reward, done, _ = env.step(actions)
                episode_rewards += reward
            print("Total reward this episode: {}".format(episode_rewards))
    finally:
        env.close()
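A minimal way to drive this test from the command line, assuming the binary path is supplied as a positional argument; the argparse wiring below is illustrative and not part of the original example:

if __name__ == "__main__":
    import argparse

    # Hypothetical entry point: pass the Unity binary path on the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", help="Path to the Unity environment binary")
    args = parser.parse_args()
    main(args.env_name)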
Example 3
    def __init__(self, env_id, n_agents):
        env_path = UnityVecEnv.GetFilePath(env_id, n_agents=n_agents)
        print("**** ", env_path)
        env = UnityEnv(env_path, multiagent=True)
        self.env = env
        env.num_envs = env.number_agents
        VecEnv.__init__(self, env.num_envs, env.observation_space,
                        env.action_space)
        obs_space = env.observation_space

        # self.keys, shapes, dtypes = obs_space_info(obs_space)
        # self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
        # self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
        # self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        # Fake Monitor
        self.tstart = time.time()
        self.results_writer = ResultsWriter("filename",
                                            header={
                                                "t_start": time.time(),
                                                'env_id': env.spec
                                                and env.spec.id
                                            },
                                            extra_keys=() + ())
        self.reset_keywords = ()
        self.info_keywords = ()
        self.allow_early_resets = True
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()
Example 4
    def __init__(self, n_episodes, env_name, model):

        # Number of episodes
        self.n_episodes = n_episodes

        # Environment
        self.env_name = env_name
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=False)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)

        # Model
        self.model = ActorCritic(self.state_size, self.action_size,
                                 seed=0).to(device)

        # Initialize time step (for updating every "update_every" time steps)
        self.t_step = 1

        # Start test
        self.load_model(model)
        self.test()
Example 5
    def __init__(self, platform):
        if platform == OSName.MAC:
            env_filename = EnvironmentName.DRONE_RACING_MAC.value
        elif platform == OSName.WINDOWS:
            env_filename = EnvironmentName.DRONE_RACING_WINDOWS.value
        else:
            env_filename = None

        self.env = UnityEnv(
            environment_filename=env_filename,
            worker_id=randrange(65536),
            use_visual=False,
            multiagent=False
        ).unwrapped

        super(Drone_Racing, self).__init__()
        Drone_Racing.worker_id += 1
        self.action_shape = self.get_action_shape()
        self.action_space = self.env.action_space

        self.continuous = False

        self.skipping_state_fq = 3
        self.skipping_state_index = 0

        self.WIN_AND_LEARN_FINISH_SCORE = 200
Example 6
 def _init():
     # env = gym.make(env_id)
     # env.seed(seed + rank)
     env = UnityEnv(env_id, worker_id=rank, use_visual=False, no_graphics=True)
     #env.seed(seed + rank)
     env = Monitor(env, log_dir, allow_early_resets=True)
     return env
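Factories like `_init` are normally handed to a vectorized wrapper so each Unity instance gets its own `worker_id` (and therefore its own port). A minimal sketch assuming Stable Baselines' `DummyVecEnv` and the same `UnityEnv`/`Monitor` imports as above; the `make_unity_vec_env` helper and its arguments are illustrative:

from stable_baselines.common.vec_env import DummyVecEnv

def make_unity_vec_env(env_id, log_dir, num_envs=1):
    # Hypothetical helper: one factory per rank, each with a distinct
    # worker_id so the Unity instances do not collide on the same port.
    def make_rank(rank):
        def _init():
            env = UnityEnv(env_id, worker_id=rank, use_visual=False, no_graphics=True)
            return Monitor(env, log_dir, allow_early_resets=True)
        return _init
    return DummyVecEnv([make_rank(rank) for rank in range(num_envs)])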
Example 7
def test_branched_flatten(mock_env):
    mock_brain = create_mock_brainparams(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_braininfo = create_mock_vector_braininfo(num_agents=1)
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
    assert env._flattener.lookup_action(11) == [1, 1, 2]

    # Check that False produces a MultiDiscrete
    env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=False)
    assert isinstance(env.action_space, spaces.MultiDiscrete)
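The assertions follow from simple combinatorics: branch sizes [2, 2, 3] give 2 * 2 * 3 = 12 flattened actions, and the last flat index maps back to [1, 1, 2]. Below is a sketch of one way such a lookup table can be built; it assumes `itertools.product` ordering and is not the wrapper's own `_flattener` implementation:

import itertools

# Illustrative reconstruction of the flattened lookup table (ordering assumed).
branch_sizes = [2, 2, 3]
lookup = [list(combo) for combo in itertools.product(*(range(n) for n in branch_sizes))]

assert len(lookup) == 12        # 2 * 2 * 3 flattened discrete actions
assert lookup[0] == [0, 0, 0]   # first combination
assert lookup[11] == [1, 1, 2]  # last combination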
Example 8
def test_branched_flatten(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, num_agents=1)
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step,
                                mock_terminal_step)

    env = UnityEnv(" ", use_visual=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
    assert env._flattener.lookup_action(11) == [1, 1, 2]

    # Check that False produces a MultiDiscrete
    env = UnityEnv(" ", use_visual=False, flatten_branched=False)
    assert isinstance(env.action_space, spaces.MultiDiscrete)
Example 9
 def __init__(self, env_config):
     self.vector_index = env_config.vector_index
     self.worker_index = env_config.worker_index
     self.worker_id = env_config["unity_worker_id"] + env_config.worker_index
     # Name of the Unity environment binary to launch
     env_name = '/home/jim/projects/unity_ray/basic_env_linux/basic_env_linux'
     self.env = UnityEnv(env_name, worker_id=self.worker_id, use_visual=False, multiagent=False, no_graphics=True) #
     self.action_space = self.env.action_space
     self.observation_space = self.env.observation_space
Example 10
 def env_ctor():
     env_path = GetFilePath(domain, n_agents=1)
     # env = UnityEnv(env_path, multiagent=True)
     from random import randint
     worker_id = randint(0, 9)
     print("---***** worker_id", worker_id)
     env = UnityEnv(env_path, worker_id=worker_id)
     # env = control.wrappers.CollectGymDataset(env, params.save_episode_dir)
     return env
Example 11
def worker(id, td3_trainer, rewards_queue, replay_buffer, max_episodes, max_steps, batch_size, explore_steps, \
            update_itr, explore_noise_scale, eval_noise_scale, reward_scale, DETERMINISTIC, hidden_dim, model_path):
    '''
    Sampling worker for multi-process training.
    '''
    print(td3_trainer, replay_buffer)

    env_name="./tac_follow_new"
    env = UnityEnv(env_name, worker_id=id+15, use_visual=False, use_both=True)



    # training loop
    rewards = []  # running (smoothed) episode-reward history across the whole run
    for eps in range(max_episodes):
        frame_idx = 0
        episode_reward = 0
        state, info = env.reset()
        # state=state[:6]
        
        for step in range(max_steps):
            if frame_idx > explore_steps:
                action = td3_trainer.policy_net.get_action(state, deterministic = DETERMINISTIC, explore_noise_scale=explore_noise_scale)
            else:
                action = td3_trainer.policy_net.sample_action()
    
            try:
                next_state, reward, done, info = env.step(action)
                # next_state = next_state[:6]
            except KeyboardInterrupt:
                print('Finished')
                td3_trainer.save_model(model_path)
                return  # stop this worker; the step variables below would be undefined
    
            replay_buffer.push(state, action, reward, next_state, done)
            
            state = next_state
            episode_reward += reward
            frame_idx += 1
            
            
            # if len(replay_buffer) > batch_size:
            if replay_buffer.get_length() > batch_size:
                for i in range(update_itr):
                    _=td3_trainer.update(batch_size, deterministic=DETERMINISTIC, eval_noise_scale=eval_noise_scale, reward_scale=reward_scale)
            
            if eps % 10 == 0 and eps>0:
                # plot(rewards, id)
                td3_trainer.save_model(model_path)
            
            if done:
                break
        print('Episode: ', eps, '| Episode Reward: ', episode_reward)
        if len(rewards) == 0: rewards.append(episode_reward)
        else: rewards.append(rewards[-1]*0.9+episode_reward*0.1)
        rewards_queue.put(episode_reward)

    td3_trainer.save_model(model_path)
Example 12
    def __init__(self, wid):
        self.wid = wid
        self.env = UnityEnv(env_name,
                            worker_id=wid,
                            use_visual=False,
                            use_both=True)

        # self.env=Reacher(render=True)
        self.ppo = GLOBAL_PPO
Example 13
def test_multi_agent(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=2)

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', multiagent=False)

    env = UnityEnv(' ', use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
    assert isinstance(info, dict)
Example 14
def test_gym_wrapper(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=1)

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', use_visual=False, multiagent=True)

    env = UnityEnv(' ', use_visual=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
Example 15
def main():

    env_path = 'AutoBenchExecutable/AutoBenchExecutable'
    curriculum_file = 'config/curricula/autobench/AutoBenchBrain.json'
    camera_res_overwrite, use_visual = extract_camera_config_gym(
        curriculum_file)
    # Setup the Unity Environment
    env = UnityEnv(environment_filename=env_path,
                   worker_id=0,
                   use_visual=use_visual,
                   multiagent=True,
                   env_config=get_env_config(curriculum_file),
                   camera_res_overwrite=camera_res_overwrite)
Example 16
def make_env(env_spec, concurrency=None):
    '''Makes an environment from the spec. `concurrency` determines whether/how the env is run in a sub-process.'''
    if concurrency is None:
        try:
            try_register_env(env_spec['name'])
            return gym.make(env_spec['name'], *env_spec.get('args', []),
                            **env_spec.get('kwargs', {}))
        except Exception:  # fall back to a Unity binary if the name is not a registered Gym id
            worker_id = int(f'{os.getpid()}{ps.unique_id()}'[-4:])
            return UnityEnv(get_env_path(env_spec['name']),
                            worker_id=worker_id,
                            *env_spec.get('args', []),
                            **env_spec.get('kwargs', {}))
    return ConcurrentEnv(env_spec, concurrency=concurrency)
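A usage sketch for this helper with a plain spec dictionary; the environment name and keyword arguments are placeholders:

# Illustrative spec (hypothetical values): 'name' is tried as a Gym id first,
# otherwise it is resolved to a Unity binary via get_env_path().
env_spec = {"name": "3DBall", "kwargs": {"use_visual": False}}
env = make_env(env_spec)  # concurrency=None, so the env runs in-process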
Example 17
def test_gym_wrapper(mock_env):
    mock_brain = create_mock_brainparams()
    mock_braininfo = create_mock_vector_braininfo()
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
Example 18
def test_closing(env_name):
    """
    Run the gym test, closing and re-opening the environment multiple times
    :param env_name: Name of the Unity environment binary to launch
    """

    env1 = env2 = None
    try:
        env1 = UnityEnv(env_name,
                        worker_id=1,
                        use_visual=False,
                        no_graphics=True)
        env1.close()
        env1 = UnityEnv(env_name,
                        worker_id=1,
                        use_visual=False,
                        no_graphics=True)
        env2 = UnityEnv(env_name,
                        worker_id=2,
                        use_visual=False,
                        no_graphics=True)
        env2.reset()
    finally:
        if env1 is not None:
            env1.close()
        if env2 is not None:
            env2.close()
Example 19
def test_gym_wrapper(mock_env):
    mock_brain = create_mock_group_spec()
    mock_braininfo = create_mock_vector_step_result()
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
Example 20
        def _thunk():
            if env_type == 'unity':
                worker_id = 32 + rank
                print ("***** UnityEnv", env_id, worker_id, rank)
                env = UnityEnv(env_id, worker_id)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example 21
    def run():
        # LINUX: Disable the Unity window -> no_graphics=True
        env = UnityEnv(env_name,
                       worker_id=1000,
                       use_visual=False,
                       uint8_visual=False,
                       allow_multiple_visual_obs=False,
                       no_graphics=False)

        # Create the agent
        model = PPO2(MlpPolicy, env, verbose=0, learning_rate=1.0e-4)
        model.learn(total_timesteps=num_episodes)

        env.close()

        print("Successfully trained")
Example 22
        def _thunk():
            if env_type == 'unity':
                from gym_unity.envs import UnityEnv
                import random
                r = random.randint(64, 164)
                print ("***** HELLO", mpi_rank + r)
                env = UnityEnv(env_id, mpi_rank + r)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example 23
def test_sanitize_action_one_agent_done(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    received_step_result = create_mock_vector_step_result(num_agents=6)
    received_step_result.agent_id = np.array(range(6))
    # agent #3 (id = 2) is Done
    received_step_result.done = np.array([False] * 2 + [True] + [False] * 3)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([0, 1, 5, 3, 4],
                                           sanitized_result.agent_id):
        assert expected_agent_id == agent_id
Example 24
    def __init__(self):

        # Hyperparameters
        self.learning_rate = 0.0003
        self.betas = (0.9, 0.999)
        self.gamma = 0.99
        self.eps_clip = 0.2
        self.buffer_size = 2048
        self.batch_size = 256
        self.K_epochs = 3
        self.max_steps = 100000

        self.tau = 0.95
        self.entropy_coef = 0.001
        self.value_loss_coef = 0.5

        self.summary_freq = 1000

        # Environment
        self.env_name = "Environments/env1/Unity Environment"
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=True)
        channel.set_configuration_parameters(time_scale=100)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)
        self.n_agents = self.env.number_agents
        print("Nº of Agents: ", self.n_agents)

        # Model
        self.model = ActorCritic(self.state_size, self.action_size,
                                 seed=0).to(device)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate,
                                    betas=self.betas)
        self.MseLoss = nn.MSELoss()

        # Buffer memory
        self.memory = []
        for _ in range(self.n_agents):
            self.memory.append(Buffer())

        # Initialize time step (for updating when buffer_size is full)
        self.t_step = 1
Example 25
def test_sanitize_action_single_agent_multiple_done(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=1)
    mock_step.agent_id = np.array(range(1))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=False)

    received_step_result = create_mock_vector_step_result(num_agents=3)
    received_step_result.agent_id = np.array(range(3))
    # original agent (id = 0) is Done
    # so is the "new" agent (id = 1)
    done = [True, True, False]
    received_step_result.done = np.array(done)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([2], sanitized_result.agent_id):
        assert expected_agent_id == agent_id
Example 26
    def __init__(self):

        # Hyperparameters
        self.learning_rate = 0.0003
        self.buffer_size = 10240
        self.batch_size = 1024
        self.gamma = 0.99
        self.update_every = 64
        self.max_steps = 100000

        self.epsilon = 1.0
        self.epsilon_end = 0.01
        self.epsilon_decay = 0.995
        self.tau = 0.01

        self.summary_freq = 1000

        # Environment
        self.env_name = "Environments/env1/Unity Environment"
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=False)
        channel.set_configuration_parameters(time_scale=100)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)
        self.n_agents = self.env.number_agents

        # Models
        self.local_model = QNetwork(self.state_size, self.action_size,
                                    seed=0).to(device)
        self.target_model = QNetwork(self.state_size, self.action_size,
                                     seed=0).to(device)
        self.optimizer = optim.Adam(self.local_model.parameters(),
                                    lr=self.learning_rate)

        # Buffer memory
        self.memory = Buffer(self.buffer_size,
                             self.batch_size,
                             seed=0,
                             device=device)

        # Initialize time step (for updating every "update_every" time steps)
        self.t_step = 0
Example 27
def make_unity_env(env_directory, num_env, visual, start_index=0):
    """
    Create a wrapped, monitored Unity environment.
    """
    def make_env(rank): # pylint: disable=C0111
        def _thunk():
            env = UnityEnv(env_directory, rank, use_visual=True)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return env
        return _thunk
    if visual:
        return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        env = UnityEnv(env_directory, rank, use_visual=False)
        env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
        return env
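A usage sketch for this factory, assuming the Baselines/MPI setup imported above; the binary path and worker count are placeholders:

# Illustrative call (hypothetical path): four visual Unity workers behind SubprocVecEnv.
env = make_unity_env("./envs/GridWorld", num_env=4, visual=True, start_index=0)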
Example 28
def test_gym_wrapper_visual(mock_env, use_uint8):
    mock_spec = create_mock_group_spec(number_visual_observations=1)
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1)
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step,
                                mock_terminal_step)

    env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)
Example 29
def make_env(env_name,
             seed=-1,
             render_mode=False,
             full_episode=False,
             workerid=1):

    env = UnityEnv(env_name, worker_id=workerid, use_visual=True)
    print(str(env))
    '''
  print("environment details")
  print("env.action_space", env.action_space)
  print("high, low", env.action_space.high, env.action_space.low)
  print("environment details")
  print("env.observation_space", env.observation_space)
  print("high, low", env.observation_space.high, env.observation_space.low)
  assert False
  '''
    return env
Example 30
def test_sanitize_action_shuffled_id(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    shuffled_step_result = create_mock_vector_step_result(num_agents=5)
    shuffled_order = [4, 2, 3, 1, 0]
    shuffled_step_result.reward = np.array(shuffled_order)
    shuffled_step_result.agent_id = np.array(shuffled_order)
    sanitized_result = env._sanitize_info(shuffled_step_result)
    for expected_reward, reward in zip(range(5), sanitized_result.reward):
        assert expected_reward == reward
    for expected_agent_id, agent_id in zip(range(5),
                                           sanitized_result.agent_id):
        assert expected_agent_id == agent_id
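What these assertions check is, in essence, a reordering of the step data back into agent-id order. A minimal NumPy sketch of that idea, not the wrapper's internal `_sanitize_info` implementation:

import numpy as np

# Illustrative reordering: sort rewards back into agent-id order.
agent_id = np.array([4, 2, 3, 1, 0])
reward = np.array([4.0, 2.0, 3.0, 1.0, 0.0])
order = np.argsort(agent_id)
print(agent_id[order])  # [0 1 2 3 4]
print(reward[order])    # [0. 1. 2. 3. 4.]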