Example #1
    def __init__(self, n_episodes, env_name, model):

        # Nº of episodes
        self.n_episodes = n_episodes

        # Environment
        self.env_name = env_name
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=False)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)

        # Model
        self.model = ActorCritic(self.state_size, self.action_size,
                                 seed=0).to(device)

        # Initialize time step (for updating every "update_every" time steps)
        self.t_step = 1

        # Start test
        self.load_model(model)
        self.test()
Example #2
    def __init__(self, env_id, n_agents):
        env_path = UnityVecEnv.GetFilePath(env_id, n_agents=n_agents)
        print("**** ", env_path)
        env = UnityEnv(env_path, multiagent=True)
        self.env = env
        env.num_envs = env.number_agents
        VecEnv.__init__(self, env.num_envs, env.observation_space,
                        env.action_space)
        obs_space = env.observation_space

        # self.keys, shapes, dtypes = obs_space_info(obs_space)
        # self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
        # self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
        # self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        # Fake Monitor
        self.tstart = time.time()
        self.results_writer = ResultsWriter("filename",
                                            header={
                                                "t_start": time.time(),
                                                'env_id': env.spec
                                                and env.spec.id
                                            },
                                            extra_keys=() + ())
        self.reset_keywords = ()
        self.info_keywords = ()
        self.allow_early_resets = True
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {
        }  # extra info about the current episode, that was passed in during reset()
Example #3
    def __init__(self, platform):
        if platform == OSName.MAC:
            env_filename = EnvironmentName.DRONE_RACING_MAC.value
        elif platform == OSName.WINDOWS:
            env_filename = EnvironmentName.DRONE_RACING_WINDOWS.value
        else:
            env_filename = None

        self.env = UnityEnv(
            environment_filename=env_filename,
            worker_id=randrange(65536),
            use_visual=False,
            multiagent=False
        ).unwrapped

        super(Drone_Racing, self).__init__()
        Drone_Racing.worker_id += 1
        self.action_shape = self.get_action_shape()
        self.action_space = self.env.action_space

        self.continuous = False

        self.skipping_state_fq = 3
        self.skipping_state_index = 0

        self.WIN_AND_LEARN_FINISH_SCORE = 200
Example #4
class Chaser_v1(Environment):
    unity_env_worker_id = 0

    def __init__(self, platform):
        if platform == OSName.MAC:
            env_filename = EnvironmentName.CHASER_V1_MAC.value
        elif platform == OSName.WINDOWS:
            env_filename = EnvironmentName.CHASER_V1_WINDOWS.value
        else:
            env_filename = None

        self.env = UnityEnv(environment_filename=env_filename,
                            worker_id=Chaser_v1.unity_env_worker_id,
                            use_visual=True,
                            multiagent=True).unwrapped
        self.increase_env_worker_id()
        super(Chaser_v1, self).__init__()
        self.action_shape = self.get_action_shape()
        self.state_shape = self.get_state_shape()

        self.cnn_input_height = self.state_shape[0]
        self.cnn_input_width = self.state_shape[1]
        self.cnn_input_channels = self.state_shape[2]

        self.observation_space = self.env.observation_space
        self.continuous = True

    @staticmethod
    def increase_env_worker_id():
        Chaser_v1.unity_env_worker_id += 1

    def get_n_states(self):
        n_states = 3
        return n_states

    def get_n_actions(self):
        n_actions = 3
        return n_actions

    def get_state_shape(self):
        return self.env.observation_space.shape

    def get_action_shape(self):
        return self.env.action_space.shape

    def reset(self):
        state = self.env.reset()
        return state

    def step(self, action):
        next_state, reward, done, info = self.env.step(action)

        adjusted_reward = reward

        return next_state, reward, adjusted_reward, done, info

    def close(self):
        self.env.close()
Example #5
 def __init__(self, env_config):
     self.vector_index = env_config.vector_index
     self.worker_index = env_config.worker_index
     self.worker_id = env_config["unity_worker_id"] + env_config.worker_index
     # Name of the Unity environment binary to launch
     env_name = '/home/jim/projects/unity_ray/basic_env_linux/basic_env_linux'
     self.env = UnityEnv(env_name, worker_id=self.worker_id, use_visual=False, multiagent=False, no_graphics=True) #
     self.action_space = self.env.action_space
     self.observation_space = self.env.observation_space
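Note: every running UnityEnv instance communicates over its own port, derived from worker_id, which is why this wrapper builds worker_id from the rollout worker index. A minimal sketch of that pattern, using a hypothetical make_unity_env helper (the base offset and arguments are assumptions for illustration, not part of the example above):

def make_unity_env(env_path, rank, base_worker_id=100):
    # Hypothetical helper: give each parallel environment a distinct
    # worker_id so concurrent Unity instances do not collide on the same port.
    from gym_unity.envs import UnityEnv
    return UnityEnv(env_path,
                    worker_id=base_worker_id + rank,
                    use_visual=False,
                    no_graphics=True)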
Example #6
    def __init__(self, wid):
        self.wid = wid
        self.env = UnityEnv(env_name,
                            worker_id=wid,
                            use_visual=False,
                            use_both=True)

        # self.env=Reacher(render=True)
        self.ppo = GLOBAL_PPO
Example #7
def worker(id, td3_trainer, rewards_queue, replay_buffer, max_episodes, max_steps, batch_size, explore_steps, \
            update_itr, explore_noise_scale, eval_noise_scale, reward_scale, DETERMINISTIC, hidden_dim, model_path):
    '''
    The function for sampling with multiprocessing.
    '''
    print(td3_trainer, replay_buffer)

    env_name="./tac_follow_new"
    env = UnityEnv(env_name, worker_id=id+15, use_visual=False, use_both=True)



    rewards = []  # smoothed episode-reward history, kept across episodes

    # training loop
    for eps in range(max_episodes):
        frame_idx = 0
        episode_reward = 0
        state, info = env.reset()
        # state=state[:6]
        
        for step in range(max_steps):
            if frame_idx > explore_steps:
                action = td3_trainer.policy_net.get_action(state, deterministic = DETERMINISTIC, explore_noise_scale=explore_noise_scale)
            else:
                action = td3_trainer.policy_net.sample_action()
    
            try:
                next_state, reward, done, info = env.step(action)
                # next_state = next_state[:6]
            except KeyboardInterrupt:
                # Save once and stop sampling; otherwise next_state would be
                # undefined in the code below.
                print('Finished')
                td3_trainer.save_model(model_path)
                return
    
            replay_buffer.push(state, action, reward, next_state, done)
            
            state = next_state
            episode_reward += reward
            frame_idx += 1
            
            
            # if len(replay_buffer) > batch_size:
            if replay_buffer.get_length() > batch_size:
                for i in range(update_itr):
                    _=td3_trainer.update(batch_size, deterministic=DETERMINISTIC, eval_noise_scale=eval_noise_scale, reward_scale=reward_scale)
            
            if eps % 10 == 0 and eps>0:
                # plot(rewards, id)
                td3_trainer.save_model(model_path)
            
            if done:
                break
        print('Episode: ', eps, '| Episode Reward: ', episode_reward)
        if len(rewards) == 0: rewards.append(episode_reward)
        else: rewards.append(rewards[-1]*0.9+episode_reward*0.1)
        rewards_queue.put(episode_reward)

    td3_trainer.save_model(model_path)
Example #8
def main(env_name):
    """
    Run the gym test using the specified environment
    :param env_name: Name of the Unity environment binary to launch
    """
    env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)

    try:
        # Examine environment parameters
        print(str(env))

        # Reset the environment
        initial_observations = env.reset()

        if len(env.observation_space.shape) == 1:
            # Examine the initial vector observation
            print("Agent observations look like: \n{}".format(
                initial_observations))

        for _episode in range(10):
            env.reset()
            done = False
            episode_rewards = 0
            while not done:
                actions = env.action_space.sample()
                obs, reward, done, _ = env.step(actions)
                episode_rewards += reward
            print("Total reward this episode: {}".format(episode_rewards))
    finally:
        env.close()
Example #9
def test_gym_wrapper(mock_env):
    mock_brain = create_mock_group_spec()
    mock_braininfo = create_mock_vector_step_result()
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
Example #10
def test_multi_agent(mock_env):
    mock_brain = create_mock_group_spec()
    mock_braininfo = create_mock_vector_step_result(num_agents=2)
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    with pytest.raises(UnityGymException):
        UnityEnv(" ", multiagent=False)

    env = UnityEnv(" ", use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
Example #11
def test_gym_wrapper(mock_env):
    mock_brain = create_mock_brainparams()
    mock_braininfo = create_mock_vector_braininfo()
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
Example #12
        def _thunk():
            if env_type == 'unity':
                worker_id = 32 + rank
                print ("***** UnityEnv", env_id, worker_id, rank)
                env = UnityEnv(env_id, worker_id)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example #13
def test_branched_flatten(mock_env):
    mock_brain = create_mock_brainparams(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_braininfo = create_mock_vector_braininfo(num_agents=1)
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
    assert env._flattener.lookup_action(11) == [1, 1, 2]

    # Check that False produces a MultiDiscrete
    env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=False)
    assert isinstance(env.action_space, spaces.MultiDiscrete)
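The two lookup assertions above pin down what flatten_branched does: a branched discrete space with sizes [2, 2, 3] is enumerated into a single Discrete(12) space whose flat indices map back to per-branch actions. A minimal standalone sketch of that enumeration (an illustration, not the gym_unity implementation):

from itertools import product

branch_sizes = [2, 2, 3]
# Enumerate every branch combination; the flat index then looks up the
# per-branch action list, matching the assertions above.
lookup = [list(combo) for combo in product(*(range(n) for n in branch_sizes))]

assert len(lookup) == 12
assert lookup[0] == [0, 0, 0]
assert lookup[11] == [1, 1, 2]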
Example #14
def test_sanitize_action_one_agent_done(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    received_step_result = create_mock_vector_step_result(num_agents=6)
    received_step_result.agent_id = np.array(range(6))
    # agent #3 (id = 2) is Done
    received_step_result.done = np.array([False] * 2 + [True] + [False] * 3)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([0, 1, 5, 3, 4],
                                           sanitized_result.agent_id):
        assert expected_agent_id == agent_id
Example #15
    def run():
        # LINUX: Disable the Unity window -> no_graphics=True
        env = UnityEnv(env_name,
                       worker_id=1000,
                       use_visual=False,
                       uint8_visual=False,
                       allow_multiple_visual_obs=False,
                       no_graphics=False)

        # Create the agent
        model = PPO2(MlpPolicy, env, verbose=0, learning_rate=1.0e-4)
        model.learn(total_timesteps=num_episodes)

        env.close()

        print("Successfully trained")
Example #16
 def _init():
     # env = gym.make(env_id)
     # env.seed(seed + rank)
     env = UnityEnv(env_id, worker_id=rank, use_visual=False, no_graphics=True)
     #env.seed(seed + rank)
     env = Monitor(env, log_dir, allow_early_resets=True)
     return env
Example #17
        def _thunk():
            if env_type == 'unity':
                from gym_unity.envs import UnityEnv
                import random; r=random.randint(64,164)
                print ("***** HELLO", mpi_rank + r)
                env = UnityEnv(env_id, mpi_rank + r)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example #18
    def __init__(self):

        # Hyperparameters
        self.learning_rate = 0.0003
        self.betas = (0.9, 0.999)
        self.gamma = 0.99
        self.eps_clip = 0.2
        self.buffer_size = 2048
        self.batch_size = 256
        self.K_epochs = 3
        self.max_steps = 100000

        self.tau = 0.95
        self.entropy_coef = 0.001
        self.value_loss_coef = 0.5

        self.summary_freq = 1000

        # Environment
        self.env_name = "Environments/env1/Unity Environment"
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=True)
        channel.set_configuration_parameters(time_scale=100)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)
        self.n_agents = self.env.number_agents
        print("Nº of Agents: ", self.n_agents)

        # Model
        self.model = ActorCritic(self.state_size, self.action_size,
                                 seed=0).to(device)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate,
                                    betas=self.betas)
        self.MseLoss = nn.MSELoss()

        # Buffer memory
        self.memory = []
        for _ in range(self.n_agents):
            self.memory.append(Buffer())

        # Initialize time step (for updating when buffer_size is full)
        self.t_step = 1
Example #19
    def __init__(self):

        # Hyperparameters
        self.learning_rate = 0.0003
        self.buffer_size = 10240
        self.batch_size = 1024
        self.gamma = 0.99
        self.update_every = 64
        self.max_steps = 100000

        self.epsilon = 1.0
        self.epsilon_end = 0.01
        self.epsilon_decay = 0.995
        self.tau = 0.01

        self.summary_freq = 1000

        # Environment
        self.env_name = "Environments/env1/Unity Environment"
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=False)
        channel.set_configuration_parameters(time_scale=100)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)
        self.n_agents = self.env.number_agents

        # Models
        self.local_model = QNetwork(self.state_size, self.action_size,
                                    seed=0).to(device)
        self.target_model = QNetwork(self.state_size, self.action_size,
                                     seed=0).to(device)
        self.optimizer = optim.Adam(self.local_model.parameters(),
                                    lr=self.learning_rate)

        # Buffer memory
        self.memory = Buffer(self.buffer_size,
                             self.batch_size,
                             seed=0,
                             device=device)

        # Initialize time step (for updating every "update_every" time steps)
        self.t_step = 0
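The local/target QNetwork pair and tau = 0.01 above suggest a soft (Polyak) target update somewhere in the training loop. A minimal sketch of such an update, assuming PyTorch modules (the helper name is hypothetical, not taken from this example):

def soft_update(local_model, target_model, tau):
    # θ_target ← τ·θ_local + (1 − τ)·θ_target, applied parameter by parameter.
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(tau * local_param.data +
                                (1.0 - tau) * target_param.data)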
Example #20
def test_branched_flatten(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, num_agents=1)
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step,
                                mock_terminal_step)

    env = UnityEnv(" ", use_visual=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
    assert env._flattener.lookup_action(11) == [1, 1, 2]

    # Check that False produces a MultiDiscrete
    env = UnityEnv(" ", use_visual=False, flatten_branched=False)
    assert isinstance(env.action_space, spaces.MultiDiscrete)
Example #21
def test_sanitize_action_single_agent_multiple_done(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=1)
    mock_step.agent_id = np.array(range(1))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=False)

    received_step_result = create_mock_vector_step_result(num_agents=3)
    received_step_result.agent_id = np.array(range(3))
    # original agent (id = 0) is Done
    # so is the "new" agent (id = 1)
    done = [True, True, False]
    received_step_result.done = np.array(done)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([2], sanitized_result.agent_id):
        assert expected_agent_id == agent_id
Example #22
def test_gym_wrapper_visual(mock_env, use_uint8):
    mock_spec = create_mock_group_spec(number_visual_observations=1)
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1)
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step,
                                mock_terminal_step)

    env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)
Example #23
def test_sanitize_action_shuffled_id(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    shuffled_step_result = create_mock_vector_step_result(num_agents=5)
    shuffled_order = [4, 2, 3, 1, 0]
    shuffled_step_result.reward = np.array(shuffled_order)
    shuffled_step_result.agent_id = np.array(shuffled_order)
    sanitized_result = env._sanitize_info(shuffled_step_result)
    for expected_reward, reward in zip(range(5), sanitized_result.reward):
        assert expected_reward == reward
    for expected_agent_id, agent_id in zip(range(5),
                                           sanitized_result.agent_id):
        assert expected_agent_id == agent_id
Example #24
 def env_ctor():
     env_path = GetFilePath(domain, n_agents=1)
     # env = UnityEnv(env_path, multiagent=True)
     from random import randint
     worker_id = randint(0, 9)
     print("---***** worker_id", worker_id)
     env = UnityEnv(env_path, worker_id=worker_id)
     # env = control.wrappers.CollectGymDataset(env, params.save_episode_dir)
     return env
Example #25
def test_multi_agent(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=2)

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', multiagent=False)

    env = UnityEnv(' ', use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
    assert isinstance(info, dict)
Example #26
def test_sanitize_action_new_agent_done(mock_env):
    mock_spec = create_mock_group_spec(vector_action_space_type="discrete",
                                       vector_action_space_size=[2, 2, 3])
    mock_step = create_mock_vector_step_result(num_agents=3)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
    env = UnityEnv(" ", use_visual=False, multiagent=True)

    received_step_result = create_mock_vector_step_result(num_agents=7)
    received_step_result.agent_id = np.array(range(7))
    # agent #3 (id = 2) is Done
    # so is the "new" agent (id = 5)
    done = [False] * 7
    done[2] = True
    done[5] = True
    received_step_result.done = np.array(done)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([0, 1, 6, 3, 4],
                                           sanitized_result.agent_id):
        assert expected_agent_id == agent_id
Example #27
    class UnityEnvWrapper(gym.Env):
        def __init__(self, env_config):
            #self.vector_index = env_config.vector_index
            # self.worker_index = env_config.worker_index
            # self.worker_id = env_config["unity_worker_id"] + env_config.worker_index
            # Name of the Unity environment binary to launch
            env_name = '/home/jim/projects/unity_ray/basic_env_linux/basic_env_linux'
            self.env = UnityEnv(env_name,
                                worker_id=16,
                                use_visual=False,
                                multiagent=False,
                                no_graphics=True)  #
            self.action_space = self.env.action_space
            self.observation_space = self.env.observation_space

        def reset(self):
            return self.env.reset()

        def step(self, action):
            return self.env.step(action)
Example #28
def test_gym_wrapper(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=1)

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', use_visual=False, multiagent=True)

    env = UnityEnv(' ', use_visual=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
Example #29
    def __init__(self, env_id):
        print ("**** ", env_id, platform.system())
        # env = UnityEnv(env_id, multiagent=True)
        env_id = "hopper"
        # env_id = "walker"
        if platform.system() == 'Windows':
            env_path = os.path.join('envs', env_id+'-x16', 'Unity Environment.exe')
        elif platform.system() == 'Darwin': # MacOS
            env_path = os.path.join('envs', env_id+'-x16')
        elif platform.system() == 'Linux': 
            env_path = os.path.join('envs', env_id+'-x16')
        print ("**** Override", env_path, env_id)
        env = UnityEnv(env_path, multiagent=True)
        self.env = env
        env.num_envs = env.number_agents
        VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space)
        # obs_space = env.observation_space
        # spec = env.spec

        # self.keys, shapes, dtypes = obs_space_info(obs_space)
        # self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
        # self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
        # self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        # Fake Monitor
        self.tstart = time.time()
        self.results_writer = ResultsWriter(
            "filename",
            header={"t_start": time.time(), 'env_id' : env.spec and env.spec.id},
            extra_keys=() + ()
        )
        self.reset_keywords = ()
        self.info_keywords = ()
        self.allow_early_resets = True
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
Example #30
 def __init__(self):
    
     # Hyperparameters
     self.learning_rate = 0.0003
     self.gamma = 0.99
     self.batch_size = 256
     self.max_steps = 100000
     
     self.tau = 0.95
     self.entropy_coef = 0.001
     self.value_loss_coef = 0.5
     
     self.summary_freq = 1000
     
     # Environment
     self.env_name = "Environments/env1/Unity Environment"
     channel = EngineConfigurationChannel()
     self.env = UnityEnv(self.env_name, worker_id=0, use_visual=False, side_channels=[channel], no_graphics = False, multiagent = True)
     channel.set_configuration_parameters(time_scale = 100)
     self.action_size, self.state_size  = Utils.getActionStateSize(self.env)
     self.n_agents = self.env.number_agents
     print("Nº of Agents: ",self.n_agents)
     
     # Shared model
     self.shared_model = ActorCritic(self.state_size, self.action_size, seed = 0).to(device)
     
     # Agents models
     self.agent_model = []
     self.optimizer = []
     for i in range(self.n_agents):
         self.agent_model.append(ActorCritic(self.state_size, self.action_size, seed = 0).to(device))
         self.optimizer.append(optim.Adam(self.agent_model[i].parameters(), lr=self.learning_rate))
     
     # Buffer memory
     self.memory = []
     for _ in range(self.n_agents):
         self.memory.append(Buffer())
         
     # Initialize time step (for updating every "batch_size" time steps)
     self.t_step = 1