def test_multi_agent(mock_env):
    mock_brain = create_mock_group_spec()
    mock_braininfo = create_mock_vector_step_result(num_agents=2)
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    with pytest.raises(UnityGymException):
        UnityEnv(" ", multiagent=False)

    env = UnityEnv(" ", use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
def main(env_name):
    """
    Run the gym test using the specified environment
    :param env_name: Name of the Unity environment binary to launch
    """
    env = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
    try:
        # Examine environment parameters
        print(str(env))

        # Reset the environment
        initial_observations = env.reset()

        if len(env.observation_space.shape) == 1:
            # Examine the initial vector observation
            print("Agent observations look like: \n{}".format(initial_observations))

        for _episode in range(10):
            env.reset()
            done = False
            episode_rewards = 0
            while not done:
                actions = env.action_space.sample()
                obs, reward, done, _ = env.step(actions)
                episode_rewards += reward
            print("Total reward this episode: {}".format(episode_rewards))
    finally:
        env.close()
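# A minimal sketch (assumption, not part of the original script) of how main()
# could be wired to the command line; the argparse flags are illustrative only.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Run the gym_unity smoke test.")
    parser.add_argument("env_name", help="Path to the Unity environment binary")
    args = parser.parse_args()
    main(args.env_name)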
def __init__(self, env_id, n_agents):
    env_path = UnityVecEnv.GetFilePath(env_id, n_agents=n_agents)
    print("**** ", env_path)
    env = UnityEnv(env_path, multiagent=True)
    self.env = env
    env.num_envs = env.number_agents
    VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space)

    obs_space = env.observation_space
    # self.keys, shapes, dtypes = obs_space_info(obs_space)
    # self.buf_obs = {k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys}
    # self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
    # self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]

    # Fake Monitor: replicate the bookkeeping that the Monitor wrapper would normally provide.
    self.tstart = time.time()
    self.results_writer = ResultsWriter(
        "filename",
        header={"t_start": time.time(), "env_id": env.spec and env.spec.id},
        extra_keys=(),
    )
    self.reset_keywords = ()
    self.info_keywords = ()
    self.allow_early_resets = True
    self.rewards = None
    self.needs_reset = True
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_times = []
    self.total_steps = 0
    # Extra info about the current episode, passed in during reset().
    self.current_reset_info = {}
def __init__(self, n_episodes, env_name, model):
    # Number of episodes
    self.n_episodes = n_episodes

    # Environment
    self.env_name = env_name
    channel = EngineConfigurationChannel()
    self.env = UnityEnv(self.env_name, worker_id=0, use_visual=False,
                        side_channels=[channel], no_graphics=False, multiagent=False)
    self.action_size, self.state_size = Utils.getActionStateSize(self.env)

    # Model
    self.model = ActorCritic(self.state_size, self.action_size, seed=0).to(device)

    # Initialize time step (for updating every "update_every" time steps)
    self.t_step = 1

    # Start test
    self.load_model(model)
    self.test()
def __init__(self, platform):
    if platform == OSName.MAC:
        env_filename = EnvironmentName.DRONE_RACING_MAC.value
    elif platform == OSName.WINDOWS:
        env_filename = EnvironmentName.DRONE_RACING_WINDOWS.value
    else:
        env_filename = None

    self.env = UnityEnv(
        environment_filename=env_filename,
        worker_id=randrange(65536),
        use_visual=False,
        multiagent=False
    ).unwrapped

    super(Drone_Racing, self).__init__()
    Drone_Racing.worker_id += 1

    self.action_shape = self.get_action_shape()
    self.action_space = self.env.action_space
    self.continuous = False
    self.skipping_state_fq = 3
    self.skipping_state_index = 0
    self.WIN_AND_LEARN_FINISH_SCORE = 200
def _init():
    # env = gym.make(env_id)
    # env.seed(seed + rank)
    env = UnityEnv(env_id, worker_id=rank, use_visual=False, no_graphics=True)
    # env.seed(seed + rank)
    env = Monitor(env, log_dir, allow_early_resets=True)
    return env
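# A minimal sketch (assumption, not taken from the original file) of how a thunk
# like _init above is usually consumed: build one per worker rank and hand the
# list to a vectorized wrapper. The import paths are the stable-baselines 2.x
# ones; the binary path "./envs/3DBall" is a placeholder.
from stable_baselines.bench import Monitor
from stable_baselines.common.vec_env import SubprocVecEnv
from gym_unity.envs import UnityEnv


def make_rank_fn(env_id, rank, log_dir):
    def _init():
        env = UnityEnv(env_id, worker_id=rank, use_visual=False, no_graphics=True)
        return Monitor(env, log_dir, allow_early_resets=True)
    return _init


# vec_env = SubprocVecEnv([make_rank_fn("./envs/3DBall", rank, "./logs") for rank in range(4)])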
def test_branched_flatten(mock_env):
    mock_brain = create_mock_brainparams(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_braininfo = create_mock_vector_braininfo(num_agents=1)
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
    assert env._flattener.lookup_action(11) == [1, 1, 2]

    # Check that False produces a MultiDiscrete
    env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=False)
    assert isinstance(env.action_space, spaces.MultiDiscrete)
def test_branched_flatten(mock_env):
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(mock_spec, num_agents=1)
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step, mock_terminal_step)

    env = UnityEnv(" ", use_visual=False, flatten_branched=True)
    assert isinstance(env.action_space, spaces.Discrete)
    assert env.action_space.n == 12
    assert env._flattener.lookup_action(0) == [0, 0, 0]
    assert env._flattener.lookup_action(11) == [1, 1, 2]

    # Check that False produces a MultiDiscrete
    env = UnityEnv(" ", use_visual=False, flatten_branched=False)
    assert isinstance(env.action_space, spaces.MultiDiscrete)
def __init__(self, env_config):
    self.vector_index = env_config.vector_index
    self.worker_index = env_config.worker_index
    self.worker_id = env_config["unity_worker_id"] + env_config.worker_index

    # Name of the Unity environment binary to launch
    env_name = '/home/jim/projects/unity_ray/basic_env_linux/basic_env_linux'
    self.env = UnityEnv(env_name, worker_id=self.worker_id, use_visual=False,
                        multiagent=False, no_graphics=True)

    # Expose the wrapped spaces so the RLlib worker can build its policy.
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
def env_ctor():
    env_path = GetFilePath(domain, n_agents=1)
    # env = UnityEnv(env_path, multiagent=True)
    from random import randint
    worker_id = randint(0, 9)
    print("---***** worker_id", worker_id)
    env = UnityEnv(env_path, worker_id=worker_id)
    # env = control.wrappers.CollectGymDataset(env, params.save_episode_dir)
    return env
def worker(id, td3_trainer, rewards_queue, replay_buffer, max_episodes, max_steps,
           batch_size, explore_steps, update_itr, explore_noise_scale, eval_noise_scale,
           reward_scale, DETERMINISTIC, hidden_dim, model_path):
    '''the function for sampling with multi-processing'''
    print(td3_trainer, replay_buffer)
    env_name = "./tac_follow_new"
    env = UnityEnv(env_name, worker_id=id + 15, use_visual=False, use_both=True)

    # training loop
    rewards = []  # smoothed episode rewards; kept across episodes so the running average works
    for eps in range(max_episodes):
        frame_idx = 0
        episode_reward = 0
        state, info = env.reset()
        # state = state[:6]

        for step in range(max_steps):
            if frame_idx > explore_steps:
                action = td3_trainer.policy_net.get_action(
                    state,
                    deterministic=DETERMINISTIC,
                    explore_noise_scale=explore_noise_scale)
            else:
                action = td3_trainer.policy_net.sample_action()

            try:
                next_state, reward, done, info = env.step(action)
                # next_state = next_state[:6]
            except KeyboardInterrupt:
                print('Finished')
                td3_trainer.save_model(model_path)

            replay_buffer.push(state, action, reward, next_state, done)
            state = next_state
            episode_reward += reward
            frame_idx += 1

            # if len(replay_buffer) > batch_size:
            if replay_buffer.get_length() > batch_size:
                for i in range(update_itr):
                    _ = td3_trainer.update(batch_size, deterministic=DETERMINISTIC,
                                           eval_noise_scale=eval_noise_scale,
                                           reward_scale=reward_scale)

            if eps % 10 == 0 and eps > 0:
                # plot(rewards, id)
                td3_trainer.save_model(model_path)

            if done:
                break

        print('Episode: ', eps, '| Episode Reward: ', episode_reward)
        if len(rewards) == 0:
            rewards.append(episode_reward)
        else:
            rewards.append(rewards[-1] * 0.9 + episode_reward * 0.1)
        rewards_queue.put(episode_reward)

    td3_trainer.save_model(model_path)
def __init__(self, wid):
    self.wid = wid
    self.env = UnityEnv(env_name, worker_id=wid, use_visual=False, use_both=True)
    # self.env = Reacher(render=True)
    self.ppo = GLOBAL_PPO
def test_multi_agent(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0, stack=False, num_agents=2
    )

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', multiagent=False)

    env = UnityEnv(' ', use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for i in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
    assert isinstance(info, dict)
def test_gym_wrapper(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0, stack=False, num_agents=1
    )

    # Test for incorrect number of agents.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', use_visual=False, multiagent=True)

    env = UnityEnv(' ', use_visual=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
def main():
    env_path = 'AutoBenchExecutable/AutoBenchExecutable'
    curriculum_file = 'config/curricula/autobench/AutoBenchBrain.json'
    camera_res_overwrite, use_visual = extract_camera_config_gym(curriculum_file)

    # Setup the Unity Environment
    env = UnityEnv(environment_filename=env_path,
                   worker_id=0,
                   use_visual=use_visual,
                   multiagent=True,
                   env_config=get_env_config(curriculum_file),
                   camera_res_overwrite=camera_res_overwrite)
def make_env(env_spec, concurrency=None):
    '''Makes an environment from the spec. `concurrency` determines whether/how the env is run in a sub-process.'''
    if concurrency is None:
        try:
            try_register_env(env_spec['name'])
            return gym.make(env_spec['name'], *env_spec.get('args', []), **env_spec.get('kwargs', {}))
        except:
            worker_id = int(f'{os.getpid()}{ps.unique_id()}'[-4:])
            return UnityEnv(get_env_path(env_spec['name']), worker_id=worker_id,
                            *env_spec.get('args', []), **env_spec.get('kwargs', {}))
    return ConcurrentEnv(env_spec, concurrency=concurrency)
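# Hypothetical spec illustrating the shape make_env expects: 'name' is tried as a
# registered gym id first; on failure the call falls back to a Unity binary located
# via get_env_path. The values below are placeholders, not from the original module.
example_spec = {
    'name': '3DBall',
    'kwargs': {'use_visual': False, 'no_graphics': True},
}
# env = make_env(example_spec)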
def test_gym_wrapper(mock_env):
    mock_brain = create_mock_brainparams()
    mock_braininfo = create_mock_vector_braininfo()
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
def test_closing(env_name):
    """
    Run the gym test and close the environment multiple times
    :param env_name: Name of the Unity environment binary to launch
    """
    try:
        env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
        env1.close()
        env1 = UnityEnv(env_name, worker_id=1, use_visual=False, no_graphics=True)
        env2 = UnityEnv(env_name, worker_id=2, use_visual=False, no_graphics=True)
        env2.reset()
    finally:
        env1.close()
        env2.close()
def test_gym_wrapper(mock_env):
    mock_brain = create_mock_group_spec()
    mock_braininfo = create_mock_vector_step_result()
    setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
def _thunk():
    if env_type == 'unity':
        worker_id = 32 + rank
        print("***** UnityEnv", env_id, worker_id, rank)
        env = UnityEnv(env_id, worker_id)
    else:
        env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
        env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def run():
    # LINUX: Disable the Unity window -> no_graphics=True
    env = UnityEnv(env_name, worker_id=1000, use_visual=False, uint8_visual=False,
                   allow_multiple_visual_obs=False, no_graphics=False)

    # Create the agent
    model = PPO2(MlpPolicy, env, verbose=0, learning_rate=1.0e-4)
    model.learn(total_timesteps=num_episodes)
    env.close()
    print("Successfully trained")
def _thunk():
    if env_type == 'unity':
        from gym_unity.envs import UnityEnv
        import random
        r = random.randint(64, 164)
        print("***** HELLO", mpi_rank + r)
        env = UnityEnv(env_id, mpi_rank + r)
    else:
        env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
        env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def test_sanitize_action_one_agent_done(mock_env):
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)

    env = UnityEnv(" ", use_visual=False, multiagent=True)
    received_step_result = create_mock_vector_step_result(num_agents=6)
    received_step_result.agent_id = np.array(range(6))
    # agent #3 (id = 2) is Done
    received_step_result.done = np.array([False] * 2 + [True] + [False] * 3)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([0, 1, 5, 3, 4], sanitized_result.agent_id):
        assert expected_agent_id == agent_id
def __init__(self):
    # Hyperparameters
    self.learning_rate = 0.0003
    self.betas = (0.9, 0.999)
    self.gamma = 0.99
    self.eps_clip = 0.2
    self.buffer_size = 2048
    self.batch_size = 256
    self.K_epochs = 3
    self.max_steps = 100000
    self.tau = 0.95
    self.entropy_coef = 0.001
    self.value_loss_coef = 0.5
    self.summary_freq = 1000

    # Environment
    self.env_name = "Environments/env1/Unity Environment"
    channel = EngineConfigurationChannel()
    self.env = UnityEnv(self.env_name, worker_id=0, use_visual=False,
                        side_channels=[channel], no_graphics=False, multiagent=True)
    channel.set_configuration_parameters(time_scale=100)
    self.action_size, self.state_size = Utils.getActionStateSize(self.env)
    self.n_agents = self.env.number_agents
    print("Nº of Agents: ", self.n_agents)

    # Model
    self.model = ActorCritic(self.state_size, self.action_size, seed=0).to(device)
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate, betas=self.betas)
    self.MseLoss = nn.MSELoss()

    # Buffer memory
    self.memory = []
    for _ in range(self.n_agents):
        self.memory.append(Buffer())

    # Initialize time step (for updating when buffer_size is full)
    self.t_step = 1
def test_sanitize_action_single_agent_multiple_done(mock_env):
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_step = create_mock_vector_step_result(num_agents=1)
    mock_step.agent_id = np.array(range(1))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)

    env = UnityEnv(" ", use_visual=False, multiagent=False)
    received_step_result = create_mock_vector_step_result(num_agents=3)
    received_step_result.agent_id = np.array(range(3))
    # original agent (id = 0) is Done
    # so is the "new" agent (id = 1)
    done = [True, True, False]
    received_step_result.done = np.array(done)
    sanitized_result = env._sanitize_info(received_step_result)
    for expected_agent_id, agent_id in zip([2], sanitized_result.agent_id):
        assert expected_agent_id == agent_id
def __init__(self):
    # Hyperparameters
    self.learning_rate = 0.0003
    self.buffer_size = 10240
    self.batch_size = 1024
    self.gamma = 0.99
    self.update_every = 64
    self.max_steps = 100000
    self.epsilon = 1.0
    self.epsilon_end = 0.01
    self.epsilon_decay = 0.995
    self.tau = 0.01
    self.summary_freq = 1000

    # Environment
    self.env_name = "Environments/env1/Unity Environment"
    channel = EngineConfigurationChannel()
    self.env = UnityEnv(self.env_name, worker_id=0, use_visual=False,
                        side_channels=[channel], no_graphics=False, multiagent=False)
    channel.set_configuration_parameters(time_scale=100)
    self.action_size, self.state_size = Utils.getActionStateSize(self.env)
    self.n_agents = self.env.number_agents

    # Models
    self.local_model = QNetwork(self.state_size, self.action_size, seed=0).to(device)
    self.target_model = QNetwork(self.state_size, self.action_size, seed=0).to(device)
    self.optimizer = optim.Adam(self.local_model.parameters(), lr=self.learning_rate)

    # Buffer memory
    self.memory = Buffer(self.buffer_size, self.batch_size, seed=0, device=device)

    # Initialize time step (for updating every "update_every" time steps)
    self.t_step = 0
def make_unity_env(env_directory, num_env, visual, start_index=0):
    """
    Create a wrapped, monitored Unity environment.
    """
    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = UnityEnv(env_directory, rank, use_visual=True)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return env
        return _thunk

    if visual:
        return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        env = UnityEnv(env_directory, rank, use_visual=False)
        env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
        return env
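# A rough usage sketch for make_unity_env above; the binary path, env count, and
# learner call are placeholders (baselines' ppo2.learn is one plausible consumer).
# env = make_unity_env("./envs/GridWorld", num_env=4, visual=True)
# from baselines.ppo2 import ppo2
# model = ppo2.learn(network="cnn", env=env, total_timesteps=100000)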
def test_gym_wrapper_visual(mock_env, use_uint8):
    mock_spec = create_mock_group_spec(number_visual_observations=1)
    mock_decision_step, mock_terminal_step = create_mock_vector_steps(
        mock_spec, number_visual_observations=1
    )
    setup_mock_unityenvironment(mock_env, mock_spec, mock_decision_step, mock_terminal_step)

    env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert env.observation_space.contains(obs)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, (bool, np.bool_))
    assert isinstance(info, dict)
def make_env(env_name, seed=-1, render_mode=False, full_episode=False, workerid=1):
    env = UnityEnv(env_name, worker_id=workerid, use_visual=True)
    print(str(env))
    '''
    print("environment details")
    print("env.action_space", env.action_space)
    print("high, low", env.action_space.high, env.action_space.low)
    print("environment details")
    print("env.observation_space", env.observation_space)
    print("high, low", env.observation_space.high, env.observation_space.low)
    assert False
    '''
    return env
def test_sanitize_action_shuffled_id(mock_env):
    mock_spec = create_mock_group_spec(
        vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
    )
    mock_step = create_mock_vector_step_result(num_agents=5)
    mock_step.agent_id = np.array(range(5))
    setup_mock_unityenvironment(mock_env, mock_spec, mock_step)

    env = UnityEnv(" ", use_visual=False, multiagent=True)
    shuffled_step_result = create_mock_vector_step_result(num_agents=5)
    shuffled_order = [4, 2, 3, 1, 0]
    shuffled_step_result.reward = np.array(shuffled_order)
    shuffled_step_result.agent_id = np.array(shuffled_order)
    sanitized_result = env._sanitize_info(shuffled_step_result)
    for expected_reward, reward in zip(range(5), sanitized_result.reward):
        assert expected_reward == reward
    for expected_agent_id, agent_id in zip(range(5), sanitized_result.agent_id):
        assert expected_agent_id == agent_id