def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(" ")
            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.action_masks: np.ones([2, 2]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()

def init_unity_env(env_path, show_visuals=True):
    worker_id = 0
    done = False
    while not done:
        if worker_id > 64:
            sys.exit()
        try:
            env = UnityEnvironment(env_path, worker_id=worker_id,
                                   no_graphics=not show_visuals)
            done = True
        except mlagents.envs.exception.UnityWorkerInUseException:
            worker_id += 1

    env.reset(train_mode=True)
    brain_name = list(env.brains.keys())[0]
    state_space = env.brains[brain_name].vector_observation_space_size
    action_space = env.brains[brain_name].vector_action_space_size
    n_agents = env._n_agents[brain_name]
    multiagent = n_agents > 1
    return env, state_space, action_space, n_agents, multiagent, brain_name

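# Hedged usage sketch for init_unity_env (the build path "builds/Walker" is
# hypothetical; assumes the ML-Agents 0.x BrainInfo API used in this file):
def _example_init_unity_env():
    env, state_space, action_space, n_agents, multiagent, brain_name = \
        init_unity_env("builds/Walker", show_visuals=False)
    print("states:", state_space, "actions:", action_space,
          "agents:", n_agents)
    env.close()
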
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(" ")
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True, m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
                model.memory_out,
            ]
            feed_dict = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()

def do_rollout(env: UnityEnvironment, brain_name):
    """
    Builds a path by running through an environment, selecting random
    continuous actions.
    """
    obs, rewards, actions, human_obs = [], [], [], []
    curr_info = env.reset(train_mode=False)[brain_name]

    # Primary environment loop
    while not env.global_done:
        ob = curr_info
        action = 2 * np.random.rand() - 1
        obs.append(ob)
        actions.append(action)
        # env.step() returns AllBrainInfo (a dict of BrainInfo keyed by brain
        # name), so step once and read rewards/done from the BrainInfo rather
        # than stepping twice and unpacking a 4-tuple.
        new_info = env.step(vector_action=action)[brain_name]
        rewards.append(new_info.rewards[0])
        # BrainInfo carries no auxiliary info dict; keep a placeholder so the
        # path layout still has a "human_obs" entry.
        human_obs.append(None)
        if new_info.local_done[0]:
            break
        curr_info = new_info

    # Build path dictionary
    path = {
        "obs": np.array(obs),
        "original_rewards": np.array(rewards),
        "actions": np.array(actions),
        "human_obs": np.array(human_obs),
    }
    return path

def _init_env(self, replay_config, sac_config, model_root_path):
    if self.config['build_path'] is None or self.config['build_path'] == '':
        self.env = UnityEnvironment()
    else:
        self.env = UnityEnvironment(file_name=self.config['build_path'],
                                    no_graphics=True,
                                    base_port=self.config['build_port'],
                                    args=['--scene', self.config['scene']])

    self.logger.info(f'{self.config["build_path"]} initialized')

    self.default_brain_name = self.env.brain_names[0]

    brain_params = self.env.brains[self.default_brain_name]
    state_dim = (brain_params.vector_observation_space_size *
                 brain_params.num_stacked_vector_observations)
    action_dim = brain_params.vector_action_space_size[0]

    custom_sac_model = importlib.import_module(self.config['sac'])
    shutil.copyfile(f'{self.config["sac"]}.py',
                    f'{model_root_path}/{self.config["sac"]}.py')

    self.sac = SAC_DS_with_Replay_Base(
        state_dim=state_dim,
        action_dim=action_dim,
        model_root_path=model_root_path,
        model=custom_sac_model,
        use_rnn=self.config['use_rnn'],
        replay_config=replay_config,
        burn_in_step=self.config['burn_in_step'],
        n_step=self.config['n_step'],
        **sac_config)

def test_ppo_get_value_estimates(mock_communicator, mock_launcher,
                                 dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.external_brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.external_brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, env.brains[env.external_brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.get_value_estimates(brain_info, 0, done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    env.close()

def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(" ")
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed

def setup_connection_with_unity(self, build_scene):
    # Connect to Unity and get environment
    self.env = UnityEnvironment(file_name=build_scene, worker_id=0, seed=1)
    # Reset the environment
    self.env_info = self.env.reset(train_mode=True)
    # Set the default brain to work with
    self.default_brain = "Robot"

def test_step(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.step()
    brain_info = env.step([0] * brain.vector_action_space_size[0] *
                          len(brain_info["RealFakeBrain"].agents))
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] *
                          len(brain_info["RealFakeBrain"].agents))
    env.close()
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
    assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
    assert isinstance(brain_info["RealFakeBrain"].vector_observations,
                      np.ndarray)
    assert (len(brain_info["RealFakeBrain"].visual_observations) ==
            brain.number_visual_observations)
    assert len(brain_info["RealFakeBrain"].vector_observations) == len(
        brain_info["RealFakeBrain"].agents)
    assert (len(brain_info["RealFakeBrain"].vector_observations[0]) ==
            brain.vector_observation_space_size *
            brain.num_stacked_vector_observations)

    assert not brain_info["RealFakeBrain"].local_done[0]
    assert brain_info["RealFakeBrain"].local_done[2]

def test_int_channel():
    sender = IntChannel()
    receiver = IntChannel()
    sender.send_int(5)
    sender.send_int(6)
    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)
    assert receiver.list_int[0] == 5
    assert receiver.list_int[1] == 6

def get_unity_envs():
    # check the python environment
    print("Python version: ", sys.version)
    if sys.version_info[0] < 3:
        raise Exception("ERROR: ML-Agents Toolkit requires Python 3")

    # set the unity environment
    env = UnityEnvironment(file_name=UNITY_PATH, base_port=5005)
    brain = env.brain_names[0]
    env.reset(train_mode=True)[brain]

    return env, brain

def _start_env(file_name="", seed=0, worker_id=0):
    log2logger("Starting Environment: " + str(worker_id))
    if file_name == "":
        env = UnityEnvironment(file_name=None, seed=seed)
    else:
        env = UnityEnvironment(file_name=file_name, seed=seed,
                               worker_id=worker_id)
    return env

def _init_env(self):
    if self.config['build_path'] is None or self.config['build_path'] == '':
        self.env = UnityEnvironment()
    else:
        self.env = UnityEnvironment(file_name=self.config['build_path'],
                                    no_graphics=self._train_mode,
                                    base_port=self.config['build_port'],
                                    args=['--scene', self.config['scene']])

    self.logger.info(f'{self.config["build_path"]} initialized')

    self.default_brain_name = self.env.brain_names[0]

def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0,
             docker_training=False, no_graphics=False, timeout_wait=30,
             train_mode=True, **kwargs):
    """
    Args:
        file_name (Optional[str]): Name of Unity environment binary.
        base_port (int): Port number to connect to Unity environment.
            `worker_id` increments on top of this.
        worker_id (int): Number to add to `base_port`. Used for asynchronous
            agent scenarios.
        docker_training (bool): Informs this class whether the process is
            being run within a container. Default: False.
        no_graphics (bool): Whether to run the Unity simulator in no-graphics
            mode. Default: False.
        timeout_wait (int): Time (in seconds) to wait for connection from
            environment.
        train_mode (bool): Whether to run in training mode, speeding up the
            simulation. Default: True.
    """
    # First create the UnityMLAgentsEnvironment to get state and action
    # spaces, then create the RLgraph Environment instance.
    self.mlagents_env = UnityEnvironment(
        file_name, worker_id, base_port, seed, docker_training, no_graphics
    )
    all_brain_info = self.mlagents_env.reset()
    # Get all possible information from AllBrainInfo.
    # TODO: Which scene do we pick?
    self.scene_key = next(iter(all_brain_info))
    first_brain_info = all_brain_info[self.scene_key]
    num_environments = len(first_brain_info.agents)

    state_space = {}
    if len(first_brain_info.vector_observations[0]) > 0:
        state_space["vector"] = get_space_from_op(
            first_brain_info.vector_observations[0])
        # TODO: This is a hack.
        if state_space["vector"].dtype == np.float64:
            state_space["vector"].dtype = np.float32
    if len(first_brain_info.visual_observations) > 0:
        state_space["visual"] = get_space_from_op(
            first_brain_info.visual_observations[0])
    if first_brain_info.text_observations[0]:
        state_space["text"] = get_space_from_op(
            first_brain_info.text_observations[0])

    if len(state_space) == 1:
        self.state_key = next(iter(state_space))
        state_space = state_space[self.state_key]
    else:
        self.state_key = None
        state_space = Dict(state_space)

    action_space = get_space_from_op(first_brain_info.action_masks[0])
    if action_space.dtype == np.float64:
        action_space.dtype = np.float32

    super(MLAgentsEnv, self).__init__(
        num_environments=num_environments, state_space=state_space,
        action_space=action_space, **kwargs
    )

    # Caches the last observation we made (after stepping or resetting).
    self.last_state = []

def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.external_brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.external_brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, env.brains[env.external_brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out["action"].shape == (3, 2)
    env.close()

def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.reset()
    env.close()
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
    assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
    assert isinstance(brain_info["RealFakeBrain"].vector_observations,
                      np.ndarray)
    assert (len(brain_info["RealFakeBrain"].visual_observations) ==
            brain.number_visual_observations)
    assert len(brain_info["RealFakeBrain"].vector_observations) == len(
        brain_info["RealFakeBrain"].agents)
    assert (len(brain_info["RealFakeBrain"].vector_observations[0]) ==
            brain.vector_observation_space_size)

def test_raw_bytes():
    sender = RawBytesChannel()
    receiver = RawBytesChannel()

    sender.send_raw_data("foo".encode("ascii"))
    sender.send_raw_data("bar".encode("ascii"))

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)

    messages = receiver.get_and_clear_received_messages()
    assert len(messages) == 2
    assert messages[0].decode("ascii") == "foo"
    assert messages[1].decode("ascii") == "bar"

    messages = receiver.get_and_clear_received_messages()
    assert len(messages) == 0

class UnityEnv():
    """Unity Reacher Environment Wrapper
    https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md
    """

    def __init__(self, env_file='data/Reacher.exe', no_graphics=True,
                 mlagents=False):
        if mlagents:
            from mlagents.envs.environment import UnityEnvironment
        else:
            from unityagents import UnityEnvironment
        self.env = UnityEnvironment(file_name=env_file,
                                    no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        if type(self.action_size) != int:
            self.action_size = self.action_size[0]
        env_info = self.env.reset(train_mode=True)[self.brain_name]
        self.state_size = env_info.vector_observations.shape[1]
        self.num_agents = len(env_info.agents)

    def reset(self, train=True):
        env_info = self.env.reset(train_mode=train)[self.brain_name]
        return env_info.vector_observations

    def close(self):
        self.env.close()

    def step(self, actions):
        actions = np.clip(actions, -1, 1)
        env_info = self.env.step(actions)[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        return next_states, np.array(rewards), np.array(dones)

    @property
    def action_shape(self):
        return (self.num_agents, self.action_size)

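# Hedged usage sketch for the UnityEnv wrapper above, assuming a local Reacher
# build at the default path 'data/Reacher.exe':
def _example_unity_env_rollout():
    env = UnityEnv(no_graphics=True)
    states = env.reset(train=True)
    for _ in range(100):
        # Random continuous actions in [-1, 1], one row per agent.
        actions = np.random.uniform(-1, 1, size=env.action_shape)
        states, rewards, dones = env.step(actions)
        if dones.any():
            states = env.reset(train=True)
    env.close()
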
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    spec = env.get_agent_group_spec("RealFakeBrain")
    env.reset()
    batched_step_result = env.get_step_result("RealFakeBrain")
    env.close()
    assert isinstance(batched_step_result, BatchedStepResult)
    assert len(spec.observation_shapes) == len(batched_step_result.obs)
    n_agents = batched_step_result.n_agents()
    for shape, obs in zip(spec.observation_shapes, batched_step_result.obs):
        assert (n_agents,) + shape == obs.shape

def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    env.reset()
    brain_name = env.get_agent_groups()[0]
    brain_info = step_result_to_brain_info(
        env.get_step_result(brain_name), env.get_agent_group_spec(brain_name))
    brain_params = group_spec_to_brain_parameters(
        brain_name, env.get_agent_group_spec(brain_name))

    trainer_parameters = dummy_config
    model_path = brain_name
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, brain_params, trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out["action"].shape == (3, 2)
    env.close()

def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0
            )
            env = UnityEnvironment(" ")
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()

def test_float_properties():
    sender = FloatPropertiesChannel()
    receiver = FloatPropertiesChannel()

    sender.set_property("prop1", 1.0)

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val is None

    sender.set_property("prop2", 2.0)

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val == 2.0
    assert len(receiver.list_properties()) == 2
    assert "prop1" in receiver.list_properties()
    assert "prop2" in receiver.list_properties()

    val = sender.get_property("prop1")
    assert val == 1.0

    assert receiver.get_property_dict_copy() == {"prop1": 1.0, "prop2": 2.0}
    assert receiver.get_property_dict_copy() == sender.get_property_dict_copy()

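# Hedged usage sketch: how a side channel is attached to a live environment in
# the ml-agents releases that ship FloatPropertiesChannel (the `side_channels`
# constructor argument is assumed from that API era):
def _example_side_channel_usage():
    channel = FloatPropertiesChannel()
    env = UnityEnvironment(file_name=None, side_channels=[channel])
    # Properties set here are delivered with the next reset()/step() call.
    channel.set_property("gravity", -9.81)
    env.reset()
    env.close()
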
def init_unity_env(env_path, show_visuals=True):
    # Find a worker_id < 64 that's not in use
    worker_id = 0
    done = False
    while not done:
        if worker_id > 64:
            sys.exit()
        try:
            env = UnityEnvironment(env_path, worker_id=worker_id,
                                   no_graphics=not show_visuals)
            done = True
        except mlagents.envs.exception.UnityWorkerInUseException:
            worker_id += 1

    # Get state and action space, as well as multiagent and multibrain info
    # from environment
    env.reset(train_mode=not show_visuals)
    # brain_name = list(env.brains.keys())[0]
    brain_names = list(env.brains.keys())
    if len(brain_names) > 1:
        multibrain = True
        n_agents = (env._n_agents[brain_names[0]] +
                    env._n_agents[brain_names[1]])
    else:
        multibrain = False
        n_agents = env._n_agents[brain_names[0]]

    # WalkerVis is a version of the Walker environment with one brain
    # 'WalkerVis' having visual observations, whereas the 'Walker' brain does
    # not. The visual observations are used for recording episodes.
    state_space = env.brains[brain_names[0]].vector_observation_space_size
    action_space = env.brains[brain_names[0]].vector_action_space_size
    multiagent = n_agents > 1
    return (env, state_space, action_space, n_agents, multiagent,
            brain_names, multibrain)

def create_unity_environment(worker_id: int) -> UnityEnvironment:
    env_seed = seed
    if not env_seed:
        env_seed = seed_pool[worker_id % len(seed_pool)]
    return UnityEnvironment(
        file_name=env_path,
        worker_id=worker_id,
        seed=env_seed,
        docker_training=docker_training,
        no_graphics=no_graphics,
        base_port=start_port,
        args=env_args,
    )

def run(train_mode, load_model, env_name):
    env = UnityEnvironment(file_name=env_name)
    default_brain = env.brain_names[0]

    agent = DDPGAgent(state_size, action_size, train_mode, load_model)
    rewards = deque(maxlen=print_interval)
    success_cnt = 0
    step = 0

    for episode in range(run_episode + test_episode):
        if episode == run_episode:
            train_mode = False

        env_info = env.reset(train_mode=train_mode)[default_brain]
        state = env_info.vector_observations[0]
        episode_rewards = 0
        done = False

        while not done:
            step += 1
            action = agent.get_action([state])[0]
            # print(action)
            env_info = env.step(action)[default_brain]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            episode_rewards += reward

            if train_mode:
                agent.append_sample(state, action, reward, next_state, done)

            state = next_state

            if episode > start_train_episode and train_mode:
                agent.train_model()

        success_cnt = success_cnt + 1 if reward == 1 else success_cnt
        rewards.append(episode_rewards)
        agent.save_samples(episode)

        if episode % print_interval == 0 and episode != 0:
            print("step: {} / episode: {} / reward: {:.3f} / success_cnt: {}".format(
                step, episode, np.mean(rewards), success_cnt))
            agent.Write_Summray(np.mean(rewards), success_cnt, episode)
            success_cnt = 0

        if train_mode and episode % save_interval == 0 and episode != 0:
            print("model saved")
            agent.save_model()

    env.close()

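# run() depends on module-level settings defined elsewhere in the original
# script. An illustrative, assumed set (placeholder values, not from the
# source):
#
# state_size = 12
# action_size = 3
# run_episode = 1000
# test_episode = 100
# start_train_episode = 50
# print_interval = 10
# save_interval = 100
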
def __init__(self, environment_filename=None, docker_training=False,
             worker_id=0, retro=True, timeout_wait=30, realtime_mode=False,
             config=None, greyscale=False):
    """
    Arguments:
        environment_filename: The file path to the Unity executable. Does not
            require the extension.
        docker_training: Whether this is running within a docker environment
            and should use a virtual frame buffer (xvfb).
        worker_id: The index of the worker in the case where multiple
            environments are running. Each environment reserves port
            (5005 + worker_id) for communication with the Unity executable.
        retro: Resizes visual observations to 84x84 (uint8) and flattens the
            action space.
        timeout_wait: Time for python interface to wait for environment to
            connect.
        realtime_mode: Whether to render the environment window image and run
            environment at realtime.
    """
    self._env = UnityEnvironment(environment_filename, worker_id,
                                 docker_training=docker_training,
                                 timeout_wait=timeout_wait)

    split_name = self._env.academy_name.split('-v')
    if len(split_name) == 2 and split_name[0] == "ObstacleTower":
        self.name, self.version = split_name
    else:
        raise UnityGymException(
            "Attempting to launch non-Obstacle Tower environment")

    if self.version not in self.ALLOWED_VERSIONS:
        raise UnityGymException(
            "Invalid Obstacle Tower version. Your build is v" + self.version +
            " but only the following versions are compatible with this gym: "
            + str(self.ALLOWED_VERSIONS))

    self.visual_obs = None
    self._current_state = None
    self._n_agents = None
    self._flattener = None
    self._greyscale = greyscale

    # Environment reset parameters
    self._seed = None
    self._floor = None

    self.realtime_mode = realtime_mode
    # Hidden flag used by Atari environments to determine if the game is over
    self.game_over = False
    self.retro = retro
    if config is not None:
        self.config = config
    else:
        self.config = None

    flatten_branched = self.retro
    uint8_visual = self.retro

    # Check brain configuration
    if len(self._env.brains) != 1:
        raise UnityGymException(
            "There can only be one brain in a UnityEnvironment "
            "if it is wrapped in a gym.")
    self.brain_name = self._env.external_brain_names[0]
    brain = self._env.brains[self.brain_name]
    if brain.number_visual_observations == 0:
        raise UnityGymException(
            "Environment provides no visual observations.")

    self.uint8_visual = uint8_visual

    if brain.number_visual_observations > 1:
        logger.warning(
            "The environment contains more than one visual observation. "
            "Please note that only the first will be provided in the "
            "observation.")

    # Check for number of agents in scene.
    initial_info = self._env.reset(
        train_mode=not self.realtime_mode)[self.brain_name]
    self._check_agents(len(initial_info.agents))

    # Set observation and action spaces
    if len(brain.vector_action_space_size) == 1:
        self._action_space = spaces.Discrete(
            brain.vector_action_space_size[0])
    else:
        if flatten_branched:
            self._flattener = ActionFlattener(brain.vector_action_space_size)
            self._action_space = self._flattener.action_space
        else:
            self._action_space = spaces.MultiDiscrete(
                brain.vector_action_space_size)
    high = np.array([np.inf] * brain.vector_observation_space_size)
    self.action_meanings = brain.vector_action_descriptions

    if self._greyscale:
        depth = 1
    else:
        depth = 3
    image_space_max = 1.0
    image_space_dtype = np.float32
    camera_height = brain.camera_resolutions[0]["height"]
    camera_width = brain.camera_resolutions[0]["width"]
    if self.retro:
        image_space_max = 255
        image_space_dtype = np.uint8
        camera_height = 84
        camera_width = 84

    image_space = spaces.Box(
        0, image_space_max,
        dtype=image_space_dtype,
        shape=(camera_height, camera_width, depth))

    if self.retro:
        self._observation_space = image_space
    else:
        max_float = np.finfo(np.float32).max
        keys_space = spaces.Discrete(5)
        time_remaining_space = spaces.Box(
            low=0.0, high=max_float, shape=(1,), dtype=np.float32)
        floor_space = spaces.Discrete(9999)
        self._observation_space = spaces.Tuple(
            (image_space, keys_space, time_remaining_space, floor_space))

class UnityEnv(gym.Env):
    """
    Provides Gym wrapper for Unity Learning Environments.
    Multi-agent environments use lists for object types, as done here:
    https://github.com/openai/multiagent-particle-envs
    """

    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode.
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(environment_filename, worker_id,
                                     no_graphics=no_graphics)

        # Take a single step so that the brain information will be sent over
        if not self._env.brains:
            self._env.step()

        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        if len(self._env.external_brain_names) <= 0:
            raise UnityGymException(
                "There is no external brain in the UnityEnvironment.")

        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]
        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment.")
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual` was set to true, but visual observations are "
                "not in use. This setting will not have any effect.")
            # Keep the attribute defined even when the setting is ignored.
            self.uint8_visual = False
        else:
            self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must set allow_multiple_visual_obs=True to receive them "
                "all. Otherwise, please note that only the first will be "
                "provided in the observation.")

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a "
                "UnityEnvironment if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(
                    brain.vector_action_space_size[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(
                        brain.vector_action_space_size)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(
                        brain.vector_action_space_size)
        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            shape = (
                brain.camera_resolutions[0].height,
                brain.camera_resolutions[0].width,
                brain.camera_resolutions[0].num_channels,
            )
            if uint8_visual:
                self._observation_space = spaces.Box(0, 255, dtype=np.uint8,
                                                     shape=shape)
            else:
                self._observation_space = spaces.Box(0, 1, dtype=np.float32,
                                                     shape=shape)
        else:
            self._observation_space = spaces.Box(-high, high,
                                                 dtype=np.float32)

    def reset(self):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Returns:
            observation (object/list): the initial observation of the space.
        """
        info = self._env.reset()[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self.game_over = False

        if not self._multiagent:
            obs, reward, done, info = self._single_step(info)
        else:
            obs, reward, done, info = self._multi_step(info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the environment
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list): amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.
        """
        if self._multiagent:
            if not isinstance(action, list):
                raise UnityGymException(
                    "The environment was expecting `action` to be a list.")
            if len(action) != self._n_agents:
                raise UnityGymException(
                    "The environment was expecting a list of {} "
                    "actions.".format(self._n_agents))
            else:
                if self._flattener is not None:
                    # Action space is discrete and flattened - we expect a list of scalars
                    action = [self._flattener.lookup_action(_act)
                              for _act in action]
                action = np.array(action)
        else:
            if self._flattener is not None:
                # Translate action into list
                action = self._flattener.lookup_action(action)

        info = self._env.step(action)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self._current_state = info

        if not self._multiagent:
            obs, reward, done, info = self._single_step(info)
            self.game_over = done
        else:
            obs, reward, done, info = self._multi_step(info)
            self.game_over = all(done)
        return obs, reward, done, info

    def _single_step(self, info):
        if self.use_visual:
            visual_obs = info.visual_observations

            if self._allow_multiple_visual_obs:
                visual_obs_list = []
                for obs in visual_obs:
                    visual_obs_list.append(self._preprocess_single(obs[0]))
                self.visual_obs = visual_obs_list
            else:
                self.visual_obs = self._preprocess_single(visual_obs[0][0])

            default_observation = self.visual_obs
        else:
            default_observation = info.vector_observations[0, :]

        return (
            default_observation,
            info.rewards[0],
            info.local_done[0],
            {"text_observation": info.text_observations[0],
             "brain_info": info},
        )

    def _preprocess_single(self, single_visual_obs):
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def _multi_step(self, info):
        if self.use_visual:
            self.visual_obs = self._preprocess_multi(info.visual_observations)
            default_observation = self.visual_obs
        else:
            default_observation = info.vector_observations
        return (
            list(default_observation),
            info.rewards,
            info.local_done,
            {"text_observation": info.text_observations,
             "brain_info": info},
        )

    def _preprocess_multi(self, multiple_visual_obs):
        if self.uint8_visual:
            return [(255.0 * _visual_obs).astype(np.uint8)
                    for _visual_obs in multiple_visual_obs]
        else:
            return multiple_visual_obs

    def render(self, mode="rgb_array"):
        return self.visual_obs

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        self._env.close()

    def get_action_meanings(self):
        return self.action_meanings

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Currently not implemented.
        """
        logger.warn("Could not seed environment %s", self.name)
        return

    def _check_agents(self, n_agents):
        if not self._multiagent and n_agents > 1:
            raise UnityGymException(
                "The environment was launched as a single-agent environment, "
                "however there is more than one agent in the scene.")
        elif self._multiagent and n_agents <= 1:
            raise UnityGymException(
                "The environment was launched as a multi-agent environment, "
                "however there is only one agent in the scene.")
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException(
                "The number of agents in the environment has changed since "
                "initialization. This is not supported.")

    @property
    def metadata(self):
        return {"render.modes": ["rgb_array"]}

    @property
    def reward_range(self):
        return -float("inf"), float("inf")

    @property
    def spec(self):
        return None

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def number_agents(self):
        return self._n_agents

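# Minimal gym-style loop for the UnityEnv wrapper above (hypothetical build
# path "builds/3DBall"; single agent with vector observations assumed):
def _example_gym_unity_env():
    env = UnityEnv("builds/3DBall", worker_id=1, use_visual=False)
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # random policy for illustration
        obs, reward, done, info = env.step(action)
    env.close()
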
class MLAgentsEnv(VectorEnv):
    """
    An Environment sitting behind a tcp connection and communicating through
    this adapter.
    Note: Communication between Unity and Python takes place over an open
    socket without authentication. Ensure that the network where training
    takes place is secure.
    """

    def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0,
                 docker_training=False, no_graphics=False, timeout_wait=30,
                 train_mode=True, **kwargs):
        """
        Args:
            file_name (Optional[str]): Name of Unity environment binary.
            base_port (int): Port number to connect to Unity environment.
                `worker_id` increments on top of this.
            worker_id (int): Number to add to `base_port`. Used for
                asynchronous agent scenarios.
            docker_training (bool): Informs this class whether the process is
                being run within a container. Default: False.
            no_graphics (bool): Whether to run the Unity simulator in
                no-graphics mode. Default: False.
            timeout_wait (int): Time (in seconds) to wait for connection from
                environment.
            train_mode (bool): Whether to run in training mode, speeding up
                the simulation. Default: True.
        """
        # First create the UnityMLAgentsEnvironment to get state and action
        # spaces, then create the RLgraph Environment instance.
        self.mlagents_env = UnityEnvironment(
            file_name, worker_id, base_port, seed, docker_training,
            no_graphics
        )
        all_brain_info = self.mlagents_env.reset()
        # Get all possible information from AllBrainInfo.
        # TODO: Which scene do we pick?
        self.scene_key = next(iter(all_brain_info))
        first_brain_info = all_brain_info[self.scene_key]
        num_environments = len(first_brain_info.agents)

        state_space = {}
        if len(first_brain_info.vector_observations[0]) > 0:
            state_space["vector"] = get_space_from_op(
                first_brain_info.vector_observations[0])
            # TODO: This is a hack.
            if state_space["vector"].dtype == np.float64:
                state_space["vector"].dtype = np.float32
        if len(first_brain_info.visual_observations) > 0:
            state_space["visual"] = get_space_from_op(
                first_brain_info.visual_observations[0])
        if first_brain_info.text_observations[0]:
            state_space["text"] = get_space_from_op(
                first_brain_info.text_observations[0])

        if len(state_space) == 1:
            self.state_key = next(iter(state_space))
            state_space = state_space[self.state_key]
        else:
            self.state_key = None
            state_space = Dict(state_space)

        brain_params = next(iter(self.mlagents_env.brains.values()))
        if brain_params.vector_action_space_type == "discrete":
            highs = brain_params.vector_action_space_size
            # MultiDiscrete (Tuple(IntBox)).
            if any(h != highs[0] for h in highs):
                action_space = Tuple([IntBox(h) for h in highs])
            # Normal IntBox:
            else:
                action_space = IntBox(
                    low=np.zeros_like(highs, dtype=np.int32),
                    high=np.array(highs, dtype=np.int32),
                    shape=(len(highs),)
                )
        else:
            action_space = get_space_from_op(first_brain_info.action_masks[0])
        if action_space.dtype == np.float64:
            action_space.dtype = np.float32

        super(MLAgentsEnv, self).__init__(
            num_environments=num_environments, state_space=state_space,
            action_space=action_space, **kwargs
        )

        # Caches the last observation we made (after stepping or resetting).
        self.last_state = None

    def get_env(self):
        return self

    def reset(self, index=0):
        # Reset the entire MLAgentsEnv iff global_done is True.
        if self.mlagents_env.global_done is True or self.last_state is None:
            self.reset_all()
        return self.last_state[index]

    def reset_all(self):
        all_brain_info = self.mlagents_env.reset()
        self.last_state = self._get_state_from_brain_info(all_brain_info)
        return self.last_state

    def step(self, actions, text_actions=None, **kwargs):
        # MLAgents Envs don't like tuple-actions.
        if isinstance(actions[0], tuple):
            actions = [list(a) for a in actions]
        all_brain_info = self.mlagents_env.step(
            # TODO: Only support vector actions for now.
            vector_action=actions, memory=None, text_action=text_actions,
            value=None
        )
        self.last_state = self._get_state_from_brain_info(all_brain_info)
        r = self._get_reward_from_brain_info(all_brain_info)
        t = self._get_terminal_from_brain_info(all_brain_info)
        return self.last_state, r, t, None

    def render(self):
        # TODO: If no_graphics is True, maybe the user can render through this
        # method manually?
        pass

    def terminate(self):
        self.mlagents_env.close()

    def terminate_all(self):
        return self.terminate()

    def __str__(self):
        return "MLAgentsEnv(port={}{})".format(
            self.mlagents_env.port,
            " [loaded]" if self.mlagents_env._loaded else ""
        )

    def _get_state_from_brain_info(self, all_brain_info):
        brain_info = all_brain_info[self.scene_key]
        if self.state_key is None:
            return {"vector": list(brain_info.vector_observations),
                    "visual": list(brain_info.visual_observations),
                    "text": list(brain_info.text_observations)}
        elif self.state_key == "vector":
            return list(brain_info.vector_observations)
        elif self.state_key == "visual":
            return list(brain_info.visual_observations)
        elif self.state_key == "text":
            return list(brain_info.text_observations)

    def _get_reward_from_brain_info(self, all_brain_info):
        brain_info = all_brain_info[self.scene_key]
        return [np.array(r_, dtype=np.float32) for r_ in brain_info.rewards]

    def _get_terminal_from_brain_info(self, all_brain_info):
        brain_info = all_brain_info[self.scene_key]
        return brain_info.local_done

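# Hedged usage sketch for MLAgentsEnv (hypothetical build path
# "builds/3DBall"; assumes the RLgraph VectorEnv base exposes
# `num_environments` and a Box-like `action_space` with a `shape`):
def _example_mlagents_vector_env():
    env = MLAgentsEnv(file_name="builds/3DBall", worker_id=2)
    states = env.reset_all()
    # One placeholder (zero) action per parallel agent.
    actions = [np.zeros(env.action_space.shape)
               for _ in range(env.num_environments)]
    states, rewards, terminals, _ = env.step(actions)
    env.terminate()
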