def advance(self, env: EnvManager) -> int:
    """
    Steps the environment once and feeds the resulting step infos to every trainer.

    After experiences are added and processed, each trainer that is still training
    has its step counter incremented and, when it reports being ready, its policy
    is updated and pushed back to the environment manager.
    :param env: Environment manager that is stepped and that receives updated policies.
    :return: Number of step infos returned by ``env.step()``.
    """
    with hierarchical_timer("env_step"):
        step_started_at = time()
        new_step_infos = env.step()
        step_duration = time() - step_started_at

    for step_info in new_step_infos:
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(step_duration)
            trainer.add_experiences(
                step_info.previous_all_brain_info,
                step_info.current_all_brain_info,
                step_info.brain_name_to_action_info[brain_name].outputs,
            )
            trainer.process_experiences(
                step_info.previous_all_brain_info,
                step_info.current_all_brain_info,
            )

    num_steps = len(new_step_infos)
    for brain_name, trainer in self.trainers.items():
        # NOTE(review): the step duration was already recorded once per step info
        # in the loop above; confirm that recording it again here is intended.
        if brain_name in self.trainer_metrics:
            self.trainer_metrics[brain_name].add_delta_step(step_duration)

        still_training = (
            self.train_model and trainer.get_step <= trainer.get_max_steps
        )
        if not still_training:
            continue

        trainer.increment_step(num_steps)
        if not trainer.is_ready_update():
            continue

        # Perform gradient descent with experience buffer
        with hierarchical_timer("update_policy"):
            trainer.update_policy()
        env.set_policy(brain_name, trainer.policy)

    return num_steps
def _postprocess_steps(
    self, env_steps: List[EnvironmentResponse]
) -> List[EnvironmentStep]:
    """
    Converts raw worker responses into EnvironmentSteps and merges worker timers.

    Each response is paired with the previous step stored on its worker, and the
    worker's ``previous_step`` is replaced by the newly built step. Any timer roots
    carried by the payloads are merged under a "workers" timer node.
    :param env_steps: Responses received from the environment workers.
    :return: One EnvironmentStep per response, in the order received.
    """
    collected_steps = []
    worker_timer_roots = []

    for response in env_steps:
        payload: StepResponse = response.payload
        worker = self.env_workers[response.worker_id]
        latest_step = EnvironmentStep(
            worker.previous_step.current_all_brain_info,
            payload.all_brain_info,
            worker.previous_all_action_info,
        )
        collected_steps.append(latest_step)
        worker.previous_step = latest_step

        if payload.timer_root:
            worker_timer_roots.append(payload.timer_root)

    # Nothing to merge: skip opening the "workers" timer altogether.
    if not worker_timer_roots:
        return collected_steps

    with hierarchical_timer("workers") as main_timer_node:
        for worker_root in worker_timer_roots:
            main_timer_node.merge(
                worker_root, root_name="worker_root", is_parallel=True
            )
    return collected_steps
def advance(self, env: EnvManager) -> int:
    """
    Steps the environment once and feeds per-brain experiences to the trainers.

    Experiences are only added and processed for brains that have actions in the
    step info. Afterwards each trainer either continues training (incrementing its
    step counter and updating its policy when ready) or, when not training, has its
    ``advance()`` hook called.
    :param env: Environment manager that is stepped and that receives updated policies.
    :return: Number of step infos returned by ``env.step()``.
    """
    with hierarchical_timer("env_step"):
        step_started_at = time()
        new_step_infos = env.step()
        step_duration = time() - step_started_at

    for step_info in new_step_infos:
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(step_duration)
            if not step_info.has_actions_for_brain(brain_name):
                continue
            trainer.add_experiences(
                step_info.previous_all_brain_info[brain_name],
                step_info.current_all_brain_info[brain_name],
                step_info.brain_name_to_action_info[brain_name].outputs,
            )
            trainer.process_experiences(
                step_info.previous_all_brain_info[brain_name],
                step_info.current_all_brain_info[brain_name],
            )

    num_steps = len(new_step_infos)
    for brain_name, trainer in self.trainers.items():
        if brain_name in self.trainer_metrics:
            self.trainer_metrics[brain_name].add_delta_step(step_duration)

        still_training = (
            self.train_model and trainer.get_step <= trainer.get_max_steps
        )
        if not still_training:
            # Avoid memory leak during inference
            # Eventually this whole block will take place in advance()
            # But currently this only calls clear_update_buffer() in RLTrainer
            # and nothing in the base class
            trainer.advance()
            continue

        trainer.increment_step(num_steps)
        if trainer.is_ready_update():
            # Perform gradient descent with experience buffer
            with hierarchical_timer("update_policy"):
                trainer.update_policy()
            env.set_policy(brain_name, trainer.policy)

    return num_steps
def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray:
    """
    Decodes an encoded observation image into a numpy array scaled by 1/255,
    optionally averaging its channels into a single gray-scale channel.
    :param image_bytes: input byte array corresponding to image
    :param gray_scale: Whether to convert the image to grayscale.
    :return: processed numpy array of observation from environment; when
        gray_scale is set the array is reshaped to (height, width, 1).
    """
    with hierarchical_timer("image_decompress"):
        stream = io.BytesIO(bytearray(image_bytes))
        decoded = Image.open(stream)
        # Image normally loads lazily; load() forces the decoding to happen
        # inside the timer scope.
        decoded.load()

    pixels = np.array(decoded) / 255.0
    if not gray_scale:
        return pixels

    # Average over the channel axis, then restore a trailing channel dimension.
    averaged = np.mean(pixels, axis=2)
    height, width = averaged.shape[0], averaged.shape[1]
    return np.reshape(averaged, [height, width, 1])
def step(self) -> None:
    """
    Sends the pending actions to the environment and updates the local state.

    The first call is redirected to ``reset()``. Groups without a pending action
    receive an empty action sized to their current agent count (0 when the group
    has no known state). Pending actions are cleared after the exchange.
    :raises UnityEnvironmentException: If no environment is loaded.
    :raises UnityCommunicationException: If the communicator returns no output.
    """
    if self._is_first_message:
        return self.reset()
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")

    # fill the blanks for missing actions
    for group_name in self._env_specs:
        if group_name in self._env_actions:
            continue
        n_agents = (
            self._env_state[group_name].n_agents()
            if group_name in self._env_state
            else 0
        )
        group_spec = self._env_specs[group_name]
        self._env_actions[group_name] = group_spec.create_empty_action(n_agents)

    step_input = self._generate_step_input(self._env_actions)
    with hierarchical_timer("communicator.exchange"):
        outputs = self.communicator.exchange(step_input)
    if outputs is None:
        raise UnityCommunicationException("Communicator has stopped.")

    self._update_group_specs(outputs)
    self._update_state(outputs.rl_output)
    self._env_actions.clear()
def step(
    self,
    vector_action: Dict[str, np.ndarray] = None,
    memory: Optional[Dict[str, np.ndarray]] = None,
    text_action: Optional[Dict[str, List[str]]] = None,
    value: Optional[Dict[str, np.ndarray]] = None,
    custom_action: Dict[str, Any] = None,
) -> AllBrainInfo:
    """
    Provides the environment with an action, moves the environment dynamics forward
    accordingly, and returns observation, state, and reward information to the agent.

    The first call is redirected to ``reset()``. Inputs given as a single value
    (rather than a dictionary keyed by brain name) are accepted only when exactly
    one external brain exists. Dictionaries supplied by the caller are filled in
    and normalized in place.
    :param value: Value estimates provided by agents.
    :param vector_action: Agent's vector action. Can be a scalar or vector of int/floats.
    :param memory: Vector corresponding to memory used for recurrent policies.
    :param text_action: Text action to send to the environment.
    :param custom_action: Optional instance of a CustomAction protobuf message.
    :return: AllBrainInfo : A Data structure corresponding to the new state of the environment.
    :raises UnityEnvironmentException: If no environment is loaded.
    :raises UnityActionException: If an input is malformed, names an unknown brain,
        or does not match the expected number of agents / action size.
    :raises UnityCommunicationException: If the communicator returns no output.
    """
    if self._is_first_message:
        return self.reset()

    vector_action = {} if vector_action is None else vector_action
    memory = {} if memory is None else memory
    text_action = {} if text_action is None else text_action
    value = {} if value is None else value
    custom_action = {} if custom_action is None else custom_action

    # Check that environment is loaded, and episode is currently running.
    if not self._loaded:
        raise UnityEnvironmentException("No Unity environment is loaded.")

    # Wrap single-brain inputs into {brain_name: input} dictionaries.
    if isinstance(vector_action, self.SINGLE_BRAIN_ACTION_TYPES):
        if self._num_external_brains == 1:
            vector_action = {self._external_brain_names[0]: vector_action}
        elif self._num_external_brains > 1:
            raise UnityActionException(
                "You have {0} brains, you need to feed a dictionary of brain names a keys, "
                "and vector_actions as values".format(self._num_brains)
            )
        else:
            raise UnityActionException(
                "There are no external brains in the environment, "
                "step cannot take a vector_action input"
            )

    if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES):
        if self._num_external_brains == 1:
            memory = {self._external_brain_names[0]: memory}
        elif self._num_external_brains > 1:
            raise UnityActionException(
                "You have {0} brains, you need to feed a dictionary of brain names as keys "
                "and memories as values".format(self._num_brains)
            )
        else:
            raise UnityActionException(
                "There are no external brains in the environment, "
                "step cannot take a memory input"
            )

    if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES):
        if self._num_external_brains == 1:
            text_action = {self._external_brain_names[0]: text_action}
        elif self._num_external_brains > 1:
            raise UnityActionException(
                "You have {0} brains, you need to feed a dictionary of brain names as keys "
                "and text_actions as values".format(self._num_brains)
            )
        else:
            # Previously reported "value input" here (copy-paste slip); the
            # offending argument in this branch is text_action.
            raise UnityActionException(
                "There are no external brains in the environment, "
                "step cannot take a text_action input"
            )

    if isinstance(value, self.SINGLE_BRAIN_ACTION_TYPES):
        if self._num_external_brains == 1:
            value = {self._external_brain_names[0]: value}
        elif self._num_external_brains > 1:
            raise UnityActionException(
                "You have {0} brains, you need to feed a dictionary of brain names as keys "
                "and state/action value estimates as values".format(self._num_brains)
            )
        else:
            raise UnityActionException(
                "There are no external brains in the environment, "
                "step cannot take a value input"
            )

    if isinstance(custom_action, CustomAction):
        if self._num_external_brains == 1:
            custom_action = {self._external_brain_names[0]: custom_action}
        elif self._num_external_brains > 1:
            raise UnityActionException(
                "You have {0} brains, you need to feed a dictionary of brain names as keys "
                "and CustomAction instances as values".format(self._num_brains)
            )
        else:
            raise UnityActionException(
                "There are no external brains in the environment, "
                "step cannot take a custom_action input"
            )

    # Every brain named in the action inputs must be a known external brain.
    for brain_name in (
        list(vector_action.keys()) + list(memory.keys()) + list(text_action.keys())
    ):
        if brain_name not in self._external_brain_names:
            raise UnityActionException(
                "The name {0} does not correspond to an external brain "
                "in the environment".format(brain_name)
            )

    for brain_name in self._external_brain_names:
        n_agent = self._n_agents[brain_name]
        brain = self._brains[brain_name]

        # Missing vector actions default to zeros of the expected flat size;
        # provided ones are flattened.
        if brain_name not in vector_action:
            if brain.vector_action_space_type == "discrete":
                vector_action[brain_name] = (
                    [0.0] * n_agent * len(brain.vector_action_space_size)
                )
            else:
                vector_action[brain_name] = (
                    [0.0] * n_agent * brain.vector_action_space_size[0]
                )
        else:
            vector_action[brain_name] = self._flatten(vector_action[brain_name])

        if brain_name not in memory or memory[brain_name] is None:
            memory[brain_name] = []
        else:
            memory[brain_name] = self._flatten(memory[brain_name])

        if brain_name not in text_action or text_action[brain_name] is None:
            text_action[brain_name] = [""] * n_agent

        if brain_name not in custom_action or custom_action[brain_name] is None:
            custom_action[brain_name] = [None] * n_agent
        elif isinstance(custom_action[brain_name], CustomAction):
            # A single CustomAction is repeated for every agent of the brain.
            custom_action[brain_name] = [custom_action[brain_name]] * n_agent

        number_text_actions = len(text_action[brain_name])
        if not ((number_text_actions == n_agent) or number_text_actions == 0):
            raise UnityActionException(
                "There was a mismatch between the provided text_action and "
                "the environment's expectation: "
                "The brain {0} expected {1} text_action but was given {2}".format(
                    brain_name, n_agent, number_text_actions
                )
            )

        discrete_check = brain.vector_action_space_type == "discrete"
        expected_discrete_size = n_agent * len(brain.vector_action_space_size)
        continuous_check = brain.vector_action_space_type == "continuous"
        expected_continuous_size = brain.vector_action_space_size[0] * n_agent

        if not (
            (
                discrete_check
                and len(vector_action[brain_name]) == expected_discrete_size
            )
            or (
                continuous_check
                and len(vector_action[brain_name]) == expected_continuous_size
            )
        ):
            raise UnityActionException(
                "There was a mismatch between the provided action and "
                "the environment's expectation: "
                "The brain {0} expected {1} {2} action(s), but was provided: {3}".format(
                    brain_name,
                    str(expected_discrete_size)
                    if discrete_check
                    else str(expected_continuous_size),
                    brain.vector_action_space_type,
                    str(vector_action[brain_name]),
                )
            )

    step_input = self._generate_step_input(
        vector_action, memory, text_action, value, custom_action
    )
    with hierarchical_timer("communicator.exchange"):
        outputs = self.communicator.exchange(step_input)
    if outputs is None:
        raise UnityCommunicationException("Communicator has stopped.")

    rl_output = outputs.rl_output
    state = self._get_state(rl_output)
    # Track the agent count per brain for validating the next call.
    for _b in self._external_brain_names:
        self._n_agents[_b] = len(state[_b].agents)
    return state
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file, or every '.demo' file of a directory.
    :param file_path: Location of demonstration file (.demo), or of a directory
        containing '.demo' files.
    :return: BrainParameter, list of AgentInfoActionPairProto containing demonstration
        data, and the total number of steps announced by the files' metadata.
        The BrainParameter is None when no agent-info/action pair was decoded.
    :raises ValueError: If the directory contains no '.demo' file, or the file does
        not have the '.demo' extension.
    :raises FileNotFoundError: If file_path is neither a file nor a directory.
    """
    # First 32 bytes of file dedicated to meta-data.
    # NOTE(review): the constant is 33 while the comment says 32 bytes; confirm
    # against the format written by the demonstration recorder.
    INITIAL_POS = 33

    if os.path.isdir(file_path):
        file_paths = [
            os.path.join(file_path, _file)
            for _file in os.listdir(file_path)
            if _file.endswith(".demo")
        ]
        # Check the filtered list, not the raw listing: a directory that only
        # holds non-demo files must be rejected as well.
        if not file_paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
        file_paths = [file_path]
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(file_path)
        )

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            next_pos, pos, obs_decoded = 0, 0, 0
            # Each message is preceded by a varint giving its length.
            while pos < len(data):
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    # First message: demonstration metadata.
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    # Second message: brain parameters.
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    # Remaining messages: agent info / action pairs.
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if brain_params is None:
                        brain_params = BrainParameters.from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    # Stop once every step announced by the metadata was read.
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    return brain_params, info_action_pairs, total_expected
def test_timers() -> None:
    """
    Runs nested timers (including one whose body raises) against a patched global
    timer stack and checks both the node hierarchy and the exported timing tree.
    """

    def timer_node(name, count, children=None):
        # Expected serialized form of one timer node; timings are not checked.
        node = {"name": name, "total": mock.ANY, "count": count, "self": mock.ANY}
        if children is not None:
            node["children"] = children
        return node

    with mock.patch(
        "mlagents.envs.timers._global_timer_stack", new_callable=timers.TimerStack
    ) as test_timer:
        # First, run some simple code
        with timers.hierarchical_timer("top_level"):
            for i in range(3):
                with timers.hierarchical_timer("multiple"):
                    decorated_func(i, i)

            raised = False
            try:
                with timers.hierarchical_timer("raises"):
                    raise RuntimeError("timeout!")
            except RuntimeError:
                raised = True

            with timers.hierarchical_timer("post_raise"):
                assert raised

        # We expect the hierarchy to look like
        #   (root)
        #       top_level
        #           multiple
        #               decorated_func
        #           raises
        #           post_raise
        root = test_timer.root
        assert root.children.keys() == {"top_level"}

        top_level = root.children["top_level"]
        assert top_level.children.keys() == {"multiple", "raises", "post_raise"}

        # make sure the scope was closed properly when the exception was raised
        assert top_level.children["raises"].count == 1
        assert top_level.children["multiple"].count == 3

        expected_tree = timer_node(
            "root",
            1,
            children=[
                timer_node(
                    "top_level",
                    1,
                    children=[
                        timer_node(
                            "multiple",
                            3,
                            children=[timer_node("decorated_func", 3)],
                        ),
                        timer_node("raises", 1),
                        timer_node("post_raise", 1),
                    ],
                )
            ],
        )
        expected_tree["gauges"] = [
            {"name": "my_gauge", "value": 4.0, "max": 4.0, "min": 0.0, "count": 3}
        ]

        timer_tree = test_timer.get_timing_tree()
        assert timer_tree == expected_tree