def test_process_visual_observation():
    """Check that visual observation protos are decoded into one stacked batch.

    Two random 128x128x3 arrays are turned into observation protos by
    generate_compressed_proto_obs, each attached to its own AgentInfoProto,
    and passed through _process_visual_observation. The result must have one
    leading entry per agent and reproduce each input to within 0.01.
    """
    in_array_1 = np.random.rand(128, 128, 3)
    proto_obs_1 = generate_compressed_proto_obs(in_array_1)
    in_array_2 = np.random.rand(128, 128, 3)
    proto_obs_2 = generate_compressed_proto_obs(in_array_2)

    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]

    arr = _process_visual_observation(0, (128, 128, 3), ap_list)
    assert list(arr.shape) == [2, 128, 128, 3]
    # Compare the magnitude of the error: a signed difference would accept
    # any decoded value that is arbitrarily smaller than the input.
    assert (np.abs(arr[0, :, :, :] - in_array_1) < 0.01).all()
    assert (np.abs(arr[1, :, :, :] - in_array_2) < 0.01).all()
def _get_agent_infos(self):
    """Build the fake agent-info mapping returned for "RealFakeBrain".

    Every agent shares the same observation list: self.visual_inputs PNG-typed
    observations of shape [30, 40, 3] with no payload, followed by a single
    uncompressed float observation [1, 2, 3]. The agent whose index is 2 is
    the only one marked done.

    :return: dict with one key, "RealFakeBrain", mapped to a
        ListAgentInfoProto holding self.num_agents AgentInfoProto entries.
    """
    observations = []
    for _ in range(self.visual_inputs):
        observations.append(
            ObservationProto(
                compressed_data=None,
                shape=[30, 40, 3],
                compression_type=COMPRESSION_TYPE_PNG,
            )
        )

    vector_values = [1, 2, 3]
    observations.append(
        ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_values),
            shape=[len(vector_values)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
    )

    agent_infos = [
        AgentInfoProto(
            reward=1,
            done=(agent_id == 2),
            max_step_reached=False,
            id=agent_id,
            observations=observations,
        )
        for agent_id in range(self.num_agents)
    ]
    return {
        "RealFakeBrain": UnityRLOutputProto.ListAgentInfoProto(value=agent_infos)
    }
def _make_agent_info_proto(vector_obs: List[float]) -> AgentInfoProto:
    """Wrap a flat list of floats in an AgentInfoProto.

    :param vector_obs: Values stored as the float data of the observation.
    :return: AgentInfoProto with exactly one observation whose shape is
        [len(vector_obs)] and whose compression type is COMPRESSION_TYPE_NONE.
    """
    float_payload = ObservationProto.FloatData(data=vector_obs)
    return AgentInfoProto(
        observations=[
            ObservationProto(
                float_data=float_payload,
                shape=[len(vector_obs)],
                compression_type=COMPRESSION_TYPE_NONE,
            )
        ]
    )
def test_from_agent_proto_nan(mock_warning, mock_nan_to_num):
    """Expect a NaN observation to come back as 0.0 and to be warned about.

    Both the nan_to_num mock and the warning mock must have been called.
    """
    raw_values = [1.0, 2.0, float("nan")]
    agent_info_proto = AgentInfoProto()
    agent_info_proto.stacked_vector_observation.extend(raw_values)

    brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain)

    # The NaN entry is expected to be replaced by 0.0.
    matches = brain_info.vector_observations == [1.0, 2.0, 0.0]
    assert matches.all()
    mock_nan_to_num.assert_called()
    mock_warning.assert_called()
def test_from_agent_proto_fast_path(mock_warning, mock_nan_to_num):
    """
    Expect fully finite observations to pass through unchanged, with neither
    the nan_to_num mock nor the warning mock being called.
    """
    finite_values = [1.0, 2.0, 3.0]
    agent_info_proto = AgentInfoProto()
    agent_info_proto.stacked_vector_observation.extend(finite_values)

    brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain)

    matches = brain_info.vector_observations == [1.0, 2.0, 3.0]
    assert matches.all()
    mock_nan_to_num.assert_not_called()
    mock_warning.assert_not_called()
def test_from_agent_proto_inf(mock_warning, mock_nan_to_num):
    """Expect an infinite observation to be replaced by the largest float.

    The nan_to_num mock must have been called, but no warning is expected:
    only NaN values warn.
    """
    raw_values = [1.0, float("inf"), 0.0]
    agent_info_proto = AgentInfoProto()
    agent_info_proto.stacked_vector_observation.extend(raw_values)

    brain_info = BrainInfo.from_agent_proto(1, [agent_info_proto], test_brain)

    # inf is expected to be replaced by the maximum representable float.
    matches = brain_info.vector_observations == [1.0, sys.float_info.max, 0.0]
    assert matches.all()
    mock_nan_to_num.assert_called()
    # inf alone must not produce a warning; that is reserved for NaN.
    mock_warning.assert_not_called()
def reset(
    self,
    config: Dict[str, float] = None,
    train_mode: bool = True,
    custom_reset_parameters: Any = None,
) -> AllBrainInfo:  # type: ignore
    """Reset the agent and return its initial BrainInfo.

    None of the arguments are read by this implementation.

    :return: dict mapping BRAIN_NAME to a BrainInfo built from one
        AgentInfoProto whose stacked vector observation is self.goal repeated
        OBS_SIZE times, with done and max_step_reached both False.
    """
    self._reset_agent()
    initial_obs = [self.goal] * OBS_SIZE
    first_info = AgentInfoProto(
        stacked_vector_observation=initial_obs,
        done=False,
        max_step_reached=False,
    )
    brain_info = BrainInfo.from_agent_proto(
        0, [first_info], self._brains[BRAIN_NAME]
    )
    return {BRAIN_NAME: brain_info}
def step(
    self,
    vector_action: Dict[str, Any] = None,
    memory: Dict[str, Any] = None,
    value: Dict[str, Any] = None,
) -> AllBrainInfo:
    """Apply one action to the agent's position and return the new BrainInfo.

    The action is read from vector_action[BRAIN_NAME][0][0]. In discrete mode
    a truthy action means +1 and a falsy one -1; otherwise the value is used
    directly. The move is clamped to [-STEP_SIZE, STEP_SIZE] and the position
    to [-1, 1]. Reaching either end finishes the episode.

    :param vector_action: Per-brain actions; must not be None.
    :param memory: Not read by this implementation.
    :param value: Not read by this implementation.
    :return: dict mapping BRAIN_NAME to the BrainInfo for this step.
    """
    assert vector_action is not None

    raw_action = vector_action[BRAIN_NAME][0][0]
    if self.discrete:
        delta = 1 if raw_action else -1
    else:
        delta = raw_action
    delta = clamp(delta, -STEP_SIZE, STEP_SIZE)

    self.position += delta
    self.position = clamp(self.position, -1, 1)
    self.step_count += 1

    episode_over = self.position >= 1.0 or self.position <= -1.0
    # Finishing pays SUCCESS_REWARD scaled by position * goal; every other
    # step costs TIME_PENALTY.
    reward = (
        SUCCESS_REWARD * self.position * self.goal
        if episode_over
        else -TIME_PENALTY
    )

    goal_values = [self.goal] * OBS_SIZE
    agent_info = AgentInfoProto(
        reward=reward,
        done=bool(episode_over),
        observations=[
            ObservationProto(
                float_data=ObservationProto.FloatData(data=goal_values),
                shape=[len(goal_values)],
                compression_type=COMPRESSION_TYPE_NONE,
            )
        ],
    )

    # The observation above was built from the pre-reset goal; reset only
    # after the proto has been assembled.
    if episode_over:
        self._reset_agent()

    brain_info = BrainInfo.from_agent_proto(
        0, [agent_info], self._brains[BRAIN_NAME]
    )
    return {BRAIN_NAME: brain_info}
def _get_agent_infos(self):
    """Build the fake agent-info mapping returned for "RealFakeBrain".

    The stored action is [1] when self.is_discrete, otherwise [1, 2]. The
    stacked vector observation is [1, 2, 3] for a single stack and
    [1, 2, 3, 1, 2, 3] otherwise. Every agent shares self.visual_inputs
    PNG-typed compressed observations of shape [30, 40, 3] with no payload.
    The agent whose index is 2 is the only one marked done.

    :return: dict with one key, "RealFakeBrain", mapped to a
        ListAgentInfoProto holding self.num_agents AgentInfoProto entries.
    """
    vector_action = [1] if self.is_discrete else [1, 2]
    observation = [1, 2, 3] if self.num_stacks == 1 else [1, 2, 3, 1, 2, 3]

    compressed_obs = []
    for _ in range(self.visual_inputs):
        compressed_obs.append(
            CompressedObservationProto(
                data=None,
                shape=[30, 40, 3],
                compression_type=CompressionTypeProto.PNG,
            )
        )

    agent_infos = [
        AgentInfoProto(
            stacked_vector_observation=observation,
            reward=1,
            stored_vector_actions=vector_action,
            stored_text_actions="",
            text_observation="",
            memories=[],
            done=(agent_id == 2),
            max_step_reached=False,
            id=agent_id,
            compressed_observations=compressed_obs,
        )
        for agent_id in range(self.num_agents)
    ]
    return {
        "RealFakeBrain": UnityRLOutputProto.ListAgentInfoProto(value=agent_infos)
    }
def reset(
    self,
    config: Dict[str, float] = None,
    train_mode: bool = True,
    custom_reset_parameters: Any = None,
) -> AllBrainInfo:  # type: ignore
    """Reset the agent and return its initial BrainInfo.

    None of the arguments are read by this implementation.

    :return: dict mapping BRAIN_NAME to a BrainInfo built from one
        AgentInfoProto carrying a single uncompressed float observation
        (self.goal repeated OBS_SIZE times), with done and max_step_reached
        both False.
    """
    self._reset_agent()

    goal_values = [self.goal] * OBS_SIZE
    first_info = AgentInfoProto(
        done=False,
        max_step_reached=False,
        observations=[
            ObservationProto(
                float_data=ObservationProto.FloatData(data=goal_values),
                shape=[len(goal_values)],
                compression_type=COMPRESSION_TYPE_NONE,
            )
        ],
    )
    brain_info = BrainInfo.from_agent_proto(
        0, [first_info], self._brains[BRAIN_NAME]
    )
    return {BRAIN_NAME: brain_info}
def step(
    self,
    vector_action: Dict[str, Any] = None,
    memory: Dict[str, Any] = None,
    text_action: Dict[str, Any] = None,
    value: Dict[str, Any] = None,
) -> AllBrainInfo:
    """Apply one action to the agent's position and return the new BrainInfo.

    The action is read from vector_action[BRAIN_NAME][0][0]. In discrete mode
    a truthy action means +1 and a falsy one -1; otherwise the value is used
    directly. The move is clamped to [-STEP_SIZE, STEP_SIZE] and the position
    to [-1, 1]. Reaching either end finishes the episode.

    :param vector_action: Per-brain actions; must not be None.
    :param memory: Not read by this implementation.
    :param text_action: Not read by this implementation.
    :param value: Not read by this implementation.
    :return: dict mapping BRAIN_NAME to the BrainInfo for this step.
    """
    assert vector_action is not None

    if self.discrete:
        act = vector_action[BRAIN_NAME][0][0]
        delta = 1 if act else -1
    else:
        delta = vector_action[BRAIN_NAME][0][0]
    delta = clamp(delta, -STEP_SIZE, STEP_SIZE)

    self.position += delta
    self.position = clamp(self.position, -1, 1)
    self.step_count += 1

    done = self.position >= 1.0 or self.position <= -1.0
    if done:
        reward = SUCCESS_REWARD * self.position * self.goal
    else:
        reward = -TIME_PENALTY

    agent_info = AgentInfoProto(
        stacked_vector_observation=[self.goal] * OBS_SIZE,
        reward=reward,
        # The comparison above can yield a non-builtin boolean (e.g. a numpy
        # bool when the action value is a numpy scalar); hand the proto a
        # plain Python bool instead.
        done=bool(done),
    )

    # The observation above was built from the pre-reset goal; reset only
    # after the proto has been assembled.
    if done:
        self._reset_agent()

    return {
        BRAIN_NAME: BrainInfo.from_agent_proto(
            0, [agent_info], self._brains[BRAIN_NAME]
        )
    }
def generate_list_agent_proto(n_agent: int, shape: List[Tuple[int]]) -> List[AgentInfoProto]:
    """Create n_agent AgentInfoProto messages filled with deterministic data.

    For the agent at index i: reward and id are i, done is True for even i,
    max_step_reached is True for odd i, and the action mask is
    [True, False] repeated five times. Each agent gets one uncompressed
    observation per entry of shape, filled with 0.1.

    :param n_agent: Number of agent protos to generate.
    :param shape: One shape tuple per observation to attach to every agent.
    :return: List of the generated AgentInfoProto messages, in index order.
    """
    result = []
    for agent_index in range(n_agent):
        ap = AgentInfoProto()
        ap.reward = agent_index
        ap.done = agent_index % 2 == 0
        ap.max_step_reached = agent_index % 2 == 1
        ap.id = agent_index
        ap.action_mask.extend([True, False] * 5)

        obs_proto_list = []
        for obs_shape in shape:
            obs_proto = ObservationProto()
            obs_proto.shape.extend(list(obs_shape))
            obs_proto.compression_type = NONE
            # np.prod returns a numpy scalar (a float for an empty shape);
            # convert to a plain int so it is always a valid repeat count.
            n_values = int(np.prod(obs_shape))
            obs_proto.float_data.data.extend([0.1] * n_values)
            obs_proto_list.append(obs_proto)

        ap.observations.extend(obs_proto_list)
        result.append(ap)
    return result
def load_demonstration(file_path: str) -> Tuple[BrainParameters, List[BrainInfo], int]:
    """
    Loads and parses a demonstration file, or every '.demo' file in a directory.
    :param file_path: Location of a demonstration file (.demo) or of a
        directory containing demonstration files.
    :return: BrainParameters, the list of BrainInfos containing demonstration
        data, and the total number of steps announced by the files' meta-data.
    :raises ValueError: If the directory holds no '.demo' file, or if the
        given file does not have the '.demo' extension.
    :raises FileNotFoundError: If file_path is neither a file nor a directory.
    """
    # First 32 bytes of file dedicated to meta-data.
    # NOTE(review): the comment says 32 bytes while the offset used is 33 —
    # confirm against the code that writes the file.
    INITIAL_POS = 33

    file_paths = []
    if os.path.isdir(file_path):
        file_paths = [
            os.path.join(file_path, _file)
            for _file in os.listdir(file_path)
            if _file.endswith(".demo")
        ]
        # Check the filtered list, not the raw listing: a directory that only
        # holds other files has no demonstrations to load either.
        if not file_paths:
            raise ValueError("There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
        file_paths.append(file_path)
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(file_path)
        )

    brain_params = None
    brain_param_proto = None
    brain_infos = []
    total_expected = 0
    for _file_path in file_paths:
        # Read through a context manager so the handle is always closed.
        with open(_file_path, "rb") as demo_file:
            data = demo_file.read()

        next_pos, pos, obs_decoded = 0, 0, 0
        while pos < len(data):
            # Each message is preceded by its length, encoded as a varint.
            next_pos, pos = _DecodeVarint32(data, pos)
            if obs_decoded == 0:
                # First message: meta-data announcing the number of steps.
                meta_data_proto = DemonstrationMetaProto()
                meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                total_expected += meta_data_proto.number_steps
                # Jump to the fixed offset where the next message starts.
                pos = INITIAL_POS
            elif obs_decoded == 1:
                # Second message: the brain parameters.
                brain_param_proto = BrainParametersProto()
                brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                pos += next_pos
            else:
                # Every following message is one agent step.
                agent_info = AgentInfoProto()
                agent_info.ParseFromString(data[pos : pos + next_pos])
                if brain_params is None:
                    brain_params = BrainParameters.from_proto(
                        brain_param_proto, agent_info
                    )
                brain_info = BrainInfo.from_agent_proto(0, [agent_info], brain_params)
                brain_infos.append(brain_info)
                # Stop once all announced steps (summed over the files read
                # so far) have been collected.
                if len(brain_infos) == total_expected:
                    break
                pos += next_pos
            obs_decoded += 1
    return brain_params, brain_infos, total_expected