def generate_list_agent_proto(
    n_agent: int,
    shape: List[Tuple[int]],
    infinite_rewards: bool = False,
    nan_observations: bool = False,
) -> List[AgentInfoProto]:
    """Build ``n_agent`` synthetic AgentInfoProto messages for tests.

    :param n_agent: number of agent protos to generate.
    :param shape: one tuple per observation, giving that observation's shape.
    :param infinite_rewards: if True every reward is +inf, else the agent index.
    :param nan_observations: if True observations are filled with NaN, else 0.1.
    :return: list of populated AgentInfoProto messages.
    """
    result = []
    for agent_index in range(n_agent):
        ap = AgentInfoProto()
        ap.reward = float("inf") if infinite_rewards else agent_index
        # Deterministic per-index flags so callers get a mix of done/max-step agents.
        ap.done = agent_index % 2 == 0
        ap.max_step_reached = agent_index % 4 == 0
        ap.id = agent_index
        ap.action_mask.extend([True, False] * 5)
        obs_proto_list = []
        # Iterate the shapes directly rather than indexing with range(len(...)).
        for obs_shape in shape:
            obs_proto = ObservationProto()
            obs_proto.shape.extend(list(obs_shape))
            obs_proto.compression_type = NONE
            fill = [float("nan")] if nan_observations else [0.1]
            # int() guards against np.prod returning a NumPy scalar as the
            # list-repetition count.
            obs_proto.float_data.data.extend(fill * int(np.prod(obs_shape)))
            obs_proto_list.append(obs_proto)
        ap.observations.extend(obs_proto_list)
        result.append(ap)
    return result
def _get_agent_infos(self):
    """Build a fake ``{"RealFakeBrain": ListAgentInfoProto}`` mapping.

    Creates ``self.visual_inputs`` PNG-compressed visual observations plus one
    vector observation, shared by every generated agent.
    """
    vector_obs = [1, 2, 3]
    # One PNG-compressed visual observation per configured visual input.
    observations = [
        ObservationProto(
            compressed_data=None,
            shape=[30, 40, 3],
            compression_type=COMPRESSION_TYPE_PNG,
        )
        for _ in range(self.visual_inputs)
    ]
    observations.append(
        ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_obs),
            shape=[len(vector_obs)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
    )
    # Agent index 2 is the only one flagged as done.
    list_agent_info = [
        AgentInfoProto(
            reward=1,
            done=(i == 2),
            max_step_reached=False,
            id=i,
            observations=observations,
        )
        for i in range(self.num_agents)
    ]
    return {
        "RealFakeBrain": UnityRLOutputProto.ListAgentInfoProto(
            value=list_agent_info
        )
    }
def generate_compressed_proto_obs(in_array: np.ndarray, grayscale: bool = False) -> ObservationProto:
    """Wrap ``in_array`` in a PNG-compressed ObservationProto.

    NOTE(review): a second ``generate_compressed_proto_obs`` (without the
    ``grayscale`` parameter) appears later in this file and shadows this one
    at import time — confirm which definition callers actually get.

    :param in_array: image array to compress.
    :param grayscale: legacy flag for the old API without channel mapping;
        when True the reported shape collapses the channel dimension to 1.
    """
    obs_proto = ObservationProto()
    obs_proto.compressed_data = generate_compressed_data(in_array)
    obs_proto.compression_type = PNG
    if grayscale:
        # Old API without mapping: advertise a single channel.
        obs_proto.shape.extend([in_array.shape[0], in_array.shape[1], 1])
    else:
        obs_proto.shape.extend(in_array.shape)
    return obs_proto
def generate_compressed_proto_obs_with_mapping(
    in_array: np.ndarray, mapping: List[int]
) -> ObservationProto:
    """Wrap ``in_array`` in a PNG-compressed ObservationProto with a channel mapping.

    NOTE(review): the body checks ``mapping is not None``, so the annotation
    should likely be ``Optional[List[int]]`` — confirm ``Optional`` is imported
    before changing it.

    :param in_array: image array to compress.
    :param mapping: compressed-channel mapping; negative entries mark dropped
        channels. When None, the raw array shape is reported instead.
    """
    obs_proto = ObservationProto()
    obs_proto.compressed_data = generate_compressed_data(in_array)
    obs_proto.compression_type = PNG
    if mapping is None:
        obs_proto.shape.extend(in_array.shape)
    else:
        obs_proto.compressed_channel_mapping.extend(mapping)
        # Channel count = number of distinct non-negative mapping entries.
        n_channels = len({m for m in mapping if m >= 0})
        obs_proto.shape.extend([in_array.shape[0], in_array.shape[1], n_channels])
    return obs_proto
def proto_from_batched_step_result(
    batched_step_result: BatchedStepResult,
) -> List[AgentInfoProto]:
    """Convert a BatchedStepResult into one AgentInfoProto per agent.

    Rank-3 observations are emitted via ``generate_uncompressed_proto_obs``;
    everything else is sent as flat float data.
    """
    agent_info_protos: List[AgentInfoProto] = []
    for agent_id in batched_step_result.agent_id:
        idx = batched_step_result.agent_id_to_index[agent_id]
        agent_mask = None
        if batched_step_result.action_mask is not None:
            # Flatten the per-branch masks into one 1-D array for this agent.
            agent_mask = []  # type: ignore
            for branch in batched_step_result.action_mask:
                agent_mask = np.concatenate((agent_mask, branch[idx, :]), axis=0)
        observations: List[ObservationProto] = []
        for obs_batch in batched_step_result.obs:
            obs = obs_batch[idx]
            if len(obs.shape) == 3:
                observations.append(generate_uncompressed_proto_obs(obs))
            else:
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(data=obs),
                        shape=[len(obs)],
                        compression_type=NONE,
                    )
                )
        agent_info_protos.append(
            AgentInfoProto(
                reward=batched_step_result.reward[idx],
                done=batched_step_result.done[idx],
                id=agent_id,
                max_step_reached=batched_step_result.max_step[idx],
                action_mask=agent_mask,
                observations=observations,
            )
        )
    return agent_info_protos
def proto_from_steps(
    decision_steps: DecisionSteps, terminal_steps: TerminalSteps
) -> List[AgentInfoProto]:
    """Convert (DecisionSteps, TerminalSteps) into a flat list of AgentInfoProto.

    Decision-step agents come first (``done=False``, ``max_step_reached=False``,
    with their flattened action mask); terminal-step agents follow
    (``done=True``, ``max_step_reached`` from ``interrupted``, no action mask).
    """

    def _observations_to_protos(
        obs_list: Any, agent_id_index: int
    ) -> List[ObservationProto]:
        # Convert one agent's observations; rank-3 arrays are treated as
        # visual observations, anything else as flat float data.
        protos: List[ObservationProto] = []
        for all_observations_of_type in obs_list:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                protos.append(generate_uncompressed_proto_obs(observation))
            else:
                protos.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    )
                )
        return protos

    agent_info_protos: List[AgentInfoProto] = []
    # Take care of the DecisionSteps first.
    for agent_id in decision_steps.agent_id:
        agent_id_index = decision_steps.agent_id_to_index[agent_id]
        agent_mask: Any = None
        if decision_steps.action_mask is not None:
            # Flatten the per-branch masks into one 1-D array for this agent.
            agent_mask = []
            for _branch in decision_steps.action_mask:
                agent_mask = np.concatenate(
                    (agent_mask, _branch[agent_id_index, :]), axis=0
                )
            # BUGFIX: np.bool (alias of builtin bool) was deprecated in NumPy
            # 1.20 and removed in 1.24 — use the builtin directly.
            agent_mask = agent_mask.astype(bool).tolist()
        agent_info_protos.append(
            AgentInfoProto(
                reward=decision_steps.reward[agent_id_index],
                done=False,
                id=agent_id,
                max_step_reached=False,
                action_mask=agent_mask,
                observations=_observations_to_protos(
                    decision_steps.obs, agent_id_index
                ),
            )
        )
    # Take care of the TerminalSteps second.
    for agent_id in terminal_steps.agent_id:
        agent_id_index = terminal_steps.agent_id_to_index[agent_id]
        agent_info_protos.append(
            AgentInfoProto(
                reward=terminal_steps.reward[agent_id_index],
                done=True,
                id=agent_id,
                max_step_reached=bool(
                    terminal_steps.interrupted[agent_id_index]
                ),
                action_mask=None,
                observations=_observations_to_protos(
                    terminal_steps.obs, agent_id_index
                ),
            )
        )
    return agent_info_protos
def generate_uncompressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
    """Wrap ``in_array`` in an uncompressed (float-data) ObservationProto."""
    flat_values = in_array.flatten().tolist()
    obs_proto = ObservationProto()
    obs_proto.compression_type = NONE
    obs_proto.float_data.data.extend(flat_values)
    obs_proto.shape.extend(in_array.shape)
    return obs_proto
def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
    """Wrap ``in_array`` in a PNG-compressed ObservationProto.

    NOTE(review): this redefines ``generate_compressed_proto_obs`` declared
    earlier in the file (which takes an extra ``grayscale`` flag); this later
    definition wins at import time — confirm the duplication is intentional.
    """
    obs_proto = ObservationProto()
    obs_proto.compression_type = PNG
    obs_proto.compressed_data = generate_compressed_data(in_array)
    obs_proto.shape.extend(in_array.shape)
    return obs_proto