# Import paths below assume the mlagents_envs package layout; adjust for your ml-agents version.
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
    PNG as COMPRESSION_TYPE_PNG,
)
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto


def _get_agent_infos(self):
        # Build a fake brain-name -> ListAgentInfoProto mapping for tests.
        dict_agent_info = {}
        list_agent_info = []
        vector_obs = [1, 2, 3]

        # One placeholder PNG-compressed visual observation per visual input.
        observations = [
            ObservationProto(
                compressed_data=None,
                shape=[30, 40, 3],
                compression_type=COMPRESSION_TYPE_PNG,
            ) for _ in range(self.visual_inputs)
        ]
        # The vector observation is carried uncompressed as raw float data.
        vector_obs_proto = ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_obs),
            shape=[len(vector_obs)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
        observations.append(vector_obs_proto)

        # All agents share the same observations; agent 2 is flagged done.
        for i in range(self.num_agents):
            list_agent_info.append(
                AgentInfoProto(
                    reward=1,
                    done=(i == 2),
                    max_step_reached=False,
                    id=i,
                    observations=observations,
                )
            )
        dict_agent_info["RealFakeBrain"] = UnityRLOutputProto.ListAgentInfoProto(
            value=list_agent_info
        )
        return dict_agent_info
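A quick, hedged sketch of exercising the helper above. The stand-in object and its attribute values are hypothetical; the agentInfos keyword follows the map field generated on UnityRLOutputProto, but verify the field name against your generated protos.

import types

# Hypothetical stand-in providing the two attributes the method reads.
fake_env = types.SimpleNamespace(visual_inputs=1, num_agents=3)
agent_infos = _get_agent_infos(fake_env)

# agentInfos is assumed to be the map<string, ListAgentInfoProto> field.
rl_output = UnityRLOutputProto(agentInfos=agent_infos)
assert rl_output.agentInfos["RealFakeBrain"].value[2].done  # agent 2 was flagged done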
Example #2
# BatchedStepResult comes from the pre-1.0 ml-agents API; import paths are assumed.
from typing import List

import numpy as np

from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import NONE, ObservationProto

# NOTE: the import path for generate_uncompressed_proto_obs is an assumption;
# a minimal sketch of it appears at the end of this section.
from mlagents_envs.rpc_utils import generate_uncompressed_proto_obs


def proto_from_batched_step_result(
    batched_step_result: BatchedStepResult,
) -> List[AgentInfoProto]:
    agent_info_protos: List[AgentInfoProto] = []
    for agent_id in batched_step_result.agent_id:
        agent_id_index = batched_step_result.agent_id_to_index[agent_id]
        reward = batched_step_result.reward[agent_id_index]
        done = batched_step_result.done[agent_id_index]
        max_step_reached = batched_step_result.max_step[agent_id_index]
        agent_mask = None
        if batched_step_result.action_mask is not None:
            # Flatten the per-branch masks for this agent into a single list.
            agent_mask = []  # type: ignore
            for _branch in batched_step_result.action_mask:
                agent_mask = np.concatenate(
                    (agent_mask, _branch[agent_id_index, :]), axis=0
                )
            # Protobuf repeated bool fields reject numpy floats, so cast first
            # (mirrors the cast done in proto_from_steps below).
            agent_mask = agent_mask.astype(bool).tolist()
        observations: List[ObservationProto] = []
        for all_observations_of_type in batched_step_result.obs:
            observation = all_observations_of_type[agent_id_index]
            # Rank-3 observations are visual; anything else is a flat vector.
            if len(observation.shape) == 3:
                observations.append(generate_uncompressed_proto_obs(observation))
            else:
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    )
                )

        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=max_step_reached,
            action_mask=agent_mask,
            observations=observations,
        )
        agent_info_protos.append(agent_info_proto)
    return agent_info_protos
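Another hedged usage sketch: the stand-in batch below only carries the fields this converter reads, not the full BatchedStepResult API.

import types

import numpy as np

# Hypothetical one-agent batch; field names mirror what the converter reads.
batch = types.SimpleNamespace(
    agent_id=[7],
    agent_id_to_index={7: 0},
    reward=[1.5],
    done=[False],
    max_step=[False],
    action_mask=None,
    obs=[np.zeros((1, 4))],  # one flat vector observation for one agent
)

protos = proto_from_batched_step_result(batch)  # type: ignore[arg-type]
assert protos[0].id == 7 and len(protos[0].observations) == 1
assert protos[0].observations[0].shape == [4]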
# Post-1.0 ml-agents API variant; import paths again assumed as above.
from typing import Any, List

import numpy as np

from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import NONE, ObservationProto
from mlagents_envs.rpc_utils import generate_uncompressed_proto_obs


def proto_from_steps(
    decision_steps: DecisionSteps, terminal_steps: TerminalSteps
) -> List[AgentInfoProto]:
    agent_info_protos: List[AgentInfoProto] = []
    # Take care of the DecisionSteps first
    for agent_id in decision_steps.agent_id:
        agent_id_index = decision_steps.agent_id_to_index[agent_id]
        reward = decision_steps.reward[agent_id_index]
        done = False
        max_step_reached = False
        agent_mask: Any = None
        if decision_steps.action_mask is not None:
            agent_mask = []
            for _branch in decision_steps.action_mask:
                agent_mask = np.concatenate(
                    (agent_mask, _branch[agent_id_index, :]), axis=0)
            # np.bool was removed in NumPy 1.24+; plain bool behaves the same here.
            agent_mask = agent_mask.astype(bool).tolist()
        observations: List[ObservationProto] = []
        for all_observations_of_type in decision_steps.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                observations.append(
                    generate_uncompressed_proto_obs(observation))
            else:
                observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(
                            data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    ))
        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=bool(max_step_reached),
            action_mask=agent_mask,
            observations=observations,
        )
        agent_info_protos.append(agent_info_proto)
    # Take care of the TerminalSteps second
    for agent_id in terminal_steps.agent_id:
        agent_id_index = terminal_steps.agent_id_to_index[agent_id]
        reward = terminal_steps.reward[agent_id_index]
        done = True
        max_step_reached = terminal_steps.interrupted[agent_id_index]

        final_observations: List[ObservationProto] = []
        for all_observations_of_type in terminal_steps.obs:
            observation = all_observations_of_type[agent_id_index]
            if len(observation.shape) == 3:
                final_observations.append(
                    generate_uncompressed_proto_obs(observation))
            else:
                final_observations.append(
                    ObservationProto(
                        float_data=ObservationProto.FloatData(
                            data=observation),
                        shape=[len(observation)],
                        compression_type=NONE,
                    ))
        agent_info_proto = AgentInfoProto(
            reward=reward,
            done=done,
            id=agent_id,
            max_step_reached=bool(max_step_reached),
            action_mask=None,
            observations=final_observations,
        )
        agent_info_protos.append(agent_info_proto)

    return agent_info_protos
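Both converters defer visual (rank-3) observations to generate_uncompressed_proto_obs, whose import path above was assumed. If it is unavailable in your version, here is a minimal sketch consistent with how the callers use it, not the library's own implementation:

import numpy as np

from mlagents_envs.communicator_objects.observation_pb2 import NONE, ObservationProto


def generate_uncompressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
    # Sketch: flatten the visual observation into raw float data, keeping
    # the original (height, width, channels) shape alongside it.
    obs_proto = ObservationProto()
    obs_proto.float_data.data.extend(in_array.flatten().tolist())
    obs_proto.compression_type = NONE
    obs_proto.shape.extend(in_array.shape)
    return obs_proto


obs = np.zeros((30, 40, 3))
proto = generate_uncompressed_proto_obs(obs)
assert proto.shape == [30, 40, 3] and len(proto.float_data.data) == 30 * 40 * 3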