    def _get_agent_infos(self):
        # Build one AgentInfoProto per agent; the result maps brain name -> agent list.
        dict_agent_info = {}
        list_agent_info = []
        vector_obs = [1, 2, 3]

        observations = [
            ObservationProto(
                compressed_data=None,
                shape=[30, 40, 3],
                compression_type=COMPRESSION_TYPE_PNG,
            )
            for _ in range(self.visual_inputs)
        ]
        vector_obs_proto = ObservationProto(
            float_data=ObservationProto.FloatData(data=vector_obs),
            shape=[len(vector_obs)],
            compression_type=COMPRESSION_TYPE_NONE,
        )
        observations.append(vector_obs_proto)

        for i in range(self.num_agents):
            list_agent_info.append(
                AgentInfoProto(
                    reward=1,
                    done=(i == 2),
                    max_step_reached=False,
                    id=i,
                    observations=observations,
                )
            )
        dict_agent_info["RealFakeBrain"] = UnityRLOutputProto.ListAgentInfoProto(
            value=list_agent_info
        )
        return dict_agent_info
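The proto names in this first variant suggest roughly the following imports; the paths below assume the mlagents_envs communicator_objects layout and are an inference, not part of the original snippet:

# Likely imports for the snippet above (assumed, version-dependent);
# COMPRESSION_TYPE_* are aliased enum values from the generated observation module.
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (
    ObservationProto,
    NONE as COMPRESSION_TYPE_NONE,
    PNG as COMPRESSION_TYPE_PNG,
)
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto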
    def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
        bp = BrainParametersProto(
            vector_action_size=[2],
            vector_action_descriptions=["", ""],
            # discrete / continuous are the imported space-type proto enum values
            vector_action_space_type=discrete if self.is_discrete else continuous,
            brain_name=self.brain_name,
            is_training=True,
        )
        rl_init = UnityRLInitializationOutputProto(
            name="RealFakeAcademy",
            version=UnityEnvironment.API_VERSION,
            log_path="",
            brain_parameters=[bp],
        )
        output = UnityRLOutputProto(agentInfos=self._get_agent_infos())
        return UnityOutputProto(rl_initialization_output=rl_init, rl_output=output)
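Both methods read instance attributes that the snippets never define. A minimal host-class sketch with those attributes follows; the class name MockCommunicator and all default values are assumptions inferred from the code above, not from the original source:

class MockCommunicator:
    # Hypothetical host class; attribute names are exactly those the methods
    # above reference, defaults are illustrative only.
    def __init__(self, num_agents=3, visual_inputs=0, is_discrete=False,
                 num_stacks=1, brain_name="RealFakeBrain"):
        self.num_agents = num_agents        # AgentInfoProto entries per step
        self.visual_inputs = visual_inputs  # PNG-compressed camera observations
        self.is_discrete = is_discrete      # discrete vs. continuous action space
        self.num_stacks = num_stacks        # used by the older variant below
        self.brain_name = brain_name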
Example #3
    def _get_agent_infos(self):
        dict_agent_info = {}
        if self.is_discrete:
            vector_action = [1]
        else:
            vector_action = [1, 2]
        list_agent_info = []
        if self.num_stacks == 1:
            observation = [1, 2, 3]
        else:
            observation = [1, 2, 3, 1, 2, 3]

        compressed_obs = [
            CompressedObservationProto(
                data=None,
                shape=[30, 40, 3],
                compression_type=CompressionTypeProto.PNG,
            )
            for _ in range(self.visual_inputs)
        ]

        for i in range(self.num_agents):
            list_agent_info.append(
                AgentInfoProto(
                    stacked_vector_observation=observation,
                    reward=1,
                    stored_vector_actions=vector_action,
                    stored_text_actions="",
                    text_observation="",
                    memories=[],
                    done=(i == 2),
                    max_step_reached=False,
                    id=i,
                    compressed_observations=compressed_obs,
                )
            )
        dict_agent_info["RealFakeBrain"] = UnityRLOutputProto.ListAgentInfoProto(
            value=list_agent_info
        )
        return dict_agent_info
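A quick hypothetical check of the mapping this older variant builds, assuming mock is an instance of the host class sketched earlier:

# Hypothetical sanity check on the returned brain-keyed mapping.
infos = mock._get_agent_infos()["RealFakeBrain"].value
assert len(infos) == mock.num_agents
assert infos[2].done                 # only the agent with id == 2 is done
assert not infos[0].max_step_reached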
    def exchange(self, inputs: UnityInputProto) -> UnityOutputProto:
        result = UnityRLOutputProto(agentInfos=self._get_agent_infos())
        return UnityOutputProto(rl_output=result)
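A sketch of how exchange might be driven directly in a test, with no Unity process involved; the UnityInputProto import path assumes the same communicator_objects layout as above:

# Hypothetical smoke test for the mock exchange round trip.
from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto

comm = MockCommunicator(num_agents=3, visual_inputs=0, is_discrete=True)
out = comm.exchange(UnityInputProto())
step_infos = out.rl_output.agentInfos["RealFakeBrain"].value
assert [a.id for a in step_infos] == [0, 1, 2]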