Example #1
 # Imports per the ML-Agents repository layout; BRAIN_NAME matches the
 # behavior name used by the simple test environments.
 from mlagents.trainers.demo_loader import write_demo
 from mlagents.trainers.tests.simple_test_envs import RecordEnvironment
 from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
     ActionSpecProto,
     BrainParametersProto,
 )
 from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
     DemonstrationMetaProto,
 )

 BRAIN_NAME = "1D"


 # In the original tests this helper is nested inside a pytest fixture, so
 # tmpdir_factory (pytest's session-scoped temp-directory fixture) comes from
 # the enclosing scope; here it is an explicit keyword-only argument.
 def record_demo(action_sizes, num_visual=0, num_vector=1, *, tmpdir_factory):
     env = RecordEnvironment(
         [BRAIN_NAME],
         action_sizes=action_sizes,
         num_visual=num_visual,
         num_vector=num_vector,
         n_demos=100,
     )
     # solve() steps the environment with its optimal policy, so the recorded
     # protos are expert demonstrations; to record a learned policy instead,
     # solve the env through training in the usual way.
     env.solve()
     agent_info_protos = env.demonstration_protos[BRAIN_NAME]
     meta_data_proto = DemonstrationMetaProto()
     continuous_action_size, discrete_action_size = action_sizes
     action_spec_proto = ActionSpecProto(
         num_continuous_actions=continuous_action_size,
         num_discrete_actions=discrete_action_size,
         discrete_branch_sizes=[2] if discrete_action_size > 0 else None,
     )
     brain_param_proto = BrainParametersProto(brain_name=BRAIN_NAME,
                                              is_training=True,
                                              action_spec=action_spec_proto)
     # A non-empty tuple is always truthy, so testing action_sizes directly
     # would always yield "Discrete"; test the discrete branch count instead.
     action_type = "Discrete" if discrete_action_size > 0 else "Continuous"
     demo_path_name = "1DTest" + action_type + ".demo"
     demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
     write_demo(demo_path, meta_data_proto, brain_param_proto,
                agent_info_protos)
     return demo_path
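
A minimal usage sketch, assuming the pytest fixture pattern from the ML-Agents tests: record a demo for a one-dimensional continuous action space, then load it back with demo_to_buffer from mlagents.trainers.demo_loader. The test name and the sequence length of 1 are illustrative choices.

 from mlagents.trainers.demo_loader import demo_to_buffer


 def test_record_and_load(tmpdir_factory):
     # Record a demo with 1 continuous action and 0 discrete branches.
     demo_path = record_demo((1, 0), tmpdir_factory=tmpdir_factory)
     # demo_to_buffer returns the recovered BehaviorSpec and an AgentBuffer
     # holding the stored transitions.
     behavior_spec, buffer = demo_to_buffer(demo_path, sequence_length=1)
     assert behavior_spec.action_spec.continuous_size == 1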
Example #2
 # Method of a mock communicator used in the mlagents_envs tests;
 # self.is_discrete, self.brain_name, and self._get_agent_infos() are
 # initialized elsewhere in the class.
 def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
     if self.is_discrete:
         action_spec = ActionSpecProto(num_discrete_actions=2,
                                       discrete_branch_sizes=[3, 2])
     else:
         action_spec = ActionSpecProto(num_continuous_actions=2)
     bp = BrainParametersProto(brain_name=self.brain_name,
                               is_training=True,
                               action_spec=action_spec)
     rl_init = UnityRLInitializationOutputProto(
         name="RealFakeAcademy",
         communication_version=UnityEnvironment.API_VERSION,
         package_version="mock_package_version",
         log_path="",
         brain_parameters=[bp],
     )
     output = UnityRLOutputProto(agentInfos=self._get_agent_infos())
     return UnityOutputProto(rl_initialization_output=rl_init,
                             rl_output=output)
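
A minimal sketch of exercising this handshake directly, assuming the enclosing class is the MockCommunicator from mlagents_envs (the constructor argument shown is an assumption): call initialize once and read back the advertised action spec from the reply.

 from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
 from mlagents_envs.mock_communicator import MockCommunicator

 # Request a discrete mock academy and inspect the action spec it reports.
 comm = MockCommunicator(discrete_action=True)
 output = comm.initialize(UnityInputProto())
 spec = output.rl_initialization_output.brain_parameters[0].action_spec
 assert spec.num_discrete_actions == 2
 assert list(spec.discrete_branch_sizes) == [3, 2]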