예제 #1
0
 def _generate_reset_input(self, training, config: ArenaConfig) -> UnityInput:
     """Build the message that asks the environment to reset.

     Args:
         training: Value stored in the ``is_training`` field of the RL input.
         config: Optional arena configuration. When it is not ``None``, the
             result of ``config.dict_to_arena_config()`` is copied into the
             reset input; otherwise the reset input stays as freshly
             constructed.

     Returns:
         A ``UnityInput`` whose ``rl_input`` and ``rl_reset_input`` fields
         hold copies of the messages built here.
     """
     rl_in = UnityRLInput()
     rl_in.is_training = training
     # NOTE(review): 1 is presumably the "reset" command value -- confirm
     # against the command enum definition.
     rl_in.command = 1

     rl_reset = UnityRLResetInput()
     if config is not None:
         rl_reset.CopyFrom(config.dict_to_arena_config())

     # Both sub-messages are copied into the outer wrapper that is returned.
     result = UnityInput()
     result.rl_input.CopyFrom(rl_in)
     result.rl_reset_input.CopyFrom(rl_reset)
     return result
예제 #2
0
 def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityRLInput:
     """Assemble the per-agent actions for one step and wrap them for sending.

     For every key in ``vector_action`` the number of agents is looked up in
     ``self._n_agents``. Keys with zero agents are skipped. The flat
     ``vector_action`` and ``memory`` sequences for a key are cut into
     equally sized, consecutive slices, one per agent, and combined with the
     agent's entry from ``text_action`` into an ``AgentActionProto``.

     Args:
         vector_action: Indexable by key; each entry is a flat sequence
             holding the vector actions of all agents for that key.
         memory: Indexable by key; each entry is a flat sequence holding the
             memories of all agents for that key.
         text_action: Indexable by key, then by agent index.
         value: Container of optional per-agent values. An agent's value is
             set only when the key is present and its entry is not ``None``.

     Returns:
         Whatever ``self.wrap_unity_input`` returns for the assembled input.
         NOTE(review): the annotation names ``UnityRLInput``, but the result
         comes from ``wrap_unity_input`` -- confirm the actual return type.
     """
     step_input = UnityRLInput()
     for brain in vector_action:
         agent_count = self._n_agents[brain]
         if agent_count == 0:
             # No agents for this key, so there is nothing to add.
             continue

         # Size of each agent's share of the flat per-key sequences.
         action_size = len(vector_action[brain]) // agent_count
         memory_size = len(memory[brain]) // agent_count

         for idx in range(agent_count):
             action_start = idx * action_size
             memory_start = idx * memory_size
             agent_action = AgentActionProto(
                 vector_actions=vector_action[brain][action_start:action_start + action_size],
                 memories=memory[brain][memory_start:memory_start + memory_size],
                 text_actions=text_action[brain][idx],
             )
             # A value is optional: the key may be absent or map to None.
             if brain in value and value[brain] is not None:
                 agent_action.value = float(value[brain][idx])
             step_input.agent_actions[brain].value.extend([agent_action])
             # Only assigned once at least one agent action has been added.
             step_input.command = 0
     return self.wrap_unity_input(step_input)