def _generate_reset_input(self, training, config: ArenaConfig) -> UnityInput:
    # Build the RL input carrying the reset command (command 1 == RESET).
    rl_in = UnityRLInput()
    rl_in.is_training = training
    rl_in.command = 1
    # Build the reset input from the arena configuration, if one was provided.
    rl_reset = UnityRLResetInput()
    if config is not None:
        rl_reset.CopyFrom(config.dict_to_arena_config())
    # Wrap both messages into a single UnityInput for the communicator.
    result = UnityInput()
    result.rl_input.CopyFrom(rl_in)
    result.rl_reset_input.CopyFrom(rl_reset)
    return result
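# Hedged usage sketch (not part of the original file): a reset would typically
# send this input through the environment's communicator and read back the
# first observations. `self.communicator.exchange`, `self._get_state` and
# `arenas_configurations` are assumptions about the surrounding class, not
# confirmed here.
#
#     outputs = self.communicator.exchange(
#         self._generate_reset_input(training, arenas_configurations)
#     )
#     if outputs is None:
#         raise KeyboardInterrupt
#     state = self._get_state(outputs.rl_output)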
def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityInput:
    rl_in = UnityRLInput()
    for b in vector_action:
        n_agents = self._n_agents[b]
        if n_agents == 0:
            continue
        # Per-agent slice sizes for the flattened action and memory arrays.
        _a_s = len(vector_action[b]) // n_agents
        _m_s = len(memory[b]) // n_agents
        for i in range(n_agents):
            action = AgentActionProto(
                vector_actions=vector_action[b][i * _a_s: (i + 1) * _a_s],
                memories=memory[b][i * _m_s: (i + 1) * _m_s],
                text_actions=text_action[b][i],
            )
            # Attach the value estimate for this agent when one is available.
            if b in value and value[b] is not None:
                action.value = float(value[b][i])
            rl_in.agent_actions[b].value.extend([action])
    rl_in.command = 0  # command 0 == STEP
    return self.wrap_unity_input(rl_in)
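# Hedged usage sketch (not part of the original file): the arguments are
# per-brain dictionaries of flattened arrays. The brain name "Learner" and
# `self.communicator.exchange` are assumptions used only for illustration.
#
#     n = self._n_agents["Learner"]
#     step_input = self._generate_step_input(
#         vector_action={"Learner": [0.0, 1.0] * n},  # two actions per agent
#         memory={"Learner": []},
#         text_action={"Learner": [""] * n},
#         value={"Learner": None},
#     )
#     outputs = self.communicator.exchange(step_input)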