def handle_gym_message(self, envelope: Envelope) -> None:
    """
    Apply an incoming gym message to the wrapped gym environment.

    An ACT message steps the environment with the carried action and sends
    the resulting PERCEPT back to the envelope's sender. A RESET message
    resets the environment and a CLOSE message closes it; neither of those
    produces a reply. Any other performative is left unhandled.

    :param envelope: the envelope carrying the serialized gym message
    :return: None
    """
    request = GymSerializer().decode(envelope.message)
    # Resolve the performative once instead of re-building it per branch.
    kind = GymMessage.Performative(request.get("performative"))

    if kind == GymMessage.Performative.ACT:
        action = request.get("action")
        step_id = request.get("step_id")
        observation, reward, done, info = self.gym_env.step(action)  # type: ignore
        percept = GymMessage(
            performative=GymMessage.Performative.PERCEPT,
            observation=observation,
            reward=reward,
            done=done,
            info=info,
            step_id=step_id,  # echoed back so the caller can match the reply
        )
        reply = Envelope(
            to=envelope.sender,
            sender=DEFAULT_GYM,
            protocol_id=GymMessage.protocol_id,
            message=GymSerializer().encode(percept),
        )
        self._send(reply)
        return

    if kind == GymMessage.Performative.RESET:
        self.gym_env.reset()  # type: ignore
        return

    if kind == GymMessage.Performative.CLOSE:
        self.gym_env.close()  # type: ignore
def _decode_percept(self, envelope: Envelope, expected_step_id: int) -> Message:
    """
    Decode the gym environment's response envelope into a PERCEPT message.

    The response is a PERCEPT message containing the usual 'observation',
    'reward', 'done', 'info' parameters.

    :param envelope: the envelope received as a response; None is rejected
    :param expected_step_id: the expected step id
    :return: a message received as a response to the action performed in
        apply_action.
    :raises ValueError: if the envelope is missing, if its protocol id is not
        the gym protocol id, or if the decoded message is not a PERCEPT
        carrying the expected step id.
    """
    if envelope is None:
        raise ValueError("Missing envelope.")

    # Compare against the protocol id the envelopes are built with rather
    # than a hard-coded literal, so both sides cannot drift apart.
    if envelope.protocol_id != GymMessage.protocol_id:
        raise ValueError("Unknown protocol_id: {}".format(envelope.protocol_id))

    gym_msg = GymSerializer().decode(envelope.message)
    gym_msg_performative = GymMessage.Performative(gym_msg.get("performative"))
    gym_msg_step_id = gym_msg.get("step_id")

    is_expected_percept = (
        gym_msg_performative == GymMessage.Performative.PERCEPT
        and gym_msg_step_id == expected_step_id
    )
    if not is_expected_percept:
        raise ValueError(
            "Unexpected performative or no step_id: {}".format(gym_msg_performative)
        )
    return gym_msg
def close(self) -> None:
    """
    Close the environment.

    Queues a CLOSE message addressed to the gym on the agent's outbox and
    then disconnects.

    :return: None
    """
    close_request = GymMessage(performative=GymMessage.Performative.CLOSE)
    payload = GymSerializer().encode(close_request)
    outgoing = Envelope(
        to=DEFAULT_GYM,
        sender=self._agent_public_key,
        protocol_id=GymMessage.protocol_id,
        message=payload,
    )
    self._agent.outbox.put(outgoing)
    self._disconnect()
def _encode_action(self, action: Action, step_id: int) -> Envelope:
    """
    Wrap the 'action' passed to the step function in a single ACT envelope.

    :param action: the action that is the output of an RL algorithm.
    :param step_id: the step id
    :return: an envelope addressed to the gym carrying the encoded ACT message
    """
    act_request = GymMessage(
        performative=GymMessage.Performative.ACT,
        action=action,
        step_id=step_id,
    )
    payload = GymSerializer().encode(act_request)
    return Envelope(
        to=DEFAULT_GYM,
        sender=self._agent_public_key,
        protocol_id=GymMessage.protocol_id,
        message=payload,
    )
def reset(self) -> None:
    """
    Reset the environment.

    Connects first when the agent's multiplexer is not yet connected, then
    queues a RESET message addressed to the gym on the agent's outbox.

    :return: None
    """
    already_connected = self._agent.multiplexer.is_connected
    if not already_connected:
        self._connect()

    reset_request = GymMessage(performative=GymMessage.Performative.RESET)
    payload = GymSerializer().encode(reset_request)
    outgoing = Envelope(
        to=DEFAULT_GYM,
        sender=self._agent_public_key,
        protocol_id=GymMessage.protocol_id,
        message=payload,
    )
    self._agent.outbox.put(outgoing)
def reset(self) -> None:
    """
    Reset the environment.

    Clears the step counter and the trained flag, then queues a RESET
    message addressed to the gym on the skill context's outbox.

    :return: None
    """
    # Local bookkeeping is cleared before the request is sent.
    self._step_count = 0
    self._is_rl_agent_trained = False

    reset_request = GymMessage(performative=GymMessage.Performative.RESET)
    payload = GymSerializer().encode(reset_request)
    outgoing = Envelope(
        to=DEFAULT_GYM,
        sender=self._skill_context.agent_public_key,
        protocol_id=GymMessage.protocol_id,
        message=payload,
    )
    self._skill_context.outbox.put(outgoing)
def handle_envelope(self, envelope: Envelope) -> None:
    """
    Handle envelopes.

    Decodes the gym message and, when it is a PERCEPT, places it on the
    proxy environment queue of the single loaded gym task.

    :param envelope: the envelope
    :return: None
    :raises ValueError: if the decoded message is not a PERCEPT
    """
    percept = GymSerializer().decode(envelope.message)
    performative = GymMessage.Performative(percept.get("performative"))

    # Anything other than a PERCEPT is rejected up front.
    if performative != GymMessage.Performative.PERCEPT:
        raise ValueError(
            "Unexpected performative or no step_id: {}".format(performative)
        )

    assert self.context.tasks is not None, "Incorrect initialization."
    assert len(self.context.tasks) == 1, "Too many tasks loaded!"
    task = cast(GymTask, self.context.tasks[0])
    task.proxy_env_queue.put(percept)