def poll(
    self,
) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
    """Return buffered per-env data as agent-keyed dicts, then drain buffers.

    The very first call performs the initial ``vector_reset()`` to obtain
    observations (guarded by ``self.first_reset_done``). Each returned
    mapping is env-index -> {dummy_agent_id: value}; the off-policy-actions
    slot is always an empty dict for vector envs.
    """
    from ray.rllib.env.base_env import with_dummy_agent_id

    # Run the initial reset lazily, exactly once.
    if not self.first_reset_done:
        self.first_reset_done = True
        self.new_obs = self.vector_env.vector_reset()

    # Snapshot the buffered values, keyed by vector index (= env id).
    obs_by_env = {idx: val for idx, val in enumerate(self.new_obs)}
    rew_by_env = {idx: val for idx, val in enumerate(self.cur_rewards)}
    done_by_env = {idx: val for idx, val in enumerate(self.cur_dones)}
    info_by_env = {idx: val for idx, val in enumerate(self.cur_infos)}

    # Drain all buffers so a repeated `poll()` yields nothing new.
    self.new_obs = []
    self.cur_rewards = []
    self.cur_dones = []
    self.cur_infos = []

    return (
        with_dummy_agent_id(obs_by_env),
        with_dummy_agent_id(rew_by_env),
        with_dummy_agent_id(done_by_env, "__all__"),
        with_dummy_agent_id(info_by_env),
        {},
    )
def _poll(
    self,
) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
    """Collect pending per-episode data from the wrapped external env.

    Episodes whose done-flag is set are removed from the external env's
    registry. In the multiagent case, reward/done/info dicts are padded
    with neutral values so they share the observation dict's agent keys
    (the obs dict is assumed to carry the full key set); otherwise every
    value is wrapped under a dummy agent id.
    """
    from ray.rllib.env.base_env import with_dummy_agent_id

    obs, rewards, dones, infos = {}, {}, {}, {}
    off_policy_actions = {}

    # Iterate over a snapshot, since finished episodes are deleted below.
    for episode_id, episode in list(self.external_env._episodes.items()):
        data = episode.get_data()
        if self.multiagent:
            episode_done = episode.cur_done_dict["__all__"]
        else:
            episode_done = episode.cur_done
        if episode_done:
            del self.external_env._episodes[episode_id]
        if not data:
            continue
        raw_obs = data["obs"]
        # Apply the preprocessor only when one was configured.
        obs[episode_id] = self.prep.transform(raw_obs) if self.prep else raw_obs
        rewards[episode_id] = data["reward"]
        dones[episode_id] = data["done"]
        infos[episode_id] = data["info"]
        if "off_policy_action" in data:
            off_policy_actions[episode_id] = data["off_policy_action"]

    if not self.multiagent:
        return (
            with_dummy_agent_id(obs),
            with_dummy_agent_id(rewards),
            with_dummy_agent_id(dones, "__all__"),
            with_dummy_agent_id(infos),
            with_dummy_agent_id(off_policy_actions),
        )

    # Pad reward/done/info for any agent that appears in the observations
    # but is missing from the other dicts, so all dicts have a consistent
    # key set (relies on obs having all possible keys for now).
    for episode_id, agent_obs in obs.items():
        for agent_id in agent_obs.keys():
            rewards[episode_id].setdefault(agent_id, 0.0)
            dones[episode_id].setdefault(agent_id, False)
            infos[episode_id].setdefault(agent_id, {})
    return obs, rewards, dones, infos, off_policy_actions
def poll(self) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
    """Return buffered step data keyed by env index under a dummy agent id.

    Triggers the initial ``vector_reset()`` the first time it is called
    (signalled by ``self.new_obs`` still being None), then empties all
    buffers so subsequent polls return empty dicts until new data arrives.
    """
    from ray.rllib.env.base_env import with_dummy_agent_id

    # First call ever: obtain initial observations via a vector reset.
    if self.new_obs is None:
        self.new_obs = self.vector_env.vector_reset()

    observations = {idx: val for idx, val in enumerate(self.new_obs)}
    step_rewards = {idx: val for idx, val in enumerate(self.cur_rewards)}
    step_dones = {idx: val for idx, val in enumerate(self.cur_dones)}
    step_infos = {idx: val for idx, val in enumerate(self.cur_infos)}

    # Reset buffers; `new_obs` becomes [] (not None), so no re-reset occurs.
    self.new_obs = []
    self.cur_rewards = []
    self.cur_dones = []
    self.cur_infos = []

    return (
        with_dummy_agent_id(observations),
        with_dummy_agent_id(step_rewards),
        with_dummy_agent_id(step_dones, "__all__"),
        with_dummy_agent_id(step_infos),
        {},
    )