예제 #1
0
    def poll(
        self,
    ) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict,
               MultiEnvDict]:
        """Hand out the buffered per-env results and clear the buffers.

        On the first call (``self.first_reset_done`` is falsy) the wrapped
        vector env is reset via ``vector_reset()`` and its result becomes
        the observation buffer.

        Returns:
            A 5-tuple: observations, rewards, dones and infos, each keyed by
            the env's position in its buffer and passed through
            ``with_dummy_agent_id`` (dones with the extra ``"__all__"``
            argument), followed by an empty dict.
        """
        from ray.rllib.env.base_env import with_dummy_agent_id

        # Lazy initial reset; the flag is flipped before the reset call.
        if not self.first_reset_done:
            self.first_reset_done = True
            self.new_obs = self.vector_env.vector_reset()

        # Snapshot every buffer as {env index -> value}.
        obs_by_env = {idx: item for idx, item in enumerate(self.new_obs)}
        reward_by_env = {idx: item for idx, item in enumerate(self.cur_rewards)}
        done_by_env = {idx: item for idx, item in enumerate(self.cur_dones)}
        info_by_env = {idx: item for idx, item in enumerate(self.cur_infos)}

        # Drop the buffered state so that a repeated `poll()` call does not
        # return the same data again.
        self.new_obs, self.cur_rewards = [], []
        self.cur_dones, self.cur_infos = [], []

        wrapped_obs = with_dummy_agent_id(obs_by_env)
        wrapped_rewards = with_dummy_agent_id(reward_by_env)
        wrapped_dones = with_dummy_agent_id(done_by_env, "__all__")
        wrapped_infos = with_dummy_agent_id(info_by_env)
        return (wrapped_obs, wrapped_rewards, wrapped_dones, wrapped_infos, {})
예제 #2
0
파일: external_env.py 프로젝트: parasj/ray
    def _poll(
        self,
    ) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
        """Gather pending data from every episode of the external env.

        Each episode in ``self.external_env._episodes`` is queried with
        ``get_data()``. Episodes whose done flag is set are removed from
        that registry. Episodes that returned truthy data contribute their
        ``"obs"`` (run through ``self.prep.transform`` when ``self.prep`` is
        truthy), ``"reward"``, ``"done"``, ``"info"`` and, if present,
        ``"off_policy_action"`` entries, keyed by episode id.

        Returns:
            A 5-tuple (obs, rewards, dones, infos, off-policy actions). In
            multi-agent mode the dicts are returned as collected, after
            filling in missing per-agent reward/done/info entries. Otherwise
            each dict is passed through ``with_dummy_agent_id`` (dones with
            the extra ``"__all__"`` argument).
        """
        from ray.rllib.env.base_env import with_dummy_agent_id

        obs_out, reward_out, done_out, info_out = {}, {}, {}, {}
        off_policy_out = {}

        # Walk a copy of the registry: finished episodes are deleted from
        # the live dict while we iterate.
        for episode_id, ep in self.external_env._episodes.copy().items():
            payload = ep.get_data()

            if self.multiagent:
                finished = ep.cur_done_dict["__all__"]
            else:
                finished = ep.cur_done
            if finished:
                del self.external_env._episodes[episode_id]

            # Nothing new from this episode.
            if not payload:
                continue

            obs_out[episode_id] = (
                self.prep.transform(payload["obs"])
                if self.prep
                else payload["obs"]
            )
            reward_out[episode_id] = payload["reward"]
            done_out[episode_id] = payload["done"]
            info_out[episode_id] = payload["info"]
            if "off_policy_action" in payload:
                off_policy_out[episode_id] = payload["off_policy_action"]

        if not self.multiagent:
            return (
                with_dummy_agent_id(obs_out),
                with_dummy_agent_id(reward_out),
                with_dummy_agent_id(done_out, "__all__"),
                with_dummy_agent_id(info_out),
                with_dummy_agent_id(off_policy_out),
            )

        # Ensure a consistent set of keys across the per-episode dicts.
        # For now this relies on the observation dict holding every agent id.
        for episode_id, agent_obs in obs_out.items():
            for agent_id in agent_obs.keys():
                if agent_id not in reward_out[episode_id]:
                    reward_out[episode_id][agent_id] = 0.0
                if agent_id not in done_out[episode_id]:
                    done_out[episode_id][agent_id] = False
                if agent_id not in info_out[episode_id]:
                    info_out[episode_id][agent_id] = {}
        return (obs_out, reward_out, done_out, info_out, off_policy_out)
예제 #3
0
 def poll(self) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict,
                         MultiEnvDict, MultiEnvDict]:
     """Hand out the buffered per-env results and clear the buffers.

     When ``self.new_obs`` is still ``None`` the wrapped vector env is
     reset via ``vector_reset()`` first. The buffers are replaced by empty
     lists afterwards, so this method does not trigger the reset again.

     Returns:
         A 5-tuple: observations, rewards, dones and infos, each keyed by
         the env's position in its buffer and passed through
         ``with_dummy_agent_id`` (dones with the extra ``"__all__"``
         argument), followed by an empty dict.
     """
     from ray.rllib.env.base_env import with_dummy_agent_id

     # No observations yet: obtain the initial ones from a reset.
     if self.new_obs is None:
         self.new_obs = self.vector_env.vector_reset()

     # Snapshot every buffer as {env index -> value}.
     obs_by_env = {idx: item for idx, item in enumerate(self.new_obs)}
     reward_by_env = {idx: item for idx, item in enumerate(self.cur_rewards)}
     done_by_env = {idx: item for idx, item in enumerate(self.cur_dones)}
     info_by_env = {idx: item for idx, item in enumerate(self.cur_infos)}

     # Clear the buffered state before handing the snapshot out.
     self.new_obs, self.cur_rewards = [], []
     self.cur_dones, self.cur_infos = [], []

     return (
         with_dummy_agent_id(obs_by_env),
         with_dummy_agent_id(reward_by_env),
         with_dummy_agent_id(done_by_env, "__all__"),
         with_dummy_agent_id(info_by_env),
         {},
     )