Example #1
    def log_returns(self,
                    episode_id: str,
                    reward_dict: MultiAgentDict,
                    info_dict: MultiAgentDict = None,
                    multiagent_done_dict: MultiAgentDict = None) -> None:
        """Record returns from the environment.

        The reward will be attributed to the previous action taken by the
        episode. Rewards accumulate until the next action. If no reward is
        logged before the next action, a reward of 0.0 is assumed.

        Args:
            episode_id: Episode id returned from start_episode().
            reward_dict: Reward from the environment agents.
            info_dict: Optional info dict.
            multiagent_done_dict: Optional done dict for agents.
        """

        episode = self._get(episode_id)

        # Accumulate reward by agent.
        # For existing agents, we want to add the reward up.
        for agent, rew in reward_dict.items():
            if agent in episode.cur_reward_dict:
                episode.cur_reward_dict[agent] += rew
            else:
                episode.cur_reward_dict[agent] = rew

        if multiagent_done_dict:
            for agent, done in multiagent_done_dict.items():
                episode.cur_done_dict[agent] = done

        if info_dict:
            episode.cur_info_dict = info_dict
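For orientation, here is a minimal, hypothetical usage sketch of the call pattern this method supports in an RLlib external multi-agent environment loop. The env subclass, the step_simulation() helper, the agent ids, and the reward values are all invented for illustration; only the start_episode()/get_action()/log_returns()/end_episode() calls mirror the API above, and rewards logged between two get_action() calls accumulate per agent as described in the docstring.

from ray.rllib.env.external_multi_agent_env import ExternalMultiAgentEnv


class MyExternalEnv(ExternalMultiAgentEnv):  # hypothetical subclass
    def run(self):
        episode_id = self.start_episode()
        obs = {"agent_0": [0.0], "agent_1": [0.0]}  # made-up observations
        for _ in range(10):
            actions = self.get_action(episode_id, obs)
            obs = step_simulation(actions)  # hypothetical external simulator call
            # Attributed to the actions above; accumulates per agent until the
            # next get_action() call. Logging nothing would mean a reward of 0.0.
            self.log_returns(episode_id, {"agent_0": 1.0, "agent_1": -0.5})
        self.end_episode(episode_id, obs)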
Example #2
    def observe(self, obs: MultiAgentDict, rewards: MultiAgentDict,
                dones: MultiAgentDict, infos: MultiAgentDict):
        self.last_obs = obs
        # Accumulate rewards per agent until they are consumed.
        for ag, r in rewards.items():
            if ag in self.last_rewards:
                self.last_rewards[ag] += r
            else:
                self.last_rewards[ag] = r
        # Once an agent has been marked done, it stays done.
        for ag, d in dones.items():
            if ag in self.last_dones:
                self.last_dones[ag] = self.last_dones[ag] or d
            else:
                self.last_dones[ag] = d
        self.last_infos = infos
Example #3
    def step(
        self, action_dict: MultiAgentDict
    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
        observation, reward, done, reproduce = {}, {}, {}, {}
        alive = []
        # Step every agent that has not finished yet.
        for i, action in action_dict.items():
            if i not in self.dones:
                observation[i], reward[i], done[i], reproduce[i] = \
                    self.agents[i].step(action)
                if done[i]:
                    self.dones.append(i)
                alive.append(i)

        # Agents that are still alive and flagged for reproduction spawn a new hunter.
        for i in alive:
            if i not in self.dones:
                if reproduce[i]:
                    new_hunter = HunterEnv()
                    new_id = len(self.agents)
                    observation[new_id] = new_hunter.reset()
                    reward[new_id] = 0
                    done[new_id] = False
                    reproduce[new_id] = False
                    self.agents.append(new_hunter)
        done["__all__"] = len(self.dones) == len(self.agents)
        self.alive = len(observation)
        return observation, reward, done, reproduce
Example #4
    def observation_space_contains(self, x: MultiAgentDict) -> bool:
        """Checks if the given observations are contained in the observation space.

        Args:
            x: Observations to check.

        Returns:
            True if the observation space contains all observations in x.
        """
        if (not hasattr(self, "_spaces_in_preferred_format")
                or self._spaces_in_preferred_format is None):
            self._spaces_in_preferred_format = (
                self._check_if_space_maps_agent_id_to_sub_space())
        if self._spaces_in_preferred_format:
            for key, agent_obs in x.items():
                if not self.observation_space[key].contains(agent_obs):
                    return False
            if not all(k in self.observation_space for k in x):
                if log_once(
                        "possibly_bad_multi_agent_dict_missing_agent_observations"
                ):
                    logger.warning(
                        "Your environment returns observations that are "
                        "MultiAgentDicts with incomplete information, i.e. "
                        "they only contain information on a subset of the "
                        "participating agents. Ignore this warning if this "
                        "is intended, for example if your environment is a "
                        "turn-based simulation.")
            return True

        logger.warning("observation_space_contains() has not been implemented")
        return True
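As a quick illustration of the "preferred format" branch above, the following hedged sketch assumes a MultiAgentEnv subclass whose observation_space is a gym Dict keyed by agent id; the class name, agent ids, attribute assignments, and Box bounds are invented for the example.

import numpy as np
from gym.spaces import Box, Dict

env = MyMultiAgentEnv()  # hypothetical MultiAgentEnv subclass
env._agent_ids = {"agent_0", "agent_1"}  # assumption: agent ids match the space keys
env.observation_space = Dict({
    "agent_0": Box(-1.0, 1.0, shape=(2,)),
    "agent_1": Box(-1.0, 1.0, shape=(2,)),
})

# Every provided observation lies inside its agent's sub-space, so the check
# passes; because "agent_1" is missing, the incomplete-dict warning above may
# be logged once.
partial_obs = {"agent_0": np.array([0.1, -0.2], dtype=np.float32)}
print(env.observation_space_contains(partial_obs))  # -> True

# An out-of-bounds observation makes the check fail.
bad_obs = {"agent_0": np.array([5.0, 0.0], dtype=np.float32)}
print(env.observation_space_contains(bad_obs))  # -> False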
Example #5
    def step(
        self, action_dict: MultiAgentDict
    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:

        processed_action_dict = {}
        for agent_id, action in action_dict.items():
            if agent_id in self.agents_to_action_converters:
                converter: RestrictedToBaseGameActionSpaceConverter = \
                    self.agents_to_action_converters[agent_id]
                base_game_action, _, _ = converter.get_base_game_action(
                    obs=self._agents_to_current_obs[agent_id],
                    restricted_game_action=action,
                    use_delegate_policy_exploration=(
                        self._use_delegate_policy_exploration),
                    clip_base_game_actions=self._clip_base_game_actions,
                    delegate_policy_state=None)
                processed_action_dict[agent_id] = base_game_action
            else:
                processed_action_dict[agent_id] = action

        obs, rews, dones, infos = self.base_env.step(
            action_dict=processed_action_dict)

        for agent_id, observation in obs.items():
            self._agents_to_current_obs[agent_id] = observation

        return obs, rews, dones, infos
Example #6
    def step(
        self, action_dict: MultiAgentDict
    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
        obs = {}
        rewards = {}
        dones = {}
        infos = {}
        for agent_name, action in action_dict.items():
            if agent_name.endswith('_high'):
                # High-level agent: spawn a low-level sub-agent for the chosen mode.
                agent = self.flat_env.agents[agent_name]
                action_name = HIGH_LEVEL_ACTIONS[action]
                agent.high_level_steps += 1
                agent_id = f'{agent.low_level_prefix}{action_name}_{agent_name}_{agent.high_level_steps}'
                if action_name == 'COLLECT':
                    obs[agent_id] = get_collect_observation_from_game_state(agent.last_game_state)
                elif action_name == 'DESTROY':
                    obs[agent_id] = get_destroy_observation_from_game_state(agent.last_game_state)
                elif action_name == 'KILL':
                    obs[agent_id] = get_kill_observation_from_game_state(agent.last_game_state)
                else:
                    raise ValueError(f'Unknown high-level action: {action_name}')
                rewards[agent_id] = 0
                dones[agent_id] = False
                self.high_low_mapping[agent_name] = agent_id
                agent.current_mode = action_name
                agent.current_sub_id = agent_id
            else:
                # Low-level agent: recover the owning high-level agent's name
                # from the sub-agent id and buffer its action.
                agent_parts = agent_name.split('_')
                high_level_agent_name = f'{agent_parts[2]}_{agent_parts[3]}_high'
                self.action_buffer[high_level_agent_name] = action

        # Only step the flat env once every active high-level agent has buffered an action.
        if len(self.action_buffer) == len(self.flat_env.active_agents):
            obs, rewards, dones, infos = self.flat_env.step(self.action_buffer)
            self.action_buffer = {}
        else:
            dones['__all__'] = False

        return obs, rewards, dones, infos
Example #7
    def step(
        self, action_dict: MultiAgentDict
    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:

        # Sanity check: every chosen action must be valid under the agent's
        # current valid-actions mask.
        for agent_id, action in action_dict.items():
            if self._agents_to_current_valid_actions_mask[agent_id] is not None:
                assert self._agents_to_current_valid_actions_mask[agent_id][action] == 1.0, (
                    f"\nagent is {agent_id}\n"
                    f"action is {action}\n"
                    f"restricted valid actions are "
                    f"{self._agents_to_current_valid_actions_mask[agent_id]}")

        base_obs_dict, rews, dones, infos = self.base_env.step(
            action_dict=action_dict)

        restricted_game_obs = self._convert_obs_to_restricted_game(
            base_game_obs_dict=base_obs_dict, dones=dones)

        return restricted_game_obs, rews, dones, infos
Example #8
    def _convert_obs_to_restricted_game(self,
                                        base_game_obs_dict: MultiAgentDict,
                                        dones):
        obs_dict_out = {}

        self._agents_to_current_valid_actions_mask = {
            agent: None
            for agent in range(2)
        }

        for agent_id, base_game_obs in base_game_obs_dict.items():
            if agent_id in self.agent_conversions:
                if not dones["__all__"]:
                    base_game_obs_as_tuple = tuple(base_game_obs)
                    try:
                        restricted_game_obs = self.agent_conversions[
                            agent_id].orig_obs_to_restricted_game_obs[
                                base_game_obs_as_tuple]
                        # assert len(restricted_game_obs) == 90, "only needs to be true for 20x dummy leduc"
                    except KeyError:
                        assert isinstance(base_game_obs_as_tuple, tuple)
                        assert base_game_obs_as_tuple[0] == list(
                            self.agent_conversions[agent_id]
                            .orig_obs_to_restricted_game_obs.keys())[0][0], (
                            f"key provided is {base_game_obs_as_tuple}\n"
                            f"agent id is {agent_id}\n"
                            f"example key is "
                            f"{list(self.agent_conversions[agent_id].orig_obs_to_restricted_game_obs.keys())[0]}")
                        assert len(base_game_obs_as_tuple) == len(
                            list(self.agent_conversions[agent_id]
                                 .orig_obs_to_restricted_game_obs.keys())[0]), (
                            f"{len(base_game_obs_as_tuple)} "
                            f"{len(list(self.agent_conversions[agent_id].orig_obs_to_restricted_game_obs.keys())[0])}")
                        print(
                            f"keys are: {self.agent_conversions[agent_id].orig_obs_to_restricted_game_obs.keys()}\n\nlooking for {base_game_obs_as_tuple}"
                        )
                        raise
                    self._agents_to_current_valid_actions_mask[agent_id] = \
                        self.agent_conversions[agent_id].orig_obs_to_restricted_game_valid_actions_mask[
                            base_game_obs_as_tuple]
                    obs_dict_out[agent_id] = restricted_game_obs
                else:
                    restricted_game_obs = np.zeros(
                        shape=self.observation_space.shape, dtype=np.float32)
                    restricted_game_obs[:len(base_game_obs)] = base_game_obs
                    obs_dict_out[agent_id] = restricted_game_obs
            else:
                obs_dict_out[agent_id] = base_game_obs
        return obs_dict_out
Example #9
    def step(
        self, action_dict: MultiAgentDict
    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
        observation, reward, done, reproduce = {}, {}, {}, {}
        alive = []
        # Step every agent that has not finished yet.
        for agent_id, action in action_dict.items():
            if agent_id not in self.dones:
                observation[agent_id], reward[agent_id], done[agent_id], \
                    reproduce[agent_id] = self.agents[agent_id].step(action)
                if done[agent_id]:
                    self.dones.append(agent_id)
                alive.append(agent_id)

        # Agents that are still alive and flagged for reproduction spawn a new
        # agent of the same kind (hunter or prey).
        for agent_id in alive:
            if agent_id not in self.dones:
                if reproduce[agent_id]:
                    if "hunter" in agent_id:
                        self.hunter_count += 1
                        new_agent = HunterEnv()
                        new_id = "hunter_" + str(self.hunter_count)
                    else:
                        self.prey_count += 1
                        new_agent = PreyEnv()
                        new_id = "prey_" + str(self.prey_count)

                    observation[new_id] = new_agent.reset()
                    reward[new_id] = 0
                    done[new_id] = False
                    reproduce[new_id] = False
                    self.agents[new_id] = new_agent
        done["__all__"] = len(self.dones) == len(self.agents)
        self.alive = len(observation)
        return observation, reward, done, reproduce