Example #1
    def reset(self):
        """Resets the env and returns observations from ready agents.

        Returns:
            obs (dict): New observations for each ready agent.
        """
        self.curr_step = 0
        self.beam_pos = []
        self.agents = {}
        self.setup_agents()
        self.reset_map()
        self.map_events()

        observations = {}
        map_with_agents = self.get_map_with_agents()
        for agent_id, agent in self.agents.items():
            position = agent['position']
            rgb_arr = utils.map_to_colors(
                utils.return_view(map_with_agents, position,
                                  self.agent_view_range,
                                  self.agent_view_range), self.color_map)
            rgb_arr = cv2.resize(rgb_arr.astype(np.uint8), (42, 42),
                                 interpolation=cv2.INTER_AREA)  # fixed-size resize, used for testing only
            observations[agent_id] = rgb_arr
        return observations
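
A quick usage sketch for the reset contract above. The environment class name and constructor arguments here are assumptions, not from the source:

import numpy as np

env = HarvestEnv(num_agents=2)  # hypothetical constructor
obs = env.reset()
for agent_id, rgb in obs.items():
    # each observation should be a (42, 42, 3) uint8 RGB view around the
    # agent, assuming map_to_colors returns a 3-channel array
    print(agent_id, rgb.shape, rgb.dtype)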
Example #2
    def reset(self):
        """Reset the environment.

        This method is performed in between rollouts. It resets the state of
        the environment.

        Returns
        -------
        observation: dict of numpy ndarray
            the initial observation for each agent. The initial reward is
            assumed to be zero.
        """
        self.beam_pos = []
        self.agents = {}
        self.setup_agents()
        self.reset_map()
        self.custom_map_update()

        map_with_agents = self.get_map_with_agents()

        observations = {}
        for agent in self.agents.values():
            agent.grid = util.return_view(map_with_agents, agent.pos,
                                          agent.row_size, agent.col_size)
            rgb_arr = self.map_to_colors(agent.get_state(), self.color_map)
            observations[agent.agent_id] = rgb_arr
        return observations
Example #3
    def setup_agents(self):
        """Construct all the agents in self.agents."""
        map_with_agents = self.get_map_with_agents()

        for i in range(self.num_agents):
            agent_id = 'agent-' + str(i)
            spawn_point = self.spawn_point()
            rotation = self.spawn_rotation()
            grid = util.return_view(map_with_agents, spawn_point,
                                    HARVEST_VIEW_SIZE, HARVEST_VIEW_SIZE)
            agent = HarvestAgent(agent_id, spawn_point, rotation, grid)
            self.agents[agent_id] = agent
Example #4
    def setup_agents(self):
        """Constructs all the agents in self.agents."""
        map_with_agents = self.get_map_with_agents()

        for i in range(self.num_agents):
            agent_id = 'agent-' + str(i)
            spawn_point = self.spawn_point()
            rotation = self.spawn_rotation()
            grid = util.return_view(map_with_agents, spawn_point,
                                    CLEANUP_VIEW_SIZE, CLEANUP_VIEW_SIZE)
            agent = CleanupAgent(agent_id, spawn_point, rotation, grid)
            self.agents[agent_id] = agent
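
The two setup_agents variants above follow the same pattern and differ only in the view-size constant and the agent class. A minimal sketch of the shared shape (the method name, agent_cls, and view_size are illustrative, not from the source):

    def setup_agents_generic(self, agent_cls, view_size):
        """Spawn num_agents agents of agent_cls, each with an initial view grid."""
        map_with_agents = self.get_map_with_agents()
        for i in range(self.num_agents):
            agent_id = 'agent-' + str(i)
            spawn_point = self.spawn_point()
            rotation = self.spawn_rotation()
            grid = util.return_view(map_with_agents, spawn_point,
                                    view_size, view_size)
            self.agents[agent_id] = agent_cls(agent_id, spawn_point,
                                              rotation, grid)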
Example #5
    def step(self, actions):
        """Takes in a dict of actions and converts them to a map update

        Parameters
        ----------
        actions: dict {agent-id: int}
            dict of actions, keyed by agent-id, that are passed to the agents.
            Each agent interprets its int action and converts it to a command.

        Returns
        -------
        observations: dict of arrays representing agent observations
        rewards: dict of rewards for each agent
        dones: dict indicating whether each agent is done
        info: dict to pass extra info to gym
        """

        self.beam_pos = []
        agent_actions = {}
        for agent_id, action in actions.items():
            agent_action = self.agents[agent_id].action_map(action)
            agent_actions[agent_id] = agent_action

        # move
        self.update_moves(agent_actions)
        for agent in self.agents.values():
            pos = agent.get_pos()
            new_char = agent.consume(self.world_map[pos[0], pos[1]])
            self.world_map[pos[0], pos[1]] = new_char

        # execute custom moves like firing
        self.update_custom_moves(agent_actions)

        # execute spawning events
        self.custom_map_update()

        map_with_agents = self.get_map_with_agents()

        observations = {}
        rewards = {}
        dones = {}
        info = {}
        for agent in self.agents.values():
            agent.grid = util.return_view(map_with_agents, agent.pos,
                                          agent.row_size, agent.col_size)
            rgb_arr = self.map_to_colors(agent.get_state(), self.color_map)
            observations[agent.agent_id] = rgb_arr
            rewards[agent.agent_id] = agent.compute_reward()
            dones[agent.agent_id] = agent.get_done()
        dones["__all__"] = np.any(list(dones.values()))
        return observations, rewards, dones, info
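
A minimal rollout sketch against the step API above, assuming a CleanupEnv-style class and an 8-action discrete space (both are assumptions, not from the source):

import numpy as np

env = CleanupEnv(num_agents=2)  # hypothetical constructor
obs = env.reset()
done = False
while not done:
    # sample one random action per agent; 8 is an assumed action count
    actions = {agent_id: np.random.randint(0, 8) for agent_id in obs}
    obs, rewards, dones, info = env.step(actions)
    done = dones["__all__"]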
Example #6
    def spawn_apples(self):
        """Construct the apples spawned in this step.

        Returns
        -------
        new_apple_points: list of 2-d lists
            a list containing lists indicating the spawn positions of new apples
        """

        new_apple_points = []
        for row, col in self.apple_points:
            # apples can't spawn where an agent stands or where an apple already is
            if ([row, col] not in self.agent_pos
                    and self.world_map[row, col] != 'A'):
                window = util.return_view(self.world_map, [row, col],
                                          APPLE_RADIUS, APPLE_RADIUS)
                num_apples = self.count_apples(window)
                spawn_prob = SPAWN_PROB[min(num_apples, 3)]
                if np.random.rand() < spawn_prob:
                    new_apple_points.append((row, col, 'A'))
        return new_apple_points
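
The respawn rule above makes regrowth density-dependent: each candidate cell counts the apples surviving within APPLE_RADIUS and uses that count (capped at 3) to index SPAWN_PROB, so if SPAWN_PROB[0] is zero, a fully harvested neighborhood never regrows. A sketch with illustrative constants and a plausible count_apples helper (the real values live in the source repo and may differ):

import numpy as np

APPLE_RADIUS = 2
SPAWN_PROB = [0.0, 0.005, 0.02, 0.05]  # indexed by nearby-apple count

def count_apples(window):
    """Count the 'A' cells in the local window around a candidate point."""
    return int(np.sum(window == 'A'))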
Example #7
    def step(self, actions):
        """Returns observations from ready agents.

        The returns are dicts mapping from agent_id strings to values. The
        number of agents in the env can vary over time.

        Returns
        -------
            obs (dict): New observations for each ready agent.
            rewards (dict): Reward values for each ready agent. If the
                episode is just started, the value will be None.
            dones (dict): Done values for each ready agent. The special key
                "__all__" (required) is used to indicate env termination.
            infos (dict): Optional info values for each agent id.
        """
        observations = {}
        rewards = defaultdict(int)
        dones = {}
        info = {}

        self.beam_pos = []

        # filter out outcast players (row -99 marks a removed agent)
        actions_temp = {}
        for agent_id, agent in self.agents.items():
            pos = agent['position']
            if pos[0] == -99:
                continue
            actions_temp[agent_id] = actions[agent_id]

        actions = actions_temp

        self.update_moves(actions)

        for agent_id, agent in self.agents.items():
            pos = agent['position']
            if pos[0] == -99:
                # touch the defaultdict so outcast agents still get a zero reward
                rewards[agent_id] += 0
                continue
            new_char, reward = self.consume(self.world_map[pos[0], pos[1]])
            rewards[agent_id] += reward
            self.world_map[pos[0], pos[1]] = new_char

        # execute custom moves like firing; may update rewards in place
        self.update_special_actions(actions, rewards)

        # execute scripted map events of the designed env, like spawning
        self.map_events()

        map_with_agents = self.get_map_with_agents()

        for agent_id, agent in self.agents.items():
            agent_view = utils.return_view(map_with_agents, agent['position'],
                                           self.agent_view_range,
                                           self.agent_view_range)
            rgb_arr = utils.map_to_colors(agent_view, self.color_map)
            rgb_arr = self.rotate_view(agent['orientation'], rgb_arr)
            rgb_arr = cv2.resize(rgb_arr.astype(np.uint8), (42, 42),
                                 interpolation=cv2.INTER_AREA)  # fixed-size resize, used for testing only
            observations[agent_id] = rgb_arr
            dones[agent_id] = False  # agents have no individual terminal state
        dones["__all__"] = np.any(list(dones.values()))
        if self.curr_step >= self.run_steps:
            dones["__all__"] = True
        self.curr_step += 1

        return observations, rewards, dones, info
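
The observation post-processing in this variant can be exercised standalone. A sketch of the resize step on a dummy view (the 84x84 input size is illustrative):

import cv2
import numpy as np

view = np.random.randint(0, 255, size=(84, 84, 3), dtype=np.uint8)  # dummy RGB view
small = cv2.resize(view, (42, 42), interpolation=cv2.INTER_AREA)  # area interpolation suits downscaling
assert small.shape == (42, 42, 3)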
Example #8
    def get_state(self):
        """Return the view of the stored grid centered on the agent."""
        return util.return_view(self.grid, self.get_pos(), self.row_size,
                                self.col_size)
Example #9
    def get_state(self):
        """Return the view of the full map centered on the agent's position."""
        return util.return_view(self.full_map, self.pos, self.row_size,
                                self.col_size)
Example #10
    def get_state(self):
        # use the global reference point if one is set, otherwise the agent's position
        pos = self.global_ref_point if self.global_ref_point else self.get_pos()
        return util.return_view(self.grid, pos, self.row_size, self.col_size)
Example #11
    def get_state_global(self, global_ref_point):
        """Return the view centered on an externally supplied reference point."""
        return util.return_view(self.grid, global_ref_point, self.row_size,
                                self.col_size)
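
Examples #8 through #11 are all thin wrappers around util.return_view. A minimal sketch of what that helper plausibly does, as an assumption rather than the repo's actual implementation: pad the grid so windows near the border stay in bounds, then slice a (2*row_size+1) x (2*col_size+1) window centered on pos.

import numpy as np

def return_view_sketch(grid, pos, row_size, col_size, pad_char=' '):
    """Slice a padded window of the char grid centered on pos (a sketch)."""
    padded = np.pad(grid, ((row_size, row_size), (col_size, col_size)),
                    mode='constant', constant_values=pad_char)
    row, col = pos[0] + row_size, pos[1] + col_size
    return padded[row - row_size:row + row_size + 1,
                  col - col_size:col + col_size + 1]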