def reset(self):
    """Reset the env and return the initial observation of every agent.

    Clears the step counter, the beam positions and the agent registry,
    then rebuilds the agents and the map and fires the map events before
    rendering each agent's view.

    Returns:
        obs (dict): New observations for each ready agent, keyed by agent
            id. Each value is the agent's colour-mapped view, rotated to
            the agent's orientation and resized to 42x42 ``uint8``.
    """
    self.curr_step = 0
    self.beam_pos = []
    self.agents = {}
    self.setup_agents()
    self.reset_map()
    self.map_events()

    observations = {}
    map_with_agents = self.get_map_with_agents()
    for agent_id, agent in self.agents.items():
        agent_view = utils.return_view(map_with_agents,
                                       agent['position'],
                                       self.agent_view_range,
                                       self.agent_view_range)
        rgb_arr = utils.map_to_colors(agent_view, self.color_map)
        # step() rotates every view by the agent's orientation; do the same
        # here so the first observation of an episode is expressed in the
        # same frame as all the following ones.
        rgb_arr = self.rotate_view(agent['orientation'], rgb_arr)
        # only for test: downscale the view to a fixed 42x42 image
        rgb_arr = cv2.resize(rgb_arr.astype(np.uint8), (42, 42),
                             interpolation=cv2.INTER_AREA)
        observations[agent_id] = rgb_arr
    return observations
def reset(self):
    """Reset the environment.

    Performed in between rollouts: the beam positions and agents are
    discarded, the agents and the map are rebuilt, and the custom map
    update is run once before the first observations are rendered.

    Returns
    -------
    observation: dict
        the initial observation of each agent, keyed by agent id; each
        value is the colour-mapped state of that agent. The initial
        reward is assumed to be zero.
    """
    self.beam_pos = []
    self.agents = {}
    self.setup_agents()
    self.reset_map()
    self.custom_map_update()

    full_map = self.get_map_with_agents()
    observations = {}
    for agent in self.agents.values():
        # refresh the agent's local grid before asking for its state
        agent.grid = util.return_view(full_map, agent.pos,
                                      agent.row_size, agent.col_size)
        observations[agent.agent_id] = self.map_to_colors(
            agent.get_state(), self.color_map)
    return observations
def setup_agents(self):
    """Create ``self.num_agents`` HarvestAgent objects in ``self.agents``.

    Agents are named ``agent-0``, ``agent-1``, ... Each one receives a spawn
    point, a spawn rotation and a view of the map cut around its spawn
    point. The map snapshot is taken once, before any agent is created.
    """
    snapshot = self.get_map_with_agents()
    for index in range(self.num_agents):
        name = f'agent-{index}'
        start_pos = self.spawn_point()
        start_rot = self.spawn_rotation()
        local_grid = util.return_view(snapshot, start_pos,
                                      HARVEST_VIEW_SIZE, HARVEST_VIEW_SIZE)
        self.agents[name] = HarvestAgent(name, start_pos, start_rot,
                                         local_grid)
def setup_agents(self):
    """Construct all the agents in ``self.agents``.

    Creates ``self.num_agents`` CleanupAgent objects named ``agent-0``,
    ``agent-1``, ... Each one receives a spawn point, a spawn rotation and
    a view of the map cut around its spawn point. The map snapshot is taken
    once, before any agent is created.
    """
    snapshot = self.get_map_with_agents()
    for index in range(self.num_agents):
        name = f'agent-{index}'
        start_pos = self.spawn_point()
        start_rot = self.spawn_rotation()
        local_grid = util.return_view(snapshot, start_pos,
                                      CLEANUP_VIEW_SIZE, CLEANUP_VIEW_SIZE)
        self.agents[name] = CleanupAgent(name, start_pos, start_rot,
                                         local_grid)
def step(self, actions):
    """Take a dict of actions and convert them to a map update.

    Parameters
    ----------
    actions: dict {agent-id: int}
        dict of actions, keyed by agent-id, that are passed to the agent.
        The agent interprets the int and converts it to a command.

    Returns
    -------
    observations: dict of arrays representing agent observations
    rewards: dict of rewards for each agent
    dones: dict indicating whether each agent is done; the extra key
        ``"__all__"`` is true as soon as any agent is done
    info: dict to pass extra info to gym (always empty here)
    """
    self.beam_pos = []

    # translate the raw integer actions through each agent's action map
    commands = {
        agent_id: self.agents[agent_id].action_map(action)
        for agent_id, action in actions.items()
    }

    # move
    self.update_moves(commands)

    # let every agent consume whatever is on the cell it now stands on
    for agent in self.agents.values():
        row, col = agent.get_pos()[0], agent.get_pos()[1]
        self.world_map[row, col] = agent.consume(self.world_map[row, col])

    # execute custom moves like firing
    self.update_custom_moves(commands)

    # execute spawning events
    self.custom_map_update()

    full_map = self.get_map_with_agents()

    observations, rewards, dones, info = {}, {}, {}, {}
    for agent in self.agents.values():
        key = agent.agent_id
        agent.grid = util.return_view(full_map, agent.pos,
                                      agent.row_size, agent.col_size)
        observations[key] = self.map_to_colors(agent.get_state(),
                                               self.color_map)
        rewards[key] = agent.compute_reward()
        dones[key] = agent.get_done()

    any_done = np.any(list(dones.values()))
    dones["__all__"] = any_done
    return observations, rewards, dones, info
def spawn_apples(self):
    """Construct the apples spawned in this step.

    Every point in ``self.apple_points`` that is neither occupied by an
    agent nor already holding an apple gets a chance to spawn one. The
    chance is looked up in ``SPAWN_PROB`` by the number of apples counted
    in the window around the point, capped at index 3.

    Returns
    -------
    new_apple_points: list of tuples
        one ``(row, col, 'A')`` tuple per newly spawned apple
    """
    spawned = []
    for point in self.apple_points:
        row, col = point
        # apples can't spawn where agents are standing or where an apple
        # already is
        if [row, col] in self.agent_pos or self.world_map[row, col] == 'A':
            continue
        neighbourhood = util.return_view(self.world_map, point,
                                         APPLE_RADIUS, APPLE_RADIUS)
        nearby = self.count_apples(neighbourhood)
        threshold = SPAWN_PROB[min(nearby, 3)]
        draw = np.random.rand(1)[0]
        if draw < threshold:
            spawned.append((row, col, 'A'))
    return spawned
def step(self, actions):
    """Advance the environment by one step.

    The returns are dicts mapping from agent_id strings to values. Agents
    whose position row equals -99 are treated as outcasts: their actions
    are dropped and they do not consume anything, but they still receive
    an observation.

    Parameters
    ----------
    actions (dict): Action for each agent, keyed by agent id.

    Returns
    -------
    A list ``[obs, rewards, dones, infos]`` where
        obs (dict): New observations for each agent.
        rewards (dict): Reward values for each agent.
        dones (dict): Done values for each agent (always False). The
            special key "__all__" (required) is used to indicate env
            termination and becomes True once ``self.curr_step`` has
            reached ``self.run_steps``.
        infos (dict): Optional info values for each agent id (empty).
    """
    observations = {}
    rewards = defaultdict(int)
    dones = {}
    info = {}
    self.beam_pos = []

    # filter outcast players: keep only the actions of agents on the map
    actions = {
        agent_id: actions[agent_id]
        for agent_id, agent in self.agents.items()
        if agent['position'][0] != -99
    }

    self.update_moves(actions)

    for agent_id, agent in self.agents.items():
        row, col = agent['position'][0], agent['position'][1]
        if row == -99:
            # outcasts earn nothing, but still get an entry in rewards
            rewards.setdefault(agent_id, 0)
            continue
        new_char, gained = self.consume(self.world_map[row, col])
        rewards[agent_id] += gained
        self.world_map[row, col] = new_char

    # execute custom moves like firing
    self.update_special_actions(actions, rewards)

    # environment-specific events (e.g. spawning)
    self.map_events()

    map_with_agents = self.get_map_with_agents()
    view_range = self.agent_view_range
    for agent_id, agent in self.agents.items():
        view = utils.return_view(map_with_agents, agent['position'],
                                 view_range, view_range)
        frame = utils.map_to_colors(view, self.color_map)
        frame = self.rotate_view(agent['orientation'], frame)
        # only for test: downscale the view to a fixed 42x42 image
        frame = cv2.resize(frame.astype(np.uint8), (42, 42),
                           interpolation=cv2.INTER_AREA)
        observations[agent_id] = frame
        dones[agent_id] = False  # no final state here!

    any_agent_done = np.any(list(dones.values()))
    out_of_time = self.curr_step >= self.run_steps
    dones["__all__"] = True if out_of_time else any_agent_done

    self.curr_step += 1
    return [observations, rewards, dones, info]
def get_state(self):
    """Return the view of ``self.grid`` taken around the agent's position.

    The view is produced by ``util.return_view`` using the position from
    ``self.get_pos()`` and the extents ``self.row_size`` / ``self.col_size``.
    """
    center = self.get_pos()
    return util.return_view(
        self.grid,
        center,
        self.row_size,
        self.col_size,
    )
def get_state(self):
    """Return the view of ``self.full_map`` taken around ``self.pos``.

    The view is produced by ``util.return_view`` using the extents
    ``self.row_size`` / ``self.col_size``.
    """
    center = self.pos
    return util.return_view(
        self.full_map,
        center,
        self.row_size,
        self.col_size,
    )
def get_state(self):
    """Return the view of ``self.grid`` around the agent's reference point.

    When ``self.global_ref_point`` is truthy it is used as the reference
    point; otherwise the position from ``self.get_pos()`` is used. The
    view is produced by ``util.return_view`` using the extents
    ``self.row_size`` / ``self.col_size``.
    """
    if self.global_ref_point:
        center = self.global_ref_point
    else:
        center = self.get_pos()
    return util.return_view(self.grid, center,
                            self.row_size, self.col_size)
def get_state_global(self, global_ref_point):
    """Return the view of ``self.grid`` taken around ``global_ref_point``.

    Parameters
    ----------
    global_ref_point:
        the point handed to ``util.return_view`` as the position of the
        view, in place of the agent's own position.

    Returns
    -------
    whatever ``util.return_view`` yields for ``self.grid`` with the extents
    ``self.row_size`` / ``self.col_size``.
    """
    extents = (self.row_size, self.col_size)
    return util.return_view(self.grid, global_ref_point, *extents)