def decode(type_idx, color_idx, state):
  """Create an object from a 3-tuple state description."""
  obj_type = minigrid.IDX_TO_OBJECT[type_idx]
  if obj_type != 'agent':
    color = minigrid.IDX_TO_COLOR[color_idx]

  if obj_type == 'empty' or obj_type == 'unseen':
    return None

  if obj_type == 'wall':
    v = minigrid.Wall(color)
  elif obj_type == 'floor':
    v = minigrid.Floor(color)
  elif obj_type == 'ball':
    v = minigrid.Ball(color)
  elif obj_type == 'key':
    v = minigrid.Key(color)
  elif obj_type == 'box':
    v = minigrid.Box(color)
  elif obj_type == 'door':
    # State, 0: open, 1: closed, 2: locked
    is_open = state == 0
    is_locked = state == 2
    v = Door(color, is_open, is_locked)
  elif obj_type == 'goal':
    v = minigrid.Goal()
  elif obj_type == 'lava':
    v = minigrid.Lava()
  elif obj_type == 'agent':
    v = Agent(color_idx, state)
  else:
    assert False, "unknown object type in decode '%s'" % obj_type

  return v

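# Usage sketch (not part of the original source): decode() is the inverse of
# gym_minigrid's Grid.encode(), which emits a (width, height, 3) array of
# (type_idx, color_idx, state) triples. The helper name decode_grid and the
# variable names below are illustrative assumptions.
def decode_grid(encoded):
  """Decode every cell of an encoded grid array back into objects (or None)."""
  width, height, _ = encoded.shape
  return [[decode(*encoded[i, j]) for j in range(height)]
          for i in range(width)]
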
def reset_random(self):
  """Use domain randomization to create the environment."""
  self.graph = grid_graph(dim=[self.width - 2, self.height - 2])

  self.step_count = 0
  self.adversary_step_count = 0

  # Current position and direction of the agent
  self.reset_agent_status()

  self.agent_start_pos = None
  self.goal_pos = None

  # Extra metrics
  self.reset_metrics()

  # Create empty grid
  self._gen_grid(self.width, self.height)

  # Randomly place goal
  self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)

  # Randomly place agent
  self.agent_start_dir = self._rand_int(0, 4)
  self.agent_start_pos = self.place_one_agent(0, rand_dir=False)

  # Randomly place walls
  for _ in range(int(self.n_clutter / 2)):
    self.place_obj(minigrid.Wall(), max_tries=100)

  self.compute_shortest_path()
  self.n_clutter_placed = int(self.n_clutter / 2)

  return self.reset_agent()

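# Hedged illustration (not from the original source): reset_random() serves
# as a domain-randomization baseline for environment generation. The helper
# below simply collects the initial observations of several randomized
# layouts; the function name and argument are assumptions.
def sample_randomized_layouts(env, n_layouts=10):
  """Return the first observation of n_layouts domain-randomized episodes."""
  return [env.reset_random() for _ in range(n_layouts)]
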
def _gen_grid(self, width, height):
  self.players = []
  self.respawn_pool = RespawnPool()
  self.beam_collection = []
  arena = self.base_arena.regenerate()
  self.height, self.width = arena.shape
  self.grid = multigrid.Grid(width=self.width, height=self.height)
  for i in range(self.width):
    for j in range(self.height):
      entry = arena[j, i]
      if entry == '*':
        self.put_obj(minigrid.Wall(), i, j)
  for team in self.base_arena.teams:
    self.put_obj(
        team,
        team.flag.init_pos[0],  # Coordinates of numpy and gym_minigrid are inverted
        team.flag.init_pos[1])
    team.flag.cur_pos = team.cur_pos
  self.place_agent()

  self.actions = CapturingTheFlag.Actions
  self.mission = "capture the opponent's flag"

def _gen_grid(self, width, height):
  self.grid = multigrid.Grid(width, height)
  self.grid.wall_rect(0, 0, width, height)
  for _ in range(self.n_clutter):
    self.place_obj(minigrid.Wall(), max_tries=100)
  self.place_agent()
  self.mission = 'Play tag'

def _gen_grid(self, width, height):
  self.grid = multigrid.Grid(width, height)
  self.grid.wall_rect(0, 0, width, height)
  for i in range(self.n_goals):
    self.place_obj(self.objects[i], max_tries=100)
  for _ in range(self.n_clutter):
    self.place_obj(minigrid.Wall(), max_tries=100)
  self.place_agent()
  self.mission = 'pick up coins corresponding to your color'

def _gen_grid(self, width, height):
  self.grid = multigrid.Grid(width, height)
  self.grid.wall_rect(0, 0, width, height)
  for stag in self.stags:
    self.place_obj(stag, max_tries=100)
  for plant in self.plants:
    self.place_obj(plant, max_tries=100)
  for _ in range(self.n_clutter):
    self.place_obj(minigrid.Wall(), max_tries=100)
  self.place_agent()
  self.mission = 'Toggle the stag at the same time'

def _gen_grid(self, width, height):
  self.grid = multigrid.Grid(width, height)
  self.grid.wall_rect(0, 0, width, height)
  for i in range(self.n_goals):
    pos = self.place_obj(
        multigrid.Door(color='red', is_locked=True), max_tries=100)
    self.goal_pos[i] = pos
  for _ in range(self.n_clutter):
    self.place_obj(minigrid.Wall(), max_tries=100)
  self.place_agent()
  self.mission = 'meet up'

def _gen_grid(self, width, height):
  self.grid = multigrid.Grid(width, height)
  self.grid.wall_rect(0, 0, width, height)
  if self.randomize_goal:
    self.place_obj(minigrid.Goal(), max_tries=100)
  else:
    self.put_obj(minigrid.Goal(), width - 2, height - 2)
  for _ in range(self.n_clutter):
    if self.walls_are_lava:
      self.place_obj(minigrid.Lava(), max_tries=100)
    else:
      self.place_obj(minigrid.Wall(), max_tries=100)
  self.place_agent()
  self.mission = 'get to the green square'

def slice(self, top_x, top_y, width, height, agent_pos=None):
  """Get a subset of the grid for agents' partial observations."""
  grid = Grid(width, height)

  for j in range(0, height):
    for i in range(0, width):
      x = top_x + i
      y = top_y + j

      if x >= 0 and x < self.width and y >= 0 and y < self.height:
        v = self.get(x, y)
      else:
        v = minigrid.Wall()

      grid.set(i, j, v)

  return grid

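# Hedged usage sketch (not from the original source): extracting a square
# partial view around an agent with slice(), padded with walls outside the
# grid bounds. The helper name agent_view and the default view_size are
# illustrative assumptions.
def agent_view(grid, agent_pos, view_size=7):
  """Slice a view_size x view_size patch roughly centred on agent_pos."""
  top_x = agent_pos[0] - view_size // 2
  top_y = agent_pos[1] - view_size // 2
  return grid.slice(top_x, top_y, view_size, view_size)
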
def _gen_grid(self, width, height):
  # Create an empty grid
  self.grid = multigrid.Grid(width, height)

  # Generate the surrounding walls
  self.grid.wall_rect(0, 0, width, height)

  # Goal
  self.put_obj(minigrid.Goal(), self.goal_pos[0], self.goal_pos[1])

  # Agent
  self.place_agent_at_pos(0, self.start_pos)

  # Walls
  for x in range(self.bit_map.shape[0]):
    for y in range(self.bit_map.shape[1]):
      if self.bit_map[y, x]:
        # Add an offset of 1 for the outer walls
        self.put_obj(minigrid.Wall(), x + 1, y + 1)

def _gen_grid(self, width, height):
  self.grid = multigrid.Grid(width, height)
  self.grid.wall_rect(0, 0, width, height)
  self.objects = []
  self.colors = (np.random.choice(
      len(minigrid.IDX_TO_COLOR) - 1,
      size=self.n_colors,
      replace=False) + 1).tolist()
  for i in range(self.n_goals):
    if self.random_colors:
      color = minigrid.IDX_TO_COLOR[np.random.choice(self.colors)]
    else:
      color = minigrid.IDX_TO_COLOR[self.colors[i % self.n_colors]]
    self.objects.append(minigrid.Ball(color=color))
    self.place_obj(self.objects[i], max_tries=100)
  for _ in range(self.n_clutter):
    self.place_obj(minigrid.Wall(), max_tries=100)
  self.place_agent()
  self.mission = 'pick up objects'

def step_adversary(self, action):
  """The adversary gets a step for each available square in the grid.

  At each step it chooses whether to place the goal, the agent, a block,
  or nothing. If it chooses agent or goal and they have already been placed,
  they will be moved to the new location.

  Args:
    action: An integer in range 0-3 specifying which object to place:
      0 = goal
      1 = agent
      2 = wall
      3 = nothing

  Returns:
    Standard RL observation, reward (always 0), done, and info
  """
  done = False

  if self.adversary_step_count < self.adversary_max_steps:
    x, y = self.get_xy_from_step(self.adversary_step_count)

    # Place goal
    if action == 0:
      if self.goal_pos is None:
        self.put_obj(minigrid.Goal(), x, y)
      else:
        goal = self.grid.get(self.goal_pos[0], self.goal_pos[1])
        self.grid.set(self.goal_pos[0], self.goal_pos[1], None)
        self.put_obj(goal, x, y)
      self.goal_pos = (x, y)

    # Place the agent
    elif action == 1:
      if self.agent_start_pos is not None:
        agent = self.grid.get(self.agent_start_pos[0],
                              self.agent_start_pos[1])
        self.grid.set(self.agent_start_pos[0], self.agent_start_pos[1], None)
      else:
        agent = None
      self.agent_start_pos = np.array([x, y])
      self.place_agent_at_pos(
          0, self.agent_start_pos, rand_dir=False, agent_obj=agent)

    # Place wall
    elif action == 2:
      self.put_obj(minigrid.Wall(), x, y)
      self.n_clutter_placed += 1
      self.wall_locs.append((x - 1, y - 1))

  self.adversary_step_count += 1

  # End of episode
  if self.adversary_step_count >= self.adversary_max_steps:
    done = True

    # If the adversary has not placed the agent or goal, place them randomly
    if self.agent_start_pos is None:
      self.agent_start_pos = self.select_random_grid_position()
      # If wall exists here, remove it
      self.remove_wall(self.agent_start_pos[0], self.agent_start_pos[1])
      self.place_agent_at_pos(0, self.agent_start_pos, rand_dir=False)
      self.deliberate_agent_placement = 0
    else:
      self.deliberate_agent_placement = 1

    if self.goal_pos is None:
      self.goal_pos = self.select_random_grid_position()
      # If wall exists here, remove it
      self.remove_wall(self.goal_pos[0], self.goal_pos[1])
      self.put_obj(minigrid.Goal(), self.goal_pos[0], self.goal_pos[1])

    # Build graph after we are certain agent and goal are placed
    for w in self.wall_locs:
      self.graph.remove_node(w)
    self.compute_shortest_path()
  else:
    x, y = self.get_xy_from_step(self.adversary_step_count)

  image = self.grid.encode()
  obs = {
      'image': image,
      'time_step': [self.adversary_step_count],
      'random_z': self.generate_random_z(),
      'x': [x],
      'y': [y]
  }

  return obs, 0, done, {}

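# Hedged usage sketch (not from the original source): driving this
# step_adversary() variant with a random policy. One action in
# {0: goal, 1: agent, 2: wall, 3: nothing} is issued per grid square until
# the adversary episode ends. The env handle, the assumption that reset()
# starts the adversary phase, and the random policy are illustrative only.
import random

def build_random_environment(env):
  """Roll out the adversary phase until it signals done, then return obs."""
  obs = env.reset()  # assumed to start a fresh adversary episode
  done = False
  while not done:
    action = random.randint(0, 3)
    obs, _, done, _ = env.step_adversary(action)
  return obs
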
def step_adversary(self, loc):
  """The adversary gets n_clutter + 2 moves to place the goal, agent, blocks.

  The action space is the number of possible squares in the grid. The squares
  are numbered from left to right, top to bottom.

  Args:
    loc: An integer specifying the location to place the next object which
      must be decoded into x, y coordinates.

  Returns:
    Standard RL observation, reward (always 0), done, and info
  """
  if loc >= self.adversary_action_dim:
    raise ValueError(
        'Position passed to step_adversary is outside the grid.')

  # Add offset of 1 for outside walls
  x = int(loc % (self.width - 2)) + 1
  y = int(loc / (self.width - 2)) + 1
  done = False

  if self.choose_goal_last:
    should_choose_goal = (
        self.adversary_step_count == self.adversary_max_steps - 2)
    should_choose_agent = (
        self.adversary_step_count == self.adversary_max_steps - 1)
  else:
    should_choose_goal = self.adversary_step_count == 0
    should_choose_agent = self.adversary_step_count == 1

  # Place goal
  if should_choose_goal:
    # If there is goal noise, sometimes randomly place the goal
    if random.random() < self.goal_noise:
      self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)
    else:
      self.remove_wall(x, y)  # Remove any walls that might be in this loc
      self.put_obj(minigrid.Goal(), x, y)
      self.goal_pos = (x, y)

  # Place the agent
  elif should_choose_agent:
    self.remove_wall(x, y)  # Remove any walls that might be in this loc

    # Goal has already been placed here
    if self.grid.get(x, y) is not None:
      # Place agent randomly
      self.agent_start_pos = self.place_one_agent(0, rand_dir=False)
      self.deliberate_agent_placement = 0
    else:
      self.agent_start_pos = np.array([x, y])
      self.place_agent_at_pos(0, self.agent_start_pos, rand_dir=False)
      self.deliberate_agent_placement = 1

  # Place wall
  elif self.adversary_step_count < self.adversary_max_steps:
    # If there is already an object there, action does nothing
    if self.grid.get(x, y) is None:
      self.put_obj(minigrid.Wall(), x, y)
      self.n_clutter_placed += 1
      self.wall_locs.append((x - 1, y - 1))

  self.adversary_step_count += 1

  # End of episode
  if self.adversary_step_count >= self.adversary_max_steps:
    done = True
    # Build graph after we are certain agent and goal are placed
    for w in self.wall_locs:
      self.graph.remove_node(w)
    self.compute_shortest_path()

  image = self.grid.encode()
  obs = {
      'image': image,
      'time_step': [self.adversary_step_count],
      'random_z': self.generate_random_z()
  }

  return obs, 0, done, {}

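# Hedged usage sketch (not from the original source): the location-based
# adversary emits one flattened grid index per step; by default
# (choose_goal_last=False) the first step places the goal, the second places
# the agent, and the remaining steps place walls. Random sampling below is
# purely illustrative; adversary_action_dim is read from the env.
import numpy as np

def build_environment_from_locations(env, rng=np.random):
  """Feed random location indices to step_adversary() until done."""
  obs = env.reset()  # assumed to start a fresh adversary episode
  done = False
  while not done:
    loc = rng.randint(env.adversary_action_dim)
    obs, _, done, _ = env.step_adversary(loc)
  return obs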