def _gen_grid(self, width, height):
    """Build the room layout, carve doorways, then place the agent and goal.

    The grid is divided into a 2x2 arrangement of rooms, each
    ``width // 2`` by ``height // 2`` cells. When ``self.two_rooms`` is set,
    the horizontal dividers are skipped so only the vertical divider remains.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """

    def carve_door(x, y):
        # Open a doorway only when the cell is inside the bounds used by the
        # existing check (x strictly above 1, y strictly above 0).
        if 1 < x < width - 1 and 0 < y < height - 1:
            self.grid.set(x, y, None)

    # Create the grid
    self.grid = multigrid.Grid(width, height)

    # Generate the surrounding walls
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height - 1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width - 1, 0)

    room_w = width // 2
    room_h = height // 2

    # For each row of rooms
    for j in range(2):
        # For each column
        for i in range(2):
            x_left = i * room_w
            y_top = j * room_h
            x_right = x_left + room_w
            y_bottom = y_top + room_h

            # Vertical wall and door
            if i + 1 < 2:
                self.grid.vert_wall(x_right, y_top, room_h)
                # In the small two-room layout the lower wall segment gets
                # no door of its own.
                if not (j == 1 and self.two_rooms and height < 7):
                    carve_door(x_right, self._rand_int(y_top + 1, y_bottom))

            # Horizontal wall and door
            if not self.two_rooms:
                if j + 1 < 2:
                    self.grid.horz_wall(x_left, y_bottom, room_w)
                    carve_door(self._rand_int(x_left + 1, x_right), y_bottom)

    # Randomize the player start position and orientation
    if self._agent_default_pos is not None:
        self.agent_pos = self._agent_default_pos
        self.grid.set(*self._agent_default_pos, None)
        self.agent_dir = self._rand_int(0, 4)  # random start direction
    else:
        self.place_agent()

    if self._goal_default_pos is not None:
        goal = minigrid.Goal()
        self.put_obj(goal, *self._goal_default_pos)
        # Both attributes hold the full (x, y) position. The previous tuple
        # unpacking stored x in init_pos and y in cur_pos.
        goal.init_pos = goal.cur_pos = tuple(self._goal_default_pos)
    else:
        self.place_obj(minigrid.Goal())

    self.mission = 'Reach the goal'
def _gen_grid(self, width, height):
    """Create a walled grid containing a goal, lava-wall clutter and agents.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """
    self.grid = multigrid.Grid(width, height)
    self.grid.wall_rect(0, 0, width, height)

    goal = minigrid.Goal()
    if not self.randomize_goal:
        # Fixed goal at cell (width - 2, height - 2).
        self.put_obj(goal, width - 2, height - 2)
    else:
        self.place_obj(goal, max_tries=100)

    # Scatter self.n_clutter lava walls at random positions.
    for _unused in range(self.n_clutter):
        self.place_obj(LavaWall(), max_tries=100)

    self.place_agent()

    self.mission = 'get to the green square'
def decode(type_idx, color_idx, state):
    """Create an object from a 3-tuple state description.

    Args:
      type_idx: Index into ``minigrid.IDX_TO_OBJECT``.
      color_idx: Index into ``minigrid.IDX_TO_COLOR``; for agents it is
        passed to ``Agent`` unchanged instead of being looked up.
      state: Object state; for doors 0 means open, 1 closed, 2 locked.

    Returns:
      The decoded object, or None for 'empty' and 'unseen' cells.

    Raises:
      AssertionError: If the object type is not recognised.
    """
    obj_type = minigrid.IDX_TO_OBJECT[type_idx]

    if obj_type == 'agent':
        # Agents take the raw colour index, so no colour lookup happens.
        return Agent(color_idx, state)

    color = minigrid.IDX_TO_COLOR[color_idx]

    if obj_type == 'empty' or obj_type == 'unseen':
        return None
    if obj_type == 'wall':
        return minigrid.Wall(color)
    if obj_type == 'floor':
        return minigrid.Floor(color)
    if obj_type == 'ball':
        return minigrid.Ball(color)
    if obj_type == 'key':
        return minigrid.Key(color)
    if obj_type == 'box':
        return minigrid.Box(color)
    if obj_type == 'door':
        # State, 0: open, 1: closed, 2: locked
        is_open = state == 0
        is_locked = state == 2
        return Door(color, is_open, is_locked)
    if obj_type == 'goal':
        return minigrid.Goal()
    if obj_type == 'lava':
        return minigrid.Lava()

    # Raised explicitly rather than via `assert False`, which is removed
    # under `python -O` and would let execution fall through.
    raise AssertionError("unknown object type in decode '%s'" % obj_type)
def reset_random(self):
    """Use domain randomization to create the environment.

    Rebuilds the graph and grid, then randomly places the goal, the agent
    and ``int(self.n_clutter / 2)`` walls.

    Returns:
      Whatever ``self.reset_agent()`` returns.
    """
    interior_dims = [self.width - 2, self.height - 2]
    self.graph = grid_graph(dim=interior_dims)

    self.step_count = 0
    self.adversary_step_count = 0

    # Current position and direction of the agent
    self.reset_agent_status()

    self.agent_start_pos = None
    self.goal_pos = None

    # Extra metrics
    self.reset_metrics()

    # Start from an empty grid
    self._gen_grid(self.width, self.height)

    # Goal goes first, then the agent with a random start direction
    self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)
    self.agent_start_dir = self._rand_int(0, 4)
    self.agent_start_pos = self.place_one_agent(0, rand_dir=False)

    # Random walls: half of the clutter budget
    n_walls = int(self.n_clutter / 2)
    for _unused in range(n_walls):
        self.place_obj(minigrid.Wall(), max_tries=100)

    # NOTE(review): the walls placed above are not removed from self.graph
    # before the path computation - confirm this is intended.
    self.compute_shortest_path()
    self.n_clutter_placed = n_walls

    return self.reset_agent()
def _gen_grid(self, width, height):
    """Build a two-sided grid split by a wall with a locked door and a key.

    The goal is put at (width - 2, height - 2). The agent and the yellow key
    are placed in the region left of the splitting wall.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """
    self.height = height

    # Create an empty grid
    self.grid = multigrid.Grid(width, height)

    # Generate the surrounding walls
    self.grid.wall_rect(0, 0, width, height)

    # Place a goal in the bottom-right corner
    self.put_obj(minigrid.Goal(), width - 2, height - 2)

    # Create a vertical splitting wall
    start_idx = 2 if width <= 5 else 3
    self.split_idx = self._rand_int(start_idx, width - 2)
    self.grid.vert_wall(self.split_idx, 0)

    # Place the agent at a random position and orientation
    # on the left side of the splitting wall
    self.place_agent(size=(self.split_idx, height))

    # Place a door in the wall. The door index is a row (y) coordinate, so
    # it is bounded by the grid height; the previous code sampled it from
    # the width, which is only equivalent on square grids.
    door_idx = self._rand_int(1, height - 2)
    self.put_obj(
        multigrid.Door('yellow', is_locked=True), self.split_idx, door_idx)

    # Place a yellow key on the left side
    self.place_obj(
        obj=minigrid.Key('yellow'), top=(0, 0), size=(self.split_idx, height))

    self.mission = 'Use the key to open the door and then get to the goal'
def _gen_grid(self, width, height):
    """Build a walled grid with a goal, agents, and one task set per agent.

    Every agent index gets a locked grey door, a grey key, a purple ball and
    a green box, each stored on ``self`` and placed at a random position.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """

    def register_and_place(store, slot, obj):
        # Record the object under the agent's index, then drop it somewhere
        # random in the grid.
        store[slot] = obj
        self.place_obj(obj, max_tries=100)

    self.height = height

    # Empty grid enclosed by walls
    self.grid = multigrid.Grid(width, height)
    self.grid.wall_rect(0, 0, width, height)

    # The goal goes to a random position, followed by the agents
    self.place_obj(minigrid.Goal(), max_tries=100)
    self.place_agent()

    for agent_id in range(self.n_agents):
        register_and_place(
            self.doors, agent_id, multigrid.Door('grey', is_locked=True))
        register_and_place(self.keys, agent_id, minigrid.Key('grey'))
        register_and_place(self.balls, agent_id, minigrid.Ball('purple'))
        register_and_place(self.boxes, agent_id, minigrid.Box('green'))

    self.task_idx = [0] * self.n_agents

    self.mission = 'Do some random tasks'
def decode(type_idx, color_idx, state):
    """Create an object from a 3-tuple state description.

    Args:
      type_idx: Index into ``minigrid.IDX_TO_OBJECT``.
      color_idx: Index into ``minigrid.IDX_TO_COLOR``; for agents it is
        passed to ``Agent`` unchanged instead of being looked up.
      state: Object state; for doors 0 means open, 1 closed, 2 locked.

    Returns:
      The decoded object, or None for "empty" and "unseen" cells.

    Raises:
      AssertionError: If the object type is not recognised.
    """
    obj_type = minigrid.IDX_TO_OBJECT[type_idx]

    if obj_type == "agent":
        # Agents take the raw colour index, so no colour lookup happens.
        return Agent(color_idx, state)

    color = minigrid.IDX_TO_COLOR[color_idx]

    if obj_type == "empty" or obj_type == "unseen":
        return None
    if obj_type == "wall":
        return minigrid.Wall(color)
    if obj_type == "floor":
        return minigrid.Floor(color)
    if obj_type == "ball":
        return minigrid.Ball(color)
    if obj_type == "key":
        return minigrid.Key(color)
    if obj_type == "box":
        return minigrid.Box(color)
    if obj_type == "door":
        # State, 0: open, 1: closed, 2: locked
        is_open = state == 0
        is_locked = state == 2
        return Door(color, is_open, is_locked)
    if obj_type == "goal":
        return minigrid.Goal()
    if obj_type == "lava":
        return minigrid.Lava()

    # Raised explicitly rather than via `assert False`, which is removed
    # under `python -O` and would let execution fall through.
    raise AssertionError("unknown object type in decode '%s'" % obj_type)
def _gen_grid(self, width, height):
    """Create an empty walled room holding one goal and the agents.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """
    # Empty grid enclosed by walls
    self.grid = multigrid.Grid(width, height)
    self.grid.wall_rect(0, 0, width, height)

    goal = minigrid.Goal()
    if not self.randomize_goal:
        # Goal square in the bottom-right corner
        self.put_obj(goal, width - 2, height - 2)
    else:
        self.place_obj(goal, max_tries=100)

    # Place the agents
    self.place_agent()

    self.mission = 'get to the green goal square'
def _gen_grid(self, width, height):
    """Populate the grid from the character map in ``self._raw_grid``.

    The ``width`` and ``height`` arguments are not used; the grid is sized
    from ``self.width`` and ``self.height``.

    Args:
      width: Unused.
      height: Unused.
    """
    self.grid = minigrid.Grid(self.width, self.height)

    for col in range(self.width):
        for row in range(self.height):
            symbol = self._raw_grid[col, row]
            if symbol == ' ':
                continue

            if symbol == 's':
                # Start marker: record the position, pick a random heading.
                self.agent_pos = (col, row)
                self.agent_dir = self._rand_int(0, 4)

            # A None entry in the table leaves the cell empty.
            factory = ASCII_TO_OBJECT[symbol]
            cell_obj = None if factory is None else factory()
            self.grid.set(col, row, cell_obj)

    # If a start position has not been specified, place agent randomly.
    if 's' not in self._raw_grid:
        self.place_agent()

    # If no goal has been specified, place goal randomly.
    if 'g' not in self._raw_grid:
        self.place_obj(minigrid.Goal())

    self.mission = self._mission
def _gen_grid(self, width, height):
    """Build the grid from ``self.bit_map``, ``self.goal_pos`` and ``self.start_pos``.

    Truthy entries of ``self.bit_map`` become walls. The map is indexed as
    ``bit_map[y, x]`` and shifted by one cell to leave room for the outer
    walls.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """
    # Create an empty grid
    self.grid = multigrid.Grid(width, height)

    # Generate the surrounding walls
    self.grid.wall_rect(0, 0, width, height)

    # Goal
    self.put_obj(minigrid.Goal(), self.goal_pos[0], self.goal_pos[1])

    # Agent
    self.place_agent_at_pos(0, self.start_pos)

    # Walls. The map is indexed [y, x], so x must range over axis 1 and y
    # over axis 0. The previous loop used the axes the other way round,
    # which only works when the map is square.
    n_rows, n_cols = self.bit_map.shape[0], self.bit_map.shape[1]
    for x in range(n_cols):
        for y in range(n_rows):
            if self.bit_map[y, x]:
                # Add an offset of 1 for the outer walls
                self.put_obj(minigrid.Wall(), x + 1, y + 1)
def step_adversary(self, action):
    """Let the adversary act on the grid square for the current step.

    The adversary gets one step per square and chooses what to put there.
    Choosing the goal or the agent again moves the existing one to the new
    square.

    Args:
      action: An integer in range 0-3 specifying which object to place:
        0 = goal
        1 = agent
        2 = wall
        3 = nothing

    Returns:
      Standard RL observation, reward (always 0), done, and info
    """
    done = False

    # NOTE(review): if this is called when the step budget is already used
    # up, neither branch below binds x and y before they are read for the
    # observation - confirm callers never do that.
    if self.adversary_step_count < self.adversary_max_steps:
        x, y = self.get_xy_from_step(self.adversary_step_count)

        if action == 0:
            # Goal: create one, or lift the existing one off its old square.
            if self.goal_pos is None:
                goal = minigrid.Goal()
            else:
                goal = self.grid.get(self.goal_pos[0], self.goal_pos[1])
                self.grid.set(self.goal_pos[0], self.goal_pos[1], None)
            self.put_obj(goal, x, y)
            self.goal_pos = (x, y)

        elif action == 1:
            # Agent: reuse the existing agent object if there is one.
            agent = None
            if self.agent_start_pos is not None:
                agent = self.grid.get(
                    self.agent_start_pos[0], self.agent_start_pos[1])
                self.grid.set(
                    self.agent_start_pos[0], self.agent_start_pos[1], None)
            self.agent_start_pos = np.array([x, y])
            self.place_agent_at_pos(
                0, self.agent_start_pos, rand_dir=False, agent_obj=agent)

        elif action == 2:
            # Wall: wall_locs stores coordinates without the border offset.
            self.put_obj(minigrid.Wall(), x, y)
            self.n_clutter_placed += 1
            self.wall_locs.append((x - 1, y - 1))

    self.adversary_step_count += 1

    if self.adversary_step_count < self.adversary_max_steps:
        # Not finished: report the square the next step will act on.
        x, y = self.get_xy_from_step(self.adversary_step_count)
    else:
        # End of episode
        done = True

        # Anything the adversary never placed is put at a random position,
        # clearing a wall there first if needed.
        if self.agent_start_pos is not None:
            self.deliberate_agent_placement = 1
        else:
            self.agent_start_pos = self.select_random_grid_position()
            self.remove_wall(self.agent_start_pos[0], self.agent_start_pos[1])
            self.place_agent_at_pos(0, self.agent_start_pos, rand_dir=False)
            self.deliberate_agent_placement = 0

        if self.goal_pos is None:
            self.goal_pos = self.select_random_grid_position()
            self.remove_wall(self.goal_pos[0], self.goal_pos[1])
            self.put_obj(minigrid.Goal(), self.goal_pos[0], self.goal_pos[1])

        # Build graph after we are certain agent and goal are placed
        for wall_node in self.wall_locs:
            self.graph.remove_node(wall_node)
        self.compute_shortest_path()

    obs = {
        'image': self.grid.encode(),
        'time_step': [self.adversary_step_count],
        'random_z': self.generate_random_z(),
        'x': [x],
        'y': [y]
    }

    return obs, 0, done, {}
def step_adversary(self, loc):
    """The adversary gets n_clutter + 2 moves to place the goal, agent, blocks.

    The action space is the number of possible squares in the grid. The
    squares are numbered from left to right, top to bottom.

    Args:
      loc: An integer specifying the location to place the next object which
        must be decoded into x, y coordinates.

    Returns:
      Standard RL observation, reward (always 0), done, and info

    Raises:
      ValueError: If ``loc`` is negative or not below
        ``self.adversary_action_dim``.
    """
    # A negative index would otherwise wrap around through the modulo below
    # and silently address a valid square, so reject it with the same error.
    if loc < 0 or loc >= self.adversary_action_dim:
        raise ValueError(
            'Position passed to step_adversary is outside the grid.')

    # Add offset of 1 for outside walls
    x = int(loc % (self.width - 2)) + 1
    y = int(loc / (self.width - 2)) + 1
    done = False

    # The goal and agent are chosen either on the first two steps or on the
    # last two, depending on choose_goal_last.
    if self.choose_goal_last:
        should_choose_goal = (
            self.adversary_step_count == self.adversary_max_steps - 2)
        should_choose_agent = (
            self.adversary_step_count == self.adversary_max_steps - 1)
    else:
        should_choose_goal = self.adversary_step_count == 0
        should_choose_agent = self.adversary_step_count == 1

    # Place goal
    if should_choose_goal:
        # If there is goal noise, sometimes randomly place the goal
        if random.random() < self.goal_noise:
            self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)
        else:
            self.remove_wall(x, y)  # Remove any walls that might be in this loc
            self.put_obj(minigrid.Goal(), x, y)
            self.goal_pos = (x, y)

    # Place the agent
    elif should_choose_agent:
        self.remove_wall(x, y)  # Remove any walls that might be in this loc

        # Goal has already been placed here
        if self.grid.get(x, y) is not None:
            # Place agent randomly
            self.agent_start_pos = self.place_one_agent(0, rand_dir=False)
            self.deliberate_agent_placement = 0
        else:
            self.agent_start_pos = np.array([x, y])
            self.place_agent_at_pos(0, self.agent_start_pos, rand_dir=False)
            self.deliberate_agent_placement = 1

    # Place wall
    elif self.adversary_step_count < self.adversary_max_steps:
        # If there is already an object there, action does nothing
        if self.grid.get(x, y) is None:
            self.put_obj(minigrid.Wall(), x, y)
            self.n_clutter_placed += 1
            # wall_locs stores coordinates without the border offset
            self.wall_locs.append((x - 1, y - 1))

    self.adversary_step_count += 1

    # End of episode
    if self.adversary_step_count >= self.adversary_max_steps:
        done = True
        # Build graph after we are certain agent and goal are placed
        for w in self.wall_locs:
            self.graph.remove_node(w)
        self.compute_shortest_path()

    image = self.grid.encode()
    obs = {
        'image': image,
        'time_step': [self.adversary_step_count],
        'random_z': self.generate_random_z()
    }

    return obs, 0, done, {}
def render(self, mode="array", **kwargs):
    """Render the world in one of three modes.

    Args:
      mode: "array" returns a deep copy of ``self.world_tensor`` with the
        agent's cell set to 9. "curses" draws a 1-D or 2-D text view on a
        curses screen and then sleeps. "minigrid" returns the result of
        ``grid.render`` for a minigrid ``Grid`` built on the fly.
      **kwargs: "extra_text" (curses only) is written below the view;
        "sleep_time" (curses only) is the pause in seconds, default 0.0.

    Returns:
      The array for "array", the rendered image for "minigrid", and None
      for "curses".

    Raises:
      NotImplementedError: For an unknown mode, or for curses rendering of
        a world with more than two dimensions.
    """
    if mode == "array":
        snapshot = copy.deepcopy(self.world_tensor)
        agent_index = tuple(self.world_radius + self.current_position)
        snapshot[agent_index] = 9
        return snapshot

    if mode == "minigrid":
        height = width = 2 * self.world_radius + 2
        grid = minigrid.Grid(width, height)

        # Generate the surrounding walls
        grid.horz_wall(0, 0)
        grid.horz_wall(0, height - 1)
        grid.vert_wall(0, 0)
        grid.vert_wall(width - 1, 0)

        # The agent is drawn as a red Goal object standing in for it.
        # NOTE(review): the agent is shifted by world_radius + 1 but the
        # goal below only by world_radius - confirm the offsets are meant
        # to differ.
        agent_pos = np.array(self.positions[-1]) + 1 + self.world_radius
        agent = minigrid.Goal()
        agent.color = "red"
        grid.set(agent_pos[0], agent_pos[1], agent)
        agent.init_pos = tuple(agent_pos)
        agent.cur_pos = tuple(agent_pos)

        goal_pos = self.goal_position + self.world_radius
        goal = minigrid.Goal()
        grid.set(goal_pos[0], goal_pos[1], goal)
        goal.init_pos = tuple(goal_pos)
        goal.cur_pos = tuple(goal_pos)

        # Highlight a window of up to 5 cells either side of the agent,
        # clipped to the range [1, height - 1].
        highlight_mask = np.zeros((height, width), dtype=bool)
        minx = max(1, agent_pos[0] - 5)
        maxx = min(height - 1, agent_pos[0] + 5)
        miny = max(1, agent_pos[1] - 5)
        maxy = min(height - 1, agent_pos[1] + 5)
        highlight_mask[minx : (maxx + 1), miny : (maxy + 1)] = True

        return grid.render(
            minigrid.TILE_PIXELS, agent_pos, None, highlight_mask=highlight_mask
        )

    if mode != "curses":
        raise NotImplementedError("Unknown render mode {}.".format(mode))

    if self.world_dim == 1:
        cells = ["_"] * (1 + 2 * self.world_radius)
        goal_ind = self.goal_position[0] + self.world_radius
        # Later writes win: the agent marker overrides goal and wall.
        cells[goal_ind] = "G"
        cells[2 * self.world_radius - goal_ind] = "W"
        cells[self.current_position[0] + self.world_radius] = "X"
        line = " ".join(cells)

        if self.curses_screen is None:
            self.curses_screen = curses.initscr()
        self.curses_screen.addstr(0, 0, line)
        if "extra_text" in kwargs:
            self.curses_screen.addstr(1, 0, kwargs["extra_text"])
        self.curses_screen.refresh()

    elif self.world_dim == 2:
        side = 1 + 2 * self.world_radius
        rows = [["_"] * side for _ in range(side)]
        agent_cell = self.world_radius + self.current_position
        for r in range(side):
            for c in range(side):
                value = self.world_tensor[r][c]
                # Checked in this order so a later match overrides an
                # earlier one, and the agent overrides them all.
                if value == self.GOAL:
                    rows[r][c] = "G"
                if value == self.WRONG_CORNER:
                    rows[r][c] = "C"
                if value == self.WALL:
                    rows[r][c] = "W"
                if ((r, c) == agent_cell).all():
                    rows[r][c] = "X"

        if self.curses_screen is None:
            self.curses_screen = curses.initscr()
        for line_no, row in enumerate(rows):
            self.curses_screen.addstr(line_no, 0, " ".join(row))
        self.curses_screen.addstr(len(rows), 0, str(self.state()))
        if "extra_text" in kwargs:
            self.curses_screen.addstr(len(rows) + 1, 0, kwargs["extra_text"])
        self.curses_screen.refresh()

    else:
        raise NotImplementedError("Cannot render worlds of > 2 dimensions.")

    # Only the curses path reaches this point.
    time.sleep(kwargs.get("sleep_time", 0.0))