class SimpleEnv(MiniGridEnv):
    """
    Simple empty environment where the agent starts in the middle and the
    goal is placed at a random location.
    """

    def __init__(self, size=5):
        assert size % 2 != 0, "Size needs to be odd"
        super().__init__(grid_size=size,
                         max_steps=4 * size * size,
                         see_through_walls=False)

    def _gen_grid(self, width, height):
        # Create an empty grid surrounded by walls
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Agent starts in the center
        self.start_pos = (width // 2, height // 2)
        self.start_dir = 0

        # Goal is placed anywhere but the center
        self.place_obj(Goal())

        # Set mission string
        self.mission = "GO TO GREEN SQUARE"
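# Minimal usage sketch (hypothetical driver code, not part of the original):
# assumes an older gym_minigrid API where reset() reads self.start_pos and
# step() returns the 4-tuple (obs, reward, done, info).
env = SimpleEnv(size=5)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()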
def get_actions(self, obss):
    preprocessed_obss = self.preprocess_obss(obss)

    with torch.no_grad():
        if self.model.recurrent:
            dist, _, self.memories = self.model(preprocessed_obss,
                                                self.memories)
        else:
            dist, _, x, y, z = self.model(preprocessed_obss, introspect=True)
            # Debug introspection: render the first decoded grid and dump
            # the shapes of the introspection outputs once
            if self.number == 7:
                Grid.decode(y[0][0].round().numpy()).render_human()
                print(len(x), len(y), len(z))
                print(y[0].shape, z[0].shape)
                print(x, y, z)
            self.number += 1

    if self.argmax:
        actions = dist.probs.max(1, keepdim=True)[1]
    else:
        actions = dist.sample()

    # Move to CPU and convert to numpy regardless of device, so callers
    # always receive ndarrays
    actions = actions.cpu().numpy()

    return actions
def _gen_grid(self, width, height):
    assert width % 2 == 1 and height % 2 == 1  # odd size

    # Create an empty grid
    self.grid = Grid(width, height)

    # Generate the surrounding walls
    self.grid.wall_rect(0, 0, width, height)

    # Place the agent in the top-left corner
    self.agent_pos = (1, 1)
    self.agent_dir = 0

    # Place a goal square in the bottom-right corner
    self.put_obj(Goal(), width - 2, height - 2)

    # Place obstacles (lava or walls)
    v, h = object(), object()  # singleton `vertical` and `horizontal` objects

    # Lava rivers or walls specified by direction and position in grid
    rivers = [(v, i) for i in range(2, height - 2, 2)]
    rivers += [(h, j) for j in range(2, width - 2, 2)]
    self.np_random.shuffle(rivers)
    rivers = rivers[:self.num_crossings]  # sample random rivers
    rivers_v = sorted([pos for direction, pos in rivers if direction is v])
    rivers_h = sorted([pos for direction, pos in rivers if direction is h])
    obstacle_pos = itt.chain(
        itt.product(range(1, width - 1), rivers_h),
        itt.product(rivers_v, range(1, height - 1)),
    )
    for i, j in obstacle_pos:
        self.put_obj(self.obstacle_type(), i, j)

    # Sample path to goal
    path = [h] * len(rivers_v) + [v] * len(rivers_h)
    self.np_random.shuffle(path)

    # Create openings
    limits_v = [0] + rivers_v + [height - 1]
    limits_h = [0] + rivers_h + [width - 1]
    room_i, room_j = 0, 0
    for direction in path:
        if direction is h:
            i = limits_v[room_i + 1]
            j = self.np_random.choice(
                range(limits_h[room_j] + 1, limits_h[room_j + 1]))
            room_i += 1
        elif direction is v:
            i = self.np_random.choice(
                range(limits_v[room_i] + 1, limits_v[room_i + 1]))
            j = limits_h[room_j + 1]
            room_j += 1
        else:
            assert False
        self.grid.set(i, j, None)

    self.mission = ("avoid the lava and get to the green goal square"
                    if self.obstacle_type == Lava else
                    "find the opening and get to the green goal square")
def _gen_grid(self, width, height, val=False, seen=True):
    # Create the grid
    self.grid = Grid(width, height)

    # Generate surrounding walls
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height - 1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width - 1, 0)

    # Even during validation, the start state distribution
    # should be the same as during training
    if not self.rnd_start:
        self._agent_default_pos = (1, self.grid_size - 2)
    else:
        self._agent_default_pos = None

    # Place the agent at its default position
    if self._agent_default_pos is not None:
        self.start_pos = self._agent_default_pos
        self.grid.set(*self._agent_default_pos, None)
        self.start_dir = self._rand_int(0, 4)  # Agent direction doesn't matter

    goal = Goal()
    self.grid.set(*self._goal_default_pos, goal)
    goal.init_pos = goal.curr_pos = self._goal_default_pos
    self.mission = goal.init_pos
def _gen_grid(self, width, height, val=False, seen=True):
    assert width >= 10 and height >= 10, "Environment too small to place objects"

    # Create the grid
    self.grid = Grid(width, height)

    # Generate surrounding walls
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height - 1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width - 1, 0)

    np.random.seed(self.grid_seed)
    for obj_idx in range(self.num_objects):
        while True:
            c_x = np.random.choice(list(range(2, self.grid_size - 3)))
            c_y = np.random.choice(list(range(2, self.grid_size - 3)))
            # obj_size = np.random.choice(list(range(1, self.obj_size + 1)))
            obj_size = self.obj_size
            if obj_size == 3:
                cells = list(product([c_x - 1, c_x, c_x + 1],
                                     [c_y - 1, c_y, c_y + 1]))
            elif obj_size == 2:
                cells = list(product([c_x, c_x + 1], [c_y, c_y + 1]))
            elif obj_size == 1:
                cells = list(product([c_x], [c_y]))
            else:
                raise ValueError

            # Only place the object if every covered cell is free
            valid = True
            for cell in cells:
                cell = self.grid.get(cell[0], cell[1])
                if not (cell is None or cell.can_overlap()):
                    valid = False
                    break
            if valid:
                for cell in cells:
                    self.grid.set(*cell, Wall())
                break

    # Set the start position and the goal position depending upon where
    # the obstacles are present
    goal = Goal()
    # [NOTE]: This is a hack; add an option to set the goal location from arguments.
    self.grid.set(*self._goal_default_pos, goal)
    goal.init_pos = goal.curr_pos = self._goal_default_pos
    self.mission = goal.init_pos
    self.start_pos = self._agent_default_pos
class PlayGround(MiniGridEnv):

    def __init__(self,
                 size=16,
                 agent_start_pos=(8, 8),
                 agent_start_dir=None):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=200,
            # Set this to True for maximum speed
            see_through_walls=True,
        )

    def _gen_grid(self, width, height):
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # Scatter randomly colored objects around the grid
        self.put_obj(Ball(rand_color()), 2, 1)
        self.put_obj(Ball(rand_color()), 4, 1)
        self.put_obj(Ball(rand_color()), 4, 1)  # (4, 1) is set twice; the second ball overwrites the first
        self.put_obj(Key(rand_color()), 5, 2)
        self.put_obj(Box(rand_color()), 4, 3)
        self.put_obj(Ball(rand_color()), 4, 4)
        self.put_obj(Ball(rand_color()), 12, 2)
        self.put_obj(Ball(rand_color()), 14, 1)
        self.put_obj(Key(rand_color()), 14, 2)
        self.put_obj(Key(rand_color()), 11, 2)
        self.put_obj(Box(rand_color()), 14, 3)
        self.put_obj(Ball(rand_color()), 13, 1)
        self.put_obj(Key(rand_color()), 3, 11)
        self.put_obj(Ball(rand_color()), 5, 12)
        self.put_obj(Key(rand_color()), 2, 14)
        self.put_obj(Box(rand_color()), 3, 14)
        self.put_obj(Ball(rand_color()), 5, 13)
        self.put_obj(Key(rand_color()), 13, 13)
        self.put_obj(Ball(rand_color()), 12, 13)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = np.random.randint(0, 4)
        else:
            self.place_agent()

        self.mission = "get to the green goal square"
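# Hypothetical rendering sketch (not part of the original code): build the
# playground and grab one RGB frame, assuming rand_color() is in scope and
# the installed gym_minigrid supports render('rgb_array').
env = PlayGround(size=16)
env.reset()
frame = env.render('rgb_array')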
def _gen_grid(self, width, height):
    # Create an empty grid
    self.grid = Grid(width, height)

    # Generate the surrounding walls
    self.grid.wall_rect(0, 0, width, height)

    # Place the agent
    if self.agent_start_pos is not None:
        self.agent_pos = self.agent_start_pos
        self.agent_dir = self.agent_start_dir
    else:
        self.place_agent()

    self.mission = "get to the green goal square"
def decode(array):
    """
    Decode an array grid encoding back into a grid
    """
    width, height, channels = array.shape
    assert channels == 3

    grid = Grid(width, height)
    for i in range(width):
        for j in range(height):
            typeIdx, colorIdx, state = array[i, j]

            if typeIdx == OBJECT_TO_IDX['unseen'] or \
                    typeIdx == OBJECT_TO_IDX['empty']:
                continue

            objType = IDX_TO_OBJECT[typeIdx]
            color = IDX_TO_COLOR[colorIdx]

            # State, 0: open, 1: closed, 2: locked
            is_open = state == 0
            is_locked = state == 2

            if objType == 'wall':
                v = Wall(color)
            elif objType == 'floor':
                v = Floor(color)
            elif objType == 'ball':
                v = Ball(color)
            elif objType == 'key':
                v = Key(color)
            elif objType == 'box':
                v = Box(color)
            elif objType == 'door':
                v = Door(color, is_open, is_locked)
            elif objType == 'goal':
                v = Goal()
            elif objType == 'lava':
                v = Lava()
            elif objType == 'agent':
                v = None
            else:
                assert False, "unknown obj type in decode '%s'" % objType

            grid.set(i, j, v)

    return grid
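# Illustrative encode/decode roundtrip (a sketch under the assumption that
# the matching Grid.encode() emits the 3-channel (type, color, state) layout
# that decode() above consumes).
grid = Grid(7, 7)
grid.wall_rect(0, 0, 7, 7)
array = grid.encode()
assert np.array_equal(array, decode(array).encode())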
class EmptyMultigoal(MiniGridEnv):

    def __init__(
        self,
        size=8,
        agent_start_pos=None,
        agent_start_dir=None,
        n_goals=2,
        n_traps=1,
    ):
        self.n_goals = n_goals
        self.n_traps = n_traps
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir
        size += 2

        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
            agent_view_size=size * 2 + 1,  # init as fully observable
        )

    def _gen_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the goals
        for _ in range(self.n_goals):
            self.place_obj(Goal())

        # Place the traps
        for _ in range(self.n_traps):
            self.place_obj(Lava())

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        self.mission = "get to the green goal square, avoid the lava"
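# Hypothetical usage sketch (not part of the original code): two goals and
# one lava trap on an 8x8 interior, stepped once with a random action under
# the old gym 4-tuple step API.
env = EmptyMultigoal(size=8, n_goals=2, n_traps=1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())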
class PlayGround2(MiniGridEnv):

    def __init__(self,
                 size=8,
                 agent_start_pos=(1, 1),
                 agent_start_dir=0):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
        )

    def _gen_grid(self, width, height):
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # A wall with a blue door separates the agent from the objects
        for i in range(6):
            self.put_obj(Wall(), 3, i + 1)
        self.put_obj(Door('blue'), 3, 5)
        self.put_obj(Ball('red'), 4, 1)
        self.put_obj(Key('green'), 4, 2)
        self.put_obj(Box('grey'), 4, 3)
        self.put_obj(Ball('blue'), 4, 4)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        self.mission = "get to the green goal square"
def _gen_grid(self, width: int, height: int) -> None:
    """Generate grid space.

    Jobs:
        - create grid world
        - create wall
        - set starting point
        - set goal
        - set lava
    """
    assert width >= 5 and height >= 5

    # Current position and direction of the agent
    self.agent_pos: Tuple[int, int] = (1, 1)  # (0, 0) is wall
    self.agent_dir: int = 0

    # Create an empty grid
    self.grid = Grid(width, height)

    # Create wall
    self.grid.wall_rect(0, 0, width, height)

    # Create goals
    for position in self.goal_pos:
        goal_with_wall = self.__adjust_pos_consider_walls(position)
        self.__set_grid_type(*goal_with_wall, Goal())

    # Create lava
    if self.obstacle_pos:
        for lava_pos in self.obstacle_pos:
            lava_with_wall = self.__adjust_pos_consider_walls(lava_pos)
            self.__set_grid_type(*lava_with_wall, self.obstacle_type())

    # Settings for reward_grid
    for cell in itertools.product(
            range(self.valid_height), range(self.valid_width)):
        if cell in self.goal_pos:
            self.reward_grid[cell] = self.goal_reward
        elif cell in self.obstacle_pos:
            self.reward_grid[cell] = self.obstacle_reward
        else:
            self.reward_grid[cell] = self.default_reward
def _gen_grid(self, width, height):
    self.grid = Grid(width, height)
    self.grid.wall_rect(0, 0, width, height)

    # self.start_pos = (2, 2)
    yl, xl, _ = self.observation_space.spaces["image"].shape
    self.start_pos = (random.randint(2, yl - 2), random.randint(2, xl - 2))
    self.agent_pos = self.start_pos  # TODO: the env holding agent traits is a design smell
    self.start_dir = random.randint(0, 3)
    self.agent_dir = self.start_dir

    # The snake starts as two cells: the head at the start position and one
    # body segment directly behind it
    self.snake = Snake(
        [self.start_pos, tuple(self.start_pos - self.dir_vec)])
    for pos in self.snake.body:
        self.grid.set(*pos, Lava())

    self.spawn_new_food()
    self.mission = None
def _gen_grid(self, height, width):
    # Create the grid
    self.grid = Grid(height, width)

    self.room_grid = []

    # For each row of rooms
    for i in range(0, self.num_rows):
        row = []
        # For each column of rooms
        for j in range(0, self.num_cols):
            room = Room(
                (i * (self.room_size - 1), j * (self.room_size - 1)),
                (self.room_size, self.room_size))
            row.append(room)

            # Generate the walls for this room
            self.wall_rect(*room.top, *room.size)
        self.room_grid.append(row)

    # For each row of rooms
    for i in range(0, self.num_rows):
        # For each column of rooms
        for j in range(0, self.num_cols):
            room = self.room_grid[i][j]

            i_l, j_l = (room.top[0] + 1, room.top[1] + 1)
            i_m, j_m = (room.top[0] + room.size[0] - 1,
                        room.top[1] + room.size[1] - 1)

            # Door positions, shared with neighboring rooms
            if j < self.num_cols - 1:
                room.neighbors['right'] = self.room_grid[i][j + 1]
                room.door_pos['right'] = (self.rng.randint(i_l, i_m), j_m)
            if i < self.num_rows - 1:
                room.neighbors['down'] = self.room_grid[i + 1][j]
                room.door_pos['down'] = (i_m, self.rng.randint(j_l, j_m))
            if j > 0:
                room.neighbors['left'] = self.room_grid[i][j - 1]
                room.door_pos['left'] = room.neighbors['left'].door_pos['right']
            if i > 0:
                room.neighbors['up'] = self.room_grid[i - 1][j]
                room.door_pos['up'] = room.neighbors['up'].door_pos['down']

    # The agent starts in the middle, facing right
    self.agent.pos = (
        (self.num_rows // 2) * (self.room_size - 1) + (self.room_size // 2),
        (self.num_cols // 2) * (self.room_size - 1) + (self.room_size // 2))
    self.agent.state = 'right'
def _save_obs(obs, out_dir, fname, tile_size=12):
    """
    Render an agent observation and save it as an image
    """
    from gym_minigrid.minigrid import Grid

    agent_view_size = obs.shape[0]
    grid, vis_mask = Grid.decode(obs)

    # Render the whole grid with the agent drawn at the bottom center
    img = grid.render(tile_size,
                      agent_pos=(agent_view_size // 2, agent_view_size - 1),
                      agent_dir=3,
                      highlight_mask=vis_mask)

    plt.imsave(os.path.join(out_dir, fname), img)
    plt.clf()
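# Illustrative call (hypothetical, not part of the original code): save the
# image channel of one observation, assuming `env` yields dict observations
# with an 'image' key encoded the way Grid.decode() expects.
obs = env.reset()
_save_obs(obs['image'], out_dir='/tmp', fname='obs.png')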
def get_obs_render(self, obs, tile_pixels=CELL_PIXELS // 2, mode='rgb_array'):
    """
    Render an agent observation for visualization
    """
    if self.obs_render is None:
        obs_render = Renderer(self.agent_view_size * tile_pixels,
                              self.agent_view_size * tile_pixels,
                              self._render)
        self.obs_render = obs_render
    else:
        obs_render = self.obs_render

    r = obs_render

    r.beginFrame()

    grid = Grid.decode(obs)

    # Render the whole grid
    grid.render(r, tile_pixels)

    # Draw the agent at the bottom center of its view
    ratio = tile_pixels / CELL_PIXELS
    r.push()
    r.scale(ratio, ratio)
    r.translate(CELL_PIXELS * (0.5 + self.agent_view_size // 2),
                CELL_PIXELS * (self.agent_view_size - 0.5))
    r.rotate(3 * 90)
    r.setLineColor(255, 0, 0)
    r.setColor(255, 0, 0)
    r.drawPolygon([(-12, 10), (12, 0), (-12, -10)])
    r.pop()

    r.endFrame()

    if mode == 'rgb_array':
        return get_array_from_pixmap(r)
    return r.getPixmap()
# Run for a few episodes (assumes `env` has already been constructed and reset)
num_episodes = 0
while num_episodes < 5:
    # Pick a random action
    action = random.randint(0, env.action_space.n - 1)

    obs, reward, done, info = env.step(action)

    # Validate the agent position
    assert env.agent_pos[0] < env.grid_size
    assert env.agent_pos[1] < env.grid_size

    # Test observation encode/decode roundtrip
    img = obs['image']
    grid = Grid.decode(img)
    img2 = grid.encode()
    assert np.array_equal(img, img2)

    # Check that the reward is within the specified range
    assert reward >= env.reward_range[0], reward
    assert reward <= env.reward_range[1], reward

    if done:
        num_episodes += 1
        env.reset()

    env.render('rgb_array')

env.close()
class SingleTMaze(MiniGridEnv):
    is_double = False
    reward_values = dict(goal=1, fake_goal=0.1)
    view_size: int = None

    def __init__(self,
                 corridor_length=3,
                 reward_position=0,
                 max_steps=None,
                 is_double=False,
                 view_size=None,
                 max_corridor_length=None):
        if max_corridor_length is None:
            max_corridor_length = corridor_length
        self.max_corridor_length = max_corridor_length
        self.view_size = view_size if view_size is not None else 7
        self.is_double = is_double
        self.reward_position = reward_position
        self.corridor_length = corridor_length
        assert corridor_length > 0

        if max_steps is None:
            max_steps = 4 + 4 * corridor_length

        super().__init__(
            grid_size=3 + 2 * self.max_corridor_length,
            max_steps=max_steps,
            see_through_walls=True,  # True for maximum performance
            agent_view_size=self.view_size,
        )
        self.reward_range = (min(self.reward_values["fake_goal"], 0),
                             self.reward_values["goal"])

    @property
    def mission(self):
        goals = ["UPPER LEFT", "UPPER RIGHT", "LOWER RIGHT", "LOWER LEFT"]
        return f'Goal is {goals[self.reward_position]}'

    def _gen_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)

        # Place the agent in the center
        self.start_pos = (int(width / 2), int(height / 2))
        self.start_dir = 3

        # Fill the grid with walls
        for x in range(0, width):
            for y in range(0, height):
                self.grid.set(x, y, Wall())

        # Carve out the corridors
        if self.is_double:
            # Double T: a vertical corridor with a horizontal bar at each end
            for y in range(height // 2 - self.corridor_length,
                           height // 2 + self.corridor_length + 1):
                self.grid.set(width // 2, y, None)
            for x in range(width // 2 - self.corridor_length,
                           width // 2 + self.corridor_length + 1):
                self.grid.set(x, height // 2 - self.corridor_length, None)
                self.grid.set(x, height // 2 + self.corridor_length, None)
        else:
            # Single T: a vertical corridor with one horizontal bar at the top
            for y in range(height // 2 - self.corridor_length,
                           height // 2 + 1):
                self.grid.set(width // 2, y, None)
            for x in range(width // 2 - self.corridor_length,
                           width // 2 + self.corridor_length + 1):
                self.grid.set(x, height // 2 - self.corridor_length, None)

        # Create rewards
        reward_positions = self._reward_positions(width, height)
        self._gen_rewards(reward_positions)

    def _reward_positions(self, width, height):
        reward_positions = [
            (width // 2 - self.corridor_length,
             height // 2 - self.corridor_length),
            (width // 2 + self.corridor_length,
             height // 2 - self.corridor_length),
            (width // 2 + self.corridor_length,
             height // 2 + self.corridor_length),
            (width // 2 - self.corridor_length,
             height // 2 + self.corridor_length),
        ]
        if not self.is_double:
            reward_positions = reward_positions[:2]
        return reward_positions

    def _reward(self):
        min_steps = 1 + 2 * self.corridor_length
        if self.is_double and self.reward_position > 1:
            min_steps += 2
        redundant_steps = max(0, self.step_count - min_steps)
        max_steps = self.max_steps - min_steps + 1

        cell = self.grid.get(self.agent_pos[0], self.agent_pos[1])
        max_reward = self.reward_values["fake_goal"]
        if hasattr(cell, "is_goal") and cell.is_goal:
            max_reward = self.reward_values["goal"]

        # Scale the reward down linearly with the number of redundant steps
        return min(max_reward,
                   max_reward * (1 - min(1, redundant_steps / max_steps)))

    def _gen_rewards(self, rewards_pos: List[Tuple[int, int]]):
        for i, (x, y) in enumerate(rewards_pos):
            g = Goal()
            self.grid.set(x, y, g)
            g.is_goal = False
            if self.reward_position == i % len(rewards_pos):
                g.is_goal = True

    def render(self, mode='human', close=False, **kwargs):
        # Temporarily color the true goal blue so it is visible when rendering
        reward_positions = self._reward_positions(width=self.width,
                                                  height=self.height)
        goal = self.grid.get(*reward_positions[self.reward_position])
        assert goal.is_goal
        start_color = goal.color
        goal.color = 'blue'
        ret = super().render(mode, close, **kwargs)
        goal.color = start_color
        return ret
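# Hypothetical usage sketch (not part of the original code): the mission
# property should name the rewarded corner selected by reward_position.
env = SingleTMaze(corridor_length=3, reward_position=1)
env.reset()
assert env.mission == 'Goal is UPPER RIGHT'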
class OptRewardCrossingEnv(OptRewardMiniGridEnv):
    """
    Environment with wall or lava obstacles and a sparse reward.
    """

    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        self.num_crossings = num_crossings
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=False,
            seed=seed)

    def _gen_grid(self, width, height):
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.put_obj(Goal(), width - 2, height - 2)

        # Place obstacles (lava or walls)
        v, h = object(), object()  # singleton `vertical` and `horizontal` objects

        # Lava rivers or walls specified by direction and position in grid
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.put_obj(self.obstacle_type(), i, j)

        # Sample path to goal
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings and mark each opening with a subgoal
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                assert False
            self.grid.set(i, j, None)
            self.put_obj(Goal(), i, j)
            self.subgoal_pos = np.asarray([i, j])
            self.horizontal = (direction == h)

        self.mission = ("avoid the lava and get to the green goal square"
                        if self.obstacle_type == Lava else
                        "find the opening and get to the green goal square")
class NineRoomsEnv(MiniGridSimple):

    # Only 4 actions needed: left, right, up and down
    class NineRoomsCardinalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        passage_size=1,
        max_steps=100,
        seed=133,
        rnd_start=0,
        start_state_exclude_rooms=[],
    ):
        self.grid_size = grid_size
        self.passage_size = passage_size
        self._goal_default_pos = (1, 1)

        # Set to 1 if the agent is to be randomly spawned
        self.rnd_start = rnd_start

        # If self.rnd_start == 1, don't spawn in these rooms
        self.start_state_exclude_rooms = start_state_exclude_rooms

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(NineRoomsEnv.NineRoomsCardinalActions)

        # Set the action and observation spaces
        self.actions = NineRoomsEnv.NineRoomsCardinalActions
        self.action_space = spaces.Discrete(self.nActions)

        self.max_cells = (grid_size - 1) * (grid_size - 1)
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size), spaces.Discrete(grid_size)])
        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )

        self.T = max_steps

    # Change the observation space to return the position in the grid
    @property
    def category(self):
        # [TODO] Make sure this doesn't break after self.agent_pos is
        # changed to numpy.ndarray
        return self.cell_cat_map[self.agent_pos]

    def reward(self):
        # 1 if the agent has reached the goal state, 0 otherwise
        return 1 if self.success else 0

    def _gen_grid(self, width, height, val=False, seen=True):
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Place horizontal walls through the grid
        self.grid.horz_wall(0, height // 3)
        self.grid.horz_wall(0, (2 * height) // 3)

        # Place vertical walls through the grid
        self.grid.vert_wall(width // 3, 0)
        self.grid.vert_wall((2 * width) // 3, 0)

        # Create passages around each interior wall intersection
        passage_anchors = [(width // 3, height // 3),
                           (width // 3, (2 * height) // 3),
                           ((2 * width) // 3, height // 3),
                           ((2 * width) // 3, (2 * height) // 3)]
        passage_cells = []
        for anchor in passage_anchors:
            for delta in range(-1 * self.passage_size, self.passage_size + 1):
                passage_cells.append((anchor[0] + delta, anchor[1]))
                passage_cells.append((anchor[0], anchor[1] + delta))

        for cell in passage_cells:
            self.grid.set(*cell, None)

        # Even during validation, the start state distribution
        # should be the same as during training
        if not self.rnd_start:
            self._agent_default_pos = ((width - 2) // 2, (height - 2) // 2)
        else:
            self._agent_default_pos = None

        # Place the agent at the center
        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(0, 4)  # Agent direction doesn't matter
        else:
            if len(self.start_state_exclude_rooms) == 0:
                self.place_agent()
            else:
                valid_start_pos = []
                if seen:
                    exclude_from = self.start_state_exclude_rooms
                else:
                    exclude_from = [
                        x for x in range(1, 10)
                        if x not in self.start_state_exclude_rooms
                    ]
                for room in range(1, 10):
                    if room in exclude_from:
                        continue
                    # Ignore that there are walls for now; the rejection
                    # sampling below handles occupied cells

                    # Get x-coordinates of allowed cells
                    valid_x = []
                    if room % 3 == 1:
                        valid_x = list(range(1, width // 3))
                    elif room % 3 == 2:
                        valid_x = list(range(width // 3 + 1, (2 * width) // 3))
                    else:
                        valid_x = list(range((2 * width) // 3 + 1, width - 1))

                    # Get y-coordinates of allowed cells
                    valid_y = []
                    if (room - 1) // 3 == 0:
                        valid_y = list(range(1, height // 3))
                    elif (room - 1) // 3 == 1:
                        valid_y = list(
                            range(height // 3 + 1, (2 * height) // 3))
                    else:
                        valid_y = list(range((2 * height) // 3 + 1, height - 1))

                    room_cells = list(product(valid_x, valid_y))
                    valid_start_pos += room_cells

                # Make sure the start position doesn't conflict with other cells
                while True:
                    _start_pos = valid_start_pos[np.random.choice(
                        len(valid_start_pos))]
                    row = _start_pos[1]
                    col = _start_pos[0]
                    cell = self.grid.get(row, col)
                    if cell is None or cell.can_overlap():
                        break
                self.start_pos = (col, row)
                self.start_dir = self._rand_int(0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos
        self.mission = goal.init_pos

    def reset(self, val=False, seen=True):
        obs, info = super().reset(val=val, seen=seen)

        # Add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))
        return obs, info

    def step(self, action):
        self.step_count += 1

        # Reward doesn't depend on the action, just the state:
        # reward = 1 only when the agent is in the goal state
        if not self.done:
            # Check if currently at the goal state
            if self.agent_pos == self.mission:
                # No penalty, episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)
                    self.agent_dir = (action - 1) % 4
                    if fwd_cell is None or fwd_cell.can_overlap() \
                            or self.is_goal(move_pos):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {}".format(action))

        reward = self.reward()

        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }
        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """
        Encode the state to generate the observation: every cell before the
        agent's flattened position is 1, the rest are 0.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        curr_x, curr_y = state[1], state[0]
        curr_pos = curr_y * self.width + curr_x
        feat[curr_pos:] = 0
        return feat
class FourRooms(FourRoomsEnv):
    """
    Overrides the original generator to make the hallway positions static
    """

    def __init__(self, agent_pos: tuple = (1, 1), goal_pos: tuple = (15, 15)):
        self.hallways = {
            'top': (9, 4),
            'left': (3, 9),
            'right': (16, 9),
            'bot': (9, 14),
        }
        super().__init__(agent_pos=agent_pos, goal_pos=goal_pos)

    def _reward(self):
        return 1

    def _gen_grid(self, width, height):
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):
            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall (doors are carved out from self.hallways below)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    # pos = (xR, self._rand_int(yT + 1, yB))
                    # self.grid.set(*pos, None)

                # Bottom wall
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    # pos = (self._rand_int(xL + 1, xR), yB)
                    # self.grid.set(*pos, None)

        # Carve out the static hallways
        for hallway in self.hallways.values():
            self.grid.set(*hallway, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'
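# Hypothetical usage sketch (not part of the original code): with static
# hallways, every reset should leave the cells in self.hallways open.
env = FourRooms(agent_pos=(1, 1), goal_pos=(15, 15))
env.reset()
assert all(env.grid.get(*pos) is None for pos in env.hallways.values())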
# Run for a few episodes (assumes `env` has already been constructed and reset)
num_episodes = 0
while num_episodes < 5:
    # Pick a random action
    action = random.randint(0, env.action_space.n - 1)

    obs, reward, done, info = env.step(action)

    # Validate the agent position
    assert env.agent_pos[0] < env.width
    assert env.agent_pos[1] < env.height

    # Test observation encode/decode roundtrip
    img = obs['image']
    vis_mask = img[:, :, 0] != OBJECT_TO_IDX['unseen']  # hackish
    img2 = Grid.decode(img).encode(vis_mask=vis_mask)
    assert np.array_equal(img, img2)

    # Test the env-to-string function
    str(env)

    # Check that the reward is within the specified range
    assert reward >= env.reward_range[0], reward
    assert reward <= env.reward_range[1], reward

    if done:
        num_episodes += 1
        env.reset()

    env.render('rgb_array')
# Run for a few episodes (assumes `env` has already been constructed and reset)
num_episodes = 0
while num_episodes < 5:
    # Pick a random action
    action = random.randint(0, env.action_space.n - 1)

    obs, reward, done, info = env.step(action)

    # Validate the agent position
    assert env.agent_pos[0] < env.width
    assert env.agent_pos[1] < env.height

    # Test observation encode/decode roundtrip
    img = obs['image']
    grid, vis_mask = Grid.decode(img)
    img2 = grid.encode(vis_mask=vis_mask)
    assert np.array_equal(img, img2)

    # Test the env-to-string function
    str(env)

    # Check that the reward is within the specified range
    assert reward >= env.reward_range[0], reward
    assert reward <= env.reward_range[1], reward

    if done:
        num_episodes += 1
        env.reset()

    env.render('rgb_array')
class Simple2D(SearchEnv):

    def __init__(self, width=100, height=100, agent_view=7, roads=1,
                 max_step=None, fault_rate=0.3, tf=True):
        self.roads = roads
        self.fault_rate = int(fault_rate * min([width, height]))
        self.mission = "go to ball as much as possible"
        super().__init__(tf, width, height, agent_view, max_step)

    def _extrinsic_reward(self):
        raise NotImplementedError

    def _gen_grid(self, width, height):
        _ = self._gent_basic_grid(width, height)

    def _gent_basic_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Randomly create roads
        roads = []
        for i in range(self.roads):
            # randint is inclusive, so choices 3 and 4 both select the last branch
            choice = random.randint(0, 4)
            start = random.randint(1, self.fault_rate)
            if choice == 0:
                _width = random.randint(2, width - 2)
                for j in range(start, width - 1):
                    roads.append((_width, j))
            elif choice == 1:
                _width = random.randint(2, width - 2)
                for j in range(width - start - 1, 0, -1):
                    roads.append((_width, j))
            elif choice == 2:
                _he = random.randint(2, height - 2)
                for j in range(start, height - 1):
                    roads.append((j, _he))
            else:
                _he = random.randint(2, height - 2)
                for j in range(height - start - 1, 0, -1):
                    roads.append((j, _he))

        for i in roads:
            self.put_obj(Ball(color="blue"), *i)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()
        self.put_obj(Key(), *self.agent_pos)
        return roads

    def _reward(self):
        return self._build_rewards()[self.agent_pos[0]][self.agent_pos[1]]

    def _check_finish(self):
        if self.step_count >= self.max_steps or self.battery == 0:
            return -1
        elif self._extrinsic_reward()[0] == 1:
            return 1
        else:
            return 0

    def _build_rewards(self):
        rewards = []
        roads = set()
        for i in self.grid.grid:
            if i is not None and i.type == "ball":
                rewards.append(0)
                roads.add(i.cur_pos)
            elif i is not None and i.type == "box" and \
                    self.memory[i.cur_pos[0]][i.cur_pos[1]] > 0:
                rewards.append(0)
                roads.add(i.cur_pos)
            else:
                rewards.append(-1)

        for i in self.gen_obs_grid()[0].grid:
            if i is not None and i.type == "box":
                roads.add(i.cur_pos)

        # Grid cells are stored row-major, so reshape to (height, width) and
        # transpose so rewards can be indexed as [x][y]
        rewards = np.array(rewards).reshape(self.height, self.width).T

        # Reward is the negative Manhattan distance to the nearest road cell
        for i in itertools.product(range(self.width), range(self.height)):
            rewards[i[0]][i[1]] = rewards[i[0]][i[1]] - min(
                [abs(j[0] - i[0]) + abs(j[1] - i[1]) for j in roads])
        for i in roads:
            rewards[i[0]][i[1]] = 0
        return rewards