class FourRooms(FourRoomsEnv):
    """Four-rooms gridworld whose hallway (door) cells are static.

    Overrides the parent's grid generator so that the openings between the
    rooms are always the cells listed in ``self.hallways`` instead of being
    sampled at random.
    """

    def __init__(self, agent_pos: tuple = (1, 1), goal_pos: tuple = (15, 15)):
        """Store the fixed hallway cells and defer to the parent constructor.

        Args:
            agent_pos: Agent start cell forwarded to the parent class.
            goal_pos: Goal cell forwarded to the parent class.
        """
        # Cells that are cleared of wall in `_gen_grid`. They are assigned
        # before calling the parent constructor -- presumably because that
        # constructor ends up invoking `_gen_grid`; keep this order.
        self.hallways = {
            'top': (9, 4),
            'left': (3, 9),
            'right': (16, 9),
            'bot': (9, 14),
        }
        super().__init__(agent_pos=agent_pos, goal_pos=goal_pos)

    def _reward(self):
        """Return the constant reward of 1."""
        return 1

    def _gen_grid(self, width, height):
        """Build the four-room layout with the fixed hallways.

        Args:
            width: Grid width in cells.
            height: Grid height in cells.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):
            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall (the door is carved from `self.hallways` below)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)

                # Bottom wall (the door is carved from `self.hallways` below)
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)

        # Open the static hallways
        for hallway in self.hallways.values():
            self.grid.set(*hallway, None)

        # Use the requested start cell (with a random orientation), or let
        # the environment pick a random placement.
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            # Both attributes hold the full position. The previous tuple
            # unpacking stored the first coordinate in `init_pos` and the
            # second in `cur_pos`.
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'
class NineRoomsEnv(MiniGridSimple):
    """Grid split into a 3x3 arrangement of rooms with cardinal movement.

    Two horizontal and two vertical walls divide the grid; passages are
    opened around each of the four wall intersections. The goal is fixed at
    cell ``(1, 1)``. Observations are augmented with a ``state_feat`` vector
    produced by `_encode_state`.
    """

    # Only 4 actions needed, left, right, up and down
    class NineRoomsCardinalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        passage_size=1,
        max_steps=100,
        seed=133,
        rnd_start=0,
        start_state_exclude_rooms=None,
    ):
        """Configure the environment and its action/observation spaces.

        Args:
            grid_size: Side length of the grid, forwarded to the base class.
            passage_size: Number of cells opened on each side of a wall
                intersection (in addition to the intersection itself).
            max_steps: Step budget, forwarded to the base class and stored
                as ``self.T``.
            seed: Seed forwarded to the base class.
            rnd_start: Truthy if the agent is to be spawned at random.
            start_state_exclude_rooms: Room numbers (1..9) used to restrict
                random spawning; see `_gen_grid`. ``None`` (the default)
                means no restriction.
        """
        self.grid_size = grid_size
        self.passage_size = passage_size
        self._goal_default_pos = (1, 1)

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start

        # If self.rnd_start =1, don't spawn in these rooms.
        # A fresh list is created per instance so that instances never share
        # (and accidentally mutate) a single default list object.
        if start_state_exclude_rooms is None:
            start_state_exclude_rooms = []
        self.start_state_exclude_rooms = start_state_exclude_rooms

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(NineRoomsEnv.NineRoomsCardinalActions)

        # Set the action and observation spaces
        self.actions = NineRoomsEnv.NineRoomsCardinalActions
        self.action_space = spaces.Discrete(self.nActions)
        self.max_cells = (grid_size - 1) * (grid_size - 1)

        # The observation space is the agent position in the grid
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])
        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )
        self.T = max_steps

    @property
    def category(self):
        """Look up the current agent position in ``self.cell_cat_map``."""
        # [TODO] Make sure this doesn't break after self.agent_pos is changed to numpy.ndarray
        return self.cell_cat_map[self.agent_pos]

    def reward(self):
        """Return 1 once ``self.success`` is set, otherwise 0."""
        return 1 if self.success else 0

    def _gen_grid(self, width, height, val=False, seen=True):
        """Build the nine-room grid and choose start and goal cells.

        Args:
            width: Grid width in cells.
            height: Grid height in cells.
            val: Unused in this method.
            seen: Only relevant for random starts with a non-empty
                ``start_state_exclude_rooms``. If truthy the agent spawns
                outside those rooms; otherwise it spawns only inside them.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Place horizontal walls through the grid
        self.grid.horz_wall(0, height // 3)
        self.grid.horz_wall(0, (2 * height) // 3)

        # Place vertical walls through the grid
        self.grid.vert_wall(width // 3, 0)
        self.grid.vert_wall((2 * width) // 3, 0)

        # Create passages: open a cross of cells around each intersection
        passage_anchors = [(width // 3, height // 3),
                           (width // 3, (2 * height) // 3),
                           ((2 * width) // 3, height // 3),
                           ((2 * width) // 3, (2 * height) // 3)]
        passage_cells = []
        for anchor in passage_anchors:
            for delta in range(-1 * self.passage_size, self.passage_size + 1):
                passage_cells.append((anchor[0] + delta, anchor[1]))
                passage_cells.append((anchor[0], anchor[1] + delta))

        for cell in passage_cells:
            self.grid.set(*cell, None)

        # Even during validation, start state distribution
        # should be the same as that during training
        if not self.rnd_start:
            self._agent_default_pos = ((width - 2) // 2, (height - 2) // 2)
        else:
            self._agent_default_pos = None

        if self._agent_default_pos is not None:
            # Fixed start near the center of the grid
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter
        elif not self.start_state_exclude_rooms:
            self.place_agent()
        else:
            if seen:
                exclude_from = self.start_state_exclude_rooms
            else:
                exclude_from = [
                    x for x in range(1, 10)
                    if x not in self.start_state_exclude_rooms
                ]

            # Interior coordinate ranges of the three room columns / rows.
            # Rooms are numbered 1..9 row by row, so column index is
            # (room - 1) % 3 and row index is (room - 1) // 3.
            x_ranges = (
                range(1, width // 3),
                range(width // 3 + 1, (2 * width) // 3),
                range((2 * width) // 3 + 1, width - 1),
            )
            y_ranges = (
                range(1, height // 3),
                range(height // 3 + 1, (2 * height) // 3),
                range((2 * height) // 3 + 1, height - 1),
            )

            valid_start_pos = []
            for room in range(1, 10):
                if room in exclude_from:
                    continue
                # Ignore that there are walls for now, can handle that with
                # rejection sampling
                valid_x = x_ranges[(room - 1) % 3]
                valid_y = y_ranges[(room - 1) // 3]
                valid_start_pos.extend(product(valid_x, valid_y))

            # Make sure start position doesn't conflict with other cells
            while True:
                col, row = valid_start_pos[np.random.choice(
                    len(valid_start_pos))]
                # The grid is addressed as (x, y) = (col, row) everywhere
                # else in this class; the lookup previously passed
                # (row, col) and therefore tested the transposed cell.
                cell = self.grid.get(col, row)
                if cell is None or cell.can_overlap():
                    break

            self.start_pos = (col, row)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # `step` compares the agent position against this value
        self.mission = goal.init_pos

    def reset(self, val=False, seen=True):
        """Reset via the base class and add ``state_feat`` to the observation.

        Returns:
            The ``(obs, info)`` pair from the base class, with
            ``obs['state_feat']`` added.
        """
        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """Advance the environment by one cardinal move.

        Args:
            action: A member of ``self.move_actions``.

        Returns:
            ``(obs, reward, done, info)``.

        Raises:
            ValueError: If the episode is still running, the agent is not at
                the goal and ``action`` is not in ``self.move_actions``.
        """
        self.step_count += 1

        # Reward doesn't depend on action, but just state (see `reward`).
        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # Episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)
                    self.agent_dir = (action - 1) % 4
                    # Only move if the target cell is free, overlappable or
                    # the goal; otherwise the agent stays where it is.
                    if (fwd_cell is None or fwd_cell.can_overlap()
                            or self.is_goal(move_pos)):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            # Note: this replaces the array-valued 'agent_pos' set above
            # with the raw `self.agent_pos`.
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """Encode a position as a thermometer-style feature vector.

        Args:
            state: Indexable pair holding the agent position.

        Returns:
            Float array of length ``self.width * self.height`` that is 1 for
            every index below the flattened position and 0 from that index
            onwards.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        # NOTE(review): state[1] is used as x and state[0] as y here --
        # confirm this matches the layout of obs['agent_pos'].
        curr_x, curr_y = state[1], state[0]
        curr_pos = curr_y * self.width + curr_x
        feat[curr_pos:] = 0
        return feat
class FourRoomsEnv(MiniGridEnv):
    """Classic four-rooms gridworld environment.

    The agent and goal positions can be specified; any that is left as
    ``None`` is placed at random. Observations returned by `step` and
    `reset` are the fully encoded grid with the agent drawn in.
    """

    def __init__(self, agent_pos=None, goal_pos=None, size=None):
        """Create the environment.

        Args:
            agent_pos: Fixed agent start cell, or ``None`` for random.
            goal_pos: Fixed goal cell, or ``None`` for random.
            size: Grid side length, forwarded to the base class as
                ``grid_size`` and used for the observation-space shape.
        """
        # Stored before the base constructor runs -- presumably because it
        # ends up invoking `_gen_grid`, which reads them; keep this order.
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        super().__init__(grid_size=size, max_steps=math.inf)

        s = CHW(3, size, size)
        self.observation_space = spaces.Box(
            low=0,
            high=255,  # TODO
            shape=(s.width, s.height, s.channels),
            dtype='uint8')
        # Set of (x, y, direction) triples reached via `step`
        self.states_visited = set()

    def step(self, action):
        """Step the base environment and record the visited state.

        Returns:
            ``(observation, reward, done, infos)`` where ``observation`` is
            the full-grid encoding from `observation`.
        """
        obs, reward, done, infos = super().step(action)
        cur_pos = (*self.agent_pos, self.agent_dir)
        self.states_visited.add(cur_pos)
        return self.observation(obs), reward, done, infos

    def observation(self, obs):
        """Return the encoded full grid with the agent written into it.

        Args:
            obs: Observation from the base class; its contents are not used.

        Returns:
            The result of ``self.grid.encode()`` with the entry at the agent
            position replaced by ``[agent index, red index, direction]``.
        """
        full_grid = self.grid.encode()  # todo: Cache this encoding
        full_grid[self.agent_pos[0]][self.agent_pos[1]] = np.array(
            [OBJECT_TO_IDX['agent'], COLOR_TO_IDX['red'], self.agent_dir])
        return full_grid

    def reset(self):
        """Reset the base environment and return the full-grid observation."""
        obs = super().reset()
        return self.observation(obs)

    def _gen_grid(self, width, height):
        """Build four rooms with one randomly placed door per shared wall.

        Args:
            width: Grid width in cells.
            height: Grid height in cells.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):
            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall and door
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    pos = (xR, self._rand_int(yT + 1, yB))
                    self.grid.set(*pos, None)

                # Bottom wall and door
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    pos = (self._rand_int(xL + 1, xR), yB)
                    self.grid.set(*pos, None)

        # Use the requested start cell (with a random orientation), or let
        # the environment pick a random placement.
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            # assuming random start direction
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            # Both attributes hold the full position. The previous tuple
            # unpacking stored the first coordinate in `init_pos` and the
            # second in `cur_pos`.
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'
class Cluttered(MiniGridSimple):
    """Walled grid cluttered with square obstacles, with cardinal movement.

    Obstacles are laid out from a fixed seed (``self.grid_seed``), so the
    layout is the same on every grid generation. Start and goal cells are
    hard-coded in `__init__`. Observations are augmented with a
    ``state_feat`` vector produced by `_encode_state`.
    """

    # Only 4 actions needed, left, right, up and down
    class ClutteredCardinalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        num_objects=5,
        obj_size=3,
        max_steps=100,
        seed=133,
        state_encoding="thermal",
        rnd_start=0,
    ):
        """Configure the environment and its action/observation spaces.

        Args:
            grid_size: Side length of the grid, forwarded to the base class.
            num_objects: Number of obstacles to place.
            obj_size: Side length of each square obstacle (1, 2 or 3).
            max_steps: Step budget, forwarded to the base class and stored
                as ``self.T``.
            seed: Seed forwarded to the base class.
            state_encoding: ``"thermal"`` or ``"one-hot"``; see
                `_encode_state`.
            rnd_start: Stored as ``self.rnd_start``; not otherwise read in
                this class.
        """
        self.state_encoding = state_encoding
        self.grid_size = grid_size
        self.num_objects = num_objects
        self.obj_size = obj_size

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start
        # Seed for the obstacle layout
        self.grid_seed = 12

        # This only works for 15x15 grid with 6 obstacles
        #self._goal_default_pos = (6, 10)
        #self._goal_default_pos = (self.grid_size-2, self.grid_size-2)
        self._goal_default_pos = (7, 12)

        # This is used for some of the experiments.
        self._agent_default_pos = (7, 6)

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(Cluttered.ClutteredCardinalActions)

        # Set the action and observation spaces
        self.actions = Cluttered.ClutteredCardinalActions
        self.action_space = spaces.Discrete(self.nActions)
        self.max_cells = (grid_size - 1) * (grid_size - 1)

        # The observation space is the agent position in the grid
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])
        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )
        self.T = max_steps

    def reward(self):
        """Return 0 once ``self.success`` is set, otherwise ``-1 / self.T``."""
        return 0 if self.success else -1 / self.T

    def _gen_grid(self, width, height, val=False, seen=True):
        """Build the walled grid, scatter obstacles and place the goal.

        Args:
            width: Grid width in cells (must be at least 10).
            height: Grid height in cells (must be at least 10).
            val: Unused in this method.
            seen: Unused in this method.

        Raises:
            ValueError: If ``self.obj_size`` is not 1, 2 or 3 (and at least
                one obstacle is requested).
        """
        assert width >= 10 and height >= 10, "Environment too small to place objects"

        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Use a private generator seeded with `grid_seed`. It yields the same
        # draws as seeding the global NumPy RNG would, but does not reset the
        # process-wide random stream on every grid generation.
        rng = np.random.RandomState(self.grid_seed)

        # Offsets (relative to the sampled cell) covered by each obstacle size
        offsets_by_size = {3: (-1, 0, 1), 2: (0, 1), 1: (0, )}
        candidates = list(range(2, self.grid_size - 3))

        for _ in range(self.num_objects):
            # Rejection-sample until the obstacle fits on free cells
            while True:
                c_x = rng.choice(candidates)
                c_y = rng.choice(candidates)

                obj_size = self.obj_size
                if obj_size not in offsets_by_size:
                    raise ValueError(
                        "Unsupported obj_size: {}".format(obj_size))
                offsets = offsets_by_size[obj_size]
                cells = list(
                    product([c_x + d for d in offsets],
                            [c_y + d for d in offsets]))

                occupants = (self.grid.get(x, y) for x, y in cells)
                valid = all(obj is None or obj.can_overlap()
                            for obj in occupants)

                if valid:
                    for cell in cells:
                        self.grid.set(*cell, Wall())
                    break

        # Set the start position and the goal position
        goal = Goal()
        # [NOTE] : This is a hack, add option to set goal location from arguments.
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # `step` compares the agent position against this value
        self.mission = goal.init_pos

        self.start_pos = self._agent_default_pos

    def reset(self, val=False, seen=True):
        """Reset via the base class and add ``state_feat`` to the observation.

        Returns:
            The ``(obs, info)`` pair from the base class, with
            ``obs['state_feat']`` added.
        """
        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """Advance the environment by one cardinal move.

        Args:
            action: A member of ``self.move_actions``.

        Returns:
            ``(obs, reward, done, info)``.

        Raises:
            ValueError: If the episode is still running, the agent is not at
                the goal and ``action`` is not in ``self.move_actions``.
        """
        self.step_count += 1

        # Reward doesn't depend on action, but just state (see `reward`).
        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # No penalty, episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)
                    self.agent_dir = (action - 1) % 4
                    # Only move if the target cell is free, overlappable or
                    # the goal; otherwise the agent stays where it is.
                    if (fwd_cell is None or fwd_cell.can_overlap()
                            or self.is_goal(move_pos)):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            # Note: this replaces the array-valued 'agent_pos' set above
            # with the raw `self.agent_pos`.
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """Encode a position as a feature vector.

        Args:
            state: Indexable pair holding the agent position.

        Returns:
            Float array of length ``self.width * self.height``. With
            ``"thermal"`` encoding it is 1 below the flattened position and
            0 from there on; with ``"one-hot"`` it is 1 only at the
            flattened position. Any other ``state_encoding`` leaves the
            vector as all ones.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        # NOTE(review): state[1] is used as x and state[0] as y here --
        # confirm this matches the layout of obs['agent_pos'].
        curr_x, curr_y = state[1], state[0]
        curr_pos = curr_y * self.width + curr_x
        if self.state_encoding == "thermal":
            feat[curr_pos:] = 0
        elif self.state_encoding == "one-hot":
            feat[:] = 0
            feat[curr_pos] = 1
        return feat
class EmptyGridWorld(MiniGridSimple):
    """Empty walled grid with cardinal movement.

    The goal is fixed at ``(grid_size - 2, 1)``. Unless ``rnd_start`` is
    set, the agent starts at ``(1, grid_size - 2)``. Observations are
    augmented with a ``state_feat`` vector produced by `_encode_state`.
    """

    # Only 4 actions needed, left, right, up and down
    class CardnalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        max_steps=100,
        state_encoding="thermal",
        seed=133,
        rnd_start=0,
    ):
        """Configure the environment and its action/observation spaces.

        Args:
            grid_size: Side length of the grid, forwarded to the base class.
            max_steps: Step budget, forwarded to the base class and stored
                as ``self.T``.
            state_encoding: ``"thermal"`` or ``"one-hot"``; see
                `_encode_state`.
            seed: Seed forwarded to the base class.
            rnd_start: Truthy if the agent is to be spawned at random.
        """
        self.state_encoding = state_encoding
        self.grid_size = grid_size
        self._goal_default_pos = (self.grid_size - 2, 1)

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(EmptyGridWorld.CardnalActions)

        # Set the action and observation spaces
        self.actions = EmptyGridWorld.CardnalActions
        self.action_space = spaces.Discrete(self.nActions)
        self.max_cells = (grid_size - 1) * (grid_size - 1)

        # The observation space is the agent position in the grid
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])
        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )
        self.T = max_steps

    @property
    def category(self):
        """Look up the current agent position in ``self.cell_cat_map``."""
        # [TODO] Make sure this doesn't break after self.agent_pos is changed to numpy.ndarray
        return self.cell_cat_map[self.agent_pos]

    def reward(self):
        """Return 1 once ``self.success`` is set, otherwise ``-1 / self.T``."""
        return 1 if self.success else -1 / self.T

    def _gen_grid(self, width, height, val=False, seen=True):
        """Build the empty walled grid and choose start and goal cells.

        Args:
            width: Grid width in cells.
            height: Grid height in cells.
            val: Unused in this method.
            seen: Unused in this method.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Even during validation, start state distribution
        # should be the same as that during training
        if not self.rnd_start:
            self._agent_default_pos = (1, self.grid_size - 2)
        else:
            self._agent_default_pos = None

        # Fixed start at (1, grid_size - 2)
        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # `step` compares the agent position against this value
        self.mission = goal.init_pos

        # Random start: previously nothing placed the agent in this case.
        # NOTE(review): assumes the base class's `place_agent` picks a free
        # cell and records the start position -- confirm against
        # MiniGridSimple. It runs after the goal is set so that the goal
        # cell is already occupied.
        if self._agent_default_pos is None:
            self.place_agent()

    def reset(self, val=False, seen=True):
        """Reset via the base class and add ``state_feat`` to the observation.

        Returns:
            The ``(obs, info)`` pair from the base class, with
            ``obs['state_feat']`` added.
        """
        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """Advance the environment by one cardinal move.

        Args:
            action: A member of ``self.move_actions``.

        Returns:
            ``(obs, reward, done, info)``.

        Raises:
            ValueError: If the episode is still running, the agent is not at
                the goal and ``action`` is not in ``self.move_actions``.
        """
        self.step_count += 1

        # Reward doesn't depend on action, but just state (see `reward`).
        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # Episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)
                    self.agent_dir = (action - 1) % 4
                    # Only move if the target cell is free, overlappable or
                    # the goal; otherwise the agent stays where it is.
                    if (fwd_cell is None or fwd_cell.can_overlap()
                            or self.is_goal(move_pos)):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])
        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            # Note: this replaces the array-valued 'agent_pos' set above
            # with the raw `self.agent_pos`.
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """Encode a position as a feature vector.

        Args:
            state: Indexable pair; ``state[0]`` is used as x and
                ``state[1]`` as y.

        Returns:
            Float array of length ``self.width * self.height``. With
            ``"thermal"`` encoding it is 1 below the flattened position and
            0 from there on; with ``"one-hot"`` it is 1 only at the
            flattened position. Any other ``state_encoding`` leaves the
            vector as all ones.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        curr_x, curr_y = state[0], state[1]
        curr_pos = curr_y * self.width + curr_x
        if self.state_encoding == "thermal":
            feat[curr_pos:] = 0
        elif self.state_encoding == "one-hot":
            feat[:] = 0
            feat[curr_pos] = 1
        return feat