# Shared imports for the environments below. These classes target the legacy
# gym_minigrid API (pre-Farama minigrid); adjust the import paths if you are
# on a newer release. Some base classes (OptRewardMiniGridEnv, SearchEnv) and
# helpers used further down are assumed to be defined elsewhere in the project.
import itertools
import itertools as itt
import math
import random
from enum import IntEnum
from typing import Any, Dict, Tuple

import numpy as np
from gym import spaces
from gym_minigrid.minigrid import (
    MiniGridEnv, Grid, WorldObj, Goal, Ball, Key, Box, Door, Wall, Lava,
    OBJECT_TO_IDX, COLOR_TO_IDX,
)


class SimpleEnv(MiniGridEnv):
    """
    Simple empty environment where the agent starts in the middle and the
    goal is placed at a random free cell.
    """

    def __init__(self, size=5):
        assert size % 2 != 0, "Size needs to be odd"
        super().__init__(grid_size=size,
                         max_steps=4 * size * size,
                         see_through_walls=False)

    def _gen_grid(self, width, height):
        # Create an empty grid surrounded by walls
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Agent starts in the center, facing right
        self.start_pos = (width // 2, height // 2)
        self.start_dir = 0

        # Goal is placed anywhere but the center
        self.place_obj(Goal())

        # Set mission string
        self.mission = "GO TO GREEN SQUARE"
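
# A minimal usage sketch (not from the original source), assuming the legacy
# gym-minigrid API in which reset() picks up start_pos/start_dir set above:
def _demo_simple_env():
    env = SimpleEnv(size=5)
    obs = env.reset()
    # Drive with random actions; the episode ends on the goal or after
    # max_steps (4 * size * size) steps
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    return reward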
class PlayGround(MiniGridEnv):
    def __init__(self,
                 size=16,
                 agent_start_pos=(8, 8),
                 agent_start_dir=None,
                 ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=200,
            # Set this to True for maximum speed
            see_through_walls=True,
        )

    def _gen_grid(self, width, height):
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # Scatter randomly colored balls, keys and boxes around the room
        self.put_obj(Ball(rand_color()), 2, 1)
        self.put_obj(Ball(rand_color()), 4, 1)
        self.put_obj(Key(rand_color()), 5, 2)
        self.put_obj(Box(rand_color()), 4, 3)
        self.put_obj(Ball(rand_color()), 4, 4)
        self.put_obj(Ball(rand_color()), 12, 2)
        self.put_obj(Ball(rand_color()), 14, 1)
        self.put_obj(Key(rand_color()), 14, 2)
        self.put_obj(Key(rand_color()), 11, 2)
        self.put_obj(Box(rand_color()), 14, 3)
        self.put_obj(Ball(rand_color()), 13, 1)
        self.put_obj(Key(rand_color()), 3, 11)
        self.put_obj(Ball(rand_color()), 5, 12)
        self.put_obj(Key(rand_color()), 2, 14)
        self.put_obj(Box(rand_color()), 3, 14)
        self.put_obj(Ball(rand_color()), 5, 13)
        self.put_obj(Key(rand_color()), 13, 13)
        self.put_obj(Ball(rand_color()), 12, 13)

        # Place the agent; agent_start_dir is kept for API symmetry, but the
        # direction is randomized here
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = np.random.randint(0, 4)
        else:
            self.place_agent()

        self.mission = "get to the green goal square"
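
# `rand_color` is referenced above but not defined in this file. A plausible
# one-line implementation (an assumption, not the original helper) samples a
# name from gym-minigrid's COLOR_NAMES list:
from gym_minigrid.minigrid import COLOR_NAMES

def rand_color():
    # Pick a uniformly random object color, e.g. "red" or "blue"
    return random.choice(COLOR_NAMES)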
class EmptyMultigoal(MiniGridEnv):
    def __init__(
            self,
            size=8,
            agent_start_pos=None,
            agent_start_dir=None,
            n_goals=2,
            n_traps=1,
    ):
        self.n_goals = n_goals
        self.n_traps = n_traps
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir
        size += 2  # account for the surrounding walls

        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
            agent_view_size=size * 2 + 1,  # init as fully observable
        )

    def _gen_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the goals
        for _ in range(self.n_goals):
            self.place_obj(Goal())

        # Place the traps
        for _ in range(self.n_traps):
            self.place_obj(Lava())

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        self.mission = "get to the green goal square, avoid the lava"
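
# A minimal sketch (not from the original source) showing how the goal and
# trap counts are meant to be varied:
def _demo_empty_multigoal():
    env = EmptyMultigoal(size=8, n_goals=3, n_traps=2)
    obs = env.reset()
    # With agent_view_size = 2 * grid_size + 1 the egocentric view covers
    # the full grid regardless of where the agent stands
    print(obs["image"].shape)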
class PlayGround2(MiniGridEnv):
    def __init__(self,
                 size=8,
                 agent_start_pos=(1, 1),
                 agent_start_dir=0,
                 ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
        )

    def _gen_grid(self, width, height):
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # Build a vertical wall at x = 3, then cut a blue door into it
        for i in range(6):
            self.put_obj(Wall(), 3, i + 1)
        self.put_obj(Door('blue'), 3, 5)

        self.put_obj(Ball('red'), 4, 1)
        self.put_obj(Key('green'), 4, 2)
        self.put_obj(Box('grey'), 4, 3)
        self.put_obj(Ball('blue'), 4, 4)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        self.mission = "get to the green goal square"
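
# A quick layout check (a sketch, not from the original source): the blue
# door sits in the wall at (3, 5), separating the agent's start cell from
# the objects on the right side of the room.
def _demo_playground2():
    env = PlayGround2()
    env.reset()
    door = env.grid.get(3, 5)
    assert door.type == "door" and door.color == "blue"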
class OptRewardCrossingEnv(OptRewardMiniGridEnv):
    """
    Environment with wall or lava obstacles and a sparse reward.
    """

    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        self.num_crossings = num_crossings
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=False,
            seed=seed)

    def _gen_grid(self, width, height):
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.put_obj(Goal(), width - 2, height - 2)

        # Place obstacles (lava or walls)
        v, h = object(), object()  # singleton `vertical` and `horizontal` markers

        # Lava rivers or walls specified by direction and position in the grid
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.put_obj(self.obstacle_type(), i, j)

        # Sample a path to the goal
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                assert False

            self.grid.set(i, j, None)
            self.put_obj(Goal(), i, j)
            self.subgoal_pos = np.asarray([i, j])
            self.horizontal = (direction is h)

        self.mission = (
            "avoid the lava and get to the green goal square"
            if self.obstacle_type == Lava
            else "find the opening and get to the green goal square"
        )
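
# A small sketch (assuming OptRewardMiniGridEnv follows the standard
# gym-minigrid reset semantics) to make the river geometry concrete. For
# size=9 the candidate river coordinates are the even rows/columns
# {2, 4, 6}; with num_crossings=1 exactly one of them becomes a full
# wall/lava line, and the loop above carves a single opening into it,
# marking that opening with a subgoal.
def _demo_crossing_layout():
    env = OptRewardCrossingEnv(size=9, num_crossings=1, obstacle_type=Lava)
    env.reset()
    # The subgoal Goal object sits inside the carved opening of the river
    i, j = env.subgoal_pos
    assert env.grid.get(i, j) is not None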
class Simple2D(SearchEnv):
    def __init__(self, width=100, height=100, agent_view=7, roads=1,
                 max_step=None, fault_rate=0.3, tf=True):
        self.roads = roads
        self.fault_rate = int(fault_rate * min([width, height]))
        self.mission = "go to ball as much as possible"
        super().__init__(tf, width, height, agent_view, max_step)

    def _extrinsic_reward(self):
        raise NotImplementedError

    def _gen_grid(self, width, height):
        _ = self._gent_basic_grid(width, height)

    def _gent_basic_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Randomly create roads: each road is a straight line of blue balls
        # entering the grid from one of the four sides
        roads = []
        for i in range(self.roads):
            choice = random.randint(0, 3)
            start = random.randint(1, self.fault_rate)
            if choice == 0:
                _width = random.randint(2, width - 2)
                for j in range(start, width - 1):
                    roads.append((_width, j))
            elif choice == 1:
                _width = random.randint(2, width - 2)
                for j in range(width - start - 1, 0, -1):
                    roads.append((_width, j))
            elif choice == 2:
                _he = random.randint(2, height - 2)
                for j in range(start, height - 1):
                    roads.append((j, _he))
            else:
                _he = random.randint(2, height - 2)
                for j in range(height - start - 1, 0, -1):
                    roads.append((j, _he))
        for i in roads:
            self.put_obj(Ball(color="blue"), *i)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()
        self.put_obj(Key(), *self.agent_pos)
        return roads

    def _reward(self):
        return self._build_rewards()[self.agent_pos[0]][self.agent_pos[1]]

    def _check_finish(self):
        if self.step_count >= self.max_steps or self.battery == 0:
            return -1
        elif self._extrinsic_reward()[0] == 1:
            return 1
        else:
            return 0

    def _build_rewards(self):
        rewards = []
        roads = set()
        for i in self.grid.grid:
            if i is not None and i.type == "ball":
                rewards.append(0)
                roads.add(i.cur_pos)
            elif (i is not None and i.type == "box"
                  and self.memory[i.cur_pos[0]][i.cur_pos[1]] > 0):
                rewards.append(0)
                roads.add(i.cur_pos)
            else:
                rewards.append(-1)
        for i in self.gen_obs_grid()[0].grid:
            if i is not None and i.type == "box":
                roads.add(i.cur_pos)
        # The flat cell list is row-major, so reshape to (height, width) and
        # transpose so rewards can be indexed as [x][y]
        rewards = np.array(rewards).reshape(self.height, self.width).T
        for i in itertools.product(range(self.width), range(self.height)):
            rewards[i[0]][i[1]] = -min(
                abs(j[0] - i[0]) + abs(j[1] - i[1]) for j in roads
            ) + rewards[i[0]][i[1]]
        for i in roads:
            rewards[i[0]][i[1]] = 0
        return rewards
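
# The reward grid above is a Manhattan-distance potential: every non-road
# cell is penalized by -(1 + its L1 distance to the nearest road cell), and
# road cells are worth 0. A standalone sketch of that computation
# (a hypothetical helper, not part of the original class):
def manhattan_reward_grid(width, height, roads):
    # roads: iterable of (x, y) cells with reward 0
    road_set = set(roads)
    grid = np.zeros((width, height))
    for x in range(width):
        for y in range(height):
            if (x, y) not in road_set:
                d = min(abs(rx - x) + abs(ry - y) for rx, ry in road_set)
                grid[x][y] = -(1 + d)
    return grid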
class SnakeEnv(MiniGridEnv):
    """
    Snake environment on a MiniGrid board: the snake body is drawn with Lava
    cells, food is a Goal cell, and the episode ends when the snake hits a
    wall or its own body.
    """

    # Enumeration of possible actions
    class Actions(IntEnum):
        # Turn left, turn right, move forward
        left = 0
        right = 1
        forward = 2

    def __init__(self, size=9):
        super().__init__(grid_size=size,
                         max_steps=None,
                         see_through_walls=True)
        self.actions = SnakeEnv.Actions
        self.action_space = spaces.Discrete(len(self.actions))

        # self.observation_space = spaces.Dict({
        #     'image': spaces.Box(
        #         low=0,
        #         high=255,
        #         shape=(size, size, 3),
        #         dtype='uint8'
        #     )
        # })

    def spawn_new_food(self):
        # Collect every empty cell that is not occupied by the agent
        empties = [(i, j)
                   for i in range(self.grid.height)
                   for j in range(self.grid.width)
                   if self.grid.get(i, j) is None
                   and (i, j) != tuple(self.agent_pos)]
        self.grid.set(*random.choice(empties), Goal())

    def _gen_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # self.start_pos = (2, 2)
        # Sample a start position with a margin to the walls
        yl, xl, _ = self.observation_space.spaces['image'].shape
        self.start_pos = (random.randint(2, yl - 2), random.randint(2, xl - 2))
        self.agent_pos = self.start_pos  # TODO: the env holding agent traits is awkward
        self.start_dir = random.randint(0, 3)
        self.agent_dir = self.start_dir

        # The snake starts two cells long, with the body trailing the head
        self.snake = Snake(
            [self.start_pos, tuple(self.start_pos - self.dir_vec)])
        for pos in self.snake.body:
            self.grid.set(*pos, Lava())

        self.spawn_new_food()
        self.mission = None

    def reset(self):
        return super().reset()

    # def gen_obs(self):
    #     image = self.grid.encode()
    #     obs = {
    #         'image': image,
    #         'direction': self.agent_dir,
    #         'mission': self.mission
    #     }
    #     return obs

    def step(self, action):
        self.step_count += 1
        done = False

        if action == self.actions.left:
            self.agent_dir = (self.agent_dir - 1) % 4
        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4
        elif action == self.actions.forward:
            pass
        else:
            assert False, "unknown action: %d" % action

        fwd_pos = self.agent_pos + self.dir_vec
        fwd_cell = self.grid.get(*fwd_pos)

        if fwd_cell is None:
            # Move into an empty cell: advance the head, drop the tail
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.grid.set(*self.snake.rm_tail(), None)
            self.agent_pos = fwd_pos
            reward = -0.001
        elif fwd_cell.type == 'goal':
            # Eat the food: the snake grows by one cell
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.agent_pos = fwd_pos
            self.spawn_new_food()
            reward = 1.0
        elif fwd_cell.type == 'lava' or fwd_cell.type == 'wall':
            # Ran into a wall or the snake's own body
            reward = -1.0
            done = True
        else:
            assert False

        # Sanity check: the very first step should never be fatal
        if self.step_count == 1 and done:
            assert False

        obs = self.gen_obs()

        # Sanity check: there is always food on the board
        assert any(isinstance(self.grid.get(i, j), Goal)
                   for i in range(self.grid.height)
                   for j in range(self.grid.width))
        return obs, reward, done, {}
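
# `Snake` is not defined in this file. A minimal sketch of a compatible
# body-tracking helper (an assumption based on the calls above: grow_head,
# rm_tail, and a `body` iterable with the head first):
from collections import deque

class Snake:
    def __init__(self, body):
        # body: list of (x, y) cells, head first
        self.body = deque(body)

    def grow_head(self, x, y):
        # Extend the snake by adding a new head cell
        self.body.appendleft((x, y))

    def rm_tail(self):
        # Remove and return the tail cell so the caller can clear it
        return self.body.pop()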
class CustomLavaEnv(MiniGridEnv):
    """Define a custom lava environment.

    Notes:
        - Inherits MiniGridEnv
        - There are 4 actions: left, right, up, down
        - When the agent arrives at lava, it gets -10 points
        - When the agent arrives at the goal, it gets 100 points
        - Valid area:
            - The boundary cells are always wall.
            - The arguments width and height define the size of the valid
              area, which does not include the wall. This is why 2 is added
              to width and height.
            - The start, obstacle and goal positions should account for the
              walls too. They can be checked and adjusted using the
              `self.__adjust_pos_consider_walls` method.
    """

    def __init__(
        self,
        width: int = 5,
        height: int = 5,
        max_steps: int = 100,
        see_through_walls: bool = False,
        seed: int = 1,
        agent_view_size: int = 7,
        obstacle_type: WorldObj = Lava,
        obstacle_pos: Tuple[Tuple[int, int], ...] = (),
        obstacle_reward: int = -10,
        goal_reward: int = 100,
        default_reward: int = 0,
    ) -> None:
        """Initialize."""
        self.valid_width = width
        self.valid_height = height
        self.width: int = width + 2  # add 2 for the surrounding wall
        self.height: int = height + 2  # add 2 for the surrounding wall

        # Settings for obstacles
        self.obstacle_type: WorldObj = obstacle_type
        self.obstacle_pos = obstacle_pos
        self.goal_pos: Tuple[Tuple[int, int], ...] = (
            (self.valid_height - 1, self.valid_width - 1),
        )

        # Action enumeration for this environment
        self.actions = VALID_ACTIONS

        # Actions are discrete integer values
        self.action_space = spaces.Discrete(len(self.actions))

        # Number of cells (width and height) in the agent view
        assert agent_view_size % 2 == 1
        assert agent_view_size >= 3
        self.agent_view_size = agent_view_size

        # Observations are dictionaries containing an
        # encoding of the grid and a textual 'mission' string
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(self.agent_view_size, self.agent_view_size, 3),
            dtype="uint8",
        )
        self.observation_space = spaces.Dict({"image": self.observation_space})

        self.default_reward = default_reward
        self.goal_reward = goal_reward
        self.obstacle_reward = obstacle_reward

        # Range of possible rewards
        self.reward_range = (-10, 100)
        self.reward_grid: np.ndarray = np.zeros((self.valid_height, self.valid_width))

        # Window to use for human rendering mode
        self.window = None

        # Environment configuration
        self.grid_size: Tuple[int, int] = (height, width)
        self.max_steps: int = max_steps
        self.see_through_walls: bool = see_through_walls

        # Initialize the RNG
        self.seed(seed=seed)
        self.mission = None

        # Initialize the state
        self.reset()

    def __set_grid_type(self, height: int, width: int, grid_type: WorldObj) -> None:
        """Set the grid type at a cell.

        Notes:
            - Grid.set() takes its arguments in (width, height, type) order,
              not (height, width, type), which is easy to get wrong.
        """
        self.grid.set(width, height, grid_type)

    def __get_grid_type(self, height: int, width: int) -> WorldObj:
        """Get the grid type at a cell.

        Notes:
            - Grid.get() takes its arguments in (width, height) order,
              not (height, width), which is easy to get wrong.
        """
        return self.grid.get(width, height)

    def _gen_grid(self, width: int, height: int) -> None:
        """Generate the grid space.

        Jobs:
            - create the grid world
            - create the walls
            - set the starting point
            - set the goal
            - set the lava
        """
        assert width >= 5 and height >= 5

        # Current position and direction of the agent
        self.agent_pos: Tuple[int, int] = (1, 1)  # (0, 0) is wall
        self.agent_dir: int = 0

        # Create an empty grid
        self.grid = Grid(width, height)

        # Create the walls
        self.grid.wall_rect(0, 0, width, height)

        # Create the goal
        for position in self.goal_pos:
            goal_with_wall = self.__adjust_pos_consider_walls(position)
            self.__set_grid_type(*goal_with_wall, Goal())

        # Create the lava
        if self.obstacle_pos:
            for lava_pos in self.obstacle_pos:
                lava_with_wall = self.__adjust_pos_consider_walls(lava_pos)
                self.__set_grid_type(*lava_with_wall, self.obstacle_type())

        # Settings for reward_grid
        for cell in itertools.product(
            range(self.valid_height), range(self.valid_width)
        ):
            if cell in self.goal_pos:
                self.reward_grid[cell] = self.goal_reward
            elif cell in self.obstacle_pos:
                self.reward_grid[cell] = self.obstacle_reward
            else:
                self.reward_grid[cell] = self.default_reward

    def __adjust_pos_consider_walls(self, position: Tuple[int, int]) -> Tuple[int, int]:
        """Check the validity of the input position and shift it past the walls."""
        row, col = position
        assert row >= 0
        assert row <= self.height - 2
        assert col >= 0
        assert col <= self.width - 2
        return (row + 1, col + 1)

    def __get_pos_on_valid_area(self) -> Tuple[int, int]:
        """Get the agent position on the valid (wall-free) area.

        Notes:
            - agent_pos in MiniGridEnv has the form (column, row), not
              (row, column), so the return value switches the order.
        """
        col, row = self.agent_pos
        return (row - 1, col - 1)

    def gen_obs(self) -> Dict[str, Any]:
        """Wrap the parent's gen_obs method with additional observations.

        Notes:
            - original obs: image (np.ndarray)
            - added obs: pos (Tuple[int, int]), reward_grid (np.ndarray)
        """
        obs = super().gen_obs()
        obs.update(
            pos=self.__get_pos_on_valid_area(),
            reward_grid=self.reward_grid,
        )
        return obs

    def step(self, action: int) -> Tuple[Dict[str, Any], int, bool, Dict[str, Any]]:
        """Take an action."""
        self.step_count += 1
        reward, done = self.step_forward(action)
        if self.step_count >= self.max_steps:
            done = True
        obs = self.gen_obs()
        return obs, reward, done, {}

    def __get_forward_pos_and_agent_dir(
        self, action: int
    ) -> Tuple[Tuple[int, int], int]:
        """Get the forward position for an action.

        Notes:
            - actions:
                - left: 0
                - right: 1
                - up: 2
                - down: 3
            - agent_dir (MiniGridEnv):
                - left: 2
                - right: 0
                - up: 3
                - down: 1
            - agent_pos in MiniGridEnv has the form (column, row), not
              (row, column), so the return value switches the order for the
              forward position.
        """
        cur_c, cur_r = self.agent_pos

        # Change direction with the action value
        if action == self.actions["right"]:
            agent_dir = 0
            cur_c += 1
        elif action == self.actions["down"]:
            agent_dir = 1
            cur_r += 1
        elif action == self.actions["left"]:
            agent_dir = 2
            cur_c -= 1
        elif action == self.actions["up"]:
            agent_dir = 3
            cur_r -= 1
        else:
            raise NotImplementedError("Unknown action {}".format(action))

        return (cur_r, cur_c), agent_dir

    def step_forward(self, action: int) -> Tuple[int, bool]:
        """Move the agent with an action."""
        reward = self.default_reward
        done = False

        # Get information about the forward cell
        fwd_pos, self.agent_dir = self.__get_forward_pos_and_agent_dir(action)
        fwd_cell = self.__get_grid_type(*fwd_pos)
        fwd_r, fwd_c = fwd_pos

        # Forward cell is empty
        if fwd_cell is None:
            self.agent_pos = (fwd_c, fwd_r)
        # Forward cell is the goal
        elif fwd_cell.type == "goal":
            self.agent_pos = (fwd_c, fwd_r)
            reward = self.goal_reward
            done = True
        # Forward cell is lava
        elif fwd_cell.type == "lava":
            self.agent_pos = (fwd_c, fwd_r)
            reward = self.obstacle_reward
            done = True
        # Forward cell is a wall: the agent stays put
        elif fwd_cell.type == "wall":
            pass
        # Unknown cell type
        else:
            raise AssertionError("unknown cell type: {}".format(fwd_cell.type))

        return reward, done
class EmptyEnv(MiniGridEnv):
    """
    Empty grid environment, no obstacles, sparse reward, fully observable
    encoded-grid observations.
    """

    def __init__(
            self,
            size=15,
            agent_start_pos=(1, 1),
            agent_start_dir=0,
    ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=math.inf,  # 4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True)

        s = CHW(3, size, size)
        self.observation_space = spaces.Box(
            low=0,
            high=255,  # TODO
            shape=(s.width, s.height, s.channels),
            dtype='uint8')
        self.states_visited = set()

    def step(self, action):
        obs, reward, done, infos = super().step(action)
        # Track visitation over (x, y, direction) states
        cur_pos = (*self.agent_pos, self.agent_dir)
        self.states_visited.add(cur_pos)
        return self.observation(obs), reward, done, infos

    def observation(self, obs):
        # Return the fully observable encoded grid, with the agent's cell
        # overwritten by an agent marker
        full_grid = self.grid.encode()  # TODO: cache this encoding
        full_grid[self.agent_pos[0]][self.agent_pos[1]] = np.array(
            [OBJECT_TO_IDX['agent'], COLOR_TO_IDX['red'], self.agent_dir])
        return full_grid

    def reset(self):
        obs = super().reset()
        return self.observation(obs)  # ["image"]

    def _gen_grid(self, width, height):
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        self.mission = "get to the green goal square"
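
# `CHW` is referenced above but not defined in this file. A plausible
# definition (an assumption) is a (channels, height, width) shape record:
from collections import namedtuple

CHW = namedtuple("CHW", ["channels", "height", "width"])

# Usage sketch (not from the original source): the states_visited set can
# serve as a simple exploration coverage metric, e.g. the fraction of
# (x, y, direction) states a random policy reaches.
def _demo_coverage(env, n_steps=1000):
    env.reset()
    for _ in range(n_steps):
        env.step(env.action_space.sample())
    n_states = 4 * (env.width - 2) * (env.height - 2)  # interior cells x 4 dirs
    return len(env.states_visited) / n_states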