예제 #1
0
class SimpleEnv(MiniGridEnv):
    """
    Walled, otherwise empty square grid with a single goal.

    The agent's start cell is the centre of the grid (hence the odd-size
    requirement) and its start direction is 0. The goal cell is chosen by
    ``place_obj``.
    """

    def __init__(self, size=5):
        """Build a ``size`` x ``size`` environment; ``size`` must be odd."""
        is_odd = size % 2 != 0
        assert is_odd, "Size needs to be odd"

        step_budget = 4 * size * size
        super().__init__(
            grid_size=size,
            max_steps=step_budget,
            see_through_walls=False,
        )

    def _gen_grid(self, width, height):
        """Create the walled grid, the start pose, the goal and the mission."""
        # Empty grid surrounded by walls
        arena = Grid(width, height)
        arena.wall_rect(0, 0, width, height)
        self.grid = arena

        # Start pose: centre cell, direction 0
        self.start_dir = 0
        self.start_pos = (width // 2, height // 2)

        # NOTE(review): the goal is presumably kept off the start cell by
        # place_obj itself -- confirm against the MiniGridEnv version in use.
        self.place_obj(Goal())

        # Mission string
        self.mission = "GO TO GREEN SQUARE"
class PlayGround(MiniGridEnv):
    """Walled grid populated with clusters of randomly coloured objects.

    The object coordinates are fixed (up to x=14 / y=14), so the layout
    only fits grids of at least the default size of 16.

    Args:
        size: Side length of the square grid.
        agent_start_pos: Fixed ``(x, y)`` start cell, or ``None`` to let
            ``place_agent`` choose one.
        agent_start_dir: Fixed start direction used together with
            ``agent_start_pos``. ``None`` (the default) draws a random
            direction in ``[0, 4)`` on every grid generation.
    """

    def __init__(self,
                 size=16,
                 agent_start_pos=(8, 8),
                 agent_start_dir=None,
                 ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=200,
            # Set this to True for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        """Create the walls, drop the objects and position the agent."""
        self.grid = Grid(width, height)
        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # (object class, x, y) in placement order. The original code put a
        # ball on (4, 1) twice; the second call simply overwrote the first,
        # so only one entry is kept here.
        layout = (
            # top-left cluster
            (Ball, 2, 1),
            (Ball, 4, 1),
            (Key, 5, 2),
            (Box, 4, 3),
            (Ball, 4, 4),
            # top-right cluster
            (Ball, 12, 2),
            (Ball, 14, 1),
            (Key, 14, 2),
            (Key, 11, 2),
            (Box, 14, 3),
            (Ball, 13, 1),
            # bottom-left cluster
            (Key, 3, 11),
            (Ball, 5, 12),
            (Key, 2, 14),
            (Box, 3, 14),
            (Ball, 5, 13),
            # bottom-right cluster
            (Key, 13, 13),
            (Ball, 12, 13),
        )
        for obj_cls, x, y in layout:
            self.put_obj(obj_cls(rand_color()), x, y)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            if self.agent_start_dir is not None:
                # Honour an explicitly requested direction (previously the
                # constructor argument was stored but never used).
                self.agent_dir = self.agent_start_dir
            else:
                self.agent_dir = np.random.randint(0, 4)
        else:
            self.place_agent()

        # NOTE(review): no Goal object is placed in this grid even though
        # the mission text mentions one.
        self.mission = "get to the green goal square"
예제 #3
0
class EmptyMultigoal(MiniGridEnv):
    """Empty walled room containing several goals and lava traps.

    Args:
        size: Side length of the room; 2 is added to it before it is
            passed on as ``grid_size`` (room for the surrounding wall).
        agent_start_pos: Fixed ``(x, y)`` start cell, or ``None`` to let
            ``place_agent`` choose one.
        agent_start_dir: Start direction used with ``agent_start_pos``.
        n_goals: Number of ``Goal`` objects placed via ``place_obj``.
        n_traps: Number of ``Lava`` objects placed via ``place_obj``.
    """

    def __init__(
        self,
        size=8,
        agent_start_pos=None,
        agent_start_dir=None,
        n_goals=2,
        n_traps=1,
    ):
        self.n_goals = n_goals
        self.n_traps = n_traps
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        size += 2
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
            agent_view_size=size * 2 + 1,  # init as fully observable
        )

    def _gen_grid(self, width, height):
        """Create the walls, place goals and traps, and position the agent."""
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # A fixed start pose is assigned BEFORE the random placements so the
        # agent's cell is already known while goals and lava are dropped.
        # NOTE(review): this relies on place_obj skipping the agent's cell --
        # confirm against the MiniGridEnv version in use.
        fixed_start = self.agent_start_pos is not None
        if fixed_start:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        # Place the goals
        for _ in range(self.n_goals):
            self.place_obj(Goal())

        # Place the traps
        for _ in range(self.n_traps):
            self.place_obj(Lava())

        # Without a fixed start the agent is placed last, once the objects
        # already occupy their cells.
        if not fixed_start:
            self.place_agent()

        self.mission = "get to the green goal square, avoid the lava"
class PlayGround2(MiniGridEnv):
    """Small walled room split by a wall column with a blue door.

    All object coordinates are hard-coded: a wall column at x=3 spanning
    y=1..6 (with a door replacing the wall at y=5) and four objects stacked
    at x=4. The wall length matches the interior of the default size 8.
    """

    def __init__(self,
                 size=8,
                 agent_start_pos=(1, 1),
                 agent_start_dir=0,
                 ):
        """Store the start pose and configure the base environment."""
        self.agent_start_dir = agent_start_dir
        self.agent_start_pos = agent_start_pos

        step_budget = 4 * size * size
        super().__init__(
            grid_size=size,
            max_steps=step_budget,
            # Set this to True for maximum speed
            see_through_walls=True,
        )

    def _gen_grid(self, width, height):
        """Create the walls, the divider with its door, the objects, the agent."""
        room = Grid(width, height)
        self.grid = room

        # Surrounding walls
        room.wall_rect(0, 0, width, height)

        # Divider: wall cells at x=3 for y=1..6 ...
        for row in range(1, 7):
            self.put_obj(Wall(), 3, row)
        # ... with the cell at y=5 replaced by a door
        self.put_obj(Door('blue'), 3, 5)

        # Objects stacked in the column right of the divider
        column_items = (
            (Ball('red'), 1),
            (Key('green'), 2),
            (Box('grey'), 3),
            (Ball('blue'), 4),
        )
        for item, row in column_items:
            self.put_obj(item, 4, row)

        # Agent: fall back to place_agent only when no start cell was given
        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square"
예제 #5
0
class OptRewardCrossingEnv(OptRewardMiniGridEnv):
    """
    Environment with wall or lava obstacles, sparse reward.

    Full-length obstacle "rivers" split the grid into rooms; one opening per
    river is carved along a randomly ordered path. Every opening receives a
    ``Goal`` object, and the most recently carved opening is recorded in
    ``self.subgoal_pos`` / ``self.horizontal``.

    Args:
        size: Side length of the square grid (must be odd, see ``_gen_grid``).
        num_crossings: Number of rivers kept after shuffling the candidates.
        obstacle_type: Class instantiated for every river cell.
        seed: Seed forwarded to the base environment.
    """
    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        self.num_crossings = num_crossings
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=False,
            # Forward the caller's seed; a literal None used to be passed
            # here, which silently discarded the argument.
            seed=seed)

    def _gen_grid(self, width, height):
        """Build walls, agent, goal, rivers and the openings through them."""
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.put_obj(Goal(), width - 2, height - 2)

        # Unique sentinel objects tagging an entry as `vertical` or
        # `horizontal`; they are only ever compared by identity.
        v, h = object(), object()

        # Lava rivers or walls specified by direction and position in grid
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted(pos for direction, pos in rivers if direction is v)
        rivers_h = sorted(pos for direction, pos in rivers if direction is h)
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.put_obj(self.obstacle_type(), i, j)

        # Sample path to goal: one `h` move per vertical river and one `v`
        # move per horizontal river, in random order
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                # Unreachable: `path` holds only the two sentinels.
                raise AssertionError("unexpected path direction")
            # Clear the obstacle, then mark the opening with a Goal object
            self.grid.set(i, j, None)
            self.put_obj(Goal(), i, j)
            # Overwritten on every iteration: only the last opening remains.
            # With an empty path neither attribute is assigned here.
            self.subgoal_pos = np.asarray([i, j])
            self.horizontal = direction is h

        self.mission = ("avoid the lava and get to the green goal square"
                        if self.obstacle_type == Lava else
                        "find the opening and get to the green goal square")
예제 #6
0
class Simple2D(SearchEnv):
    """Search environment whose grid contains straight "roads" of blue balls.

    Args:
        width, height: Grid dimensions.
        agent_view: Forwarded to ``SearchEnv``.
        roads: Number of road segments generated per grid.
        max_step: Forwarded to ``SearchEnv``.
        fault_rate: Fraction of the shorter grid side; converted to an
            integer upper bound for each road's start offset.
        tf: Forwarded to ``SearchEnv`` as its first positional argument.
    """

    def __init__(self, width=100, height=100, agent_view=7, roads=1,
                 max_step=None, fault_rate=0.3, tf=True):
        self.roads = roads
        self.fault_rate = int(fault_rate * min([width, height]))
        self.mission = "go to ball as much as possible"
        super().__init__(tf, width, height, agent_view, max_step)

    def _extrinsic_reward(self):
        """Hook to be provided by subclasses; used by ``_check_finish``."""
        raise NotImplementedError

    def _gen_grid(self, width, height):
        """Generate the grid; the list of road cells is discarded here."""
        _ = self._gent_basic_grid(width, height)

    def _gent_basic_grid(self, width, height):
        """Create walls, random ball roads and the agent.

        Returns:
            list of ``(x, y)`` cells on which a blue ball was placed.
        """
        # Create an empty grid
        self.grid = Grid(width, height)
        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Randomly create the roads
        roads = []
        for _ in range(self.roads):
            # NOTE(review): randint is inclusive, so 3 and 4 both select the
            # last branch -- confirm whether randint(0, 3) was intended.
            choice = random.randint(0, 4)
            start = random.randint(1, self.fault_rate)
            if choice == 0:
                # Vertical road at a fixed x, walking y upwards from `start`.
                # The y-range is bounded by `height` (it was bounded by
                # `width`, which only worked on square grids).
                x = random.randint(2, width - 2)
                roads.extend((x, y) for y in range(start, height - 1))
            elif choice == 1:
                # Vertical road at a fixed x, walking y downwards.
                x = random.randint(2, width - 2)
                roads.extend(
                    (x, y) for y in range(height - start - 1, 0, -1))
            elif choice == 2:
                # Horizontal road at a fixed y, walking x upwards; the
                # x-range is bounded by `width` (was `height`).
                y = random.randint(2, height - 2)
                roads.extend((x, y) for x in range(start, width - 1))
            else:
                # Horizontal road at a fixed y, walking x downwards.
                y = random.randint(2, height - 2)
                roads.extend(
                    (x, y) for x in range(width - start - 1, 0, -1))

        for cell in roads:
            self.put_obj(Ball(color="blue"), *cell)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()
        # A Key object is written to the agent's own cell.
        self.put_obj(Key(), *self.agent_pos)
        return roads

    def _reward(self):
        """Return the reward-map entry of the agent's current cell."""
        return self._build_rewards()[self.agent_pos[0]][self.agent_pos[1]]

    def _check_finish(self):
        """Return -1 on step/battery exhaustion, 1 on success, else 0."""
        if self.step_count >= self.max_steps or self.battery == 0:
            return -1
        elif self._extrinsic_reward()[0] == 1:
            return 1
        else:
            return 0

    def _build_rewards(self):
        """Build a ``(width, height)`` reward map indexed as ``[x][y]``.

        Road cells (balls, boxes with a positive ``self.memory`` entry, and
        boxes found in the ``gen_obs_grid`` view) get reward 0. Every other
        cell gets its base value (0 or -1) minus the Manhattan distance to
        the nearest road cell. If there is no road cell at all, the base
        values are returned unchanged.
        """
        base = []
        road_cells = set()
        for obj in self.grid.grid:
            if obj is not None and obj.type == "ball":
                base.append(0)
                road_cells.add(obj.cur_pos)
            elif (obj is not None and obj.type == "box"
                    and self.memory[obj.cur_pos[0]][obj.cur_pos[1]] > 0):
                base.append(0)
                road_cells.add(obj.cur_pos)
            else:
                base.append(-1)
        for obj in self.gen_obs_grid()[0].grid:
            if obj is not None and obj.type == "box":
                road_cells.add(obj.cur_pos)

        # The flat cell list is reshaped with the real grid dimensions (a
        # hard-coded 20x20 used to break every other grid size); the
        # transpose makes the map addressable as rewards[x][y].
        # NOTE(review): assumes the flat list is row-major (index
        # y * width + x) -- confirm against the Grid implementation.
        rewards = np.array(base).reshape(self.height, self.width).T

        if not road_cells:
            # No road to measure a distance to; min() would raise on an
            # empty sequence.
            return rewards

        for x, y in itertools.product(range(self.width), range(self.height)):
            nearest = min(abs(rx - x) + abs(ry - y) for rx, ry in road_cells)
            rewards[x][y] = rewards[x][y] - nearest
        for rx, ry in road_cells:
            rewards[rx][ry] = 0
        return rewards
예제 #7
0
class SnakeEnv(MiniGridEnv):
    """
    Snake game on a walled grid.

    The snake's body cells are marked with ``Lava`` objects and the food is
    a ``Goal`` object. Each step optionally turns the head and then always
    advances it by one cell. Rewards: -0.001 for a plain move, 1.0 for
    eating food, -1.0 (and ``done``) for running into lava or a wall.
    """

    # Enumeration of possible actions
    class Actions(IntEnum):
        # Turn left, turn right, move forward
        left = 0
        right = 1
        forward = 2

    def __init__(self, size=9):
        super().__init__(grid_size=size,
                         max_steps=None,
                         see_through_walls=True)
        self.actions = SnakeEnv.Actions
        self.action_space = spaces.Discrete(len(self.actions))

    def spawn_new_food(self):
        """Put a ``Goal`` on a random empty cell other than the head's cell.

        Raises:
            IndexError: if no eligible cell is left.
        """
        head = tuple(self.agent_pos)
        # Grid.get takes (x, y), so x ranges over the width and y over the
        # height. The head exclusion compares coordinates; it used to
        # compare the cell *content* with the head position, which was
        # always true.
        empties = [(x, y)
                   for x in range(self.grid.width)
                   for y in range(self.grid.height)
                   if self.grid.get(x, y) is None and (x, y) != head]
        if not empties:
            raise IndexError("no empty cell left to spawn food")
        self.grid.set(*random.choice(empties), Goal())

    def _gen_grid(self, width, height):
        """Create the walls, a two-cell snake at a random pose, and food."""
        # Create an empty grid
        self.grid = Grid(width, height)

        self.grid.wall_rect(0, 0, width, height)

        # Sample the head from the grid itself, at least two cells away from
        # every border, so the tail cell behind the head is always an
        # interior cell. (The range used to come from the observation
        # image shape rather than from the grid dimensions.)
        self.start_pos = (random.randint(2, width - 3),
                          random.randint(2, height - 3))
        # TODO: agent state lives on the environment; consider moving it out.
        self.agent_pos = self.start_pos
        self.start_dir = random.randint(0, 3)
        self.agent_dir = self.start_dir
        # Body = head plus the cell directly behind it
        self.snake = Snake(
            [self.start_pos,
             tuple(self.start_pos - self.dir_vec)])
        for pos in self.snake.body:
            self.grid.set(*pos, Lava())

        self.spawn_new_food()

        self.mission = None

    def reset(self):
        """Reset the environment via the base class."""
        return super().reset()

    def step(self, action):
        """Turn (optionally) and advance the snake by one cell.

        Returns:
            ``(obs, reward, done, info)`` with an empty ``info`` dict.

        Raises:
            AssertionError: on an unknown action or an unexpected cell type.
        """
        self.step_count += 1

        done = False

        if action == self.actions.left:
            self.agent_dir = (self.agent_dir - 1) % 4

        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        elif action == self.actions.forward:
            pass
        else:
            # Raised explicitly so the check also holds under ``python -O``.
            raise AssertionError("unknown action: %d" % action)

        fwd_pos = self.agent_pos + self.dir_vec
        fwd_cell = self.grid.get(*fwd_pos)

        if fwd_cell is None:
            # Plain move: old head becomes body, tail cell is freed
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.grid.set(*self.snake.rm_tail(), None)
            self.agent_pos = fwd_pos

            reward = -0.001

        elif fwd_cell.type == 'goal':
            # Eat: the snake grows (tail is kept) and new food is spawned
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.agent_pos = fwd_pos

            self.spawn_new_food()
            reward = 1.0

        elif fwd_cell.type in ('lava', 'wall'):
            reward = -1.0
            done = True

        else:
            raise AssertionError(
                "unexpected cell type: %s" % fwd_cell.type)

        # Debug invariant kept from the original: the episode is not
        # expected to end on the very first step.
        assert not (self.step_count == 1 and done)

        obs = self.gen_obs()
        # Debug invariant: there is always food somewhere on the grid
        assert any(
            isinstance(self.grid.get(x, y), Goal)
            for x in range(self.grid.width)
            for y in range(self.grid.height)
        )
        return obs, reward, done, {}
예제 #8
0
class SnakeEnv(MiniGridEnv):
    """Snake game on a walled grid.

    Body cells are stored in the grid as ``Lava`` objects, food as a
    ``Goal`` object. A step applies an optional turn and then always moves
    the head one cell forward: -0.001 for a plain move, 1.0 for eating,
    -1.0 plus ``done`` for hitting lava or a wall.
    """

    class Actions(IntEnum):
        left = 0
        right = 1
        forward = 2

    def __init__(self, size=9):
        super().__init__(grid_size=size,
                         max_steps=None,
                         see_through_walls=True)
        self.actions = SnakeEnv.Actions
        self.action_space = spaces.Discrete(len(self.actions))

    def spawn_new_food(self):
        """Place a ``Goal`` on a random empty cell that is not the head.

        Raises:
            IndexError: if the grid has no eligible cell left.
        """
        head = tuple(self.agent_pos)
        # (x, y) order as expected by Grid.get; the head is excluded by
        # coordinate (the old test compared the cell object with the head
        # position and therefore never excluded anything).
        empties = [(x, y)
                   for x in range(self.grid.width)
                   for y in range(self.grid.height)
                   if self.grid.get(x, y) is None and (x, y) != head]
        if not empties:
            raise IndexError("no empty cell left to spawn food")
        self.grid.set(*random.choice(empties), Goal())

    def _gen_grid(self, width, height):
        """Build walls, a random two-cell snake and the first food item."""
        self.grid = Grid(width, height)

        self.grid.wall_rect(0, 0, width, height)

        # Head is drawn from the grid dimensions (not from the observation
        # image shape) and kept two cells off each border so that the tail
        # cell behind it never lands on the surrounding wall.
        self.start_pos = (random.randint(2, width - 3),
                          random.randint(2, height - 3))
        # TODO: agent state lives on the environment; consider moving it out.
        self.agent_pos = self.start_pos
        self.start_dir = random.randint(0, 3)
        self.agent_dir = self.start_dir
        self.snake = Snake(
            [self.start_pos,
             tuple(self.start_pos - self.dir_vec)])
        for pos in self.snake.body:
            self.grid.set(*pos, Lava())

        self.spawn_new_food()

        self.mission = None

    def reset(self):
        """Reset the environment via the base class."""
        return super().reset()

    def step(self, action):
        """Apply ``action`` and move the snake one cell forward.

        Returns:
            ``(obs, reward, done, info)`` with an empty ``info`` dict.

        Raises:
            AssertionError: on an unknown action or an unexpected cell type.
        """
        self.step_count += 1

        done = False

        if action == self.actions.left:
            self.agent_dir = (self.agent_dir - 1) % 4

        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        elif action == self.actions.forward:
            pass
        else:
            # Explicit raise: an ``assert False`` disappears under -O.
            raise AssertionError("unknown action: %d" % action)

        fwd_pos = self.agent_pos + self.dir_vec
        fwd_cell = self.grid.get(*fwd_pos)

        if fwd_cell is None:
            # Move: mark the old head as body and release the tail cell
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.grid.set(*self.snake.rm_tail(), None)
            self.agent_pos = fwd_pos

            reward = -0.001

        elif fwd_cell.type == "goal":
            # Eat: grow without removing the tail, then respawn food
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.agent_pos = fwd_pos

            self.spawn_new_food()
            reward = 1.0

        elif fwd_cell.type in ("lava", "wall"):
            reward = -1.0
            done = True

        else:
            raise AssertionError(
                "unexpected cell type: %s" % fwd_cell.type)

        # Debug invariant kept from the original: the episode is not
        # expected to end on the very first step.
        assert not (self.step_count == 1 and done)

        obs = self.gen_obs()
        # Debug invariant: a food item must exist after every step
        assert any(
            isinstance(self.grid.get(x, y), Goal)
            for x in range(self.grid.width)
            for y in range(self.grid.height)
        )
        return obs, reward, done, {}
예제 #9
0
class CustomLavaEnv(MiniGridEnv):
    """Define custom lava environment.

    Notes:
        - Inherit MiniGridEnv
        - there are 4 actions: left, right, up, down
        - when agent arrives at lava, get -10 point
        - when agent arrives at goal, get 100 point
        - Valid Area:
            - The boundary cells are always wall.
            - The argument width and height define the size of the valid area.
                which does not include the wall.
            - It is the reason why 2 is added to width and height.
            - The start, obstacle and goal positions should consider the walls
                too. It can be checked and adjusted using
                `self.__adjust_pos_consider_walls` method.
    """

    def __init__(
        self,
        width: int = 5,
        height: int = 5,
        max_steps: int = 100,
        see_through_walls: bool = False,
        seed: int = 1,
        agent_view_size: int = 7,
        obstacle_type: WorldObj = Lava,
        obstacle_pos: Tuple[Tuple[int, int], ...] = (),
        obstacle_reward: int = -10,
        goal_reward: int = 100,
        default_reward: int = 0,
    ) -> None:
        """Initialize.

        This constructor does not call ``super().__init__``; it assigns the
        environment attributes itself and finishes by calling ``reset``.

        Args:
            width, height: Size of the valid (wall-free) area.
            max_steps: Step count after which ``step`` reports ``done``.
            see_through_walls: Stored on the instance as-is.
            seed: Passed to ``self.seed``.
            agent_view_size: Odd view size >= 3.
            obstacle_type: Class instantiated for every obstacle cell.
            obstacle_pos: ``(row, col)`` obstacle positions in valid-area
                coordinates.
            obstacle_reward, goal_reward, default_reward: Reward values.
        """
        self.valid_width = width
        self.valid_height = height
        self.width: int = width + 2  # add 2 for surrounding wall
        self.height: int = height + 2  # add 2 for surrounding wall

        # Setting for obstacles
        self.obstacle_type: WorldObj = obstacle_type
        self.obstacle_pos = obstacle_pos

        # Single goal in the last row/column of the valid area, as (row, col)
        self.goal_pos: Tuple[Tuple[int, int], ...] = (
            (self.valid_height - 1, self.valid_width - 1),
        )

        # Action enumeration for this environment
        self.actions = VALID_ACTIONS

        # Actions are discrete integer values
        self.action_space = spaces.Discrete(len(self.actions))

        # Number of cells (width and height) in the agent view
        assert agent_view_size % 2 == 1
        assert agent_view_size >= 3
        self.agent_view_size = agent_view_size

        # Observations are dictionaries containing an
        # encoding of the grid and a textual 'mission' string
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(self.agent_view_size, self.agent_view_size, 3),
            dtype="uint8",
        )
        self.observation_space = spaces.Dict({"image": self.observation_space})

        self.default_reward = default_reward
        self.goal_reward = goal_reward
        self.obstacle_reward = obstacle_reward

        # Range of possible rewards, derived from the configured values
        # (equals the former hard-coded (-10, 100) for the defaults).
        configured_rewards = (default_reward, goal_reward, obstacle_reward)
        self.reward_range = (min(configured_rewards), max(configured_rewards))
        self.reward_grid: np.ndarray = np.zeros((self.valid_height, self.valid_width))

        # Window to use for human rendering mode
        self.window = None

        # Environment configuration
        self.grid_size: Tuple[int, int] = (height, width)
        self.max_steps: int = max_steps
        self.see_through_walls: bool = see_through_walls

        # Initialize the RNG
        self.seed(seed=seed)

        self.mission = None

        # Initialize the state
        self.reset()

    def __set_grid_type(self, height: int, width: int, grid_type: WorldObj) -> None:
        """Set the object stored at grid cell (row=height, col=width).

        Notes:
            - Grid.set() method's argument order is little bit confusing.
                (width, height, type) not (height, width, type).
        """
        self.grid.set(width, height, grid_type)

    def __get_grid_type(self, height: int, width: int) -> WorldObj:
        """Get the object stored at grid cell (row=height, col=width).

        Notes:
            - Grid.get() takes (width, height), not (height, width).
        """
        return self.grid.get(width, height)

    def _gen_grid(self, width: int, height: int) -> None:
        """Generate grid space.

        Jobs:
            - create grid world
            - create wall
            - set starting point
            - set goal
            - set lava
            - fill reward_grid
        """
        assert width >= 5 and height >= 5

        # Current position and direction of the agent
        self.agent_pos: Tuple[int, int] = (1, 1)  # (0,0) is wall
        self.agent_dir: int = 0

        # Create an empty grid
        self.grid = Grid(width, height)

        # Create wall
        self.grid.wall_rect(0, 0, width, height)

        # Create Goal
        for position in self.goal_pos:
            goal_with_wall = self.__adjust_pos_consider_walls(position)
            self.__set_grid_type(*goal_with_wall, Goal())

        # Create obstacles (an empty obstacle_pos simply skips the loop)
        for lava_pos in self.obstacle_pos:
            lava_with_wall = self.__adjust_pos_consider_walls(lava_pos)
            self.__set_grid_type(*lava_with_wall, self.obstacle_type())

        # Settings for reward_grid, indexed by (row, col) of the valid area
        for cell in itertools.product(
            range(self.valid_height), range(self.valid_width)
        ):
            if cell in self.goal_pos:
                self.reward_grid[cell] = self.goal_reward
            elif cell in self.obstacle_pos:
                self.reward_grid[cell] = self.obstacle_reward
            else:
                self.reward_grid[cell] = self.default_reward

    def __adjust_pos_consider_walls(self, position: Tuple[int, int]) -> Tuple[int, int]:
        """Check validity of the input positions and adjust it with walls.

        ``position`` is ``(row, col)`` in valid-area coordinates; the result
        is shifted by one in each axis to account for the wall.
        """
        row, col = position
        # Valid indices are 0 .. valid_size - 1. The previous upper bound
        # (<= self.height - 2) also accepted valid_size itself, which maps
        # onto the surrounding wall.
        assert 0 <= row < self.valid_height
        assert 0 <= col < self.valid_width

        return (row + 1, col + 1)

    def __get_pos_on_valid_area(self) -> Tuple[int, int]:
        """Get agent position as (row, col) in valid-area coordinates.

        Notes:
            - agent_pos in MiniGridEnv has form (column, row) not (row, column).
                So the return value switch the order of the agent position for
                forward position.
        """
        col, row = self.agent_pos
        return (row - 1, col - 1)

    def gen_obs(self) -> Dict[str, Any]:
        """Wrap the parent's gen_obs method for additional observation.

        Notes:
            - original obs: image(np.array)
            - Added obs: pos(Tuple[int, int]), reward_grid(np.array)
        """
        obs = super().gen_obs()
        obs.update(
            pos=self.__get_pos_on_valid_area(),
            reward_grid=self.reward_grid,
        )

        return obs

    def step(self, action: int) -> Tuple[Dict[str, Any], int, bool, Dict[str, Any]]:
        """Take action.

        Returns:
            ``(obs, reward, done, info)``; ``done`` is also set once
            ``step_count`` reaches ``max_steps``. ``info`` is always empty.
        """
        self.step_count += 1

        reward, done = self.step_forward(action)

        if self.step_count >= self.max_steps:
            done = True

        obs = self.gen_obs()

        return obs, reward, done, {}

    def __get_forward_pos_and_agent_dir(
        self, action: int
    ) -> Tuple[Tuple[int, int], int]:
        """Get forward position with action.

        Notes:
            - actions:
                - left: 0
                - right: 1
                - up: 2
                - down: 3
            - agent_dir(MiniGridEnv):
                - left: 2
                - right: 0
                - up: 3
                - down: 1
            - agent_pos in MiniGridEnv has form (column, row) not (row, column).
                So the return value switch the order of the agent position for
                forward position.

        Returns:
            ``((row, col), agent_dir)`` of the cell one step away.

        Raises:
            NotImplementedError: if ``action`` is none of the four actions.
        """
        cur_c, cur_r = self.agent_pos
        # change direction with action value
        if action == self.actions["right"]:
            agent_dir = 0
            cur_c += 1
        elif action == self.actions["down"]:
            agent_dir = 1
            cur_r += 1
        elif action == self.actions["left"]:
            agent_dir = 2
            cur_c -= 1
        elif action == self.actions["up"]:
            agent_dir = 3
            cur_r -= 1
        else:
            raise NotImplementedError("Unknown action {}".format(action))

        return (cur_r, cur_c), agent_dir

    def step_forward(self, action: int) -> Tuple[int, bool]:
        """Move agent with action.

        Returns:
            ``(reward, done)`` for the attempted move. Walls block the move
            and yield the default reward.

        Raises:
            AssertionError: if the target cell holds an object type other
                than goal, lava or wall.
        """
        reward = self.default_reward
        done = False

        # get information about the forward cell
        fwd_pos, self.agent_dir = self.__get_forward_pos_and_agent_dir(action)
        fwd_cell = self.__get_grid_type(*fwd_pos)
        fwd_r, fwd_c = fwd_pos

        # forward cell is empty
        if fwd_cell is None:
            self.agent_pos = (fwd_c, fwd_r)
        # forward cell is goal
        elif fwd_cell.type == "goal":
            self.agent_pos = (fwd_c, fwd_r)
            reward = self.goal_reward
            done = True
        # forward cell is lava
        elif fwd_cell.type == "lava":
            self.agent_pos = (fwd_c, fwd_r)
            reward = self.obstacle_reward
            done = True
        # forward cell is Wall: the agent stays where it is
        elif fwd_cell.type == "wall":
            pass
        # unknown type
        else:
            # The exception used to be constructed but never raised, and its
            # message named the action rather than the cell type.
            raise AssertionError(
                "unknown cell type: {}".format(fwd_cell.type)
            )

        return reward, done
예제 #10
0
class EmptyEnv(MiniGridEnv):
    """
    Empty grid environment, no obstacles, sparse reward.

    ``reset`` and ``step`` return the encoding of the whole grid (with the
    agent written into its cell) instead of the observation produced by
    the base class. Every ``(x, y, direction)`` agent state reached through
    ``step`` is collected in ``self.states_visited``.
    """
    def __init__(
            self,
            size=15,
            agent_start_pos=(1, 1),
            agent_start_dir=0,
    ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=math.inf,  # 4*size*size,
            # Set this to True for maximum speed
            see_through_walls=True)

        # Replace the observation space with one sized after the full grid
        s = CHW(3, size, size)
        self.observation_space = spaces.Box(
            low=0,
            high=255,  # TODO
            shape=(s.width, s.height, s.channels),
            dtype='uint8')

        # Not cleared by reset(): it accumulates over the instance lifetime
        self.states_visited = set()

    def step(self, action):
        """Step the base environment and return the full-grid observation."""
        obs, reward, done, infos = super().step(action)

        cur_pos = (*self.agent_pos, self.agent_dir)
        self.states_visited.add(cur_pos)

        return self.observation(obs), reward, done, infos

    def observation(self, obs):
        """Return the encoded full grid with the agent drawn into its cell.

        ``obs`` (the base-class observation) is accepted for interface
        compatibility only; the result is built from ``self.grid``.
        """
        full_grid = self.grid.encode()  # todo: Cache this encoding
        agent_x, agent_y = self.agent_pos
        full_grid[agent_x][agent_y] = np.array(
            [OBJECT_TO_IDX['agent'], COLOR_TO_IDX['red'], self.agent_dir])

        return full_grid

    def reset(self):
        """Reset the base environment and return the full-grid observation."""
        obs = super().reset()
        return self.observation(obs)

    def _gen_grid(self, width, height):
        """Create the walled grid and position the agent."""
        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        # NOTE(review): no Goal object is placed by this method although
        # the mission text mentions one.
        self.mission = "get to the green goal square"