Ejemplo n.º 1
0
class SimpleEnv(MiniGridEnv):
    """
    Simple empty environment where the agent starts in the middle,
    target is randomly generated.

    Bug fix: a second, pasted-in ``_gen_grid`` definition used to follow the
    original one. Since Python keeps only the last definition, it shadowed
    the working implementation and referenced ``self.is_double`` /
    ``self.corridor_length``, which SimpleEnv never sets, so ``reset()``
    crashed with AttributeError. The stray duplicate has been removed.
    """
    def __init__(self, size=5):
        # Size must be odd so the agent can sit exactly in the center cell.
        assert size % 2 != 0, "Size needs to be odd"
        super().__init__(grid_size=size,
                         max_steps=4 * size * size,
                         see_through_walls=False)

    def _gen_grid(self, width, height):
        # Create empty grid
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Agent starts in the center
        self.start_pos = (width // 2, height // 2)
        self.start_dir = 0

        # Goal is anywhere but the center
        self.place_obj(Goal())

        # Set mission string
        self.mission = "GO TO GREEN SQUARE"
Ejemplo n.º 3
0
    def get_actions(self, obss):
        """Compute a batch of actions for the given observations.

        Runs the model without gradients; picks the argmax action when
        ``self.argmax`` is set, otherwise samples from the policy.

        Returns:
            numpy.ndarray of action indices.
        """
        preprocessed_obss = self.preprocess_obss(obss)

        with torch.no_grad():
            if self.model.recurrent:
                # Recurrent policy: thread the memory state through the call.
                dist, _, self.memories = self.model(preprocessed_obss,
                                                    self.memories)
            else:
                dist, _, x, y, z = self.model(preprocessed_obss,
                                              introspect=True)

                # Debug introspection dump for a single step
                # (presumably temporary instrumentation — TODO confirm).
                if self.number == 7:
                    Grid.decode(y[0][0].round().numpy()).render_human()
                    print(len(x), len(y), len(z))
                    print(y[0].shape, z[0].shape)
                    print(x, y, z)
                self.number += 1

        if self.argmax:
            actions = dist.probs.max(1, keepdim=True)[1]
        else:
            actions = dist.sample()

        # Bug fix: the conversion used to be guarded by
        # ``torch.cuda.is_available()``, so CPU-only runs returned a
        # torch.Tensor while CUDA runs returned a numpy array. Always move
        # to CPU and convert for a consistent return type (``.cpu()`` is a
        # no-op on CPU tensors).
        actions = actions.cpu().numpy()

        return actions
    def _gen_grid(self, width, height):
        """Build a walled room split by a vertical wall with a blue door,
        with a few objects placed on the far side of the wall.

        NOTE(review): the mission mentions a green goal square but no Goal
        tile is placed here — confirm against the reward logic.
        """
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # Interior wall segment at x=3, rows 1..6
        for row in range(1, 7):
            self.put_obj(Wall(), 3, row)

        # Blue door punched into the wall
        self.put_obj(Door('blue'), 3, 5)

        # Objects just east of the wall
        for item, x, y in (
            (Ball('red'), 4, 1),
            (Key('green'), 4, 2),
            (Box('grey'), 4, 3),
            (Ball('blue'), 4, 4),
        ):
            self.put_obj(item, x, y)

        # Place the agent: fixed start if configured, otherwise random
        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square"
Ejemplo n.º 5
0
    def _gen_grid(self, width, height):
        """Generate a crossing-style maze.

        Lays ``self.num_crossings`` "rivers" of ``self.obstacle_type``
        (lava or walls) across the room, then opens exactly one passage per
        river along a randomly sampled path so the goal stays reachable.
        Order of RNG calls matters for reproducibility — do not reorder.
        """
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.put_obj(Goal(), width - 2, height - 2)

        # Place obstacles (lava or walls)
        # Sentinel objects compared by identity (`is`) below.
        v, h = object(), object(
        )  # singleton `vertical` and `horizontal` objects

        # Lava rivers or walls specified by direction and position in grid
        # (only even rows/columns, so rivers never touch the border walls).
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
        # Every cell of every chosen river becomes an obstacle.
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.put_obj(self.obstacle_type(), i, j)

        # Sample path to goal: one move per river, crossing each exactly once.
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings: walk room-to-room along `path`, carving one
        # opening in the river wall between consecutive rooms.
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                assert False
            self.grid.set(i, j, None)

        self.mission = ("avoid the lava and get to the green goal square"
                        if self.obstacle_type == Lava else
                        "find the opening and get to the green goal square")
Ejemplo n.º 6
0
    def _gen_grid(self, width, height, val=False, seen=True):
        """Empty walled room with a fixed goal square.

        The agent spawn is fixed at (1, grid_size - 2) unless
        ``self.rnd_start`` requests a random start position.
        """
        self.grid = Grid(width, height)

        # Outer boundary walls (top, bottom, left, right)
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Even during validation, start state distribution
        # should be the same as that during training
        self._agent_default_pos = (
            None if self.rnd_start else (1, self.grid_size - 2)
        )

        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            # Agent direction doesn't matter
            self.start_dir = self._rand_int(0, 4)

        # Fixed goal square; remember its position on the object itself
        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # Mission here is the goal position itself, not a text string
        self.mission = goal.init_pos
Ejemplo n.º 7
0
    def _gen_grid(self, width, height, val=False, seen=True):
        """Fill the room with ``self.num_objects`` square wall obstacles of
        side ``self.obj_size`` placed by rejection sampling, then set the
        goal and agent start positions.

        NOTE(review): seeds numpy's *global* RNG with ``self.grid_seed``,
        which affects all other users of ``np.random`` in the process.
        """
        assert width >= 10 and height >= 10, "Environment too small to place objects"
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Deterministic layout for a given grid_seed (global RNG — see note).
        np.random.seed(self.grid_seed)

        for obj_idx in range(self.num_objects):

            # Rejection sampling: propose an obstacle center until the whole
            # obstacle footprint lands on free / overlappable cells.
            while True:
                c_x, c_y = np.random.choice(list(range(
                    2, self.grid_size - 3))), np.random.choice(
                        list(range(2, self.grid_size - 3)))

                #obj_size = np.random.choice(list(range(1, self.obj_size+1)))
                obj_size = self.obj_size

                # Footprint cells for a 3x3, 2x2 or 1x1 obstacle.
                if obj_size == 3:
                    cells = list(
                        product([c_x - 1, c_x, c_x + 1],
                                [c_y - 1, c_y, c_y + 1]))
                elif obj_size == 2:
                    cells = list(product([c_x, c_x + 1], [c_y, c_y + 1]))
                elif obj_size == 1:
                    cells = list(product([c_x], [c_y]))
                else:
                    raise ValueError

                valid = True
                for cell in cells:
                    # Note: `cell` is rebound from a coordinate pair to the
                    # grid object occupying it.
                    cell = self.grid.get(cell[0], cell[1])

                    if not (cell is None or cell.can_overlap()):
                        valid = False
                        break

                if valid:
                    for cell in cells:
                        self.grid.set(*cell, Wall())
                    break

        # Set the start position and the goal position depending upon where the obstacles are present
        goal = Goal()
        # [NOTE] : This is a hack, add option to set goal location from arguments.

        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # Mission is the goal position itself, not a text string.
        self.mission = goal.init_pos

        self.start_pos = self._agent_default_pos
class PlayGround(MiniGridEnv):
    """16x16 open room scattered with randomly colored balls, keys and
    boxes in four clusters.

    Bug fixes:
      * ``agent_start_dir`` was accepted by ``__init__`` but ignored —
        ``_gen_grid`` always randomized the direction. It is now honored
        when provided (``None`` still means random, the old behavior).
      * A duplicate ``put_obj(Ball(...), 4, 1)`` placed two balls on the
        same cell (the second silently overwrote the first); removed.
    """
    def __init__(self,
                 size=16,
                 agent_start_pos=(8, 8),
                 agent_start_dir=None,
                 ):
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=200,
            # Set this to True for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        self.grid = Grid(width, height)
        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # Top-left cluster
        self.put_obj(Ball(rand_color()), 2, 1)
        self.put_obj(Ball(rand_color()), 4, 1)
        self.put_obj(Key(rand_color()), 5, 2)
        self.put_obj(Box(rand_color()), 4, 3)
        self.put_obj(Ball(rand_color()), 4, 4)

        # Top-right cluster
        self.put_obj(Ball(rand_color()), 12, 2)
        self.put_obj(Ball(rand_color()), 14, 1)
        self.put_obj(Key(rand_color()), 14, 2)
        self.put_obj(Key(rand_color()), 11, 2)
        self.put_obj(Box(rand_color()), 14, 3)
        self.put_obj(Ball(rand_color()), 13, 1)

        # Bottom-left cluster
        self.put_obj(Key(rand_color()), 3, 11)
        self.put_obj(Ball(rand_color()), 5, 12)
        self.put_obj(Key(rand_color()), 2, 14)
        self.put_obj(Box(rand_color()), 3, 14)
        self.put_obj(Ball(rand_color()), 5, 13)

        # Bottom-right cluster
        self.put_obj(Key(rand_color()), 13, 13)
        self.put_obj(Ball(rand_color()), 12, 13)

        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            # Honor an explicitly requested direction; None -> random.
            if self.agent_start_dir is not None:
                self.agent_dir = self.agent_start_dir
            else:
                self.agent_dir = np.random.randint(0, 4)
        else:
            self.place_agent()

        self.mission = "get to the green goal square"
Ejemplo n.º 9
0
 def _gen_grid(self, width, height):
     """Four-room layout: a 2x2 grid of rooms separated by walls, connected
     through the opening cells listed in ``self.hallways``.
     """
     # Create the grid
     self.grid = Grid(width, height)

     # Generate the surrounding walls
     self.grid.horz_wall(0, 0)
     self.grid.horz_wall(0, height - 1)
     self.grid.vert_wall(0, 0)
     self.grid.vert_wall(width - 1, 0)

     room_w = width // 2
     room_h = height // 2

     # For each row of rooms
     for j in range(0, 2):

         # For each column
         for i in range(0, 2):
             xL = i * room_w
             yT = j * room_h
             xR = xL + room_w
             yB = yT + room_h

             # Right wall (openings are carved from self.hallways below)
             if i + 1 < 2:
                 self.grid.vert_wall(xR, yT, room_h)
                 # pos = (xR, self._rand_int(yT + 1, yB))
                 # self.grid.set(*pos, None)

             # Bottom wall
             if j + 1 < 2:
                 self.grid.horz_wall(xL, yB, room_w)
                 # pos = (self._rand_int(xL + 1, xR), yB)
                 # self.grid.set(*pos, None)

     # Open up the hallway cells connecting the rooms
     for hallway in self.hallways.values():
         self.grid.set(*hallway, None)

     # Randomize the player start position and orientation
     if self._agent_default_pos is not None:
         self.agent_pos = self._agent_default_pos
         self.grid.set(*self._agent_default_pos, None)
         self.agent_dir = self._rand_int(0, 4)
     else:
         self.place_agent()

     if self._goal_default_pos is not None:
         goal = Goal()
         self.grid.set(*self._goal_default_pos, goal)
         # Bug fix: this previously read
         #     goal.init_pos, goal.cur_pos = self._goal_default_pos
         # which tuple-unpacked the (x, y) position into the two attributes
         # (init_pos = x, cur_pos = y). Both attributes must hold the full
         # position, matching the sibling environments in this file.
         goal.init_pos = goal.cur_pos = self._goal_default_pos
     else:
         self.place_obj(Goal())

     self.mission = 'Reach the goal'
Ejemplo n.º 10
0
    def _gen_grid(self, width, height):
        """Empty walled room: agent fixed at the center cell, goal placed
        at a random free cell."""
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Agent spawns at the exact center, facing direction 0
        center = (width // 2, height // 2)
        self.start_pos = center
        self.start_dir = 0

        # Goal lands on any free cell (place_obj avoids occupied cells)
        self.place_obj(Goal())

        self.mission = "GO TO GREEN SQUARE"
Ejemplo n.º 11
0
    def _gen_grid(self, width, height):
        """Plain empty room: boundary walls, then the agent — fixed start
        if configured, random otherwise."""
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square"
Ejemplo n.º 12
0
    def decode(array):
        """
        Decode an array grid encoding back into a grid
        """
        width, height, channels = array.shape
        assert channels == 3

        grid = Grid(width, height)
        for col in range(width):
            for row in range(height):
                type_idx, color_idx, state = array[col, row]

                # Unseen / empty cells stay None in the grid
                if type_idx in (OBJECT_TO_IDX['unseen'],
                                OBJECT_TO_IDX['empty']):
                    continue

                obj_type = IDX_TO_OBJECT[type_idx]
                color = IDX_TO_COLOR[color_idx]
                # State, 0: open, 1: closed, 2: locked
                is_open = state == 0
                is_locked = state == 2

                # Constructors for every decodable type; 'agent' cells
                # decode to an empty cell.
                builders = {
                    'wall': lambda: Wall(color),
                    'floor': lambda: Floor(color),
                    'ball': lambda: Ball(color),
                    'key': lambda: Key(color),
                    'box': lambda: Box(color),
                    'door': lambda: Door(color, is_open, is_locked),
                    'goal': Goal,
                    'lava': Lava,
                    'agent': lambda: None,
                }
                assert obj_type in builders, \
                    "unknown obj type in decode '%s'" % obj_type
                grid.set(col, row, builders[obj_type]())

        return grid
Ejemplo n.º 13
0
class EmptyMultigoal(MiniGridEnv):
    """Empty, fully observable room that scatters ``n_goals`` goal tiles
    and ``n_traps`` lava tiles at random free cells."""

    def __init__(
        self,
        size=8,
        agent_start_pos=None,
        agent_start_dir=None,
        n_goals=2,
        n_traps=1,
    ):
        # How many goal / lava tiles to scatter
        self.n_goals = n_goals
        self.n_traps = n_traps
        # Optional fixed agent spawn; None means random placement
        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        size += 2
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True,
            agent_view_size=size * 2 + 1,  # init as fully observable
        )

    def _gen_grid(self, width, height):
        """Build boundary walls, scatter goals and lava, drop in the agent."""
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        for _ in range(self.n_goals):
            self.place_obj(Goal())

        for _ in range(self.n_traps):
            self.place_obj(Lava())

        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square, avoid the lava"
class PlayGround2(MiniGridEnv):
    """Small room split by a vertical wall with a blue door; a few objects
    sit on the far side of the wall."""

    def __init__(self,
                 size=8,
                 agent_start_pos=(1, 1),
                 agent_start_dir=0,
                 ):

        self.agent_start_pos = agent_start_pos
        self.agent_start_dir = agent_start_dir

        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=True
        )

    def _gen_grid(self, width, height):
        """Lay out the walls, door, objects and agent."""
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)

        # Interior wall at x=3 (rows 1..6), then a blue door punched in
        for row in range(1, 7):
            self.put_obj(Wall(), 3, row)
        self.put_obj(Door('blue'), 3, 5)

        # Objects just behind the wall
        for item, x, y in (
            (Ball('red'), 4, 1),
            (Key('green'), 4, 2),
            (Box('grey'), 4, 3),
            (Ball('blue'), 4, 4),
        ):
            self.put_obj(item, x, y)

        # Place the agent
        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square"
Ejemplo n.º 15
0
    def _gen_grid(self, width: int, height: int) -> None:
        """Generate grid space.

        Jobs:
            - create grid world
            - create wall
            - set starting point
            - set goal
            - set lava

        Also fills ``self.reward_grid`` with per-cell rewards: goal cells
        get ``self.goal_reward``, obstacle cells ``self.obstacle_reward``,
        everything else ``self.default_reward``.
        """
        assert width >= 5 and height >= 5

        # Current position and direction of the agent
        self.agent_pos: Tuple[int, int] = (1, 1)  # (0,0) is wall
        self.agent_dir: int = 0

        # Create an empty grid
        self.grid = Grid(width, height)

        # Create wall
        self.grid.wall_rect(0, 0, width, height)

        # Create Goal
        # __adjust_pos_consider_walls presumably shifts interior coordinates
        # past the boundary wall — confirm in its definition.
        for position in self.goal_pos:
            goal_with_wall = self.__adjust_pos_consider_walls(position)
            self.__set_grid_type(*goal_with_wall, Goal())

        # Create Lava
        if self.obstacle_pos:
            for lava_pos in self.obstacle_pos:
                lava_with_wall = self.__adjust_pos_consider_walls(lava_pos)
                self.__set_grid_type(*lava_with_wall, self.obstacle_type())

        # Settings for reward_grid
        # NOTE(review): cells iterate as (height index, width index) —
        # assumes goal_pos / obstacle_pos use the same ordering; verify.
        for cell in itertools.product(
            range(self.valid_height), range(self.valid_width)
        ):
            if cell in self.goal_pos:
                self.reward_grid[cell] = self.goal_reward
            elif cell in self.obstacle_pos:
                self.reward_grid[cell] = self.obstacle_reward
            else:
                self.reward_grid[cell] = self.default_reward
Ejemplo n.º 16
0
    def _gen_grid(self, width, height):
        """Snake-style level: random agent start and direction, a two-cell
        snake body rendered as Lava, plus a spawned food item."""
        self.grid = Grid(width, height)

        self.grid.wall_rect(0, 0, width, height)

        # self.start_pos = (2, 2)
        # Playable extent comes from the observation image shape.
        # NOTE(review): shape unpacks as (yl, xl, _) but yl bounds the first
        # (x) coordinate — only safe on square grids; verify.
        yl, xl, _ = self.observation_space.spaces["image"].shape
        self.start_pos = (random.randint(2, yl - 2), random.randint(2, xl - 2))
        self.agent_pos = self.start_pos  # TODO: the env holding agent traits is shit!
        self.start_dir = random.randint(0, 3)
        self.agent_dir = self.start_dir
        # Two segments: head at start, tail one cell behind the facing
        # direction (assumes self.dir_vec is a numpy array so tuple-minus-
        # array broadcasting works — TODO confirm).
        self.snake = Snake(
            [self.start_pos,
             tuple(self.start_pos - self.dir_vec)])
        # Mark each snake body cell as Lava on the grid.
        [self.grid.set(*pos, Lava()) for pos in self.snake.body]

        self.spawn_new_food()

        self.mission = None
    def _gen_grid(self, width, height):
        """Open 16x16-style playground: four clusters of randomly colored
        balls, keys and boxes, agent placed with a random direction.

        NOTE(review): cell (4, 1) receives two balls — the second overwrites
        the first. Kept as-is to preserve the random-color call sequence.
        NOTE(review): self.agent_start_dir (if the class defines one) is
        ignored here; the direction is always randomized.
        """
        self.grid = Grid(width, height)
        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal square in the bottom-right corner
        # self.put_obj(Goal(), width - 2, height - 2)
        self.put_obj(Ball(rand_color()), 2, 1)

        # Top-left cluster (colors drawn in this exact order)
        self.put_obj(Ball(rand_color()), 4, 1)
        self.put_obj(Ball(rand_color()), 4, 1)
        self.put_obj(Key(rand_color()), 5, 2)
        self.put_obj(Box(rand_color()), 4, 3)
        self.put_obj(Ball(rand_color()), 4, 4)

        # Top-right cluster
        self.put_obj(Ball(rand_color()), 12, 2)
        self.put_obj(Ball(rand_color()), 14, 1)
        self.put_obj(Key(rand_color()), 14, 2)
        self.put_obj(Key(rand_color()), 11, 2)
        self.put_obj(Box(rand_color()), 14, 3)
        self.put_obj(Ball(rand_color()), 13, 1)

        # Bottom-left cluster
        self.put_obj(Key(rand_color()), 3, 11)
        self.put_obj(Ball(rand_color()), 5, 12)
        self.put_obj(Key(rand_color()), 2, 14)
        self.put_obj(Box(rand_color()), 3, 14)
        self.put_obj(Ball(rand_color()), 5, 13)

        # Bottom-right cluster
        self.put_obj(Key(rand_color()), 13, 13)
        self.put_obj(Ball(rand_color()), 12, 13)

        # Place the agent with a random facing direction
        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = np.random.randint(0, 4)

        self.mission = "get to the green goal square"
Ejemplo n.º 18
0
    def _gen_grid(self, height, width):
        """Build a num_rows x num_cols lattice of equally sized rooms with
        linked neighbors and shared door positions.

        NOTE(review): the parameter order is (height, width), the reverse of
        the sibling ``_gen_grid(self, width, height)`` methods in this file —
        confirm callers pass arguments accordingly.
        """
        # Create the grid
        self.grid = Grid(height, width)

        # Row-major table of Room objects: self.room_grid[i][j]
        self.room_grid = []

        # For each row of rooms
        for i in range(0, self.num_rows):
            row = []

            # For each column of rooms
            for j in range(0, self.num_cols):
                # Adjacent rooms share their border wall, hence the
                # (room_size - 1) stride between room origins.
                room = Room(
                    (i * (self.room_size - 1), j * (self.room_size - 1)),
                    (self.room_size, self.room_size))
                row.append(room)

                # Generate the walls for this room
                self.wall_rect(*room.top, *room.size)

            self.room_grid.append(row)

        # Second pass: wire up neighbor links and choose door positions.
        # For each row of rooms
        for i in range(0, self.num_rows):
            # For each column of rooms
            for j in range(0, self.num_cols):
                room = self.room_grid[i][j]

                # Interior bounds of the room (just inside its walls)
                i_l, j_l = (room.top[0] + 1, room.top[1] + 1)
                i_m, j_m = (room.top[0] + room.size[0] - 1,
                            room.top[1] + room.size[1] - 1)

                # Door positions
                # 'left'/'up' doors mirror the neighbor's already chosen
                # 'right'/'down' door so both rooms agree on the opening.
                # (self.rng.randint bound semantics depend on the RNG type —
                # stdlib is inclusive, numpy exclusive; confirm.)
                if j < self.num_cols - 1:
                    room.neighbors['right'] = self.room_grid[i][j + 1]
                    room.door_pos['right'] = (self.rng.randint(i_l, i_m), j_m)
                if i < self.num_rows - 1:
                    room.neighbors['down'] = self.room_grid[i + 1][j]
                    room.door_pos['down'] = (i_m, self.rng.randint(j_l, j_m))
                if j > 0:
                    room.neighbors['left'] = self.room_grid[i][j - 1]
                    room.door_pos['left'] = room.neighbors['left'].door_pos[
                        'right']
                if i > 0:
                    room.neighbors['up'] = self.room_grid[i - 1][j]
                    room.door_pos['up'] = room.neighbors['up'].door_pos['down']

        # The agent starts in the middle, facing right
        self.agent.pos = ((self.num_rows // 2) * (self.room_size - 1) +
                          (self.room_size // 2), (self.num_cols // 2) *
                          (self.room_size - 1) + (self.room_size // 2))
        self.agent.state = 'right'
Ejemplo n.º 19
0
    def _gen_grid(self, width, height):
        """Walled empty room with ``self.n_goals`` goals and
        ``self.n_traps`` lava tiles at random free cells."""
        self.grid = Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Scatter the goals, then the traps
        for _ in range(self.n_goals):
            self.place_obj(Goal())
        for _ in range(self.n_traps):
            self.place_obj(Lava())

        # Agent: fixed start if configured, random otherwise
        if self.agent_start_pos is None:
            self.place_agent()
        else:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir

        self.mission = "get to the green goal square, avoid the lava"
Ejemplo n.º 20
0
 def _gent_basic_grid(self, width, height):
     """Create a walled room criss-crossed by random "roads" of blue balls.

     Returns the list of (x, y) road cells. A Key is also dropped onto the
     agent's start cell. NOTE(review): the method name looks like a typo of
     ``_gen_basic_grid`` — kept for caller compatibility.
     """
     # Create an empty grid
     self.grid = Grid(width, height)
     # Generate the surrounding walls
     self.grid.wall_rect(0, 0, width, height)
     # random create road
     roads = []
     for i in range(self.roads):
         # NOTE(review): randint(0, 4) is inclusive, so values 3 and 4 both
         # take the final branch — possibly intended as randint(0, 3).
         choice = random.randint(0, 4)
         # Road start offset, bounded by self.fault_rate (semantics of
         # fault_rate not visible here — confirm).
         start = random.randint(1, self.fault_rate)
         if choice == 0:
             # Road with a fixed first coordinate.
             # NOTE(review): the varying coordinate is bounded by `width`
             # even though it is the second tuple element — suspicious for
             # non-square grids; verify.
             _width = random.randint(2, width - 2)
             for j in range(start, width - 1):
                 roads.append((_width, j))
         elif choice == 1:
             # Same road shape, traced in the opposite direction
             _width = random.randint(2, width - 2)
             for j in range(width - start - 1, 0, -1):
                 roads.append((_width, j))
         elif choice == 2:
             # Road with a fixed second coordinate
             _he = random.randint(2, height - 2)
             for j in range(start, height - 1):
                 roads.append((j, _he))
         else:
             _he = random.randint(2, height - 2)
             for j in range(height - start - 1, 0, -1):
                 roads.append((j, _he))
     # Mark every road cell with a blue ball
     for i in roads:
         self.put_obj(Ball(color="blue"), *i)
     # Place the agent
     if self.agent_start_pos is not None:
         self.agent_pos = self.agent_start_pos
         self.agent_dir = self.agent_start_dir
     else:
         self.place_agent()
     # Drop a key on the agent's start cell
     self.put_obj(Key(), *self.agent_pos)
     return roads
Ejemplo n.º 21
0
    def _save_obs(obs, out_dir, fname, tile_size=12):
        """
        Render an agent observation and save as image
        """
        from gym_minigrid.minigrid import Grid

        view = obs.shape[0]
        decoded, mask = Grid.decode(obs)

        # Render the full decoded view; the agent sits bottom-center of
        # its own observation, facing up (dir=3).
        image = decoded.render(
            tile_size,
            agent_pos=(view // 2, view - 1),
            agent_dir=3,
            highlight_mask=mask,
        )
        plt.imsave(os.path.join(out_dir, fname), image)
        plt.clf()
Ejemplo n.º 22
0
    def get_obs_render(self,
                       obs,
                       tile_pixels=CELL_PIXELS // 2,
                       mode='rgb_array'):
        """
        Render an agent observation for visualization
        """
        # Lazily create and cache the renderer on first use
        if self.obs_render is None:
            self.obs_render = Renderer(self.agent_view_size * tile_pixels,
                                       self.agent_view_size * tile_pixels,
                                       self._render)
        r = self.obs_render

        r.beginFrame()

        # Decode and draw the observed grid
        Grid.decode(obs).render(r, tile_pixels)

        # Draw the agent marker at the bottom-center of its view
        ratio = tile_pixels / CELL_PIXELS
        r.push()
        r.scale(ratio, ratio)
        r.translate(CELL_PIXELS * (0.5 + self.agent_view_size // 2),
                    CELL_PIXELS * (self.agent_view_size - 0.5))
        r.rotate(270)
        r.setLineColor(255, 0, 0)
        r.setColor(255, 0, 0)
        r.drawPolygon([(-12, 10), (12, 0), (-12, -10)])
        r.pop()

        r.endFrame()

        # 'pixmap' and any unrecognized mode both return the pixmap
        if mode == 'rgb_array':
            return get_array_from_pixmap(r)
        return r.getPixmap()
Ejemplo n.º 23
0
    # Run for a few episodes
    # Smoke test: drive the (already constructed) env with random actions,
    # checking invariants on every step.
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position
        assert env.agent_pos[0] < env.grid_size
        assert env.agent_pos[1] < env.grid_size

        # Test observation encode/decode roundtrip
        img = obs['image']
        grid = Grid.decode(img)
        img2 = grid.encode()
        assert np.array_equal(img, img2)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        if done:
            num_episodes += 1
            env.reset()

        # Exercise off-screen rendering on every step
        env.render('rgb_array')

    env.close()
class SingleTMaze(MiniGridEnv):
    """T-maze (or double-T when ``is_double``): the agent starts at the
    junction center; corridor arm ends hold Goal tiles, exactly one of
    which is the real goal, the rest pay the smaller ``fake_goal`` reward.
    """
    is_double = False
    # NOTE(review): class-level mutable dict — shared across instances.
    reward_values = dict(goal=1, fake_goal=0.1)
    # NOTE(review): annotated ``int`` but defaults to None (set in __init__).
    view_size: int = None

    def __init__(self, corridor_length=3, reward_position=0, max_steps=None, is_double=False, view_size=None,
                 max_corridor_length=None):
        """Configure maze geometry and reward placement.

        Args:
            corridor_length: arm length of the T; must be > 0.
            reward_position: index into the arm ends (see ``mission``).
            max_steps: episode limit; defaults to 4 + 4 * corridor_length.
            is_double: build a double-T (four arm ends) instead of two.
            view_size: agent view size; defaults to 7 when None.
            max_corridor_length: grid is sized for this length so corridor
                length can vary without changing the grid size.
        """
        if max_corridor_length is None:
            max_corridor_length = corridor_length
        self.max_corridor_length = max_corridor_length
        self.view_size = view_size if view_size is not None else 7
        self.is_double = is_double
        self.reward_position = reward_position
        self.corridor_length = corridor_length
        assert corridor_length > 0

        if max_steps is None:
            max_steps = 4 + 4 * corridor_length

        super().__init__(
            grid_size=3 + 2 * self.max_corridor_length,
            max_steps=max_steps,
            see_through_walls=True,  # True for maximum performance
            agent_view_size=self.view_size,
        )
        self.reward_range = (min(self.reward_values["fake_goal"], 0), self.reward_values["goal"])

    @property
    def mission(self):
        """Human-readable description of which arm end is the real goal."""
        goals = ["UPPER LEFT", "UPPER RIGHT", "LOWER RIGHT", "LOWER LEFT"]
        return f'Goal is {goals[self.reward_position]}'

    def _gen_grid(self, width, height):
        """Carve the T (or double-T) corridors out of a solid wall block
        and place the reward tiles at the arm ends."""
        # Create an empty grid
        self.grid = Grid(width, height)

        # Place the agent in the top-left corner
        # (actually the center of the grid — the junction of the T)
        self.start_pos = (int(width / 2), int(height / 2))
        self.start_dir = 3

        # Create walls (fill everything; corridors are carved below)
        for x in range(0, width):
            for y in range(0, height):
                self.grid.set(x, y, Wall())

        # Create paths
        if self.is_double:
            # Full vertical spine plus top and bottom horizontal bars
            for y in range(height // 2 - self.corridor_length, height // 2 + self.corridor_length + 1):
                self.grid.set(width // 2, y, None)
            for x in range(width // 2 - self.corridor_length, width // 2 + self.corridor_length + 1):
                self.grid.set(x, height // 2 - self.corridor_length, None)
                self.grid.set(x, height // 2 + self.corridor_length, None)
        else:
            # Upper half of the spine plus the top bar only
            for y in range(height // 2 - self.corridor_length, height // 2 + 1):
                self.grid.set(width // 2, y, None)
            for x in range(width // 2 - self.corridor_length, width // 2 + self.corridor_length + 1):
                self.grid.set(x, height // 2 - self.corridor_length, None)

        # Create rewards
        reward_positions = self._reward_positions(width, height)
        self._gen_rewards(reward_positions)

    def _reward_positions(self, width, height):
        """Arm-end cells, clockwise from upper-left; only the first two
        exist in the single-T layout."""
        reward_positions = [
            (width // 2 - self.corridor_length, height // 2 - self.corridor_length),
            (width // 2 + self.corridor_length, height // 2 - self.corridor_length),
            (width // 2 + self.corridor_length, height // 2 + self.corridor_length),
            (width // 2 - self.corridor_length, height // 2 + self.corridor_length),
        ]
        if not self.is_double:
            reward_positions = reward_positions[:2]
        return reward_positions

    def _reward(self):
        """Reward at the current cell, linearly discounted by the number of
        steps taken beyond the minimum needed to reach an arm end."""
        min_steps = (1 + 2 * self.corridor_length)
        # Lower arm ends of the double-T take two extra turns to reach.
        if self.is_double and self.reward_position > 1:
            min_steps += 2
        redundant_steps = max(0, self.step_count - min_steps)
        max_steps = self.max_steps - min_steps + 1
        cell = self.grid.get(self.agent_pos[0], self.agent_pos[1])
        max_reward = self.reward_values["fake_goal"]
        if hasattr(cell, "is_goal") and cell.is_goal:
            max_reward = self.reward_values["goal"]
        return min(max_reward, max_reward * (1 - min(1, (redundant_steps / max_steps))))

    def _gen_rewards(self, rewards_pos: List[Tuple[int, int]]):
        """Place a Goal tile at every arm end, tagging exactly one of them
        (index ``self.reward_position``) as the real goal."""
        for i, (x, y) in enumerate(rewards_pos):
            g = Goal()
            self.grid.set(x, y, g)
            g.is_goal = False
            if self.reward_position == i % len(rewards_pos):
                g.is_goal = True

    def render(self, mode='human', close=False, **kwargs):
        """Render with the real goal temporarily recolored blue so it is
        visually distinguishable; the original color is restored after."""
        reward_positions = self._reward_positions(width=self.width, height=self.height)
        goal = self.grid.get(*reward_positions[self.reward_position])
        assert goal.is_goal
        start_color = goal.color
        goal.color = 'blue'
        ret = super().render(mode, close, **kwargs)
        goal.color = start_color
        return ret
Ejemplo n.º 25
0
class OptRewardCrossingEnv(OptRewardMiniGridEnv):
    """
    Environment with wall or lava obstacles, sparse reward.

    The grid is divided into rooms by `num_crossings` obstacle "rivers";
    one opening per river is carved along a randomly sampled path from the
    top-left start to the bottom-right goal.
    """
    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        """
        :param size: odd side length of the square grid
        :param num_crossings: number of obstacle rivers crossing the grid
        :param obstacle_type: obstacle class placed on the rivers (e.g. Lava)
        :param seed: RNG seed forwarded to the base environment
        """
        self.num_crossings = num_crossings
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=False,
            # BUG FIX: the constructor accepted a seed but always passed
            # seed=None to the base class, silently ignoring explicit seeding.
            seed=seed)

    def _gen_grid(self, width, height):
        """Lay out walls, rivers and one opening per river."""
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.put_obj(Goal(), width - 2, height - 2)

        # Place obstacles (lava or walls)
        v, h = object(), object(
        )  # singleton `vertical` and `horizontal` objects

        # Lava rivers or walls specified by direction and position in grid
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.put_obj(self.obstacle_type(), i, j)

        # Sample path to goal
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                assert False
            self.grid.set(i, j, None)
            # Each opening is marked with a subgoal; after the loop these
            # attributes describe the LAST opening on the sampled path.
            self.put_obj(Goal(), i, j)
            self.subgoal_pos = np.asarray([i, j])
            self.horizontal = (direction == h)

        self.mission = ("avoid the lava and get to the green goal square"
                        if self.obstacle_type == Lava else
                        "find the opening and get to the green goal square")
# Ejemplo n.º 26
# 0
class NineRoomsEnv(MiniGridSimple):
    """A 3x3 grid of rooms connected by passages.

    The agent moves with cardinal actions only; the reward is 1 on the step
    that reaches the goal position and 0 otherwise.
    """

    # Only 4 actions needed, left, right, up and down

    class NineRoomsCardinalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            # Number of cardinal actions.
            return 4

    def __init__(
        self,
        grid_size=20,
        passage_size=1,
        max_steps=100,
        seed=133,
        rnd_start=0,
        start_state_exclude_rooms=None,
    ):
        """
        :param grid_size: side length of the square grid
        :param passage_size: half-width of the passages between rooms
        :param max_steps: episode step limit
        :param seed: RNG seed for the base environment
        :param rnd_start: set to 1 to spawn the agent at a random position
        :param start_state_exclude_rooms: room ids (1-9) never to spawn in
            when rnd_start is enabled
        """
        # BUG FIX: the original used a mutable default argument ([]), which
        # is shared across every call/instance; None is the safe sentinel.
        if start_state_exclude_rooms is None:
            start_state_exclude_rooms = []

        self.grid_size = grid_size
        self.passage_size = passage_size

        self._goal_default_pos = (1, 1)

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start

        # If self.rnd_start = 1, don't spawn in these rooms
        self.start_state_exclude_rooms = start_state_exclude_rooms

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(NineRoomsEnv.NineRoomsCardinalActions)

        # Set the action and observation spaces
        self.actions = NineRoomsEnv.NineRoomsCardinalActions

        self.action_space = spaces.Discrete(self.nActions)

        self.max_cells = (grid_size - 1) * (grid_size - 1)

        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])

        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )

        self.T = max_steps

        # Change the observation space to return the position in the grid

    @property
    def category(self):
        # [TODO] Make sure this doesn't break after self.agent_pos is changed to numpy.ndarray
        return self.cell_cat_map[self.agent_pos]

    def reward(self):
        # 1 on the step that reaches the goal state, 0 otherwise.
        # (The original comment claimed a -1 step penalty, which the code
        # never implemented.)
        return 1 if self.success else 0

    def _gen_grid(self, width, height, val=False, seen=True):
        """Build the nine-room grid, carve passages and place agent/goal.

        :param val: validation flag, forwarded by reset (start distribution
            is identical in both modes)
        :param seen: when sampling a random start, True restricts to rooms
            NOT in start_state_exclude_rooms; False inverts the exclusion
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Place horizontal walls through the grid
        self.grid.horz_wall(0, height // 3)
        self.grid.horz_wall(0, (2 * height) // 3)

        # Place vertical walls through the grid
        self.grid.vert_wall(width // 3, 0)
        self.grid.vert_wall((2 * width) // 3, 0)

        # Create passages around the four interior wall crossings
        passage_anchors = [(width // 3, height // 3),
                           (width // 3, (2 * height) // 3),
                           ((2 * width) // 3, height // 3),
                           ((2 * width) // 3, (2 * height) // 3)]
        passage_cells = []
        for anchor in passage_anchors:
            for delta in range(-1 * self.passage_size, self.passage_size + 1):
                passage_cells.append((anchor[0] + delta, anchor[1]))
                passage_cells.append((anchor[0], anchor[1] + delta))

        for cell in passage_cells:
            self.grid.set(*cell, None)

        # Even during validation, start state distribution
        # should be the same as that during training
        if not self.rnd_start:
            self._agent_default_pos = ((width - 2) // 2, (height - 2) // 2)
        else:
            self._agent_default_pos = None

        # Place the agent at the center
        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter
        else:

            if len(self.start_state_exclude_rooms) == 0:
                self.place_agent()
            else:
                valid_start_pos = []
                if seen:
                    exclude_from = self.start_state_exclude_rooms
                else:
                    exclude_from = [
                        x for x in range(1, 10)
                        if x not in self.start_state_exclude_rooms
                    ]
                for room in range(1, 10):
                    if room in exclude_from:
                        continue
                    # Ignore that there are walls for now, can handle that with rejection sampling

                    # Get x coordinates of allowed cells
                    valid_x = []
                    if room % 3 == 1:
                        valid_x = list(range(1, width // 3))
                    elif room % 3 == 2:
                        valid_x = list(range(width // 3 + 1, (2 * width) // 3))
                    else:
                        valid_x = list(range((2 * width) // 3 + 1, width - 1))

                    # Get valid y-coordinates of allowed cells
                    valid_y = []
                    if (room - 1) // 3 == 0:
                        valid_y = list(range(1, height // 3))
                    elif (room - 1) // 3 == 1:
                        valid_y = list(
                            range(height // 3 + 1, (2 * height) // 3))
                    else:
                        valid_y = list(range((2 * height) // 3 + 1,
                                             height - 1))

                    room_cells = list(product(valid_x, valid_y))

                    valid_start_pos += room_cells

                # Make sure start position doesn't conflict with other cells
                while True:

                    _start_pos = valid_start_pos[np.random.choice(
                        len(valid_start_pos))]
                    row = _start_pos[1]
                    col = _start_pos[0]
                    # BUG FIX: Grid.get expects (x, y) = (col, row); the
                    # original passed (row, col) and so tested a mirrored
                    # cell for occupancy.
                    cell = self.grid.get(col, row)

                    if cell is None or cell.can_overlap():
                        break

                self.start_pos = (col, row)
                self.start_dir = self._rand_int(
                    0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # The mission is the goal position itself (compared in step()).
        self.mission = goal.init_pos

    def reset(self, val=False, seen=True):
        """Reset the environment and append the state feature to the obs."""
        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """Advance one timestep with a cardinal move.

        Reward depends only on the resulting state: 1 when the goal has been
        reached, 0 otherwise.
        """
        self.step_count += 1

        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # No penalty, episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)

                    self.agent_dir = (action - 1) % 4

                    # Move only onto empty, overlappable, or goal cells.
                    if fwd_cell is None or fwd_cell.can_overlap(
                    ) or self.is_goal(move_pos):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """
        Encode the state to generate observation.

        Produces a "thermometer" feature: ones up to the agent's flattened
        cell index, zeros from that index on.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        # NOTE(review): state[1]/state[0] are read as x/y, i.e. the incoming
        # position is treated as (row, col) — confirm against the (x, y)
        # convention used for agent_pos elsewhere in this class.
        curr_x, curr_y = state[1], state[0]

        curr_pos = curr_y * self.width + curr_x

        feat[curr_pos:] = 0

        return feat
# Ejemplo n.º 27
# 0
class FourRooms(FourRoomsEnv):
    """ Overwrites the original generator to make the hallway states static """

    def __init__(self, agent_pos: tuple = (1, 1), goal_pos: tuple = (15, 15)):
        """
        :param agent_pos: fixed agent start position (x, y)
        :param goal_pos: fixed goal position (x, y)
        """
        # Fixed hallway openings (x, y) instead of randomly sampled doors.
        self.hallways = {
            'top'  : (9, 4),
            'left' : (3, 9),
            'right': (16, 9),
            'bot'  : (9, 14)
        }
        super().__init__(agent_pos=agent_pos, goal_pos=goal_pos)

    def _reward(self):
        # Undiscounted unit reward for reaching the goal.
        return 1

    def _gen_grid(self, width, height):
        """Lay out the four rooms with the static hallway openings."""

        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):

            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall (the door is carved below from self.hallways)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)

                # Bottom wall (the door is carved below from self.hallways)
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)

        # Carve the static hallway openings
        for hallway in self.hallways.values():
            self.grid.set(*hallway, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            # BUG FIX: the original tuple-unpacking assignment
            # `goal.init_pos, goal.cur_pos = self._goal_default_pos` split
            # the (x, y) position into two scalars; both attributes must
            # hold the full position tuple (chained assignment, as done in
            # the sibling environments).
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'
# Ejemplo n.º 28
# 0
    # Run for a few episodes of random actions, sanity-checking the env.
    # NOTE(review): this fragment is the tail of a test function whose header
    # is not visible here; it assumes `env` was constructed above and that
    # `random`, `np`, `Grid` and `OBJECT_TO_IDX` are in scope.
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position
        assert env.agent_pos[0] < env.width
        assert env.agent_pos[1] < env.height

        # Test observation encode/decode roundtrip
        img = obs['image']
        vis_mask = img[:, :, 0] != OBJECT_TO_IDX['unseen']  # hackish
        # NOTE(review): later in this file Grid.decode(img) is unpacked as
        # (grid, vis_mask), while here its result is used directly. These two
        # fragments assume different gym-minigrid API versions — confirm
        # which decode() signature the installed library provides.
        img2 = Grid.decode(img).encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function
        str(env)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        # An episode only counts once it terminates.
        if done:
            num_episodes += 1
            env.reset()

        env.render('rgb_array')
# Ejemplo n.º 29
# 0
    def _gen_grid(self, width, height, val=False, seen=True):
        """Build a nine-room grid, carve passages and place agent/goal.

        :param val: validation flag (start distribution is identical in both
            training and validation modes)
        :param seen: when sampling a random start, True restricts to rooms
            NOT in start_state_exclude_rooms; False inverts the exclusion
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Place horizontal walls through the grid
        self.grid.horz_wall(0, height // 3)
        self.grid.horz_wall(0, (2 * height) // 3)

        # Place vertical walls through the grid
        self.grid.vert_wall(width // 3, 0)
        self.grid.vert_wall((2 * width) // 3, 0)

        # Create passages around the four interior wall crossings
        passage_anchors = [(width // 3, height // 3),
                           (width // 3, (2 * height) // 3),
                           ((2 * width) // 3, height // 3),
                           ((2 * width) // 3, (2 * height) // 3)]
        passage_cells = []
        for anchor in passage_anchors:
            for delta in range(-1 * self.passage_size, self.passage_size + 1):
                passage_cells.append((anchor[0] + delta, anchor[1]))
                passage_cells.append((anchor[0], anchor[1] + delta))

        for cell in passage_cells:
            self.grid.set(*cell, None)

        # Even during validation, start state distribution
        # should be the same as that during training
        if not self.rnd_start:
            self._agent_default_pos = ((width - 2) // 2, (height - 2) // 2)
        else:
            self._agent_default_pos = None

        # Place the agent at the center
        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter
        else:

            if len(self.start_state_exclude_rooms) == 0:
                self.place_agent()
            else:
                valid_start_pos = []
                if seen:
                    exclude_from = self.start_state_exclude_rooms
                else:
                    exclude_from = [
                        x for x in range(1, 10)
                        if x not in self.start_state_exclude_rooms
                    ]
                for room in range(1, 10):
                    if room in exclude_from:
                        continue
                    # Ignore that there are walls for now, can handle that with rejection sampling

                    # Get x coordinates of allowed cells
                    valid_x = []
                    if room % 3 == 1:
                        valid_x = list(range(1, width // 3))
                    elif room % 3 == 2:
                        valid_x = list(range(width // 3 + 1, (2 * width) // 3))
                    else:
                        valid_x = list(range((2 * width) // 3 + 1, width - 1))

                    # Get valid y-coordinates of allowed cells
                    valid_y = []
                    if (room - 1) // 3 == 0:
                        valid_y = list(range(1, height // 3))
                    elif (room - 1) // 3 == 1:
                        valid_y = list(
                            range(height // 3 + 1, (2 * height) // 3))
                    else:
                        valid_y = list(range((2 * height) // 3 + 1,
                                             height - 1))

                    room_cells = list(product(valid_x, valid_y))

                    valid_start_pos += room_cells

                # Make sure start position doesn't conflict with other cells
                while True:

                    _start_pos = valid_start_pos[np.random.choice(
                        len(valid_start_pos))]
                    row = _start_pos[1]
                    col = _start_pos[0]
                    # BUG FIX: Grid.get expects (x, y) = (col, row); the
                    # original passed (row, col) and so tested a mirrored
                    # cell for occupancy.
                    cell = self.grid.get(col, row)

                    if cell is None or cell.can_overlap():
                        break

                self.start_pos = (col, row)
                self.start_dir = self._rand_int(
                    0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # The mission is the goal position itself.
        self.mission = goal.init_pos
    # Run for a few episodes of random actions, sanity-checking the env.
    # NOTE(review): this fragment is the tail of a test function whose header
    # is not visible here; it assumes `env` was constructed above and that
    # `random`, `np` and `Grid` are in scope.
    num_episodes = 0
    while num_episodes < 5:
        # Pick a random action
        action = random.randint(0, env.action_space.n - 1)

        obs, reward, done, info = env.step(action)

        # Validate the agent position
        assert env.agent_pos[0] < env.width
        assert env.agent_pos[1] < env.height

        # Test observation encode/decode roundtrip
        img = obs['image']
        # Grid.decode here returns (grid, vis_mask) — note this differs from
        # the earlier copy of this loop in this file, which uses the result
        # of Grid.decode directly; the two assume different library versions.
        grid, vis_mask = Grid.decode(img)
        img2 = grid.encode(vis_mask=vis_mask)
        assert np.array_equal(img, img2)

        # Test the env to string function
        str(env)

        # Check that the reward is within the specified range
        assert reward >= env.reward_range[0], reward
        assert reward <= env.reward_range[1], reward

        # An episode only counts once it terminates.
        if done:
            num_episodes += 1
            env.reset()

        env.render('rgb_array')
# Ejemplo n.º 31
# 0
class Simple2D(SearchEnv):

    def __init__(self, width=100, height=100, agent_view=7, roads=1,
                 max_step=None, fault_rate=0.3, tf=True):
        """Simple 2-D search environment with randomly drawn roads.

        ``fault_rate`` is given as a fraction of the shorter grid side and
        is stored as an absolute cell count.
        """
        self.roads = roads
        self.fault_rate = int(fault_rate * min(width, height))
        self.mission = "go to ball as much as possible"
        super().__init__(tf, width, height, agent_view, max_step)

    def _extrinsic_reward(self):
        """Abstract hook: concrete subclasses must supply the extrinsic reward."""
        raise NotImplementedError

    def _gen_grid(self, width, height):
        """Build the basic grid; the returned road list is not needed here."""
        self._gent_basic_grid(width, height)

    def _gent_basic_grid(self, width, height):
        """Create walls, carve random road segments of blue Balls, place the
        agent, and drop a Key at the agent's position.

        Returns the list of road cells (may contain duplicates).
        """
        # Create an empty grid
        self.grid = Grid(width, height)
        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)
        # random create road
        roads = []
        for i in range(self.roads):
            # choice picks the segment's orientation/traversal direction;
            # NOTE(review): randint(0, 4) is inclusive, so it yields 0..4 and
            # both 3 and 4 fall into the final else branch — that branch is
            # twice as likely as the others. Confirm whether this skew is
            # intended.
            choice = random.randint(0, 4)
            # start offsets the segment's beginning by up to fault_rate cells
            start = random.randint(1, self.fault_rate)
            if choice == 0:
                # vertical line at x=_width, running top-down
                _width = random.randint(2, width - 2)
                for j in range(start, width - 1):
                    roads.append((_width, j))
            elif choice == 1:
                # vertical line at x=_width, running bottom-up
                _width = random.randint(2, width - 2)
                for j in range(width - start - 1, 0, -1):
                    roads.append((_width, j))
            elif choice == 2:
                # horizontal line at y=_he, running left-right
                # NOTE(review): these branches iterate x over height-based
                # bounds (and the vertical ones iterate y over width-based
                # bounds) — only consistent on square grids; confirm for
                # width != height.
                _he = random.randint(2, height - 2)
                for j in range(start, height - 1):
                    roads.append((j, _he))
            else:
                # horizontal line at y=_he, running right-left
                _he = random.randint(2, height - 2)
                for j in range(height - start - 1, 0, -1):
                    roads.append((j, _he))
        # Mark every road cell with a blue Ball.
        for i in roads:
            self.put_obj(Ball(color="blue"), *i)
        # Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()
        # Drop a Key marker at the agent's starting cell.
        self.put_obj(Key(), *self.agent_pos)
        return roads

    def _reward(self):
        """Reward at the agent's current cell, read from the reward map."""
        x, y = self.agent_pos[0], self.agent_pos[1]
        reward_map = self._build_rewards()
        return reward_map[x][y]

    def _check_finish(self):
        """Episode status: -1 when out of steps or battery, 1 on success,
        0 while still running."""
        out_of_resources = self.step_count >= self.max_steps or self.battery == 0
        if out_of_resources:
            return -1
        return 1 if self._extrinsic_reward()[0] == 1 else 0

    def _build_rewards(self):
        """Build a per-cell reward map indexed as rewards[x][y].

        Road cells (balls, plus boxes already visited according to
        self.memory) are worth 0; every other cell is penalised by 1 plus
        its Manhattan distance to the nearest known road cell. Boxes
        currently in the agent's view also count as known road cells for
        the distance term.
        """
        rewards = []
        roads = set()
        for obj in self.grid.grid:
            if obj is not None and obj.type == "ball":
                rewards.append(0)
                roads.add(obj.cur_pos)
            elif obj is not None and obj.type == "box" and self.memory[obj.cur_pos[0]][obj.cur_pos[1]] > 0:
                rewards.append(0)
                roads.add(obj.cur_pos)
            else:
                rewards.append(-1)
        # Boxes currently visible to the agent are treated as known roads.
        for obj in self.gen_obs_grid()[0].grid:
            if obj is not None and obj.type == "box":
                roads.add(obj.cur_pos)
        # BUG FIX: the reshape was hard-coded to (20, 20) and broke for any
        # other grid size (the constructor default is 100x100). grid.grid is
        # stored row-major (height rows of width cells), so reshape to
        # (height, width) and transpose to obtain rewards[x][y] indexing.
        rewards = np.array(rewards).reshape(self.height, self.width).T
        # Subtract the Manhattan distance to the nearest road from each cell.
        for x, y in itertools.product(range(self.width), range(self.height)):
            rewards[x][y] -= min(abs(rx - x) + abs(ry - y) for rx, ry in roads)
        # Road cells themselves are always worth exactly 0.
        for rx, ry in roads:
            rewards[rx][ry] = 0
        return rewards