Ejemplo n.º 1
0
    def _gen_grid(self, width, height):
        """Build a 2x2 layout of rooms, then place the agent and the goal.

        The grid is enclosed by outer walls and divided into quadrants of
        ``width // 2`` by ``height // 2`` cells. Each dividing wall segment may
        get one door, which is carved by clearing a randomly drawn wall cell.
        When ``self.two_rooms`` is set, no horizontal dividing walls are built
        and the lower vertical segment gets no door if ``height < 7``.

        Args:
          width: Grid width in cells.
          height: Grid height in cells.
        """
        # Create the grid
        self.grid = multigrid.Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(2):
            # For each column
            for i in range(2):
                x_left = i * room_w
                y_top = j * room_h
                x_right = x_left + room_w
                y_bottom = y_top + room_h

                # Vertical wall and door (only to the right of the left column)
                if i + 1 < 2:
                    self.grid.vert_wall(x_right, y_top, room_h)
                    if not (j == 1 and self.two_rooms and height < 7):
                        pos = (x_right, self._rand_int(y_top + 1, y_bottom))
                        # Only carve the door if the drawn cell passes the
                        # bounds check, so the outer border is left intact.
                        if not (pos[0] <= 1 or pos[0] >= width - 1
                                or pos[1] <= 0 or pos[1] >= height - 1):
                            self.grid.set(*pos, None)

                # Horizontal wall and door (only below the top row)
                if not self.two_rooms:
                    if j + 1 < 2:
                        self.grid.horz_wall(x_left, y_bottom, room_w)
                        pos = (self._rand_int(x_left + 1, x_right), y_bottom)
                        if not (pos[0] <= 1 or pos[0] >= width - 1
                                or pos[1] <= 0 or pos[1] >= height - 1):
                            self.grid.set(*pos, None)

        # Use the fixed start position if one was configured (with a random
        # orientation); otherwise let the environment pick a position.
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)  # random start direction
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = minigrid.Goal()
            self.put_obj(goal, *self._goal_default_pos)
            # Both attributes must hold the full (x, y) position. Unpacking
            # the position into the two attributes would store x in init_pos
            # and y in cur_pos.
            goal.init_pos = goal.cur_pos = tuple(self._goal_default_pos)
        else:
            self.place_obj(minigrid.Goal())

        self.mission = 'Reach the goal'
Ejemplo n.º 2
0
    def _gen_grid(self, width, height):
        """Create a walled room containing a goal, lava clutter and the agent.

        The goal is placed randomly when ``self.randomize_goal`` is truthy and
        at ``(width - 2, height - 2)`` otherwise. ``self.n_clutter`` ``LavaWall``
        objects are then placed, followed by the agent.

        Args:
          width: Grid width in cells.
          height: Grid height in cells.
        """
        # Empty grid enclosed by a rectangular wall.
        self.grid = multigrid.Grid(width, height)
        self.grid.wall_rect(0, 0, width, height)

        # Goal: fixed cell next to the bottom-right corner unless randomized.
        if not self.randomize_goal:
            self.put_obj(minigrid.Goal(), width - 2, height - 2)
        else:
            self.place_obj(minigrid.Goal(), max_tries=100)

        # Lava obstacles.
        for _clutter_idx in range(self.n_clutter):
            lava = LavaWall()
            self.place_obj(lava, max_tries=100)

        self.place_agent()

        self.mission = 'get to the green square'
Ejemplo n.º 3
0
    def decode(type_idx, color_idx, state):
        """Build the object described by an encoded (type, color, state) triple.

        Args:
          type_idx: Index into ``minigrid.IDX_TO_OBJECT``.
          color_idx: Index into ``minigrid.IDX_TO_COLOR``; passed through
            unchanged when the object is an agent.
          state: For doors, 0 means open, 1 closed and 2 locked. For agents it
            is forwarded to ``Agent``.

        Returns:
          ``None`` for the 'empty' and 'unseen' types, otherwise a new object
          of the matching class.
        """
        obj_type = minigrid.IDX_TO_OBJECT[type_idx]

        # Agents receive the raw colour index, so only the other types are
        # translated through the colour table.
        if obj_type != 'agent':
            color = minigrid.IDX_TO_COLOR[color_idx]

        if obj_type in ('empty', 'unseen'):
            return None

        # Types whose constructor takes nothing but a colour.
        color_only_types = {
            'wall': minigrid.Wall,
            'floor': minigrid.Floor,
            'ball': minigrid.Ball,
            'key': minigrid.Key,
            'box': minigrid.Box,
        }

        if obj_type in color_only_types:
            v = color_only_types[obj_type](color)
        elif obj_type == 'door':
            # State, 0: open, 1: closed, 2: locked
            v = Door(color, state == 0, state == 2)
        elif obj_type == 'goal':
            v = minigrid.Goal()
        elif obj_type == 'lava':
            v = minigrid.Lava()
        elif obj_type == 'agent':
            v = Agent(color_idx, state)
        else:
            assert False, "unknown object type in decode '%s'" % obj_type

        return v
Ejemplo n.º 4
0
    def reset_random(self):
        """Rebuild the environment with randomly placed goal, agent and walls.

        Half of ``self.n_clutter`` (rounded towards zero) walls are placed.

        Returns:
          Whatever ``self.reset_agent()`` returns.
        """
        self.graph = grid_graph(dim=[self.width - 2, self.height - 2])

        # Step counters start from scratch.
        self.step_count = 0
        self.adversary_step_count = 0

        # Current position and direction of the agent
        self.reset_agent_status()

        # Nothing has been placed yet.
        self.agent_start_pos = None
        self.goal_pos = None

        # Extra metrics
        self.reset_metrics()

        # Start from an empty grid.
        self._gen_grid(self.width, self.height)

        # Goal first, then the agent with a random heading.
        goal = minigrid.Goal()
        self.goal_pos = self.place_obj(goal, max_tries=100)

        self.agent_start_dir = self._rand_int(0, 4)
        self.agent_start_pos = self.place_one_agent(0, rand_dir=False)

        # Scatter the walls.
        n_walls = int(self.n_clutter / 2)
        for _wall_idx in range(n_walls):
            self.place_obj(minigrid.Wall(), max_tries=100)

        self.compute_shortest_path()
        self.n_clutter_placed = int(self.n_clutter / 2)

        first_obs = self.reset_agent()
        return first_obs
Ejemplo n.º 5
0
    def _gen_grid(self, width, height):
        """Build a room split by a wall with a locked door, a key and a goal.

        A vertical wall at a random column splits the room. The agent and a
        yellow key are placed to the left of it, the goal sits at
        ``(width - 2, height - 2)``, and a locked yellow door is put into the
        splitting wall at a random row.

        Args:
          width: Grid width in cells.
          height: Grid height in cells.
        """
        self.height = height

        # Create an empty grid
        self.grid = multigrid.Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place a goal in the bottom-right corner
        self.put_obj(minigrid.Goal(), width - 2, height - 2)

        # Create a vertical splitting wall; narrow grids allow column 2
        start_idx = 2 if width <= 5 else 3
        self.split_idx = self._rand_int(start_idx, width - 2)
        self.grid.vert_wall(self.split_idx, 0)

        # Place the agent at a random position and orientation
        # on the left side of the splitting wall
        self.place_agent(size=(self.split_idx, height))

        # Place a door in the wall. The door index is a row (y coordinate), so
        # it is bounded by the grid height rather than the width; on square
        # grids the two bounds coincide.
        door_idx = self._rand_int(1, height - 2)
        self.put_obj(multigrid.Door('yellow', is_locked=True), self.split_idx,
                     door_idx)

        # Place a yellow key on the left side
        self.place_obj(obj=minigrid.Key('yellow'),
                       top=(0, 0),
                       size=(self.split_idx, height))

        self.mission = 'Use the key to open the door and then get to the goal'
Ejemplo n.º 6
0
  def _gen_grid(self, width, height):
    """Create a walled room with a goal, the agents and per-agent task objects.

    For every agent index a locked grey door, a grey key, a purple ball and a
    green box are created, stored in ``self.doors``, ``self.keys``,
    ``self.balls`` and ``self.boxes``, and placed on the grid.

    Args:
      width: Grid width in cells.
      height: Grid height in cells.
    """
    self.height = height

    # Empty grid enclosed by a rectangular wall.
    self.grid = multigrid.Grid(width, height)
    self.grid.wall_rect(0, 0, width, height)

    # The goal is placed by place_obj (not at a fixed corner), then the agents.
    self.place_obj(minigrid.Goal(), max_tries=100)
    self.place_agent()

    for agent_idx in range(self.n_agents):
      door = multigrid.Door('grey', is_locked=True)
      self.doors[agent_idx] = door
      self.place_obj(self.doors[agent_idx], max_tries=100)

      key = minigrid.Key('grey')
      self.keys[agent_idx] = key
      self.place_obj(self.keys[agent_idx], max_tries=100)

      ball = minigrid.Ball('purple')
      self.balls[agent_idx] = ball
      self.place_obj(self.balls[agent_idx], max_tries=100)

      box = minigrid.Box('green')
      self.boxes[agent_idx] = box
      self.place_obj(self.boxes[agent_idx], max_tries=100)

    # Every agent starts at task index 0.
    self.task_idx = [0] * self.n_agents

    self.mission = 'Do some random tasks'
Ejemplo n.º 7
0
    def decode(type_idx, color_idx, state):
        """Turn an encoded (type, color, state) triple back into an object.

        Args:
          type_idx: Index into ``minigrid.IDX_TO_OBJECT``.
          color_idx: Index into ``minigrid.IDX_TO_COLOR``; handed to ``Agent``
            as-is when the type is "agent".
          state: Door state (0 open, 1 closed, 2 locked) or the value forwarded
            to ``Agent``.

        Returns:
          ``None`` for "empty" and "unseen", otherwise the constructed object.
        """
        obj_type = minigrid.IDX_TO_OBJECT[type_idx]

        # The colour name is only resolved for non-agent types.
        if obj_type != "agent":
            color = minigrid.IDX_TO_COLOR[color_idx]

        if obj_type in ("empty", "unseen"):
            return None

        # One zero-argument builder per known type. Each closure is evaluated
        # only for the selected type, so `color` is never read for agents.
        builders = {
            "wall": lambda: minigrid.Wall(color),
            "floor": lambda: minigrid.Floor(color),
            "ball": lambda: minigrid.Ball(color),
            "key": lambda: minigrid.Key(color),
            "box": lambda: minigrid.Box(color),
            # State, 0: open, 1: closed, 2: locked
            "door": lambda: Door(color, state == 0, state == 2),
            "goal": lambda: minigrid.Goal(),
            "lava": lambda: minigrid.Lava(),
            "agent": lambda: Agent(color_idx, state),
        }

        if obj_type in builders:
            v = builders[obj_type]()
        else:
            assert False, "unknown object type in decode '%s'" % obj_type

        return v
Ejemplo n.º 8
0
    def _gen_grid(self, width, height):
        """Create an empty walled room with one goal and the agents.

        Args:
          width: Grid width in cells.
          height: Grid height in cells.
        """
        # Fresh grid with a wall around its border.
        grid = multigrid.Grid(width, height)
        self.grid = grid
        self.grid.wall_rect(0, 0, width, height)

        goal = minigrid.Goal()
        if not self.randomize_goal:
            # Fixed goal square next to the bottom-right corner.
            self.put_obj(goal, width - 2, height - 2)
        else:
            self.place_obj(goal, max_tries=100)

        # Place the agents
        self.place_agent()

        self.mission = 'get to the green goal square'
Ejemplo n.º 9
0
 def _gen_grid(self, width, height):
     """Populate the grid from the character map in ``self._raw_grid``.

     The grid is sized from ``self.width`` and ``self.height``; the ``width``
     and ``height`` arguments are not used. Every non-space character is
     looked up in ``ASCII_TO_OBJECT``; an 's' cell also fixes the agent
     position and gives it a random direction. If the map has no 's' or no
     'g', the agent or a goal is placed by the environment instead.
     """
     self.grid = minigrid.Grid(self.width, self.height)
     for x in range(self.width):
         for y in range(self.height):
             symbol = self._raw_grid[x, y]
             if symbol == ' ':
                 # Blank cells stay empty.
                 continue
             if symbol == 's':
                 self.agent_pos = (x, y)
                 self.agent_dir = self._rand_int(0, 4)
             factory = ASCII_TO_OBJECT[symbol]
             cell_obj = None if factory is None else factory()
             self.grid.set(x, y, cell_obj)
     # If a start position has not been specified, place agent randomly.
     if 's' not in self._raw_grid:
         self.place_agent()
     # If no goal has been specified, place goal randomly.
     if 'g' not in self._raw_grid:
         self.place_obj(minigrid.Goal())
     self.mission = self._mission
Ejemplo n.º 10
0
    def _gen_grid(self, width, height):
        """Build the grid from ``self.bit_map`` with a fixed goal and start.

        The goal goes to ``self.goal_pos`` and agent 0 to ``self.start_pos``.
        A wall is then put at ``(x + 1, y + 1)`` for every truthy
        ``self.bit_map[y, x]`` entry.

        Args:
          width: Grid width in cells.
          height: Grid height in cells.
        """
        # Create an empty grid
        self.grid = multigrid.Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Goal
        self.put_obj(minigrid.Goal(), self.goal_pos[0], self.goal_pos[1])

        # Agent
        self.place_agent_at_pos(0, self.start_pos)

        # Walls. The map is indexed [y, x], so y spans the first axis and x
        # the second; pairing the ranges the other way round breaks on
        # non-square maps. The x-outer / y-inner order is kept.
        n_rows, n_cols = self.bit_map.shape[0], self.bit_map.shape[1]
        for x in range(n_cols):
            for y in range(n_rows):
                if self.bit_map[y, x]:
                    # Add an offset of 1 for the outer walls
                    self.put_obj(minigrid.Wall(), x + 1, y + 1)
Ejemplo n.º 11
0
    def step_adversary(self, action):
        """The adversary gets a step for each available square in the grid.

        At each step it chooses whether to place the goal, the agent, a block,
        or nothing. If it chooses agent or goal and they have already been
        placed, they will be moved to the new location.

        Args:
          action: An integer in range 0-3 specifying which object to place:
            0 = goal
            1 = agent
            2 = wall
            3 = nothing

        Returns:
          A tuple ``(obs, 0, done, {})``. ``obs`` is a dict with the keys
          'image', 'time_step', 'random_z', 'x' and 'y'; the reward is always
          0; ``done`` becomes True once ``adversary_step_count`` reaches
          ``adversary_max_steps``.
        """
        done = False

        if self.adversary_step_count < self.adversary_max_steps:
            # The square being decided on is derived from the step counter.
            # NOTE(review): x and y are only bound in this branch or in the
            # trailing `else`; if this method is called when the counter has
            # already reached adversary_max_steps they would be unbound when
            # `obs` is built -- confirm callers never step past the end.
            x, y = self.get_xy_from_step(self.adversary_step_count)

            # Place goal
            if action == 0:
                if self.goal_pos is None:
                    self.put_obj(minigrid.Goal(), x, y)
                else:
                    # Move the existing goal object instead of creating one.
                    goal = self.grid.get(self.goal_pos[0], self.goal_pos[1])
                    self.grid.set(self.goal_pos[0], self.goal_pos[1], None)
                    self.put_obj(goal, x, y)
                self.goal_pos = (x, y)

            # Place the agent
            elif action == 1:
                if self.agent_start_pos is not None:
                    # Lift the agent object off its old square so it can be
                    # handed to place_agent_at_pos below.
                    agent = self.grid.get(self.agent_start_pos[0],
                                          self.agent_start_pos[1])
                    self.grid.set(self.agent_start_pos[0],
                                  self.agent_start_pos[1], None)
                else:
                    agent = None
                self.agent_start_pos = np.array([x, y])
                self.place_agent_at_pos(0,
                                        self.agent_start_pos,
                                        rand_dir=False,
                                        agent_obj=agent)

            # Place wall
            elif action == 2:
                self.put_obj(minigrid.Wall(), x, y)
                self.n_clutter_placed += 1

                # Stored with the 1-cell offset removed; these entries are
                # later used as node keys for self.graph.remove_node.
                self.wall_locs.append((x - 1, y - 1))

        self.adversary_step_count += 1

        # End of episode
        if self.adversary_step_count >= self.adversary_max_steps:
            done = True

            # If the adversary has not placed the agent or goal, place them randomly
            if self.agent_start_pos is None:
                self.agent_start_pos = self.select_random_grid_position()
                # If wall exists here, remove it
                self.remove_wall(self.agent_start_pos[0],
                                 self.agent_start_pos[1])
                self.place_agent_at_pos(0,
                                        self.agent_start_pos,
                                        rand_dir=False)
                # 0 records that the agent position was chosen randomly.
                self.deliberate_agent_placement = 0
            else:
                # 1 records that the adversary chose the agent position.
                self.deliberate_agent_placement = 1

            if self.goal_pos is None:
                self.goal_pos = self.select_random_grid_position()
                # If wall exists here, remove it
                self.remove_wall(self.goal_pos[0], self.goal_pos[1])
                self.put_obj(minigrid.Goal(), self.goal_pos[0],
                             self.goal_pos[1])

            # Build graph after we are certain agent and goal are placed
            for w in self.wall_locs:
                self.graph.remove_node(w)
            self.compute_shortest_path()
        else:
            # Not finished: report the square the next step will decide on.
            x, y = self.get_xy_from_step(self.adversary_step_count)

        image = self.grid.encode()
        obs = {
            'image': image,
            'time_step': [self.adversary_step_count],
            'random_z': self.generate_random_z(),
            'x': [x],
            'y': [y]
        }

        return obs, 0, done, {}
Ejemplo n.º 12
0
    def step_adversary(self, loc):
        """The adversary gets n_clutter + 2 moves to place the goal, agent, blocks.

        The action space is the number of possible squares in the grid. The
        squares are numbered from left to right, top to bottom.

        Args:
          loc: An integer specifying the location to place the next object
            which must be decoded into x, y coordinates.

        Returns:
          A tuple ``(obs, 0, done, {})``. ``obs`` is a dict with the keys
          'image', 'time_step' and 'random_z'; the reward is always 0; ``done``
          becomes True once ``adversary_step_count`` reaches
          ``adversary_max_steps``.

        Raises:
          ValueError: If ``loc`` is negative or not smaller than
            ``self.adversary_action_dim``.
        """
        # A negative index would otherwise wrap around through the modulo
        # below and silently address a different square.
        if loc < 0 or loc >= self.adversary_action_dim:
            raise ValueError(
                'Position passed to step_adversary is outside the grid.')

        # Add offset of 1 for outside walls
        x = int(loc % (self.width - 2)) + 1
        y = int(loc / (self.width - 2)) + 1
        done = False

        # The goal and agent are chosen either on the last two steps or on the
        # first two, depending on the configuration.
        if self.choose_goal_last:
            should_choose_goal = self.adversary_step_count == self.adversary_max_steps - 2
            should_choose_agent = self.adversary_step_count == self.adversary_max_steps - 1
        else:
            should_choose_goal = self.adversary_step_count == 0
            should_choose_agent = self.adversary_step_count == 1

        # Place goal
        if should_choose_goal:
            # If there is goal noise, sometimes randomly place the goal
            if random.random() < self.goal_noise:
                self.goal_pos = self.place_obj(minigrid.Goal(), max_tries=100)
            else:
                self.remove_wall(
                    x, y)  # Remove any walls that might be in this loc
                self.put_obj(minigrid.Goal(), x, y)
                self.goal_pos = (x, y)

        # Place the agent
        elif should_choose_agent:
            self.remove_wall(x,
                             y)  # Remove any walls that might be in this loc

            # Goal has already been placed here
            if self.grid.get(x, y) is not None:
                # Place agent randomly
                self.agent_start_pos = self.place_one_agent(0, rand_dir=False)
                self.deliberate_agent_placement = 0
            else:
                self.agent_start_pos = np.array([x, y])
                self.place_agent_at_pos(0,
                                        self.agent_start_pos,
                                        rand_dir=False)
                self.deliberate_agent_placement = 1

        # Place wall
        elif self.adversary_step_count < self.adversary_max_steps:
            # If there is already an object there, action does nothing
            if self.grid.get(x, y) is None:
                self.put_obj(minigrid.Wall(), x, y)
                self.n_clutter_placed += 1
                # Stored with the 1-cell offset removed; these entries are
                # later used as node keys for self.graph.remove_node.
                self.wall_locs.append((x - 1, y - 1))

        self.adversary_step_count += 1

        # End of episode
        if self.adversary_step_count >= self.adversary_max_steps:
            done = True
            # Build graph after we are certain agent and goal are placed
            for w in self.wall_locs:
                self.graph.remove_node(w)
            self.compute_shortest_path()

        image = self.grid.encode()
        obs = {
            'image': image,
            'time_step': [self.adversary_step_count],
            'random_z': self.generate_random_z()
        }

        return obs, 0, done, {}
Ejemplo n.º 13
0
    def render(self, mode="array", **kwargs):
        """Render the world in one of three modes.

        Args:
          mode: "array" returns a deep copy of ``self.world_tensor`` with the
            current position marked as 9. "curses" draws a 1-D or 2-D text view
            on a curses screen, then sleeps and returns ``None``. "minigrid"
            returns the image produced by a ``minigrid.Grid`` render.
          **kwargs: "extra_text" adds a line of text in curses mode;
            "sleep_time" is the number of seconds to sleep after drawing in
            curses mode (defaults to 0.0).

        Raises:
          NotImplementedError: For an unknown mode, or in curses mode when
            ``self.world_dim`` is neither 1 nor 2.
        """
        if mode == "array":
            snapshot = copy.deepcopy(self.world_tensor)
            snapshot[tuple(self.world_radius + self.current_position)] = 9
            return snapshot

        if mode == "minigrid":
            width = 2 * self.world_radius + 2
            height = width
            grid = minigrid.Grid(width, height)

            # Generate the surrounding walls
            grid.horz_wall(0, 0)
            grid.horz_wall(0, height - 1)
            grid.vert_wall(0, 0)
            grid.vert_wall(width - 1, 0)

            # The agent is drawn as a red Goal tile at the latest position.
            agent_pos = np.array(self.positions[-1]) + 1 + self.world_radius
            agent_marker = minigrid.Goal()
            agent_marker.color = "red"
            grid.set(agent_pos[0], agent_pos[1], agent_marker)
            agent_marker.init_pos = tuple(agent_pos)
            agent_marker.cur_pos = tuple(agent_pos)

            # NOTE(review): unlike agent_pos, no +1 offset is applied here;
            # confirm that this is intended.
            goal_pos = self.goal_position + self.world_radius
            goal_marker = minigrid.Goal()
            grid.set(goal_pos[0], goal_pos[1], goal_marker)
            goal_marker.init_pos = tuple(goal_pos)
            goal_marker.cur_pos = tuple(goal_pos)

            # Highlight up to 5 cells on each side of the agent, clipped to
            # the range [1, height - 1].
            highlight_mask = np.zeros((height, width), dtype=bool)
            minx = max(1, agent_pos[0] - 5)
            maxx = min(height - 1, agent_pos[0] + 5)
            miny = max(1, agent_pos[1] - 5)
            maxy = min(height - 1, agent_pos[1] + 5)
            highlight_mask[minx : (maxx + 1), miny : (maxy + 1)] = True

            return grid.render(
                minigrid.TILE_PIXELS, agent_pos, None, highlight_mask=highlight_mask
            )

        if mode != "curses":
            raise NotImplementedError("Unknown render mode {}.".format(mode))

        # From here on: curses mode.
        if self.world_dim == 1:
            cells = ["_"] * (1 + 2 * self.world_radius)

            goal_ind = self.goal_position[0] + self.world_radius
            cells[goal_ind] = "G"
            # The mirror image of the goal index is marked "W".
            cells[2 * self.world_radius - goal_ind] = "W"
            cells[self.current_position[0] + self.world_radius] = "X"

            line = " ".join(cells)

            if self.curses_screen is None:
                self.curses_screen = curses.initscr()

            self.curses_screen.addstr(0, 0, line)
            if "extra_text" in kwargs:
                self.curses_screen.addstr(1, 0, kwargs["extra_text"])
            self.curses_screen.refresh()
        elif self.world_dim == 2:
            side = 1 + 2 * self.world_radius
            rows = [["_"] * side for _ in range(side)]

            # Applied in this order, so a later match overrides an earlier
            # one and the agent marker overrides everything.
            glyphs = (
                (self.GOAL, "G"),
                (self.WRONG_CORNER, "C"),
                (self.WALL, "W"),
            )

            for r in range(side):
                for c in range(side):
                    for code, glyph in glyphs:
                        if self.world_tensor[r][c] == code:
                            rows[r][c] = glyph

                    here = (r, c) == self.world_radius + self.current_position
                    if here.all():
                        rows[r][c] = "X"

            if self.curses_screen is None:
                self.curses_screen = curses.initscr()

            for line_no, row in enumerate(rows):
                self.curses_screen.addstr(line_no, 0, " ".join(row))

            self.curses_screen.addstr(len(rows), 0, str(self.state()))
            if "extra_text" in kwargs:
                self.curses_screen.addstr(
                    len(rows) + 1, 0, kwargs["extra_text"]
                )

            self.curses_screen.refresh()
        else:
            raise NotImplementedError("Cannot render worlds of > 2 dimensions.")

        time.sleep(kwargs.get("sleep_time", 0.0))