コード例 #1
0
ファイル: house.py プロジェクト: pablo-vs/gym-minigrid
    def decode(array):
        """
        Rebuild a grid from its array encoding.

        `array` must have shape (width, height, 3); each cell holds an
        (object type index, color index, state) triple. Cells encoded as
        'unseen' or 'empty' are skipped and left untouched in the new grid;
        'agent' cells are explicitly set to None. Returns the new Grid.
        """

        width, height, channels = array.shape
        assert channels == 3

        # Object types whose constructor takes only a color
        colored_types = {
            'wall': Wall,
            'floor': Floor,
            'ball': Ball,
            'key': Key,
            'box': Box,
        }
        # Object types whose constructor takes no arguments
        plain_types = {'goal': Goal, 'lava': Lava}

        grid = Grid(width, height)
        for x in range(width):
            for y in range(height):
                type_idx, color_idx, state = array[x, y]

                # Nothing to place for unseen or empty cells
                if type_idx in (OBJECT_TO_IDX['unseen'],
                                OBJECT_TO_IDX['empty']):
                    continue

                obj_type = IDX_TO_OBJECT[type_idx]
                color = IDX_TO_COLOR[color_idx]

                if obj_type in colored_types:
                    obj = colored_types[obj_type](color)
                elif obj_type == 'door':
                    # State, 0: open, 1: closed, 2: locked
                    obj = Door(color, state == 0, state == 2)
                elif obj_type in plain_types:
                    obj = plain_types[obj_type]()
                elif obj_type == 'agent':
                    obj = None
                else:
                    assert False, "unknown obj type in decode '%s'" % obj_type

                grid.set(x, y, obj)

        return grid
コード例 #2
0
class SingleTMaze(MiniGridEnv):
    """
    T-maze environment (optionally a double T-maze) with goals at the arm ends.

    A vertical corridor is carved from the centre of an all-wall grid up to a
    horizontal arm; when `is_double` is set the corridor and arm are mirrored
    below the centre as well. Each arm end holds a Goal object, but only the
    one selected by `reward_position` is flagged `is_goal`. `_reward` scales
    the "goal" value when the agent stands on the flagged cell and the
    "fake_goal" value otherwise.
    """
    is_double = False
    reward_values = dict(goal=1, fake_goal=0.1)
    view_size: int = None

    def __init__(self, corridor_length=3, reward_position=0, max_steps=None, is_double=False, view_size=None,
                 max_corridor_length=None):
        """
        :param corridor_length: length of the corridor and of each arm half; must be > 0
        :param reward_position: index of the rewarded goal in the list returned by
            `_reward_positions`
        :param max_steps: episode step limit; defaults to 4 + 4 * corridor_length
        :param is_double: carve the mirrored (lower) half of the maze as well
        :param view_size: agent view size; defaults to 7
        :param max_corridor_length: corridor length used to size the grid
            (grid_size = 3 + 2 * max_corridor_length); defaults to corridor_length
        """
        if max_corridor_length is None:
            max_corridor_length = corridor_length
        self.max_corridor_length = max_corridor_length
        self.view_size = view_size if view_size is not None else 7
        self.is_double = is_double
        self.reward_position = reward_position
        self.corridor_length = corridor_length
        assert corridor_length > 0

        if max_steps is None:
            max_steps = 4 + 4 * corridor_length

        super().__init__(
            grid_size=3 + 2 * self.max_corridor_length,
            max_steps=max_steps,
            see_through_walls=True,  # True for maximum performance
            agent_view_size=self.view_size,
        )
        self.reward_range = (min(self.reward_values["fake_goal"], 0), self.reward_values["goal"])

    @property
    def mission(self):
        """Human-readable description of which arm end holds the real goal."""
        goals = ["UPPER LEFT", "UPPER RIGHT", "LOWER RIGHT", "LOWER LEFT"]
        return f'Goal is {goals[self.reward_position]}'

    def _gen_grid(self, width, height):
        """Fill the grid with walls, carve the maze corridors and place the goals."""
        # Create an empty grid
        self.grid = Grid(width, height)

        # Place the agent at the centre of the grid
        self.start_pos = (int(width / 2), int(height / 2))
        self.start_dir = 3

        # Create walls
        for x in range(0, width):
            for y in range(0, height):
                self.grid.set(x, y, Wall())

        # Create paths
        if self.is_double:
            # Vertical corridor spanning both halves, with an arm at each end
            for y in range(height // 2 - self.corridor_length, height // 2 + self.corridor_length + 1):
                self.grid.set(width // 2, y, None)
            for x in range(width // 2 - self.corridor_length, width // 2 + self.corridor_length + 1):
                self.grid.set(x, height // 2 - self.corridor_length, None)
                self.grid.set(x, height // 2 + self.corridor_length, None)
        else:
            # Vertical corridor from the centre upwards, with a single arm on top
            for y in range(height // 2 - self.corridor_length, height // 2 + 1):
                self.grid.set(width // 2, y, None)
            for x in range(width // 2 - self.corridor_length, width // 2 + self.corridor_length + 1):
                self.grid.set(x, height // 2 - self.corridor_length, None)

        # Create rewards
        reward_positions = self._reward_positions(width, height)
        self._gen_rewards(reward_positions)

    def _reward_positions(self, width, height):
        """
        Return the arm-end coordinates that hold goals.

        Order is upper-left, upper-right, lower-right, lower-left; only the
        first two are returned for a single (non-double) maze.
        """
        reward_positions = [
            (width // 2 - self.corridor_length, height // 2 - self.corridor_length),
            (width // 2 + self.corridor_length, height // 2 - self.corridor_length),
            (width // 2 + self.corridor_length, height // 2 + self.corridor_length),
            (width // 2 - self.corridor_length, height // 2 + self.corridor_length),
        ]
        if not self.is_double:
            reward_positions = reward_positions[:2]
        return reward_positions

    def _reward(self):
        """
        Compute the reward for the cell the agent currently stands on.

        The maximum is the "goal" value when the cell is flagged `is_goal`,
        otherwise the "fake_goal" value. It decays linearly with the number of
        steps taken beyond `min_steps`, reaching zero once the redundant steps
        equal `self.max_steps - min_steps + 1`.
        """
        min_steps = (1 + 2 * self.corridor_length)
        if self.is_double and self.reward_position > 1:
            min_steps += 2
        redundant_steps = max(0, self.step_count - min_steps)
        max_steps = self.max_steps - min_steps + 1
        cell = self.grid.get(self.agent_pos[0], self.agent_pos[1])
        max_reward = self.reward_values["fake_goal"]
        if hasattr(cell, "is_goal") and cell.is_goal:
            max_reward = self.reward_values["goal"]
        return min(max_reward, max_reward * (1 - min(1, (redundant_steps / max_steps))))

    def _gen_rewards(self, rewards_pos: List[Tuple[int, int]]):
        """Place a Goal at every given position and flag the rewarded one via `is_goal`."""
        for i, (x, y) in enumerate(rewards_pos):
            g = Goal()
            self.grid.set(x, y, g)
            g.is_goal = self.reward_position == i % len(rewards_pos)

    def render(self, mode='human', close=False, **kwargs):
        """
        Render the environment with the rewarded goal temporarily drawn in blue.

        The goal's original color is restored even if the underlying render
        call raises, so a failed render cannot leave the goal recolored.
        """
        reward_positions = self._reward_positions(width=self.width, height=self.height)
        goal = self.grid.get(*reward_positions[self.reward_position])
        assert goal.is_goal
        start_color = goal.color
        goal.color = 'blue'
        try:
            return super().render(mode, close, **kwargs)
        finally:
            goal.color = start_color
コード例 #3
0
class FourRooms(FourRoomsEnv):
    """
    Four-rooms environment with fixed hallway cells.

    Overwrites the original generator to make the hallway states static:
    instead of sampling a door in each dividing wall, the four cells listed
    in `self.hallways` are opened.
    """

    def __init__(self, agent_pos: tuple = (1, 1), goal_pos: tuple = (15, 15)):
        """
        :param agent_pos: (x, y) start cell of the agent
        :param goal_pos: (x, y) cell of the goal
        """
        # Hard-coded (x, y) hallway cells.
        # NOTE(review): these presumably assume the grid size used by the
        # parent class's default configuration - confirm before resizing.
        self.hallways = {
            'top'  : (9, 4),
            'left' : (3, 9),
            'right': (16, 9),
            'bot'  : (9, 14)
        }
        super().__init__(agent_pos=agent_pos, goal_pos=goal_pos)

    def _reward(self):
        """Return a constant reward of 1."""
        return 1

    def _gen_grid(self, width, height):
        """Build the outer walls, the room dividers, the hallways, the agent and the goal."""

        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):

            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall (the door is opened below via self.hallways)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)

                # Bottom wall (the door is opened below via self.hallways)
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)

        # Open the static hallway cells
        for hallway in self.hallways.values():
            self.grid.set(*hallway, None)

        # Use the fixed start position if given (orientation is still random),
        # otherwise let the base class place the agent
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            # Both attributes hold the full (x, y) position; tuple-unpacking
            # here would assign x to init_pos and y to cur_pos.
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'
コード例 #4
0
class NineRoomsEnv(MiniGridSimple):
    """
    Grid split into nine rooms by two horizontal and two vertical walls.

    Passages are opened around the four wall intersections, the goal sits at
    `self._goal_default_pos` and the agent moves with four cardinal actions.
    Observations are extended with a `state_feat` vector computed by
    `_encode_state`.
    """

    # Only 4 actions needed, left, right, up and down

    class NineRoomsCardinalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        passage_size=1,
        max_steps=100,
        seed=133,
        rnd_start=0,
        start_state_exclude_rooms=None,
    ):
        """
        :param grid_size: side length of the square grid
        :param passage_size: number of cells opened on each side of a wall intersection
        :param max_steps: episode step limit (also stored as `self.T`)
        :param seed: seed forwarded to the base class
        :param rnd_start: truthy to spawn the agent at a random position
        :param start_state_exclude_rooms: room numbers (1-9) used to restrict
            random spawning; defaults to no restriction
        """

        self.grid_size = grid_size
        self.passage_size = passage_size

        self._goal_default_pos = (1, 1)

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start

        # If self.rnd_start =1, don't spawn in these rooms.
        # A fresh list is created per instance so that instances never share
        # (and accidentally mutate) one default list object.
        if start_state_exclude_rooms is None:
            start_state_exclude_rooms = []
        self.start_state_exclude_rooms = start_state_exclude_rooms

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(NineRoomsEnv.NineRoomsCardinalActions)

        # Set the action and observation spaces
        self.actions = NineRoomsEnv.NineRoomsCardinalActions

        self.action_space = spaces.Discrete(self.nActions)

        self.max_cells = (grid_size - 1) * (grid_size - 1)

        # The observation space is the agent's position in the grid
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])

        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )

        self.T = max_steps

    @property
    def category(self):
        """Category of the cell the agent currently occupies, looked up in `self.cell_cat_map`."""
        # [TODO] Make sure this doesn't break after self.agent_pos is changed to numpy.ndarray
        return self.cell_cat_map[self.agent_pos]

    def reward(self):
        """Return 1 once the episode has been flagged successful, 0 otherwise."""
        return 1 if self.success else 0

    def _gen_grid(self, width, height, val=False, seen=True):
        """
        Build the nine-room layout, choose the agent start cell and place the goal.

        :param val: unused here
        :param seen: when random spawning with excluded rooms is active, True
            spawns outside `start_state_exclude_rooms`, False spawns only
            inside them
        """

        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Place horizontal walls through the grid
        self.grid.horz_wall(0, height // 3)
        self.grid.horz_wall(0, (2 * height) // 3)

        # Place vertical walls through the grid
        self.grid.vert_wall(width // 3, 0)
        self.grid.vert_wall((2 * width) // 3, 0)

        # Create passages
        passage_anchors = [(width // 3, height // 3),
                           (width // 3, (2 * height) // 3),
                           ((2 * width) // 3, height // 3),
                           ((2 * width) // 3, (2 * height) // 3)]
        passage_cells = []
        for anchor in passage_anchors:
            for delta in range(-self.passage_size, self.passage_size + 1):
                passage_cells.append((anchor[0] + delta, anchor[1]))
                passage_cells.append((anchor[0], anchor[1] + delta))

        for cell in passage_cells:
            self.grid.set(*cell, None)

        # Even during validation, start state distribution
        # should be the same as that during training
        if not self.rnd_start:
            self._agent_default_pos = ((width - 2) // 2, (height - 2) // 2)
        else:
            self._agent_default_pos = None

        # Place the agent at the center
        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter
        else:

            if len(self.start_state_exclude_rooms) == 0:
                self.place_agent()
            else:
                valid_start_pos = []
                if seen:
                    exclude_from = self.start_state_exclude_rooms
                else:
                    exclude_from = [
                        x for x in range(1, 10)
                        if x not in self.start_state_exclude_rooms
                    ]
                for room in range(1, 10):
                    if room in exclude_from:
                        continue
                    # Ignore that there are walls for now, can handle that with rejection sampling

                    # Get x coordinates of allowed cells
                    valid_x = []
                    if room % 3 == 1:
                        valid_x = list(range(1, width // 3))
                    elif room % 3 == 2:
                        valid_x = list(range(width // 3 + 1, (2 * width) // 3))
                    else:
                        valid_x = list(range((2 * width) // 3 + 1, width - 1))

                    # Get valid y-coordinates of allowed cells
                    valid_y = []
                    if (room - 1) // 3 == 0:
                        valid_y = list(range(1, height // 3))
                    elif (room - 1) // 3 == 1:
                        valid_y = list(
                            range(height // 3 + 1, (2 * height) // 3))
                    else:
                        valid_y = list(range((2 * height) // 3 + 1,
                                             height - 1))

                    room_cells = list(product(valid_x, valid_y))

                    valid_start_pos += room_cells

                # Make sure start position doesn't conflict with other cells
                while True:

                    _start_pos = valid_start_pos[np.random.choice(
                        len(valid_start_pos))]
                    col, row = _start_pos
                    # The grid is indexed (x, y), i.e. (col, row); checking
                    # (row, col) would inspect the transposed cell.
                    cell = self.grid.get(col, row)

                    if cell is None or cell.can_overlap():
                        break

                self.start_pos = (col, row)
                self.start_dir = self._rand_int(
                    0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # The mission is the goal position; step() compares agent_pos to it
        self.mission = goal.init_pos

    def reset(self, val=False, seen=True):
        """Reset the environment and add the `state_feat` encoding to the observation."""

        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """
        Apply one cardinal action.

        The reward depends only on the state (see `reward`), not on the action.
        Returns (obs, reward, done, info).

        :raises ValueError: if `action` is not one of `self.move_actions`
        """

        self.step_count += 1

        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # No penalty, episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)

                    self.agent_dir = (action - 1) % 4

                    if fwd_cell is None or fwd_cell.can_overlap(
                    ) or self.is_goal(move_pos):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """
        Encode the state to generate observation.

        Returns a float vector of length width * height holding ones before
        index state[0] * width + state[1] and zeros from that index onwards.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        curr_x, curr_y = state[1], state[0]

        curr_pos = curr_y * self.width + curr_x

        feat[curr_pos:] = 0

        return feat
コード例 #5
0
class OptRewardCrossingEnv(OptRewardMiniGridEnv):
    """
    Environment with wall or lava obstacles, sparse reward.

    Obstacle "rivers" cross the grid; one opening is carved per river along a
    sampled path, and a Goal object is placed in every opening in addition to
    the final goal in the bottom-right corner.
    """
    def __init__(self, size=9, num_crossings=1, obstacle_type=Lava, seed=None):
        """
        :param size: side length of the square grid; must be odd
        :param num_crossings: number of obstacle rivers to place
        :param obstacle_type: class instantiated for every obstacle cell
        :param seed: seed forwarded to the base class
        """
        self.num_crossings = num_crossings
        self.obstacle_type = obstacle_type
        super().__init__(
            grid_size=size,
            max_steps=4 * size * size,
            # Set this to True for maximum speed
            see_through_walls=False,
            # Forward the caller's seed instead of discarding it
            seed=seed)

    def _gen_grid(self, width, height):
        """Generate walls, the agent, the goal, the obstacle rivers and their openings."""
        assert width % 2 == 1 and height % 2 == 1  # odd size

        # Create an empty grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.wall_rect(0, 0, width, height)

        # Place the agent in the top-left corner
        self.agent_pos = (1, 1)
        self.agent_dir = 0

        # Place a goal square in the bottom-right corner
        self.put_obj(Goal(), width - 2, height - 2)

        # Place obstacles (lava or walls)
        # singleton `vertical` and `horizontal` marker objects
        v, h = object(), object()

        # Lava rivers or walls specified by direction and position in grid
        rivers = [(v, i) for i in range(2, height - 2, 2)]
        rivers += [(h, j) for j in range(2, width - 2, 2)]
        self.np_random.shuffle(rivers)
        rivers = rivers[:self.num_crossings]  # sample random rivers
        rivers_v = sorted([pos for direction, pos in rivers if direction is v])
        rivers_h = sorted([pos for direction, pos in rivers if direction is h])
        obstacle_pos = itt.chain(
            itt.product(range(1, width - 1), rivers_h),
            itt.product(rivers_v, range(1, height - 1)),
        )
        for i, j in obstacle_pos:
            self.put_obj(self.obstacle_type(), i, j)

        # Sample path to goal
        path = [h] * len(rivers_v) + [v] * len(rivers_h)
        self.np_random.shuffle(path)

        # Create openings
        limits_v = [0] + rivers_v + [height - 1]
        limits_h = [0] + rivers_h + [width - 1]
        room_i, room_j = 0, 0
        for direction in path:
            if direction is h:
                i = limits_v[room_i + 1]
                j = self.np_random.choice(
                    range(limits_h[room_j] + 1, limits_h[room_j + 1]))
                room_i += 1
            elif direction is v:
                i = self.np_random.choice(
                    range(limits_v[room_i] + 1, limits_v[room_i + 1]))
                j = limits_h[room_j + 1]
                room_j += 1
            else:
                assert False
            # Clear the obstacle and mark the opening with a Goal object
            self.grid.set(i, j, None)
            self.put_obj(Goal(), i, j)
            # Both attributes are overwritten on every iteration, so they
            # describe the last opening on the path
            self.subgoal_pos = np.asarray([i, j])
            self.horizontal = direction is h

        self.mission = ("avoid the lava and get to the green goal square"
                        if self.obstacle_type == Lava else
                        "find the opening and get to the green goal square")
コード例 #6
0
class SnakeEnv(MiniGridEnv):
    """
    Snake game on a walled grid.

    The snake's body is drawn as Lava cells and food as a Goal cell. Every
    step advances the snake by one cell after applying an optional turn.
    """

    # Enumeration of possible actions
    class Actions(IntEnum):
        # Turn left, turn right, move forward
        left = 0
        right = 1
        forward = 2

    def __init__(self, size=9):
        """
        :param size: side length of the square grid
        """

        super().__init__(grid_size=size,
                         max_steps=None,
                         see_through_walls=True)
        self.actions = SnakeEnv.Actions
        self.action_space = spaces.Discrete(len(self.actions))

        # self.observation_space = spaces.Dict({
        #     'image': spaces.Box(
        #         low=0,
        #         high=255,
        #         shape=(size,size,3),
        #         dtype='uint8'
        #     )
        #
        # })

    def spawn_new_food(self):
        """
        Place a Goal on a randomly chosen empty cell.

        A cell qualifies when the grid holds nothing there and it is not the
        agent's current position (the head's cell is not marked in the grid
        until the snake moves on, so it must be excluded explicitly).
        """
        head = tuple(self.agent_pos)
        # Grid.get takes (x, y): x ranges over the width, y over the height
        empties = [(i, j) for i in range(self.grid.width)
                   for j in range(self.grid.height)
                   if self.grid.get(i, j) is None and (i, j) != head]
        self.grid.set(*random.choice(empties), Goal())

    def _gen_grid(self, width, height):
        """Create the walled grid, a two-segment snake at a random pose, and the first food."""
        # Create an empty grid
        self.grid = Grid(width, height)

        self.grid.wall_rect(0, 0, width, height)

        # self.start_pos = (2, 2)
        # The start range is derived from the 'image' observation shape
        yl, xl, _ = self.observation_space.spaces['image'].shape
        self.start_pos = (random.randint(2, yl - 2), random.randint(2, xl - 2))
        # TODO: agent state arguably should not live on the env
        self.agent_pos = self.start_pos
        self.start_dir = random.randint(0, 3)
        self.agent_dir = self.start_dir
        # Head at the start position, one body segment directly behind it
        self.snake = Snake(
            [self.start_pos,
             tuple(self.start_pos - self.dir_vec)])
        for pos in self.snake.body:
            self.grid.set(*pos, Lava())

        self.spawn_new_food()

        self.mission = None

    def reset(self):
        """Reset the environment via the base class."""
        return super().reset()

    # def gen_obs(self):
    #     image = self.grid.encode()
    #
    #     obs = {
    #         'image': image,
    #         'direction': self.agent_dir,
    #         'mission': self.mission
    #     }
    #
    #     return obs

    def step(self, action):
        """
        Turn if requested, then advance the snake by one cell.

        Rewards: -0.001 for moving onto an empty cell, 1.0 for eating food
        (the snake grows and new food is spawned), -1.0 with `done=True` for
        running into lava or a wall. Returns (obs, reward, done, info).
        """
        self.step_count += 1

        done = False

        if action == self.actions.left:
            self.agent_dir = (self.agent_dir - 1) % 4

        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        elif action == self.actions.forward:
            pass
        else:
            assert False, "unknown action: %d" % action

        # The snake always moves, whatever the action was
        fwd_pos = self.agent_pos + self.dir_vec
        fwd_cell = self.grid.get(*fwd_pos)

        if fwd_cell is None:
            # Plain move: the old head becomes body, the tail cell is freed
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.grid.set(*self.snake.rm_tail(), None)
            self.agent_pos = fwd_pos

            reward = -0.001

        elif fwd_cell.type == 'goal':
            # Eating: the tail is kept, so the snake grows by one segment
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.agent_pos = fwd_pos

            self.spawn_new_food()
            reward = 1.0

        elif (fwd_cell.type == 'lava' or fwd_cell.type == 'wall'):
            reward = -1.0
            done = True

        else:
            assert False

        # Sanity check: the snake must never die on the very first step
        assert not (self.step_count == 1 and done)

        obs = self.gen_obs()
        # Sanity check: there must always be food somewhere on the grid
        assert any(
            isinstance(self.grid.get(i, j), Goal)
            for i in range(self.grid.width) for j in range(self.grid.height)
        )
        return obs, reward, done, {}
コード例 #7
0
ファイル: cluttered.py プロジェクト: mohit-sh/IL-RL
class Cluttered(MiniGridSimple):
    """
    Walled grid cluttered with square wall obstacles.

    Obstacles are placed with a fixed seed (`self.grid_seed`), the goal sits
    at `self._goal_default_pos`, the agent starts at
    `self._agent_default_pos` and moves with four cardinal actions.
    Observations are extended with a `state_feat` vector computed by
    `_encode_state`.
    """

    # Only 4 actions needed, left, right, up and down

    class ClutteredCardinalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        num_objects=5,
        obj_size=3,
        max_steps=100,
        seed=133,
        state_encoding="thermal",
        rnd_start=0,
    ):
        """
        :param grid_size: side length of the square grid
        :param num_objects: number of obstacles to place
        :param obj_size: obstacle side length; 1, 2 or 3
        :param max_steps: episode step limit (also stored as `self.T`)
        :param seed: seed forwarded to the base class
        :param state_encoding: "thermal" or "one-hot", see `_encode_state`
        :param rnd_start: stored on the instance; not used in this class
        """

        self.state_encoding = state_encoding
        self.grid_size = grid_size
        self.num_objects = num_objects
        self.obj_size = obj_size

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start
        self.grid_seed = 12

        # This only works for 15x15 grid with 6 obstacles
        #self._goal_default_pos = (6, 10)

        #self._goal_default_pos = (self.grid_size-2, self.grid_size-2)
        self._goal_default_pos = (7, 12)

        # This is used for some of the experiments.
        self._agent_default_pos = (7, 6)

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(Cluttered.ClutteredCardinalActions)

        # Set the action and observation spaces
        self.actions = Cluttered.ClutteredCardinalActions

        self.action_space = spaces.Discrete(self.nActions)

        self.max_cells = (grid_size - 1) * (grid_size - 1)

        # The observation space is the agent's position in the grid
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])

        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )

        self.T = max_steps

    def reward(self):
        """Return 0 once the episode is flagged successful, otherwise -1 / T per step."""
        #return 0 if self.success else -1
        return 0 if self.success else -1 / self.T

    def _gen_grid(self, width, height, val=False, seen=True):
        """
        Build the outer walls, place the obstacles, the goal and the start position.

        `val` and `seen` are accepted but not used here.

        :raises ValueError: if `self.obj_size` is not 1, 2 or 3
        """

        assert width >= 10 and height >= 10, "Environment too small to place objects"
        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Use a private generator seeded with grid_seed: the obstacle layout
        # stays reproducible without reseeding numpy's global RNG (which
        # would reset the random stream of every other consumer in the
        # process each time a grid is generated).
        rng = np.random.RandomState(self.grid_seed)
        centre_choices = list(range(2, self.grid_size - 3))

        for _ in range(self.num_objects):

            # Rejection sampling: retry until the obstacle fits on free cells
            while True:
                c_x = rng.choice(centre_choices)
                c_y = rng.choice(centre_choices)

                #obj_size = np.random.choice(list(range(1, self.obj_size+1)))
                obj_size = self.obj_size

                if obj_size == 3:
                    cells = list(
                        product([c_x - 1, c_x, c_x + 1],
                                [c_y - 1, c_y, c_y + 1]))
                elif obj_size == 2:
                    cells = list(product([c_x, c_x + 1], [c_y, c_y + 1]))
                elif obj_size == 1:
                    cells = list(product([c_x], [c_y]))
                else:
                    raise ValueError(
                        "obj_size must be 1, 2 or 3, got {}".format(obj_size))

                valid = True
                for cell in cells:
                    cell = self.grid.get(cell[0], cell[1])

                    if not (cell is None or cell.can_overlap()):
                        valid = False
                        break

                if valid:
                    for cell in cells:
                        self.grid.set(*cell, Wall())
                    break

        # Set the start position and the goal position depending upon where the obstacles are present
        goal = Goal()
        # [NOTE] : This is a hack, add option to set goal location from arguments.

        self.grid.set(*self._goal_default_pos, goal)
        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # The mission is the goal position; step() compares agent_pos to it
        self.mission = goal.init_pos

        self.start_pos = self._agent_default_pos

    def reset(self, val=False, seen=True):
        """Reset the environment and add the `state_feat` encoding to the observation."""

        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """
        Apply one cardinal action.

        The reward depends only on the state (see `reward`), not on the action.
        Returns (obs, reward, done, info).

        :raises ValueError: if `action` is not one of `self.move_actions`
        """

        self.step_count += 1

        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # No penalty, episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)

                    self.agent_dir = (action - 1) % 4

                    if fwd_cell is None or fwd_cell.can_overlap(
                    ) or self.is_goal(move_pos):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        if self.done:
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """
        Encode the state to generate observation.

        With idx = state[0] * width + state[1], the returned float vector of
        length width * height is:
        - "thermal": ones before idx, zeros from idx onwards
        - "one-hot": zeros everywhere except a one at idx
        Any other `state_encoding` value leaves the vector as all ones.
        """
        feat = np.ones(self.width * self.height, dtype=float)
        curr_x, curr_y = state[1], state[0]

        curr_pos = curr_y * self.width + curr_x
        if self.state_encoding == "thermal":
            feat[curr_pos:] = 0
        elif self.state_encoding == "one-hot":
            feat[:] = 0
            feat[curr_pos] = 1

        return feat
コード例 #8
0
class SnakeEnv(MiniGridEnv):
    """
    Snake game on a walled grid.

    The snake's body is drawn as Lava cells and food as a Goal cell. Every
    step advances the snake by one cell after applying an optional turn.
    """

    class Actions(IntEnum):
        # Turn left, turn right, move forward
        left = 0
        right = 1
        forward = 2

    def __init__(self, size=9):
        """
        :param size: side length of the square grid
        """

        super().__init__(grid_size=size,
                         max_steps=None,
                         see_through_walls=True)
        self.actions = SnakeEnv.Actions
        self.action_space = spaces.Discrete(len(self.actions))

    def spawn_new_food(self):
        """
        Place a Goal on a randomly chosen empty cell.

        A cell qualifies when the grid holds nothing there and it is not the
        agent's current position (the head's cell is not marked in the grid
        until the snake moves on, so it must be excluded explicitly).
        """
        head = tuple(self.agent_pos)
        # Grid.get takes (x, y): x ranges over the width, y over the height
        empties = [(i, j) for i in range(self.grid.width)
                   for j in range(self.grid.height)
                   if self.grid.get(i, j) is None and (i, j) != head]
        self.grid.set(*random.choice(empties), Goal())

    def _gen_grid(self, width, height):
        """Create the walled grid, a two-segment snake at a random pose, and the first food."""
        self.grid = Grid(width, height)

        self.grid.wall_rect(0, 0, width, height)

        # self.start_pos = (2, 2)
        # The start range is derived from the 'image' observation shape
        yl, xl, _ = self.observation_space.spaces["image"].shape
        self.start_pos = (random.randint(2, yl - 2), random.randint(2, xl - 2))
        # TODO: agent state arguably should not live on the env
        self.agent_pos = self.start_pos
        self.start_dir = random.randint(0, 3)
        self.agent_dir = self.start_dir
        # Head at the start position, one body segment directly behind it
        self.snake = Snake(
            [self.start_pos,
             tuple(self.start_pos - self.dir_vec)])
        for pos in self.snake.body:
            self.grid.set(*pos, Lava())

        self.spawn_new_food()

        self.mission = None

    def reset(self):
        """Reset the environment via the base class."""
        return super().reset()

    def step(self, action):
        """
        Turn if requested, then advance the snake by one cell.

        Rewards: -0.001 for moving onto an empty cell, 1.0 for eating food
        (the snake grows and new food is spawned), -1.0 with `done=True` for
        running into lava or a wall. Returns (obs, reward, done, info).
        """
        self.step_count += 1

        done = False

        if action == self.actions.left:
            self.agent_dir = (self.agent_dir - 1) % 4

        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        elif action == self.actions.forward:
            pass
        else:
            assert False, "unknown action: %d" % action

        # The snake always moves, whatever the action was
        fwd_pos = self.agent_pos + self.dir_vec
        fwd_cell = self.grid.get(*fwd_pos)

        if fwd_cell is None:
            # Plain move: the old head becomes body, the tail cell is freed
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.grid.set(*self.snake.rm_tail(), None)
            self.agent_pos = fwd_pos

            reward = -0.001

        elif fwd_cell.type == "goal":
            # Eating: the tail is kept, so the snake grows by one segment
            self.grid.set(*self.agent_pos, Lava())
            self.snake.grow_head(*fwd_pos)
            self.agent_pos = fwd_pos

            self.spawn_new_food()
            reward = 1.0

        elif fwd_cell.type == "lava" or fwd_cell.type == "wall":
            reward = -1.0
            done = True

        else:
            assert False

        # Sanity check: the snake must never die on the very first step
        assert not (self.step_count == 1 and done)

        obs = self.gen_obs()
        # Sanity check: there must always be food somewhere on the grid
        assert any(
            isinstance(self.grid.get(i, j), Goal)
            for i in range(self.grid.width) for j in range(self.grid.height)
        )
        return obs, reward, done, {}
コード例 #9
0
class FourRoomsEnv(MiniGridEnv):
    """
    Classic 4 rooms gridworld environment.
    The agent and goal positions can be specified; any that is not given is
    placed at random.

    ``reset`` and ``step`` return the encoded full grid (see ``observation``)
    instead of the parent's observation dict.
    """
    def __init__(self, agent_pos=None, goal_pos=None, size=None):
        """
        Args:
            agent_pos: fixed agent start cell, or None for a random one.
            goal_pos: fixed goal cell, or None for a random one.
            size: side length of the square grid; also used to size the
                observation space.
        """
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        super().__init__(grid_size=size, max_steps=math.inf)  # 100)

        # Replace the parent's observation space with one shaped like the
        # full grid encoding.
        s = CHW(3, size, size)  # self.observation_space.spaces["image"]
        self.observation_space = spaces.Box(
            low=0,
            high=255,  # TODO
            shape=(s.width, s.height, s.channels),
            dtype='uint8')

        # Every (x, y, direction) triple the agent has been in after a step.
        self.states_visited = set()

    def step(self, action):
        """Run the parent's step, record the visited state and return the
        full-grid observation in place of the parent's observation."""
        obs, reward, done, infos = super().step(action)

        cur_pos = (*self.agent_pos, self.agent_dir)
        self.states_visited.add(cur_pos)

        return self.observation(obs), reward, done, infos

    def observation(self, obs):
        """
        Return the encoded full grid with the agent drawn into it.

        ``obs`` is accepted for interface compatibility but is not used: the
        result is built from ``self.grid`` only.  The agent's cell is
        overwritten with (agent type index, red colour index, direction).
        """
        full_grid = self.grid.encode()  # todo: Cache this encoding
        full_grid[self.agent_pos[0]][self.agent_pos[1]] = np.array(
            [OBJECT_TO_IDX['agent'], COLOR_TO_IDX['red'], self.agent_dir])

        return full_grid

    def reset(self):
        """Reset via the parent and return the full-grid observation."""
        obs = super().reset()
        return self.observation(obs)

    def _gen_grid(self, width, height):
        """Build the outer walls, the two inner walls with one random gap
        each per room, and place the agent and the goal."""
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):

            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall and door (only for the left column of rooms)
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    pos = (xR, self._rand_int(yT + 1, yB))
                    self.grid.set(*pos, None)

                # Bottom wall and door (only for the top row of rooms)
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    pos = (self._rand_int(xL + 1, xR), yB)
                    self.grid.set(*pos, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            # assuming random start direction
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.grid.set(*self._goal_default_pos, goal)
            # Both attributes hold the full (x, y) position.  The previous
            # tuple-unpacking assignment split the position, storing x in
            # init_pos and y in cur_pos.
            goal.init_pos = goal.cur_pos = self._goal_default_pos
        else:
            self.place_obj(Goal())

        self.mission = 'Reach the goal'
コード例 #10
0
ファイル: house.py プロジェクト: konichuvak/hotrl
class MultiRoomHouse(MiniGridEnv):
    """
    Environment with multiple rooms (subgoals)

    A chain of door-connected rooms is generated on a 25x25 grid.  Every
    room carries a temperature that is advanced by ``RoomModel.step`` on each
    environment step; the action is the list of room indices to heat.  The
    reward penalises every homie whose current temperature lies outside its
    preferred range.
    """
    def __init__(
        self,
        t_out: float = -20,
        t_start: float = 20,
        start_dt: datetime = None,
        dt_delta: timedelta = timedelta(minutes=1),
        homies_params: List[Dict] = None,
        homie_reward_scaler: float = 1,
        room_names: List[str] = RoomType,
        minNumRooms=5,
        maxNumRooms=5,
        maxRoomSize=10,
        seed=1337,
    ):
        """
        Args:
            t_out: temperature used outside the rooms.
            t_start: temperature passed to every newly created room.
            start_dt: simulation start time.  ``None`` (the default) means
                "now", evaluated when the environment is constructed.
            dt_delta: simulated time added on every step.
            homies_params: one dict of keyword arguments per ``Homie``;
                ``None`` creates no homies.
            homie_reward_scaler: multiplier applied to each discomfort
                penalty.
            room_names: names assigned to the rooms in generation order; it
                must contain "Outside", which is removed from the copy kept
                by the environment.
            minNumRooms: lower bound on the number of rooms (inclusive).
            maxNumRooms: upper bound on the number of rooms (inclusive).
            maxRoomSize: largest room side length, walls included.
            seed: seed forwarded to the parent environment.
        """
        # A `datetime.now()` default in the signature would be evaluated only
        # once, when the class is defined, so resolve it per instance.
        if start_dt is None:
            start_dt = datetime.now()

        self.t_out = t_out
        self.t_start = t_start
        self.current_dt = start_dt
        self.timedelta = dt_delta
        self.homie_reward_scaler = homie_reward_scaler
        self.model = RoomModel()

        assert minNumRooms > 0
        assert maxNumRooms >= minNumRooms
        assert maxRoomSize >= 4

        # Work on a copy so the caller's (or the default) list is untouched.
        self.room_names = room_names.copy()
        self.room_names.remove("Outside")
        self.minNumRooms = minNumRooms
        self.maxNumRooms = maxNumRooms
        self.maxRoomSize = maxRoomSize

        self.rooms = []

        super().__init__(grid_size=25,
                         max_steps=self.maxNumRooms * 20,
                         seed=seed)

        # Treat a missing parameter list as "no homies" instead of failing
        # while iterating over None.
        self.homies = [
            Homie(self, **params) for params in (homies_params or [])
        ]
        for homie in self.homies:
            self.grid.set(*homie.cur_pos, v=homie)

    def _gen_grid(self, width, height):
        """Generate the rooms, draw them, place the agent and initialise the
        per-room data table and the per-cell temperatures."""
        roomList = []

        # Choose a random number of rooms to generate
        numRooms = self._rand_int(self.minNumRooms, self.maxNumRooms + 1)

        # Keep the longest chain found until enough rooms have been placed
        while len(roomList) < numRooms:
            curRoomList = []

            # The y coordinate is bounded by the height (it was drawn from
            # the width before, which is only equivalent on a square grid).
            entryDoorPos = (self._rand_int(0, width - 2),
                            self._rand_int(0, height - 2))

            # Recursively place the rooms
            self._placeRoom(numRooms,
                            roomList=curRoomList,
                            minSz=4,
                            maxSz=self.maxRoomSize,
                            entryDoorWall=2,
                            entryDoorPos=entryDoorPos)

            if len(curRoomList) > len(roomList):
                roomList = curRoomList

        # Store the list of rooms in this environment
        assert len(roomList) > 0
        self.rooms = roomList

        # Create the grid
        self.grid = Grid(width, height)
        # One Wall instance is shared by every wall cell
        wall = Wall()

        prevDoorColor = None

        # For each room
        for idx, room in enumerate(roomList):
            room.name = self.room_names[idx]

            topX, topY = room.top
            sizeX, sizeY = room.size

            # Draw the top and bottom walls
            for i in range(0, sizeX):
                self.grid.set(topX + i, topY, wall)
                self.grid.set(topX + i, topY + sizeY - 1, wall)

            # Draw the left and right walls
            for j in range(0, sizeY):
                self.grid.set(topX, topY + j, wall)
                self.grid.set(topX + sizeX - 1, topY + j, wall)

            # If this isn't the first room, place the entry door
            if idx > 0:
                # Pick a door color different from the previous one
                doorColors = set(COLOR_NAMES)
                if prevDoorColor:
                    doorColors.remove(prevDoorColor)
                # Note: the use of sorting here guarantees determinism,
                # This is needed because Python's set is not deterministic
                doorColor = self._rand_elem(sorted(doorColors))

                entryDoor = Door(doorColor)
                self.grid.set(*room.entryDoorPos, entryDoor)
                prevDoorColor = doorColor

                prevRoom = roomList[idx - 1]
                prevRoom.exitDoorPos = room.entryDoorPos

        # Randomize the starting agent position and direction
        self.place_agent(roomList[0].top, roomList[0].size)

        # Create rooms dict
        self.rooms_dict = self._build_rooms_dict()

        # Every cell starts at the outside temperature; room interiors are
        # then overwritten with their room's temperature.
        # (np.ones * t_out keeps a float array even for an integer t_out.)
        self.temperatures = np.ones((self.width, self.height)) * self.t_out
        for data in self.rooms_dict.values():
            self.temperatures[data["mask"] == 1] = data["T"]

        # Place the heating tiles in the house
        self._sync_tile_temperatures(width)

        # # Place the final goal in the last room
        # self.goal_pos = self.place_obj(Goal(), roomList[-1].top,
        #                                roomList[-1].size)

        self.mission = 'save the world'

    def _build_rooms_dict(self):
        """
        Build the per-room table handed to the room model.

        For every room in ``self.rooms`` the entry holds:
            "T":     the room's temperature
            "A":     interior area (walls excluded)
            "heat":  heating flag, initially 0
            "mask":  array that is 1 on the room's interior cells
            "P":     per other room, the computed overlap length along a
                     wall (0 when there is none)
            "P_out": interior perimeter minus all of those overlaps
            "T_out": the outside temperature
        NOTE(review): "P"/"P_out" look like shared / exterior wall lengths
        for a heat-exchange model - confirm against RoomModel.
        """
        rooms_dict = {}
        for r in self.rooms:
            if r.name not in rooms_dict:
                rooms_dict[r.name] = {}
                rooms_dict[r.name]["P"] = {}
            entry = rooms_dict[r.name]

            entry["T"] = r.temperature
            entry["A"] = (r.size[0] - 2) * (r.size[1] - 2)
            entry["heat"] = 0
            entry["mask"] = np.zeros((self.width, self.height))
            entry["mask"][r.y1 + 1:r.y2 - 1, r.x1 + 1:r.x2 - 1] = 1

            P_out = 2 * (r.x2 - r.x1 + r.y2 - r.y1 - 4)
            for r2 in self.rooms:
                if r2 == r:
                    continue

                P = 0
                # Rooms touching along a vertical wall
                if r.x1 + 1 == r2.x2 or r.x2 == r2.x1 + 1:
                    overlap = (min(r2.y2 - 1, r.y2 - 1)
                               - max(r2.y1, r.y1) - 1)
                    if overlap > 0:
                        P += overlap
                        P_out -= overlap
                # Rooms touching along a horizontal wall
                if r.y1 + 1 == r2.y2 or r.y2 == r2.y1 + 1:
                    overlap = (min(r2.x2 - 1, r.x2 - 1)
                               - max(r2.x1, r.x1) - 1)
                    if overlap > 0:
                        P += overlap
                        P_out -= overlap
                entry["P"][r2.name] = P

            entry["P_out"] = P_out
            entry["T_out"] = self.t_out
        return rooms_dict

    def _sync_tile_temperatures(self, width):
        """
        Copy ``self.temperatures`` onto every cell object of the grid.

        Empty cells are first replaced by a ``HeatingTile``.  ``width`` is
        the row length used to turn a flat cell index into the two indices
        of ``self.temperatures``.
        """
        cells = self.grid.grid
        for i, cell in enumerate(cells):
            x, y = divmod(i, width)

            if cell is None:
                cells[i] = HeatingTile(temperature=self.temperatures[x, y])
            cells[i].temperature = self.temperatures[x, y]

    def _placeRoom(self, numLeft, roomList, minSz, maxSz, entryDoorWall,
                   entryDoorPos):
        """
        Try to append a randomly sized room to ``roomList`` and recurse.

        Args:
            numLeft: number of rooms still to place, this one included.
            roomList: chain built so far; extended in place.
            minSz, maxSz: inclusive bounds for each side length.
            entryDoorWall: wall holding the entry door
                (0 right, 1 south, 2 left, 3 top).
            entryDoorPos: position of the entry door; for the first room it
                is used as the room's top-left corner.

        Returns:
            False if this room does not fit (out of the grid or overlapping
            an earlier room).  True once this room has been appended, even
            when none of the 8 attempts to place a follow-up room succeeded.
        """
        # Choose the room size randomly
        sizeX = self._rand_int(minSz, maxSz + 1)
        sizeY = self._rand_int(minSz, maxSz + 1)

        # The first room will be at the door position
        if len(roomList) == 0:
            topX, topY = entryDoorPos
        # Entry on the right
        elif entryDoorWall == 0:
            topX = entryDoorPos[0] - sizeX + 1
            y = entryDoorPos[1]
            topY = self._rand_int(y - sizeY + 2, y)
        # Entry wall on the south
        elif entryDoorWall == 1:
            x = entryDoorPos[0]
            topX = self._rand_int(x - sizeX + 2, x)
            topY = entryDoorPos[1] - sizeY + 1
        # Entry wall on the left
        elif entryDoorWall == 2:
            topX = entryDoorPos[0]
            y = entryDoorPos[1]
            topY = self._rand_int(y - sizeY + 2, y)
        # Entry wall on the top
        elif entryDoorWall == 3:
            x = entryDoorPos[0]
            topX = self._rand_int(x - sizeX + 2, x)
            topY = entryDoorPos[1]
        else:
            assert False, entryDoorWall

        # If the room is out of the grid, can't place a room here
        if topX < 0 or topY < 0:
            return False
        if topX + sizeX > self.width or topY + sizeY >= self.height:
            return False

        # If the room intersects with previous rooms, can't place it here
        # (the most recent room is skipped: it shares the door wall)
        for room in roomList[:-1]:
            nonOverlap =\
                topX + sizeX < room.top[0] or\
                room.top[0] + room.size[0] <= topX or\
                topY + sizeY < room.top[1] or\
                room.top[1] + room.size[1] <= topY

            if not nonOverlap:
                return False

        # Add this room to the list
        roomList.append(
            Room(
                (topX, topY),
                (sizeX, sizeY),
                entryDoorPos,
                None,
                "tmp_room_name",
                self.t_start,
            ))

        # If this was the last room, stop
        if numLeft == 1:
            return True

        # The exit door may go on any wall except the entry wall; the
        # candidates are the same for every attempt, so build them once.
        wallSet = {0, 1, 2, 3}
        wallSet.remove(entryDoorWall)
        exitWalls = sorted(wallSet)

        # Try placing the next room
        for _ in range(0, 8):

            # Pick which wall to place the out door on
            exitDoorWall = self._rand_elem(exitWalls)
            nextEntryWall = (exitDoorWall + 2) % 4

            # Pick the exit door position
            # Exit on right wall
            if exitDoorWall == 0:
                exitDoorPos = (topX + sizeX - 1,
                               topY + self._rand_int(1, sizeY - 1))
            # Exit on south wall
            elif exitDoorWall == 1:
                exitDoorPos = (topX + self._rand_int(1, sizeX - 1),
                               topY + sizeY - 1)
            # Exit on left wall
            elif exitDoorWall == 2:
                exitDoorPos = (topX, topY + self._rand_int(1, sizeY - 1))
            # Exit on north wall
            elif exitDoorWall == 3:
                exitDoorPos = (topX + self._rand_int(1, sizeX - 1), topY)
            else:
                assert False

            # Recursively create the other rooms
            success = self._placeRoom(numLeft - 1,
                                      roomList=roomList,
                                      minSz=minSz,
                                      maxSz=maxSz,
                                      entryDoorWall=nextEntryWall,
                                      entryDoorPos=exitDoorPos)

            if success:
                break

        return True

    def _change_temperature(self, rooms_to_heat: List[int]):
        """ Changes the temperature of each object in the house

        ``rooms_to_heat`` holds indices into ``self.room_names``; the "heat"
        flag of each listed room is set to 1 before the model is stepped.
        The flag is not cleared here.
        """
        for r in rooms_to_heat:
            self.rooms_dict[self.room_names[r]]["heat"] = 1

        self.rooms_dict = self.model.step(self.rooms_dict)

        for data in self.rooms_dict.values():
            self.temperatures[data["mask"] == 1] = data["T"]

        self._sync_tile_temperatures(self.width)

    def step(self, action):
        """
        Advance the simulation by one time step.

        Args:
            action: list of room indices to heat during this step.

        Returns:
            ``(obs, reward, done, homie_info)`` where ``reward`` is the
            (non-positive) sum of discomfort penalties and ``homie_info``
            maps each homie to its room, timestamp, temperature and comfort
            range as observed before the homie moved.
        """
        self.current_dt += self.timedelta
        self.step_count += 1

        reward = 0
        done = False

        # Move each homie and determine their preference for the temperature
        homie_info = dict()
        for homie in self.homies:
            room = homie.current_room
            if room == "Outside":
                temperature = self.t_out
            else:
                temperature = self.rooms_dict[room]["T"]
            comfort = homie.get_preferred_temperature(self.current_dt)

            homie_info[homie] = {
                "room": room,
                "dt": self.current_dt,
                "temperature": temperature,
                "comfort": comfort,
            }

            # Outside the comfort range: penalise by the distance to the
            # nearer bound.
            if not comfort[0] <= temperature <= comfort[1]:
                discomfort = min(abs(temperature - comfort[0]),
                                 abs(temperature - comfort[1]))
                reward -= discomfort * self.homie_reward_scaler

            homie.step(timestamp=self.current_dt)

        # Adjust the temperature in the house wrt to the preferences of homies
        self._change_temperature(action)

        if self.step_count >= self.max_steps:
            done = True

        obs = self.gen_obs()

        # Remove the agent from the observation
        # NOTE(review): 10 and 1 are presumably the object indices of
        # 'agent' and 'empty' - confirm against OBJECT_TO_IDX.
        obs['image'][:, :, 0][obs['image'][:, :, 0] == 10] = 1

        return obs, reward, done, homie_info
コード例 #11
0
class EmptyGridWorld(MiniGridSimple):
    """
    Empty walled grid with a fixed goal cell and four cardinal move actions.

    Observations are extended with a ``state_feat`` vector that encodes the
    agent position (see ``_encode_state``).
    """

    # Only 4 actions needed, left, right, up and down

    class CardnalActions(IntEnum):
        # Cardinal movement
        right = 0
        down = 1
        left = 2
        up = 3

        def __len__(self):
            return 4

    def __init__(
        self,
        grid_size=20,
        max_steps=100,
        state_encoding="thermal",
        seed=133,
        rnd_start=0,
    ):
        """
        Args:
            grid_size: side length of the square grid, walls included.
            max_steps: step budget; also the divisor of the per-step penalty.
            state_encoding: "thermal" or "one-hot" (see ``_encode_state``).
            seed: seed forwarded to the parent environment.
            rnd_start: truthy to leave the start position unset here (so it
                is not fixed by this class); falsy for the fixed start cell.
        """

        self.state_encoding = state_encoding
        self.grid_size = grid_size

        self._goal_default_pos = (self.grid_size - 2, 1)

        # set to 1 if agent is to be randomly spawned
        self.rnd_start = rnd_start

        super().__init__(grid_size=grid_size,
                         max_steps=max_steps,
                         seed=seed,
                         see_through_walls=False)

        self.nActions = len(EmptyGridWorld.CardnalActions)

        # Set the action and observation spaces
        self.actions = EmptyGridWorld.CardnalActions

        self.action_space = spaces.Discrete(self.nActions)

        self.max_cells = (grid_size - 1) * (grid_size - 1)

        # The observation space describes the position in the grid
        self.observation_space = spaces.Tuple(
            [spaces.Discrete(grid_size),
             spaces.Discrete(grid_size)])

        self.observation_size = self.grid_size * self.grid_size
        self.observation_shape = (self.observation_size, )

        self.T = max_steps

    @property
    def category(self):
        """Category of the agent's current cell, looked up in
        ``self.cell_cat_map``."""
        # [TODO] Make sure this doesn't break after self.agent_pos is changed to numpy.ndarray
        return self.cell_cat_map[self.agent_pos]

    def reward(self):
        """Return 1 once the episode has succeeded, otherwise a penalty of
        ``-1 / T`` where ``T`` is the step budget."""
        return 1 if self.success else -1 / self.T

    def _gen_grid(self, width, height, val=False, seen=True):
        """Build the walled empty grid, set the start position (unless it is
        randomised) and place the goal.  ``val`` and ``seen`` are accepted
        but not used here."""

        # Create the grid
        self.grid = Grid(width, height)

        # Generate surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        # Even during validation, start state distribution
        # should be the same as that during training
        if not self.rnd_start:
            self._agent_default_pos = (1, self.grid_size - 2)
        else:
            self._agent_default_pos = None

        # Place the agent at the fixed start cell, if there is one
        if self._agent_default_pos is not None:
            self.start_pos = self._agent_default_pos
            self.grid.set(*self._agent_default_pos, None)
            self.start_dir = self._rand_int(
                0, 4)  # Agent direction doesn't matter

        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)

        goal.init_pos = goal.curr_pos = self._goal_default_pos

        # The mission is the goal position; step() compares the agent
        # position against it.
        self.mission = goal.init_pos

    def reset(self, val=False, seen=True):
        """Reset via the parent and add ``state_feat`` to the observation.

        Returns:
            ``(obs, info)`` as returned by the parent, with
            ``obs['state_feat']`` added.
        """

        obs, info = super().reset(val=val, seen=seen)

        # add state feature to obs
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        return obs, info

    def step(self, action):
        """
        Apply one cardinal move and return ``(obs, reward, done, info)``.

        The reward depends only on the state, not on the action: see
        ``reward``.  If the agent already stands on the goal, the episode
        is marked done and successful and no move is made.  The episode
        also ends once ``step_count`` reaches ``max_steps - 1``.

        Raises:
            ValueError: if ``action`` is not in ``self.move_actions``.
        """

        self.step_count += 1

        if not self.done:
            # check if currently at the goal state
            if self.agent_pos == self.mission:
                # No penalty, episode done
                self.done = True
                self.success = True
            else:
                # Cardinal movement
                if action in self.move_actions:
                    move_pos = self.around_pos(action)
                    fwd_cell = self.grid.get(*move_pos)

                    self.agent_dir = (action - 1) % 4

                    # Move only into empty, overlappable or goal cells
                    if fwd_cell is None or fwd_cell.can_overlap(
                    ) or self.is_goal(move_pos):
                        self.agent_pos = move_pos
                else:
                    raise ValueError("Invalid Action: {} ".format(action))

        reward = self.reward()
        if self.step_count >= self.max_steps - 1:
            # print("Max Steps Exceeded.")
            self.done = True

        obs = self.gen_obs()

        # Add state features to the observation
        state_feat = self._encode_state(obs['agent_pos'])

        obs.update(dict(state_feat=state_feat))

        info = {
            'done': self.done,
            'agent_pos': np.array(self.agent_pos),
        }

        if self.render_rgb:
            info['rgb_grid'] = self.render(mode='rgb_array')

        # Final-step extras; note that 'agent_pos' is replaced by the raw
        # self.agent_pos value here.
        if self.done:
            info.update({
                'image': self.encode_grid(),
                'success': self.success,
                'agent_pos': self.agent_pos,
            })

        return obs, reward, self.done, info

    def _encode_state(self, state):
        """
        Encode the state to generate observation.

        ``state[0]`` and ``state[1]`` are read as x and y and flattened to
        ``y * width + x``.  The result has ``width * height`` float entries:

        * "thermal": ones before the flat index, zeros from it onwards
        * "one-hot": a single one at the flat index
        * any other encoding: all ones
        """

        feat = np.ones(self.width * self.height, dtype=float)
        curr_x, curr_y = state[0], state[1]

        curr_pos = curr_y * self.width + curr_x

        if self.state_encoding == "thermal":

            feat[curr_pos:] = 0
        elif self.state_encoding == "one-hot":
            feat[:] = 0
            feat[curr_pos] = 1

        return feat