def _gen_rewards(self, rewards_pos: List[Tuple[int, int]]):
    """Put a ``Goal`` tile on every candidate cell and flag the active one.

    Args:
        rewards_pos: ``(x, y)`` grid coordinates, one entry per candidate
            reward location.

    Each created tile receives an ``is_goal`` attribute. It is ``True`` only
    for the tile whose index in ``rewards_pos`` matches
    ``self.reward_position``; every other tile gets ``False``.
    """
    n_candidates = len(rewards_pos)
    for index, (col, row) in enumerate(rewards_pos):
        tile = Goal()
        self.grid.set(col, row, tile)
        # NOTE(review): ``index`` is always smaller than ``n_candidates``, so
        # the modulo never changes it; possibly ``reward_position`` was the
        # value meant to be wrapped. Behavior kept as-is.
        tile.is_goal = bool(self.reward_position == index % n_candidates)
def _gen_grid(self, width, height):
    """Build a 2x2 arrangement of rooms and place the agent and the goal.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.

    Side effects:
        Replaces ``self.grid``; sets ``self.mission``; sets ``self.agent_pos``
        and ``self.agent_dir`` when a default agent position is configured
        (otherwise delegates to ``self.place_agent()``).
    """
    # Create the grid
    self.grid = Grid(width, height)

    # Generate the surrounding walls
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height - 1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width - 1, 0)

    room_w = width // 2
    room_h = height // 2

    # For each row of rooms
    for j in range(2):
        # For each column of rooms
        for i in range(2):
            x_left = i * room_w
            y_top = j * room_h
            x_right = x_left + room_w
            y_bottom = y_top + room_h

            # Right wall (only rooms that have a neighbour to their right)
            if i + 1 < 2:
                self.grid.vert_wall(x_right, y_top, room_h)

            # Bottom wall (only rooms that have a neighbour below)
            if j + 1 < 2:
                self.grid.horz_wall(x_left, y_bottom, room_w)

    # Openings are not random: every configured hallway cell is cleared
    # after all dividing walls exist, so no later wall can cover it again.
    for hallway in self.hallways.values():
        self.grid.set(*hallway, None)

    # Fixed start position (random orientation) or fully random placement
    if self._agent_default_pos is not None:
        self.agent_pos = self._agent_default_pos
        self.grid.set(*self._agent_default_pos, None)
        self.agent_dir = self._rand_int(0, 4)
    else:
        self.place_agent()

    if self._goal_default_pos is not None:
        goal = Goal()
        self.grid.set(*self._goal_default_pos, goal)
        # Both attributes hold the full (x, y) position. Tuple-unpacking the
        # position here would store x in ``init_pos`` and y in ``cur_pos``.
        goal.init_pos = goal.cur_pos = self._goal_default_pos
    else:
        self.place_obj(Goal())

    self.mission = 'Reach the goal'
def _gen_grid(self, width, height):
    """Build an empty walled room with a randomly located goal.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.

    Raises:
        ValueError: If no interior cell is left for the goal.

    Side effects:
        Replaces ``self.grid``; sets ``self.goal_pos`` and ``self.mission``;
        sets ``self.agent_pos``/``self.agent_dir`` from the configured start,
        or calls ``self.place_agent()`` when no start is configured.
    """
    # Create an empty grid
    self.grid = Grid(width, height)

    # Generate the surrounding walls
    self.grid.wall_rect(0, 0, width, height)

    # Cells the goal must not occupy: the (1, 1) corner, as before, plus the
    # configured agent start so the agent never begins on top of the goal.
    blocked = {(1, 1)}
    if self.agent_start_pos is not None:
        blocked.add(tuple(self.agent_start_pos))

    # Refuse to enter the rejection loop when it could never terminate.
    # Bounds come from the grid that was just built.
    has_free_cell = any(
        (x, y) not in blocked
        for x in range(1, width - 1)
        for y in range(1, height - 1)
    )
    if not has_free_cell:
        raise ValueError(
            "no interior cell available for the goal in a %dx%d grid"
            % (width, height)
        )

    # Place a goal square in a random interior location (rejection sampling)
    while True:
        candidate = (
            np.random.randint(1, width - 1),
            np.random.randint(1, height - 1),
        )
        if candidate not in blocked:
            break
    self.goal_pos = candidate
    self.put_obj(Goal(), *self.goal_pos)

    # Place the agent
    if self.agent_start_pos is not None:
        self.agent_pos = self.agent_start_pos
        self.agent_dir = self.agent_start_dir
    else:
        self.place_agent()

    self.mission = "get to the green goal square"
def _gen_grid(self, width, height):
    """Build a room crossed by obstacle "rivers" with one guaranteed path.

    The agent starts at (1, 1) facing direction 0 and the goal sits at
    (width - 2, height - 2). ``self.num_crossings`` rivers made of
    ``self.obstacle_type`` objects are drawn on even rows/columns, then one
    opening per river is carved along a randomly ordered path.

    Args:
        width: Grid width in cells; must be odd.
        height: Grid height in cells; must be odd.

    Side effects:
        Replaces ``self.grid``; sets ``self.agent_pos``, ``self.agent_dir``
        and ``self.mission``; consumes ``self.np_random``.
    """
    assert width % 2 == 1 and height % 2 == 1  # odd size

    # Create an empty grid
    self.grid = Grid(width, height)

    # Generate the surrounding walls
    self.grid.wall_rect(0, 0, width, height)

    # Place the agent in the top-left corner
    self.agent_pos = (1, 1)
    self.agent_dir = 0

    # Place a goal square in the bottom-right corner
    self.put_obj(Goal(), width - 2, height - 2)

    # Singleton markers for `vertical` and `horizontal`
    v, h = object(), object()

    # A vertical river occupies a column (an x position), so its candidates
    # span the width; a horizontal river occupies a row (a y position), so
    # its candidates span the height. Identical for square grids.
    rivers = [(v, i) for i in range(2, width - 2, 2)]
    rivers += [(h, j) for j in range(2, height - 2, 2)]
    self.np_random.shuffle(rivers)
    rivers = rivers[:self.num_crossings]  # sample random rivers
    rivers_v = sorted(pos for direction, pos in rivers if direction is v)
    rivers_h = sorted(pos for direction, pos in rivers if direction is h)
    obstacle_pos = itt.chain(
        itt.product(range(1, width - 1), rivers_h),
        itt.product(rivers_v, range(1, height - 1)),
    )
    for i, j in obstacle_pos:
        self.put_obj(self.obstacle_type(), i, j)

    # Sample path to goal: one horizontal step per vertical river to cross
    # and one vertical step per horizontal river to cross.
    path = [h] * len(rivers_v) + [v] * len(rivers_h)
    self.np_random.shuffle(path)

    # Create openings. ``limits_v`` bounds rooms along x, ``limits_h`` along
    # y, so their closing entries are the far wall on the matching axis.
    limits_v = [0] + rivers_v + [width - 1]
    limits_h = [0] + rivers_h + [height - 1]
    room_i, room_j = 0, 0
    for direction in path:
        if direction is h:
            # Step right: open the next vertical river inside the current row band
            i = limits_v[room_i + 1]
            j = self.np_random.choice(
                range(limits_h[room_j] + 1, limits_h[room_j + 1]))
            room_i += 1
        elif direction is v:
            # Step down: open the next horizontal river inside the current column band
            i = self.np_random.choice(
                range(limits_v[room_i] + 1, limits_v[room_i + 1]))
            j = limits_h[room_j + 1]
            room_j += 1
        else:
            # Explicit raise: a bare ``assert`` would vanish under ``python -O``
            raise AssertionError("unknown path direction")
        self.grid.set(i, j, None)

    self.mission = ("avoid the lava and get to the green goal square"
                    if self.obstacle_type == Lava else
                    "find the opening and get to the green goal square")
def _gen_grid(self, width, height):
    """Populate the parent's grid with a locked door, key, distractors and lava.

    After the parent class builds the base grid, this adds (in this order) a
    locked blue door, a key of the door's color, the agent, balls and boxes
    in two colors, two lava strips and a goal at (width - 2, height - 2).
    The locked door is kept on ``self.door``.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.
    """
    super()._gen_grid(width, height)

    # Locked door first, then the key that matches it.
    # NOTE(review): the positional arguments of add_door/add_object/
    # place_agent look like room indices — confirm against the parent class.
    locked_door, _ = self.add_door(0, 0, 0, locked=True, color='blue')
    self.add_object(0, 0, 'key', locked_door.color)

    self.place_agent(0, 0)
    self.door = locked_door

    # One door-colored and one red object of each kind; balls before boxes.
    for kind in ('ball', 'box'):
        self.add_object(0, 0, kind, locked_door.color)
        self.add_object(1, 0, kind, 'red')

    # Lava strips. NOTE(review): the horizontal strip's length is also
    # derived from ``height`` — confirm this is intended.
    lava_length = height - 6
    self.grid.vert_wall(4, 3, lava_length, Lava)
    self.grid.horz_wall(9, 2, lava_length, Lava)

    # Goal in the bottom-right corner
    self.put_obj(Goal(), width - 2, height - 2)
def _gen_grid(self, width, height, val=False, seen=True):
    """Build an empty walled room with a fixed goal and an optional fixed start.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.
        val: Not used by this method.
        seen: Not used by this method.

    Side effects:
        Replaces ``self.grid``; rewrites ``self._agent_default_pos``; sets
        ``self.start_pos``/``self.start_dir`` only when ``self.rnd_start`` is
        falsy; sets ``self.mission`` to the goal position.
    """
    board = Grid(width, height)
    self.grid = board

    # Outer walls
    board.horz_wall(0, 0)
    board.horz_wall(0, height - 1)
    board.vert_wall(0, 0)
    board.vert_wall(width - 1, 0)

    # The start-state distribution is the same for training and validation:
    # it depends only on ``rnd_start``.
    if self.rnd_start:
        # NOTE(review): no start position is chosen here; presumably the
        # caller places the agent afterwards — verify.
        self._agent_default_pos = None
    else:
        fixed_start = (1, self.grid_size - 2)
        self._agent_default_pos = fixed_start
        self.start_pos = fixed_start
        board.set(*fixed_start, None)
        # Agent direction doesn't matter
        self.start_dir = self._rand_int(0, 4)

    # Goal always sits at the configured default position
    target = Goal()
    goal_cell = self._goal_default_pos
    board.set(*goal_cell, target)
    target.init_pos = target.curr_pos = goal_cell
    self.mission = target.init_pos
def _gen_grid(self, width, height, val=False, seen=True):
    """Build a walled room containing randomly placed square wall blocks.

    ``self.num_objects`` blocks with side ``self.obj_size`` (1, 2 or 3) are
    placed by rejection sampling: a candidate is accepted only when every
    cell it covers is empty or can be overlapped.

    Args:
        width: Grid width in cells; must be at least 10.
        height: Grid height in cells; must be at least 10.
        val: Not used by this method.
        seen: Not used by this method.

    Raises:
        ValueError: If ``self.obj_size`` is not 1, 2 or 3.

    Side effects:
        Replaces ``self.grid``; reseeds the global NumPy RNG with
        ``self.grid_seed``; sets ``self.mission`` and ``self.start_pos``.
    """
    assert width >= 10 and height >= 10, "Environment too small to place objects"

    # Create the grid and its surrounding walls
    self.grid = Grid(width, height)
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height - 1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width - 1, 0)

    # Same seed -> same obstacle layout on every call
    np.random.seed(self.grid_seed)

    for _ in range(self.num_objects):
        while True:
            anchor_x = np.random.choice(list(range(2, self.grid_size - 3)))
            anchor_y = np.random.choice(list(range(2, self.grid_size - 3)))

            # Per-axis offsets covered by a block of the given side length
            side = self.obj_size
            if side == 3:
                offsets = (-1, 0, 1)
            elif side == 2:
                offsets = (0, 1)
            elif side == 1:
                offsets = (0,)
            else:
                raise ValueError
            footprint = [(anchor_x + dx, anchor_y + dy)
                         for dx in offsets for dy in offsets]

            # Reject the candidate as soon as one covered cell is occupied
            blocked = False
            for x, y in footprint:
                occupant = self.grid.get(x, y)
                if occupant is not None and not occupant.can_overlap():
                    blocked = True
                    break
            if blocked:
                continue

            for spot in footprint:
                self.grid.set(*spot, Wall())
            break

    # NOTE: goal and start come from fixed defaults, not from the obstacle
    # layout. NOTE(review): nothing here prevents a block from covering the
    # default start cell — verify against callers.
    goal = Goal()
    self.grid.set(*self._goal_default_pos, goal)
    goal.init_pos = goal.curr_pos = self._goal_default_pos
    self.mission = goal.init_pos
    self.start_pos = self._agent_default_pos
def _gen_grid(self, width, height):
    """Build an empty walled room with a centered start and a random goal.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.

    Side effects:
        Replaces ``self.grid``; sets ``self.start_pos``, ``self.start_dir``
        and ``self.mission``.
    """
    # Empty room enclosed by walls
    room = Grid(width, height)
    room.wall_rect(0, 0, width, height)
    self.grid = room

    # The agent begins in the middle cell, facing direction 0
    center = (width // 2, height // 2)
    self.start_pos = center
    self.start_dir = 0

    # The goal cell is chosen by place_obj.
    # NOTE(review): presumably place_obj skips ``start_pos`` — verify.
    self.place_obj(Goal())

    # Mission string
    self.mission = "GO TO GREEN SQUARE"
def decode(array):
    """
    Decode an array grid encoding back into a grid

    Args:
        array: Array whose ``shape`` is ``(width, height, 3)``; each cell
            holds ``(type index, color index, state)``.

    Returns:
        A ``Grid`` of the same width and height. Cells encoded as 'unseen'
        or 'empty' are left untouched; 'agent' cells are set to ``None``.

    Raises:
        AssertionError: If the channel count is not 3 or a cell holds an
            object type this decoder does not know.
    """
    width, height, channels = array.shape
    assert channels == 3

    # Object types whose constructor takes only a color
    colored_types = {
        'wall': Wall,
        'floor': Floor,
        'ball': Ball,
        'key': Key,
        'box': Box,
    }

    grid = Grid(width, height)
    for i in range(width):
        for j in range(height):
            typeIdx, colorIdx, state = array[i, j]

            if typeIdx == OBJECT_TO_IDX['unseen'] or \
                    typeIdx == OBJECT_TO_IDX['empty']:
                continue

            objType = IDX_TO_OBJECT[typeIdx]
            color = IDX_TO_COLOR[colorIdx]

            if objType in colored_types:
                v = colored_types[objType](color)
            elif objType == 'door':
                # State, 0: open, 1: closed, 2: locked
                is_open = state == 0
                is_locked = state == 2
                v = Door(color, is_open, is_locked)
            elif objType == 'goal':
                v = Goal()
            elif objType == 'lava':
                v = Lava()
            elif objType == 'agent':
                v = None
            else:
                # Raised explicitly: an ``assert False`` is removed under
                # ``python -O``, which would let a stale ``v`` from the
                # previous cell be written into this one.
                raise AssertionError(
                    "unknown obj type in decode '%s'" % objType)

            grid.set(i, j, v)

    return grid
def _make_hallways(self):
    """
    Turns one hallway cell of the option's room into the goal and
    blocks the other one with a wall.

    The two cells are looked up by ``self.option_name``; an unknown
    option name raises ``KeyError``.
    """
    # option name -> (cell that becomes the goal, cell that gets walled off)
    doorway_plan = {
        'topleft->topright': ((9, 4), (3, 9)),
        'topleft->botleft': ((3, 9), (9, 4)),
        'topright->topleft': ((0, 4), (7, 9)),
        'topright->botright': ((7, 9), (0, 4)),
        'botleft->topleft': ((3, 0), (9, 5)),
        'botleft->botright': ((9, 5), (3, 0)),
        'botright->topright': ((7, 0), (0, 5)),
        'botright->botleft': ((0, 5), (7, 0)),
    }
    target_cell, sealed_cell = doorway_plan[self.option_name]

    board = self.env.grid
    board.set(*target_cell, Goal())
    board.set(*sealed_cell, Wall())
def _add_reward(self):
    """Add reward in final room.

    The room is the one indexed by ``self.lattice.end``. A ``Goal`` is put
    on a uniformly chosen tile with x in ``[xL + 2, xR - 2]`` and y in
    ``[yB + 2, yT - 2]`` (both inclusive), where the bounds are derived from
    the room index and ``self.room_w`` / ``self.room_h``.

    Raises:
        IndexError: If the room is too small for that range to contain any
            tile.
    """
    i, j = self.lattice.end
    xL = i * self.room_w + i
    yB = j * self.room_h + j
    xR = xL + self.room_w + 1
    yT = yB + self.room_h + 1

    xs = range(xL + 2, xR - 1)
    ys = range(yB + 2, yT - 1)

    if self.verbose:
        print('Adding reward in room ({},{})'.format(i, j))
        # Report the bounds that are actually sampled (inclusive)
        print('Choosing tile in square [{},{}] X [{},{}]'.format(
            xs.start, xs.stop - 1, ys.start, ys.stop - 1))

    if not xs or not ys:
        # Same exception type random.choice raises on an empty sequence,
        # but with a message that names the cause.
        raise IndexError(
            'room ({},{}) is too small to hold a reward tile'.format(i, j))

    x = random.choice(xs)
    y = random.choice(ys)
    self.set(x, y, Goal())
def _gen_grid(self, width: int, height: int) -> None:
    """Generate grid space.

    Jobs:
        - create grid world
        - create wall
        - set starting point
        - set goal
        - set lava
        - fill ``self.reward_grid`` for every valid cell

    Args:
        width: Grid width in cells, walls included; at least 5.
        height: Grid height in cells, walls included; at least 5.
    """
    assert width >= 5 and height >= 5

    # Current position and direction of the agent
    self.agent_pos: Tuple[int, int] = (1, 1)  # (0,0) is wall
    self.agent_dir: int = 0

    # Create an empty grid
    self.grid = Grid(width, height)

    # Create wall
    self.grid.wall_rect(0, 0, width, height)

    # Create Goal
    for position in self.goal_pos:
        goal_with_wall = self.__adjust_pos_consider_walls(position)
        self.__set_grid_type(*goal_with_wall, Goal())

    # Treat a missing/empty obstacle list as "no obstacles" everywhere,
    # including the membership test in the reward loop below (which would
    # otherwise raise TypeError on ``None``).
    obstacle_cells = self.obstacle_pos or ()

    # Create Lava
    for lava_pos in obstacle_cells:
        lava_with_wall = self.__adjust_pos_consider_walls(lava_pos)
        self.__set_grid_type(*lava_with_wall, self.obstacle_type())

    # Settings for reward_grid
    for cell in itertools.product(
        range(self.valid_height), range(self.valid_width)
    ):
        if cell in self.goal_pos:
            self.reward_grid[cell] = self.goal_reward
        elif cell in obstacle_cells:
            self.reward_grid[cell] = self.obstacle_reward
        else:
            self.reward_grid[cell] = self.default_reward
def _gen_grid(self, width, height):
    """Build a walled room with randomly placed goals and lava traps.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.

    Side effects:
        Replaces ``self.grid``; places ``self.n_goals`` goals followed by
        ``self.n_traps`` lava tiles via ``self.place_obj``; positions the
        agent; sets ``self.mission``.
    """
    # Empty room enclosed by walls
    room = Grid(width, height)
    room.wall_rect(0, 0, width, height)
    self.grid = room

    # Goals first, then traps — the order fixes the sequence of random draws
    for make_tile, count in ((Goal, self.n_goals), (Lava, self.n_traps)):
        for _ in range(count):
            self.place_obj(make_tile())

    # Agent: random placement unless a start position is configured.
    # NOTE(review): with a configured start, nothing here keeps a goal or
    # trap off that cell — verify against place_obj.
    if self.agent_start_pos is None:
        self.place_agent()
    else:
        self.agent_pos = self.agent_start_pos
        self.agent_dir = self.agent_start_dir

    self.mission = "get to the green goal square, avoid the lava"
def _gen_grid(self, width, height, val=False, seen=True):
    """Build a 3x3 arrangement of rooms with passages, a start and a goal.

    Walls at one third and two thirds of each axis split the grid into nine
    rooms, numbered 1..9 row by row. Around each of the four wall crossings,
    ``self.passage_size`` cells are cleared in every direction.

    Args:
        width: Grid width in cells.
        height: Grid height in cells.
        val: Not used by this method.
        seen: When true, rooms listed in ``self.start_state_exclude_rooms``
            are excluded from random starts; when false, every room NOT in
            that list is excluded instead.

    Side effects:
        Replaces ``self.grid``; rewrites ``self._agent_default_pos``; sets
        ``self.start_pos``/``self.start_dir`` (or calls
        ``self.place_agent()``); sets ``self.mission`` to the goal position.
    """
    # Create the grid
    self.grid = Grid(width, height)

    # Generate surrounding walls
    self.grid.horz_wall(0, 0)
    self.grid.horz_wall(0, height - 1)
    self.grid.vert_wall(0, 0)
    self.grid.vert_wall(width - 1, 0)

    third_w, two_thirds_w = width // 3, (2 * width) // 3
    third_h, two_thirds_h = height // 3, (2 * height) // 3

    # Place horizontal and vertical walls through the grid
    self.grid.horz_wall(0, third_h)
    self.grid.horz_wall(0, two_thirds_h)
    self.grid.vert_wall(third_w, 0)
    self.grid.vert_wall(two_thirds_w, 0)

    # Create passages: clear a plus-shaped opening around each crossing
    for anchor_x, anchor_y in product((third_w, two_thirds_w),
                                      (third_h, two_thirds_h)):
        for delta in range(-self.passage_size, self.passage_size + 1):
            self.grid.set(anchor_x + delta, anchor_y, None)
            self.grid.set(anchor_x, anchor_y + delta, None)

    # Even during validation, start state distribution
    # should be the same as that during training
    if not self.rnd_start:
        self._agent_default_pos = ((width - 2) // 2, (height - 2) // 2)
    else:
        self._agent_default_pos = None

    if self._agent_default_pos is not None:
        # Place the agent at the center
        self.start_pos = self._agent_default_pos
        self.grid.set(*self._agent_default_pos, None)
        self.start_dir = self._rand_int(0, 4)  # Agent direction doesn't matter
    elif len(self.start_state_exclude_rooms) == 0:
        self.place_agent()
    else:
        if seen:
            exclude_from = self.start_state_exclude_rooms
        else:
            exclude_from = [
                x for x in range(1, 10)
                if x not in self.start_state_exclude_rooms
            ]

        # Interior coordinates of the three room columns / rows; the wall
        # lines themselves are left out.
        x_bands = (
            range(1, third_w),
            range(third_w + 1, two_thirds_w),
            range(two_thirds_w + 1, width - 1),
        )
        y_bands = (
            range(1, third_h),
            range(third_h + 1, two_thirds_h),
            range(two_thirds_h + 1, height - 1),
        )

        valid_start_pos = []
        for room in range(1, 10):
            if room in exclude_from:
                continue
            column_band = x_bands[(room - 1) % 3]
            row_band = y_bands[(room - 1) // 3]
            valid_start_pos += product(column_band, row_band)

        # Make sure start position doesn't conflict with other cells
        while True:
            col, row = valid_start_pos[np.random.choice(
                len(valid_start_pos))]
            # Candidates are (x, y) pairs, so the lookup must be (col, row);
            # passing (row, col) would inspect the transposed cell.
            cell = self.grid.get(col, row)
            if cell is None or cell.can_overlap():
                break

        self.start_pos = (col, row)
        self.start_dir = self._rand_int(0, 4)  # Agent direction doesn't matter

    goal = Goal()
    self.grid.set(*self._goal_default_pos, goal)
    goal.init_pos = goal.curr_pos = self._goal_default_pos
    self.mission = goal.init_pos
def spawn_new_food(self):
    """Put a ``Goal`` on a random empty cell that the agent does not occupy.

    Raises:
        IndexError: If no such cell exists.
    """
    agent_cell = tuple(self.agent_pos)

    # The first coordinate runs over the width and the second over the
    # height. NOTE(review): assumes ``grid.get`` takes (x, y) — confirm
    # against the Grid class.
    empties = [
        (i, j)
        for i in range(self.grid.width)
        for j in range(self.grid.height)
        # Compare the coordinates, not the cell contents, with the agent's
        # position so food never appears underneath the agent.
        if (i, j) != agent_cell and self.grid.get(i, j) is None
    ]
    if not empties:
        # Same exception type random.choice raises on an empty list
        raise IndexError("no empty cell left to spawn food on")

    self.grid.set(*random.choice(empties), Goal())