def __init__(self, reward_free=False, difficulty=0, array_observation=False):
    """
    Set up the 9x9 four-room grid.

    Parameters
    ----------
    reward_free : bool, default=False
        If true, ``reward_at`` is left empty (the default reward still
        depends on ``difficulty``).
    difficulty : int, default=0
        Must be 0, 1 or 2. Level 0 only rewards cell (8, 0); levels 1 and 2
        add a 0.1 reward at (3, 3); level 2 additionally uses a default
        reward of -0.005 instead of 0.0.
    array_observation : bool, default=False
        If true, ``observation_space`` is replaced by a ``spaces.Box`` of
        shape (2,) with bounds [0.0, 1.0] once the base class is set up.

    Raises
    ------
    ValueError
        If ``difficulty`` is not 0, 1 or 2.
    """
    self.reward_free = reward_free
    self.difficulty = difficulty
    self.array_observation = array_observation

    if difficulty not in (0, 1, 2):
        raise ValueError("FourRoom difficulty must be in [0, 1, 2]")

    # Walls: cells (i, 4) except i in {2, 6}, followed by cells (4, j)
    # except j == 7. The skipped cells are the doors between rooms.
    walls = tuple((i, 4) for i in range(9) if i not in (2, 6))
    walls += tuple((4, j) for j in range(9) if j != 7)

    # Only the hardest level penalises every step.
    default_reward = -0.005 if difficulty == 2 else 0.0

    # Goal reward always present (unless reward-free); the small extra
    # reward at (3, 3) only exists for difficulty 1 and 2.
    reward_at = {}
    if not self.reward_free:
        reward_at[(8, 0)] = 1.0
        if difficulty != 0:
            reward_at[(3, 3)] = 0.1

    # Initialise the base class with the layout described above.
    GridWorld.__init__(
        self,
        nrows=9,
        ncols=9,
        start_coord=(0, 0),
        terminal_states=((8, 0),),
        success_probability=0.95,
        reward_at=reward_at,
        walls=walls,
        default_reward=default_reward,
    )

    # Optionally expose observations as a 2-d box instead of the
    # observation space set by the base class.
    if self.array_observation:
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
def __init__(self, reward_free=False, array_observation=False):
    """
    Set up a 13x17 grid enclosed by a wall border and split by inner walls.

    Parameters
    ----------
    reward_free : bool, default=False
        If true, ``reward_at`` is left empty. Otherwise (7, 7) gives 10.0,
        (8, 2) and (10, 3) give 1.0, and every non-wall cell with row in
        1..11 and column in 7..15 (other than (7, 7)) gives -0.05.
    array_observation : bool, default=False
        If true, ``observation_space`` is replaced by a ``spaces.Box`` of
        shape (2,) with bounds [0.0, 1.0] once the base class is set up.
    """
    self.reward_free = reward_free
    self.array_observation = array_observation

    wall_cells = []
    # Outer border: first/last column for every row, then first/last row
    # for every column.
    for row in range(13):
        wall_cells += [(row, 0), (row, 16)]
    for col in range(17):
        wall_cells += [(0, col), (12, col)]
    # Inner vertical walls on columns 6 and 10, left open at rows 1 and 11.
    for row in range(13):
        if row in (1, 11):
            continue
        wall_cells += [(row, 6), (row, 10)]
    # The opening at (11, 6) is closed again.
    wall_cells.append((11, 6))
    # Inner horizontal wall on row 6, left open at columns 1 and 15.
    wall_cells += [(6, col) for col in range(17) if col not in (1, 15)]
    walls = tuple(wall_cells)

    reward_at = {}
    if not self.reward_free:
        reward_at.update({(7, 7): 10.0, (8, 2): 1.0, (10, 3): 1.0})
        # Small penalty on every free cell of the right-hand region,
        # except the terminal cell (7, 7).
        reward_at.update(
            {
                (row, col): -0.05
                for col in range(7, 16)
                for row in range(1, 12)
                if (row, col) not in walls and (row, col) != (7, 7)
            }
        )

    # Initialise the base class with the layout described above.
    GridWorld.__init__(
        self,
        nrows=13,
        ncols=17,
        start_coord=(5, 1),
        terminal_states=((7, 7),),
        success_probability=0.95,
        reward_at=reward_at,
        walls=walls,
        default_reward=0,
    )

    # Optionally expose observations as a 2-d box instead of the
    # observation space set by the base class.
    if self.array_observation:
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
def __init__(self, reward_free=False, array_observation=False):
    """
    Set up the 11x17 grid with two vertical walls and one horizontal wall.

    Parameters
    ----------
    reward_free : bool, default=False
        If true, ``reward_at`` is left empty. Otherwise (10, 0) gives 10.0
        and (4, 4) gives 0.1. The default reward is -0.001 in both cases.
    array_observation : bool, default=False
        If true, ``observation_space`` is replaced by a ``spaces.Box`` of
        shape (2,) with bounds [0.0, 1.0] once the base class is set up.
    """
    self.reward_free = reward_free
    self.array_observation = array_observation

    # Vertical walls on columns 5 and 11 (open at rows 2 and 8), emitted
    # pairwise per row, then the horizontal wall on row 5 (open at
    # column 15).
    walls = tuple(
        (row, col)
        for row in range(11)
        if row not in (2, 8)
        for col in (5, 11)
    )
    walls += tuple((5, col) for col in range(17) if col != 15)

    # Named rewards are dropped entirely in reward-free mode.
    reward_at = {} if self.reward_free else {(10, 0): 10.0, (4, 4): 0.1}

    # Initialise the base class with the layout described above.
    GridWorld.__init__(
        self,
        nrows=11,
        ncols=17,
        start_coord=(0, 0),
        terminal_states=((10, 0),),
        success_probability=0.95,
        reward_at=reward_at,
        walls=walls,
        default_reward=-0.001,
    )

    # Optionally expose observations as a 2-d box instead of the
    # observation space set by the base class.
    if self.array_observation:
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
def __init__(
    self,
    nrooms=7,
    reward_free=False,
    array_observation=False,
    room_size=5,
    success_probability=0.95,
    remove_walls=False,
    initial_state_distribution="center",
    include_traps=False,
):
    """
    Build a grid of ``nrooms`` square rooms laid out in rows of at most 5.

    Rooms are visited in a snake order (left-to-right on even room rows,
    right-to-left on odd ones). The room with index ``nrooms // 2`` in that
    order holds the start cell and the last room holds the terminal cell,
    both at the room centre.

    Parameters
    ----------
    nrooms : int, default=7
        Number of rooms; must be > 0.
    reward_free : bool, default=False
        If true, ``reward_at`` is left empty.
    array_observation : bool, default=False
        If true, the observation space set by the base class is kept in
        ``discrete_observation_space`` and ``observation_space`` becomes a
        ``spaces.Box`` of shape (2,) with bounds [0.0, 1.0].
    room_size : int, default=5
        Side length of each room, in cells.
    success_probability : float, default=0.95
        Stored on the instance and forwarded to ``GridWorld.__init__``.
    remove_walls : bool, default=False
        If true, the base class receives an empty wall collection.
    initial_state_distribution : {"center", "uniform"}, default="center"
        With "uniform", a uniform distribution over all
        ``observation_space.n`` states is installed via
        ``set_initial_state_distribution``.
    include_traps : bool, default=False
        If true, one extra terminal cell is added per room slot, one step
        diagonally past the room centre.

    Raises
    ------
    AssertionError
        If ``nrooms <= 0`` or ``initial_state_distribution`` is invalid.
    """
    assert nrooms > 0, "nrooms must be > 0"
    assert initial_state_distribution in ("center", "uniform")

    self.reward_free = reward_free
    self.array_observation = array_observation
    self.nrooms = nrooms
    self.room_size = room_size
    self.success_probability = success_probability
    self.remove_walls = remove_walls
    self.initial_state_distribution = initial_state_distribution
    self.include_traps = include_traps

    # At most this many rooms side by side in one room row.
    self.max_rooms_per_row = 5

    # Layout in room units: a single row is only as wide as needed,
    # several rows always use the full width.
    self.room_nrows = math.ceil(nrooms / self.max_rooms_per_row)
    self.room_ncols = (
        self.max_rooms_per_row if self.room_nrows > 1 else nrooms
    )

    # Layout in cells: rooms plus one separating wall line between them.
    nrows = self.room_size * self.room_nrows + (self.room_nrows - 1)
    ncols = self.room_size * self.room_ncols + (self.room_ncols - 1)

    stride = self.room_size + 1  # distance between consecutive wall lines
    half = self.room_size // 2   # offset of a room's centre / door
    to_global = self._convert_room_coord_to_global

    # Vertical separators, with a door at the middle of every room row.
    walls = []
    for k in range(1, self.room_ncols):
        wall_col = k * stride - 1
        walls.extend(
            (r, wall_col) for r in range(nrows) if (r % stride) != half
        )
    # Horizontal separators, initially without any door.
    for k in range(1, self.room_nrows):
        wall_row = k * stride - 1
        walls.extend((wall_row, c) for c in range(ncols))

    # Walk over every room slot in snake order.
    start_coord = None
    terminal_state = None
    self.traps = []
    count = 0
    last_room_row = self.room_nrows - 1
    for room_r in range(self.room_nrows):
        left_to_right = room_r % 2 == 0
        if left_to_right:
            col_order = range(self.room_ncols)
        else:
            col_order = reversed(range(self.room_ncols))
        # Column at which the snake moves on to the next room row.
        turn_col = self.room_ncols - 1 if left_to_right else 0

        for room_c in col_order:
            if count < self.nrooms:
                # Existing room: open a door in the wall line just past
                # the room's last row, where the snake turns.
                if room_c == turn_col and room_r != last_room_row:
                    door = to_global(room_r, room_c, self.room_size, half)
                    if door in walls:
                        walls.remove(door)
            else:
                # Unused slot: fill the room and its surrounding ring
                # with walls (clipped to the grid).
                for di in range(-1, self.room_size + 1):
                    for dj in range(-1, self.room_size + 1):
                        cell = to_global(room_r, room_c, di, dj)
                        if (
                            0 <= cell[0] < nrows
                            and 0 <= cell[1] < ncols
                            and cell not in walls
                        ):
                            walls.append(cell)

            # Start cell: centre of the middle room of the sequence.
            if count == nrooms // 2:
                start_coord = to_global(room_r, room_c, half, half)
            # Terminal cell: centre of the last room of the sequence.
            if count == nrooms - 1:
                terminal_state = to_global(room_r, room_c, half, half)
            # Trap: one cell diagonally past the centre of this slot.
            if include_traps:
                self.traps.append(
                    to_global(room_r, room_c, half + 1, half + 1)
                )
            count += 1

    terminal_states = (terminal_state,) + tuple(self.traps)

    if self.reward_free:
        reward_at = {}
    else:
        # Later keys win if two of these coordinates coincide.
        reward_at = {
            terminal_state: 1.0,
            start_coord: 0.01,
            (half, half): 0.1,
        }

    # Drop every wall when requested.
    if remove_walls:
        walls = ()

    # Initialise the base class with the layout built above.
    GridWorld.__init__(
        self,
        nrows=nrows,
        ncols=ncols,
        start_coord=start_coord,
        terminal_states=terminal_states,
        success_probability=success_probability,
        reward_at=reward_at,
        walls=walls,
        default_reward=0.0,
    )

    # Optional uniform initial distribution over all discrete states.
    if initial_state_distribution == "uniform":
        n_states = self.observation_space.n
        uniform = np.ones(n_states) / n_states
        self.set_initial_state_distribution(uniform)

    # Optionally expose observations as a 2-d box, keeping the discrete
    # space reachable under a separate attribute.
    if self.array_observation:
        self.discrete_observation_space = self.observation_space
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))