def __init__(self):
    """Initialize base classes, dynamics constants, rendering and spaces.

    The action space is a one-dimensional box bounded by ``max_torque`` and
    the observation space is a three-dimensional box bounded by
    ``[1, 1, max_speed]`` (symmetric around zero). The environment is reset
    once everything is configured.
    """
    # Base-class initializers are called explicitly, one after the other.
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    # Dynamics constants.
    self.max_speed = 8.
    self.max_torque = 2.
    self.dt = 0.5
    self.gravity = 10.
    self.mass = 1.
    self.length = 1.

    # Rendering configuration.
    self.set_clipping_area((-2.2, 2.2, -2.2, 2.2))
    self.set_refresh_interval(10)

    # Spaces: both boxes are symmetric around the origin.
    obs_high = np.array([1., 1., self.max_speed], dtype=np.float32)
    obs_low = -obs_high
    self.action_space = spaces.Box(
        low=-self.max_torque,
        high=self.max_torque,
        shape=(1, ),
        dtype=np.float32,
    )
    self.observation_space = spaces.Box(
        low=obs_low,
        high=obs_high,
        dtype=np.float32,
    )

    # Put the environment in its initial state.
    self.reset()
def __init__(self, goal_velocity=0):
    """Initialize base classes, bounds, spaces and rendering, then reset.

    Parameters
    ----------
    goal_velocity : float, default 0
        Stored as ``self.goal_velocity``.
    """
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    # Position / speed bounds and goal definition.
    self.min_position, self.max_position = -1.2, 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5
    self.goal_velocity = goal_velocity

    # Dynamics constants.
    self.force = 0.001
    self.gravity = 0.0025

    # Observation bounds are [position, speed].
    self.low = np.array([self.min_position, -self.max_speed])
    self.high = np.array([self.max_position, self.max_speed])

    # Three discrete actions; observations live in the box [low, high].
    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(self.low, self.high)

    self.reward_range = (0.0, 1.0)

    # Rendering configuration (refresh interval in milliseconds).
    self.set_clipping_area((-1.2, 0.6, -0.2, 1.1))
    self.set_refresh_interval(10)

    # Start from the initial state.
    self.reset()
def __init__(self, reward_free=False, difficulty=0, array_observation=False):
    """Build a 9x9 grid world split by a wall column and a wall row.

    Parameters
    ----------
    reward_free : bool, default False
        If True, no reward location is defined (``reward_at`` is empty).
    difficulty : int, default 0
        Must be 0, 1 or 2. Level 0 rewards only cell (8, 0); levels 1 and 2
        add a 0.1 reward at (3, 3); level 2 also uses a default reward of
        -0.005 instead of 0.0.
    array_observation : bool, default False
        If True, the observation space is replaced by a 2D box in [0, 1].

    Raises
    ------
    ValueError
        If ``difficulty`` is not in [0, 1, 2].
    """
    self.reward_free = reward_free
    self.difficulty = difficulty
    self.array_observation = array_observation

    if difficulty not in [0, 1, 2]:
        raise ValueError("FourRoom difficulty must be in [0, 1, 2]")

    # Wall column 4 (open at rows 2 and 6), then wall row 4 (open at col 7).
    vertical_wall = tuple((ii, 4) for ii in range(9) if ii not in [2, 6])
    horizontal_wall = tuple((4, jj) for jj in range(9) if jj != 7)
    walls = vertical_wall + horizontal_wall

    # Only the hardest level has a non-zero default reward.
    default_reward = -0.005 if difficulty == 2 else 0.0

    # Reward locations depend on the difficulty, unless reward-free.
    if self.reward_free:
        reward_at = {}
    elif difficulty == 0:
        reward_at = {(8, 0): 1.0}
    else:
        reward_at = {
            (8, 0): 1.0,
            (3, 3): 0.1,
        }

    GridWorld.__init__(
        self,
        nrows=9,
        ncols=9,
        start_coord=(0, 0),
        terminal_states=((8, 0), ),
        success_probability=0.95,
        reward_at=reward_at,
        walls=walls,
        default_reward=default_reward,
    )

    # Optionally expose observations as a 2D array in [0, 1].
    if self.array_observation:
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2, ))
def __init__(self, noise_room1=0.01, noise_room2=0.01):
    """Initialize base classes, spaces, parameters and rendering, then reset.

    Parameters
    ----------
    noise_room1 : float, default 0.01
        Stored as ``self.noise_room1`` and as first entry of ``room_noises``.
    noise_room2 : float, default 0.01
        Stored as ``self.noise_room2`` and as second entry of ``room_noises``.
    """
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    self.noise_room1 = noise_room1
    self.noise_room2 = noise_room2

    # Observations lie in the rectangle [0, 2] x [0, 1]; four actions.
    obs_low = np.array([0.0, 0.0])
    obs_high = np.array([2.0, 1.0])
    self.observation_space = spaces.Box(low=obs_low, high=obs_high)
    self.action_space = spaces.Discrete(4)
    self.reward_range = (0.0, 1.0)

    self.room_noises = [noise_room1, noise_room2]

    # Environment parameters.
    self.action_displacement = 0.1
    self.wall_eps = 0.05

    # Base reward position.
    self.base_reward_pos = np.array([0.8, 0.8])

    # Rendering configuration (refresh interval in milliseconds).
    self.set_clipping_area((0, 2, 0, 1))
    self.set_refresh_interval(100)
    self.renderer_type = "opengl"

    # Start from the initial state.
    self.reset()
def __init__(self, reward_free=False, array_observation=False):
    """Build a 13x17 walled grid world with a goal at (7, 7).

    Parameters
    ----------
    reward_free : bool, default False
        If True, no reward location is defined (``reward_at`` is empty).
    array_observation : bool, default False
        If True, the observation space is replaced by a 2D box in [0, 1].
    """
    self.reward_free = reward_free
    self.array_observation = array_observation

    nrows, ncols = 13, 17

    # Collect wall cells in a list, then freeze them into a tuple.
    wall_cells = []
    # Left and right borders.
    for ii in range(13):
        wall_cells.append((ii, 0))
        wall_cells.append((ii, 16))
    # Top and bottom borders.
    for jj in range(17):
        wall_cells.append((0, jj))
        wall_cells.append((12, jj))
    # Inner wall columns 6 and 10, open at rows 1 and 11 ...
    for ii in range(13):
        if ii not in [1, 11]:
            wall_cells.append((ii, 6))
            wall_cells.append((ii, 10))
    # ... except that the opening at (11, 6) is closed again.
    wall_cells.append((11, 6))
    # Inner wall row 6, open at columns 1 and 15.
    for jj in range(17):
        if jj not in [1, 15]:
            wall_cells.append((6, jj))
    walls = tuple(wall_cells)

    if self.reward_free:
        reward_at = {}
    else:
        reward_at = {
            (7, 7): 10.0,
            (8, 2): 1.0,
            (10, 3): 1.0
        }
        # Small penalty on every free cell of columns 7..15 (goal excluded).
        reward_at.update(
            {
                (ii, jj): -0.05
                for jj in range(7, 16)
                for ii in range(1, 12)
                if (ii, jj) not in walls and (ii, jj) != (7, 7)
            }
        )

    GridWorld.__init__(
        self,
        nrows=nrows,
        ncols=ncols,
        start_coord=(5, 1),
        terminal_states=((7, 7),),
        success_probability=0.95,
        reward_at=reward_at,
        walls=walls,
        default_reward=0,
    )

    # Optionally expose observations as a 2D array in [0, 1].
    if self.array_observation:
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
def __init__(self, reward_free=False, array_observation=False):
    """Build an 11x17 grid world split by two wall columns and one wall row.

    Parameters
    ----------
    reward_free : bool, default False
        If True, no reward location is defined (``reward_at`` is empty).
    array_observation : bool, default False
        If True, the observation space is replaced by a 2D box in [0, 1].
    """
    self.reward_free = reward_free
    self.array_observation = array_observation

    # Wall columns 5 and 11 (open at rows 2 and 8), then wall row 5
    # (open at column 15).
    wall_cells = []
    for ii in range(11):
        if ii in [2, 8]:
            continue
        wall_cells.extend([(ii, 5), (ii, 11)])
    wall_cells.extend((5, jj) for jj in range(17) if jj != 15)
    walls = tuple(wall_cells)

    # Reward locations, unless the environment is reward-free.
    if self.reward_free:
        reward_at = {}
    else:
        reward_at = {
            (10, 0): 10.0,
            (4, 4): 0.1,
        }

    GridWorld.__init__(
        self,
        nrows=11,
        ncols=17,
        start_coord=(0, 0),
        terminal_states=((10, 0), ),
        success_probability=0.95,
        reward_at=reward_at,
        walls=walls,
        default_reward=-0.001,
    )

    # Optionally expose observations as a 2D array in [0, 1].
    if self.array_observation:
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2, ))
def __init__(self):
    """Initialize base classes, rendering and spaces, then reset.

    The clipping area is a square whose half-width is the sum of the two
    link lengths plus a 0.2 margin. Observations live in a symmetric
    six-dimensional box; there are three discrete actions.
    """
    Model.__init__(self)
    RenderInterface2D.__init__(self)

    self.reward_range = (-1.0, 0.0)

    # Rendering: clipping area is (left, right, bottom, top),
    # refresh interval is in milliseconds.
    half_width = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2
    self.set_clipping_area((-half_width, half_width, -half_width, half_width))
    self.set_refresh_interval(10)

    # Spaces: four components bounded by 1, two bounded by the max velocities.
    obs_high = np.array(
        [1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2]
    )
    obs_low = -obs_high
    self.observation_space = spaces.Box(low=obs_low, high=obs_high)
    self.action_space = spaces.Discrete(3)

    # No state until the first reset.
    self.state = None
    self.reset()
def __init__(self,
             p,
             action_list,
             reward_amplitudes,
             reward_smoothness,
             reward_centers,
             A,
             B,
             sigma,
             sigma_init,
             mu_init):
    """
    Parameters
    -----------
    p : int
        parameter of the p-norm
    action_list : list
        list of actions {u_1, ..., u_m}, each action u_i is a
        d'-dimensional array
    reward_amplitudes: list or numpy.ndarray
        list of reward amplitudes: {b_1, ..., b_n}, each in [0, 1]
    reward_smoothness : list or numpy.ndarray
        list of reward smoothness: {c_1, ..., c_n}, each > 0
    reward_centers : list
        list of reward centers:    {x_1, ..., x_n}
    A : numpy.ndarray
        array A of size (d, d)
    B : numpy.ndarray
        array B of size (d, d')
    sigma : double
        transition noise sigma
    sigma_init : double
        initial state noise sigma_init
    mu_init : numpy.ndarray
        array of size (d,) containing the mean of the initial state

    Raises
    ------
    AssertionError
        If p < 1, if the three reward lists have different lengths, if an
        amplitude is outside [0, 1], or if a smoothness is not > 0.
    """
    Model.__init__(self)

    assert p >= 1, "PBall requires p>=1"
    if p not in [2, np.inf]:
        # Adjacent literals are concatenated: no backslash continuation
        # inside the string, so no stray whitespace ends up in the log.
        logger.warning(
            "For p != 2 and p != np.inf, PBall does not make true "
            "projections onto the lp ball."
        )
    self.p = p
    self.d, self.dp = B.shape  # d and d'
    self.m = len(action_list)
    self.action_list = action_list
    self.reward_amplitudes = reward_amplitudes
    self.reward_smoothness = reward_smoothness
    self.reward_centers = reward_centers
    self.A = A
    self.B = B
    self.sigma = sigma
    self.sigma_init = sigma_init
    self.mu_init = mu_init

    # State and action spaces
    low = -1.0 * np.ones(self.d, dtype=np.float64)
    high = np.ones(self.d, dtype=np.float64)
    self.observation_space = spaces.Box(low, high)
    self.action_space = spaces.Discrete(self.m)

    # reward range
    assert len(self.reward_amplitudes) == len(self.reward_smoothness)
    assert len(self.reward_amplitudes) == len(self.reward_centers)
    if len(self.reward_amplitudes) > 0:
        # np.max / np.min accept plain lists as well as arrays, whereas
        # the .max() / .min() methods only exist on numpy arrays.
        assert np.max(self.reward_amplitudes) <= 1.0 and \
            np.min(self.reward_amplitudes) >= 0.0, \
            "reward amplitudes b_i must be in [0, 1]"
        assert np.min(self.reward_smoothness) > 0.0, \
            "reward smoothness c_i must be > 0"
    self.reward_range = (0, 1.0)

    # self.name = "Lp-Ball"

    # Initialize state
    self.reset()
def __init__(
    self,
    nrooms=7,
    reward_free=False,
    array_observation=False,
    room_size=5,
    success_probability=0.95,
    remove_walls=False,
    initial_state_distribution="center",
    include_traps=False,
):
    """Build a grid world made of ``nrooms`` square rooms.

    Rooms are laid out on rows of at most five rooms and are visited in a
    serpentine order (even room-rows left to right, odd room-rows right to
    left). Room slots beyond ``nrooms`` are filled with walls.

    Parameters
    ----------
    nrooms : int, default 7
        Number of rooms, must be > 0.
    reward_free : bool, default False
        If True, no reward location is defined (``reward_at`` is empty).
    array_observation : bool, default False
        If True, the observation space is replaced by a 2D box in [0, 1]
        and the original one is kept in ``discrete_observation_space``.
    room_size : int, default 5
        Side length of each (square) room.
    success_probability : float, default 0.95
        Forwarded to ``GridWorld.__init__``.
    remove_walls : bool, default False
        If True, the grid is created without any wall.
    initial_state_distribution : {"center", "uniform"}
        With "uniform", a uniform initial state distribution is set after
        the base class is initialized.
    include_traps : bool, default False
        If True, one extra terminal state is added per processed room slot.
    """
    assert nrooms > 0, "nrooms must be > 0"
    assert initial_state_distribution in ("center", "uniform")

    self.reward_free = reward_free
    self.array_observation = array_observation
    self.nrooms = nrooms
    self.room_size = room_size
    self.success_probability = success_probability
    self.remove_walls = remove_walls
    self.initial_state_distribution = initial_state_distribution
    self.include_traps = include_traps

    # Max number of rooms/columns per row
    self.max_rooms_per_row = 5

    # Layout of the rooms: a single row unless more than one is needed.
    self.room_nrows = math.ceil(nrooms / self.max_rooms_per_row)
    self.room_ncols = (
        self.max_rooms_per_row if self.room_nrows > 1 else nrooms
    )

    # Grid size: rooms plus one wall line between neighbouring rooms.
    nrows = self.room_size * self.room_nrows + (self.room_nrows - 1)
    ncols = self.room_size * self.room_ncols + (self.room_ncols - 1)

    stride = self.room_size + 1    # distance between consecutive wall lines
    middle = self.room_size // 2   # in-room index of doors and room centers

    # Vertical walls, leaving a door in the middle row of each room ...
    walls = []
    for room_col in range(self.room_ncols - 1):
        col = (room_col + 1) * stride - 1
        walls.extend(
            (jj, col) for jj in range(nrows) if (jj % stride) != middle
        )
    # ... then full horizontal walls (doors are opened below).
    for room_row in range(self.room_nrows - 1):
        row = (room_row + 1) * stride - 1
        walls.extend((row, jj) for jj in range(ncols))

    # Process each room slot in serpentine order.
    start_coord = None
    terminal_state = None
    self.traps = []
    count = 0
    for room_r in range(self.room_nrows):
        left_to_right = room_r % 2 == 0
        if left_to_right:
            cols_iterator = range(self.room_ncols)
        else:
            cols_iterator = reversed(range(self.room_ncols))

        for room_c in cols_iterator:
            if count >= self.nrooms:
                # Slot without a room: fill it (and its border) with walls.
                for ii in range(-1, self.room_size + 1):
                    for jj in range(-1, self.room_size + 1):
                        wall_to_include = self._convert_room_coord_to_global(
                            room_r, room_c, ii, jj
                        )
                        inside_grid = (
                            0 <= wall_to_include[0] < nrows
                            and 0 <= wall_to_include[1] < ncols
                        )
                        if inside_grid and (wall_to_include not in walls):
                            walls.append(wall_to_include)
            else:
                # Existing room: at the end of a room-row (except the last
                # one), open a door towards the next room-row.
                last_in_row = (
                    (room_c == self.room_ncols - 1) and left_to_right
                ) or ((room_c == 0) and not left_to_right)
                if last_in_row and room_r != self.room_nrows - 1:
                    wall_to_remove = self._convert_room_coord_to_global(
                        room_r, room_c, self.room_size, middle
                    )
                    if wall_to_remove in walls:
                        walls.remove(wall_to_remove)

            # Start in the center of the middle room.
            if count == nrooms // 2:
                start_coord = self._convert_room_coord_to_global(
                    room_r, room_c, middle, middle
                )
            # Terminal state in the center of the last room.
            if count == nrooms - 1:
                terminal_state = self._convert_room_coord_to_global(
                    room_r, room_c, middle, middle
                )
            # Trap next to the center of the slot.
            if include_traps:
                self.traps.append(
                    self._convert_room_coord_to_global(
                        room_r, room_c, middle + 1, middle + 1
                    )
                )
            count += 1

    terminal_states = (terminal_state,) + tuple(self.traps)

    if self.reward_free:
        reward_at = {}
    else:
        reward_at = {
            terminal_state: 1.0,
            start_coord: 0.01,
            (middle, middle): 0.1,
        }

    # Optionally discard every wall.
    if remove_walls:
        walls = ()

    GridWorld.__init__(
        self,
        nrows=nrows,
        ncols=ncols,
        start_coord=start_coord,
        terminal_states=terminal_states,
        success_probability=success_probability,
        reward_at=reward_at,
        walls=walls,
        default_reward=0.0,
    )

    # Optionally start uniformly at random over all states.
    if initial_state_distribution == "uniform":
        n_states = self.observation_space.n
        distr = np.ones(n_states) / n_states
        self.set_initial_state_distribution(distr)

    # Optionally expose observations as a 2D array in [0, 1], keeping the
    # discrete space around.
    if self.array_observation:
        self.discrete_observation_space = self.observation_space
        self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))