def __init__(self):
    """Initialise the Sarsa(lambda) agent's hyper-parameters and empty state.

    Builds the tile-coding index hash table, sets the learning constants,
    leaves every learned/episodic quantity as ``None`` (or zero) and prints
    a one-line banner identifying the agent.
    """
    # Tile-coding configuration; the hash table is sized by ``dimension``.
    self.num_of_tilings = 8
    self.dimension = 1024
    self.iht = tiles3.IHT(self.dimension)

    # Learning constants.
    self.gamma = 0.9  # discount rate
    self.epsilon = 0.05  # epsilon-greedy
    self.lmbda = 0.9  # lambda for eligibility trace
    self.alpha = 0.5 / self.num_of_tilings  # 0.5 shared across the tilings

    # Quantities that are not known yet at construction time.
    self._x = self._y = self._z = None
    self.w = None  # weight vector
    self.z = None  # z for eligibility trace
    self.Q = None
    self.last_action = self.last_state = None
    self.count = 0

    print('Sarsa(lambda) Agent with tile coding')
def __init__(self, st_low, st_high, nb_actions, learn_rate, num_tilings, init_val):
    """
    Params:
        st_low      - state space low boundary, e.g. [-1.2, -0.07] for mountain car
        st_high     - state space high boundary in all dimensions
        nb_actions  - number of actions (used as a multiplier when sizing
                      the weight table)
        learn_rate  - step size, divided by num_tilings here
        num_tilings - tiling layers - should be power of 2
                      and at least 4*len(st_low)
        init_val    - initial state-action values, split evenly
                      across the tilings
    """
    lower = np.array(st_low)
    upper = np.array(st_high)
    assert len(lower) == len(upper)

    n_dim = len(lower)

    # e.g. 8 tilings, 2d space, 3 actions
    # nb_total_tiles = (8+1) * (8+1) * 8 * 3
    tiles_per_tiling = (num_tilings + 1) ** n_dim
    nb_total_tiles = tiles_per_tiling * num_tilings * nb_actions

    # Each weight starts at an equal share of the requested initial value.
    initial_weight = init_val / num_tilings

    self._n_dim = n_dim
    self._nb_actions = nb_actions
    self._lr = learn_rate / num_tilings
    self._num_tilings = num_tilings
    # Per-dimension factor: num_tilings divided by the width of the range.
    self._scales = num_tilings / (upper - lower)
    self._iht = tiles3.IHT(nb_total_tiles)
    self._weights = np.zeros(nb_total_tiles) + initial_weight
def __init__(
    self,
    num_dimensions,
    dimension_ranges,
    num_actions,
    num_tiles=2048,
    num_tilings=8,
    scale_inputs=False,
):
    """Store the tile-coding configuration and create the initial weights.

    ``theta`` is a NumPy array holding ``num_tiles * num_actions`` values,
    each drawn as ``0.001 * random.random()``. The index hash table is
    created with ``num_tiles`` as its size argument.
    """
    self.num_dimensions = num_dimensions
    self.dimension_ranges = dimension_ranges
    self.num_actions = num_actions
    self.num_tiles = num_tiles
    self.num_tilings = num_tilings
    self.scale_inputs = scale_inputs

    # Small positive random start values. The original source kept a
    # negated variant (-0.001 * random.random()) commented out.
    weight_count = self.num_tiles * num_actions
    initial_weights = []
    for _ in range(weight_count):
        initial_weights.append(0.001 * random.random())
    self.theta = np.asarray(initial_weights)

    self.iht = tiles3.IHT(self.num_tiles)
def __init__(self, iht_size=4096, num_tilings=8, num_tiles=8):
    """
    Set up the MountainCar tile coder.

    Arguments:
    iht_size -- int, size of the index hash table, typically a power of 2
    num_tilings -- int, how many tilings to use
    num_tiles -- int, how many tiles to use; the tile coder has the same
                 width and height

    Attributes created:
    self.num_tilings -- int, copy of the num_tilings argument
    self.num_tiles -- int, copy of the num_tiles argument
    self.iht -- tc.IHT, the index hash table used by the tile coder
    """
    self.num_tilings, self.num_tiles = num_tilings, num_tiles
    self.iht = tiles3.IHT(iht_size)
def __init__(self, var_ranges, num_tiles, num_tilings):
    """Validate the tile-coder configuration and build its hash table.

    Args:
        var_ranges: one two-element range per input variable.
        num_tiles: one positive integer per input variable.
        num_tilings: integer number of tilings.

    Raises:
        AssertionError: if the lengths differ, a tile count is not a
            positive int, a range does not have exactly two entries, or
            ``num_tilings`` is not an int.
    """
    assert len(var_ranges) == len(num_tiles), (
        "Input variables length do not match"
    )
    assert all(isinstance(count, int) and count > 0 for count in num_tiles), (
        "number of tiles should be an array of integers > 0"
    )
    assert all(len(bounds) == 2 for bounds in var_ranges), (
        "variable range should be a finite numeric interval"
    )
    assert isinstance(num_tilings, int), (
        "number of tilings should be an integer"
    )

    self.var_ranges = var_ranges
    self.num_tiles = num_tiles
    self.num_tilings = num_tilings
    self.num_var = len(var_ranges)

    # Zero-filled here; get_coeffs() is called right after.
    # NOTE(review): presumably get_coeffs() fills var_coeff - confirm.
    self.var_coeff = np.zeros(self.num_var, dtype=np.float32)
    self.get_coeffs()

    # The hash table is sized by whatever calc_iht_size() returns.
    self.iht_size = self.calc_iht_size()
    self.iht = tiles3.IHT(self.iht_size)
def __init__(self):
    """Set up the agent's weights, trace, hash table and 3-D plot.

    Calls the parent initialiser with ``action_size=3`` and
    ``discount=1.0``, creates zero-filled weight and trace column vectors,
    builds the index hash table, then opens a Matplotlib figure with a 3-D
    axes and switches Matplotlib to interactive mode.
    """
    super().__init__(action_size=3, discount=1.0)

    # One local for both the weight vector and the hash table so that the
    # two sizes cannot drift apart.
    num_weights = 2048

    # NOTE(review): 0.3 / 8 looks like a step size split across 8 tilings;
    # confirm against the tile-coding call site.
    self.step_size = 0.3 / 8
    self.w = np.zeros((num_weights, 1), dtype=float)
    self.w_last = np.copy(self.w)

    # difference
    self.R_mean = 0.0
    self.beta = 0.0

    # trace
    self.z = np.zeros_like(self.w)
    self.lambd = 0.92

    self.iht = tiles3.IHT(num_weights)

    self.fig = plt.figure()
    self.ax = Axes3D(self.fig)
    # Newer Matplotlib releases no longer attach an Axes3D to its figure on
    # construction, which leaves the figure blank. Attach it explicitly
    # when that has not already happened (older releases still do it).
    if self.ax not in self.fig.axes:
        self.fig.add_axes(self.ax)
    plt.ion()
def __init__(self, step_size, lam, trace_update=accumulating_trace,
             num_of_tilings=8, max_size=4096):
    """Create zeroed weights and trace plus the tile-coding hash table.

    Args:
        step_size: base step size; stored divided by ``num_of_tilings``.
        lam: stored unchanged as ``self.lam``.
        trace_update: stored unchanged as ``self.trace_update``.
        num_of_tilings: number of tilings; also the numerator of the
            position and velocity scale factors.
        max_size: length of the weight and trace vectors and the size
            argument given to the hash table.
    """
    self.lam = lam
    self.trace_update = trace_update
    self.num_of_tilings = num_of_tilings
    self.max_size = max_size

    # The step size is shared out across the tilings.
    self.step_size = step_size / num_of_tilings

    self.hash_table = t3.IHT(max_size)
    self.weights = np.zeros(max_size)
    self.trace = np.zeros(max_size)

    # Scale factors: number of tilings divided by the width of each range.
    position_span = POSITION_MAX - POSITION_MIN
    velocity_span = VELOCITY_MAX - VELOCITY_MIN
    self.position_scale = self.num_of_tilings / position_span
    self.velocity_scale = self.num_of_tilings / velocity_span
def __init__(self, iht_size=4096, num_tilings=8, num_tiles=8):
    """Create the index hash table and remember the tiling configuration.

    Args:
        iht_size: size argument passed to ``tc.IHT``.
        num_tilings: stored unchanged as ``self.num_tilings``.
        num_tiles: stored unchanged as ``self.num_tiles``.
    """
    hash_table = tc.IHT(iht_size)
    self.iht = hash_table
    self.num_tilings, self.num_tiles = num_tilings, num_tiles
def setUp(self):
    """Seed ``random`` with 123 and create a new IHT of size 1024."""
    # Seed first so the random state is fixed before the hash table
    # is constructed.
    seed = 123
    random.seed(seed)

    table_size = 1024
    self.iht = tc.IHT(table_size)