def __init__(self, D, T, L, mem_max, ms, radius, para, asgd=False, linT=6, init=False, W_f=0, V_f=0):
    """
    Build the Q function and either adopt or create the W and V functions

    Parameters
    -----------
    D : int
        Number of state variables (the Q function is built on D + 1 dimensions)

    T : list of integers
        Number of tiles per dimension for the Q function

    L : int
        Number of tilings or 'layers' for the Q function

    mem_max : float
        Tile array size for the Q function

    ms : int
        Minimum samples per tile for the Q function

    radius : float
        Stored on the instance. When ``init`` is False it also sets the
        resolution of W_f and V_f: int((1 / radius) / 2) tiles per
        dimension (at least 1) and int(130 / tiles) layers.

    para : object
        Must provide ``CPU_CORES`` and ``beta`` attributes

    asgd : bool, optional (default = False)
        Stored as ``self.asgd``

    linT : int, optional (default = 6)
        Passed as ``linT`` to the W_f and V_f tile codes

    init : bool, optional (default = False)
        If True, use the supplied ``W_f`` and ``V_f`` instead of building
        new ones, and set ``self.first`` to False

    W_f, V_f : optional (default = 0)
        Pre-built function approximators, only used when ``init`` is True
    """
    cores = para.CPU_CORES

    self.Q_f = Tilecode(D + 1, T, L, mem_max, min_sample=ms, cores=cores)
    self.radius = radius

    if init:
        self.W_f = W_f
        self.V_f = V_f
        self.first = False
    else:
        # int() truncates to 0 for radius > 0.5, which used to raise
        # ZeroDivisionError on the next line; keep at least one tile.
        tiles = max(1, int((1 / radius) / 2))
        layers = int(130 / tiles)
        grid = [tiles] * D
        self.W_f = Tilecode(D, grid, layers, mem_max=1, lin_spline=True, linT=linT, cores=cores)
        self.V_f = Tilecode(D, grid, layers, mem_max=1, lin_spline=True, linT=linT, cores=cores)
        self.first = True

    self.D = D
    self.beta = para.beta
    self.CORES = cores
    self.asgd = asgd
def resetQ(self, D, T, L, mem_max=1, ms=1):
    """
    Replace the Q function with a freshly constructed tile code

    Parameters
    -----------
    D : int
        Number of state variables (the Q function gets D + 1 dimensions)

    T : list of integers
        Number of tiles per dimension

    L : int
        Number of tilings or 'layers'

    mem_max : float, optional (default = 1)
        Tile array size, values less than 1 turn on hashing

    ms : int, optional (default = 1)
        Minimum samples per tile for the Q function
    """
    q_dims = D + 1
    fresh_q = Tilecode(q_dims, T, L, mem_max, min_sample=ms, cores=self.CORES)
    self.Q_f = fresh_q
def fit(self, X, radius, prop=1):
    """
    Fit a tile coding data structure to X

    Parameters
    ----------
    X : array of shape [N, D]
        Input data (unscaled)

    radius : float
        Radius for nearest neighbor queries. The number of tiles in
        dimension i is int((b[i] - a[i]) / radius) + 1, where b and a are
        the max and min values of X[:, i].

    prop : optional (default = 1)
        Not used by this method.
    """
    lowest = np.min(X, axis=0)
    highest = np.max(X, axis=0)
    span = highest - lowest

    # One tile count per dimension, always at least 1 because of the "+ 1".
    tiles_per_dim = [int(span[i] / radius) + 1 for i in range(self.D)]

    self.tile = Tilecode(
        self.D,
        tiles_per_dim,
        self.L,
        mem_max=self.mem_max,
        cores=self.cores,
        offset=self.offset,
    )

    # Unsupervised fit: every sample is given a target of one.
    unit_targets = np.ones(X.shape[0])
    self.tile.fit(X, unit_targets, unsupervised=True, copy=True)
def resetQ(self, D, T, L, mem_max, ms):
    """
    Replace the Q function with a freshly constructed tile code

    Parameters
    -----------
    D : int
        Number of state variables (the Q function gets D + 1 dimensions)

    T : list of integers
        Passed to Tilecode as its second argument

    L : int
        Passed to Tilecode as its third argument

    mem_max : float
        Passed to Tilecode as its fourth argument

    ms : int
        Passed to Tilecode as ``min_sample``
    """
    q_dims = D + 1
    fresh_q = Tilecode(q_dims, T, L, mem_max, min_sample=ms, cores=self.CORES)
    self.Q_f = fresh_q
def __init__(self, D, T, L, radius, beta, ms=1, mem_max=1, cores=1, ASGD=True, linT=6):
    """
    Build the Q function together with the policy and value functions

    Parameters
    -----------
    D : int
        Number of state variables (the Q function is built on D + 1 dimensions)

    T : list of integers
        Number of tiles per dimension for the Q function

    L : int
        Number of tilings or 'layers' for the Q function

    radius : float
        Stored on the instance and used to set the resolution of the
        policy and value functions: int((1 / radius) / 2) tiles per
        dimension (at least 1) and int(130 / tiles) layers.

    beta : float
        Stored as ``self.beta``

    ms : int, optional (default = 1)
        Minimum samples per tile for the Q function

    mem_max : float, optional (default = 1)
        Tile array size for the Q function

    cores : int, optional (default = 1)
        Passed as ``cores`` to every tile code and stored as ``self.CORES``

    ASGD : bool, optional (default = True)
        Stored as ``self.asgd``

    linT : int, optional (default = 6)
        Passed as ``linT`` to the policy and value tile codes
    """
    self.Q_f = Tilecode(D + 1, T, L, mem_max, min_sample=ms, cores=cores)
    self.radius = radius

    # int() truncates to 0 for radius > 0.5, which used to raise
    # ZeroDivisionError when computing the layer count; keep at least one tile.
    tiles = max(1, int((1 / radius) / 2))
    layers = int(130 / tiles)
    grid = [tiles] * D

    # Initialize policy function
    self.A_f = Tilecode(D, grid, layers, mem_max=1, lin_spline=True, linT=linT, cores=cores)

    # Initialize value function
    self.V_f = Tilecode(D, grid, layers, mem_max=1, lin_spline=True, linT=linT, cores=cores)

    self.first = True
    self.D = D
    self.beta = beta
    self.CORES = cores
    self.asgd = ASGD
def __init__(self, D, maxgrid, radius, para, num_split=40, num_leaf=20, num_est=215):
    """
    Build a tree-based Q function together with tile-coded W and V functions

    Parameters
    -----------
    D : int
        Number of dimensions of the W and V tile codes

    maxgrid : object
        Stored as ``self.maxgrid``

    radius : float
        Stored on the instance and used to set the resolution of W_f and
        V_f: (1 / radius) / 1.8 tiles per dimension and
        int(140 / tiles) layers.

    para : object
        Must provide ``CPU_CORES`` and ``beta`` attributes

    num_split : int, optional (default = 40)
        Passed to Tree as ``min_samples_split``

    num_leaf : int, optional (default = 20)
        Passed to Tree as ``min_samples_leaf``

    num_est : int, optional (default = 215)
        Passed to Tree as ``n_estimators``
    """
    cores = para.CPU_CORES

    self.Q_f = Tree(
        n_estimators=num_est,
        min_samples_split=num_split,
        min_samples_leaf=num_leaf,
        n_jobs=cores,
    )

    # NOTE(review): the tile count is a float here (no int() truncation), so
    # the list handed to Tilecode holds non-integer values -- confirm that
    # Tilecode accepts this.
    tiles = (1 / radius) / 1.8
    layers = int(140 / tiles)
    grid = [tiles] * D

    self.W_f = Tilecode(D, grid, layers, mem_max=1, lin_spline=True, linT=7, cores=cores)
    self.V_f = Tilecode(D, grid, layers, mem_max=1, lin_spline=True, linT=7, cores=cores)

    self.maxgrid = maxgrid
    self.radius = radius
    self.D = D
    self.first = True
    self.beta = para.beta
    self.CORES = cores
def __init__(self, D, T, L, mem_max=1, offset='optimal', cores=1):
    """
    Wrap a Tilecode instance built with a minimum sample of one per tile

    Parameters
    -----------
    D : int
        Number of dimensions, passed to Tilecode

    T : list of integers
        Passed to Tilecode as its second argument

    L : int
        Passed to Tilecode as its third argument

    mem_max : float, optional (default = 1)
        Passed to Tilecode as ``mem_max``

    offset : str, optional (default = 'optimal')
        Passed to Tilecode as ``offset``. When D is 1 and the value is
        'optimal', 'uniform' is used instead.

    cores : int, optional (default = 1)
        Passed to Tilecode as ``cores``
    """
    swap_to_uniform = D == 1 and offset == 'optimal'
    chosen_offset = 'uniform' if swap_to_uniform else offset

    self.tile = Tilecode(
        D,
        T,
        L,
        mem_max=mem_max,
        min_sample=1,
        offset=chosen_offset,
        cores=cores,
    )
def __init__(self, D, T, L, mem_max=1, min_sample=1, offset='optimal', lin_spline=False, linT=7, cores=4):
    """
    Wrap a Tilecode instance, forwarding all settings positionally

    Parameters
    -----------
    D : int
        Number of dimensions, passed to Tilecode

    T : list of integers
        Passed to Tilecode as its second argument

    L : int
        Passed to Tilecode as its third argument

    mem_max : float, optional (default = 1)
    min_sample : int, optional (default = 1)
    lin_spline : bool, optional (default = False)
    linT : int, optional (default = 7)
    cores : int, optional (default = 4)
        Forwarded unchanged to Tilecode, in the order of this signature

    offset : str, optional (default = 'optimal')
        Forwarded to Tilecode. When D is 1 and the value is 'optimal',
        'uniform' is used instead.
    """
    swap_to_uniform = D == 1 and offset == 'optimal'
    chosen_offset = 'uniform' if swap_to_uniform else offset

    # Arguments stay positional so they land in the same Tilecode slots
    # as before.
    self.tile = Tilecode(
        D,
        T,
        L,
        mem_max,
        min_sample,
        chosen_offset,
        lin_spline,
        linT,
        cores,
    )
def fit(self, X, radius, prop=1):
    """
    Fit a density function to X and return a sample grid with a maximum
    of M points

    Parameters
    ----------
    X : array of shape [N, D]
        Input data (unscaled)

    radius : float
        Minimum distance between points. This determines the tiling: the
        number of tiles in dimension i is int((b[i] - a[i]) / radius) + 1,
        where b and a are the max and min values of X[:, i].

    prop : float in (0, 1], optional (default = 1)
        Proportion of sample points to return (lowest density points
        are excluded)

    Returns
    -------
    GRID, array of shape [M, D]
        The sample grid with M < N points

    Notes
    -----
    The maximum number of points reported by the tile code is stored in
    ``self.max_points``.
    """
    lowest = np.min(X, axis=0)
    highest = np.max(X, axis=0)

    # One tile count per dimension, always at least 1 because of the "+ 1".
    T = [int((highest[i] - lowest[i]) / radius) + 1 for i in range(self.D)]

    self.tile = Tilecode(self.D, T, self.L, mem_max=self.mem_max, cores=self.cores, offset=self.offset)

    GRID, self.max_points = self.tile.fit_samplegrid(X, prop)

    return GRID