def update(self):
    """Refit the group GP on all observed group rewards, then refresh the
    cached posteriors.

    Side effects:
        - Creates ``self.gpg`` on the first call, or replaces its data via
          ``set_XY_group`` on later calls.
        - Re-optimizes the GP hyperparameters.
        - Overwrites ``self.mu`` / ``self.sigma`` (per-arm posterior) and
          ``self.group_mu`` / ``self.group_sigma`` (per-group posterior,
          aggregated through ``self.A``).

    NOTE(review): GPy-style ``predict`` typically returns (mean, variance),
    so ``self.sigma`` is presumably a variance despite its name — confirm.
    """
    # update the posterior mean and std for each arm
    num_sample = len(self.rewards)
    if self.gpg is None:
        # First fit: build the group GP from scratch. One row of
        # sample_groups per observed (group) reward so far.
        # NOTE(review): noise_var is hard-coded to 0.005 rather than derived
        # from self.noise — confirm this is intentional.
        self.gpg = GPRegression_Group(
            self.arms,
            np.asarray(self.rewards).reshape(num_sample, 1),
            self.kernel,
            noise_var=0.005,
            A=self.sample_groups[:num_sample, :].reshape(
                num_sample, self.num_arms))
    else:
        # Later fits: keep the model, swap in the enlarged data set.
        self.gpg.set_XY_group(X=self.arms,
                              Y=np.asarray(self.rewards).reshape(
                                  num_sample, 1),
                              A=self.sample_groups[:num_sample, :].reshape(
                                  num_sample, self.num_arms))
    # Re-tune kernel hyperparameters on the updated data.
    self.gpg.optimize(messages=True)
    # pred for indi
    self.mu, self.sigma = self.gpg.predict(self.arms)
    # pred for group
    # NOTE(review): assumes self.A (group assignment matrix) was set by the
    # caller before update() runs — verify against the driving loop.
    self.group_mu, self.group_sigma = self.gpg.predict(self.arms, A_ast=self.A)
def rec(self):
    """Run the GP tree-search loop until the sample budget ``self.n`` is
    spent, and return the per-iteration regret trace.

    Each iteration either:
      - expands the max-b-value leaf (when its posterior width ``threshold``
        is already below the resolution bound ``V`` at its depth), or
      - samples that leaf, refits the GP on all observations, and records
        the current simple regret.

    Returns:
        list: ``self.regret_list`` with one entry per sampling iteration.
    """
    # self.bvalues[self.root] = self.bvalue(self.root)
    while self.sample_count < self.n:
        print(self.sample_count)
        if self.sample_count == 0:
            # Bootstrap: observe the root once so the GP has data to fit.
            A,X,Y = self.add_obs(self.root)
            kernel = GPy.kern.RBF(input_dim=1,
                                  variance=self.kernel_var,
                                  lengthscale=self.lengthscale)
            self.m = GPRegression_Group(X, Y, kernel, A = A,
                                        noise_var=self.gp_noise_var)
            # self.m.optimize()
            self.update_bvalue()
            regret = self.regret()
            self.regret_list.append(regret)
        else:
            # Score any leaf that has not been evaluated yet.
            for x in self.leaves:
                if x not in self.evaluated_nodes:
                    self.bvalues[x] = self.bvalue(x)
                    # self.bvalues[x] = np.inf
            # Greedy selection: leaf with the largest b-value (UCB-like).
            selected_node = max(self.bvalues, key = self.bvalues.get)
            # print('selected node: ', selected_node)
            if self.threshold(selected_node) <= self.V(selected_node.depth) and selected_node.depth <= self.hmax:
                # Posterior is tight enough at this depth: expand instead of
                # sampling. Remove the node from the candidate pool and keep
                # the deepest expanded node as the running recommendation.
                # print('threshold: ', self.threshold(selected_node))
                # print('self.v: ', self.V(selected_node.depth))
                del self.bvalues[selected_node]
                if selected_node.depth > self.rec_node.depth:
                    self.rec_node = selected_node
                self.expand(selected_node)
            else:
                # Otherwise sample the node and refit the GP from scratch on
                # the full observation history (bounded hyperparameters).
                # print('before add obs')
                A,X,Y = self.add_obs(selected_node)
                # print('finish add obs')
                kernel = GPy.kern.RBF(input_dim=1,
                                      variance=self.kernel_var,
                                      lengthscale=self.lengthscale)
                kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],self.lengthscale_bounds[1], warning=False)
                kernel.variance.constrain_bounded(self.kernel_var_bounds[0], self.kernel_var_bounds[1], warning=False)
                self.m = GPRegression_Group(X, Y, kernel, A = A,
                                            noise_var=self.gp_noise_var)
                # self.m.set_XY_group(X=X,Y=Y,A=A)
                if self.opt_flag:
                    self.m.optimize()
                self.update_bvalue()
                regret = self.regret()
                self.regret_list.append(regret)
    return self.regret_list
class Pipeline():
    """GPR-G + form group by cluster + bandits algorithm (UCB/SR)

    Orchestrates a group-observation bandit experiment: synthetic arms are
    generated once, rewards are observed per *group* of arms, a group GP
    (``GPRegression_Group``) is fitted to recover per-arm posteriors, and
    groups are (re)formed by clustering.

    NOTE(review): relies on module-level globals (``dim``,
    ``X_train_range_low``, ``X_train_range_high``, ``x_shift``) and the
    helper ``generate_data_func`` defined elsewhere in the project.
    """

    def __init__(self, budget, num_arms, num_group, group_method='kmeans',
                 noise=0.1, fixed_noise=None, dynamic_grouping=False):
        """
        Args:
            budget: total number of group samples allowed.
            num_arms: number of arms (and training points) to generate.
            num_group: number of groups to form.
            group_method: 'kmeans' (cluster arms) or 'identity'
                (one group per arm); see form_group().
            noise: std of the Gaussian noise added to sampled group rewards.
            fixed_noise: unused for now (see TODO below).
            dynamic_grouping: if True, groups are re-formed over active
                arms/groups as the run progresses.
        """
        # TODO: implement fixed_noise
        self.budget = budget
        self.num_arms = num_arms
        self.num_group = num_group
        self.fixed_noise = fixed_noise
        self.group_method = group_method
        self.noise = noise
        self.dynamic_grouping = dynamic_grouping
        # Synthetic problem: arm features, true arm values, and noisy labels.
        self.arms, self.f_train, self.Y_train = generate_data_func(
            self.num_arms, self.num_arms, dim=dim,
            X_train_range_low=X_train_range_low,
            X_train_range_high=X_train_range_high,
            x_shift=x_shift, func_type='sin')
        # self.idx_arms_dict = self.generate_idx_arms(self.arms)
        # print(self.idx_arms_dict)
        # Indices of arms still in play (elimination-style algorithms shrink this).
        self.active_arm_idx = set(list(range(self.num_arms)))
        # choose RBF as default
        self.kernel = GPy.kern.RBF(input_dim=dim, variance=1., lengthscale=1.)
        # Row t holds the group-indicator vector used for the t-th sample.
        self.sample_groups = np.zeros((self.budget, self.num_arms))
        self.gpg = None  # group GP model, created lazily in update()
        # Per-arm posterior caches (prior mean 0, unit spread before any fit).
        self.mu = np.zeros((self.num_arms, ))
        self.sigma = np.ones((self.num_arms, ))
        self.rewards = []  # observed group rewards, in sampling order

    # def generate_idx_arms(self, arms):
    #     idx_arms_dict = {}
    #     for idx, arm in enumerate(arms):
    #         # print(arm)
    #         arm = str(arm)
    #         arm = list(arm)
    #         print(arm)
    #         idx_arms_dict[idx] = arm
    #         idx_arms_dict[arm] = idx
    #     return idx_arms_dict

    def sample(self, t):
        """Draw the noisy reward of the group scheduled at round ``t`` and
        append it to ``self.rewards``."""
        # sample group reward and add it into rewards record
        sample = self.sample_groups[t, :].dot(
            self.f_train) + np.random.randn() * self.noise
        # print(sample)
        self.rewards.append(sample)

    def update(self):
        """Refit the group GP on all observed rewards and refresh the cached
        per-arm (``mu``/``sigma``) and per-group (``group_mu``/``group_sigma``)
        posteriors.

        NOTE(review): GPy-style ``predict`` typically returns
        (mean, variance), so ``sigma`` is presumably a variance — confirm.
        """
        # update the posterior mean and std for each arm
        num_sample = len(self.rewards)
        if self.gpg is None:
            # First fit: build the model from scratch.
            # NOTE(review): noise_var hard-coded to 0.005, not self.noise —
            # confirm intentional.
            self.gpg = GPRegression_Group(
                self.arms,
                np.asarray(self.rewards).reshape(num_sample, 1),
                self.kernel,
                noise_var=0.005,
                A=self.sample_groups[:num_sample, :].reshape(
                    num_sample, self.num_arms))
        else:
            # Later fits: swap in the enlarged data set.
            self.gpg.set_XY_group(X=self.arms,
                                  Y=np.asarray(self.rewards).reshape(
                                      num_sample, 1),
                                  A=self.sample_groups[:num_sample, :].reshape(
                                      num_sample, self.num_arms))
        self.gpg.optimize(messages=True)
        # pred for indi
        self.mu, self.sigma = self.gpg.predict(self.arms)
        # pred for group
        # NOTE(review): assumes self.A was assigned by the caller — verify.
        self.group_mu, self.group_sigma = self.gpg.predict(self.arms,
                                                           A_ast=self.A)

    def form_group(self, num_group):
        """Build and return the group assignment matrix A in R^{g x n}.

        Each row represents one group; entries are 1 for the arms in that
        group and 0 otherwise. Only currently active arms are assigned.

        Returns:
            np.ndarray: (num_group, num_arms) for 'kmeans', or an identity
            matrix over the active arms for 'identity'. Implicitly returns
            None for any other group_method.
        """
        # construct matrix A \in R^{g * n}
        # each row represents one group
        # the arms in the group are set to 1, otherwise 0
        # print(self.active_arm_idx)
        sorted_active_arm_idx = np.asarray(np.sort(list(self.active_arm_idx)))
        data = self.arms[sorted_active_arm_idx, :]
        # Current posterior means serve as an extra clustering feature.
        label = self.mu.reshape(self.mu.shape[0], 1)[sorted_active_arm_idx, :]
        if self.group_method == 'kmeans':
            # Cluster on standardized (features, posterior mean) pairs so
            # neither dominates the distance metric.
            cluster_features = np.concatenate((data, label), axis=1)
            cluster_features = StandardScaler().fit_transform(cluster_features)
            # cluster_features = data
            kmeans = KMeans(n_clusters=num_group, init='k-means++',
                            random_state=0).fit(cluster_features)
            group_idx = kmeans.labels_
            A = np.zeros((num_group, self.num_arms))
            for i, idx in enumerate(sorted_active_arm_idx):
                A[group_idx[i], idx] = 1
            # check whether we need to change group centers code
            self.group_centers = kmeans.cluster_centers_
            if self.dynamic_grouping:
                # print('chaning group idx.')
                self.active_group_idx = set(list(range(num_group)))
            return A
        elif self.group_method == 'identity':
            # One group per active arm (no aggregation).
            self.group_centers = data
            return np.eye(N=data.shape[0])

    def evaluation(self):
        """Print regression-quality metrics (MSE, R^2) of the per-arm
        posterior mean against the true labels, and — when grouping is
        static — of the group posterior against the aggregated labels."""
        # TODO: how to evaluate the pipeline?
        # evaluate the prediction when budget is run out?
        print('Prediction for individual:')
        print('mean squared error: ',
              mean_squared_error(self.Y_train, self.mu))
        # print('Y train: ', self.Y_train)
        # print('mu: ', self.mu)
        print('r2 score: ', r2_score(self.Y_train, self.mu))
        if not self.dynamic_grouping:
            print('Prediction for group (select A_ast = A):')
            group_train = self.A.dot(self.Y_train)
            print('mean squared error: ',
                  mean_squared_error(group_train, self.group_mu))
            print('r2 score: ', r2_score(group_train, self.group_mu))
class GPTree(GPOO):
    """ Algorithm in Shekhar et al. 2018

    Tree-search GP optimization: the b-value of a leaf combines its own UCB
    with its parent's UCB plus a smoothness bound V(h), and a leaf is
    expanded once its posterior width drops below V at its depth.
    """

    def __init__(self, f, delta, root_cell, n, k=2, d=1, s=1,
                 reward_type = 'center', sigma = 0.1, opt_x= None,
                 alpha = 0.5, rho = 0.5, u = 2.0, v1 = 1.0, v2 = 1.0,
                 C3 = 1.0, C2 = 1.0, D1=1, **kwarg) -> None:
        """
        alpha, rho (0,1)
        u >0
        0<v2<=1<=v1
        C2,C3 > 0 (corollary 1)
        D1 >= 0 metric dimension (Defi 2)
        """
        # Depth cap grows logarithmically with the budget n.
        hmax = np.log(n) * (1 + 1/alpha) / (2 * alpha * np.log(1/rho))  # e.q. 3.4
        super().__init__(f, delta, root_cell, n, k, d, s, reward_type,
                         sigma, opt_x, hmax, **kwarg)
        # TODO: might need to change constant rate
        self.beta_n = 0.25 * np.sqrt(np.log(n) + u)  # exploration weight
        self.betastd = {}
        # Todo: the following parameters might need to be chosen more carefully
        self.rho = rho
        self.u = u  # claim 1 holds for probability at least 1 - e^{-u}
        self.v1 = v1
        self.v2 = v2
        self.C3 = C3
        self.C4 = C2 + 2 * np.log(n**2 * np.pi ** 2/6)
        self.D1 = D1

    def g(self,x):
        """In assumption A2"""
        # TODO: smoothness assumption, might needs to change later
        return 0.1 * x

    def V(self, h):
        """In claim 2

        Resolution/smoothness bound at depth ``h``; shrinks with depth since
        rho in (0,1).
        """
        # TODO
        temp = np.sqrt(2 * self.u + self.C4 + h * np.log(self.k)
                       + 4 * self.D1 * np.log(1/self.g(self.v1 * self.rho ** h)))
        return 4 * self.g(self.v1 * self.rho ** h) * (temp + self.C3)

    def threshold(self,x):
        """Posterior-width term beta_n * std at node ``x`` (mean unused)."""
        mu, var = self.m.predict(x.features, self.A)
        return self.beta_n * np.sqrt(var)

    def bvalue(self, x):
        """b-value of node ``x``: min of its own UCB and its parent's UCB
        plus V(depth-1), then inflated by V(depth). The root has no parent,
        so only its own UCB is used."""
        mu, var = self.m.predict(x.features, self.A)
        term1 = mu + self.beta_n * np.sqrt(var)
        if x.depth > 0:
            mu_p, var_p = self.m.predict(x.parent.features, self.A)
            term2 = mu_p + self.beta_n * np.sqrt(var_p) + self.V(x.depth - 1)
            U = np.min([term1, term2])
        else:
            U = term1
        return U + self.V(x.depth)

    def rec(self):
        """Run the search until the budget ``self.n`` is spent; return the
        per-iteration regret trace (``self.regret_list``)."""
        # self.bvalues[self.root] = self.bvalue(self.root)
        while self.sample_count < self.n:
            print(self.sample_count)
            if self.sample_count == 0:
                # Bootstrap: observe the root so the GP has data to fit.
                A,X,Y = self.add_obs(self.root)
                kernel = GPy.kern.RBF(input_dim=1,
                                      variance=self.kernel_var,
                                      lengthscale=self.lengthscale)
                self.m = GPRegression_Group(X, Y, kernel, A = A,
                                            noise_var=self.gp_noise_var)
                # self.m.optimize()
                self.update_bvalue()
                regret = self.regret()
                self.regret_list.append(regret)
            else:
                # Score any leaf not evaluated yet, then pick the max b-value.
                for x in self.leaves:
                    if x not in self.evaluated_nodes:
                        self.bvalues[x] = self.bvalue(x)
                        # self.bvalues[x] = np.inf
                selected_node = max(self.bvalues, key = self.bvalues.get)
                # print('selected node: ', selected_node)
                if self.threshold(selected_node) <= self.V(selected_node.depth) and selected_node.depth <= self.hmax:
                    # Posterior tight enough: expand instead of sampling and
                    # keep the deepest expanded node as the recommendation.
                    # print('threshold: ', self.threshold(selected_node))
                    # print('self.v: ', self.V(selected_node.depth))
                    del self.bvalues[selected_node]
                    if selected_node.depth > self.rec_node.depth:
                        self.rec_node = selected_node
                    self.expand(selected_node)
                else:
                    # Sample the node and refit the GP on the full history
                    # with bounded hyperparameters.
                    # print('before add obs')
                    A,X,Y = self.add_obs(selected_node)
                    # print('finish add obs')
                    kernel = GPy.kern.RBF(input_dim=1,
                                          variance=self.kernel_var,
                                          lengthscale=self.lengthscale)
                    kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],self.lengthscale_bounds[1], warning=False)
                    kernel.variance.constrain_bounded(self.kernel_var_bounds[0], self.kernel_var_bounds[1], warning=False)
                    self.m = GPRegression_Group(X, Y, kernel, A = A,
                                                noise_var=self.gp_noise_var)
                    # self.m.set_XY_group(X=X,Y=Y,A=A)
                    if self.opt_flag:
                        self.m.optimize()
                    self.update_bvalue()
                    regret = self.regret()
                    self.regret_list.append(regret)
        return self.regret_list
def rec(self):
    """Run the GP-StoOO loop until the sample budget ``self.n`` is spent.

    Initial phase: expand the root and observe each child once so the GP has
    data. Main loop: keep sampling the current max-b-value leaf until its
    confidence width drops below delta(depth); then record it as a candidate
    recommendation, expand it, and move to the next-best leaf. The GP is
    refit from scratch on every new observation.

    Returns:
        list: ``self.regret_list`` with one entry per sampling iteration.
    """
    self.T_dict[self.root] = 0
    self.expand(self.root)
    # Bootstrap: observe every child of the root once.
    for x in self.leaves:
        A, X, Y = self.add_obs(x)
        self.rec_node = x
        regret = self.regret()
        self.regret_list.append(regret)
    kernel = GPy.kern.RBF(input_dim=1, variance=self.kernel_var,
                          lengthscale=self.lengthscale)
    self.m = GPRegression_Group(X, Y, kernel, A=A,
                                noise_var=self.gp_noise_var)
    self.update_bvalue()
    selected_node = max(self.bvalues, key=self.bvalues.get)
    while self.sample_count < self.n:  # n is the sample budget
        if self.delta(selected_node.depth) >= self.threshold(selected_node) and selected_node.depth <= self.hmax:
            # Confidence at this node is tight enough: retire it from the
            # candidate pool and consider it as the recommendation.
            del self.bvalues[selected_node]
            if selected_node.depth > self.rec_node.depth:
                self.rec_node = selected_node
            elif selected_node.depth == self.rec_node.depth:
                # BUG FIX: predict() returns a (mean, variance) pair; the
                # original code compared the pairs directly, so equal means
                # were tie-broken by variance (and multi-element outputs
                # would raise). Compare the posterior means explicitly.
                mu_sel, _ = self.m.predict(selected_node.features, A_ast=self.A)
                mu_rec, _ = self.m.predict(self.rec_node.features, A_ast=self.A)
                if mu_sel > mu_rec:
                    self.rec_node = selected_node
            self.expand(selected_node)
            selected_node = max(self.bvalues, key=self.bvalues.get)
        # Sample the selected leaf and refit the GP on the full history,
        # with bounded hyperparameters to keep optimization sane.
        A, X, Y = self.add_obs(selected_node)
        kernel = GPy.kern.RBF(input_dim=1, variance=self.kernel_var,
                              lengthscale=self.lengthscale)
        kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],
                                             self.lengthscale_bounds[1],
                                             warning=False)
        kernel.variance.constrain_bounded(self.kernel_var_bounds[0],
                                          self.kernel_var_bounds[1],
                                          warning=False)
        self.m = GPRegression_Group(X, Y, kernel, A=A,
                                    noise_var=self.gp_noise_var)
        if self.opt_flag:
            self.m.optimize()
        self.update_bvalue()
        regret = self.regret()
        self.regret_list.append(regret)
    return self.regret_list
class GPOO(Base):
    """ We extend StoOO to the case where f is sampled from GP.

    A tree over the search cell is grown adaptively: the leaf with the
    largest b-value (GP-UCB plus a depth-dependent slack delta(h)) is
    sampled until its posterior width falls below delta(h), then expanded.
    Observations are aggregated through a group matrix A so that each
    sample averages s feature points (see add_obs).
    """

    def __init__(self, f, delta, root_cell, n, k=2, d=1, s=1,
                 reward_type = 'center', sigma = 0.1, opt_x = None,
                 hmax = 4, **kwarg) -> None:
        """
        Args:
            f, delta, root_cell, n, k, d, s, reward_type, sigma, opt_x:
                forwarded to Base; n is the total sample budget, s the
                number of feature points aggregated per observation.
            hmax: maximum tree depth a node may be expanded at.
            **kwarg: must contain exactly the four GP settings
                'lengthscale', 'kernel_var', 'gp_noise_var', 'opt_flag'.

        Raises:
            Exception: if kwarg does not contain exactly four entries.
        """
        super().__init__(f, delta, root_cell, n, k, d, s, reward_type,
                         sigma, opt_x)
        # Running observation history (one row per sample).
        self.X_list = []
        self.A_list = []
        self.Y_list = []
        self.sample_count = 0
        self.hmax = hmax
        # Hyperparameter bounds used when refitting the GP each round.
        self.lengthscale_bounds = [0.01, 10]
        self.kernel_var_bounds = [0.05, 10]
        if len(kwarg) == 4:
            # for gp case, we need to input 4 paras in the following
            self.lengthscale = kwarg['lengthscale']
            self.kernel_var = kwarg['kernel_var']
            self.gp_noise_var = kwarg['gp_noise_var']
            self.opt_flag = kwarg['opt_flag']
        else:
            print(len(kwarg))
            raise Exception(
                'GPOO expects exactly 4 kwargs: lengthscale, kernel_var, '
                'gp_noise_var, opt_flag; got %d' % len(kwarg))

    def beta(self, t=1):
        """Exploration coefficient at round ``t``.

        NOTE(review): requires t >= 1; t = 0 would take log of 0.
        """
        # return 0.5 * np.log(t)
        return 0.1 * np.log(np.pi**2 * t**2/(6 * 0.1))

    def add_obs(self, x):
        """Sample reward of x and add observation x to X_list, A_list, Y_list

        Return array A,X,Y which contains all previous observations
        """
        # One new group row: uniform weights 1/s over this sample's s points.
        A_x = np.zeros((1, self.n * self.s))
        A_x[0, self.sample_count * self.s:(self.sample_count+1) * self.s] = \
            1.0/self.s * np.ones((1, self.s))
        self.A_list.append(A_x)
        self.X_list.append(x.features)
        reward = self.sample(x)
        # Track how often each node has been sampled.
        if x in self.T_dict.keys():
            self.T_dict[x] += 1
        else:
            self.T_dict[x] = 1
        self.Y_list.append(reward)
        self.sample_count += 1
        # Stack history and trim A to the columns actually observed so far.
        A = np.asarray(self.A_list).reshape(
            self.sample_count, self.n * self.s)[:, :self.sample_count * self.s]
        X = np.asarray(self.X_list).reshape(self.sample_count * self.s, self.d)
        Y = np.asarray(self.Y_list).reshape(self.sample_count, 1)
        return A, X, Y

    def threshold(self, x):
        """Posterior-width term sqrt(beta) * std at node ``x`` (mean unused)."""
        mu, var = self.m.predict(x.features, self.A)
        return np.sqrt(self.beta(self.sample_count)) * np.sqrt(var)

    def bvalue(self, x):
        """b-value of node ``x``: GP-UCB plus the depth slack delta(depth)."""
        # A = np.ones(((1, self.s))) * (1.0/self.s)
        mu, var = self.m.predict(x.features, self.A)
        return mu + np.sqrt(self.beta(self.sample_count)) * np.sqrt(var) \
            + self.delta(x.depth)

    def update_bvalue(self):
        """update bvalue for all leaf nodes.
        """
        for x in self.leaves:
            self.bvalues[x] = self.bvalue(x)

    def regression_eva(self):
        """Plot the GP fit against the true f over the root cell and save it
        to a PDF named after the current sample count and settings."""
        size = 100
        x = np.linspace(self.root.cell[0], self.root.cell[1],
                        size).reshape(-1, 1)
        f = self.f(x)
        mu, var = self.m.predict(x, A_ast=None)
        std = np.sqrt(var)
        node_centers = [node.center for node in self.evaluated_nodes]
        plt.figure()
        plt.scatter(node_centers, self.evaluated_fs, label='obs')
        plt.plot(x, f, color='tab:orange', label='f')
        plt.plot(x, mu, color='tab:blue', label='pred')
        plt.fill_between(
            x.reshape(-1,),
            (mu + self.beta() * std).reshape(-1,),
            (mu - self.beta() * std).reshape(-1,),
            alpha=0.3
        )
        plt.legend()
        # plt.ylim(-1,2)
        plt.savefig('reg' + str(self.sample_count) + '_' + self.reward_type
                    + '_opt' + str(self.opt_flag) + '.pdf')

    def rec(self):
        """Run the GP-StoOO loop until the budget ``self.n`` is spent.

        Returns:
            list: ``self.regret_list`` with one entry per sampling iteration.
        """
        self.T_dict[self.root] = 0
        self.expand(self.root)
        # Bootstrap: observe every child of the root once.
        for x in self.leaves:
            A, X, Y = self.add_obs(x)
            self.rec_node = x
            regret = self.regret()
            self.regret_list.append(regret)
        kernel = GPy.kern.RBF(input_dim=1, variance=self.kernel_var,
                              lengthscale=self.lengthscale)
        self.m = GPRegression_Group(X, Y, kernel, A=A,
                                    noise_var=self.gp_noise_var)
        self.update_bvalue()
        selected_node = max(self.bvalues, key=self.bvalues.get)
        while self.sample_count < self.n:  # n is the sample budget
            if self.delta(selected_node.depth) >= self.threshold(selected_node) and selected_node.depth <= self.hmax:
                # Confidence at this node is tight enough: retire it and
                # consider it as the recommendation before expanding.
                del self.bvalues[selected_node]
                if selected_node.depth > self.rec_node.depth:
                    self.rec_node = selected_node
                elif selected_node.depth == self.rec_node.depth:
                    # BUG FIX: predict() returns a (mean, variance) pair; the
                    # original code compared the pairs directly, so equal
                    # means were tie-broken by variance (and multi-element
                    # outputs would raise). Compare posterior means only.
                    mu_sel, _ = self.m.predict(selected_node.features,
                                               A_ast=self.A)
                    mu_rec, _ = self.m.predict(self.rec_node.features,
                                               A_ast=self.A)
                    if mu_sel > mu_rec:
                        self.rec_node = selected_node
                self.expand(selected_node)
                selected_node = max(self.bvalues, key=self.bvalues.get)
            # Sample the selected leaf and refit the GP on the full history,
            # with bounded hyperparameters to keep optimization sane.
            A, X, Y = self.add_obs(selected_node)
            kernel = GPy.kern.RBF(input_dim=1, variance=self.kernel_var,
                                  lengthscale=self.lengthscale)
            kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],
                                                 self.lengthscale_bounds[1],
                                                 warning=False)
            kernel.variance.constrain_bounded(self.kernel_var_bounds[0],
                                              self.kernel_var_bounds[1],
                                              warning=False)
            self.m = GPRegression_Group(X, Y, kernel, A=A,
                                        noise_var=self.gp_noise_var)
            if self.opt_flag:
                self.m.optimize()
            self.update_bvalue()
            regret = self.regret()
            self.regret_list.append(regret)
        return self.regret_list
# plt.plot(testX, posteriorTestY[:,:,i], label = 'sample posterior') # plt.plot(X, Y, 'ok', markersize=5) # plt.plot(testX, simY, label = 'posterior mean') # plt.plot(testX, simY - 3 * simMse ** 0.5, '--g') # plt.plot(testX, simY + 3 * simMse ** 0.5, '--g') # plt.legend() # plt.savefig('posterior_f.png') ''' import pickle with open('save_data.pickle', 'rb') as handle: data_dict = pickle.load(handle) # model.set_XY(data_dict['x'], data_dict['y']) model = GPRegression_Group(data_dict['X'], data_dict['Y'], kernel, A=data_dict['A'], noise_var=noise_var) model.optimize() pred, var = model.predict(testX, A_ast=None) std = np.sqrt(var) print(std) plt.scatter(data_dict['X'], data_dict['Y']) plt.plot(testX, simY, label='posterior mean') plt.plot(testX, pred, label='pred') plt.fill_between(testX.reshape(-1, ), np.asarray(pred + std).reshape(-1, ), np.asarray(pred - std).reshape(-1, ), alpha=0.3) plt.legend() plt.show()