Beispiel #1
0
    def update(self):
        """Refit the group GP on all collected rewards, then refresh the
        individual-arm and group posteriors (mu/sigma, group_mu/group_sigma)."""
        n_obs = len(self.rewards)
        y_obs = np.asarray(self.rewards).reshape(n_obs, 1)
        a_obs = self.sample_groups[:n_obs, :].reshape(n_obs, self.num_arms)

        if self.gpg is None:
            # First call: build the group GP regression model from scratch.
            self.gpg = GPRegression_Group(self.arms,
                                          y_obs,
                                          self.kernel,
                                          noise_var=0.005,
                                          A=a_obs)
        else:
            # Later calls: reuse the model and swap in the enlarged data set.
            self.gpg.set_XY_group(X=self.arms, Y=y_obs, A=a_obs)
        self.gpg.optimize(messages=True)

        # Posterior over individual arms.
        self.mu, self.sigma = self.gpg.predict(self.arms)
        # Posterior over the groups encoded by self.A.
        self.group_mu, self.group_sigma = self.gpg.predict(self.arms,
                                                           A_ast=self.A)
Beispiel #2
0
    def rec(self):
        """Run the optimize-or-expand loop until the sample budget ``self.n``
        is spent; return the list of per-step regrets."""
        # self.bvalues[self.root] = self.bvalue(self.root)
        

        while self.sample_count < self.n:
            print(self.sample_count)
            if self.sample_count == 0:
                # First iteration: observe the root and build the initial GP.
                
                A,X,Y = self.add_obs(self.root)

                kernel = GPy.kern.RBF(input_dim=1, 
                                    variance=self.kernel_var, 
                                    lengthscale=self.lengthscale) 
                self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
                # self.m.optimize()
                self.update_bvalue()
                regret = self.regret()
                self.regret_list.append(regret)
            else:
                # Score any leaves that do not have a b-value yet.
                for x in self.leaves:
                    if x not in self.evaluated_nodes:
                        self.bvalues[x] = self.bvalue(x)
                        # self.bvalues[x] = np.inf

                # Greedily pick the leaf with the largest b-value.
                selected_node = max(self.bvalues, key = self.bvalues.get)
                # print('selected node: ', selected_node)
        
                # Expand when the posterior width is below V(depth) and the
                # depth cap has not been hit; otherwise draw a new sample.
                if self.threshold(selected_node) <= self.V(selected_node.depth) and selected_node.depth <= self.hmax:
                    # print('threshold: ', self.threshold(selected_node))
                    # print('self.v: ', self.V(selected_node.depth))
                    del self.bvalues[selected_node]
                    if selected_node.depth > self.rec_node.depth:
                        self.rec_node = selected_node
                    self.expand(selected_node)
                else:
                    # print('before add obs')
                    A,X,Y = self.add_obs(selected_node)
                    # print('finish add obs')
                    # Rebuild the GP on all observations, with bounded
                    # kernel hyper-parameters for the optimizer.
                    kernel = GPy.kern.RBF(input_dim=1, 
                                    variance=self.kernel_var, 
                                    lengthscale=self.lengthscale) 
                    kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],self.lengthscale_bounds[1], warning=False)
                    kernel.variance.constrain_bounded(self.kernel_var_bounds[0], self.kernel_var_bounds[1], warning=False)

                    self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
                    # self.m.set_XY_group(X=X,Y=Y,A=A)
                    if self.opt_flag:
                        self.m.optimize()
                    
                    self.update_bvalue()
                    regret = self.regret()
                    self.regret_list.append(regret)

        return self.regret_list
Beispiel #3
0
class Pipeline():
    """GPR-G + form group by cluster + bandits algorithm (UCB/SR)
    """
    def __init__(self,
                 budget,
                 num_arms,
                 num_group,
                 group_method='kmeans',
                 noise=0.1,
                 fixed_noise=None,
                 dynamic_grouping=False):
        # TODO: implement fixed_noise
        self.budget = budget
        self.num_arms = num_arms
        self.num_group = num_group
        self.fixed_noise = fixed_noise
        self.group_method = group_method
        self.noise = noise
        self.dynamic_grouping = dynamic_grouping

        self.arms, self.f_train, self.Y_train = generate_data_func(
            self.num_arms,
            self.num_arms,
            dim=dim,
            X_train_range_low=X_train_range_low,
            X_train_range_high=X_train_range_high,
            x_shift=x_shift,
            func_type='sin')
        # self.idx_arms_dict = self.generate_idx_arms(self.arms)
        # print(self.idx_arms_dict)
        self.active_arm_idx = set(list(range(self.num_arms)))

        # choose RBF as default
        self.kernel = GPy.kern.RBF(input_dim=dim, variance=1., lengthscale=1.)
        self.sample_groups = np.zeros((self.budget, self.num_arms))
        self.gpg = None
        self.mu = np.zeros((self.num_arms, ))
        self.sigma = np.ones((self.num_arms, ))
        self.rewards = []

    # def generate_idx_arms(self, arms):
    #     idx_arms_dict = {}
    #     for idx, arm in enumerate(arms):
    #         # print(arm)
    #         arm = str(arm)
    #         arm = list(arm)
    #         print(arm)
    #         idx_arms_dict[idx] = arm
    #         idx_arms_dict[arm] = idx
    #     return idx_arms_dict

    def sample(self, t):
        # sample group reward and add it into rewards record
        sample = self.sample_groups[t, :].dot(
            self.f_train) + np.random.randn() * self.noise
        # print(sample)

        self.rewards.append(sample)

    def update(self):
        # update the posterior mean and std for each arm
        num_sample = len(self.rewards)

        if self.gpg is None:
            self.gpg = GPRegression_Group(
                self.arms,
                np.asarray(self.rewards).reshape(num_sample, 1),
                self.kernel,
                noise_var=0.005,
                A=self.sample_groups[:num_sample, :].reshape(
                    num_sample, self.num_arms))
        else:
            self.gpg.set_XY_group(X=self.arms,
                                  Y=np.asarray(self.rewards).reshape(
                                      num_sample, 1),
                                  A=self.sample_groups[:num_sample, :].reshape(
                                      num_sample, self.num_arms))
        self.gpg.optimize(messages=True)

        # pred for indi
        self.mu, self.sigma = self.gpg.predict(self.arms)
        # pred for group
        self.group_mu, self.group_sigma = self.gpg.predict(self.arms,
                                                           A_ast=self.A)

    def form_group(self, num_group):
        # construct matrix A \in R^{g * n}
        # each row represents one group
        # the arms in the group are set to 1, otherwise 0

        # print(self.active_arm_idx)
        sorted_active_arm_idx = np.asarray(np.sort(list(self.active_arm_idx)))
        data = self.arms[sorted_active_arm_idx, :]
        label = self.mu.reshape(self.mu.shape[0], 1)[sorted_active_arm_idx, :]
        if self.group_method == 'kmeans':
            cluster_features = np.concatenate((data, label), axis=1)
            cluster_features = StandardScaler().fit_transform(cluster_features)
            # cluster_features = data

            kmeans = KMeans(n_clusters=num_group,
                            init='k-means++',
                            random_state=0).fit(cluster_features)
            group_idx = kmeans.labels_
            A = np.zeros((num_group, self.num_arms))
            for i, idx in enumerate(sorted_active_arm_idx):
                A[group_idx[i], idx] = 1

            # check whether we need to change group centers code
            self.group_centers = kmeans.cluster_centers_

            if self.dynamic_grouping:
                # print('chaning group idx.')
                self.active_group_idx = set(list(range(num_group)))
            return A
        elif self.group_method == 'identity':
            self.group_centers = data
            return np.eye(N=data.shape[0])

    def evaluation(self):
        # TODO: how to evaluate the pipeline?

        # evaluate the prediction when budget is run out?
        print('Prediction for individual:')
        print('mean squared error: ',
              mean_squared_error(self.Y_train, self.mu))
        # print('Y train: ', self.Y_train)
        # print('mu: ', self.mu)
        print('r2 score: ', r2_score(self.Y_train, self.mu))

        if not self.dynamic_grouping:
            print('Prediction for group (select A_ast = A):')
            group_train = self.A.dot(self.Y_train)
            print('mean squared error: ',
                  mean_squared_error(group_train, self.group_mu))
            print('r2 score: ', r2_score(group_train, self.group_mu))
Beispiel #4
0
class GPTree(GPOO):
    """
        Algorithm in Shekhar et al. 2018
    """
    def __init__(self, f, delta, root_cell, n, k=2, d=1, s=1, reward_type = 'center', sigma = 0.1, opt_x= None, 
                alpha = 0.5, rho = 0.5, u = 2.0, v1 = 1.0, v2 = 1.0, C3 = 1.0, C2 = 1.0, D1=1, **kwarg) -> None:
        """
        alpha, rho (0,1)
        u >0
        0<v2<=1<=v1 
        C2,C3 > 0 (corollary 1)
        D1 >= 0 metric dimension (Defi 2)
        """
        # Depth cap from the paper's e.q. 3.4.
        hmax = np.log(n) * (1 + 1/alpha) / (2 * alpha * np.log(1/rho)) # e.q. 3.4
        super().__init__(f, delta, root_cell, n, k, d, s, reward_type, sigma,  opt_x, hmax, **kwarg)
        # TODO: might need to change constant rate
        # Exploration coefficient beta_n used in threshold()/bvalue().
        self.beta_n = 0.25 * np.sqrt(np.log(n) + u)
        self.betastd = {}

        # Todo: the following parameters might need to be chosen more carefully
        
        self.rho = rho
        self.u = u # claim 1 holds for probability at least 1 - e^{-u}
        self.v1 = v1
        self.v2 = v2
        self.C3 = C3
        self.C4 = C2 + 2 * np.log(n**2 * np.pi ** 2/6)
        self.D1 = D1

    def g(self,x):
        """In assumption A2"""
        # TODO: smoothness assumption, might needs to change later
        return 0.1 * x

    def V(self, h):
        """In claim 2"""
        # TODO
        # Confidence term at depth h, shrinking with rho**h.
        temp = np.sqrt(2 * self.u + self.C4 + h * np.log(self.k) + 4 * self.D1 * np.log(1/self.g(self.v1 * self.rho ** h)))
        return 4 * self.g(self.v1 * self.rho ** h) * (temp + self.C3)

    def threshold(self,x):
        """Posterior-width term beta_n * std at node x (mu is unused here)."""
        # NOTE(review): self.A is passed as the second positional argument of
        # predict; elsewhere it is passed as A_ast= — confirm they coincide.
        mu, var = self.m.predict(x.features, self.A)
        return self.beta_n * np.sqrt(var) 

    def bvalue(self, x):
        """Upper bound U(x) + V(depth) used to rank leaves in rec()."""
        mu, var = self.m.predict(x.features, self.A)
        term1 = mu + self.beta_n * np.sqrt(var)
        if x.depth > 0:
            # Tighten with the parent's bound plus V at the parent depth.
            mu_p, var_p = self.m.predict(x.parent.features, self.A)
            term2 = mu_p + self.beta_n * np.sqrt(var_p) + self.V(x.depth - 1)
            U = np.min([term1, term2])
        else:
            U = term1
        return U + self.V(x.depth)    

    def rec(self):
        """Expand-or-sample loop until budget self.n is spent; returns the
        regret list."""
        # self.bvalues[self.root] = self.bvalue(self.root)
        

        while self.sample_count < self.n:
            print(self.sample_count)
            if self.sample_count == 0:
                # First iteration: observe the root and build the initial GP.
                
                A,X,Y = self.add_obs(self.root)

                kernel = GPy.kern.RBF(input_dim=1, 
                                    variance=self.kernel_var, 
                                    lengthscale=self.lengthscale) 
                self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
                # self.m.optimize()
                self.update_bvalue()
                regret = self.regret()
                self.regret_list.append(regret)
            else:
                # Score any leaves without a b-value yet.
                for x in self.leaves:
                    if x not in self.evaluated_nodes:
                        self.bvalues[x] = self.bvalue(x)
                        # self.bvalues[x] = np.inf

                selected_node = max(self.bvalues, key = self.bvalues.get)
                # print('selected node: ', selected_node)
        
                # Expand when the width is small relative to V(depth);
                # otherwise sample the node and refit the GP.
                if self.threshold(selected_node) <= self.V(selected_node.depth) and selected_node.depth <= self.hmax:
                    # print('threshold: ', self.threshold(selected_node))
                    # print('self.v: ', self.V(selected_node.depth))
                    del self.bvalues[selected_node]
                    if selected_node.depth > self.rec_node.depth:
                        self.rec_node = selected_node
                    self.expand(selected_node)
                else:
                    # print('before add obs')
                    A,X,Y = self.add_obs(selected_node)
                    # print('finish add obs')
                    kernel = GPy.kern.RBF(input_dim=1, 
                                    variance=self.kernel_var, 
                                    lengthscale=self.lengthscale) 
                    kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],self.lengthscale_bounds[1], warning=False)
                    kernel.variance.constrain_bounded(self.kernel_var_bounds[0], self.kernel_var_bounds[1], warning=False)

                    self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
                    # self.m.set_XY_group(X=X,Y=Y,A=A)
                    if self.opt_flag:
                        self.m.optimize()
                    
                    self.update_bvalue()
                    regret = self.regret()
                    self.regret_list.append(regret)

        return self.regret_list
Beispiel #5
0
    def rec(self):
        """Sample every first-level leaf once, then loop: expand the best
        leaf when its delta dominates the posterior width, otherwise sample
        it and refit the GP. Returns the regret list."""
        self.T_dict[self.root] = 0
        self.expand(self.root)
        # Initial pass: one observation per child of the root.
        for x in self.leaves:
            A,X,Y = self.add_obs(x)
            self.rec_node = x 
            regret = self.regret()
            self.regret_list.append(regret)


        kernel = GPy.kern.RBF(input_dim=1, 
                            variance=self.kernel_var, 
                            lengthscale=self.lengthscale) 
        self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
        # self.m.optimize()
        self.update_bvalue()
        selected_node = max(self.bvalues, key = self.bvalues.get)

        while self.sample_count < self.n: # change n to sample budget
            # for x in self.leaves:
            #     if x not in self.evaluated_nodes:
            #         self.bvalues[x] = self.bvalue(x)
            #         # self.bvalues[x] = np.inf

            # print('# sample: ', self.sample_count)
            # print('leaves:')
            # for i in self.leaves:
            #     print(i.center)
            # print('bvalues:')
            # for key, value in self.bvalues.items():
            #     print(key.center)
            #     print(value)

            # print('selected node: ', selected_node.center)
            # print('################################')

            # Expand when the resolution term delta(depth) dominates the
            # posterior width and the depth cap is respected.
            if self.delta(selected_node.depth) >= self.threshold(selected_node) and selected_node.depth <= self.hmax:
            # if self.T_dict[selected_node] >= self.threshold(selected_node):
                del self.bvalues[selected_node]
                if selected_node.depth > self.rec_node.depth:
                    self.rec_node = selected_node
                elif selected_node.depth == self.rec_node.depth:
                    # Tie-break equal depths by predicted group mean.
                    if self.m.predict(selected_node.features, A_ast = self.A) > self.m.predict(self.rec_node.features, A_ast = self.A):
                        self.rec_node = selected_node

                self.expand(selected_node)

            selected_node = max(self.bvalues, key = self.bvalues.get)

            A,X,Y = self.add_obs(selected_node)

            # Refit the GP on all observations with bounded hyper-parameters.
            kernel = GPy.kern.RBF(input_dim=1, 
                                variance=self.kernel_var, 
                                lengthscale=self.lengthscale) 
            kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],self.lengthscale_bounds[1], warning=False)
            kernel.variance.constrain_bounded(self.kernel_var_bounds[0], self.kernel_var_bounds[1], warning=False)

            self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
            # self.m.set_XY_group(X=X,Y=Y,A=A)
            if self.opt_flag:
                self.m.optimize()

            # print('*****************************')
            # print('kernel paras:')
            # print(self.m.kern.variance)
            # print(self.m.kern.lengthscale)
            # print(self.gp_noise_var)
            # print('*****************************')

            # if self.sample_count % 10 == 0:
            #     self.regression_eva()

            self.update_bvalue()
            
            # print('sample ', self.sample_count)
            # print('delta ', self.delta(selected_node.depth))
            # print('threshold ', self.threshold(selected_node))
            # if self.sample_count >=10:
            #     raise Exception
            regret = self.regret()
            self.regret_list.append(regret)

        # import pickle 
        # data_dict = {}
        # data_dict['X'] = X
        # data_dict['Y'] = Y
        # data_dict['A'] = A
        # with open('save_data.pickle', 'wb') as handle:
        #     pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

        return self.regret_list
Beispiel #6
0
class GPOO(Base):
    """
    We extend StoOO to the case where f is sampled from GP. 

    """
    def __init__(self, f, delta, root_cell, n, k=2, d=1, s=1, reward_type = 'center', sigma = 0.1, opt_x = None, hmax = 4, **kwarg) -> None:
        """Initialise observation buffers and GP hyper-parameter settings.

        For the GP case, kwarg must contain exactly: lengthscale,
        kernel_var, gp_noise_var, opt_flag (enforced below).
        """
        super().__init__(f, delta, root_cell, n, k, d, s, reward_type, sigma, opt_x)

        # Running buffers of observations; flattened to arrays in add_obs().
        self.X_list = []
        self.A_list = []
        self.Y_list = []
        self.sample_count = 0 
        self.hmax = hmax 

        # self.lengthscale = 0.1
        # self.kernel_var = 0.5
        # self.gp_noise_var = self.kernel_var * 0.05 # 1e-10 

        # self.lengthscale_bounds = [0.05, 10]
        # self.kernel_var_bounds = [0.05, 10]
        # Bounds used when constraining kernel hyper-parameters in rec().
        self.lengthscale_bounds = [0.01, 10]
        self.kernel_var_bounds = [0.05, 10]

        # self.kernel = GPy.kern.RBF(input_dim=1, 
        #                         variance=self.kernel_var, 
        #                         lengthscale=self.lengthscale)

        # self.f = self.gene_f() 
        # self.opt_x = self.get_opt_x()
            
        if kwarg.__len__() == 4:
            # for gp case, we need to input 4 paras in the following
            self.lengthscale = kwarg['lengthscale']
            self.kernel_var = kwarg['kernel_var']
            self.gp_noise_var = kwarg['gp_noise_var']
            self.opt_flag = kwarg['opt_flag']
        else: 
            print(kwarg.__len__())
            raise Exception

    def beta(self,t = 1):
        """Exploration coefficient at step t (log schedule)."""
        # return 0.5 * np.log(t)
        return 0.1 * np.log(np.pi**2 * t**2/(6 * 0.1))
        # return 1

    def add_obs(self,x):
        """Sample reward of x and add observation x to X_list, A_list, Y_list 
        Return array A,X,Y which contains all previous observations
        """
        # Indicator row averaging the s feature points of this observation.
        A_x = np.zeros((1, self.n * self.s))
        A_x[0, self.sample_count * self.s:(self.sample_count+1) * self.s] = 1.0/self.s * np.ones((1,self.s))
        self.A_list.append(A_x)
        self.X_list.append(x.features)
        reward = self.sample(x)
        # print('x:', x.center)
        # Count how often each node has been sampled.
        if x in self.T_dict.keys():
            self.T_dict[x] += 1
        else:
            self.T_dict[x] = 1
        self.Y_list.append(reward)

        self.sample_count += 1
        # Trim A's columns to the features observed so far.
        A = np.asarray(self.A_list).reshape(self.sample_count, self.n * self.s)[:,:self.sample_count* self.s]
        X = np.asarray(self.X_list).reshape(self.sample_count * self.s, self.d)
        Y = np.asarray(self.Y_list).reshape(self.sample_count, 1)

        return A,X,Y

    def threshold(self,x):
        """Posterior-width term sqrt(beta) * std at node x (mu unused)."""
        mu, var = self.m.predict(x.features, self.A)

        return np.sqrt(self.beta(self.sample_count)) * np.sqrt(var)

    def bvalue(self,x):
        """UCB-style score: mean + sqrt(beta)*std + delta(depth)."""
        # A = np.ones(((1, self.s))) * (1.0/self.s)
        mu, var = self.m.predict(x.features, self.A)
        return mu + np.sqrt(self.beta(self.sample_count)) * np.sqrt(var) + self.delta(x.depth)

    def update_bvalue(self):
        """update bvalue for all leaf nodes. 
        """
        for x in self.leaves:
            self.bvalues[x] = self.bvalue(x)

    def regression_eva(self):
        """Diagnostic plot: true f vs GP posterior over the root cell;
        saved to a per-sample-count PDF."""
        size = 100
        x = np.linspace(self.root.cell[0], self.root.cell[1], size).reshape(-1,1)

        f = self.f(x)
        mu, var = self.m.predict(x, A_ast = None)
        std = np.sqrt(var)

        node_centers = []
        for i, node in enumerate(self.evaluated_nodes):
            node_centers.append(node.center)

        plt.figure()
        plt.scatter(node_centers, self.evaluated_fs, label = 'obs')
        plt.plot(x, f, color = 'tab:orange', label = 'f')
        plt.plot(x, mu, color = 'tab:blue', label = 'pred')
        plt.fill_between(
            x.reshape(-1,), 
            (mu + self.beta() * std).reshape(-1,),
            (mu - self.beta() * std).reshape(-1,), 
            alpha = 0.3
            )
        plt.legend()
        # plt.ylim(-1,2)
        plt.savefig('reg' + str(self.sample_count) +'_'+self.reward_type+ '_opt' + str(self.opt_flag) + '.pdf')

    def rec(self):
        """Sample every first-level leaf once, then loop: expand the best
        leaf when delta(depth) dominates the posterior width, otherwise
        sample it and refit the GP. Returns the regret list."""
        self.T_dict[self.root] = 0
        self.expand(self.root)
        # Initial pass: one observation per child of the root.
        for x in self.leaves:
            A,X,Y = self.add_obs(x)
            self.rec_node = x 
            regret = self.regret()
            self.regret_list.append(regret)


        kernel = GPy.kern.RBF(input_dim=1, 
                            variance=self.kernel_var, 
                            lengthscale=self.lengthscale) 
        self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
        # self.m.optimize()
        self.update_bvalue()
        selected_node = max(self.bvalues, key = self.bvalues.get)

        while self.sample_count < self.n: # change n to sample budget
            # for x in self.leaves:
            #     if x not in self.evaluated_nodes:
            #         self.bvalues[x] = self.bvalue(x)
            #         # self.bvalues[x] = np.inf

            # print('# sample: ', self.sample_count)
            # print('leaves:')
            # for i in self.leaves:
            #     print(i.center)
            # print('bvalues:')
            # for key, value in self.bvalues.items():
            #     print(key.center)
            #     print(value)

            # print('selected node: ', selected_node.center)
            # print('################################')

            # Expand when the resolution term dominates the posterior width.
            if self.delta(selected_node.depth) >= self.threshold(selected_node) and selected_node.depth <= self.hmax:
            # if self.T_dict[selected_node] >= self.threshold(selected_node):
                del self.bvalues[selected_node]
                if selected_node.depth > self.rec_node.depth:
                    self.rec_node = selected_node
                elif selected_node.depth == self.rec_node.depth:
                    # Tie-break equal depths by predicted group mean.
                    if self.m.predict(selected_node.features, A_ast = self.A) > self.m.predict(self.rec_node.features, A_ast = self.A):
                        self.rec_node = selected_node

                self.expand(selected_node)

            selected_node = max(self.bvalues, key = self.bvalues.get)

            A,X,Y = self.add_obs(selected_node)

            # Refit the GP on all observations with bounded hyper-parameters.
            kernel = GPy.kern.RBF(input_dim=1, 
                                variance=self.kernel_var, 
                                lengthscale=self.lengthscale) 
            kernel.lengthscale.constrain_bounded(self.lengthscale_bounds[0],self.lengthscale_bounds[1], warning=False)
            kernel.variance.constrain_bounded(self.kernel_var_bounds[0], self.kernel_var_bounds[1], warning=False)

            self.m = GPRegression_Group(X, Y, kernel, A = A, noise_var=self.gp_noise_var)
            # self.m.set_XY_group(X=X,Y=Y,A=A)
            if self.opt_flag:
                self.m.optimize()

            # print('*****************************')
            # print('kernel paras:')
            # print(self.m.kern.variance)
            # print(self.m.kern.lengthscale)
            # print(self.gp_noise_var)
            # print('*****************************')

            # if self.sample_count % 10 == 0:
            #     self.regression_eva()

            self.update_bvalue()
            
            # print('sample ', self.sample_count)
            # print('delta ', self.delta(selected_node.depth))
            # print('threshold ', self.threshold(selected_node))
            # if self.sample_count >=10:
            #     raise Exception
            regret = self.regret()
            self.regret_list.append(regret)

        # import pickle 
        # data_dict = {}
        # data_dict['X'] = X
        # data_dict['Y'] = Y
        # data_dict['A'] = A
        # with open('save_data.pickle', 'wb') as handle:
        #     pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

        return self.regret_list
Beispiel #7
0
#     plt.plot(testX, posteriorTestY[:,:,i], label = 'sample posterior')
# plt.plot(X, Y, 'ok', markersize=5)
# plt.plot(testX, simY, label = 'posterior mean')
# plt.plot(testX, simY - 3 * simMse ** 0.5, '--g')
# plt.plot(testX, simY + 3 * simMse ** 0.5, '--g')
# plt.legend()
# plt.savefig('posterior_f.png')
'''
# Script fragment: reload observations saved by a previous run, refit the
# group GP, and plot its posterior against the saved data.
# NOTE(review): kernel, noise_var, testX and simY are defined earlier in the
# file (outside this fragment) — confirm before running standalone.
import pickle
with open('save_data.pickle', 'rb') as handle:
    data_dict = pickle.load(handle)

# model.set_XY(data_dict['x'], data_dict['y'])
model = GPRegression_Group(data_dict['X'],
                           data_dict['Y'],
                           kernel,
                           A=data_dict['A'],
                           noise_var=noise_var)
model.optimize()
# Predict individual (ungrouped) values on the test grid.
pred, var = model.predict(testX, A_ast=None)
std = np.sqrt(var)
print(std)
plt.scatter(data_dict['X'], data_dict['Y'])
plt.plot(testX, simY, label='posterior mean')
plt.plot(testX, pred, label='pred')
# Shade a one-standard-deviation band around the prediction.
plt.fill_between(testX.reshape(-1, ),
                 np.asarray(pred + std).reshape(-1, ),
                 np.asarray(pred - std).reshape(-1, ),
                 alpha=0.3)
plt.legend()
plt.show()