def compute_var(self,X,xTest):
    """
    Compute the predictive variance at xTest for a GP conditioned on X.

    The prior variance of every test point is taken to be 1 (an identity
    matrix stands in for the test/test covariance).  As a side effect the
    noisy train/train covariance is stored in self.KK_bucb_train_train.

    Input Parameters
    ----------
    X: the observed points (duplicated rows are dropped via unique_rows)
    xTest: the testing points (promoted to a 2d array)

    Returns
    -------
    writable 1d array holding diag(var); entries below 1e-100 (which
    includes negative round-off) are clamped to 0
    """
    xTest=np.atleast_2d(np.asarray(xTest))

    # duplicated observations would make the train covariance singular
    ur = unique_rows(X)
    X=X[ur]

    def kernel(a,b):
        return self.kernel_dist(a,b,self.lengthscale)

    if self.kernel_name=='SE':
        if xTest.shape[0]<=800:
            Euc_dist_test_train=euclidean_distances(xTest,X)
            KK_xTest_xTrain=np.exp(-np.square(Euc_dist_test_train)/self.lengthscale)
        else:
            # large test sets go through the pairwise kernel callback instead
            KK_xTest_xTrain=cdist(xTest,X,kernel)

        Euc_dist_train_train=euclidean_distances(X,X)
        self.KK_bucb_train_train=np.exp(-np.square(Euc_dist_train_train)/self.lengthscale)+np.eye(X.shape[0])*self.noise_delta
    else:
        KK_xTest_xTrain=cdist(xTest,X,kernel)
        self.KK_bucb_train_train=cdist(X,X,kernel)+np.eye(X.shape[0])*self.noise_delta

    try:
        temp=np.linalg.solve(self.KK_bucb_train_train,KK_xTest_xTrain.T)
    except np.linalg.LinAlgError:
        # singular train covariance: fall back to the least-squares solution
        temp=np.linalg.lstsq(self.KK_bucb_train_train,KK_xTest_xTrain.T, rcond=-1)[0]

    # Only the diagonal of I - temp.T @ K_test_train.T is needed, so compute
    # it directly instead of forming the full (n_test x n_test) matrix.
    # The result is a fresh array and therefore writable.
    var=1.0-np.sum(temp*KK_xTest_xTrain.T,axis=0)
    var[var<1e-100]=0
    return var
 def compute_loo_predictive(X,lengthscale,noise_delta):
     """
     Leave-one-out log predictive probability for a given lengthscale.

     For every unique observation a fresh GaussianProcess is fitted on the
     remaining observations and the Gaussian log density of the held-out
     outcome is accumulated.

     Note: `self` is not a parameter; this function reads self.X, self.Y
     and self.noise_delta from the enclosing scope.  The `X` and
     `noise_delta` arguments are not used by the body.
     # NOTE(review): self.noise_delta is used instead of the noise_delta
     # argument - confirm this is intended.

     Input Parameters
     ----------
     lengthscale: kernel lengthscale passed to the GP as 'theta'

     Returns
     -------
     the summed log predictive probability as a Python scalar
     """
     ur = unique_rows(self.X)
     myX=self.X[ur]
     myY=self.Y[ur]
     # last column holds the outcome so rows can be removed jointly
     D=np.hstack((myX,myY.reshape(-1,1)))
     LOO_sum=0
     for i in range(D.shape[0]):
         D_train=np.delete(D,i,0)
         D_test=D[i,:]
         Xtrain=D_train[:,:-1]
         Ytrain=D_train[:,-1]
         Xtest=D_test[:-1]
         Ytest=D_test[-1]
         gp_params= {'theta':lengthscale,'noise_delta':self.noise_delta}
         gp=GaussianProcess(gp_params)

         try: # if SVD problem
             gp.fit(Xtrain, Ytrain)
             mu, sigma2 = gp.predict(Xtest, eval_MSE=True)
             # log N(y | mu, sigma2) = -0.5*log(2*pi) - 0.5*log(sigma2) - (y-mu)^2/(2*sigma2)
             logpred=-0.5*np.log(2*np.pi)-0.5*np.log(sigma2)-np.square(Ytest-mu)/(2*sigma2)
         except Exception:
             # best effort: a fold that cannot be fitted gets a large penalty
             logpred=-999999

         LOO_sum+=logpred
     # np.asscalar was removed from NumPy; .item() is its replacement and
     # np.asarray also covers the case where every fold hit the penalty (plain int)
     return np.asarray(LOO_sum).item()
Example #3
0
    def posterior(self, Xnew):
        """
        Refit the GP on the unique observations and predict at Xnew.

        Returns
        -------
        the predictive mean and the predictive standard deviation
        (square root of the variance reported by the GP)
        """
        keep = unique_rows(self.X)
        train_X, train_Y = self.X[keep], self.Y[keep]
        self.gp.fit(train_X, train_Y)

        mean, variance = self.gp.predict(Xnew, eval_MSE=True)
        return mean, np.sqrt(variance)
Example #4
0
    def init_with_data(self, init_X,init_Y):
        """
        Initialise the stored observations from existing data and fit the GP.

        Input parameters
        ----------
        init_X:     initial observed locations (in original scale)
        init_Y:     initial observed outcomes (in original scale)
        """
        # raw inputs, plus a copy shifted by the lower bound and divided by the range
        self.X_original = np.asarray(init_X)
        rescaled = np.divide(init_X - self.bounds[:, 0], self.max_min_gap)
        self.X = np.asarray(rescaled)

        # outcomes are standardised to zero mean and unit standard deviation
        self.Y_original = np.asarray(init_Y)
        centred = self.Y_original - np.mean(self.Y_original)
        self.Y = centred / np.std(self.Y_original)

        # acquisition function built from the stored configuration
        self.acq_func = AcquisitionFunction(self.acq)

        # duplicated rows of X would break the GP fit
        keep = unique_rows(self.X)
        self.gp.fit(self.X[keep], self.Y[keep])
        def compute_log_marginal(X,lengthscale,noise_delta):
            """
            Log marginal likelihood of the stored data for a given lengthscale.

            Reads self.X / self.Y from the enclosing scope (the `X` argument
            is not used).  For the 'SE' kernel the pairwise distances are
            computed on the first call (while self.flagOptimizeHyperFirst is 0)
            and reused from self.Euc_dist_X_X afterwards.

            Returns -np.inf when the covariance cannot be solved or factorised,
            otherwise the log marginal likelihood as a Python scalar.
            """
            ur = unique_rows(self.X)
            myX=self.X[ur]
            myY=self.Y[ur]

            # build the covariance matrix K
            if self.kernel_name=='SE':
                if self.flagOptimizeHyperFirst==0:
                    self.Euc_dist_X_X=euclidean_distances(myX,myX)
                KK=np.exp(-np.square(self.Euc_dist_X_X)/lengthscale)+np.eye(len(myX))*self.noise_delta
            else:
                KK=pdist(myX,lambda a,b: self.kernel_dist(a,b,lengthscale))
                KK=squareform(KK)
                KK=KK+np.eye(myX.shape[0])*(1+noise_delta)
            if self.flagOptimizeHyperFirst==0:
                self.flagOptimizeHyperFirst=1

            try:
                temp_inv=np.linalg.solve(KK,myY)
            except (np.linalg.LinAlgError, ValueError): # singular
                return -np.inf

            # data-fit term: -0.5 * y^T K^{-1} y
            first_term=-0.5*np.dot(myY.T,temp_inv)

            # if the matrix is too large, we randomly select a part of the data for fast computation
            if KK.shape[0]>200:
                idx=np.random.permutation(KK.shape[0])
                idx=idx[:200]
                KK=KK[np.ix_(idx,idx)]

            try:
                chol  = spla.cholesky(KK, lower=True)
            except (np.linalg.LinAlgError, ValueError): # not positive definite / non-finite
                return -np.inf

            # 0.5 * log det K = sum log diag chol(K)
            W_logdet=np.sum(np.log(np.diag(chol)))
            second_term=-W_logdet

            logmarginal=first_term+second_term-0.5*len(myY)*np.log(2*np.pi)
            # np.asscalar was removed from NumPy; .item() is the replacement
            logmarginal=np.asarray(logmarginal).item()

            if np.isnan(logmarginal):
                # '{}' instead of '{:s}': the lengthscale is numeric, not a string
                print("theta={} first term ={:.4f} second  term ={:.4f}".format(
                        lengthscale,np.asarray(first_term).item(),np.asarray(second_term).item()))

            return logmarginal
    def predict_g2(self,xTest,eval_MSE=True):
        """
        compute predictive mean and variance of f = fstar - 0.5*g^2,
        where g is modelled by the GP (self.alphaG, self.L)

        Input Parameters
        ----------
        xTest: the testing points (a 1d array is reshaped to rows of
               self.X.shape[1] columns)
        eval_MSE: accepted for interface compatibility; not used

        Returns
        -------
        mean, var
        """
        if len(xTest.shape)==1: # 1d
            xTest=xTest.reshape((-1,self.X.shape[1]))

        # prevent singular matrix
        ur = unique_rows(self.X)
        X=self.X[ur]

        if self.kernel_name=='SE':
            Euc_dist=euclidean_distances(xTest,xTest)
            KK_xTest_xTest=np.exp(-np.square(Euc_dist)/self.lengthscale)+np.eye(xTest.shape[0])*self.noise_delta

            Euc_dist_test_train=euclidean_distances(xTest,X)
            KK_xTest_xTrain=np.exp(-np.square(Euc_dist_test_train)/self.lengthscale)
        else:
            KK=pdist(xTest,lambda a,b: self.kernel_dist(a,b,self.lengthscale))
            KK=squareform(KK)
            KK_xTest_xTest=KK+np.eye(xTest.shape[0])+np.eye(xTest.shape[0])*self.noise_delta
            KK_xTest_xTrain=cdist(xTest,X,lambda a,b: self.kernel_dist(a,b,self.lengthscale))

        # posterior of g from the stored Cholesky factor
        meanG=np.dot(KK_xTest_xTrain,self.alphaG)
        v=np.linalg.solve(self.L,KK_xTest_xTrain.T)
        varG=KK_xTest_xTest-np.dot(v.T,v)

        # compute mF, varF
        mf=self.fstar-0.5*meanG*meanG
        varf=meanG*varG*meanG

        return mf.ravel(),np.diag(varf)
 def optimize_lengthscale(self,previous_theta,noise_delta):
     """
     Optimise the kernel lengthscale, store it and refit the model.

     Dispatches on self.kernel_name: 'ARD' uses optimize_lengthscale_ARD,
     'SE' uses optimize_lengthscale_SE_maximizing.  Any other kernel name
     leaves the model untouched and returns None.

     Returns
     -------
     the new lengthscale (None for an unhandled kernel name)
     """
     if self.kernel_name == 'ARD':
         optimiser=self.optimize_lengthscale_ARD
     elif self.kernel_name=='SE':
         optimiser=self.optimize_lengthscale_SE_maximizing
     else:
         return None

     best=optimiser(previous_theta,noise_delta)
     self.lengthscale=best

     # refit the model on the unique observations
     keep = unique_rows(self.X)
     self.fit(self.X[keep],self.Y[keep])

     return best
Example #8
0
        def compute_log_marginal_with_logistic_hyper(lengthscale, lengthscale_t,midpoint,growth,noise_delta):
            """
            Log marginal likelihood for the product kernel over (x, t) when the
            outcomes are produced by transform_logistic(midpoint, growth).

            Reads self.X, self.T, self.Y_curves and self.MaxEpisode from the
            enclosing scope and stores the pairwise distances in
            self.Euc_dist_x / self.Euc_dist_t.

            Returns -np.inf when the covariance cannot be solved or factorised,
            otherwise the log marginal likelihood as a Python scalar.
            """
            # unique (x, t) pairs only
            temp=np.hstack((self.X,self.T))
            ur = unique_rows(temp)
            myX=self.X[ur]
            myT=self.T[ur]

            # transform Y_curve to Y_original, then to Y
            Y_original=transform_logistic(self.Y_curves,midpoint,growth,self.MaxEpisode)
            myY=(Y_original-np.mean(Y_original))/np.std(Y_original)

            myY=myY[ur]

            self.Euc_dist_x=euclidean_distances(myX,myX)
            self.Euc_dist_t=euclidean_distances(myT,myT)

            KK=np.exp(-np.square(self.Euc_dist_x)/lengthscale-np.square(self.Euc_dist_t)/lengthscale_t)\
                +np.eye(len(myX))*noise_delta

            try:
                temp_inv=np.linalg.solve(KK,myY)
            except (np.linalg.LinAlgError, ValueError): # singular
                return -np.inf

            # data-fit term: -0.5 * y^T K^{-1} y
            first_term=-0.5*np.dot(myY.T,temp_inv)

            # if the matrix is too large, we randomly select a part of the data for fast computation
            if KK.shape[0]>200:
                idx=np.random.permutation(KK.shape[0])
                idx=idx[:200]
                KK=KK[np.ix_(idx,idx)]

            try:
                chol  = spla.cholesky(KK, lower=True)
            except (np.linalg.LinAlgError, ValueError): # not positive definite / non-finite
                return -np.inf

            # 0.5 * log det K = sum log diag chol(K)
            W_logdet=np.sum(np.log(np.diag(chol)))
            second_term=-W_logdet

            logmarginal=first_term+second_term-0.5*len(myY)*np.log(2*np.pi)
            # np.asscalar was removed from NumPy; .item() is the replacement
            logmarginal=np.asarray(logmarginal).item()

            if np.isnan(logmarginal):
                print("lengthscale_x={:f} lengthscale_t={:f} first term ={:.4f} second  term ={:.4f}".format(
                        lengthscale,lengthscale_t,np.asarray(first_term).item(),np.asarray(second_term).item()))

            return logmarginal
    def predict_bucb(self,xTest,eval_MSE):
        """
        compute predictive mean and variance for BUCB

        The mean is computed from the fitted data (self.KK_x_x, self.Y);
        the variance comes from self.compute_var evaluated on self.X_bucb.

        Input Parameters
        ----------
        xTest: the testing points (a 1d array is reshaped to rows of
               self.X.shape[1] columns)
        eval_MSE: accepted for interface compatibility; not used

        Returns
        -------
        mean, var
        """

        if len(xTest.shape)==1: # 1d
            xTest=xTest.reshape((-1,self.X.shape[1]))

        ur = unique_rows(self.X)
        X=self.X[ur]

        if self.kernel_name=='SE':
            Euc_dist_test_train=euclidean_distances(xTest,X)
            KK_xTest_xTrain=np.exp(-np.square(Euc_dist_test_train)/self.lengthscale)

            Euc_dist_train_train=euclidean_distances(X,X)
            self.KK_bucb_train_train=np.exp(-np.square(Euc_dist_train_train)/self.lengthscale)+np.eye(X.shape[0])*self.noise_delta
        else:
            # previously any non-'SE' kernel left KK_xTest_xTrain unbound;
            # use the same pairwise-kernel path that compute_var uses
            from scipy.spatial.distance import cdist
            KK_xTest_xTrain=cdist(xTest,X,lambda a,b: self.kernel_dist(a,b,self.lengthscale))

        # computing the mean using the old data
        KK_noisy=self.KK_x_x+np.eye(self.X.shape[0])*self.noise_delta
        try:
            temp=np.linalg.solve(KK_noisy,KK_xTest_xTrain.T)
        except np.linalg.LinAlgError:
            # singular system: fall back to the least-squares solution
            temp=np.linalg.lstsq(KK_noisy,KK_xTest_xTrain.T, rcond=-1)[0]
        mean=np.dot(temp.T,self.Y)

        var=self.compute_var(self.X_bucb,xTest)

        return mean.ravel(),var
Example #10
0
    def predict(self,xTest, eval_MSE=True):
        """
        Predictive mean and variance for the product kernel over (x, t).

        Input Parameters
        ----------
        xTest: the testing points; the last column is the t coordinate and
               the remaining columns are x.  A 1d array is reshaped to rows
               of self.X.shape[1]+1 columns.
        eval_MSE: accepted for interface compatibility; not used

        Returns
        -------
        mean, var
        """
        if xTest.ndim==1: # 1d
            xTest=xTest.reshape((-1,self.X.shape[1]+1))

        # split the trailing t column (kept as a column vector) from x
        n_test=xTest.shape[0]
        tTest=np.reshape(np.atleast_2d(xTest[:,-1]),(n_test,-1))
        xTest=xTest[:,:-1]

        # prevent singular matrix: keep unique (x, t) training pairs
        joint=np.hstack((self.X,self.T))
        keep = unique_rows(joint)
        X, T = self.X[keep], self.T[keep]

        ls_x=self.hyper['lengthscale_x']
        ls_t=self.hyper['lengthscale_t']

        def product_kernel(dist_x,dist_t):
            return np.exp(-np.square(dist_x)/ls_x-np.square(dist_t)/ls_t)

        KK_xTest_xTest=product_kernel(euclidean_distances(xTest,xTest),
                                      euclidean_distances(tTest,tTest))\
            +np.eye(n_test)*self.noise_delta

        KK_xTest_xTrain=product_kernel(euclidean_distances(xTest,X),
                                       euclidean_distances(tTest,T))

        # using Cholesky update
        mean=np.dot(KK_xTest_xTrain,self.alpha)
        v=np.linalg.solve(self.L,KK_xTest_xTrain.T)
        var=KK_xTest_xTest-np.dot(v.T,v)

        return mean.ravel(),np.diag(var)
    def fit(self,X,Y,fstar):
        """
        Fit the Gaussian Process on the transformed outcome
        G = sqrt(2*(fstar - Y)).

        Stores X, Y, fstar, G, the covariance KK_x_x, its pseudo-inverse
        KK_x_x_inv, its Cholesky factor L and the weights alphaG.

        Input Parameters
        ----------
        X: the observed points (duplicated rows are dropped)
        Y: the outcome y=f(x)
        fstar: the value used in the transformation above
        """
        keep = unique_rows(X)
        X, Y = X[keep], Y[keep]

        self.X=X
        self.Y=Y
        self.fstar=fstar
        self.G=np.sqrt(2.0*(fstar-Y))

        if self.kernel_name=='SE':
            sq_dist=np.square(euclidean_distances(X,X))
            self.KK_x_x=np.exp(-sq_dist/self.lengthscale)+np.eye(len(X))*self.noise_delta
        else:
            pairwise=pdist(self.X,lambda a,b: self.kernel_dist(a,b,self.lengthscale))
            self.KK_x_x=squareform(pairwise)+np.eye(self.X.shape[0])*(1+self.noise_delta)

        if np.isnan(self.KK_x_x).any(): #NaN
            print("nan in KK_x_x")

        self.KK_x_x_inv=np.linalg.pinv(self.KK_x_x)
        self.L=np.linalg.cholesky(self.KK_x_x)

        # solve K alphaG = G - sqrt(2*fstar) via the two triangular factors
        shifted_G=self.G-np.sqrt(2*self.fstar)
        forward=np.linalg.solve(self.L,shifted_G)
        self.alphaG=np.linalg.solve(self.L.T,forward)
Example #12
0
    def fit(self,X,T,Y,Y_curves):
        """
        Fit the Gaussian Process with a product kernel over (x, t).

        Stores the de-duplicated X, T, Y and Y_curves, raises
        self.MaxEpisode to the longest stored curve, and computes the
        covariance KK_x_x, its Cholesky factor L, the weights alpha and
        the condition number cond_num.

        Input Parameters
        ----------
        X: the observed points
        T: time or number of episode
        Y: the outcome y=f(x)
        Y_curves: one curve per observation (before de-duplication)
        """
        # rows are duplicates only when both x and t coincide
        joint=np.hstack((X,T))
        ur = unique_rows(joint)

        self.X=X[ur]
        self.Y=Y[ur]
        self.T=T[ur]
        self.Y_curves=[curve for pos,curve in enumerate(Y_curves) if ur[pos]==True]

        for curve in self.Y_curves:
            self.MaxEpisode=max(len(curve),self.MaxEpisode)

        sq_dist_x=np.square(euclidean_distances(self.X,self.X))
        sq_dist_t=np.square(euclidean_distances(self.T,self.T))

        self.KK_x_x=np.exp(-sq_dist_x/self.hyper['lengthscale_x']\
                           -sq_dist_t/self.hyper['lengthscale_t'])+np.eye(len(self.X))*self.noise_delta

        if np.isnan(self.KK_x_x).any(): #NaN
            print("nan in KK_x_x")

        # solve K alpha = Y via the two triangular factors
        self.L=np.linalg.cholesky(self.KK_x_x)
        forward=np.linalg.solve(self.L,self.Y)
        self.alpha=np.linalg.solve(self.L.T,forward)
        self.cond_num=self.compute_condition_number()
Example #13
0
    def optimize_lengthscale_logistic_hyper(self,prev_hyper,noise_delta):
        """
        Optimise both GP lengthscales and the logistic hyperparameters,
        then recompute Y from the stored curves and refit the model.

        Returns
        -------
        (lengthscale_x, lengthscale_t, midpoint, growth) as found by
        optimize_lengthscale_SE_logistic_hyper
        """
        optimised=self.optimize_lengthscale_SE_logistic_hyper(prev_hyper,noise_delta)
        newlengthscale,newlengthscale_t,newmidpoint,newgrowth=optimised

        self.hyper['lengthscale_x']=newlengthscale
        self.hyper['lengthscale_t']=newlengthscale_t

        # unique (x, t) pairs for the refit
        joint=np.hstack((self.X,self.T))
        keep = unique_rows(joint)

        # regenerate and standardise Y with the new logistic parameters
        Y_original=transform_logistic(self.Y_curves,newmidpoint,newgrowth,self.SearchSpace[-1,1])
        self.Y=(Y_original-np.mean(Y_original))/np.std(Y_original)

        self.fit(self.X[keep],self.T[keep],self.Y[keep],self.Y_curves)

        return newlengthscale,newlengthscale_t,newmidpoint,newgrowth
Example #14
0
    def optimize_gp_hyperparameter(self,mygp=None,gp_params=None):
        """
        Re-estimate the GP lengthscale and return a freshly fitted GP.

        The estimation strategy is selected by self.optimize_gp
        ('maximize', 'loo', 'marginal' or 'fstar'); any other value leaves
        gp_params unchanged.  A new GaussianProcess is then built from
        gp_params and fitted on the unique rows of the data held by mygp.

        Input parameters
        ----------
        mygp:       GP whose data and optimisers are used (default: self.gp)
        gp_params:  parameter dict, updated in place (default: self.gp_params)

        Returns
        -------
        (new fitted GP, gp_params)
        """
        # identity checks: '== None' would invoke a custom/elementwise __eq__
        if mygp is None:
            mygp=self.gp

        if gp_params is None:
            gp_params=self.gp_params

        if self.optimize_gp=='maximize':
            newlengthscale = mygp.optimize_lengthscale_SE_maximizing(gp_params['lengthscale'],gp_params['noise_delta'])
            gp_params['lengthscale']=newlengthscale
            if self.verbose==1:
                print("MML estimated lengthscale =",newlengthscale)
        elif self.optimize_gp=='loo':
            newlengthscale = mygp.optimize_lengthscale_SE_loo(gp_params['lengthscale'],gp_params['noise_delta'])
            gp_params['lengthscale']=newlengthscale
            if self.verbose==1:
                print("LOO estimated lengthscale =",newlengthscale)

        elif self.optimize_gp=='marginal':
            # slice sampling returns several lengthscales; the first one is used
            # for the GP, the unique set is kept for later use
            self.theta_vector = mygp.slice_sampling_lengthscale_SE(gp_params['lengthscale'],gp_params['noise_delta'])
            gp_params['lengthscale']=self.theta_vector[0]
            self.theta_vector =np.unique(self.theta_vector)
            self.gp_params['newtheta_vector']=self.theta_vector
        elif self.optimize_gp=="fstar":
            # fstar is standardised the same way as Y
            fstar_scaled=(self.acq['fstar']-np.mean(self.Y_original))/np.std(self.Y_original)
            newlengthscale = mygp.optimize_lengthscale_SE_fstar(gp_params['lengthscale'],gp_params['noise_delta'],fstar_scaled)
            gp_params['lengthscale']=newlengthscale
            print("estimated lengthscale =",newlengthscale)

        tempX=mygp.X
        tempY=mygp.Y
        # init a new Gaussian Process after optimizing hyper-parameter
        mygp=GaussianProcess(gp_params)
        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(tempX)
        mygp.fit(tempX[ur], tempY[ur])
        return mygp, gp_params
    def maximize_with_lengthscale_derived_by_fstar(self, gp_params):
        """
        Run one optimisation step: pick the next point, evaluate it and
        append it to the stored observations.

        The lengthscale is re-estimated from fstar whenever the number of
        observations is a multiple of 3*dim.  For the 'mes', 'pvrs' and
        'e3i' acquisitions the step is delegated to the dedicated method.

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        None.  The selected point is appended to self.X / self.X_original
        and its outcome to self.Y_original (self.Y is re-standardised).
        """

        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':
            # uniform sample inside the original bounds, one value per dimension
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(
                self.Y_original)

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = GaussianProcess(gp_params)
        if self.gp.KK_x_x_inv == []:
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        acq = self.acq

        # optimize GP parameters every 3*dim observations
        if len(self.Y) % (3 * self.dim) == 0:
            fstar_scaled = (self.acq['fstar'] -
                            np.mean(self.Y_original)) / np.std(self.Y_original)
            newlengthscale = self.gp.optimize_lengthscale_SE_fstar(
                self.gp_params['lengthscale'], self.gp_params['noise_delta'],
                fstar_scaled)
            self.gp_params['lengthscale'] = newlengthscale
            print("estimated lengthscale =", newlengthscale)

            # init a new Gaussian Process after optimizing hyper-parameter
            # NOTE(review): the new lengthscale is written to self.gp_params
            # but the GP is rebuilt from the gp_params argument - confirm the
            # two are always the same dict.
            self.gp = GaussianProcess(gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        if self.acq['name'] == 'mes':
            self.maximize_mes(gp_params)
            return
        if self.acq['name'] == 'pvrs':
            self.maximize_pvrs(gp_params)
            return
        if self.acq['name'] == 'e3i':
            self.maximize_e3i(gp_params)
            return
        if self.acq['name'] == 'ei_kov' or self.acq[
                'name'] == 'poi_kov' or self.acq['name'] == 'ei_fstar':
            # fstar is standardised the same way as Y
            self.acq['fstar_scaled'] = (self.acq['fstar'] - np.mean(
                self.Y_original)) / np.std(self.Y_original)

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        # Reuse a module-level `xstars` when one exists, otherwise start
        # empty.  (Assigning to `xstars` only inside an `if` made the name
        # local, so an existing global raised UnboundLocalError.)
        xstars = globals().get('xstars', [])

        self.xstars = xstars

        self.acq['xstars'] = xstars
        self.acq_func = AcquisitionFunction(self.acq)

        if acq['name'] == "ei_mu":
            #find the maximum in the predictive mean
            x_mu_max, y_max = acq_max_with_name(gp=self.gp,
                                                scalebounds=self.scalebounds,
                                                acq_name='mu',
                                                IsReturnY=True)

        x_max = acq_max(ac=self.acq_func.acq_kind,
                        gp=self.gp,
                        bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox,
                        seeds=self.xstars)

        val_acq = self.acq_func.acq_kind(x_max, self.gp)

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            val_acq = self.acq_func.acq_kind(x_max, self.gp)

            # acquisition value fell below the threshold: later calls return early
            self.stop_flag = 1

        self.alpha_Xt = np.append(self.alpha_Xt, val_acq)

        mean, var = self.gp.predict(x_max, eval_MSE=True)
        # predict may hand back a read-only view; make it writable before clamping
        var.flags['WRITEABLE'] = True
        var[var < 1e-20] = 0

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # store X
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in original scale
        temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(
            self.Y_original)

        if self.gp.flagIncremental == 1:
            self.gp.fit_incremental(x_max, self.Y[-1])
    def predict(self,xTest,eval_MSE=True):
        """
        compute predictive mean and variance of f = fstar - 0.5*g^2,
        where g is modelled by a GP with prior mean sqrt(2*fstar)

        Input Parameters
        ----------
        xTest: the testing points (a 1d array is reshaped to rows of
               self.X.shape[1] columns)
        eval_MSE: accepted for interface compatibility; not used

        Returns
        -------
        mean, var
        """
        if len(xTest.shape)==1: # 1d
            xTest=xTest.reshape((-1,self.X.shape[1]))

        # prevent singular matrix
        ur = unique_rows(self.X)
        X=self.X[ur]

        if self.kernel_name=='SE':
            Euc_dist=euclidean_distances(xTest,xTest)
            KK_xTest_xTest=np.exp(-np.square(Euc_dist)/self.lengthscale)+np.eye(xTest.shape[0])*self.noise_delta

            Euc_dist_test_train=euclidean_distances(xTest,X)
            KK_xTest_xTrain=np.exp(-np.square(Euc_dist_test_train)/self.lengthscale)
        else:
            KK=pdist(xTest,lambda a,b: self.kernel_dist(a,b,self.lengthscale))
            KK=squareform(KK)
            KK_xTest_xTest=KK+np.eye(xTest.shape[0])+np.eye(xTest.shape[0])*self.noise_delta
            KK_xTest_xTrain=cdist(xTest,X,lambda a,b: self.kernel_dist(a,b,self.lengthscale))

        # posterior of g from the stored Cholesky factor
        meanG=np.dot(KK_xTest_xTrain,self.alphaG)+np.sqrt(2*self.fstar) # non zero prior mean

        v=np.linalg.solve(self.L,KK_xTest_xTrain.T)
        varG=KK_xTest_xTest-np.dot(v.T,v)

        # compute mF, varF
        mf=self.fstar-0.5*np.square(meanG)

        # using linearisation of f around the mean of g
        varf=meanG*varG*meanG

        return mf.ravel(),np.diag(varf)
Example #17
0
    def maximize_batch_PVRS_iterative_greedy(self,B=5,first_batch=[]):
        """
        Finding a batch of points using Peak Suppression / Constant Liar approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
              
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """
        y_max = self.Y.max()
        
        # Set parameters if any was passed
        gp=GaussianProcess(self.gp_params)
        
        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        gp.fit(self.X[ur], self.Y[ur])
        
        # define the number of Thompson sample M
        if 'n_xstars' in self.acq:
            numXtar=self.acq['n_xstars']
        else:
            numXtar=20*self.dim

        if self.xstars==[]:
            xstars=[]
            for ii in range(numXtar):
                mu_acq={}
                mu_acq['name']='thompson'
                mu_acq['dim']=self.dim
                mu_acq['scalebounds']=self.scalebounds     
                acq_mu=AcquisitionFunction(mu_acq)
                xt_TS = acq_max(ac=acq_mu.acq_kind,gp=gp,bounds=self.scalebounds,opt_toolbox='scipy')
                
                #temp.append(xt_TS)
                xstars.append(xt_TS)
        else:
            xstars=self.xstars
            
        # Set acquisition function
        myacq={}
        myacq['name']='pvrs'
        myacq['dim']=self.acq['dim']
        myacq['xstars']=xstars
        acq_func = AcquisitionFunction(myacq)
        
        nRepeat=8
        pred_var=[0]*nRepeat
        bestBatch=[0]*nRepeat
        for tt in range(nRepeat):
            
            # copy GP, X and Y
            temp_gp=copy.deepcopy(gp)
            temp_X=copy.deepcopy(self.X)
            temp_Y=copy.deepcopy(self.Y)
            
            start_batch=time.time()
    
            #store new_x
            if tt==0: # first iteration (repeat) use Greedy approach to fill a batch
                
                if first_batch==[]: # if the first batch is not initialized by greedy
                    new_X=[]
                    for ii in range(B):
                        # Finding argmax of the acquisition function.
                        x_max = acq_max(ac=acq_func.acq_kind,gp=temp_gp, bounds=self.scalebounds)
                        if ii==0:
                            new_X=x_max
                        else:
                            new_X= np.vstack((new_X, x_max.reshape((1, -1))))
                        temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))
                        const_liar,const_liar_variance=temp_gp.predict(x_max,eval_MSE=1)
                        const_liar=np.random.rand()
                        temp_Y = np.append(temp_Y, const_liar )
                        temp_gp.fit(temp_X,temp_Y)
                else:
                    new_X=first_batch
                    #temp_X = np.vstack((temp_X, new_X.reshape((B, -1))))
                    #const_liar,const_liar_variance=temp_gp.predict(new_X,eval_MSE=1)
                    #const_liar=np.random.rand()
                    #temp_Y = np.append(temp_Y, const_liar )
                    #temp_gp.fit(temp_X,temp_Y)
                    
            else:# >=1 iteration
  
                for ii in range(B):                
                    #new_X=new_X.pop(0)
                    temp_X=copy.deepcopy(self.X)

                    if ii==0: # first element
                        temp_X = np.vstack((temp_X, new_X[ii+1:])) # remove item ii  
                    else:
                        if ii==B-1: # last element
                            temp_X = np.vstack((temp_X, new_X[0:ii-1])) # remove item ii  
                        else:
                            #temp_X = np.vstack((temp_X, new_X[0:ii]+new_X[ii+1:])) # remove item ii  
                            temp_X = np.vstack((temp_X, np.vstack((new_X[0:ii],new_X[ii+1:])))) # remove item ii  
   
                    temp_Y,const_liar_variance=temp_gp.predict(temp_X,eval_MSE=1)
                    #temp_Y=np.random.random(size=(len(temp_X),1)) # constant liar
                    temp_gp.fit(temp_X,temp_Y)
    
                    # Finding argmax of the acquisition function.
                    x_max = acq_max_with_init(ac=acq_func.acq_kind,
                                                          gp=temp_gp, y_max=y_max, 
                                                          bounds=self.scalebounds,
                                                          #init_location=np.asarray(new_X[ii]))                    
                                                          init_location=[])                    
                                                                                              
                    previous_var=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X))
                    
                    # back up old value
                    old_value=new_X[ii].copy()
                                       
                    new_X[ii]=x_max
                    
                    new_var=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X))

                    if new_var>previous_var: # keep the previous value if the uncertainty does not reduce
                        new_X[ii]=old_value
                        #print "old value"
                        
                    #new_var2=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X))

                    #print "prev var={:.6f}, newvar={:.6f}, newvar2={:.6f}".format(np.asscalar(previous_var),
                                    #np.asscalar(new_var),np.asscalar(new_var2))


            pred_var[tt]=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X))
            #print pred_var
            bestBatch[tt]=np.asarray(new_X)
            
            
        #return new_X,new_X_original
        idxBest=np.argmin(pred_var)
        
        new_X=bestBatch[idxBest]
        
        self.NumPoints=np.append(self.NumPoints,new_X.shape[0])

        self.X=np.vstack((self.X,new_X))
        # convert back to original scale
        temp_X_new_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(new_X)]
        temp_X_new_original=np.asarray(temp_X_new_original)
        self.X_original=np.vstack((self.X_original, temp_X_new_original))
        
        # evaluate y=f(x)
        temp=self.f(temp_X_new_original)
        temp=np.reshape(temp,(-1,1))
        self.Y_original=np.append(self.Y_original,temp)
        self.Y=(self.Y_original-np.mean(self.Y_original))/(np.max(self.Y_original)-np.min(self.Y_original))
        
        
        
        #return bestBatch[idxBest],pred_var[idxBest]
        return bestBatch[idxBest],pred_var
    def predict(self,xTest,eval_MSE=True):
        """
        Compute the predictive mean and the per-point predictive variance.

        The prediction is assembled from quantities already stored on the
        instance: ``self.alpha`` (multiplied with the test/train kernel to
        give the mean) and ``self.L`` (used in a linear solve to obtain the
        variance reduction; presumably the Cholesky factor computed during
        fitting -- confirm against ``fit``).

        Input Parameters
        ----------
        xTest: the testing points; a 1d input is reshaped to
               (-1, self.X.shape[1])
        eval_MSE: unused, kept so existing callers keep working

        Returns
        -------
        mean: flattened predictive mean, one value per test point
        var: 1d array of predictive variances (observation noise
             ``noise_delta`` included), one value per test point
        """
        xTest=np.asarray(xTest)
        if len(xTest.shape)==1: # 1d
            xTest=xTest.reshape((-1,self.X.shape[1]))

        # prevent singular matrix: use the same de-duplicated training rows
        ur = unique_rows(self.X)
        X=self.X[ur]

        # kernel between the test points and the training points
        if self.kernel_name=='SE':
            Euc_dist_test_train=euclidean_distances(xTest,X)
            KK_xTest_xTrain=np.exp(-np.square(Euc_dist_test_train)/self.lengthscale)
        else:
            KK_xTest_xTrain=cdist(xTest,X,lambda a,b: self.kernel_dist(a,b,self.lengthscale))

        # Only the diagonal of the test covariance is returned. That diagonal
        # is the same constant for both kernels: exp(0) + noise_delta for SE,
        # and 0 (squareform diagonal) + 1 + noise_delta otherwise. Using the
        # constant avoids building an n_test x n_test matrix just to read its
        # diagonal.
        prior_var=1+self.noise_delta

        mean=np.dot(KK_xTest_xTrain,self.alpha)
        v=np.linalg.solve(self.L,KK_xTest_xTrain.T)

        # diag(v.T v) is the column-wise sum of squares of v
        var=prior_var-np.sum(np.square(v),axis=0)

        return mean.ravel(),var
Example #19
0
    def maximize_batch_greedy_PVRS(self,B=5):
        """
        Greedily select a batch of B points with the 'pvrs' acquisition function.

        Steps performed:
          1. fit a fresh GP on the unique rows of the current data,
          2. collect Thompson-sampling maximizers (xstars),
          3. pick B points one at a time; after each pick the temporary GP is
             refitted with a random placeholder output for the picked point,
          4. append the batch to self.X / self.X_original, evaluate self.f on
             it and re-normalise self.Y,
          5. record the maximizer of the GP mean in the *_maxGP attributes.

        The mean predictive variance at xstars before/after each pick is
        appended to self.PVRS_before_after.

        Input Parameters
        ----------
        B:                  number of points in the batch

        Returns
        -------
        X: a batch of [x_1..x_B] (in the scaled search space)
        """
        # Set parameters if any was passed
        self.gp=GaussianProcess(self.gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        if 'n_xstars' in self.acq:
            numXtar=self.acq['n_xstars']
        else:
            numXtar=30*self.dim

        # finding the xt of Thompson Sampling
        xstars=[]
        for ii in range(numXtar):
            mu_acq={}
            mu_acq['name']='thompson'
            mu_acq['dim']=self.dim
            mu_acq['scalebounds']=self.scalebounds
            acq_mu=AcquisitionFunction(mu_acq)
            xt_TS = acq_max(ac=acq_mu.acq_kind,gp=self.gp,bounds=self.scalebounds,opt_toolbox='scipy')
            xstars.append(xt_TS)

        self.xstars=xstars

        # Set acquisition function
        myacq={}
        myacq['name']='pvrs'
        myacq['dim']=self.acq['dim']
        myacq['xstars']=xstars

        acq_func = AcquisitionFunction(myacq)

        # copy GP, X and Y so the greedy loop does not touch the real data
        temp_gp=copy.deepcopy(self.gp)
        temp_X=copy.deepcopy(self.X)
        temp_Y=copy.deepcopy(self.Y)

        # check predictive variance before adding a new data points
        var_before=np.mean(self.gp.compute_var(temp_X,xstars))

        #store new_x
        new_X=np.empty((0,self.dim),float)
        for ii in range(B):
            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=acq_func.acq_kind,gp=temp_gp, bounds=self.scalebounds)

            new_X= np.vstack((new_X, x_max.reshape((1, -1))))
            temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))

            # check predictive variance after
            var_after=np.mean(self.gp.compute_var(temp_X,xstars))

            # PVRS_before_after becomes an ndarray after the first record, so
            # test its length: comparing an ndarray with [] using == is an
            # element-wise operation and does not reliably give a single bool.
            temp_var=np.asarray([var_before,var_after])
            if len(self.PVRS_before_after)==0:
                self.PVRS_before_after=temp_var
            else:
                self.PVRS_before_after=np.vstack((self.PVRS_before_after, temp_var))

            var_before=var_after

            # NOTE(review): the predicted value is discarded; the placeholder
            # output actually used for the refit is a uniform random number.
            const_liar,const_liar_variance=temp_gp.predict(x_max,eval_MSE=1)
            const_liar=np.random.rand()
            temp_Y = np.append(temp_Y, const_liar )

            temp_gp.fit(temp_X,temp_Y)

        self.NumPoints=np.append(self.NumPoints,new_X.shape[0])

        self.X=np.vstack((self.X,new_X))
        # convert back to original scale
        temp_X_new_original=np.asarray([val*self.max_min_gap+self.bounds[:,0] for val in new_X])
        self.X_original=np.vstack((self.X_original, temp_X_new_original))

        # evaluate y=f(x)
        temp=self.f(temp_X_new_original)
        temp=np.reshape(temp,(-1,1))
        self.Y_original=np.append(self.Y_original,temp)
        self.Y=(self.Y_original-np.mean(self.Y_original))/(np.max(self.Y_original)-np.min(self.Y_original))

        # find the maximizer in the GP mean function
        # len(self.gp) is used as a probe for "self.gp is a collection of
        # GPs"; any failure falls back to treating self.gp as a single GP.
        try:
            len(self.gp)
            x_mu_max=[]
            for j in range(self.J):
                x_mu_max_temp=acq_max_with_name(gp=self.gp[j],scalebounds=self.scalebounds[self.featIdx[j]],acq_name="mu")
                x_mu_max=np.hstack((x_mu_max,x_mu_max_temp))

        except Exception:
            x_mu_max=acq_max_with_name(gp=self.gp,scalebounds=self.scalebounds,acq_name="mu")

        x_mu_max_original=x_mu_max*self.max_min_gap+self.bounds[:,0]
        # evaluate f at the maximizer of the GP mean and store it
        self.Y_original_maxGP = np.append(self.Y_original_maxGP, self.f(x_mu_max_original))
        self.X_original_maxGP = np.vstack((self.X_original_maxGP, x_mu_max_original))

        return new_X
    def maximize(self):
        """
        Run one iteration of the optimization loop.

        Depending on self.acq['name'] this either draws a random point,
        delegates to maximize_mes / maximize_pvrs / maximize_e3i, or fits a
        GP, maximizes the configured acquisition function and hands the
        selected point to augment_the_new_data of the parent class.

        Side effects (generic path): updates self.gp, self.xstars,
        self.acq['xstars'], self.acq_func, self.alpha_Xt, self.time_opt and,
        when the stopping criterion is hit, self.stop_flag.

        Returns
        -------
        None
        """

        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':

            super(BayesOpt, self).generate_random_point()

            return

        # init a new Gaussian Process
        self.gp = GaussianProcess(self.gp_params)
        if self.gp.KK_x_x_inv == []:
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        acq = self.acq

        # re-optimize the GP hyper-parameters whenever the number of
        # observations is a multiple of 2*dim
        if len(self.Y) % (2 * self.dim) == 0:
            self.gp, self.gp_params = super(BayesOpt,
                                            self).optimize_gp_hyperparameter()

        # acquisition functions that have their own dedicated routine
        if self.acq['name'] == 'mes':
            self.maximize_mes()
            return
        if self.acq['name'] == 'pvrs':
            self.maximize_pvrs()
            return
        if self.acq['name'] == 'e3i':
            self.maximize_e3i()
            return
        if self.acq['name'] in ('ei_kov', 'poi_kov', 'ei_fstar'):
            # express the known optimum value on the standardised output scale
            self.acq['fstar_scaled'] = (self.acq['fstar'] - np.mean(
                self.Y_original)) / np.std(self.Y_original)

        # Set acquisition function
        start_opt = time.time()

        # Use a module-level `xstars` when one exists, otherwise start empty.
        # (Assigning `xstars` conditionally inside this function made the name
        # local, so an existing global triggered UnboundLocalError below.)
        xstars = globals().get('xstars', [])

        self.xstars = xstars

        self.acq['xstars'] = xstars
        self.acq_func = AcquisitionFunction(self.acq)

        if acq['name'] == "ei_mu":
            # find the maximum in the predictive mean
            # NOTE(review): the returned values are not used further below.
            x_mu_max, y_max = acq_max_with_name(gp=self.gp,
                                                scalebounds=self.scalebounds,
                                                acq_name='mu',
                                                IsReturnY=True)

        x_max = acq_max(ac=self.acq_func.acq_kind,
                        gp=self.gp,
                        bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox,
                        seeds=self.xstars)

        val_acq = self.acq_func.acq_kind(x_max, self.gp)

        # a stopping_criteria of 0 disables the check
        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            self.stop_flag = 1

        self.alpha_Xt = np.append(self.alpha_Xt, val_acq)

        # NOTE(review): mean/var are computed and clipped but not stored;
        # kept so the sequence of calls on the GP is unchanged.
        mean, var = self.gp.predict(x_max, eval_MSE=True)
        var.flags['WRITEABLE'] = True
        var[var < 1e-20] = 0

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        super(BayesOpt, self).augment_the_new_data(x_max)
    def maximize_pvrs(self):
        """
        Run one optimization iteration with the 'pvrs' acquisition function.

        A set of Thompson-sampling maximizers (xstars) is collected, each from
        a freshly fitted GP (with a randomly chosen entry of
        self.theta_vector as 'theta' when that vector is non-empty). The
        acquisition function is then maximized given these xstars, the mean
        predictive variance at the xstars before/after adding the selected
        point is recorded, and the point is handed to augment_the_new_data of
        the parent class.

        Side effects: updates self.gp, self.gp_params['theta'], self.xstars,
        self.xstar_accumulate, self.PVRS_before_after,
        self.accummulate_PVRS_before_after, self.acq['xstars'],
        self.acq_func, self.time_opt and possibly self.stop_flag.

        Returns
        -------
        None
        """

        if self.stop_flag == 1:
            return

        if 'n_xstars' in self.acq:
            numXstar = self.acq['n_xstars']
        else:
            numXstar = 10 * self.dim

        # Set acquisition function
        start_opt = time.time()

        numTheta = len(self.theta_vector)
        ts_points = []
        # finding the xt of Thompson Sampling
        for ii in range(numXstar):
            if numTheta > 0:
                # since the numXstar > len(theta_vector), draw with replacement
                index = np.random.randint(numTheta)
                # the parameters live on the instance; the bare name
                # `gp_params` is not defined in this scope
                self.gp_params['theta'] = self.theta_vector[index]

            # init a new Gaussian Process
            self.gp = GaussianProcess(self.gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

            xt_TS, y_xt_TS = acq_max_with_name(gp=self.gp,
                                               scalebounds=self.scalebounds,
                                               acq_name="thompson",
                                               IsReturnY=True)

            ts_points.append(xt_TS)

        # every Thompson sample is kept (no filtering against y_max)
        self.xstars = ts_points

        # check predictive variance before adding a new data points
        var_before = np.mean(self.gp.compute_var(self.gp.X, self.xstars))

        # xstar_accumulate turns into an ndarray after the first call, so test
        # its length instead of comparing it with [] element-wise
        if len(self.xstar_accumulate) == 0:
            self.xstar_accumulate = np.asarray(self.xstars)
        else:
            self.xstar_accumulate = np.vstack(
                (self.xstar_accumulate, np.asarray(self.xstars)))

        accum_var_before = np.mean([
            self.gp.compute_var(self.gp.X, val)
            for val in self.xstar_accumulate
        ])

        self.gp.lengthscale_vector = self.theta_vector
        self.acq['xstars'] = self.xstars
        self.acq_func = AcquisitionFunction(self.acq)
        x_max = acq_max(ac=self.acq_func.acq_kind,
                        gp=self.gp,
                        bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox,
                        seeds=self.xstars)

        # the sign of the acquisition value is flipped for the stopping test
        val_acq = -self.acq_func.acq_kind(x_max, self.gp)

        # check predictive variance after
        X_with_new = np.vstack((self.gp.X, x_max))
        var_after = np.mean(self.gp.compute_var(X_with_new, self.xstars))

        accum_var_after = np.mean([
            self.gp.compute_var(X_with_new, val)
            for val in self.xstar_accumulate
        ])

        if len(self.PVRS_before_after) == 0:
            self.PVRS_before_after = np.asarray([var_before, var_after])
            self.accummulate_PVRS_before_after = np.asarray(
                [accum_var_before, accum_var_after])

        else:
            self.PVRS_before_after = np.vstack(
                (self.PVRS_before_after, np.asarray([var_before, var_after])))
            self.accummulate_PVRS_before_after = np.vstack(
                (self.accummulate_PVRS_before_after,
                 np.asarray([accum_var_before, accum_var_after])))

        # check maximum variance
        # NOTE(review): the maximizer found here is not used afterwards; the
        # call is kept so the sequence of optimizer invocations is unchanged.
        var_acq = {}
        var_acq['name'] = 'pure_exploration'
        var_acq['dim'] = self.dim
        var_acq['scalebounds'] = self.scalebounds
        acq_var = AcquisitionFunction(var_acq)
        max_var_point = acq_max(ac=acq_var.acq_kind,
                                gp=self.gp,
                                bounds=self.scalebounds,
                                opt_toolbox='scipy')

        # a stopping_criteria of 0 disables the check
        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            val_acq = self.acq_func.acq_kind(x_max, self.gp)
            self.stop_flag = 1
            print(
                "Stopping Criteria is violated. Stopping Criteria is {:.15f}".
                format(self.stopping_criteria))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        super(BayesOpt, self).augment_the_new_data(x_max)
    def maximize(self):
        """
        Main optimization method.

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommented point for evaluation
        """

        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':
            x_max = generate_random_points(bounds=self.scalebounds, size=1)
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X

            #self.Y = np.append(self.Y, self.f(temp_X_new_original))
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(
                self.Y_original)

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        fstar_scaled = (self.fstar - np.mean(self.Y_original)) / np.std(
            self.Y_original)

        # init a new Gaussian Process
        if self.isTGP == 1:
            self.gp = TransformedGP(self.gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur], fstar_scaled)
        else:
            self.gp = GaussianProcess(self.gp_params)
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        # optimize GP parameters after 10 iterations
        newlengthscale = None
        # we donot optimize lengthscale for the setting of gp_lengthscale
        if len(self.Y) % (4 * self.dim) == 0:
            if self.optimize_gp == 'maximize':
                newlengthscale = self.gp.optimize_lengthscale_SE_maximizing(
                    self.gp_params['lengthscale'],
                    self.gp_params['noise_delta'])
                self.gp_params['lengthscale'] = newlengthscale

            elif self.optimize_gp == 'loo':
                newlengthscale = self.gp.optimize_lengthscale_SE_loo(
                    self.gp_params['lengthscale'],
                    self.gp_params['noise_delta'])
                self.gp_params['lengthscale'] = newlengthscale

            if self.verbose == 1:
                print("estimated lengthscale =", newlengthscale)

            # init a new Gaussian Process after optimizing hyper-parameter
            if self.isTGP == 1:
                self.gp = TransformedGP(self.gp_params)
                # Find unique rows of X to avoid GP from breaking
                ur = unique_rows(self.X)
                self.gp.fit(self.X[ur], self.Y[ur], fstar_scaled)
            else:
                self.gp = GaussianProcess(self.gp_params)
                ur = unique_rows(self.X)
                self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()
        # run the acquisition function for the first time to get xstar

        self.xstars = []

        fstar_scaled = (self.fstar - np.mean(self.Y_original)) / np.std(
            self.Y_original)

        fstar_scaled = (self.fstar - np.mean(self.Y_original)) / np.std(
            self.Y_original)
        self.acq['fstar_scaled'] = np.asarray([fstar_scaled])

        x_max = acq_max_with_name(gp=self.gp,
                                  scalebounds=self.scalebounds,
                                  acq_name=self.acq['name'],
                                  fstar_scaled=fstar_scaled)
        """
        self.acq_func = AcquisitionFunction(self.acq)
        x_max = acq_max(ac=self.acq_func.acq_kind,gp=self.gp,bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox)
        """

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # store X
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in original scale
        temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))
        # evaluate Y using original X

        #self.Y = np.append(self.Y, self.f(temp_X_new_original))
        y_original = self.f(temp_X_new_original)
        self.Y_original = np.append(self.Y_original, y_original)

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(
            self.Y_original)

        if self.gp.flagIncremental == 1:
            self.gp.fit_incremental(x_max, self.Y[-1])