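# ---------------------------------------------------------------------------
# Module dependencies. numpy, time, copy and scipy.optimize.minimize are
# clearly required by the code below. The import paths for the repo's own
# helpers (GaussianProcess, AcquisitionFunction, acq_max, acq_max_with_name,
# acq_max_with_init, unique_rows) are assumptions; rename them to match the
# actual package layout.
# ---------------------------------------------------------------------------
import copy
import time

import numpy as np
from scipy.optimize import minimize

# assumed local modules -- adjust to the real package structure
from prada_gaussian_process import GaussianProcess, unique_rows
from acquisition_functions import AcquisitionFunction
from acquisition_maximization import acq_max, acq_max_with_name, acq_max_with_init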
class BayesOpt(BO_Sequential_Base):

    def __init__(self, gp_params, func_params, acq_params, verbose=1):
        """
        Sequential Bayesian optimization. See BO_Sequential_Base.__init__
        for the full description of gp_params, func_params and acq_params.
        """
        # all bookkeeping (bounds, 0-1 scaling, acquisition set-up) is done in the base class
        super(BayesOpt, self).__init__(gp_params, func_params, acq_params, verbose)

    # will be later used for visualization
    def posterior(self, Xnew):
        self.gp.fit(self.X, self.Y)
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def init(self, gp_params, n_init_points=3, seed=1):
        """
        Input parameters
        ----------
        gp_params:      Gaussian Process structure
        n_init_points:  # init points
        """
        super(BayesOpt, self).init(gp_params, n_init_points, seed)

    def init_with_data(self, init_X, init_Y):
        """
        Input parameters
        ----------
        init_X, init_Y: init data observations (in original scale)
        """
        super(BayesOpt, self).init_with_data(init_X, init_Y)

    def estimate_L(self, bounds):
        '''
        Estimate the Lipschitz constant of f by maximizing the norm of the
        expectation of the gradient of *f*.
        '''
        def df(x, model, x0):
            mean_derivative = model.predictive_gradient(self.X, self.Y, x)
            temp = mean_derivative * mean_derivative
            if len(temp.shape) <= 1:
                res = np.sqrt(temp)
            else:
                # simply take the norm of the expectation of the gradient
                res = np.sqrt(np.sum(temp, axis=1))
            return -res

        gp_model = self.gp
        dim = len(bounds)
        num_data = 1000 * dim
        samples = np.zeros(shape=(num_data, dim))
        for k in range(0, dim):
            samples[:, k] = np.random.uniform(low=bounds[k][0],
                                              high=bounds[k][1], size=num_data)

        #samples = np.vstack([samples, gp_model.X])
        pred_samples = df(samples, gp_model, 0)
        x0 = samples[np.argmin(pred_samples)]

        res = minimize(df, x0, method='L-BFGS-B', bounds=bounds,
                       args=(gp_model, x0), options={'maxiter': 100})

        try:
            minusL = res.fun[0][0]
        except:
            if len(res.fun.shape) == 1:
                minusL = res.fun[0]
            else:
                minusL = res.fun

        L = -minusL
        if L < 1e-6:
            L = 0.0001  # to avoid problems in cases in which the model is flat
        return L

    def maximize_with_lengthscale_derived_by_fstar(self, gp_params):
        """
        Main optimization method, using the known optimum value f* to set
        the GP lengthscale.

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))

            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = GaussianProcess(gp_params)
        if self.gp.KK_x_x_inv == []:
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        acq = self.acq

        # periodically re-estimate the GP lengthscale (every 3*dim iterations)
        if len(self.Y) % (3 * self.dim) == 0:
            fstar_scaled = (self.acq['fstar'] - np.mean(self.Y_original)) / np.std(self.Y_original)
            newlengthscale = self.gp.optimize_lengthscale_SE_fstar(
                self.gp_params['lengthscale'], self.gp_params['noise_delta'], fstar_scaled)
            self.gp_params['lengthscale'] = newlengthscale
            print("estimated lengthscale =", newlengthscale)

            # init a new Gaussian Process after optimizing the hyper-parameter
            self.gp = GaussianProcess(gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        if self.acq['name'] == 'mes':
            self.maximize_mes()
            return
        if self.acq['name'] == 'pvrs':
            self.maximize_pvrs()
            return
        if self.acq['name'] == 'e3i':
            self.maximize_e3i()
            return
        if self.acq['name'] in ('ei_kov', 'poi_kov', 'ei_fstar'):
            self.acq['fstar_scaled'] = (self.acq['fstar'] - np.mean(self.Y_original)) / np.std(self.Y_original)

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        # reset the x* seeds for this iteration
        xstars = []
        self.xstars = xstars
        self.acq['xstars'] = xstars
        self.acq_func = AcquisitionFunction(self.acq)

        if acq['name'] == "ei_mu":
            # find the maximum of the predictive mean
            x_mu_max, y_max = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds,
                                                acq_name='mu', IsReturnY=True)

        x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox, seeds=self.xstars)

        val_acq = self.acq_func.acq_kind(x_max, self.gp)

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            self.stop_flag = 1
            #print("Stopping Criteria is violated. Stopping Criteria is {:.15f}".format(self.stopping_criteria))

        self.alpha_Xt = np.append(self.alpha_Xt, val_acq)

        mean, var = self.gp.predict(x_max, eval_MSE=True)
        var.flags['WRITEABLE'] = True
        var[var < 1e-20] = 0
        #self.Tau_Xt = np.append(self.Tau_Xt, val_acq / var)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # store X in the 0-1 scale
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in original scale
        temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        if self.gp.flagIncremental == 1:
            self.gp.fit_incremental(x_max, self.Y[-1])

    def maximize(self):
        """
        Main optimization method.

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':
            super(BayesOpt, self).generate_random_point()
            return

        # init a new Gaussian Process
        self.gp = GaussianProcess(self.gp_params)
        if self.gp.KK_x_x_inv == []:
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        acq = self.acq

        # periodically re-estimate the GP hyper-parameters (every 2*dim iterations)
        if len(self.Y) % (2 * self.dim) == 0:
            self.gp, self.gp_params = super(BayesOpt, self).optimize_gp_hyperparameter()

        if self.acq['name'] == 'mes':
            self.maximize_mes()
            return
        if self.acq['name'] == 'pvrs':
            self.maximize_pvrs()
            return
        if self.acq['name'] == 'e3i':
            self.maximize_e3i()
            return
        if self.acq['name'] in ('ei_kov', 'poi_kov', 'ei_fstar'):
            self.acq['fstar_scaled'] = (self.acq['fstar'] - np.mean(self.Y_original)) / np.std(self.Y_original)

        # Set acquisition function
        start_opt = time.time()

        #y_max = self.Y.max()

        # reset the x* seeds for this iteration
        xstars = []
        self.xstars = xstars
        self.acq['xstars'] = xstars
        self.acq_func = AcquisitionFunction(self.acq)

        if acq['name'] == "ei_mu":
            # find the maximum of the predictive mean
            x_mu_max, y_max = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds,
                                                acq_name='mu', IsReturnY=True)

        x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox, seeds=self.xstars)

        val_acq = self.acq_func.acq_kind(x_max, self.gp)

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            self.stop_flag = 1
            #print("Stopping Criteria is violated. Stopping Criteria is {:.15f}".format(self.stopping_criteria))

        self.alpha_Xt = np.append(self.alpha_Xt, val_acq)

        mean, var = self.gp.predict(x_max, eval_MSE=True)
        var.flags['WRITEABLE'] = True
        var[var < 1e-20] = 0
        #self.Tau_Xt = np.append(self.Tau_Xt, val_acq / var)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        super(BayesOpt, self).augment_the_new_data(x_max)

    def maximize_mes(self):
        """
        Max-value Entropy Search step.

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.stop_flag == 1:
            return

        # Set acquisition function
        start_opt = time.time()

        y_max = np.max(self.Y)

        # run Thompson Sampling first to collect y* samples and x* seeds
        self.xstars = []
        #numXstar = 10 * self.dim
        numXstar = 30

        y_stars = []
        temp = []
        for ii in range(numXstar):
            xt_TS, y_xt_TS = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds,
                                               acq_name="thompson", IsReturnY=True)
            y_stars.append(y_xt_TS)
            temp.append(xt_TS)

            # keep xt_TS only if the sampled optimum exceeds the incumbent y^max
            if y_xt_TS >= y_max:
                self.xstars.append(xt_TS)

        if self.xstars == []:
            # no TS sample exceeded y^max: fall back to all TS samples
            self.xstars = temp

        self.acq['xstars'] = self.xstars
        self.acq['ystars'] = y_stars

        self.acq_func = AcquisitionFunction(self.acq)
        x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox, seeds=self.xstars)

        val_acq = self.acq_func.acq_kind(x_max, self.gp)

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            self.stop_flag = 1
            print("Stopping Criteria is violated. Stopping Criteria is {:.15f}".format(self.stopping_criteria))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        super(BayesOpt, self).augment_the_new_data(x_max)

        # convert the y* samples back to the original scale and store them
        y_stars = [val * np.std(self.Y_original) + np.mean(self.Y_original)
                   for idx, val in enumerate(y_stars)]
        self.ystars.append(np.ravel(y_stars))

    def maximize_e3i(self):
        """
        Exploration Enhanced Expected Improvement step.

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.stop_flag == 1:
            return

        # Set acquisition function
        start_opt = time.time()

        y_max = np.max(self.Y)

        # run Thompson Sampling first to get the x* candidates
        self.xstars = []
        numXstar = 50 * self.dim
        #numXstar = 20

        y_stars = []
        temp = []
        for ii in range(numXstar):
            xt_TS, y_xt_TS = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds,
                                               acq_name="thompson", IsReturnY=True)
            y_stars.append(y_xt_TS)
            temp.append(xt_TS)
            # here every TS sample is kept, even when f* < y^max
            #if y_xt_TS >= y_max:
            self.xstars.append(xt_TS)

        if self.acq['debug'] == 1:
            print('mean y*={:.4f}({:.8f}) y+={:.4f}'.format(
                np.mean(y_stars), np.std(y_stars), y_max))

        if self.xstars == []:
            # again perform TS and take all of them
            self.xstars = temp

        self.acq['xstars'] = self.xstars
        self.acq['ystars'] = y_stars

        self.acq_func = AcquisitionFunction(self.acq)
        x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox, seeds=self.xstars)

        val_acq = self.acq_func.acq_kind(x_max, self.gp)

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            self.stop_flag = 1
            print("Stopping Criteria is violated. Stopping Criteria is {:.15f}".format(self.stopping_criteria))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        super(BayesOpt, self).augment_the_new_data(x_max)

    def maximize_pvrs(self):
        """
        Predictive Variance Reduction Search step.

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.stop_flag == 1:
            return

        if 'n_xstars' in self.acq:
            numXstar = self.acq['n_xstars']
        else:
            numXstar = 10 * self.dim

        # Set acquisition function
        start_opt = time.time()
        y_max = self.Y.max()

        # run Thompson Sampling first to get the x* candidates
        self.xstars = []
        numTheta = len(self.theta_vector)
        temp = []
        for ii in range(numXstar):
            if len(self.theta_vector) > 0:
                # marginalize over the sampled lengthscales (numXstar > len(theta_vector))
                index = np.random.randint(numTheta)
                self.gp_params['theta'] = self.theta_vector[index]

            # init a new Gaussian Process
            self.gp = GaussianProcess(self.gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

            xt_TS, y_xt_TS = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds,
                                               acq_name="thompson", IsReturnY=True)
            temp.append(xt_TS)
            # optionally keep xt_TS only if f* >= y^max
            #if y_xt_TS >= y_max:
            #    self.xstars.append(xt_TS)

        if self.xstars == []:
            # take all TS samples
            self.xstars = temp

        # check predictive variance before adding a new data point
        var_before = self.gp.compute_var(self.gp.X, self.xstars)
        var_before = np.mean(var_before)

        if len(self.xstar_accumulate) == 0:
            self.xstar_accumulate = np.asarray(self.xstars)
        else:
            self.xstar_accumulate = np.vstack((self.xstar_accumulate, np.asarray(self.xstars)))

        accum_var_before = [self.gp.compute_var(self.gp.X, val)
                            for idx, val in enumerate(self.xstar_accumulate)]
        accum_var_before = np.mean(accum_var_before)

        self.gp.lengthscale_vector = self.theta_vector
        self.acq['xstars'] = self.xstars
        self.acq_func = AcquisitionFunction(self.acq)
        x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox, seeds=self.xstars)

        val_acq = -self.acq_func.acq_kind(x_max, self.gp)

        # check predictive variance after adding x_max
        temp = np.vstack((self.gp.X, x_max))
        var_after = self.gp.compute_var(temp, self.xstars)
        var_after = np.mean(var_after)

        accum_var_after = [self.gp.compute_var(temp, val)
                           for idx, val in enumerate(self.xstar_accumulate)]
        accum_var_after = np.mean(accum_var_after)

        if len(self.PVRS_before_after) == 0:
            self.PVRS_before_after = np.asarray([var_before, var_after])
            self.accummulate_PVRS_before_after = np.asarray([accum_var_before, accum_var_after])
        else:
            self.PVRS_before_after = np.vstack(
                (self.PVRS_before_after, np.asarray([var_before, var_after])))
            self.accummulate_PVRS_before_after = np.vstack(
                (self.accummulate_PVRS_before_after,
                 np.asarray([accum_var_before, accum_var_after])))

        #print("predictive variance before={:.12f} after={:.12f} val_acq={:.12f}".format(var_before, var_after, np.asscalar(val_acq)))

        # check the maximum variance location (kept for debugging)
        var_acq = {}
        var_acq['name'] = 'pure_exploration'
        var_acq['dim'] = self.dim
        var_acq['scalebounds'] = self.scalebounds
        acq_var = AcquisitionFunction(var_acq)
        temp = acq_max(ac=acq_var.acq_kind, gp=self.gp, bounds=self.scalebounds,
                       opt_toolbox='scipy')

        #max_var_after = acq_var.acq_kind(temp, self.gp, y_max=y_max)
        #print("max predictive variance ={:.8f}".format(np.asscalar(max_var_after)))

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            val_acq = self.acq_func.acq_kind(x_max, self.gp)
            self.stop_flag = 1
            print("Stopping Criteria is violated. Stopping Criteria is {:.15f}".format(self.stopping_criteria))

        #mean, var = self.gp.predict(x_max, eval_MSE=True)
        #var.flags['WRITEABLE'] = True
        #var[var < 1e-20] = 0
        #self.Tau_Xt = np.append(self.Tau_Xt, val_acq / var)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        super(BayesOpt, self).augment_the_new_data(x_max)
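# ---------------------------------------------------------------------------
# Minimal usage sketch for BayesOpt (illustrative only). The toy objective,
# the gp_params/acq_params values, and the number of iterations below are
# assumptions, not part of the class definition above.
# ---------------------------------------------------------------------------
if __name__ == '__main__':

    class Sphere:
        """Toy objective: maximize -||x||^2 on [-2, 2]^2."""
        bounds = np.asarray([[-2.0, 2.0], [-2.0, 2.0]])

        def func(self, X):
            X = np.atleast_2d(np.asarray(X))
            return -np.sum(X ** 2, axis=1)

    gp_params = {'lengthscale': 0.1, 'noise_delta': 1e-6}   # assumed keys
    func_params = {'function': Sphere()}
    acq_params = {'acq_func': {'name': 'ei', 'dim': 2},
                  'opt_toolbox': 'scipy',
                  'optimize_gp': 'maximize'}

    bo = BayesOpt(gp_params, func_params, acq_params, verbose=1)
    bo.init(gp_params, n_init_points=3, seed=1)
    for _ in range(10):
        bo.maximize()                      # one suggestion + evaluation per call
    print('best y =', bo.Y_original.max())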
class BO_Sequential_Base(object):

    def __init__(self, gp_params, func_params, acq_params, verbose=1):
        """
        Input parameters
        ----------
        gp_params:              GP parameters
        gp_params.theta:        to compute the kernel
        gp_params.delta:        to compute the kernel

        func_params:            function to optimize
        func_params.init_bounds: initial bounds for parameters
        func_params.bounds:     bounds on parameters
        func_params.func:       a function to be optimized

        acq_params:             acquisition function,
                                acq_params.acq_func['name']=['ei','ucb','poi','lei'],
                                acq['kappa'] for ucb, acq['k'] for lei
        acq_params.opt_toolbox: optimization toolbox 'nlopt','direct','scipy'

        Returns
        -------
        dim:                    dimension
        bounds:                 bounds on original scale
        scalebounds:            bounds on normalized scale of 0-1
        time_opt:               will record the time spent on optimization
        gp:                     Gaussian Process object
        """
        self.verbose = verbose

        # Find number of parameters
        try:
            bounds = func_params['function']['bounds']
        except:
            bounds = func_params['function'].bounds
        self.dim = len(bounds)

        # Create an array with parameter bounds
        if isinstance(bounds, dict):
            # Get the names of the parameters
            self.keys = list(bounds.keys())
            self.bounds = []
            for key in list(bounds.keys()):
                self.bounds.append(bounds[key])
            self.bounds = np.asarray(self.bounds)
        else:
            self.bounds = np.asarray(bounds)

        # create scalebounds on the normalized 0-1 scale
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]

        # the function to be optimized
        self.function = func_params['function']
        try:
            self.f = func_params['function']['func']
        except:
            self.f = func_params['function'].func

        # optimization toolbox
        if 'opt_toolbox' not in acq_params:
            self.opt_toolbox = 'scipy'
        else:
            self.opt_toolbox = acq_params['opt_toolbox']

        # acquisition function type
        self.acq = acq_params['acq_func']
        self.acq['scalebounds'] = self.scalebounds

        if 'debug' not in self.acq:
            self.acq['debug'] = 0
        if 'stopping' not in acq_params:
            self.stopping_criteria = 0
        else:
            self.stopping_criteria = acq_params['stopping']
        if 'optimize_gp' not in acq_params:
            self.optimize_gp = 0
        else:
            self.optimize_gp = acq_params['optimize_gp']
        if 'marginalize_gp' not in acq_params:
            self.marginalize_gp = 0
        else:
            self.marginalize_gp = acq_params['marginalize_gp']

        # store X in original scale
        self.X_original = None
        # store X in 0-1 scale
        self.X = None
        # store y=f(x), standardized as (y - mean) / std
        self.Y = None
        # y in original scale
        self.Y_original = None
        # performance evaluation at the maximum of the GP mean (for information-theoretic acquisitions)
        self.Y_original_maxGP = None
        self.X_original_maxGP = None
        # value of the acquisition function at the selected point
        self.alpha_Xt = None
        self.Tau_Xt = None

        self.time_opt = 0
        self.k_Neighbor = 2

        # Lipschitz constant
        self.L = 0

        self.gp_params = gp_params
        # Gaussian Process class
        self.gp = GaussianProcess(gp_params)

        # acquisition function
        self.acq_func = None
        # stop condition
        self.stop_flag = 0
        self.logmarginal = 0

        # xt_suggestion, caching for Consensus
        self.xstars = []
        self.xstar_accumulate = []

        # theta vector for marginalizing the GP lengthscale
        self.theta_vector = []

        # PVRS before and after
        self.PVRS_before_after = []
        self.accummulate_PVRS_before_after = []

        # store ystars
        #self.ystars = np.empty((0, 100), float)
        self.ystars = []

    def init(self, gp_params, n_init_points=3, seed=1):
        """
        Input parameters
        ----------
        gp_params:      Gaussian Process structure
        n_init_points:  # init points
        """
        np.random.seed(seed)

        # Generate random initial points
        l = [np.random.uniform(x[0], x[1]) for _ in range(n_init_points) for x in self.bounds]
        #l = [np.linspace(x[0], x[1], num=n_init_points) for x in self.init_bounds]

        temp = np.asarray(l)
        temp = temp.T
        init_X = list(temp.reshape((n_init_points, -1)))

        self.X_original = np.asarray(init_X)
        self.X_original_maxGP = np.asarray(init_X)

        # Evaluate the target function at all initial points
        y_init = self.f(init_X)
        y_init = np.reshape(y_init, (n_init_points, 1))

        self.Y_original = np.asarray(y_init)
        self.Y_original_maxGP = np.asarray(y_init)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        # convert to the 0-1 scaled X
        temp_init_point = np.divide((init_X - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp_init_point)

    def init_with_data(self, init_X, init_Y):
        """
        Input parameters
        ----------
        init_X, init_Y: init data observations (in original scale)
        """
        # Turn it into np array and store.
        self.X_original = np.asarray(init_X)
        temp_init_point = np.divide((init_X - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp_init_point)

        self.Y_original = np.asarray(init_Y)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

    def optimize_gp_hyperparameter(self, mygp=None, gp_params=None):
        if mygp is None:
            mygp = self.gp
        if gp_params is None:
            gp_params = self.gp_params

        if self.optimize_gp == 'maximize':
            # maximum marginal likelihood
            newlengthscale = mygp.optimize_lengthscale_SE_maximizing(
                gp_params['lengthscale'], gp_params['noise_delta'])
            gp_params['lengthscale'] = newlengthscale
            if self.verbose == 1:
                print("MML estimated lengthscale =", newlengthscale)
        elif self.optimize_gp == 'loo':
            # leave-one-out cross-validation
            newlengthscale = mygp.optimize_lengthscale_SE_loo(
                gp_params['lengthscale'], gp_params['noise_delta'])
            gp_params['lengthscale'] = newlengthscale
            if self.verbose == 1:
                print("LOO estimated lengthscale =", newlengthscale)
        elif self.optimize_gp == 'marginal':
            # marginalize the lengthscale by slice sampling
            self.theta_vector = mygp.slice_sampling_lengthscale_SE(
                gp_params['lengthscale'], gp_params['noise_delta'])
            gp_params['lengthscale'] = self.theta_vector[0]
            self.theta_vector = np.unique(self.theta_vector)
            self.gp_params['newtheta_vector'] = self.theta_vector
            #print("estimated lengthscale =", self.theta_vector)
        elif self.optimize_gp == "fstar":
            # lengthscale derived from the known optimum value f*
            fstar_scaled = (self.acq['fstar'] - np.mean(self.Y_original)) / np.std(self.Y_original)
            newlengthscale = mygp.optimize_lengthscale_SE_fstar(
                gp_params['lengthscale'], gp_params['noise_delta'], fstar_scaled)
            gp_params['lengthscale'] = newlengthscale
            print("estimated lengthscale =", newlengthscale)

        tempX = mygp.X
        tempY = mygp.Y

        # init a new Gaussian Process after optimizing the hyper-parameter
        mygp = GaussianProcess(gp_params)
        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(tempX)
        mygp.fit(tempX[ur], tempY[ur])

        return mygp, gp_params

    def generate_random_point(self):
        x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
        x_max = np.asarray(x_max)
        x_max = x_max.T
        self.X_original = np.vstack((self.X_original, x_max))

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(x_max))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        self.time_opt = np.hstack((self.time_opt, 0))

    def augment_the_new_data(self, x_max):
        # store X in the 0-1 scale
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in original scale
        temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        # find the maximizer of the GP mean function
        try:
            # multiple GPs (one per feature group)
            len(self.gp)
            x_mu_max = []
            for j in range(self.J):
                x_mu_max_temp = acq_max_with_name(gp=self.gp[j],
                                                  scalebounds=self.scalebounds[self.featIdx[j]],
                                                  acq_name="mu")
                x_mu_max = np.hstack((x_mu_max, x_mu_max_temp))
        except:
            x_mu_max = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds, acq_name="mu")

        x_mu_max_original = x_mu_max * self.max_min_gap + self.bounds[:, 0]

        # evaluate f at the maximizer of the GP mean
        #mu_max = acq_mu.acq_kind(x_mu_max, gp=self.gp)
        self.Y_original_maxGP = np.append(self.Y_original_maxGP, self.f(x_mu_max_original))
        self.X_original_maxGP = np.vstack((self.X_original_maxGP, x_mu_max_original))
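# ---------------------------------------------------------------------------
# The base class keeps two coordinate systems: X on the normalized 0-1 scale
# and X_original on the true bounds. The affine map used throughout is
# x_original = x01 * max_min_gap + bounds[:, 0] and its inverse. A minimal
# self-contained sketch of that round trip (standalone numpy; the example
# bounds are an assumption):
# ---------------------------------------------------------------------------
def _check_scaling_round_trip():
    bounds = np.asarray([[-5.0, 10.0], [0.0, 15.0]])
    max_min_gap = bounds[:, 1] - bounds[:, 0]

    x_original = np.asarray([2.5, 7.5])
    x01 = (x_original - bounds[:, 0]) / max_min_gap   # forward map, as in init()
    back = x01 * max_min_gap + bounds[:, 0]           # inverse, as in augment_the_new_data()
    assert np.allclose(back, x_original)
    return x01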
class BatchPVRS(object): def __init__(self,gp_params, func_params, acq_params): """ Input parameters ---------- gp_params: GP parameters gp_params.thete: to compute the kernel gp_params.delta: to compute the kernel func_params: function to optimize func_params.init bound: initial bounds for parameters func_params.bounds: bounds on parameters func_params.func: a function to be optimized acq_params: acquisition function, acq_params.acq_func['name']=['ei','ucb','poi','lei'] ,acq['kappa'] for ucb, acq['k'] for lei acq_params.opt_toolbox: optimization toolbox 'nlopt','direct','scipy' Returns ------- dim: dimension bounds: bounds on original scale scalebounds: bounds on normalized scale of 0-1 time_opt: will record the time spent on optimization gp: Gaussian Process object """ try: bounds=func_params['bounds'] except: bounds=func_params['function'].bounds self.dim = len(bounds) # Create an array with parameters bounds if isinstance(bounds,dict): # Get the name of the parameters self.keys = list(bounds.keys()) self.bounds = [] for key in list(bounds.keys()): self.bounds.append(bounds[key]) self.bounds = np.asarray(self.bounds) else: self.bounds=np.asarray(bounds) scalebounds=np.array([np.zeros(self.dim), np.ones(self.dim)]) self.scalebounds=scalebounds.T self.max_min_gap=self.bounds[:,1]-self.bounds[:,0] # acquisition function type self.acq=acq_params['acq_func'] if 'debug' not in self.acq: self.acq['debug']=0 if 'optimize_gp' not in acq_params: self.optimize_gp=0 else: self.optimize_gp=acq_params['optimize_gp'] if 'marginalize_gp' not in acq_params: self.marginalize_gp=0 else: self.marginalize_gp=acq_params['marginalize_gp'] # Some function to be optimized self.function=func_params['function'] try: self.f = func_params['function']['func'] except: self.f = func_params['function'].func # optimization toolbox if 'opt_toolbox' not in acq_params: self.opt_toolbox='scipy' else: self.opt_toolbox=acq_params['opt_toolbox'] # store the batch size for each iteration self.NumPoints=[] # Numpy array place holders self.X_original= None # scale the data to 0-1 fit GP better self.X = None # X=( X_original - min(bounds) / (max(bounds) - min(bounds)) self.Y = None # Y=( Y_original - mean(bounds) / (max(bounds) - min(bounds)) self.Y_original = None self.opt_time=0 self.L=0 # lipschitz self.gp=GaussianProcess(gp_params) self.gp_params=gp_params # Acquisition Function #self.acq_func = None self.acq_func = AcquisitionFunction(acq=self.acq) self.accum_dist=[] # theta vector for marginalization GP self.theta_vector =[] if 'xstars' not in self.acq: self.xstars=[] else: self.xstars=self.acq['xstars'] # PVRS before and after self.PVRS_before_after=[] self.xstars=[] self.Y_original_maxGP=None self.X_original_maxGP=None def posterior(self, Xnew): #xmin, xmax = -2, 10 ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True) return mu, np.sqrt(sigma2) def init(self, n_init_points): """ Input parameters ---------- gp_params: Gaussian Process structure n_init_points: # init points """ # Generate random points l = [np.random.uniform(x[0], x[1], size=n_init_points) for x in self.bounds] # Concatenate new random points to possible existing # points from self.explore method. #self.init_points += list(map(list, zip(*l))) temp=np.asarray(l) temp=temp.T init_X=list(temp.reshape((n_init_points,-1))) # Evaluate target function at all initialization y_init=self.f(init_X) # Turn it into np array and store. 
self.X_original=np.asarray(init_X) temp_init_point=np.divide((init_X-self.bounds[:,0]),self.max_min_gap) self.X_original_maxGP= np.asarray(init_X) self.X_original = np.asarray(init_X) self.X = np.asarray(temp_init_point) y_init=np.reshape(y_init,(n_init_points,1)) self.Y_original = np.asarray(y_init) self.Y_original_maxGP=np.asarray(y_init) self.Y=(self.Y_original-np.mean(self.Y_original))/(np.max(self.Y_original)-np.min(self.Y_original)) self.NumPoints=np.append(self.NumPoints,n_init_points) # Set parameters if any was passed #self.gp=GaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking #ur = unique_rows(self.X) #self.gp.fit(self.X[ur], self.Y[ur]) #print "#Batch={:d} f_max={:.4f}".format(n_init_points,self.Y.max()) def init_with_data(self, init_X,init_Y): """ Input parameters ---------- gp_params: Gaussian Process structure x,y: # init data observations (in original scale) """ # Turn it into np array and store. self.X_original=np.asarray(init_X) temp_init_point=np.divide((init_X-self.bounds[:,0]),self.max_min_gap) self.X_original = np.asarray(init_X) self.X_original_maxGP= np.asarray(init_X) self.X = np.asarray(temp_init_point) self.Y_original = np.asarray(init_Y) self.Y_original_maxGP=np.asarray(init_Y) self.Y=(self.Y_original-np.mean(self.Y_original))/(np.max(self.Y_original)-np.min(self.Y_original)) self.NumPoints=np.append(self.NumPoints,len(init_Y)) # Set acquisition function self.acq_func = AcquisitionFunction(self.acq) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) def compute_PredictiveVariance(self,Xstars,X_t): """ Xstars: locations of global optimums X: existing observations X_t: suggested_batch """ # for robustness, remove empty X_t X_t=np.atleast_2d(X_t) mask = ~np.any(np.isnan(X_t), axis=1) X_t = X_t[mask] X=np.vstack((self.X,X_t)) var=self.gp.compute_var(X,Xstars) mean_variance=np.mean(var) return np.asarray(mean_variance) def maximize_batch_PVRS_iterative_greedy(self,B=5,first_batch=[]): """ Finding a batch of points using Peak Suppression / Constant Liar approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB Returns ------- X: a batch of [x_1..x_Nt] """ y_max = self.Y.max() # Set parameters if any was passed gp=GaussianProcess(self.gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) gp.fit(self.X[ur], self.Y[ur]) # define the number of Thompson sample M if 'n_xstars' in self.acq: numXtar=self.acq['n_xstars'] else: numXtar=20*self.dim if self.xstars==[]: xstars=[] for ii in range(numXtar): mu_acq={} mu_acq['name']='thompson' mu_acq['dim']=self.dim mu_acq['scalebounds']=self.scalebounds acq_mu=AcquisitionFunction(mu_acq) xt_TS = acq_max(ac=acq_mu.acq_kind,gp=gp,bounds=self.scalebounds,opt_toolbox='scipy') #temp.append(xt_TS) xstars.append(xt_TS) else: xstars=self.xstars # Set acquisition function myacq={} myacq['name']='pvrs' myacq['dim']=self.acq['dim'] myacq['xstars']=xstars acq_func = AcquisitionFunction(myacq) nRepeat=8 pred_var=[0]*nRepeat bestBatch=[0]*nRepeat for tt in range(nRepeat): # copy GP, X and Y temp_gp=copy.deepcopy(gp) temp_X=copy.deepcopy(self.X) temp_Y=copy.deepcopy(self.Y) start_batch=time.time() #store new_x if tt==0: # first iteration (repeat) use Greedy approach to fill a batch if first_batch==[]: # if the first batch is not initialized by greedy new_X=[] for ii in range(B): # Finding argmax of the acquisition function. 
x_max = acq_max(ac=acq_func.acq_kind,gp=temp_gp, bounds=self.scalebounds) if ii==0: new_X=x_max else: new_X= np.vstack((new_X, x_max.reshape((1, -1)))) temp_X = np.vstack((temp_X, x_max.reshape((1, -1)))) const_liar,const_liar_variance=temp_gp.predict(x_max,eval_MSE=1) const_liar=np.random.rand() temp_Y = np.append(temp_Y, const_liar ) temp_gp.fit(temp_X,temp_Y) else: new_X=first_batch #temp_X = np.vstack((temp_X, new_X.reshape((B, -1)))) #const_liar,const_liar_variance=temp_gp.predict(new_X,eval_MSE=1) #const_liar=np.random.rand() #temp_Y = np.append(temp_Y, const_liar ) #temp_gp.fit(temp_X,temp_Y) else:# >=1 iteration for ii in range(B): #new_X=new_X.pop(0) temp_X=copy.deepcopy(self.X) if ii==0: # first element temp_X = np.vstack((temp_X, new_X[ii+1:])) # remove item ii else: if ii==B-1: # last element temp_X = np.vstack((temp_X, new_X[0:ii-1])) # remove item ii else: #temp_X = np.vstack((temp_X, new_X[0:ii]+new_X[ii+1:])) # remove item ii temp_X = np.vstack((temp_X, np.vstack((new_X[0:ii],new_X[ii+1:])))) # remove item ii temp_Y,const_liar_variance=temp_gp.predict(temp_X,eval_MSE=1) #temp_Y=np.random.random(size=(len(temp_X),1)) # constant liar temp_gp.fit(temp_X,temp_Y) # Finding argmax of the acquisition function. x_max = acq_max_with_init(ac=acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds, #init_location=np.asarray(new_X[ii])) init_location=[]) previous_var=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X)) # back up old value old_value=new_X[ii].copy() new_X[ii]=x_max new_var=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X)) if new_var>previous_var: # keep the previous value if the uncertainty does not reduce new_X[ii]=old_value #print "old value" #new_var2=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X)) #print "prev var={:.6f}, newvar={:.6f}, newvar2={:.6f}".format(np.asscalar(previous_var), #np.asscalar(new_var),np.asscalar(new_var2)) pred_var[tt]=self.compute_PredictiveVariance(Xstars=xstars,X_t=np.asarray(new_X)) #print pred_var bestBatch[tt]=np.asarray(new_X) #return new_X,new_X_original idxBest=np.argmin(pred_var) new_X=bestBatch[idxBest] self.NumPoints=np.append(self.NumPoints,new_X.shape[0]) self.X=np.vstack((self.X,new_X)) # convert back to original scale temp_X_new_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(new_X)] temp_X_new_original=np.asarray(temp_X_new_original) self.X_original=np.vstack((self.X_original, temp_X_new_original)) # evaluate y=f(x) temp=self.f(temp_X_new_original) temp=np.reshape(temp,(-1,1)) self.Y_original=np.append(self.Y_original,temp) self.Y=(self.Y_original-np.mean(self.Y_original))/(np.max(self.Y_original)-np.min(self.Y_original)) #return bestBatch[idxBest],pred_var[idxBest] return bestBatch[idxBest],pred_var def maximize_batch_greedy_PVRS(self,B=5): """ Finding a batch of points using Peak Suppression / Constant Liar approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB Returns ------- X: a batch of [x_1..x_Nt] """ y_max = self.Y.max() # Set parameters if any was passed self.gp=GaussianProcess(self.gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) start_opt=time.time() if 'n_xstars' in self.acq: numXtar=self.acq['n_xstars'] else: numXtar=30*self.dim #temp=[] # finding the xt of Thompson Sampling xstars=[] for ii in range(numXtar): mu_acq={} mu_acq['name']='thompson' mu_acq['dim']=self.dim 
            mu_acq['scalebounds'] = self.scalebounds
            acq_mu = AcquisitionFunction(mu_acq)
            xt_TS = acq_max(ac=acq_mu.acq_kind, gp=self.gp, bounds=self.scalebounds, opt_toolbox='scipy')
            xstars.append(xt_TS)
        self.xstars = xstars

        # Set acquisition function
        myacq = {}
        myacq['name'] = 'pvrs'
        myacq['dim'] = self.acq['dim']
        myacq['xstars'] = xstars
        acq_func = AcquisitionFunction(myacq)

        # copy GP, X and Y
        temp_gp = copy.deepcopy(self.gp)
        temp_X = copy.deepcopy(self.X)
        temp_Y = copy.deepcopy(self.Y)

        start_batch = time.time()

        # predictive variance at x* before adding the new data points
        var_before = self.gp.compute_var(temp_X, xstars)
        var_before = np.mean(var_before)

        # greedily fill the batch
        new_X = np.empty((0, self.dim), float)
        for ii in range(B):
            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=acq_func.acq_kind, gp=temp_gp, bounds=self.scalebounds)
            new_X = np.vstack((new_X, x_max.reshape((1, -1))))
            temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))

            # predictive variance at x* after adding the new point
            var_after = self.gp.compute_var(temp_X, xstars)
            var_after = np.mean(var_after)

            if self.PVRS_before_after == []:
                self.PVRS_before_after = np.asarray([var_before, var_after])
            else:
                temp_var = np.asarray([var_before, var_after])
                self.PVRS_before_after = np.vstack((self.PVRS_before_after, temp_var))
            var_before = var_after

            # constant liar: the GP prediction is computed but then
            # deliberately replaced by a random liar value
            const_liar, const_liar_variance = temp_gp.predict(x_max, eval_MSE=1)
            const_liar = np.random.rand()
            temp_Y = np.append(temp_Y, const_liar)
            temp_gp.fit(temp_X, temp_Y)

        # for debugging
        finish_batch = time.time() - start_batch

        self.NumPoints = np.append(self.NumPoints, new_X.shape[0])
        self.X = np.vstack((self.X, new_X))

        # convert back to the original scale
        temp_X_new_original = [val * self.max_min_gap + self.bounds[:, 0] for idx, val in enumerate(new_X)]
        temp_X_new_original = np.asarray(temp_X_new_original)
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # evaluate y=f(x)
        temp = self.f(temp_X_new_original)
        temp = np.reshape(temp, (-1, 1))
        self.Y_original = np.append(self.Y_original, temp)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (np.max(self.Y_original) - np.min(self.Y_original))

        # find the maximizer of the GP mean function;
        # len(self.gp) fails for a single GP, so the except branch handles
        # the non-decomposed case
        try:
            len(self.gp)
            x_mu_max = []
            for j in range(self.J):
                x_mu_max_temp = acq_max_with_name(gp=self.gp[j], scalebounds=self.scalebounds[self.featIdx[j]], acq_name="mu")
                x_mu_max = np.hstack((x_mu_max, x_mu_max_temp))
        except:
            x_mu_max = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds, acq_name="mu")

        x_mu_max_original = x_mu_max * self.max_min_gap + self.bounds[:, 0]

        # record f at the maximizer of the GP mean
        self.Y_original_maxGP = np.append(self.Y_original_maxGP, self.f(x_mu_max_original))
        self.X_original_maxGP = np.vstack((self.X_original_maxGP, x_mu_max_original))

        return new_X
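# -----------------------------------------------------------------------------
# A minimal usage sketch for the PVRS batch methods above. Illustrative only:
# it assumes a surrounding batch-BO class (here called `BatchPVRS`, a
# hypothetical name) that defines the methods above and follows the
# gp_params/func_params/acq_params conventions used elsewhere in this file.
#
#   gp_params = {'lengthscale': 0.1, 'noise_delta': 1e-6}
#   acq_params = {'acq_func': {'name': 'pvrs', 'dim': 2, 'n_xstars': 40}}
#   bo = BatchPVRS(gp_params, func_params, acq_params)
#   bo.init(gp_params, n_init_points=3)
#   for _ in range(10):
#       bo.maximize_batch_greedy_PVRS(B=5)                # greedy batch of 5
#       # or: bo.maximize_batch_PVRS_iterative_greedy(B=5)
# -----------------------------------------------------------------------------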
class BayesOpt_KnownOptimumValue(object):

    def __init__(self, gp_params, func_params, acq_params, verbose=1):
        """
        Input parameters
        ----------
        gp_params: GP parameters
            gp_params.theta: to compute the kernel
            gp_params.delta: to compute the kernel
        func_params: function to optimize
            func_params.init_bounds: initial bounds for parameters
            func_params.bounds: bounds on parameters
            func_params.func: a function to be optimized
        acq_params: acquisition function,
            acq_params.acq_func['name']=['ei','ucb','poi','lei'],
            acq['kappa'] for ucb, acq['k'] for lei
            acq_params.opt_toolbox: optimization toolbox 'nlopt','direct','scipy'
            isTGP: using a transformed Gaussian process

        Returns
        -------
        dim: dimension
        bounds: bounds on the original scale
        scalebounds: bounds on the normalized 0-1 scale
        time_opt: records the time spent on optimization
        gp: Gaussian Process object
        """

        if verbose == 1:
            self.verbose = 1
        else:
            self.verbose = 0

        # Find the number of parameters
        bounds = func_params['function'].bounds
        if 'init_bounds' not in func_params:
            init_bounds = bounds
        else:
            init_bounds = func_params['init_bounds']

        self.dim = len(bounds)
        self.fstar = func_params['function'].fstar

        # Create an array with the parameter bounds
        if isinstance(bounds, dict):
            # Get the names of the parameters
            self.keys = list(bounds.keys())
            self.bounds = []
            for key in list(bounds.keys()):
                self.bounds.append(bounds[key])
            self.bounds = np.asarray(self.bounds)
        else:
            self.bounds = np.asarray(bounds)

        if len(init_bounds) == 0:
            self.init_bounds = self.bounds.copy()
        else:
            self.init_bounds = init_bounds

        if isinstance(init_bounds, dict):
            # Get the names of the parameters
            self.keys = list(init_bounds.keys())
            self.init_bounds = []
            for key in list(init_bounds.keys()):
                self.init_bounds.append(init_bounds[key])
            self.init_bounds = np.asarray(self.init_bounds)
        else:
            self.init_bounds = np.asarray(init_bounds)

        # create scalebounds on the 0-1 scale
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]

        # the function to be optimized
        self.f = func_params['function'].func

        # optimization toolbox
        if 'opt_toolbox' not in acq_params:
            self.opt_toolbox = 'scipy'
        else:
            self.opt_toolbox = acq_params['opt_toolbox']

        # acquisition function type
        self.acq = acq_params['acq_func']
        self.acq['scalebounds'] = self.scalebounds
        if 'debug' not in self.acq:
            self.acq['debug'] = 0

        if 'stopping' not in acq_params:
            self.stopping_criteria = 0
        else:
            self.stopping_criteria = acq_params['stopping']

        if 'optimize_gp' not in acq_params:
            self.optimize_gp = 'maximize'
        else:
            self.optimize_gp = acq_params['optimize_gp']

        if 'marginalize_gp' not in acq_params:
            self.marginalize_gp = 0
        else:
            self.marginalize_gp = acq_params['marginalize_gp']

        # store X in the original scale
        self.X_original = None
        # store X in the 0-1 scale
        self.X = None
        # store y=f(x), standardized as (y - mean)/std
        self.Y = None
        # y in the original scale
        self.Y_original = None

        self.time_opt = 0
        self.gp_params = gp_params

        # Gaussian Process class
        if 'surrogate' not in self.acq:
            self.acq['surrogate'] = 'gp'
        if self.acq['surrogate'] == 'tgp':
            self.isTGP = 1
            self.gp = TransformedGP(gp_params)
        else:
            self.isTGP = 0
            self.gp = GaussianProcess(gp_params)

        # acquisition function
        self.acq_func = None

        # stop condition
        self.stop_flag = 0
        self.logmarginal = 0

        # x* suggestions, cached for Consensus
        self.xstars = []
        self.xstar_accumulate = []

        # theta vector for marginalizing the GP
        self.theta_vector = []

        if 'n_xstars' in self.acq:
            self.numXstar = self.acq['n_xstars']
        else:
            self.numXstar = 100

        # store the g* samples
        self.gstars = np.empty((0, self.numXstar), float)
        self.gap_gstar_fstar = np.empty((0, self.numXstar), float)

        # store all selections of the AF for algorithms with confidence bound
        self.marker = []

        self.flagTheta_TS = 0
        self.mean_theta_TS = None

    # will be used later for visualization
    def posterior(self, Xnew):
        self.gp.fit(self.X, self.Y)
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def posterior_tgp(self, Xnew):
        fstar_scaled = (self.fstar - np.mean(self.Y_original)) / np.std(self.Y_original)
        self.gp.fit(self.X, self.Y, fstar_scaled)
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def posterior_tgp_g(self, Xnew):
        # note: assumes self.tgp has been set to a TransformedGP elsewhere
        fstar_scaled = (self.fstar - np.mean(self.Y_original)) / np.std(self.Y_original)
        self.tgp.fit(self.X, self.Y, fstar_scaled)
        mu, sigma2 = self.tgp.predict_G(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def init_with_data(self, init_X, init_Y):
        """
        Input parameters
        ----------
        init_X, init_Y: initial data observations (in the original scale)
        """
        # Turn the data into np arrays and store them.
        self.X_original = np.asarray(init_X)
        temp_init_point = np.divide(init_X - self.bounds[:, 0], self.max_min_gap)
        self.X = np.asarray(temp_init_point)

        self.Y_original = np.asarray(init_Y)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

    def init(self, gp_params, n_init_points=3, seed=1):
        """
        Input parameters
        ----------
        gp_params: Gaussian Process structure
        n_init_points: number of initial points
        """
        np.random.seed(seed)

        # Generate random points within the initial bounds
        l = [np.random.uniform(x[0], x[1]) for _ in range(n_init_points) for x in self.init_bounds]

        # Concatenate new random points to possible existing
        # points from the self.explore method.
        temp = np.asarray(l)
        temp = temp.T
        init_X = list(temp.reshape((n_init_points, -1)))
        self.X_original = np.asarray(init_X)

        # Evaluate the target function at all initial points
        y_init = self.f(init_X)
        y_init = np.reshape(y_init, (n_init_points, 1))

        self.Y_original = np.asarray(y_init)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        # convert X to the 0-1 scale
        temp_init_point = np.divide(init_X - self.bounds[:, 0], self.max_min_gap)
        self.X = np.asarray(temp_init_point)

    def maximize(self):
        """
        Main optimization method.
        Returns
        -------
        x: recommended point for evaluation
        """

        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':
            x_max = generate_random_points(bounds=self.scalebounds, size=1)
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using the original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))
            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)
            self.time_opt = np.hstack((self.time_opt, 0))
            return

        fstar_scaled = (self.fstar - np.mean(self.Y_original)) / np.std(self.Y_original)

        # init a new Gaussian Process
        if self.isTGP == 1:
            self.gp = TransformedGP(self.gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur], fstar_scaled)
        else:
            self.gp = GaussianProcess(self.gp_params)
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        # optimize the GP hyper-parameters every 4*dim iterations
        newlengthscale = None
        if len(self.Y) % (4 * self.dim) == 0:
            if self.optimize_gp == 'maximize':
                newlengthscale = self.gp.optimize_lengthscale_SE_maximizing(
                    self.gp_params['lengthscale'], self.gp_params['noise_delta'])
                self.gp_params['lengthscale'] = newlengthscale
            elif self.optimize_gp == 'loo':
                newlengthscale = self.gp.optimize_lengthscale_SE_loo(
                    self.gp_params['lengthscale'], self.gp_params['noise_delta'])
                self.gp_params['lengthscale'] = newlengthscale

            if self.verbose == 1:
                print("estimated lengthscale =", newlengthscale)

            # init a new Gaussian Process after optimizing the hyper-parameters
            if self.isTGP == 1:
                self.gp = TransformedGP(self.gp_params)
                ur = unique_rows(self.X)
                self.gp.fit(self.X[ur], self.Y[ur], fstar_scaled)
            else:
                self.gp = GaussianProcess(self.gp_params)
                ur = unique_rows(self.X)
                self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        # run the acquisition function to get x*
        self.xstars = []
        self.acq['fstar_scaled'] = np.asarray([fstar_scaled])

        x_max = acq_max_with_name(gp=self.gp, scalebounds=self.scalebounds,
                                  acq_name=self.acq['name'], fstar_scaled=fstar_scaled)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # store X
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in the original scale
        temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # evaluate Y using the original X
        y_original = self.f(temp_X_new_original)
        self.Y_original = np.append(self.Y_original, y_original)

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        if self.gp.flagIncremental == 1:
            self.gp.fit_incremental(x_max, self.Y[-1])
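# -----------------------------------------------------------------------------
# A minimal usage sketch for BayesOpt_KnownOptimumValue, assuming a benchmark
# function object exposing .func, .bounds and the known optimum value .fstar
# (the names `my_benchmark` and the acquisition name below are illustrative,
# not part of this module):
#
#   gp_params = {'lengthscale': 0.1, 'noise_delta': 1e-6}
#   func_params = {'function': my_benchmark}            # hypothetical benchmark
#   acq_params = {'acq_func': {'name': 'ei', 'dim': len(my_benchmark.bounds),
#                              'surrogate': 'tgp'},     # 'tgp' -> TransformedGP
#                 'optimize_gp': 'loo'}
#   bo = BayesOpt_KnownOptimumValue(gp_params, func_params, acq_params)
#   bo.init(gp_params, n_init_points=3, seed=1)
#   for _ in range(30):
#       bo.maximize()                                   # one BO iteration
#   print(bo.Y_original.max())
# -----------------------------------------------------------------------------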