def optimize_log_lik(self):
    """ Optimize the kernel hyperparameters before acquiring the next point """
    # initial hyperparams
    lengthscales = self.initial_hyperparams['length scales']
    noise_var = self.initial_hyperparams['noise variance']
    amp = self.initial_hyperparams['covar amplitude']

    # optimize hyperparams using scikit-learn
    sk_loklik0, sk_amp0, sk_noise0, sk_ls0 = self.sk_kernel(
        amp, noise_var, lengthscales)

    if self.X_obs.shape[0] > 1:  # don't run twice on the first step
        # last hyperparams seen so far
        current_covar = np.sqrt(1. / np.exp(self.model.covar_params[0]))
        # this gives a full matrix and not just lengthscales; scikit-learn
        # can't deal with a matrix
        current_lengthscales = np.diag(current_covar)  # take only the diagonal part - the lengthscales!
        current_covar_amp = np.exp(self.model.covar_params[1])
        current_noise_variance = self.model.noise_var

        # repeat the scikit-learn fit
        print('optimize on last hyperparams seen so far')
        sk_loklik, sk_amp, sk_noise, sk_ls = self.sk_kernel(
            current_covar_amp, current_noise_variance, current_lengthscales)
    else:
        # on the first step only the initial-hyperparam fit exists
        sk_loklik, sk_amp, sk_noise, sk_ls = sk_loklik0, sk_amp0, sk_noise0, sk_ls0

    # compare likelihoods and choose the best hyperparams
    if sk_loklik > sk_loklik0:
        # this is the required packing for the online GP code
        hyperparams_opt = (np.diag(1. / (sk_ls**2)), np.log(sk_amp), np.log(sk_noise))
    else:
        hyperparams_opt = (np.diag(1. / (sk_ls0**2)), np.log(sk_amp0), np.log(sk_noise0))
    #print(hyperparams_opt)

    # create new OnlineGP model - overwrites the existing one
    print('sanity dim check: ', self.model.nin == self.X_obs.shape[1])
    self.model = OGP(self.model.nin, hyperparams=hyperparams_opt,
                     maxBV=self.model.maxBV, covar=self.model.covar)
    # , weighted=self.model.weighted, maxBV=self.model.maxBV, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp, proj=self.model.proj, thresh=self.model.thresh, sparsityQ=self.model.sparsityQ)

    # initialize model on current data
    p_X = self.X_obs
    p_Y = self.ytrain
    num = p_X.shape[0]
    self.model.fit(p_X, p_Y, num)
def optimize_kernel_hyperparameters(self, noiseQ=False):
    """
    Optimize the kernel hyperparameters before acquiring the next point.
    This method optimizes the kernel twice, starting from the initial and
    from the last-seen hyperparameters, then compares the log likelihoods
    and rebuilds the GP model using the most likely hypers.
    Note: scikit-learn can't deal with a full precision matrix, so we can
    only optimize the (diagonal) lengthscales.
    """
    self.noiseQ = noiseQ

    # optimize kernel using scikit-learn from the initial hyperparams
    print('optimize on initial hyperparams')
    sk_loklik0, sk_hypers0 = self.sk_kernel(self.initial_hyperparams)

    # optimize kernel using scikit-learn from the current hyperparams
    self.current_hyperparams = {}
    self.current_hyperparams['precisionMatrix'] = np.diag(1. / self.model.lengthscales**2)
    self.current_hyperparams['noise_variance'] = self.model.noise_var
    self.current_hyperparams['amplitude_covar'] = self.model.amplitude_covar
    print('optimize on last hyperparams seen so far')
    sk_loklik, sk_hypers = self.sk_kernel(self.current_hyperparams)

    # compare likelihoods and choose the best hyperparams
    if sk_loklik > sk_loklik0:
        hyperparams_opt = sk_hypers
    else:
        hyperparams_opt = sk_hypers0

    # record the history of optimized hyperparameters
    for key in hyperparams_opt:
        if key == 'precisionMatrix':
            self.hyperparams_opt_all[key] = np.array(
                list(chain(self.hyperparams_opt_all[key],
                           [hyperparams_opt[key].diagonal()])))
        else:
            self.hyperparams_opt_all[key] = list(
                chain(self.hyperparams_opt_all[key], [hyperparams_opt[key]]))
    if self.verboseQ:
        print('hyperparams_opt ', hyperparams_opt)

    # create new OnlineGP model - overwrites the existing one
    if self.verboseQ:
        print('sanity dim check: ', self.model.dim == self.X_obs.shape[1])
    self.model = OGP(self.model.dim, hyperparams=hyperparams_opt,
                     maxBV=self.model.maxBV, covar=self.model.covar)
    # , weighted=self.model.weighted, maxBV=self.model.maxBV, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp, proj=self.model.proj, thresh=self.model.thresh, sparsityQ=self.model.sparsityQ, verboseQ=self.model.verboseQ)

    # initialize the new model on the current data
    self.model.fit(self.X_obs, np.array(self.Y_obs).flatten(), self.X_obs.shape[0])
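# Sketch of how this hook is driven from an optimization loop (it mirrors the
# logic in BayesOpt.minimize further below): once the iteration counter passes
# optimize_kernel_on_the_fly, the hypers are re-fit before each acquisition.
# The threshold of 3 and noiseQ=True here are illustrative choices, not values
# taken from the source.
#
# opt = BayesOpt(gp, mi, acq_func='UCB', start_dev_vals=mi.x,
#                dev_ids=dev_ids, optimize_kernel_on_the_fly=3)
# for i in range(Niter):
#     if opt.optimize_kernel_on_the_fly is not None and i > opt.optimize_kernel_on_the_fly:
#         opt.optimize_kernel_hyperparameters(noiseQ=True)  # also fit a WhiteKernel noise term
#     opt.OptIter()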
def FocusCorrection(lens, obj):
    ndim = 1
    dev_ids = [str(x + 1) for x in np.arange(ndim)]
    start_point = [[obj]]
    mi_module = importlib.import_module('machine_interfaces.machine_interface_Defocus')
    mi = mi_module.machine_interface(dev_ids=dev_ids, start_point=start_point, lens=lens)
    mi.getState()

    gp_ls = np.array(np.ones(ndim)) * [0.317]
    gp_amp = 0.256
    gp_noise = 0.000253
    gp_precisionmat = np.array(np.diag(1 / (gp_ls**2)))
    hyperparams = {'precisionMatrix': gp_precisionmat,
                   'amplitude_covar': gp_amp,
                   'noise_variance': gp_noise}
    gp = OGP(ndim, hyperparams)

    opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals=mi.x, dev_ids=dev_ids)
    opt.ucb_params = np.array([2, None])

    Obj_state_s = []  # initialize an empty Obj_state_s for each start point
    Niter = 10  # run 10 iterations for each case
    for i in range(Niter):
        Obj_state_s.append(opt.best_seen()[1])
        opt.OptIter()

    # the optimized lens current and the corresponding defocus objective are
    # saved in opt.best_seen()[0] and [1]
    res = opt.best_seen()
    del mi, gp, opt
    return res
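# Hypothetical usage of FocusCorrection; the lens object and the starting
# objective value 0.9 below are illustrative placeholders, not values from
# the source.
#
# best_x, best_y = FocusCorrection(lens, obj=0.9)
# print('best lens setting:', best_x, 'predicted defocus objective:', best_y)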
gp_lengthscales = np.array([0.0001])
#gp_precisionmat = 1/np.diag(gp_lengthscales**(2))
gp_precisionmat = np.diag(np.log(1. / gp_lengthscales**(2)))
#2. gp_amp
#gp_amp = 0.1
gp_amp = 10
#3. gp_noise
#gp_noise = 0.0001
gp_noise = 10**(-10)

# format the hyperparams for the OGP
hyps = [gp_precisionmat, np.log(gp_amp), np.log(gp_noise**2)]
gp = OGP(ndim, hyps)

# create the bayesian optimizer that will use the gp as the model to optimize the machine
#opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals=mi.x, dev_ids=dev_ids)
#mi.setX(-0.004)
#opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals=-0.004, dev_ids=dev_ids, bounds=((-np.inf, 0)))
mi.setX(-0.001)
s = mi.getState()
opt = BayesOpt(gp, mi, acq_func="UCB", dev_ids=dev_ids, bounds=((-0.00875, 0), ))
opt.ucb_params = [2, None]
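# The two hyperparameter packings that appear in this document, side by side.
# Which one applies depends on the OGP version in use; the lengthscale values
# here are illustrative only. Note that this snippet wraps the precision
# matrix in np.log while other snippets below pass it in plain form.
#
# ls = np.array([0.1, 0.2])                                # per-dimension lengthscales
# prec = np.diag(1. / ls**2)                               # RBF precision matrix
# hyps_list = [prec, np.log(gp_amp), np.log(gp_noise**2)]  # older OGP: log-space list
# hyps_dict = {'precisionMatrix': prec,                    # newer OGP: plain dict
#              'amplitude_covar': gp_amp,
#              'noise_variance': gp_noise}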
mi = mi_module.machine_interface(dev_ids=dev_ids, start_point=start_point)
# an isotropic n-dimensional gaussian with amplitude = 1, centered at the origin,
# plus gaussian background noise with std dev = 0.1

# create the gp
ndim = len(dev_ids)

# GP parameters
gp_precisionmat = scan_params['gp_precisionmat']
gp_amp = scan_params['gp_amp']
gp_noise_variance = scan_params['gp_noise']
hyperparams = {'precisionMatrix': gp_precisionmat,
               'amplitude_covar': gp_amp,
               'noise_variance': gp_noise_variance}
gp = OGP(ndim, hyperparams)

# create the bayesian optimizer that will use the gp as the model to optimize the machine
opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals=mi.x, dev_ids=dev_ids)
opt.ucb_params = scan_params['ucb_params']  # set the acquisition function parameters
print('ucb_params', opt.ucb_params)

# run the gp search for some number of steps
Obj_state_s = []
# optimize_kernel_on_the_fly is the iteration number at which to start optimizing
# the kernel's hyperparameters. If None, the hypers are not optimized during BO.
optimize_kernel_on_the_fly = None
Niter = 10
for i in range(Niter):
    clear_output(wait=True)
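    # The rest of the loop body is truncated here. A plausible continuation,
    # based on the FocusCorrection loop above and the optimize_kernel_on_the_fly
    # hook, would be:
    #
    # if optimize_kernel_on_the_fly is not None and i > optimize_kernel_on_the_fly:
    #     opt.optimize_kernel_hyperparameters()
    # Obj_state_s.append(opt.best_seen()[1])
    # opt.OptIter()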
class BayesOpt:
    """
    Contains the Bayesian optimization class with the following methods:

    acquire(): Returns the point that maximizes the acquisition function.
        For 'testEI', returns the index of the point instead. For normal
        acquisition, currently uses the bounded L-BFGS optimizer. Haven't
        tested alternatives much.
    best_seen(): Uses the model to make predictions at every observed point,
        returning the best-performing (x, y) pair. This is more robust to
        noise than returning the best observation, but could be replaced by
        other, faster methods.
    OptIter(): The main method for Bayesian optimization. Maximizes the
        acquisition function, then uses the interface to test this point and
        update the model.
    """
    def __init__(self, model, target_func, acq_func='EI', xi=0.0, alt_param=-1,
                 m=200, bounds=None, iter_bound=False, prior_data=None,
                 start_dev_vals=None, dev_ids=None, searchBoundScaleFactor=None,
                 optimize_kernel_on_the_fly=None, verboseQ=False):
        """
        Initialization parameters:
        --------------------------
        model: an object with methods 'predict', 'fit', and 'update';
            the surrogate model to use

        target_func: an object (the machine interface) which supplies the
            state of the system and allows for changing the system's x-value.
            Should have methods '(x, y) = intfc.getState()' and
            'intfc.setX(x_new)'. Note that this interface system is rough,
            and used for testing and as a placeholder for the machine
            interface.

        acq_func: specifies how the optimizer should choose its next point.
            'PI': uses probability of improvement. The interface should
                supply y-values.
            'EI': uses expected improvement. The interface should supply
                y-values.
            'UCB': uses GP upper confidence bound. No y-values needed.
            'testEI': uses EI over a finite set of points. This set must be
                provided as alt_param, and the interface need not supply
                meaningful y-values.

        xi: exploration parameter suggested in some Bayesian opt. literature

        alt_param: currently only used when acq_func == 'testEI'

        m: the maximum size of the model; can be ignored unless passing an
            untrained SPGP or other model which doesn't already know its own
            size

        bounds: a tuple of (min, max) tuples specifying search bounds for
            each input dimension. Generally leads to better performance. Has
            a different interpretation when iter_bound is True.

        iter_bound: if True, bounds the distance that can be moved in a
            single iteration in terms of the length scale in each dimension.
            Uses the bounds variable as a multiple of the length scales, so
            bounds == 2 with iter_bound == True limits movement per iteration
            to two length scales in each dimension. Generally a good idea for
            safety, etc.

        prior_data: input data to train the model on initially. For
            convenience, since the model can be trained externally as well.
            Assumed to be a pandas DataFrame of shape (n, dim + 1) where the
            last column contains y-values.

        optimize_kernel_on_the_fly: if not None, an int which indicates the
            iteration number at which to start kernel optimization.
            Currently works for the RBF kernel only.
        """
        self.model = model
        self.m = m
        self.bounds = bounds
        self.searchBoundScaleFactor = 1.
        if type(searchBoundScaleFactor) is not type(None):
            try:
                self.searchBoundScaleFactor = abs(searchBoundScaleFactor)
            except:
                print(('BayesOpt - ERROR: ', searchBoundScaleFactor,
                       ' is not a valid searchBoundScaleFactor (scaling coeff).'))
        self.iter_bound = iter_bound
        self.prior_data = prior_data  # for seeding the GP with data acquired by another optimizer
        self.target_func = target_func
        self.optimize_kernel_on_the_fly = optimize_kernel_on_the_fly
        self.verboseQ = verboseQ
        if self.optimize_kernel_on_the_fly is not None:
            print('Run BO w/ kernel optimization on the fly')
        try:
            self.mi = self.target_func.mi
        except:
            self.mi = self.target_func
        self.acq_func = (acq_func, xi, alt_param)
        #self.ucb_params = [0.24, 0.4]  # [nu, delta] worked well for LCLS
        self.ucb_params = [2., None]  # if we want to use a fixed scale factor of the standard deviation
        self.max_iter = 100
        self.alpha = 1.0  # controls the ratio of exploration to exploitation in the acquisition function
        self.kill = False
        self.ndim = np.array(start_dev_vals).size
        self.multiprocessingQ = multiprocessingQ  # speed up acquisition function optimization
        self.dev_ids = dev_ids
        self.start_dev_vals = start_dev_vals
        self.pvs = self.dev_ids
        self.defocus_correction = False

        try:
            # get initial state
            print('Supposed to be grabbing initial machine state...')
            (x_init, y_init) = self.getState()
            print('x_init', x_init)
            print('y_init', y_init)
            self.X_obs = np.array(x_init)
            self.Y_obs = [y_init]
            self.current_x = np.array(np.array(x_init).flatten(), ndmin=2)
        except:
            print('BayesOpt - ERROR: Could not grab initial machine state')

        # calculate length scales
        try:
            self.lengthscales = self.model.lengthscales
        except:
            print('WARNING - GP.bayesian_optimization.BayesOpt: Using some unit length scales cause we messed up somehow...')
            self.lengthscales = np.ones(len(self.dev_ids))

        # make a copy of the initial params
        self.initial_hyperparams = {}
        self.initial_hyperparams['precisionMatrix'] = np.diag(1. / copy.copy(self.lengthscales)**2)
        self.initial_hyperparams['noise_variance'] = copy.copy(self.model.noise_var)
        self.initial_hyperparams['amplitude_covar'] = copy.copy(self.model.amplitude_covar)

        # initialize the record of optimized hypers
        self.hyperparams_opt_all = {}
        self.hyperparams_opt_all['noise_variance'] = [copy.copy(self.model.noise_var)]
        self.hyperparams_opt_all['amplitude_covar'] = [copy.copy(self.model.amplitude_covar)]
        self.hyperparams_opt_all['precisionMatrix'] = [1. / copy.copy(self.lengthscales)**2]

        if self.verboseQ:
            print('Using prior mean function of ', self.model.prmean)
            print('Using prior mean parameters of ', self.model.prmeanp)

    def getState(self):
        """ get the current state of the machine """
        x_vals, y_val = self.mi.getState()
        return x_vals, y_val

    def terminate(self, devices):
        """
        Sets the position back to the location that seems best in hindsight.
        It's a good idea to run this at the end of the optimization, since
        Bayesian optimization tries to explore and might not always end in
        a good place.
        """
        print(("TERMINATE", self.x_best))
        if (self.acq_func[0] == 'EI'):
            # set position back to something reasonable
            for i, dev in enumerate(devices):
                dev.set_value(self.x_best[i])
            #error_func(self.x_best)
        if (self.acq_func[0] == 'UCB'):
            # UCB doesn't keep track of x_best, so find it
            (x_best, y_best) = self.best_seen()
            for i, dev in enumerate(devices):
                dev.set_value(x_best[i])

    def sk_kernel(self, hypers_dict):
        amp = hypers_dict['amplitude_covar']
        lengthscales = np.diag(hypers_dict['precisionMatrix'])**-0.5
        noise_var = hypers_dict['noise_variance']

        se_ard = Ck(amp) * RBF(length_scale=lengthscales, length_scale_bounds=(1e-6, 10))
        noise = WhiteKernel(noise_level=noise_var, noise_level_bounds=(1e-9, 1))  # noise terms
        sk_kernel = se_ard
        if self.noiseQ:
            sk_kernel += noise

        t0 = time.time()
        gpr = GaussianProcessRegressor(kernel=sk_kernel, n_restarts_optimizer=5)
        print("Initial kernel: %s" % gpr.kernel)
        # self.ytrain = [y[0][0] for y in self.Y_obs]
        gpr.fit(self.X_obs, np.array(self.Y_obs).flatten())
        print('SK fit time is ', time.time() - t0)
        print("Learned kernel: %s" % gpr.kernel_)
        print("Log-marginal-likelihood: %.3f" % gpr.log_marginal_likelihood(gpr.kernel_.theta))
        #print(gpr.kernel_.get_params())

        if self.noiseQ:  # RBF w/ noise
            sk_ls = gpr.kernel_.get_params()['k1__k2__length_scale']
            sk_amp = gpr.kernel_.get_params()['k1__k1__constant_value']
            sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)
            sk_noise = gpr.kernel_.get_params()['k2__noise_level']
        else:  # RBF w/o noise
            sk_ls = gpr.kernel_.get_params()['k2__length_scale']
            sk_amp = gpr.kernel_.get_params()['k1__constant_value']
            sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)
            sk_noise = 0

        # make dict
        sk_hypers = {}
        sk_hypers['precisionMatrix'] = np.diag(1. / (sk_ls**2))
        sk_hypers['noise_variance'] = sk_noise
        sk_hypers['amplitude_covar'] = sk_amp

        return sk_loklik, sk_hypers

    def optimize_kernel_hyperparameters(self, noiseQ=False):
        """
        Optimize the kernel hyperparameters before acquiring the next point.
        This method optimizes the kernel twice, starting from the initial and
        from the last-seen hyperparameters, then compares the log likelihoods
        and rebuilds the GP model using the most likely hypers.
        Note: scikit-learn can't deal with a full precision matrix, so we can
        only optimize the (diagonal) lengthscales.
        """
        self.noiseQ = noiseQ

        # optimize kernel using scikit-learn from the initial hyperparams
        print('optimize on initial hyperparams')
        sk_loklik0, sk_hypers0 = self.sk_kernel(self.initial_hyperparams)

        # optimize kernel using scikit-learn from the current hyperparams
        self.current_hyperparams = {}
        self.current_hyperparams['precisionMatrix'] = np.diag(1. / self.model.lengthscales**2)
        self.current_hyperparams['noise_variance'] = self.model.noise_var
        self.current_hyperparams['amplitude_covar'] = self.model.amplitude_covar
        print('optimize on last hyperparams seen so far')
        sk_loklik, sk_hypers = self.sk_kernel(self.current_hyperparams)

        # compare likelihoods and choose the best hyperparams
        if sk_loklik > sk_loklik0:
            hyperparams_opt = sk_hypers
        else:
            hyperparams_opt = sk_hypers0

        # record the history of optimized hyperparameters
        for key in hyperparams_opt:
            if key == 'precisionMatrix':
                self.hyperparams_opt_all[key] = np.array(
                    list(chain(self.hyperparams_opt_all[key],
                               [hyperparams_opt[key].diagonal()])))
            else:
                self.hyperparams_opt_all[key] = list(
                    chain(self.hyperparams_opt_all[key], [hyperparams_opt[key]]))
        if self.verboseQ:
            print('hyperparams_opt ', hyperparams_opt)

        # create new OnlineGP model - overwrites the existing one
        if self.verboseQ:
            print('sanity dim check: ', self.model.dim == self.X_obs.shape[1])
        self.model = OGP(self.model.dim, hyperparams=hyperparams_opt,
                         maxBV=self.model.maxBV, covar=self.model.covar)
        # , weighted=self.model.weighted, maxBV=self.model.maxBV, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp, proj=self.model.proj, thresh=self.model.thresh, sparsityQ=self.model.sparsityQ, verboseQ=self.model.verboseQ)

        # initialize the new model on the current data
        self.model.fit(self.X_obs, np.array(self.Y_obs).flatten(), self.X_obs.shape[0])

    def minimize(self, error_func, x):
        """
        Weighting for exploration vs exploitation in the GP: at the end of
        the scan, the alpha array goes from 1 to zero.
        """
        inverse_sign = -1
        self.current_x = np.array(np.array(x).flatten(), ndmin=2)
        self.X_obs = np.array(self.current_x)
        self.Y_obs = [np.array([[inverse_sign * error_func(x)]])]

        # iterate through the GP method
        for i in range(self.max_iter):
            # get next point to try using acquisition function
            x_next = self.acquire()
            if self.optimize_kernel_on_the_fly is not None:
                if i > self.optimize_kernel_on_the_fly:
                    print('****** Optimizing kernel hyperparams')
                    self.optimize_kernel_hyperparameters()
            y_new = error_func(x_next.flatten())
            if self.opt_ctrl.kill:  # opt_ctrl is assumed to be attached externally
                print('WARNING - BayesOpt: Killing Bayesian optimizer...')
                break
            y_new = np.array([[inverse_sign * y_new]])

            # change position of interface
            x_new = deepcopy(x_next)
            self.current_x = x_new

            # add new entry to observed data
            self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
            self.Y_obs.append(y_new)

            # update the model (may want to add noise if using testEI)
            self.model.update(x_new, y_new)

    def OptIter(self, pause=0):
        """ runs the optimizer for one iteration """
        # get next point to try using acquisition function
        x_next = self.acquire()
        if (self.acq_func[0] == 'testEI'):
            ind = x_next
            x_next = np.array(self.acq_func[2].iloc[ind, :-1], ndmin=2)

        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if (self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind, -1])
        else:
            (x_new, y_new) = self.mi.getState()

        # fix defocus here if necessary; make sure the saved observation comes from small defocus
        if self.defocus_correction and self.mi.getDefocus() < 0.85:
            print('Correcting defocus using S2 lens...')
            new_S2 = self.mi.CorrectDefocus(self.mi.x, self.mi.S2)
            self.mi.setS2(new_S2[0][0])
            (x_new, y_new) = self.mi.getState()

        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
        self.Y_obs.append(y_new)

        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)  # + .5*np.random.randn())

        return x_new, y_new

    def ForcePoint(self, x_next):
        """ force a point acquisition at our discretion and update the model """
        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if (self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind, -1])
        else:
            (x_new, y_new) = self.mi.getState()

        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
        self.Y_obs.append(y_new)

        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)

    def best_seen(self):
        """
        Checks the observed points to see which is predicted to be best.
        Probably safer than just returning the maximum observed, since the
        model has noise. It takes longer this way, though; you could instead
        take the model's prediction at the x-value that has done best if this
        needs to be faster.
        Not needed for UCB, so do it the fast way (return max obs).
        """
        if (self.acq_func[0] == 'UCB'):
            mu = self.Y_obs
        else:
            (mu, var) = self.model.predict(self.X_obs)
            mu = [self.model.predict(np.array(x, ndmin=2))[0] for x in self.X_obs]

        (ind_best, mu_best) = max(enumerate(mu), key=op.itemgetter(1))
        return (self.X_obs[ind_best], mu_best)

    def acquire(self):
        """
        Computes the next point for the optimizer to try by maximizing the
        acquisition function. If movement per iteration is bounded, starts
        search at current position.
        """
        # look from best positions
        (x_best, y_best) = self.best_seen()
        self.x_best = x_best
        x_curr = self.current_x[-1]
        x_start = x_best
        ndim = x_curr.size  # dimension of the feature space we're searching; NEEDED FOR UCB
        try:
            nsteps = 1 + self.X_obs.shape[0]  # acquisition number we're on; NEEDED FOR UCB
        except:
            nsteps = 1

        # check to see if this is bounding step sizes
        # print(self.iter_bound)
        if (self.iter_bound):
            if (self.bounds is None):  # looks like a scale factor
                self.bounds = 1.0
            bound_lengths = self.searchBoundScaleFactor * 3. * self.lengthscales  # 3x hyperparam lengths
            relative_bounds = np.transpose(np.array([-bound_lengths, bound_lengths]))
            # note: relative_bounds is only defined on this branch
            iter_bounds = np.transpose(np.array([x_start - bound_lengths, x_start + bound_lengths]))
        else:
            iter_bounds = self.bounds
        # print(self.iter_bound, relative_bounds)

        # options for finding the peak of the acquisition function:
        optmethod = 'L-BFGS-B'  # L-BFGS-B, BFGS, TNC, and SLSQP allow bounds whereas Powell and COBYLA don't
        maxiter = 1000  # max number of steps for one scipy.optimize.minimize call
        try:
            nproc = mp.cpu_count()  # number of processes to launch minimizations on
        except:
            nproc = 1
        niter = 1  # max number of starting points for search
        niter_success = 1  # stop search if the same minimum is found this many times
        tolerance = 1.e-4  # goal tolerance

        # perturb start to break symmetry?
        #x_start += np.random.randn(lengthscales.size)*lengthscales*1e-6

        # probability of improvement acquisition function
        if (self.acq_func[0] == 'PI'):
            aqfcn = negProbImprove
            fargs = (self.model, y_best, self.acq_func[1])

        # expected improvement acquisition function
        elif (self.acq_func[0] == 'EI'):
            aqfcn = negExpImprove
            fargs = (self.model, y_best, self.acq_func[1], self.alpha)

        # gaussian process upper confidence bound acquisition function
        elif (self.acq_func[0] == 'UCB'):
            aqfcn = negUCB
            fargs = (self.model, ndim, nsteps, self.ucb_params[0], self.ucb_params[1])

        # maybe something mitch was using once? (can probably remove)
        elif (self.acq_func[0] == 'testEI'):
            # collect all possible x values
            options = np.array(self.acq_func[2].iloc[:, :-1])
            (x_best, y_best) = self.best_seen()

            # find the option with best EI
            best_option_score = (-1, 1e12)
            for i in range(options.shape[0]):
                result = negExpImprove(options[i], self.model, y_best, self.acq_func[1])
                if (result < best_option_score[1]):
                    best_option_score = (i, result)

            # return the index of the best option
            return best_option_score[0]
        else:
            print('WARNING - BayesOpt: Unknown acquisition function.')
            return 0

        try:
            if (self.multiprocessingQ):  # multi-processing to speed search
                neval = 2 * int(10. * 2.**(ndim / 12.))
                nkeep = 2 * min(8, neval)
                # neval = int(3)
                # nkeep = int(2)

                nbest = 3  # add the nbest best points seen so far (largest Y_obs)
                nstart = 1  # make sure some starting points are there to prevent runaway searches
                yobs = np.array([y[0][0] for y in self.Y_obs])
                isearch = yobs.argsort()[-nbest:]
                for i in range(min(nstart, len(self.Y_obs))):
                    # if np.sum(isearch == i) == 0:  # not found in list
                    isearch = np.append(isearch, i)
                isearch.sort()  # sort to bias searching near earlier steps

                # parallelgridsearch generates a pseudo-random grid, then performs an ICDF
                # transform to map to a multinormal distribution centered on x_start and
                # with widths given by the hyperparams
                v0s = None
                for i in isearch:
                    vs = parallelgridsearch(aqfcn, self.X_obs[i],
                                            self.searchBoundScaleFactor * 0.6 * self.lengthscales,
                                            fargs, neval, nkeep)
                    if type(v0s) == type(None):
                        v0s = copy.copy(vs)
                    else:
                        v0s = np.vstack((v0s, vs))
                v0sort = v0s[:, -1].argsort()[:nkeep]  # keep the nkeep best (smallest negated-acquisition values)
                v0s = v0s[v0sort]
                x0s = v0s[:, :-1]
                # for later testing if the minimize results are better than the best starting point
                v0best = v0s[0]

                if basinhoppingQ:
                    print("multi-processing, basinhoppingQ")
                    # use basinhopping
                    bkwargs = dict(niter=niter, niter_success=niter_success,
                                   minimizer_kwargs={'method': optmethod, 'args': fargs,
                                                     'tol': tolerance, 'bounds': iter_bounds,
                                                     'options': {'maxiter': maxiter}})  # keyword args for basinhopping
                    res = parallelbasinhopping(aqfcn, x0s, bkwargs)
                else:
                    print("multi-processing, minimize")
                    # use minimize
                    mkwargs = dict(bounds=iter_bounds, method=optmethod,
                                   options={'maxiter': maxiter}, tol=tolerance)  # keyword args for scipy.optimize.minimize
                    res = parallelminimize(aqfcn, x0s, fargs, mkwargs, v0best,
                                           relative_bounds=relative_bounds)
            else:  # single-processing
                if basinhoppingQ:
                    print("single-processing, basinhoppingQ")
                    res = basinhopping(aqfcn, x_start, niter=niter, niter_success=niter_success,
                                       minimizer_kwargs={'method': optmethod,
                                                         'args': (self.model, y_best, self.acq_func[1], self.alpha),
                                                         'tol': tolerance, 'bounds': iter_bounds,
                                                         'options': {'maxiter': maxiter}})
                else:
                    print("single-processing, minimize")
                    # res = minimize(aqfcn, x_start, args=(self.model, y_best, self.acq_func[1], self.alpha), method=optmethod, tol=tolerance, bounds=iter_bounds, options={'maxiter': maxiter})
                    res = minimize(aqfcn, x_start, args=fargs, method=optmethod,
                                   tol=tolerance, bounds=iter_bounds,
                                   options={'maxiter': maxiter})
                res = res.x
        except:
            raise

        return np.array(res, ndmin=2)  # return resulting x value as a (1 x dim) vector
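# negUCB is referenced in acquire() above but not defined in this section.
# Below is a minimal sketch of a negated GP-UCB acquisition consistent with
# the fargs signature used there, (model, ndim, nsteps, nu, delta). The
# delta branch follows the standard GP-UCB exploration schedule (Srinivas et
# al., 2010); this is an illustration, not the repository's actual
# implementation. Assumes numpy is imported as np, as elsewhere here.

def negUCB_sketch(x, model, ndim, nsteps, nu, delta):
    """Negative upper confidence bound; minimizing this maximizes UCB."""
    x = np.array(x, ndmin=2)
    (mu, var) = model.predict(x)
    sigma = np.sqrt(float(var))
    if delta is None:
        # fixed scale factor of the standard deviation (e.g. ucb_params = [2., None])
        beta = nu
    else:
        # iteration-dependent exploration weight from the GP-UCB regret bound
        tau = 2. * np.log(nsteps**(0.5 * ndim + 2.) * np.pi**2 / (3. * delta))
        beta = np.sqrt(nu * tau)
    return -(float(mu) + beta * sigma)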
class BayesOpt:
    def __init__(self, model, target_func, acq_func='EI', xi=0.0, alt_param=-1,
                 m=200, bounds=None, iter_bound=False, prior_data=None,
                 start_dev_vals=None, dev_ids=None, searchBoundScaleFactor=None):
        self.model = model
        self.m = m
        self.bounds = bounds
        self.searchBoundScaleFactor = 1.
        if type(searchBoundScaleFactor) is not type(None):
            try:
                self.searchBoundScaleFactor = abs(searchBoundScaleFactor)
            except:
                print(('BayesOpt - ERROR: ', searchBoundScaleFactor,
                       ' is not a valid searchBoundScaleFactor (scaling coeff).'))
        self.iter_bound = iter_bound
        self.prior_data = prior_data  # for seeding the GP with data acquired by another optimizer
        self.target_func = target_func
        print('target_func = ', target_func)
        try:
            self.mi = self.target_func.mi
            print('********* BO - self.mi = self.target_func.mi WORKED!')
        except:
            self.mi = self.target_func
            print('********* BO - self.mi = self.target_func WORKED!')
        self.acq_func = (acq_func, xi, alt_param)

        ## the nus here should be increased by a factor of npts_per_sample if using the standard error of the mean as the noise param
        ##self.ucb_params = [0.01, 2.]    # [nu, delta]
        #self.ucb_params = [0.002, 0.4]   # [nu, delta] we like
        ##self.ucb_params = [0.007, 1.0]  # [nu, delta]
        # the nus here should be used with the standard error of the mean
        #self.ucb_params = [0.12, 2.]     # [nu, delta]
        #self.ucb_params = [0.24, 0.4]    # [nu, delta] we like
        #self.ucb_params = [0.84, 1.0]    # [nu, delta]
        self.ucb_params = [2., None]  # if we want to use a fixed scale factor of the standard deviation

        self.max_iter = 100
        self.check = None
        self.alpha = 1
        self.kill = False
        self.ndim = np.array(start_dev_vals).size
        self.multiprocessingQ = multiprocessingQ  # speed up acquisition function optimization

        # Post-edit
        if self.mi.name == 'MultinormalInterface':
            self.dev_ids = self.mi.pvs[:-1]  # last pv is the objective
            self.start_dev_vals = self.mi.x
        else:
            self.dev_ids = dev_ids
            self.start_dev_vals = start_dev_vals
        self.pvs = self.dev_ids
        self.pvs_ = [pv.replace(":", "_") for pv in self.pvs]

        try:
            # get initial state
            (x_init, y_init) = self.getState()
            print('Supposed to be grabbing machine state...')
            print('x_init', x_init)
            print('y_init', y_init)
            self.X_obs = np.array(x_init)
            self.Y_obs = [y_init]
            self.current_x = np.array(np.array(x_init).flatten(), ndmin=2)
        except:
            print('BayesOpt - ERROR: Could not grab initial machine state')

        # calculate length scales
        try:
            # length scales from covar params
            cp = self.model.covar_params[0]
            cps = np.shape(cp)
            lengthscales = np.sqrt(1. / np.exp(cp))
            if np.size(cps) == 2:
                if cps[0] < cps[1]:  # vector of lengths
                    self.lengthscales = lengthscales.flatten()
                else:  # matrix of lengths
                    self.lengthscales = np.diag(lengthscales)
        except:
            print('WARNING - GP.bayesian_optimization.BayesOpt: Using some unit length scales cause we messed up somehow...')
            self.lengthscales = np.ones(len(self.dev_ids))

        # make a copy of the initial params
        self.initial_hyperparams = {}
        self.initial_hyperparams['length scales'] = copy.copy(self.lengthscales)
        self.initial_hyperparams['noise variance'] = self.model.noise_var  # np.exp(self.model.covar_params[2])
        self.initial_hyperparams['covar amplitude'] = np.exp(self.model.covar_params[1])

        ## initialize the prior
        #self.model.prmean = None   # prior mean fcn
        #self.model.prmeanp = None  # params of prmean fcn
        #self.model.prvar = None
        #self.model.prvarp = None
        #self.model.prmean_name = ''

    def getState(self):
        #print('>>>>>>>> getState')
        #x_vals = [self.mi.get_value(d) for d in self.dev_ids]
        #print('>>>>>>>>>>>>>>>>>>>> invoking get_penalty')
        #y_val = -self.target_func.get_penalty()
        #print(y_val)
        #print('>>>>>>>>>>>>> getState returning')
        # Note: Dylan edited this function on 2019-08-30 for use with his
        # simple_machine_interface class by commenting out the lines above and
        # replacing them with the line immediately below.
        x_vals, y_val = self.mi.getState()
        return x_vals, y_val

    def terminate(self, devices):
        """
        Sets the position back to the location that seems best in hindsight.
        It's a good idea to run this at the end of the optimization, since
        Bayesian optimization tries to explore and might not always end in
        a good place.
        """
        print(("TERMINATE", self.x_best))
        if (self.acq_func[0] == 'EI'):
            # set position back to something reasonable
            for i, dev in enumerate(devices):
                dev.set_value(self.x_best[i])
            #error_func(self.x_best)
        if (self.acq_func[0] == 'UCB'):
            # UCB doesn't keep track of x_best, so find it
            (x_best, y_best) = self.best_seen()
            for i, dev in enumerate(devices):
                dev.set_value(x_best[i])

    def minimize(self, error_func, x):
        # weighting for exploration vs exploitation in the GP: at the end of the scan, the alpha array goes from 1 to zero
        inverse_sign = -1
        self.current_x = np.array(np.array(x).flatten(), ndmin=2)
        self.X_obs = np.array(self.current_x)
        self.Y_obs = [np.array([[inverse_sign * error_func(x)]])]

        # iterate through the GP method
        for i in range(self.max_iter):
            print('<><><><><><><><> iter number {} <><><><><><><<<'.format(i))
            if (i > 2):  # and (np.mod(i, 3) == 0):
                print('****** Optimizing kernel hyperparams')
                self.optimize_log_lik()

            # get next point to try using acquisition function
            x_next = self.acquire(self.alpha)

            # check for problems with the beam
            if self.check != None:
                self.check.errorCheck()

            y_new = error_func(x_next.flatten())
            if self.opt_ctrl.kill:  # opt_ctrl is assumed to be attached externally
                print('WARNING - BayesOpt: Killing Bayesian optimizer...')
                break
            y_new = np.array([[inverse_sign * y_new]])

            # change position of interface
            x_new = deepcopy(x_next)
            self.current_x = x_new

            # add new entry to observed data
            self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
            self.Y_obs.append(y_new)

            # update the model (may want to add noise if using testEI)
            self.model.update(x_new, y_new)

    def OptIter(self, pause=0):
        # runs the optimizer for one iteration
        # get next point to try using acquisition function
        x_next = self.acquire()
        if (self.acq_func[0] == 'testEI'):
            ind = x_next
            x_next = np.array(self.acq_func[2].iloc[ind, :-1], ndmin=2)

        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if (self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind, -1])
        else:
            (x_new, y_new) = self.mi.getState()

        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
        self.Y_obs.append(y_new)

        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)  # + .5*np.random.randn())
    def ForcePoint(self, x_next):
        # force a point acquisition at our discretion and update the model
        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if (self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind, -1])
        else:
            (x_new, y_new) = self.mi.getState()

        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
        self.Y_obs.append(y_new)

        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)

    def best_seen(self):
        """
        Checks the observed points to see which is predicted to be best.
        Probably safer than just returning the maximum observed, since the
        model has noise. It takes longer this way, though; you could instead
        take the model's prediction at the x-value that has done best if this
        needs to be faster.
        Not needed for UCB, so do it the fast way (return max obs).
        """
        if (self.acq_func[0] == 'UCB'):
            mu = self.Y_obs
        else:
            (mu, var) = self.model.predict(self.X_obs)
            mu = [self.model.predict(np.array(x, ndmin=2))[0] for x in self.X_obs]

        (ind_best, mu_best) = max(enumerate(mu), key=op.itemgetter(1))
        return (self.X_obs[ind_best], mu_best)

    def sk_kernel(self, amp, noise_var, lengthscales):
        # all params are variances
        #print('amp', amp, 'noise_var', noise_var, 'lengthscales', lengthscales)
        se_ard = Ck(amp) * RBF(length_scale=lengthscales, length_scale_bounds=(1e-6, 20))
        noise = WhiteKernel(noise_level=noise_var, noise_level_bounds=(1e-9, 100))  # noise terms
        # sk_kernel = se_ard + noise + Ck(0.4)  # with bias
        sk_kernel = se_ard + noise

        t0 = time.time()
        gpr = GaussianProcessRegressor(kernel=sk_kernel, n_restarts_optimizer=5)
        print("Initial kernel: %s" % gpr.kernel)
        self.ytrain = [y[0][0] for y in self.Y_obs]
        gpr.fit(self.X_obs, self.ytrain)
        print('SK fit time is ', time.time() - t0)
        print("Learned kernel: %s" % gpr.kernel_)
        print("Log-marginal-likelihood: %.3f" % gpr.log_marginal_likelihood(gpr.kernel_.theta))
        #print(gpr.kernel_.get_params())

        sk_ls = gpr.kernel_.get_params()['k1__k2__length_scale']
        sk_noise = gpr.kernel_.get_params()['k2__noise_level']
        sk_amp = gpr.kernel_.get_params()['k1__k1__constant_value']
        sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)

        # #if bias is included use this:
        # sk_ls = gpr.kernel_.get_params()['k1__k1__k2__length_scale']
        # sk_noise = gpr.kernel_.get_params()['k1__k2__noise_level']
        # sk_amp = gpr.kernel_.get_params()['k1__k1__k1__constant_value']
        # sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)

        return sk_loklik, sk_amp, sk_noise, sk_ls

    def optimize_log_lik(self):
        """ Optimize the kernel hyperparameters before acquiring the next point """
        # initial hyperparams
        lengthscales = self.initial_hyperparams['length scales']
        noise_var = self.initial_hyperparams['noise variance']
        amp = self.initial_hyperparams['covar amplitude']

        # optimize hyperparams using scikit-learn
        sk_loklik0, sk_amp0, sk_noise0, sk_ls0 = self.sk_kernel(amp, noise_var, lengthscales)

        if self.X_obs.shape[0] > 1:  # don't run twice on the first step
            # last hyperparams seen so far
            current_covar = np.sqrt(1. / np.exp(self.model.covar_params[0]))
            # this gives a full matrix and not just lengthscales; scikit-learn
            # can't deal with a matrix
            current_lengthscales = np.diag(current_covar)  # take only the diagonal part - the lengthscales!
            current_covar_amp = np.exp(self.model.covar_params[1])
            current_noise_variance = self.model.noise_var

            # repeat the scikit-learn fit
            print('optimize on last hyperparams seen so far')
            sk_loklik, sk_amp, sk_noise, sk_ls = self.sk_kernel(
                current_covar_amp, current_noise_variance, current_lengthscales)
        else:
            # on the first step only the initial-hyperparam fit exists
            sk_loklik, sk_amp, sk_noise, sk_ls = sk_loklik0, sk_amp0, sk_noise0, sk_ls0

        # compare likelihoods and choose the best hyperparams
        if sk_loklik > sk_loklik0:
            # this is the required packing for the online GP code
            hyperparams_opt = (np.diag(1. / (sk_ls**2)), np.log(sk_amp), np.log(sk_noise))
        else:
            hyperparams_opt = (np.diag(1. / (sk_ls0**2)), np.log(sk_amp0), np.log(sk_noise0))
        #print(hyperparams_opt)

        # create new OnlineGP model - overwrites the existing one
        print('sanity dim check: ', self.model.nin == self.X_obs.shape[1])
        self.model = OGP(self.model.nin, hyperparams=hyperparams_opt,
                         maxBV=self.model.maxBV, covar=self.model.covar)
        # , weighted=self.model.weighted, maxBV=self.model.maxBV, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp, proj=self.model.proj, thresh=self.model.thresh, sparsityQ=self.model.sparsityQ)

        # initialize model on current data
        p_X = self.X_obs
        p_Y = self.ytrain
        num = p_X.shape[0]
        self.model.fit(p_X, p_Y, num)

    def acquire(self, alpha=1.):
        """
        Computes the next point for the optimizer to try by maximizing the
        acquisition function. If movement per iteration is bounded, starts
        search at current position.
        """
        # look from best positions
        (x_best, y_best) = self.best_seen()
        self.x_best = x_best
        x_curr = self.current_x[-1]
        x_start = x_best
        ndim = x_curr.size  # dimension of the feature space we're searching; NEEDED FOR UCB
        try:
            nsteps = 1 + self.X_obs.shape[0]  # acquisition number we're on; NEEDED FOR UCB
        except:
            nsteps = 1

        # check to see if this is bounding step sizes
        if (self.iter_bound or True):
            if (self.bounds is None):  # looks like a scale factor
                self.bounds = 1.0
            bound_lengths = self.searchBoundScaleFactor * 3. * self.lengthscales  # 3x hyperparam lengths
            relative_bounds = np.transpose(np.array([-bound_lengths, bound_lengths]))
            iter_bounds = np.transpose(np.array([x_start - bound_lengths, x_start + bound_lengths]))
        else:
            iter_bounds = self.bounds

        # options for finding the peak of the acquisition function:
        optmethod = 'L-BFGS-B'  # L-BFGS-B, BFGS, TNC, and SLSQP allow bounds whereas Powell and COBYLA don't
        maxiter = 1000  # max number of steps for one scipy.optimize.minimize call
        try:
            nproc = mp.cpu_count()  # number of processes to launch minimizations on
        except:
            nproc = 1
        niter = 1  # max number of starting points for search
        niter_success = 1  # stop search if the same minimum is found this many times
        tolerance = 1.e-4  # goal tolerance

        # perturb start to break symmetry?
        #x_start += np.random.randn(lengthscales.size)*lengthscales*1e-6

        # probability of improvement acquisition function
        if (self.acq_func[0] == 'PI'):
            aqfcn = negProbImprove
            fargs = (self.model, y_best, self.acq_func[1])

        # expected improvement acquisition function
        elif (self.acq_func[0] == 'EI'):
            aqfcn = negExpImprove
            fargs = (self.model, y_best, self.acq_func[1], alpha)

        # gaussian process upper confidence bound acquisition function
        elif (self.acq_func[0] == 'UCB'):
            aqfcn = negUCB
            fargs = (self.model, ndim, nsteps, self.ucb_params[0], self.ucb_params[1])

        # maybe something mitch was using once? (can probably remove)
        elif (self.acq_func[0] == 'testEI'):
            # collect all possible x values
            options = np.array(self.acq_func[2].iloc[:, :-1])
            (x_best, y_best) = self.best_seen()

            # find the option with best EI
            best_option_score = (-1, 1e12)
            for i in range(options.shape[0]):
                result = negExpImprove(options[i], self.model, y_best, self.acq_func[1])
                if (result < best_option_score[1]):
                    best_option_score = (i, result)

            # return the index of the best option
            return best_option_score[0]
        else:
            print('WARNING - BayesOpt: Unknown acquisition function.')
            return 0

        try:
            if (self.multiprocessingQ):  # multi-processing to speed search
                neval = 2 * int(10. * 2.**(ndim / 12.))
                nkeep = 2 * min(8, neval)

                # parallelgridsearch generates a pseudo-random grid, then performs an ICDF
                # transform to map to a multinormal distribution centered on x_start and
                # with widths given by the hyperparams

                nbest = 3  # add the nbest best points seen so far (largest Y_obs)
                nstart = 2  # make sure some starting points are there to prevent runaway searches
                yobs = np.array([y[0][0] for y in self.Y_obs])
                isearch = yobs.argsort()[-nbest:]
                for i in range(min(nstart, len(self.Y_obs))):
                    # if np.sum(isearch == i) == 0:  # not found in list
                    isearch = np.append(isearch, i)
                isearch.sort()  # sort to bias searching near earlier steps

                v0s = None
                for i in isearch:
                    vs = parallelgridsearch(aqfcn, self.X_obs[i],
                                            self.searchBoundScaleFactor * 0.6 * self.lengthscales,
                                            fargs, neval, nkeep)
                    if type(v0s) == type(None):
                        v0s = copy.copy(vs)
                    else:
                        v0s = np.vstack((v0s, vs))
                v0sort = v0s[:, -1].argsort()[:nkeep]  # keep the nkeep best (smallest negated-acquisition values)
                v0s = v0s[v0sort]
                x0s = v0s[:, :-1]
                # for later testing if the minimize results are better than the best starting point
                v0best = v0s[0]

                if basinhoppingQ:
                    # use basinhopping
                    bkwargs = dict(niter=niter, niter_success=niter_success,
                                   minimizer_kwargs={'method': optmethod, 'args': fargs,
                                                     'tol': tolerance, 'bounds': iter_bounds,
                                                     'options': {'maxiter': maxiter}})  # keyword args for basinhopping
                    res = parallelbasinhopping(aqfcn, x0s, bkwargs)
                else:
                    # use minimize
                    mkwargs = dict(bounds=iter_bounds, method=optmethod,
                                   options={'maxiter': maxiter}, tol=tolerance)  # keyword args for scipy.optimize.minimize
                    res = parallelminimize(aqfcn, x0s, fargs, mkwargs, v0best,
                                           relative_bounds=relative_bounds)
            else:  # single-processing
                if basinhoppingQ:
                    res = basinhopping(aqfcn, x_start, niter=niter, niter_success=niter_success,
                                       minimizer_kwargs={'method': optmethod,
                                                         'args': (self.model, y_best, self.acq_func[1], alpha),
                                                         'tol': tolerance, 'bounds': iter_bounds,
                                                         'options': {'maxiter': maxiter}})
                else:
                    res = minimize(aqfcn, x_start,
                                   args=(self.model, y_best, self.acq_func[1], alpha),
                                   method=optmethod, tol=tolerance, bounds=iter_bounds,
                                   options={'maxiter': maxiter})
                res = res.x
        except:
            raise

        return np.array(res, ndmin=2)  # return resulting x value as a (1 x dim) vector
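# negExpImprove and negProbImprove are likewise referenced above but not
# defined in this section. Below is a minimal sketch of a negated
# expected-improvement acquisition matching the fargs signature used in
# acquire(), (model, y_best, xi, alpha). The alpha blend with the posterior
# mean is one plausible reading of "ratio of exploration to exploitation",
# not the repository's actual code.

from scipy.stats import norm

def negExpImprove_sketch(x_new, model, y_best, xi, alpha=1.0):
    """Negative expected improvement; minimizing this maximizes EI."""
    (y_mean, var) = model.predict(np.array(x_new, ndmin=2))
    y_mean = float(y_mean)
    sigma = float(np.sqrt(var))
    if sigma == 0.:
        return 0.
    diff = y_mean - y_best - xi  # improvement over the incumbent, less the exploration margin
    Z = diff / sigma
    EI = diff * norm.cdf(Z) + sigma * norm.pdf(Z)
    # alpha = 1 is pure EI; alpha < 1 mixes in exploitation of the posterior mean
    return alpha * (-EI) + (1. - alpha) * (-y_mean)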