def test_acquisition_functions(acquisition_function: acquisition_functions.AcquisitionFunction):
    batch_size = 13
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.1307,), (0.3081,))]),
        ),
        batch_size=batch_size,
        shuffle=False,
    )

    bayesian_net = mnist_model.BayesianNet()
    estimator = acquisition_function.create(bayesian_net, k=1)
    estimator.eval()

    scores = torch.tensor([])
    num_iters = 5
    for data, _ in itertools.islice(test_loader, num_iters):
        output = estimator(data)
        scores = torch.cat((scores, output), dim=0)

    assert scores.shape == (batch_size * num_iters,)
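# The enum-typed argument above suggests this test is driven by pytest
# parametrization over every acquisition function. The harness below is a
# plausible sketch, not part of the original module; the enum listing and the
# wrapper name are assumptions.
import pytest

@pytest.mark.parametrize(
    "acquisition_function", list(acquisition_functions.AcquisitionFunction))
def test_acquisition_functions_parametrized(acquisition_function):
    test_acquisition_functions(acquisition_function)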
def test_check_input_permutation(af_type: acquisition_functions.AcquisitionFunction):
    if af_type == acquisition_functions.AcquisitionFunction.random:
        return

    batch_size = 12
    test_data = torch.rand((batch_size, 10))
    mixture_a = test_data[::2, :]
    mixture_b = test_data[1::2, :]
    mixture_c = test_data

    class Forwarder(torch.nn.Module):
        def forward(self, batch):
            return batch

    forwarder = Forwarder()
    estimator = af_type.create(forwarder, k=1)
    estimator.eval()

    output_a = estimator(mixture_a)
    output_b = estimator(mixture_b)
    output_c = estimator(mixture_c)

    torch.testing.assert_allclose(
        torch.cat([output_a, output_b], dim=0),
        torch.cat([output_c[::2], output_c[1::2]], dim=0))
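# The test above encodes the invariant that per-point scores must not depend
# on which other points share the batch: scoring the even and odd rows
# separately has to match the interleaved slices of the full batch's scores.
# A minimal self-contained illustration with a toy per-row score (row max):
import torch

def _row_scores(batch: torch.Tensor) -> torch.Tensor:
    # a per-row statistic, independent of the rest of the batch
    return batch.max(dim=1).values

_x = torch.rand(6, 4)
assert torch.allclose(
    torch.cat([_row_scores(_x[::2]), _row_scores(_x[1::2])]),
    torch.cat([_row_scores(_x)[::2], _row_scores(_x)[1::2]]))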
def expandBoundsDDB_FB(self):
    """
    Description: Expands the search space with the full Bayesian
    implementation of our DDB method
    """
    print('Attempting to expand search space with DDB-FB method')
    alpha = self.alpha
    beta = self.beta
    bound_samples = 100  # number of radius samples used to fit the log-logistic distribution

    # Find y^+ and x^+
    ymax = np.max(self.Y)

    # Generate test radii
    max_loc = np.argmax(self.Y)
    xmax = self.X[max_loc]
    test_bound = np.zeros(self.scalebounds.shape)
    bound_dist = np.zeros(bound_samples)
    bound_center = xmax
    test_bound[:, 1] = bound_center + 0.5
    test_bound[:, 0] = bound_center - 0.5
    max_radius = np.max(np.array([np.max(max_bound_size - test_bound[:, 1]),
                                  np.max(test_bound[:, 0])]))
    step = max_radius / bound_samples
    packing_number = np.zeros(bound_samples)

    # Generate a Thompson-sampled maximum to estimate the internal maximum
    TS = AcquisitionFunction.ThompsonSampling(self.gp)
    tsb_x, tsb_y = acq_max_global(TS, self.gp, bounds=self.scalebounds)

    # Generate Gumbel samples to estimate the external maximum
    for i in range(0, bound_samples):
        bound_length = test_bound[:, 1] - test_bound[:, 0]
        volume = np.power(max_bound_size, self.dim) - np.prod(bound_length)
        packing_number[i] = round(volume / (5 * self.gp.lengthscale))
        mu = stats.norm.ppf(1.0 - 1.0 / packing_number[i])
        sigma = stats.norm.ppf(1.0 - (1.0 / packing_number[i]) * np.exp(-1.0)) \
                - stats.norm.ppf(1.0 - (1.0 / packing_number[i]))
        bound_dist[i] = np.exp(-np.exp(-(-tsb_y - mu) / sigma))
        test_bound[:, 1] = test_bound[:, 1] + step
        test_bound[:, 0] = test_bound[:, 0] - step
    bound_dist[np.isnan(bound_dist)] = 1

    # Fit the log-logistic parameters to the Gumbel samples
    xfit = np.arange(0, max_radius, max_radius / 100)
    popt, pcov = optimize.curve_fit(self.sufficientBoundPDF, xfit[0:100],
                                    bound_dist, bounds=np.array([[5, 1.1], [20, 5]]))
    print("popt={}".format(popt))
    b = ymax / popt[0]
    a = popt[1]
    print("b={}, ymax={}".format(b, ymax))

    # Sample the optimal radius for each dimension
    for d in range(0, self.dim):
        gamma = np.random.gamma(shape=alpha, scale=1 / beta, size=100)
        loglog = stats.fisk.pdf(gamma, ymax / b, scale=a)
        scaled_weights = loglog / np.sum(loglog)
        multi = np.random.multinomial(1, scaled_weights)
        r_index = np.argmax(multi)
        print("Radius of {} selected".format(gamma[r_index]))
        self.scalebounds[d, 1] = xmax[d] + gamma[r_index]
        self.scalebounds[d, 0] = xmax[d] - gamma[r_index]
    print("search space extended to {} with DDB".format(self.scalebounds))
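# The radius draw at the end of expandBoundsDDB_FB is a discrete importance
# sample: candidate radii come from a Gamma(alpha, beta) prior and are
# reweighted by the fitted log-logistic (Fisk) density before one candidate is
# kept per dimension. A standalone sketch of that step; the hyperparameter and
# fitted-parameter values here are placeholders, not values from the method:
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
alpha, beta = 2.0, 1.0          # assumed Gamma prior hyperparameters
c_shape, a_scale = 10.0, 2.0    # assumed fitted log-logistic shape and scale

candidates = rng.gamma(shape=alpha, scale=1.0 / beta, size=100)
weights = stats.fisk.pdf(candidates, c_shape, scale=a_scale)
weights = weights / np.sum(weights)
radius = rng.choice(candidates, p=weights)  # one such draw per dimension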
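# Every method in the classes below relies on the same two transformations:
# inputs are mapped to the unit cube as (x - lower) / (upper - lower) via
# max_min_gap, and observations are normalized as (y - mean) / (max - min).
# A minimal standalone sketch of both, with a made-up 2-d search box:
import numpy as np

_bounds = np.array([[-5.0, 10.0], [0.0, 15.0]])     # assumed [lower, upper] rows
_gap = _bounds[:, 1] - _bounds[:, 0]                # max_min_gap

_X_original = np.array([[2.5, 7.5], [-5.0, 15.0]])
_X_scaled = (_X_original - _bounds[:, 0]) / _gap    # rows now lie in [0, 1]^2

_Y_original = np.array([1.0, 3.0, 2.0])
_Y = (_Y_original - _Y_original.mean()) / (_Y_original.max() - _Y_original.min())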
class PradaBayOptFBO(object):

    def __init__(self, gp_params, f, b_init_lower, b_init_upper,
                 b_limit_lower, b_limit_upper, acq, verbose=1,
                 opt_toolbox='nlopt'):
        """
        Input parameters
        ----------
        f: function to optimize
        b_init_lower, b_init_upper: predefined initial bounds on parameters
        b_limit_lower, b_limit_upper: limit bounds the search box may not exceed
        acq: acquisition function, acq['name']=['ei','ucb','poi','lei'],
             acq['kappa'] for ucb, acq['k'] for lei
        opt_toolbox: optimization toolbox, 'nlopt','direct','scipy'

        Returns
        -------
        dim: dimension
        bounds0: initial bounds on original scale
        bounds_limit: limit bounds on original scale
        bounds: current bounds on parameters
        bounds_list: bounds at all iterations
        bounds_bk: bounds backup for computational purposes
        scalebounds: bounds on normalized 0-1 scale  # be careful with scaling
        scalebounds_bk: backup of the normalized 0-1 bounds
        time_opt: records the time spent on optimization
        gp: Gaussian Process object
        MaxIter: maximum number of iterations
        """

        # Find number of parameters
        self.dim = len(b_init_lower)

        self.b_init_lower = b_init_lower
        self.b_init_upper = b_init_upper
        self.bounds0 = np.asarray([b_init_lower, b_init_upper]).T

        self.bounds = self.bounds0.copy()
        self.bounds_list = self.bounds0.copy()
        self.bounds_bk = self.bounds.copy()  # keep track

        # create a scalebounds 0-1
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        self.max_min_gap_bk = self.max_min_gap.copy()

        # Some function to be optimized
        self.f = f
        # optimization toolbox
        self.opt_toolbox = opt_toolbox
        # acquisition function type
        self.acq = acq

        # store X in original scale
        self.X_original = None

        # store X in 0-1 scale
        self.X = None

        # store y=f(x), normalized as (y - mean)/(max - min)
        self.Y = None

        # y on original scale
        self.Y_original = None

        self.time_opt = 0

        self.k_Neighbor = 2

        # Lipschitz constant
        self.L = 0

        # Gaussian Process class
        self.gp = PradaGaussianProcess(gp_params)

        # acquisition function
        self.acq_func = None

        # stop condition
        self.stop_flag = 0

        # volume of initial box, computed in log space
        #self.vol0=prod(self.max_min_gap)
        self.l_radius0 = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) / self.dim)
        self.l_radius = self.l_radius0

        self.MaxIter = gp_params['MaxIter']

        self.b_limit_lower = b_limit_lower
        self.b_limit_upper = b_limit_upper

        # will be used later for visualization
        self.X_invasion = []

    def posterior(self, Xnew):
        self.gp.fit(self.X, self.Y)
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def init(self, gp_params, n_init_points=3):
        """
        Input parameters
        ----------
        gp_params: Gaussian Process structure
        n_init_points: # init points
        """
        # Generate random points
        l = [np.random.uniform(x[0], x[1], size=n_init_points)
             for x in self.bounds]

        # Concatenate new random points to possible existing
        # points from self.explore method.
        temp = np.asarray(l)
        temp = temp.T
        init_X = list(temp.reshape((n_init_points, -1)))
        self.X_original = np.asarray(init_X)

        # Evaluate target function at all initialization points
        y_init = self.f(init_X)
        y_init = np.reshape(y_init, (n_init_points, 1))

        self.Y_original = np.asarray(y_init)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        self.X = self.X_original.copy()

    def max_volume(self, gp, max_bounds_scale, max_lcb):
        """
        Find a data point that maximizes the searched volume.

        Input Parameters
        ----------
        gp: a Gaussian process fitted to the relevant data
        max_bounds_scale: the bounds (scaled space) that limit the search
        max_lcb: the current maximum of the lower confidence bound

        Returns
        -------
        x_max: the arg max of the acquisition function
        """

        def compute_utility_score_for_maximizing_volume_wrapper(x_tries, gp, dim, max_lcb):
            if len(x_tries.shape) == 1:
                return compute_utility_score_for_maximizing_volume(x_tries, gp, dim, max_lcb)
            return np.apply_along_axis(compute_utility_score_for_maximizing_volume,
                                       1, x_tries, gp, dim, max_lcb)

        def compute_utility_score_for_maximizing_volume(x_tries, gp, dim, max_lcb):
            new_bounds = self.scalebounds
            kappa = 2
            mean, var = gp.predict(x_tries, eval_MSE=True)
            var.flags['WRITEABLE'] = True
            #var=var.copy()
            var[var < 1e-10] = 0

            myucb = mean + kappa * np.sqrt(var)
            myucb = np.ravel(myucb)

            if np.ravel(myucb)[0] < np.ravel(max_lcb)[0]:
                return myucb

            # store the points (outside the previous bound) that satisfy the
            # constraint; convert to original scale before appending
            x_tries_original = x_tries * self.max_min_gap + self.bounds_bk[:, 0]

            # check if it is outside the old bound
            flagOutside = 0
            for d in range(self.dim):
                if x_tries[d] > self.scalebounds_bk[d, 1] or x_tries[d] < self.scalebounds_bk[d, 0]:
                    # outside the old bound
                    flagOutside = 1
                    break

            if flagOutside == 1:  # append to the invasion set
                if len(self.X_invasion) == 0:
                    self.X_invasion = x_tries_original
                    self.Y_invasion = myucb
                else:
                    self.X_invasion = np.vstack((self.X_invasion, x_tries_original))
                    self.Y_invasion = np.vstack((self.Y_invasion, myucb))

            # expand the bound
            for d in range(dim):
                # expand lower bound
                if x_tries[d] < new_bounds[d, 0]:
                    new_bounds[d, 0] = x_tries[d]
                # expand upper bound
                if x_tries[d] > new_bounds[d, 1]:
                    new_bounds[d, 1] = x_tries[d]

            self.scalebounds = new_bounds
            # return the utility score
            return myucb

        dim = max_bounds_scale.shape[0]
        # Start with the lower bound as the argmax
        #x_max = max_bounds[:, 0]
        max_acq = None

        myopts = {'maxiter': 1000, 'fatol': 0.001, 'xatol': 0.001}

        # multi start
        for i in range(5 * dim):
            # Find the minimum of minus the acquisition function
            x_tries = np.random.uniform(max_bounds_scale[:, 0],
                                        max_bounds_scale[:, 1],
                                        size=(100 * dim, dim))

            # evaluate the utility to estimate a starting point
            y_tries = compute_utility_score_for_maximizing_volume_wrapper(
                x_tries, gp, dim, max_lcb)

            # find x optimal for init
            idx_max = np.argmax(y_tries)
            x_init_max = x_tries[idx_max]

            res = minimize(
                lambda x: -compute_utility_score_for_maximizing_volume_wrapper(
                    x, gp, dim, max_lcb),
                #x_init_max.reshape(1, -1),bounds=bounds,options=myopts,method="nelder-mead")
                x_init_max.reshape(1, -1),
                bounds=max_bounds_scale,
                options=myopts,
                method="L-BFGS-B")

            # value at the estimated point
            val = compute_utility_score_for_maximizing_volume(res.x, gp, dim, max_lcb)

            # Store it if better than the previous maximum.
            if max_acq is None or val >= max_acq:
                x_max = res.x
                max_acq = val
                #print(max_acq)

        # Clip output to make sure it lies within the bounds. Due to floating
        # point technicalities this is not always the case.
        return x_max

    def run_FBO(self, gp_params):
        """
        Main optimization method for the filtering strategy for BO.

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        # for random approach
        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP: convert it to scaled X
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        # obtain the maximum on the observed set (for EI)
        y_max = self.Y.max()
        #self.L=self.estimate_L(self.scalebounds)

        # select the acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step:
        # find the maximum of the lower bound mu(x) - kappa*sigma(x)
        mu_acq = {}
        mu_acq['name'] = 'lcb'
        mu_acq['dim'] = self.dim
        mu_acq['kappa'] = 2
        acq_mu = AcquisitionFunction(mu_acq)

        # obtain the argmax(lcb); make sure scaled bounds are used, not original ones
        x_lcb_max = acq_max(ac=acq_mu.acq_kind,
                            gp=self.gp,
                            y_max=y_max,
                            bounds=self.scalebounds,
                            opt_toolbox=self.opt_toolbox)

        # obtain the max(lcb)
        max_lcb = acq_mu.acq_kind(x_lcb_max, gp=self.gp, y_max=y_max)
        max_lcb = np.ravel(max_lcb)

        # find the region outside the box that has ucb > max_lcb
        self.max_min_gap_bk = self.max_min_gap.copy()
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()
        self.X_invasion = []

        # the region considered is computed as follows: NewVol ~ OldVol*T/t;
        # alternatively, we compute the radius NewL ~ OldL*pow(T/t, 1/d)
        new_radius = self.l_radius * np.power(
            self.MaxIter / len(self.Y_original), 1.0 / self.dim)

        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius
        #extra_radius=(new_radius-self.l_radius)/2

        # check if the extra radius is negative
        if extra_proportion < 1:
            extra_proportion = 1

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound, X'_t
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * (extra_proportion - 1)
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * (extra_proportion - 1)
        #max_bounds[:,0]=max_bounds[:,0]-extra_radius
        #max_bounds[:,1]=max_bounds[:,1]+extra_radius

        # make sure max_bounds is within the limit
        if self.b_limit_lower is not None:
            temp_max_bounds_lower = [np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                                     for idx in range(self.dim)]
            max_bounds[:, 0] = temp_max_bounds_lower

        if self.b_limit_upper is not None:
            temp_max_bounds_upper = [np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                                     for idx in range(self.dim)]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [(max_bounds[d, :] - self.bounds[d, 0]) * 1.0 / self.max_min_gap[d]
                for d in range(self.dim)]
        max_bounds_scale = np.asarray(temp)

        # find suitable candidates in the new region: ucb(x) > max_lcb s.t. max L(x).
        # note that the scalebounds will be changed inside this function
        self.max_volume(self.gp, max_bounds_scale, max_lcb)
        #print("new bounds scale")
        #print(self.scalebounds)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        val_acq = self.acq_func.acq_kind(x_max_scale, self.gp, y_max)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated; if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check whether the estimated data point is in the old bound or the new one
        flagOutside = 0
        for d in range(self.dim):
            if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                # outside the old bound
                flagOutside = 1
                self.scalebounds[d, 0] = np.minimum(x_max_scale[d], self.scalebounds_bk[d, 0])
                self.scalebounds[d, 1] = np.maximum(x_max_scale[d], self.scalebounds_bk[d, 1])
            else:
                self.scalebounds[d, :] = self.scalebounds_bk[d, :]

        # now the scalebounds are no longer 0-1
        if flagOutside == 0:  # not outside the old bound, use the old bound
            self.scalebounds = self.scalebounds_bk
            self.bounds = self.bounds_bk.copy()
        else:
            # outside the old bound => expand the bound to the minimum bound
            # containing the old bound and the selected point
            temp = [self.scalebounds[d, :] * self.max_min_gap[d] + self.bounds_bk[d, 0]
                    for d in range(self.dim)]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        self.bounds_list = np.hstack((self.bounds_list, self.bounds))

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) / self.dim)

    #==================================================================================

    def maximize_volume_doubling(self, gp_params):
        """
        Volume Doubling: double the volume (e.g., gamma=2) after every 3d evaluations

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP: convert it to scaled X
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)

        # Find unique rows of X to keep the GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()
        y_max = self.Y.max()

        # select the acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        self.scalebounds_bk = self.scalebounds.copy()
        self.bounds_bk = self.bounds.copy()

        # consider the expansion step after 3 iterations
        if (len(self.Y) % 3) == 0:
            new_radius = 2.0 * self.l_radius
            extra_radius = (new_radius - self.l_radius) / 2

            max_bounds = self.bounds.copy()
            max_bounds[:, 0] = max_bounds[:, 0] - extra_radius
            max_bounds[:, 1] = max_bounds[:, 1] + extra_radius

            # make sure it is within the limit
            if self.b_limit_lower is not None:
                temp_max_bounds_lower = [np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                                         for idx in range(self.dim)]
                max_bounds[:, 0] = temp_max_bounds_lower

            if self.b_limit_upper is not None:
                temp_max_bounds_upper = [np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                                         for idx in range(self.dim)]
                max_bounds[:, 1] = temp_max_bounds_upper

            self.bounds = np.asarray(max_bounds).copy()

            temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 / self.max_min_gap[d]
                    for d in range(self.dim)]
            self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print("alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0]))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated; if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        try:
            self.gp.fit(self.X[ur], self.Y[ur])
        except Exception:
            print("bug")

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) / self.dim)

    def maximize_unbounded_regularizer(self, gp_params):
        """
        Unbounded Regularizer (Bobak Shahriari et al., AISTATS 2016)

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)

        # Find unique rows of X to keep the GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()
        y_max = self.Y.max()

        self.scalebounds_bk = self.scalebounds.copy()
        self.bounds_bk = self.bounds.copy()

        # consider the expansion step after 3 iterations
        if (len(self.Y) % 3) == 0:
            new_radius = 2.0 * self.l_radius
            extra_radius = (new_radius - self.l_radius) / 2

            max_bounds = self.bounds.copy()
            max_bounds[:, 0] = max_bounds[:, 0] - extra_radius
            max_bounds[:, 1] = max_bounds[:, 1] + extra_radius

            # make sure it is within the limit
            if self.b_limit_lower is not None:
                temp_max_bounds_lower = [np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                                         for idx in range(self.dim)]
                max_bounds[:, 0] = temp_max_bounds_lower

            if self.b_limit_upper is not None:
                temp_max_bounds_upper = [np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                                         for idx in range(self.dim)]
                max_bounds[:, 1] = temp_max_bounds_upper

            self.bounds = np.asarray(max_bounds)

            temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 / self.max_min_gap[d]
                    for d in range(self.dim)]
            self.scalebounds = np.asarray(temp)

        # select the acquisition function
        self.acq['x_bar'] = np.mean(self.bounds)  # mean of the domain
        self.acq['R'] = np.power(self.l_radius, 1.0 / self.dim)
        self.acq_func = AcquisitionFunction(self.acq)

        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print("alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0]))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated; if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check whether the estimated data point is in the old bound or the new one
        flagOutside = 0
        for d in range(self.dim):
            if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                # outside the old bound
                flagOutside = 1
                self.scalebounds[d, 0] = np.minimum(x_max_scale[d], self.scalebounds_bk[d, 0])
                self.scalebounds[d, 1] = np.maximum(x_max_scale[d], self.scalebounds_bk[d, 1])

        # now the scalebounds are no longer 0-1
        if flagOutside == 0:  # not outside the old bound
            self.scalebounds = self.scalebounds_bk
        else:  # outside the old bound => recompute bound
            temp = [self.scalebounds[d, :] * self.max_min_gap[d] + self.bounds_bk[d, 0]
                    for d in range(self.dim)]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) / self.dim)

    def maximize_expanding_volume_L(self, gp_params):
        """
        Expanding volume following L ~ MaxIter

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP: convert it to scaled X
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()
        y_max = self.Y.max()
        #self.L=self.estimate_L(self.scalebounds)

        # select the acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step: back up the previous bounds
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()

        # the region considered is computed as follows: NewVol ~ OldVol*T/t;
        # alternatively, we compute the radius NewL ~ OldL*pow(T/t, 1/d)
        new_radius = self.l_radius * np.power(
            self.MaxIter / len(self.Y_original), 1.0 / self.dim)

        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius
        #extra_radius=(new_radius-self.l_radius)/2

        if extra_proportion < 1:
            extra_proportion = 1

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * (extra_proportion - 1) * 0.5
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * (extra_proportion - 1) * 0.5

        # make sure it is within the limit
        if self.b_limit_lower is not None:
            temp_max_bounds_lower = [np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                                     for idx in range(self.dim)]
            max_bounds[:, 0] = temp_max_bounds_lower

        if self.b_limit_upper is not None:
            temp_max_bounds_upper = [np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                                     for idx in range(self.dim)]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 / self.max_min_gap[d]
                for d in range(self.dim)]
        self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print("alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0]))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated; if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check whether the estimated data point is in the old or new bound, for cropping
        IsCropping = 0
        if IsCropping == 1:
            flagOutside = 0
            for d in range(self.dim):
                if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                    # outside the old bound
                    flagOutside = 1
                    self.scalebounds[d, 0] = np.minimum(x_max_scale[d], self.scalebounds_bk[d, 0])
                    self.scalebounds[d, 1] = np.maximum(x_max_scale[d], self.scalebounds_bk[d, 1])

            # now the scalebounds are no longer 0-1
            if flagOutside == 0:  # not outside the old bound
                self.scalebounds = self.scalebounds_bk
                self.bounds = self.bounds_bk.copy()
            else:  # outside the old bound => recompute bound
                temp = [self.scalebounds[d, :] * self.max_min_gap[d] + self.bounds_bk[d, 0]
                        for d in range(self.dim)]
                if self.dim > 1:
                    self.bounds = np.reshape(temp, (self.dim, 2))
                else:
                    self.bounds = np.array(temp)
        else:
            temp = [self.scalebounds[d, :] * self.max_min_gap[d] + self.bounds_bk[d, 0]
                    for d in range(self.dim)]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) / self.dim)

    def maximize_expanding_volume_L_Cropping(self, gp_params):
        """
        Expanding volume following L ~ MaxIter, with cropping

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.bounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP: convert it to scaled X
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()
        y_max = self.Y.max()
        #self.L=self.estimate_L(self.scalebounds)

        # select the acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step: back up the previous bounds
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()

        # the region considered is computed as follows: NewVol ~ OldVol*T/t;
        # alternatively, we compute the radius NewL ~ OldL*pow(T/t, 1/d)
        new_radius = self.l_radius * np.power(
            self.MaxIter / len(self.Y_original), 1.0 / self.dim)

        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius
        #extra_radius=(new_radius-self.l_radius)/2
        # check if the extra radius is negative
        #if extra_radius<0:
        #    extra_radius=0

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * extra_proportion
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * extra_proportion

        # make sure max_bounds is still within the limit
        if self.b_limit_lower is not None:
            temp_max_bounds_lower = [np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                                     for idx in range(self.dim)]
            max_bounds[:, 0] = temp_max_bounds_lower

        if self.b_limit_upper is not None:
            temp_max_bounds_upper = [np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                                     for idx in range(self.dim)]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 / self.max_min_gap[d]
                for d in range(self.dim)]
        self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print("alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0]))

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated; if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check whether the estimated data point is in the old or new bound, for cropping
        IsCropping = 1
        if IsCropping == 1:
            flagOutside = 0
            for d in range(self.dim):
                if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                    # outside the old bound
                    flagOutside = 1
                    self.scalebounds[d, 0] = np.minimum(x_max_scale[d], self.scalebounds_bk[d, 0])
                    self.scalebounds[d, 1] = np.maximum(x_max_scale[d], self.scalebounds_bk[d, 1])
                else:
                    self.scalebounds[d, :] = self.scalebounds_bk[d, :]

            # now the scalebounds are no longer 0-1
            if flagOutside == 0:  # not outside the old bound
                self.scalebounds = self.scalebounds_bk
                self.bounds = self.bounds_bk.copy()
            else:  # outside the old bound => recompute bound
                temp = [self.scalebounds[d, :] * self.max_min_gap[d] + self.bounds_bk[d, 0]
                        for d in range(self.dim)]
                if self.dim > 1:
                    self.bounds = np.reshape(temp, (self.dim, 2))
                else:
                    self.bounds = np.array(temp)
        else:
            temp = [self.scalebounds[d, :] * self.max_min_gap[d] + self.bounds_bk[d, 0]
                    for d in range(self.dim)]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]), self.max_min_gap)
        self.X = np.asarray(temp)
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original))

        # update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) / self.dim)
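# A minimal driver loop for PradaBayOptFBO, sketched under assumptions: the
# toy objective, the bound arrays, and the gp_params keys other than 'MaxIter'
# (which __init__ reads) are placeholders for whatever PradaGaussianProcess
# actually expects.
import numpy as np

def _toy_f(x):
    x = np.asarray(x)
    return -np.sum((x - 1.0) ** 2, axis=-1)   # assumed objective

_gp_params = {'theta': 0.1, 'noise_delta': 1e-6, 'MaxIter': 30}  # assumed keys
_acq = {'name': 'ucb', 'kappa': 2, 'dim': 2}

_bo = PradaBayOptFBO(_gp_params, _toy_f,
                     b_init_lower=np.array([-1.0, -1.0]),
                     b_init_upper=np.array([1.0, 1.0]),
                     b_limit_lower=np.array([-10.0, -10.0]),
                     b_limit_upper=np.array([10.0, 10.0]),
                     acq=_acq)
_bo.init(_gp_params, n_init_points=3)
for _ in range(_gp_params['MaxIter']):
    _bo.run_FBO(_gp_params)   # expand the box, then pick the next evaluation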
class PradaBayOptBatch(object): def __init__(self, gp_params, f, pbounds, acq, verbose=1, opt_toolbox='scipy'): """ Input parameters ---------- f: function to optimize pbounds: bounds on parameters acq: acquisition function, 'ei', 'ucb' opt: optimization toolbox, 'nlopt','direct','scipy' Returns ------- dim: dimension bounds: bounds on original scale scalebounds: bounds on normalized scale of 0-1 time_opt: will record the time spent on optimization gp: Gaussian Process object """ # Store the original dictionary self.pbounds = pbounds # Find number of parameters self.dim = len(pbounds) # Create an array with parameters bounds if isinstance(pbounds, dict): # Get the name of the parameters self.keys = list(pbounds.keys()) self.bounds = [] for key in self.pbounds.keys(): self.bounds.append(self.pbounds[key]) self.bounds = np.asarray(self.bounds) else: self.bounds = np.asarray(pbounds) scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)]) self.scalebounds = scalebounds.T self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0] # Some function to be optimized self.f = f # optimization tool: direct, scipy, nlopt self.opt_toolbox = opt_toolbox # acquisition function type self.acq = acq # store the batch size for each iteration self.NumPoints = [] # Numpy array place holders self.X_original = None # scale the data to 0-1 to fit the GP better self.X = None # X = (X_original - min(bounds)) / (max(bounds) - min(bounds)) self.Y = None # Y = (Y_original - mean(Y_original)) / (max(Y_original) - min(Y_original)) self.Y_original = None self.opt_time = 0 self.L = 0 # Lipschitz constant self.gp = PradaGaussianProcess(gp_params) # Acquisition Function #self.acq_func = None self.acq_func = AcquisitionFunction(acq=self.acq) def posterior(self, Xnew): #xmin, xmax = -2, 10 ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True) return mu, np.sqrt(sigma2) def init(self, n_init_points): """ Input parameters ---------- gp_params: Gaussian Process structure n_init_points: # init points """ # Generate random points l = [ np.random.uniform(x[0], x[1], size=n_init_points) for x in self.bounds ] # Concatenate new random points to possible existing # points from self.explore method. #self.init_points += list(map(list, zip(*l))) temp = np.asarray(l) temp = temp.T init_X = list(temp.reshape((n_init_points, -1))) # Evaluate target function at all initialization points y_init = self.f(init_X) # Turn it into np array and store. self.X_original = np.asarray(init_X) temp_init_point = np.divide((init_X - self.bounds[:, 0]), self.max_min_gap) self.X = np.asarray(temp_init_point) y_init = np.reshape(y_init, (n_init_points, 1)) self.Y_original = np.asarray(y_init) self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.NumPoints = np.append(self.NumPoints, n_init_points) # Set parameters if any was passed #self.gp=PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking #ur = unique_rows(self.X) #self.gp.fit(self.X[ur], self.Y[ur]) #print "#Batch={:d} f_max={:.4f}".format(n_init_points,self.Y.max()) def init_with_data(self, init_X, init_Y): """ Input parameters ---------- gp_params: Gaussian Process structure x,y: # init data observations (in original scale) """ # Turn it into np array and store.
self.X_original = np.asarray(init_X) temp_init_point = np.divide((init_X - self.bounds[:, 0]), self.max_min_gap) self.X_original = np.asarray(init_X) self.X = np.asarray(temp_init_point) self.Y_original = np.asarray(init_Y) self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.NumPoints = np.append(self.NumPoints, len(init_Y)) # Set acquisition function self.acq_func = AcquisitionFunction(self.acq) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) def smooth_the_peak(self, my_peak): # define the local bound around the estimated point local_bound = np.zeros((self.dim, 2)) for dd in range(self.dim): try: local_bound[dd, 0] = my_peak[-1][dd] - 0.005 local_bound[dd, 1] = my_peak[-1][dd] + 0.005 except: local_bound[dd, 0] = my_peak[dd] - 0.005 local_bound[dd, 1] = my_peak[dd] + 0.005 local_bound = np.clip(local_bound, self.scalebounds[:, 0], self.scalebounds[:, 1]) dim = len(local_bound) num_data = 1000 * dim samples = np.zeros(shape=(num_data, dim)) #for k in range(0,dim): samples[:,k] = np.random.uniform(low=local_bound[k][0],high=local_bound[k][1],size=num_data) for dd in range(0, dim): samples[:, dd] = np.linspace(local_bound[dd][0], local_bound[dd][1], num_data) # smooth the peak """ n_bins = 100*np.ones(self.dim) mygrid = np.mgrid[[slice(row[0], row[1], n*1j) for row, n in zip(local_bound, n_bins)]] mygrid=mygrid.reshape(100**self.dim, self.dim) utility_grid=self.acq_func.acq_kind(mygrid,self.gp,self.Y.max()) mysamples=np.vstack((mygrid,utility_grid)) samples_smooth=filters.uniform_filter(mysamples, size=[2,2], output=None, mode='reflect', cval=0.0, origin=0) """ # get the utility after smoothing samples_smooth = samples utility_smooth = self.acq_func.acq_kind(samples_smooth, self.gp, self.Y.max()) # get the peak value y #peak_y=np.max(utility_smooth) # get the peak location x #peak_x=samples_smooth[np.argmax(utility_smooth)] peak_x = my_peak # linear regression regr = linear_model.LinearRegression() regr.fit(samples_smooth, utility_smooth) #residual_ss=np.mean((regr.predict(samples_smooth) - utility_smooth) ** 2) mystd = np.std(utility_smooth) return peak_x, mystd def check_real_peak(self, my_peak, threshold=0.1): # define the local bound around the estimated point local_bound = np.zeros((self.dim, 2)) for dd in range(self.dim): try: local_bound[dd, 0] = my_peak[-1][dd] - 0.01 local_bound[dd, 1] = my_peak[-1][dd] + 0.01 except: local_bound[dd, 0] = my_peak[dd] - 0.01 local_bound[dd, 1] = my_peak[dd] + 0.01 #local_bound=np.clip(local_bound,self.scalebounds[:,0],self.scalebounds[:,1]) local_bound[:, 0] = local_bound[:, 0].clip(self.scalebounds[:, 0], self.scalebounds[:, 1]) local_bound[:, 1] = local_bound[:, 1].clip(self.scalebounds[:, 0], self.scalebounds[:, 1]) dim = len(local_bound) num_data = 100 * dim samples = np.zeros(shape=(num_data, dim)) for dd in range(0, dim): samples[:, dd] = np.linspace(local_bound[dd][0], local_bound[dd][1], num_data) # get the utility after smoothing myutility = self.acq_func.acq_kind(samples, self.gp, self.Y.max()) # linear regression #regr = linear_model.LinearRegression() #regr.fit(samples, myutility) #residual_ss=np.mean((regr.predict(samples_smooth) - utility_smooth) ** 2) #mystd=np.std(myutility) mystd = np.mean(myutility) IsPeak = 0 if mystd > threshold / (self.dim**2): IsPeak = 1 return IsPeak, mystd def estimate_L(self, bounds): ''' Estimate the Lipschitz constant of f by taking maximizing the norm of the expectation of the 
gradient of *f*. ''' def df(x, model, x0): mean_derivative = gp_model.predictive_gradient(self.X, self.Y, x) temp = mean_derivative * mean_derivative if len(temp.shape) <= 1: res = np.sqrt(temp) else: res = np.sqrt( np.sum(temp, axis=1) ) # simply take the norm of the expectation of the gradient return -res gp_model = self.gp dim = len(bounds) num_data = 1000 * dim samples = np.zeros(shape=(num_data, dim)) for k in range(0, dim): samples[:, k] = np.random.uniform(low=bounds[k][0], high=bounds[k][1], size=num_data) #samples = np.vstack([samples,gp_model.X]) pred_samples = df(samples, gp_model, 0) x0 = samples[np.argmin(pred_samples)] res = minimize(df, x0, method='L-BFGS-B', bounds=bounds, args=(gp_model, x0), options={'maxiter': 100}) try: minusL = res.fun[0][0] except: if len(res.fun.shape) == 1: minusL = res.fun[0] else: minusL = res.fun L = -minusL if L < 1e-6: L = 0.0001 ## to avoid problems in cases in which the model is flat. return L def maximize_batch_PS(self, gp_params, B=5, kappa=2): """ Finding a batch of points using Peak Suppression approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB Returns ------- X: a batch of [x_1..x_Nt] """ const_liar = self.Y_original.min() # Set acquisition function #self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa) y_max = self.Y.max() # Set parameters if any was passed self.gp = PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) start_opt = time.time() # copy GP, X and Y temp_gp = self.gp temp_X = self.X temp_Y = self.Y #store new_x new_X = [] stdPeak = [0] * B IsPeak = [0] * B for ii in range(B): # Finding argmax of the acquisition function. x_max = acq_max(ac=self.acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds, opt_toolbox=self.opt_toolbox) # Test if x_max is repeated, if it is, draw another one at random if np.any((np.abs(temp_X - x_max)).sum(axis=1) < 0.002 * self.dim) | np.isnan(x_max.sum()): #x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1],size=self.scalebounds.shape[0]) IsPeak[ii] = 0 stdPeak[ii] = 0 print "reject" else: IsPeak[ii], stdPeak[ii] = self.check_real_peak(x_max) print "IsPeak={:d} std={:.5f}".format(IsPeak[ii], stdPeak[ii]) if ii == 0: new_X = x_max else: new_X = np.vstack((new_X, x_max.reshape((1, -1)))) temp_X = np.vstack((temp_X, x_max.reshape((1, -1)))) temp_Y = np.append(temp_Y, const_liar) #temp_gp.fit(temp_X,temp_Y) temp_gp.fit_incremental(x_max, np.asarray([const_liar])) """ toplot_bo=copy.deepcopy(self) toplot_bo.gp=copy.deepcopy(temp_gp) toplot_bo.X=temp_X toplot_bo.X_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(temp_X)] toplot_bo.X_original=np.asarray(toplot_bo.X_original) toplot_bo.Y=temp_Y toplot_bo.Y_original=temp_Y*(np.max(self.Y_original)-np.min(self.Y_original))+np.mean(self.Y_original) visualization.plot_bo(toplot_bo) """ IsPeak = np.asarray(IsPeak) # check if there is no real peak, then pick up the top peak (highest std) # rank the peak idx = np.sort(stdPeak) if np.sum(IsPeak) == 0: top_peak = np.argmax(stdPeak) new_X = new_X[top_peak] else: new_X = new_X[IsPeak == 1] print new_X finished_opt = time.time() elapse_opt = finished_opt - start_opt self.opt_time = np.hstack((self.opt_time, elapse_opt)) # Updating the GP. 
#new_X=new_X.reshape((-1, self.dim)) # Test if x_max is repeated, if it is, draw another one at random temp_new_X = [] for idx, val in enumerate(new_X): if np.all( np.any(np.abs(self.X - val) > 0.02, axis=1)): # check if a data point is already taken temp_new_X = np.append(temp_new_X, val) if len(temp_new_X) == 0: temp_new_X = np.zeros((1, self.dim)) for idx in range(0, self.dim): temp_new_X[0, idx] = np.random.uniform(self.scalebounds[idx, 0], self.scalebounds[idx, 1], 1) else: temp_new_X = temp_new_X.reshape((-1, self.dim)) self.X = np.vstack((self.X, temp_new_X)) # convert back to original scale temp_X_new_original = [ val * self.max_min_gap + self.bounds[:, 0] for idx, val in enumerate(temp_new_X) ] temp_X_new_original = np.asarray(temp_X_new_original) self.X_original = np.vstack((self.X_original, temp_X_new_original)) for idx, val in enumerate(temp_X_new_original): self.Y_original = np.append(self.Y_original, self.f(val)) # update Y after change Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.NumPoints = np.append(self.NumPoints, temp_X_new_original.shape[0]) print "#Batch={:d} f_max={:.4f}".format(temp_X_new_original.shape[0], self.Y_original.max()) def maximize_batch_CL(self, gp_params, B=5): """ Finding a batch of points using Constant Liar approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB Returns ------- X: a batch of [x_1..x_Nt] """ self.NumPoints = np.append(self.NumPoints, B) if self.acq['name'] == 'random': x_max = [ np.random.uniform(x[0], x[1], size=B) for x in self.bounds ] x_max = np.asarray(x_max) x_max = x_max.T self.X_original = np.vstack((self.X_original, x_max)) # evaluate Y using original X #self.Y = np.append(self.Y, self.f(temp_X_new_original)) self.Y_original = np.append(self.Y_original, self.f(x_max)) # update Y after change Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.opt_time = np.hstack((self.opt_time, 0)) return #const_liar=self.Y.mean() #const_liar=self.Y_original.mean() #const_liar=self.Y.max() # Set acquisition function self.acq_func = AcquisitionFunction(self.acq) y_max = self.Y.max() # Set parameters if any was passed self.gp = PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) start_opt = time.time() # copy GP, X and Y temp_gp = self.gp temp_X = self.X temp_Y = self.Y #temp_Y_original=self.Y_original #store new_x new_X = [] for ii in range(B): # Finding argmax of the acquisition function. x_max = acq_max(ac=self.acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds) val_acq = self.acq_func.acq_kind(x_max, temp_gp, y_max) print "CL alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0]) # Test if x_max is repeated, if it is, draw another one at random # If it is repeated, print a warning #if np.any((self.X - x_max).sum(axis=1) == 0) | np.isnan(x_max.sum()): #x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1],size=self.scalebounds.shape[0]) if ii == 0: new_X = x_max else: new_X = np.vstack((new_X, x_max.reshape((1, -1)))) temp_X = np.vstack((temp_X, x_max.reshape((1, -1)))) const_liar, const_liar_variance = temp_gp.predict(x_max, eval_MSE=1) temp_Y = np.append(temp_Y, const_liar) temp_gp.fit(temp_X, temp_Y) # Updating the GP. 
new_X = new_X.reshape((B, -1)) finished_opt = time.time() elapse_opt = finished_opt - start_opt self.opt_time = np.hstack((self.opt_time, elapse_opt)) #print new_X self.X = np.vstack((self.X, new_X)) # convert back to original scale temp_X_new_original = [ val * self.max_min_gap + self.bounds[:, 0] for idx, val in enumerate(new_X) ] temp_X_new_original = np.asarray(temp_X_new_original) self.X_original = np.vstack((self.X_original, temp_X_new_original)) for idx, val in enumerate(temp_X_new_original): self.Y_original = np.append(self.Y_original, self.f(val)) # update Y after changing Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) #print "#Batch={:d} f_max={:.4f}".format(B,self.Y_original.max()) return new_X, temp_X_new_original def maximize_batch_CL_incremental(self, gp_params, B=5): """ Finding a batch of points using the Constant Liar approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB Returns ------- X: a batch of [x_1..x_Nt] """ self.NumPoints = np.append(self.NumPoints, B) if self.acq['name'] == 'random': x_max = [ np.random.uniform(x[0], x[1], size=B) for x in self.bounds ] x_max = np.asarray(x_max) x_max = x_max.T self.X_original = np.vstack((self.X_original, x_max)) # evaluate Y using original X #self.Y = np.append(self.Y, self.f(temp_X_new_original)) self.Y_original = np.append(self.Y_original, self.f(x_max)) # update Y after changing Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.opt_time = np.hstack((self.opt_time, 0)) return #const_liar=self.Y.mean() #const_liar=self.Y_original.min() #const_liar=self.Y.max() # Set acquisition function self.acq_func = AcquisitionFunction(self.acq) y_max = self.Y.max() # Set parameters if any was passed self.gp = PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) start_opt = time.time() # copy GP, X and Y temp_gp = copy.deepcopy(self.gp) temp_X = self.X temp_Y = self.Y #temp_Y_original=self.Y_original #store new_x new_X = [] for ii in range(B): # Finding argmax of the acquisition function. x_max = acq_max(ac=self.acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds) # Test if x_max is repeated; if it is, draw another one at random if np.any( np.all(np.abs(self.X - x_max) < 0.02, axis=1)): # a point counts as taken only if every coordinate is close x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1], size=self.scalebounds.shape[0]) if ii == 0: new_X = x_max else: new_X = np.vstack((new_X, x_max.reshape((1, -1)))) # the GP mean at x_max serves as the constant lie const_liar, const_liar_variance = temp_gp.predict(x_max, eval_MSE=True) #temp_X= np.vstack((temp_X, x_max.reshape((1, -1)))) #temp_Y = np.append(temp_Y, const_liar ) #temp_gp.fit(temp_X,temp_Y) # update the Gaussian Process and thus the acquisition function #temp_gp.compute_incremental_var(temp_X,x_max) temp_gp.fit_incremental(x_max, np.asarray([const_liar])) # Updating the GP.
new_X = new_X.reshape((B, -1)) finished_opt = time.time() elapse_opt = finished_opt - start_opt self.opt_time = np.hstack((self.opt_time, elapse_opt)) #print new_X self.X = np.vstack((self.X, new_X)) # convert back to original scale temp_X_new_original = [ val * self.max_min_gap + self.bounds[:, 0] for idx, val in enumerate(new_X) ] temp_X_new_original = np.asarray(temp_X_new_original) self.X_original = np.vstack((self.X_original, temp_X_new_original)) for idx, val in enumerate(temp_X_new_original): self.Y_original = np.append(self.Y_original, self.f(val)) # update Y after change Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) #print "#Batch={:d} f_max={:.4f}".format(B,self.Y_original.max()) def fitIGMM(self, obs, IsPlot=0): """ Fitting the Infinite Gaussian Mixture Model and GMM where applicable Input Parameters ---------- obs: samples generated under the acqusition function by BGSS IsPlot: flag variable for visualization Returns ------- mean vector: mu_1,...mu_K """ if self.dim <= 2: n_init_components = 3 else: n_init_components = np.int(self.dim * 1.1) dpgmm = mixture.DPGMM(n_components=n_init_components, covariance_type="full", min_covar=10) dpgmm.fit(obs) # check if DPGMM fail, then use GMM. mydist = euclidean_distances(dpgmm.means_, dpgmm.means_) np.fill_diagonal(mydist, 99) if dpgmm.converged_ is False or np.min(mydist) < (0.01 * self.dim): dpgmm = mixture.GMM(n_components=n_init_components, covariance_type="full", min_covar=1e-3) dpgmm.fit(obs) if self.dim >= 5: # since kmeans does not provide weight and means, we will manually compute it try: dpgmm.weights_ = np.histogram(dpgmm.labels_, np.int(self.dim * 1.2)) dpgmm.weights_ = np.true_divide(dpgmm.weights_[0], np.sum(dpgmm.weights_[0])) dpgmm.means_ = dpgmm.cluster_centers_ except: pass # truncated for variational inference weight = dpgmm.weights_ weight_sorted = np.sort(weight) weight_sorted = weight_sorted[::-1] temp_cumsum = np.cumsum(weight_sorted) cutpoint = 0 for idx, val in enumerate(temp_cumsum): if val > 0.73: cutpoint = weight_sorted[idx] break ClusterIndex = [ idx for idx, val in enumerate(dpgmm.weights_) if val >= cutpoint ] myMeans = dpgmm.means_[ClusterIndex] #dpgmm.means_=dpgmm.means_[ClusterIndex] dpgmm.truncated_means_ = dpgmm.means_[ClusterIndex] #myCov=dpgmm.covars_[ClusterIndex] if IsPlot == 1 and self.dim <= 2: visualization.plot_histogram(self, obs) visualization.plot_mixturemodel(dpgmm, self, obs) new_X = myMeans.reshape((len(ClusterIndex), -1)) new_X = new_X.tolist() return new_X def maximize_batch_B3O(self, gp_params, kappa=2, IsPlot=0): """ Finding a batch of points using Budgeted Batch Bayesian Optimization approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB IsPlot: flag variable for visualization Returns ------- X: a batch of [x_1..x_Nt] """ # Set acquisition function self.acq_func = AcquisitionFunction(self.acq) # Step 2 in the Algorithm # Set parameters for Gaussian Process self.gp = PradaGaussianProcess(gp_params) if len(self.gp.KK_x_x_inv) == 0: # check if empty self.gp.fit(self.X, self.Y) #else: #self.gp.fit_incremental(self.X[ur], self.Y[ur]) # record optimization time start_gmm_opt = time.time() if IsPlot == 1 and self.dim <= 2: #plot visualization.plot_bo(self) # Step 4 in the Algorithm # generate samples from Acquisition function # check the bound 0-1 or original bound obs = acq_batch_generalized_slice_sampling_generate( self.acq_func.acq_kind, 
self.gp, self.scalebounds, N=500, y_max=self.Y.max()) # Step 5 and 6 in the Algorithm if len(obs) == 0: # monotonous acquisition function print "Monotonous acquisition function" new_X = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1], size=self.bounds.shape[0]) new_X = new_X.reshape((1, -1)) new_X = new_X.tolist() else: new_X = self.fitIGMM(obs, IsPlot) # Test if x_max is repeated, if it is, draw another one at random temp_new_X = [] for idx, val in enumerate(new_X): if np.all( np.any(np.abs(self.X - val) > 0.02, axis=1)): # check if a data point is already taken temp_new_X = np.append(temp_new_X, val) if len(temp_new_X) == 0: temp_new_X = np.zeros((1, self.dim)) for idx in range(0, self.dim): temp_new_X[0, idx] = np.random.uniform(self.scalebounds[idx, 0], self.scalebounds[idx, 1], 1) else: temp_new_X = temp_new_X.reshape((-1, self.dim)) self.NumPoints = np.append(self.NumPoints, temp_new_X.shape[0]) finished_gmm_opt = time.time() elapse_gmm_opt = finished_gmm_opt - start_gmm_opt self.opt_time = np.hstack((self.opt_time, elapse_gmm_opt)) self.X = np.vstack((self.X, temp_new_X)) temp_X_new_original = [ val * self.max_min_gap + self.bounds[:, 0] for idx, val in enumerate(temp_new_X) ] temp_X_new_original = np.asarray(temp_X_new_original) # Step 7 in the algorithm # Evaluate y=f(x) temp = self.f(temp_X_new_original) temp = np.reshape(temp, (-1, 1)) # Step 8 in the algorithm self.Y_original = np.append(self.Y_original, temp) self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.X_original = np.vstack((self.X_original, temp_X_new_original)) print "#Batch={:d} f_max={:.4f}".format(temp_new_X.shape[0], self.Y_original.max()) #ur = unique_rows(self.X) #self.gp.fit(self.X[ur], self.Y[ur]) #self.gp.fit_incremental(temp_new_X, temp_new_Y) #====================================================================================== #====================================================================================================== #====================================================================================================== #====================================================================================================== def maximize_batch_BUCB(self, gp_params, B=5): """ Finding a batch of points using GP-BUCB approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class B: fixed batch size for all iteration kappa: constant value in UCB IsPlot: flag variable for visualization Returns ------- X: a batch of [x_1..x_B] """ self.B = B # Set acquisition function self.acq_func = AcquisitionFunction(self.acq) # Set parameters if any was passed self.gp = PradaGaussianProcess(gp_params) if len(self.gp.KK_x_x_inv) == 0: # check if empty self.gp.fit(self.X, self.Y) #else: #self.gp.fit_incremental(self.X[ur], self.Y[ur]) start_gmm_opt = time.time() y_max = self.gp.Y.max() # check the bound 0-1 or original bound temp_X = self.X temp_gp = self.gp temp_gp.X_bucb = temp_X temp_gp.KK_x_x_inv_bucb = self.gp.KK_x_x_inv # finding new X new_X = [] for ii in range(B): # Finding argmax of the acquisition function. 
x_max = acq_max(ac=self.acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds) if np.any( (temp_X - x_max).sum(axis=1) == 0) | np.isnan(x_max.sum()): x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1], size=self.scalebounds.shape[0]) if ii == 0: new_X = x_max else: new_X = np.vstack((new_X, x_max.reshape((1, -1)))) # update the Gaussian Process and thus the acquisition function temp_gp.compute_incremental_var(temp_X, x_max) temp_X = np.vstack((temp_X, x_max.reshape((1, -1)))) temp_gp.X_bucb = temp_X # record the optimization time finished_gmm_opt = time.time() elapse_gmm_opt = finished_gmm_opt - start_gmm_opt self.time_gmm_opt = np.hstack((self.time_gmm_opt, elapse_gmm_opt)) self.NumPoints = np.append(self.NumPoints, B) self.X = temp_X # convert back to original scale temp_X_new_original = [ val * self.max_min_gap + self.bounds[:, 0] for idx, val in enumerate(new_X) ] temp_X_new_original = np.asarray(temp_X_new_original) self.X_original = np.vstack((self.X_original, temp_X_new_original)) # evaluate y=f(x) temp = self.f(temp_X_new_original) temp = np.reshape(temp, (-1, 1)) self.Y_original = np.append(self.Y_original, temp) self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) print "#Batch={:d} f_max={:.4f}".format(new_X.shape[0], self.Y_original.max())
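# A minimal standalone sketch of the idea behind the X_bucb /
# compute_incremental_var bookkeeping above: GP posterior variance depends
# only on the inputs, so a batch can be built by "hallucinating" each
# selected point without knowing its y-value. The RBF kernel and names here
# are illustrative, not the PradaGaussianProcess API.
import numpy as np

def rbf(A, B, ls=0.2):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / ls ** 2)

def posterior_var(Xq, X, noise=1e-4):
    K = rbf(X, X) + noise * np.eye(len(X))
    Ks = rbf(Xq, X)
    # prior variance k(x,x)=1 minus the explained part diag(Ks K^-1 Ks^T)
    return 1.0 - np.einsum('ij,jk,ik->i', Ks, np.linalg.inv(K), Ks)

rng = np.random.default_rng(0)
X, Xq = rng.random((5, 1)), rng.random((3, 1))
x_new = rng.random((1, 1))                       # selected point, y unknown
print(posterior_var(Xq, X))                      # variance before
print(posterior_var(Xq, np.vstack([X, x_new])))  # shrinks everywhere after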
def maximize(self,gp_params,kappa=2): """ Main optimization method. Input parameters ---------- kappa: parameter for UCB acquisition only. gp_params: parameter for Gaussian Process Returns ------- x: recommended point for evaluation """ # init a new Gaussian Process self.gp=PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) # Set acquisition function start_opt=time.time() acq=self.acq # select the acquisition function if acq=='nei': self.L=self.estimate_L(self.bounds) self.acq_func = AcquisitionFunction(kind=self.acq, L=self.L) else: self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa) y_max = self.Y.max() # select the optimization toolbox if self.opt=='nlopt': x_max,f_max = acq_max_nlopt(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) if self.opt=='scipy': x_max = acq_max(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) if self.opt=='direct': x_max = acq_max_direct(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) # record the optimization time finished_opt=time.time() elapse_opt=finished_opt-start_opt self.time_opt=np.hstack((self.time_opt,elapse_opt)) # Test if x_max is repeated; if it is, draw another one at random (compare rows elementwise: a signed sum of differences can cancel to zero for distinct points) if np.any(np.all(np.abs(self.X - x_max) < 1e-12, axis=1)): x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1], size=self.scalebounds.shape[0]) # store X self.X = np.vstack((self.X, x_max.reshape((1, -1)))) # compute X in original scale temp_X_new_original=x_max*self.max_min_gap+self.bounds[:,0] self.X_original=np.vstack((self.X_original, temp_X_new_original)) # evaluate Y using original X self.Y = np.append(self.Y, self.f(temp_X_new_original))
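# The surrounding methods constantly convert between the original box and the
# 0-1 cube, and range-normalise Y; a minimal sketch of both transforms (the
# bounds values below are made up for illustration):
import numpy as np

bounds = np.array([[-5.0, 10.0], [0.0, 15.0]])
max_min_gap = bounds[:, 1] - bounds[:, 0]

def to_unit(x_original):
    return (x_original - bounds[:, 0]) / max_min_gap

def to_original(x_scaled):
    return x_scaled * max_min_gap + bounds[:, 0]

x = np.array([2.5, 7.5])
assert np.allclose(to_original(to_unit(x)), x)   # exact round trip

# Y is range-normalised (not z-scored) in this class:
Y_original = np.array([1.0, 3.0, 2.0])
Y = (Y_original - Y_original.mean()) / (Y_original.max() - Y_original.min())
print(Y)                                          # [-0.5  0.5  0. ]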
def maximize_batch_B3O(self,gp_params, kappa=2,IsPlot=0): """ Finding a batch of points using Budgeted Batch Bayesian Optimization approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class kappa: constant value in UCB IsPlot: flag variable for visualization Returns ------- X: a batch of [x_1..x_Nt] """ # Set acquisition function self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa) # Step 2 in the Algorithm # Set parameters for Gaussian Process self.gp=PradaGaussianProcess(gp_params) if len(self.gp.KK_x_x_inv)==0: # check if empty self.gp.fit(self.X, self.Y) #else: #self.gp.fit_incremental(self.X[ur], self.Y[ur]) # record optimization time start_gmm_opt=time.time() if IsPlot==1 and self.dim<=2:#plot visualization.plot_bo(self) # Step 4 in the Algorithm # generate samples from Acquisition function # check the bound 0-1 or original bound obs=acq_batch_generalized_slice_sampling_generate(self.acq_func.acq_kind,self.gp,self.scalebounds,N=500,y_max=self.Y.max()) # Step 5 and 6 in the Algorithm if len(obs)==0: # monotonous acquisition function print "Monotonous acquisition function" new_X=np.random.uniform(self.bounds[:, 0],self.bounds[:, 1],size=self.bounds.shape[0]) new_X=new_X.reshape((1,-1)) new_X=new_X.tolist() else: new_X=self.fitIGMM(obs,IsPlot) # Test if x_max is repeated, if it is, draw another one at random temp_new_X=[] for idx,val in enumerate(new_X): if np.all(np.any(np.abs(self.X-val)>0.02,axis=1)): # check if a data point is already taken temp_new_X=np.append(temp_new_X,val) if len(temp_new_X)==0: temp_new_X=np.zeros((1,self.dim)) for idx in range(0,self.dim): temp_new_X[0,idx]=np.random.uniform(self.scalebounds[idx,0],self.scalebounds[idx,1],1) else: temp_new_X=temp_new_X.reshape((-1,self.dim)) self.NumPoints=np.append(self.NumPoints,temp_new_X.shape[0]) finished_gmm_opt=time.time() elapse_gmm_opt=finished_gmm_opt-start_gmm_opt self.opt_time=np.hstack((self.opt_time,elapse_gmm_opt)) self.X=np.vstack((self.X, temp_new_X)) temp_X_new_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(temp_new_X)] temp_X_new_original=np.asarray(temp_X_new_original) # Step 7 in the algorithm # Evaluate y=f(x) temp=self.f(temp_X_new_original) temp=np.reshape(temp,(-1,1)) # Step 8 in the algorithm self.Y=np.append(self.Y,temp) self.X_original=np.vstack((self.X_original, temp_X_new_original)) print "#Batch={:d} f_max={:.3f}".format(temp_new_X.shape[0],self.Y.max())
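# fitIGMM above relies on sklearn.mixture.DPGMM / GMM, which were removed
# from scikit-learn after 0.19. On a current install, BayesianGaussianMixture
# with a Dirichlet-process prior is the usual stand-in. A rough sketch (not a
# drop-in replacement) of fitting the acquisition samples and keeping the
# dominant components up to the same 0.73 cumulative mass:
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

def fit_igmm_modern(obs, n_init_components, mass=0.73):
    dpgmm = BayesianGaussianMixture(
        n_components=n_init_components,
        covariance_type="full",
        weight_concentration_prior_type="dirichlet_process",
    ).fit(obs)
    order = np.argsort(dpgmm.weights_)[::-1]  # heaviest components first
    n_keep = max(1, int(np.searchsorted(np.cumsum(dpgmm.weights_[order]), mass) + 1))
    return dpgmm.means_[order[:n_keep]]

print(fit_igmm_modern(np.random.rand(200, 2), n_init_components=3))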
def expandBoundsDDB_MAP(self): """ Description: Expands the search space with the MAP implementation of our DDB method """ print('Attempting to expand search space with DDB-MAP method') alpha=self.alpha beta=self.beta bound_samples=100 # Number of radius samples used to fit the log-logistic distribution # Find y^+ and x^+ ymax=np.max(self.Y) # Generate test radii max_loc=np.argmax(self.Y) xmax=self.X[max_loc] test_bound=np.zeros(self.scalebounds.shape) bound_dist=np.zeros(bound_samples) bound_center=xmax test_bound[:,1]=bound_center+0.5 test_bound[:,0]=bound_center-0.5 max_radius=np.max(np.array([np.max(max_bound_size-test_bound[:,1]),np.max(test_bound[:,0])])) step=max_radius/bound_samples packing_number=np.zeros(bound_samples) # Generate a Thompson sample maximum to estimate the internal maximum TS=AcquisitionFunction.ThompsonSampling(self.gp) tsb_x,tsb_y=acq_max_global(TS, self.gp, bounds=self.scalebounds) # Generate Gumbel samples to estimate the external maximum for i in range(0,bound_samples): bound_length=test_bound[:,1]-test_bound[:,0] volume=np.power(max_bound_size,self.dim)-np.prod(bound_length) packing_number[i]=round(volume/(5*self.gp.lengthscale)) mu=stats.norm.ppf(1.0-1.0/packing_number[i]) sigma=stats.norm.ppf(1.0-(1.0/packing_number[i])*np.exp(-1.0))-stats.norm.ppf(1.0-(1.0/(packing_number[i]))) bound_dist[i]=np.exp(-np.exp(-(-tsb_y-mu)/sigma)) test_bound[:,1]=test_bound[:,1]+step test_bound[:,0]=test_bound[:,0]-step bound_dist[np.isnan(bound_dist)]=1 # Fit the log-logistic parameters to the Gumbel samples xfit=np.arange(0,max_radius,max_radius/100) popt,pcov=optimize.curve_fit(self.sufficientBoundPDF,xfit[0:100],bound_dist,bounds=np.array([[5,1.1],[20,5]])) print("popt={}".format(popt)) b=ymax/popt[0] a=popt[1] print("b={}, ymax={}".format(b,ymax)) # Find the gamma and log-logistic modes to determine the optimisation bound c=ymax/b loglog_mode=a*np.power((c-1.0)/(c+1.0),(1/c)) gamma_mode=(alpha-1)/beta opt_bound=np.ones([2]) opt_bound[0]=min(loglog_mode,gamma_mode) opt_bound[1]=max(loglog_mode,gamma_mode) bound_range=(opt_bound[1]-opt_bound[0]) # Find the MAP estimate of the radius r for d in range(0,self.dim): r_max=0 p_max=0 for x0 in np.arange(opt_bound[0],opt_bound[1],bound_range/10): res=optimize.minimize(lambda x: self.radiusPDF(x,alpha,beta,b,ymax,a),x0=x0, bounds=np.array([opt_bound]), method='L-BFGS-B') if -res.fun>p_max: r_max=res.x p_max=-res.fun if r_max>opt_bound[1]: r_max=opt_bound[1] xplot=np.arange(0,10,0.01) yplot=-self.radiusPDF(xplot,alpha,beta,b,ymax,a) max_loc=np.argmax(yplot) print("optimal radius of {} with unscaled probability of {}".format(r_max,p_max)) self.scalebounds[d,1]=xmax[d]+r_max self.scalebounds[d,0]=xmax[d]-r_max print("search space extended to {} with DDB".format(self.scalebounds))
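# The MAP step above maximises a gamma-prior x log-logistic-likelihood
# posterior over the radius. A compact standalone sketch of that product and
# the multistart L-BFGS-B search; alpha/beta match the priors set in
# __init__, while a, b and ymax are made-up illustrative values:
import numpy as np
from scipy import stats, optimize

alpha, beta = 2.0, 4.0
a, b, ymax = 2.0, 0.5, 1.0

def neg_radius_pdf(r):
    gamma = stats.gamma.pdf(r, alpha, scale=1.0 / beta)  # prior
    loglog = stats.fisk.pdf(r, ymax / b, scale=a)        # likelihood
    return -(gamma * loglog)                             # negated for minimize

opt_bound = (0.05, 5.0)
best = min(
    (optimize.minimize(neg_radius_pdf, x0=[x0], bounds=[opt_bound], method="L-BFGS-B")
     for x0 in np.linspace(opt_bound[0], opt_bound[1], 10)),
    key=lambda res: float(res.fun),
)
print("MAP radius ~", best.x[0])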
class PradaBayOptFn(object): def __init__(self, gp_params, f, init_bounds, pbounds, acq, verbose=1, opt_toolbox='nlopt'): """ Input parameters ---------- f: function to optimize: pbounds: bounds on parameters acq: acquisition function, acq['name']=['ei','ucb','poi','lei'] ,acq['kappa'] for ucb, acq['k'] for lei opt: optimization toolbox, 'nlopt','direct','scipy' Returns ------- dim: dimension bounds: bounds on original scale scalebounds: bounds on normalized scale of 0-1 time_opt: will record the time spent on optimization gp: Gaussian Process object """ # Find number of parameters self.dim = len(pbounds) # Create an array with parameters bounds if isinstance(pbounds, dict): # Get the name of the parameters self.keys = list(pbounds.keys()) self.bounds = [] for key in pbounds.keys(): self.bounds.append(pbounds[key]) self.bounds = np.asarray(self.bounds) else: self.bounds = np.asarray(pbounds) if len(init_bounds) == 0: self.init_bounds = self.bounds.copy() else: self.init_bounds = init_bounds if isinstance(init_bounds, dict): # Get the name of the parameters self.keys = list(init_bounds.keys()) self.init_bounds = [] for key in init_bounds.keys(): self.init_bounds.append(init_bounds[key]) self.init_bounds = np.asarray(self.init_bounds) else: self.init_bounds = np.asarray(init_bounds) # create a scalebounds 0-1 scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)]) self.scalebounds = scalebounds.T self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0] # Some function to be optimized self.f = f # optimization toolbox self.opt_toolbox = opt_toolbox # acquisition function type self.acq = acq # store X in original scale self.X_original = None # store X in 0-1 scale self.X = None # store y=f(x) # (y - mean)/(max-min) self.Y = None # y original scale self.Y_original = None self.time_opt = 0 self.k_Neighbor = 2 # Lipschitz constant self.L = 0 # Gaussian Process class self.gp = PradaGaussianProcess(gp_params) # acquisition function self.acq_func = None # stop condition self.stop_flag = 0 # will be later used for visualization def posterior(self, Xnew): self.gp.fit(self.X, self.Y) mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True) return mu, np.sqrt(sigma2) def init(self, gp_params, n_init_points=3): """ Input parameters ---------- gp_params: Gaussian Process structure n_init_points: # init points """ # Generate random points l = [ np.random.uniform(x[0], x[1], size=n_init_points) for x in self.init_bounds ] # Concatenate new random points to possible existing # points from self.explore method. temp = np.asarray(l) temp = temp.T init_X = list(temp.reshape((n_init_points, -1))) self.X_original = np.asarray(init_X) # Evaluate target function at all initialization y_init = self.f(init_X) y_init = np.reshape(y_init, (n_init_points, 1)) self.Y_original = np.asarray(y_init) self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) # convert it to scaleX temp_init_point = np.divide((init_X - self.bounds[:, 0]), self.max_min_gap) self.X = np.asarray(temp_init_point) def estimate_L(self, bounds): ''' Estimate the Lipschitz constant of f by taking maximizing the norm of the expectation of the gradient of *f*. 
''' def df(x, model, x0): mean_derivative = gp_model.predictive_gradient(self.X, self.Y, x) temp = mean_derivative * mean_derivative if len(temp.shape) <= 1: res = np.sqrt(temp) else: res = np.sqrt( np.sum(temp, axis=1) ) # simply take the norm of the expectation of the gradient return -res gp_model = self.gp dim = len(bounds) num_data = 1000 * dim samples = np.zeros(shape=(num_data, dim)) for k in range(0, dim): samples[:, k] = np.random.uniform(low=bounds[k][0], high=bounds[k][1], size=num_data) #samples = np.vstack([samples,gp_model.X]) pred_samples = df(samples, gp_model, 0) x0 = samples[np.argmin(pred_samples)] res = minimize(df, x0, method='L-BFGS-B', bounds=bounds, args=(gp_model, x0), options={'maxiter': 100}) try: minusL = res.fun[0][0] except: if len(res.fun.shape) == 1: minusL = res.fun[0] else: minusL = res.fun L = -minusL if L < 1e-6: L = 0.0001 ## to avoid problems in cases in which the model is flat. return L def maximize(self, gp_params, kappa=2): """ Main optimization method. Input parameters ---------- kappa: parameter for UCB acquisition only. gp_params: parameter for Gaussian Process Returns ------- x: recommented point for evaluation """ if self.acq['name'] == 'random': x_max = [ np.random.uniform(x[0], x[1], size=1) for x in self.bounds ] x_max = np.asarray(x_max) x_max = x_max.T self.X_original = np.vstack((self.X_original, x_max)) # evaluate Y using original X #self.Y = np.append(self.Y, self.f(temp_X_new_original)) self.Y_original = np.append(self.Y_original, self.f(x_max)) # update Y after change Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original)) self.time_opt = np.hstack((self.time_opt, 0)) return # init a new Gaussian Process self.gp = PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) # Set acquisition function start_opt = time.time() acq = self.acq y_max = self.Y.max() #self.L=self.estimate_L(self.scalebounds) # select the acquisition function if acq['name'] == 'nei': self.L = self.estimate_L(self.scalebounds) self.acq_func = AcquisitionFunction(kind=self.acq, L=self.L) else: self.acq_func = AcquisitionFunction(self.acq) if acq['name'] == "ei_mu": #find the maximum in the predictive mean mu_acq = {} mu_acq['name'] = 'mu' mu_acq['dim'] = self.dim acq_mu = AcquisitionFunction(mu_acq) x_mu_max = acq_max(ac=acq_mu.acq_kind, gp=self.gp, y_max=y_max, bounds=self.scalebounds, opt_toolbox=self.opt_toolbox) # set y_max = mu_max y_max = acq_mu.acq_kind(x_mu_max, gp=self.gp, y_max=y_max) x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, y_max=y_max, bounds=self.scalebounds, opt_toolbox=self.opt_toolbox) val_acq = self.acq_func.acq_kind(x_max, self.gp, y_max) #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0]) # check the value alpha(x_max)==0 #if val_acq<0.0001: #self.stop_flag=1 #return # select the optimization toolbox """ if self.opt=='nlopt': x_max,f_max = acq_max_nlopt(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) if self.opt=='scipy': if self.opt=='direct': x_max = acq_max_direct(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) """ # record the optimization time finished_opt = time.time() elapse_opt = finished_opt - start_opt self.time_opt = np.hstack((self.time_opt, elapse_opt)) # Test if x_max is repeated, if it is, draw another one at random if np.any((self.X - x_max).sum(axis=1) == 0): x_max = np.random.uniform(self.scalebounds[:, 0], 
self.scalebounds[:, 1], size=self.scalebounds.shape[0]) # store X self.X = np.vstack((self.X, x_max.reshape((1, -1)))) # compute X in original scale temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0] self.X_original = np.vstack((self.X_original, temp_X_new_original)) # evaluate Y using original X #self.Y = np.append(self.Y, self.f(temp_X_new_original)) self.Y_original = np.append(self.Y_original, self.f(temp_X_new_original)) # update Y after change Y_original self.Y = (self.Y_original - np.mean(self.Y_original)) / ( np.max(self.Y_original) - np.min(self.Y_original))
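# One caveat about the duplicate test used above: (self.X - x_max).sum(axis=1) == 0
# sums *signed* coordinate differences, so two distinct points whose offsets
# cancel are wrongly treated as repeats. A small demonstration plus a stricter
# row-membership check:
import numpy as np

X = np.array([[0.5, 0.5]])
x_max = np.array([0.6, 0.4])                              # not a row of X

print(np.any((X - x_max).sum(axis=1) == 0))               # True: false positive
print(np.any(np.all(np.abs(X - x_max) < 1e-12, axis=1)))  # False: correct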
def reduced_eval_consistent_bayesian_model( bayesian_model: mc_dropout.BayesianModule, acquisition_function: AcquisitionFunction, num_classes: int, k: int, initial_percentage: int, reduce_percentage: int, target_size: int, available_loader, device=None, ) -> SubsetEvalResults: """Performs a scoring step with k inference samples while reducing the dataset to at most min_remaining_percentage. Before computing anything at all the initial available dataset is randomly culled to initial_percentage. Every `chunk_size` inferences BALD is recomputed and the bottom `reduce_percentage` samples are dropped.""" global reduced_eval_consistent_bayesian_model_cuda_chunk_size # TODO: ActiveLearningData should be renamed to be a more modular SplitDataset. # Here, we need to use available_dataset because it allows us to easily recover the original indices. # We start with all data in the acquired data. subset_split = active_learning_data.ActiveLearningData( available_loader.dataset) initial_length = len(available_loader.dataset) initial_split_length = initial_length * initial_percentage // 100 # By acquiring [initial_split_length:], we make the tail unavailable. subset_split.acquire(torch.randperm(initial_length)[initial_split_length:]) subset_dataloader = data.DataLoader(subset_split.available_dataset, shuffle=False, batch_size=available_loader.batch_size) print(f"Scoring subset of {len(subset_dataloader.dataset)} items:") # We're done with available_loader in this function. available_loader = None with torch.no_grad(): B = len(subset_split.available_dataset) C = num_classes # We stay on the CPU. logits_B_K_C = None k_lower = 0 torch_utils.gc_cuda() chunk_size = reduced_eval_consistent_bayesian_model_cuda_chunk_size if device.type == "cuda" else 32 while k_lower < k: try: k_upper = min(k_lower + chunk_size, k) old_logit_B_K_C = logits_B_K_C # This also stays on the CPU. logits_B_K_C = torch.empty((B, k_upper, C), dtype=torch.float64) # Copy the old data over. if k_lower > 0: logits_B_K_C[:, 0:k_lower, :].copy_(old_logit_B_K_C) old_logit_B_K_C = None # This resets the dropout masks. bayesian_model.eval() for i, (batch, _) in enumerate( with_progress_bar( subset_dataloader, unit_scale=subset_dataloader.batch_size)): lower = i * subset_dataloader.batch_size upper = min(lower + subset_dataloader.batch_size, B) batch = batch.to(device) # batch_size x ws x classes mc_output_B_K_C = bayesian_model(batch, k_upper - k_lower) logits_B_K_C[lower:upper, k_lower:k_upper].copy_( mc_output_B_K_C.double(), non_blocking=True) except RuntimeError as exception: if torch_utils.should_reduce_batch_size(exception): if chunk_size <= 1: raise chunk_size = chunk_size // 2 print( f"New reduced_eval_consistent_bayesian_model_cuda_chunk_size={chunk_size} ({exception})" ) reduced_eval_consistent_bayesian_model_cuda_chunk_size = chunk_size torch_utils.gc_cuda() else: raise else: if k_upper == k: next_size = target_size elif k_upper < 50: next_size = B else: next_size = max(target_size, B * (100 - reduce_percentage) // 100) # Compute the score if it's needed: we are going to reduce the dataset or we're in the last iteration. 
if next_size < B or k_upper == k: # Calculate the scores (mutual information) of logits_B_K_C scores_B = acquisition_function.compute_scores( logits_B_K_C, available_loader=subset_dataloader, device=device) else: scores_B = None if next_size < B: print("Reducing size", next_size) # Get indices of samples sorted by decreasing score (highest mutual information first) sorted_indices = torch.argsort(scores_B, descending=True) # Keep the next_size highest-scoring samples, re-sorted into ascending index order so they stay aligned with the dataset new_indices = torch.sort(sorted_indices[:next_size], descending=False)[0] B = next_size logits_B_K_C = logits_B_K_C[new_indices] if k_upper == k: logits_B_K_C = logits_B_K_C.clone().detach() scores_B = scores_B[new_indices].clone().detach() # Acquire all the low scorers so they leave the available pool subset_split.acquire(sorted_indices[next_size:]) k_lower += chunk_size return SubsetEvalResults(subset_split=subset_split, subset_dataloader=subset_dataloader, scores_B=scores_B, logits_B_K_C=logits_B_K_C)
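# A toy version of the reduction above: keep the next_size highest scorers and
# re-sort the kept indices into ascending order so logits_B_K_C stays aligned
# with the underlying dataset; the dropped low-score indices are what gets
# "acquired" out of the available pool. Tensor contents are made up:
import torch

scores_B = torch.tensor([0.9, 0.1, 0.7, 0.3, 0.5])
logits_B_K_C = torch.arange(5 * 2 * 3, dtype=torch.float64).view(5, 2, 3)

next_size = 3   # in the function: max(target_size, B * (100 - reduce_percentage) // 100)
sorted_indices = torch.argsort(scores_B, descending=True)  # [0, 2, 4, 3, 1]
new_indices = torch.sort(sorted_indices[:next_size])[0]    # [0, 2, 4]
logits_B_K_C = logits_B_K_C[new_indices]
dropped = sorted_indices[next_size:]                       # [3, 1] -> culled
print(new_indices.tolist(), dropped.tolist())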
def maximize_batch_BUCB(self,gp_params, B=5,kappa=2): """ Finding a batch of points using GP-BUCB approach Input Parameters ---------- gp_params: Parameters to be passed to the Gaussian Process class B: fixed batch size for all iteration kappa: constant value in UCB IsPlot: flag variable for visualization Returns ------- X: a batch of [x_1..x_B] """ self.B=B # Set acquisition function self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa) # Set parameters if any was passed self.gp=PradaGaussianProcess(gp_params) if len(self.gp.KK_x_x_inv)==0: # check if empty self.gp.fit(self.X, self.Y) #else: #self.gp.fit_incremental(self.X[ur], self.Y[ur]) start_gmm_opt=time.time() # generate samples from Acquisition function y_max=self.gp.Y.max() # check the bound 0-1 or original bound temp_X=self.X temp_gp=self.gp temp_gp.X_bucb=temp_X temp_gp.KK_x_x_inv_bucb=self.gp.KK_x_x_inv # finding new X new_X=[] for ii in range(B): # Finding argmax of the acquisition function. x_max = acq_max(ac=self.acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds) if np.any((temp_X - x_max).sum(axis=1) == 0) | np.isnan(x_max.sum()): x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1], size=self.scalebounds.shape[0]) if ii==0: new_X=x_max else: new_X= np.vstack((new_X, x_max.reshape((1, -1)))) # update the Gaussian Process and thus the acquisition function temp_gp.compute_incremental_var(temp_X,x_max) temp_X = np.vstack((temp_X, x_max.reshape((1, -1)))) temp_gp.X_bucb=temp_X # record the optimization time finished_gmm_opt=time.time() elapse_gmm_opt=finished_gmm_opt-start_gmm_opt self.opt_time=np.hstack((self.opt_time,elapse_gmm_opt)) self.NumPoints=np.append(self.NumPoints,B) self.X=temp_X # convert back to original scale temp_X_new_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(new_X)] temp_X_new_original=np.asarray(temp_X_new_original) self.X_original=np.vstack((self.X_original, temp_X_new_original)) # evaluate y=f(x) temp=self.f(temp_X_new_original) temp=np.reshape(temp,(-1,1)) self.Y=np.append(self.Y,temp) print "#Batch={:d} f_max={:.4f}".format(new_X.shape[0],self.Y.max())
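# Both BUCB variants score candidates with UCB; for reference, the acquisition
# is just mu(x) + kappa * sigma(x). A minimal sketch against a generic
# predict(x) -> (mu, var) interface (not the AcquisitionFunction class):
import numpy as np

def ucb(x, predict, kappa=2.0):
    mu, var = predict(x)
    return mu + kappa * np.sqrt(np.maximum(var, 0.0))

# toy posterior: flat mean, uncertainty grows away from an observation at 0.5
predict = lambda x: (np.zeros(len(x)), (x[:, 0] - 0.5) ** 2)
print(ucb(np.array([[0.5], [0.0], [1.0]]), predict))   # [0. 1. 1.]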
def maximize(self,gp_params): """ Main optimization method. Input parameters ---------- gp_params: parameter for Gaussian Process Returns ------- x: recommended point for evaluation """ if self.stop_flag==1: return if self.acq['name']=='random': x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.scalebounds] x_max=np.asarray(x_max) x_max=x_max.T self.X_original=np.vstack((self.X_original, x_max)) # evaluate Y using original X self.Y_original = np.append(self.Y_original, self.f(x_max)) # update Y after changing Y_original self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original) self.time_opt=np.hstack((self.time_opt,0)) return # init a new Gaussian Process self.gp=PradaGaussianProcess(gp_params) if self.gp.KK_x_x_inv ==[]: # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) acq=self.acq if acq['debug']==1: logmarginal=self.gp.log_marginal_lengthscale(gp_params['theta'],gp_params['noise_delta']) print(gp_params['theta']) print("log marginal before optimizing ={:.4f}".format(logmarginal)) self.logmarginal=logmarginal if logmarginal<-999999: logmarginal=self.gp.log_marginal_lengthscale(gp_params['theta'],gp_params['noise_delta']) # re-optimize the GP length scale every 2*dim evaluations, once enough data is available if self.optimize_gp==1 and len(self.Y)%(2*self.dim)==0 and len(self.Y)>5*self.dim: print("Initial length scale={}".format(gp_params['theta'])) newtheta = self.gp.optimize_lengthscale(gp_params['theta'],gp_params['noise_delta'],self.scalebounds) gp_params['theta']=newtheta print("New length scale={}".format(gp_params['theta'])) # init a new Gaussian Process after optimizing the hyper-parameters self.gp=PradaGaussianProcess(gp_params) # Find unique rows of X to avoid GP from breaking ur = unique_rows(self.X) self.gp.fit(self.X[ur], self.Y[ur]) # Modify the search space based on the selected method if self.expandSS=='expandBoundsDDB_MAP': self.expandBoundsDDB_MAP() if self.expandSS=='expandBoundsDDB_FB': self.expandBoundsDDB_FB() if self.expandSS=='expandBoundsFiltering': self.expandBoundsFiltering() if self.expandSS=='volumeDoubling' and len(self.Y)%(3*self.dim)==0: self.volumeDoubling() # Prevent bounds from breaching the maximum limit for d in range(0,self.dim): if self.scalebounds[d,0]<0: print('Lower bound of {} in dimension {} exceeded minimum bound of {}. Scaling up.'.format(self.scalebounds[d,0],d,0)) self.scalebounds[d,0]=0 print('bound set to {}'.format(self.scalebounds)) if self.scalebounds[d,1]>max_bound_size: print('Upper bound of {} in dimension {} exceeded maximum bound of {}. 
Scaling down.'.format(self.scalebounds[d,1],d,max_bound_size)) self.scalebounds[d,1]=max_bound_size self.scalebounds[d,0]=min(self.scalebounds[d,0],self.scalebounds[d,1]-np.sqrt(3*self.gp.lengthscale)) print('bound set to {}'.format(self.scalebounds)) # Set acquisition function start_opt=time.time() y_max = self.Y.max() if acq['name'] in ['consensus','mes']: ucb_acq_func={} ucb_acq_func['name']='ucb' ucb_acq_func['kappa']=np.log(len(self.Y)) ucb_acq_func['dim']=self.dim ucb_acq_func['scalebounds']=self.scalebounds myacq=AcquisitionFunction(ucb_acq_func) xt_ucb = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) xstars=[] xstars.append(xt_ucb) ei_acq_func={} ei_acq_func['name']='ei' ei_acq_func['dim']=self.dim ei_acq_func['scalebounds']=self.scalebounds myacq=AcquisitionFunction(ei_acq_func) xt_ei = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) xstars.append(xt_ei) pes_acq_func={} pes_acq_func['name']='pes' pes_acq_func['dim']=self.dim pes_acq_func['scalebounds']=self.scalebounds myacq=AcquisitionFunction(pes_acq_func) xt_pes = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds) xstars.append(xt_pes) self.xstars=xstars if acq['name']=='vrs': print("please call the maximize_vrs function") return if 'xstars' not in globals(): xstars=[] self.xstars=xstars self.acq['xstars']=xstars self.acq['WW']=False self.acq['WW_dim']=False self.acq_func = AcquisitionFunction(self.acq,self.bb_function) if acq['name']=="ei_mu": #find the maximum in the predictive mean mu_acq={} mu_acq['name']='mu' mu_acq['dim']=self.dim acq_mu=AcquisitionFunction(mu_acq) x_mu_max = acq_max(ac=acq_mu.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds,opt_toolbox=self.opt_toolbox) # set y_max = mu_max y_max=acq_mu.acq_kind(x_mu_max,gp=self.gp, y_max=y_max) x_max = acq_max(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds,opt_toolbox=self.opt_toolbox,seeds=self.xstars) if acq['name']=='consensus' and acq['debug']==1: # plot the x_max and xstars fig=plt.figure(figsize=(5, 5)) plt.scatter(xt_ucb[0],xt_ucb[1],marker='s',color='g',s=200,label='Peak') plt.scatter(xt_ei[0],xt_ei[1],marker='s',color='k',s=200,label='Peak') plt.scatter(x_max[0],x_max[1],marker='*',color='r',s=300,label='Peak') plt.xlim(0,1) plt.ylim(0,1) strFileName="acquisition_functions_debug.eps" fig.savefig(strFileName, bbox_inches='tight') if acq['name']=='vrs' and acq['debug']==1: # plot the x_max and xstars fig=plt.figure(figsize=(5, 5)) plt.scatter(xt_ucb[0],xt_ucb[1],marker='s',color='g',s=200,label='Peak') plt.scatter(xt_ei[0],xt_ei[1],marker='s',color='k',s=200,label='Peak') plt.scatter(x_max[0],x_max[1],marker='*',color='r',s=300,label='Peak') plt.xlim(0,1) plt.ylim(0,1) strFileName="vrs_acquisition_functions_debug.eps" #fig.savefig(strFileName, bbox_inches='tight') val_acq=self.acq_func.acq_kind(x_max,self.gp,y_max) #print x_max #print val_acq if self.stopping_criteria!=0 and val_acq<self.stopping_criteria: val_acq=self.acq_func.acq_kind(x_max,self.gp,y_max) self.stop_flag=1 print("Stopping Criteria is violated. 
Stopping Criteria is {:.15f}".format(self.stopping_criteria)) self.alpha_Xt= np.append(self.alpha_Xt,val_acq) mean,var=self.gp.predict(x_max, eval_MSE=True) var.flags['WRITEABLE']=True var[var<1e-20]=0 #self.Tau_Xt= np.append(self.Tau_Xt,val_acq/var) # record the optimization time finished_opt=time.time() elapse_opt=finished_opt-start_opt self.time_opt=np.hstack((self.time_opt,elapse_opt)) # store X self.X = np.vstack((self.X, x_max.reshape((1, -1)))) # evaluate Y using original X self.Y_original = np.append(self.Y_original, self.f(x_max)) # update Y after change Y_original self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original) if self.gp.flagIncremental==1: self.gp.fit_incremental(x_max,self.Y[-1]) # if (self.acq['name']=='ei_regularizerH') or (self.acq['name']=='ei_regularizerQ'): # self.scalebounds[:,0]=self.scalebounds[:,0]+1 # self.scalebounds[:,1]=self.scalebounds[:,1]-1 # self.acq['scalebounds']=self.scalebounds self.experiment_num=self.experiment_num+1
class PradaBayOptFn(object):

    def __init__(self, gp_params, func_params, acq_params, experiment_num, seed):
        """
        Input parameters
        ----------
        gp_params:               GP parameters
        gp_params.theta:         to compute the kernel
        gp_params.delta:         to compute the kernel

        func_params:             function to optimize
        func_params.init_bounds: initial bounds for parameters
        func_params.bounds:      bounds on parameters
        func_params.func:        a function to be optimized

        acq_params:              acquisition function,
                                 acq_params.acq_func['name']=['ei','ucb','poi','lei'],
                                 acq['kappa'] for ucb, acq['k'] for lei
        acq_params.opt_toolbox:  optimization toolbox 'nlopt','direct','scipy'

        experiment_num:          the iteration of the GP method. Used to make sure
                                 each independent stage of the experiment uses
                                 different initial conditions
        seed:                    variable used as part of a seed to generate random
                                 initial points

        Returns
        -------
        dim:          dimension
        scalebounds:  bound used throughout the BO algorithm
        time_opt:     will record the time spent on optimization
        gp:           Gaussian Process object
        """
        self.experiment_num = experiment_num
        self.seed = seed
        np.random.seed(self.experiment_num * self.seed)

        # Prior distribution parameters for the DDB method
        self.alpha = 2
        self.beta = 4

        # Find number of parameters
        bounds = func_params['bounds']
        if 'init_bounds' not in func_params:
            init_bounds = bounds
        else:
            init_bounds = func_params['init_bounds']

        # Find input dimension
        self.dim = len(bounds)
        self.radius = np.ones([self.dim, 1])

        # Generate bound array
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        # Find function to be optimized
        self.f = func_params['f']

        # Acquisition function type
        self.acq = acq_params['acq_func']

        # Check if the search space is to be modified
        self.bb_function = acq_params["bb_function"]
        if 'expandSS' not in acq_params:
            self.expandSS = 0
        else:
            self.expandSS = acq_params['expandSS']

        # Check if the bound is to be set randomly.
        # If so, shift the bound by a random amount
        if acq_params['random_initial_bound'] == 1:
            randomizer = np.random.rand(self.dim) * max_bound_size
            for d in range(0, self.dim):
                self.scalebounds[d] = self.scalebounds[d] + randomizer[d]

        # Other checks
        if 'debug' not in self.acq:
            self.acq['debug'] = 0
        if 'stopping' not in acq_params:
            self.stopping_criteria = 0
        else:
            self.stopping_criteria = acq_params['stopping']
        if 'optimize_gp' not in acq_params:
            self.optimize_gp = 0
        else:
            self.optimize_gp = acq_params['optimize_gp']
        if 'marginalize_gp' not in acq_params:
            self.marginalize_gp = 0
        else:
            self.marginalize_gp = acq_params['marginalize_gp']

        # Optimization toolbox
        if 'opt_toolbox' not in acq_params:
            if self.acq['name'] == 'ei_reg':
                self.opt_toolbox = 'unbounded'
            else:
                self.opt_toolbox = 'scipy'
        else:
            self.opt_toolbox = acq_params['opt_toolbox']
        self.iteration_factor = acq_params['iteration_factor']

        # Store X in original scale
        self.X_original = None
        # Store X in 0-1 scale
        self.X = None
        # Store y=f(x), normalised as (y - mean)/std (see init below)
        self.Y = None
        # y in original scale
        self.Y_original = None
        # Value of the acquisition function at the selected point
        self.alpha_Xt = None
        self.Tau_Xt = None

        self.time_opt = 0
        self.k_Neighbor = 2

        # Gaussian Process class
        self.gp = PradaGaussianProcess(gp_params)
        self.gp_params = gp_params

        # Acquisition function
        self.acq_func = None
        # Stop condition
        self.stop_flag = 0
        self.logmarginal = 0

        # xt_suggestion, caching for Consensus
        self.xstars = []
        self.ystars = np.zeros((2, 1))

        # Theta vector for marginalization GP
        self.theta_vector = []

    def init(self, gp_params, n_init_points=3):
        """
        Input parameters
        ----------
        gp_params:      Gaussian Process structure
        n_init_points:  # init points
        """
        # Set seed to allow for reproducible results
        np.random.seed(self.experiment_num * self.seed)
        print(self.experiment_num)

        # Generate initial points on a Latin hypercube grid
        l = np.zeros([n_init_points, self.dim])
        bound_length = self.scalebounds[0, 1] - self.scalebounds[0, 0]
        for d in range(0, self.dim):
            l[:, d] = lhs(n_init_points)[:, 0]
        self.X = np.asarray(l) + self.scalebounds[:, 0]
        self.X = self.X * bound_length  # initial inputs
        print("starting points={}".format(self.X))
        print("starting bounds={}".format(self.scalebounds))
        y_init = self.f(self.X)
        y_init = np.reshape(y_init, (n_init_points, 1))
        self.Y_original = np.asarray(y_init)  # initial outputs
        # Outputs normalised
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)
        print("starting Y values={}".format(self.Y))

    #############Rename#############
    def radiusPDF(self, r, alpha, beta, b, ymax, a):
        """
        Description: Evaluates the posterior distribution for our DDB method
        Input parameters
        ----------
        r:      radius to be evaluated
        alpha:  gamma distribution shape parameter
        beta:   gamma distribution rate parameter
        a:      log-logistic distribution scale parameter
        b:      log-logistic distribution rate parameter with y_max
        ymax:   log-logistic distribution rate parameter with b

        Output: negative of the posterior distribution evaluated at r
        (negated so it can be passed directly to a minimizer)
        """
        gamma = stats.gamma.pdf(r, alpha, scale=1 / beta)
        loglog = stats.fisk.pdf(r, ymax / b, scale=a)
        P = gamma * loglog
        return -P

    def sufficientBoundPDF(self, r, bDivYmax, a):
        """
        Description: Evaluates the likelihood distribution for our DDB method
        Input parameters
        ----------
        r:        radius to be evaluated
        a:        log-logistic distribution scale parameter
        bDivYmax: log-logistic distribution rate parameter

        Output: likelihood distribution evaluated at r
        """
        P = stats.fisk.cdf(r, bDivYmax, scale=a)
        return P
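    # Illustration (not part of the original algorithm): a minimal sketch of how
    # the gamma prior and log-logistic likelihood above combine into the DDB
    # posterior over the radius. The constants (shape=2, rate=4, c=2, scale=10)
    # are assumed values for demonstration, not values the method computes.
    @staticmethod
    def _ddb_posterior_sketch():
        r = np.linspace(0.01, 5, 500)
        prior = stats.gamma.pdf(r, 2, scale=1 / 4)        # gamma(shape=2, rate=4) prior
        likelihood = stats.fisk.pdf(r, 2.0, scale=10.0)   # log-logistic, c=2, a=10
        posterior = prior * likelihood                    # unnormalised DDB posterior
        return r[np.argmax(posterior)]                    # grid-based MAP radius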
    def expandBoundsDDB_MAP(self):
        """
        Description: Expands the search space with the MAP implementation
        of our DDB method
        """
        print('Attempting to expand search space with DDB-MAP method')
        alpha = self.alpha
        beta = self.beta
        # Number of radius samples used to fit the log-logistic distribution
        bound_samples = 100

        # Find y^+ and x^+
        ymax = np.max(self.Y)

        # Generate test radii
        max_loc = np.argmax(self.Y)
        xmax = self.X[max_loc]
        test_bound = np.zeros(self.scalebounds.shape)
        bound_dist = np.zeros(bound_samples)
        bound_center = xmax
        test_bound[:, 1] = bound_center + 0.5
        test_bound[:, 0] = bound_center - 0.5
        max_radius = np.max(np.array([np.max(max_bound_size - test_bound[:, 1]),
                                      np.max(test_bound[:, 0])]))
        step = max_radius / bound_samples
        packing_number = np.zeros(bound_samples)

        # Generate a Thompson sample maximum to estimate the internal maximum
        TS = AcquisitionFunction.ThompsonSampling(self.gp)
        tsb_x, tsb_y = acq_max_global(TS, self.gp, bounds=self.scalebounds)

        # Generate Gumbel samples to estimate the external maximum
        for i in range(0, bound_samples):
            bound_length = test_bound[:, 1] - test_bound[:, 0]
            volume = np.power(max_bound_size, self.dim) - np.prod(bound_length)
            packing_number[i] = round(volume / (5 * self.gp.lengthscale))
            mu = stats.norm.ppf(1.0 - 1.0 / packing_number[i])
            sigma = stats.norm.ppf(1.0 - (1.0 / packing_number[i]) * np.exp(-1.0)) \
                    - stats.norm.ppf(1.0 - (1.0 / packing_number[i]))
            bound_dist[i] = np.exp(-np.exp(-(-tsb_y - mu) / sigma))
            test_bound[:, 1] = test_bound[:, 1] + step
            test_bound[:, 0] = test_bound[:, 0] - step
        bound_dist[np.isnan(bound_dist)] = 1

        # Fit the log-logistic parameters to the Gumbel samples
        xfit = np.arange(0, max_radius, max_radius / 100)
        popt, pcov = optimize.curve_fit(self.sufficientBoundPDF, xfit[0:100],
                                        bound_dist,
                                        bounds=np.array([[5, 1.1], [20, 5]]))
        print("popt={}".format(popt))
        b = ymax / popt[0]
        a = popt[1]
        print("b={}, ymax={}".format(b, ymax))

        # Find the gamma and log-logistic modes to determine the optimisation bound
        c = ymax / b
        loglog_mode = a * np.power((c - 1.0) / (c + 1.0), (1 / c))
        gamma_mode = (alpha - 1) / beta
        opt_bound = np.ones([2])
        opt_bound[0] = min(loglog_mode, gamma_mode)
        opt_bound[1] = max(loglog_mode, gamma_mode)
        bound_range = opt_bound[1] - opt_bound[0]

        # Find the MAP estimate of the radius r
        for d in range(0, self.dim):
            r_max = 0
            p_max = 0
            # Multi-start local optimisation over the bracketed interval
            for x0 in np.arange(opt_bound[0], opt_bound[1], bound_range / 10):
                res = optimize.minimize(
                    lambda x: self.radiusPDF(x, alpha, beta, b, ymax, a),
                    x0=x0, bounds=np.array([opt_bound]), method='L-BFGS-B')
                if -res.fun > p_max:
                    r_max = res.x
                    p_max = -res.fun
            if r_max > opt_bound[1]:
                r_max = opt_bound[1]
            xplot = np.arange(0, 10, 0.01)
            yplot = -self.radiusPDF(xplot, alpha, beta, b, ymax, a)
            max_loc = np.argmax(yplot)
            print("optimal radius of {} with unscaled probability of {}".format(r_max, p_max))
            self.scalebounds[d, 1] = xmax[d] + r_max
            self.scalebounds[d, 0] = xmax[d] - r_max
        print("search space extended to {} with DDB".format(self.scalebounds))
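    # Illustration (not part of the original algorithm): the curve_fit step above
    # in isolation, fitting the two log-logistic parameters (c = ymax/b, a) to a
    # synthetic probability curve standing in for bound_dist. The "true" values
    # c=8, a=3 and the initial guess are assumptions chosen for demonstration.
    @staticmethod
    def _loglogistic_fit_sketch():
        xfit = np.linspace(0.01, 10, 100)
        target = stats.fisk.cdf(xfit, 8.0, scale=3.0)  # synthetic bound_dist stand-in
        # Same parameter box the DDB methods use: c in [5, 20], a in [1.1, 5]
        popt, _ = optimize.curve_fit(lambda r, c, a: stats.fisk.cdf(r, c, scale=a),
                                     xfit, target, p0=[10.0, 3.0],
                                     bounds=([5, 1.1], [20, 5]))
        return popt  # fitted (c, a), expected to recover roughly (8, 3)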
    def expandBoundsDDB_FB(self):
        """
        Description: Expands the search space with the full Bayesian
        implementation of our DDB method
        """
        print('Attempting to expand search space with DDB-FB method')
        alpha = self.alpha
        beta = self.beta
        # Number of radius samples used to fit the log-logistic distribution
        bound_samples = 100

        # Find y^+ and x^+
        ymax = np.max(self.Y)

        # Generate test radii
        max_loc = np.argmax(self.Y)
        xmax = self.X[max_loc]
        test_bound = np.zeros(self.scalebounds.shape)
        bound_dist = np.zeros(bound_samples)
        bound_center = xmax
        test_bound[:, 1] = bound_center + 0.5
        test_bound[:, 0] = bound_center - 0.5
        max_radius = np.max(np.array([np.max(max_bound_size - test_bound[:, 1]),
                                      np.max(test_bound[:, 0])]))
        step = max_radius / bound_samples
        packing_number = np.zeros(bound_samples)

        # Generate a Thompson sample maximum to estimate the internal maximum
        TS = AcquisitionFunction.ThompsonSampling(self.gp)
        tsb_x, tsb_y = acq_max_global(TS, self.gp, bounds=self.scalebounds)

        # Generate Gumbel samples to estimate the external maximum
        for i in range(0, bound_samples):
            bound_length = test_bound[:, 1] - test_bound[:, 0]
            volume = np.power(max_bound_size, self.dim) - np.prod(bound_length)
            packing_number[i] = round(volume / (5 * self.gp.lengthscale))
            mu = stats.norm.ppf(1.0 - 1.0 / packing_number[i])
            sigma = stats.norm.ppf(1.0 - (1.0 / packing_number[i]) * np.exp(-1.0)) \
                    - stats.norm.ppf(1.0 - (1.0 / packing_number[i]))
            bound_dist[i] = np.exp(-np.exp(-(-tsb_y - mu) / sigma))
            test_bound[:, 1] = test_bound[:, 1] + step
            test_bound[:, 0] = test_bound[:, 0] - step
        bound_dist[np.isnan(bound_dist)] = 1

        # Fit the log-logistic parameters to the Gumbel samples
        xfit = np.arange(0, max_radius, max_radius / 100)
        popt, pcov = optimize.curve_fit(self.sufficientBoundPDF, xfit[0:100],
                                        bound_dist,
                                        bounds=np.array([[5, 1.1], [20, 5]]))
        print("popt={}".format(popt))
        b = ymax / popt[0]
        a = popt[1]
        print("b={}, ymax={}".format(b, ymax))

        # Sample for the optimal radius
        for d in range(0, self.dim):
            gamma = np.random.gamma(shape=alpha, scale=1 / beta, size=100)
            loglog = stats.fisk.pdf(gamma, ymax / b, scale=a)
            scaled_weights = loglog / np.sum(loglog)
            multi = np.random.multinomial(1, scaled_weights)
            r_index = np.argmax(multi)
            print("Radius of {} selected".format(gamma[r_index]))
            self.scalebounds[d, 1] = xmax[d] + gamma[r_index]
            self.scalebounds[d, 0] = xmax[d] - gamma[r_index]
        print("search space extended to {} with DDB".format(self.scalebounds))

    def lcb(self, x, gp):
        """
        Calculates the GP-LCB acquisition function values
        Inputs:
            gp: the Gaussian process, also contains all data
            x:  the point at which to evaluate the acquisition function
        Output:
            acq_value: the value of the acquisition function at point x
        """
        mean, var = gp.predict(x, eval_MSE=True)
        var.flags['WRITEABLE'] = True
        var[var < 1e-10] = 0  # prevents negative variances from numerical errors
        mean = np.atleast_2d(mean).T
        var = np.atleast_2d(var).T
        beta = 2 * np.log(len(gp.Y) * np.square((self.experiment_num + 1) * math.pi) / (6 * 0.9))
        return mean - np.sqrt(beta) * np.sqrt(var)

    def ucb(self, x, gp):
        """
        Calculates the GP-UCB acquisition function values
        Inputs:
            gp: the Gaussian process, also contains all data
            x:  the point at which to evaluate the acquisition function
        Output:
            acq_value: the value of the acquisition function at point x
        """
        mean, var = gp.predict(x, eval_MSE=True)
        var.flags['WRITEABLE'] = True
        var[var < 1e-10] = 0  # prevents negative variances from numerical errors
        mean = np.atleast_2d(mean).T
        var = np.atleast_2d(var).T
        beta = 2 * np.log(len(gp.Y) * np.square(self.experiment_num * math.pi) / (6 * 0.9))
        return mean + np.sqrt(beta) * np.sqrt(var)
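    # Illustration (not part of the original algorithm): the exploration weight
    # used by lcb()/ucb() above follows the standard GP-UCB schedule
    # beta_t = 2*log(n*(t*pi)^2 / (6*delta)) with delta = 0.9. The n and t values
    # here are arbitrary examples showing how slowly sqrt(beta_t) grows.
    @staticmethod
    def _gp_ucb_beta_sketch(n=50, delta=0.9):
        return [math.sqrt(2 * np.log(n * np.square(t * math.pi) / (6 * delta)))
                for t in (1, 10, 100)]  # sqrt(beta_t) for increasing iteration t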
""" step=0.1*self.gp.lengthscale print('Attempting to expand search space with FBO method') # Determine the unfiltered extension based on the iteration number extended_bound=np.copy(self.scalebounds) extention=math.pow(self.iteration_factor/(max([self.experiment_num,1])),(1/self.dim)) old_radius=(extended_bound[:,1]-extended_bound[:,0])/2 mid_point=extended_bound[:,0]+old_radius new_radius=old_radius*extention extended_bound[:,1]=mid_point+new_radius extended_bound[:,0]=mid_point-new_radius # Calculate the global maximum lower confidence bound lcb_x,lcb_y=acq_max_global(self.lcb, self.gp, extended_bound) # Filter the lower and upper boundary up to the unfiltered extension for d in range(0,self.dim): #Upper bound x_boundry=np.max(self.X[d],axis=0) x_boundry_index=np.argmax(self.X[d],axis=0) xb=self.X[x_boundry_index] ucb_y=self.ucb(self.X[x_boundry_index],self.gp) while(((ucb_y>lcb_y)&(x_boundry<extended_bound[d,1]))|(x_boundry<self.scalebounds[d,1])): x_boundry=x_boundry+step xb[d]=xb[d]+step ucb_y=self.ucb(xb,self.gp) extended_bound[d,1]=x_boundry #Lower bound x_boundry=np.min(self.X[d],axis=0) ucb_y=self.ucb(self.X[x_boundry_index],self.gp) while(((ucb_y>lcb_y)&(x_boundry>extended_bound[d,0]))|(x_boundry>self.scalebounds[d,0])): x_boundry=x_boundry-step xb[d]=xb[d]-step ucb_y=self.ucb(xb,self.gp) extended_bound[d,0]=x_boundry self.scalebounds=extended_bound print("seach space extended to {}".format(self.scalebounds)) def volumeDoubling(self): """ Description: Expands the search space with the volume doubling method by Shahriari et al """ print('Attempting to expand search space with volume doubling method') extended_bound=np.copy(self.scalebounds) old_radius=(extended_bound[:,1]-extended_bound[:,0])/2 volume=np.power(2*old_radius,self.dim) mid_point=extended_bound[:,0]+old_radius new_radius=np.power(2*volume,1/self.dim)/2 extended_bound[:,0]=mid_point-new_radius extended_bound[:,1]=mid_point+new_radius self.scalebounds=extended_bound print("seach space extended to {}".format(self.scalebounds)) def maximize(self,gp_params): """ Main optimization method. 
    def maximize(self, gp_params):
        """
        Main optimization method.

        Input parameters
        ----------
        gp_params: parameters for the Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """
        if self.stop_flag == 1:
            return

        if self.acq['name'] == 'random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.scalebounds]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # Evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))
            # Update Y after changing Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)
            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # Init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)
        if self.gp.KK_x_x_inv == []:
            # Find unique rows of X to avoid the GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        acq = self.acq

        if acq['debug'] == 1:
            logmarginal = self.gp.log_marginal_lengthscale(gp_params['theta'], gp_params['noise_delta'])
            print(gp_params['theta'])
            print("log marginal before optimizing ={:.4f}".format(logmarginal))
            self.logmarginal = logmarginal
            if logmarginal < -999999:
                logmarginal = self.gp.log_marginal_lengthscale(gp_params['theta'], gp_params['noise_delta'])

        # Note: parenthesised to test divisibility by 2*dim; the original
        # `len(self.Y)%2*self.dim==0` was parsed as `(len(self.Y)%2)*self.dim==0`
        if self.optimize_gp == 1 and len(self.Y) % (2 * self.dim) == 0 and len(self.Y) > 5 * self.dim:
            print("Initial length scale={}".format(gp_params['theta']))
            newtheta = self.gp.optimize_lengthscale(gp_params['theta'], gp_params['noise_delta'], self.scalebounds)
            gp_params['theta'] = newtheta
            print("New length scale={}".format(gp_params['theta']))
            # Init a new Gaussian Process after optimizing the hyper-parameters
            self.gp = PradaGaussianProcess(gp_params)
            # Find unique rows of X to avoid the GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

        # Modify the search space based on the selected method
        if self.expandSS == 'expandBoundsDDB_MAP':
            self.expandBoundsDDB_MAP()
        if self.expandSS == 'expandBoundsDDB_FB':
            self.expandBoundsDDB_FB()
        if self.expandSS == 'expandBoundsFiltering':
            self.expandBoundsFiltering()
        # Parenthesised to test divisibility by 3*dim (see note above)
        if self.expandSS == 'volumeDoubling' and len(self.Y) % (3 * self.dim) == 0:
            self.volumeDoubling()

        # Prevent bounds from breaching the maximum limit
        for d in range(0, self.dim):
            if self.scalebounds[d, 0] < 0:
                print('Lower bound of {} in dimension {} exceeded minimum bound of {}. Scaling up.'.format(
                    self.scalebounds[d, 0], d, 0))
                self.scalebounds[d, 0] = 0
                print('bound set to {}'.format(self.scalebounds))
            if self.scalebounds[d, 1] > max_bound_size:
                print('Upper bound of {} in dimension {} exceeded maximum bound of {}. Scaling down.'.format(
                    self.scalebounds[d, 1], d, max_bound_size))
                self.scalebounds[d, 1] = max_bound_size
                self.scalebounds[d, 0] = min(self.scalebounds[d, 0],
                                             self.scalebounds[d, 1] - np.sqrt(3 * self.gp.lengthscale))
                print('bound set to {}'.format(self.scalebounds))

        # Set the acquisition function
        start_opt = time.time()
        y_max = self.Y.max()

        # (the original guarded the fallback below with `if 'xstars' not in
        # globals()`, which never sees this local; initialising here is the intent)
        xstars = []
        if acq['name'] in ['consensus', 'mes']:
            ucb_acq_func = {'name': 'ucb', 'kappa': np.log(len(self.Y)),
                            'dim': self.dim, 'scalebounds': self.scalebounds}
            myacq = AcquisitionFunction(ucb_acq_func)
            xt_ucb = acq_max(ac=myacq.acq_kind, gp=self.gp, y_max=y_max, bounds=self.scalebounds)
            xstars.append(xt_ucb)

            ei_acq_func = {'name': 'ei', 'dim': self.dim, 'scalebounds': self.scalebounds}
            myacq = AcquisitionFunction(ei_acq_func)
            xt_ei = acq_max(ac=myacq.acq_kind, gp=self.gp, y_max=y_max, bounds=self.scalebounds)
            xstars.append(xt_ei)

            pes_acq_func = {'name': 'pes', 'dim': self.dim, 'scalebounds': self.scalebounds}
            myacq = AcquisitionFunction(pes_acq_func)
            xt_pes = acq_max(ac=myacq.acq_kind, gp=self.gp, y_max=y_max, bounds=self.scalebounds)
            xstars.append(xt_pes)

            self.xstars = xstars

        if acq['name'] == 'vrs':
            print("please call the maximize_vrs function")
            return

        self.xstars = xstars
        self.acq['xstars'] = xstars
        self.acq['WW'] = False
        self.acq['WW_dim'] = False
        self.acq_func = AcquisitionFunction(self.acq, self.bb_function)

        if acq['name'] == "ei_mu":
            # Find the maximum of the predictive mean
            mu_acq = {'name': 'mu', 'dim': self.dim}
            acq_mu = AcquisitionFunction(mu_acq)
            x_mu_max = acq_max(ac=acq_mu.acq_kind, gp=self.gp, y_max=y_max,
                               bounds=self.scalebounds, opt_toolbox=self.opt_toolbox)
            # Set y_max = mu_max
            y_max = acq_mu.acq_kind(x_mu_max, gp=self.gp, y_max=y_max)

        x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, y_max=y_max,
                        bounds=self.scalebounds, opt_toolbox=self.opt_toolbox, seeds=self.xstars)

        if acq['name'] == 'consensus' and acq['debug'] == 1:
            # Plot x_max and the xstars
            fig = plt.figure(figsize=(5, 5))
            plt.scatter(xt_ucb[0], xt_ucb[1], marker='s', color='g', s=200, label='Peak')
            plt.scatter(xt_ei[0], xt_ei[1], marker='s', color='k', s=200, label='Peak')
            plt.scatter(x_max[0], x_max[1], marker='*', color='r', s=300, label='Peak')
            plt.xlim(0, 1)
            plt.ylim(0, 1)
            strFileName = "acquisition_functions_debug.eps"
            fig.savefig(strFileName, bbox_inches='tight')

        if acq['name'] == 'vrs' and acq['debug'] == 1:
            # Plot x_max and the xstars
            fig = plt.figure(figsize=(5, 5))
            plt.scatter(xt_ucb[0], xt_ucb[1], marker='s', color='g', s=200, label='Peak')
            plt.scatter(xt_ei[0], xt_ei[1], marker='s', color='k', s=200, label='Peak')
            plt.scatter(x_max[0], x_max[1], marker='*', color='r', s=300, label='Peak')
            plt.xlim(0, 1)
            plt.ylim(0, 1)
            strFileName = "vrs_acquisition_functions_debug.eps"
            # fig.savefig(strFileName, bbox_inches='tight')

        val_acq = self.acq_func.acq_kind(x_max, self.gp, y_max)

        if self.stopping_criteria != 0 and val_acq < self.stopping_criteria:
            self.stop_flag = 1
            print("Stopping criteria violated. Stopping criteria is {:.15f}".format(self.stopping_criteria))

        self.alpha_Xt = np.append(self.alpha_Xt, val_acq)

        mean, var = self.gp.predict(x_max, eval_MSE=True)
        var.flags['WRITEABLE'] = True
        var[var < 1e-20] = 0
        # self.Tau_Xt = np.append(self.Tau_Xt, val_acq/var)

        # Record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Store X
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))
        # Evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(x_max))
        # Update Y after changing Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / np.std(self.Y_original)

        if self.gp.flagIncremental == 1:
            self.gp.fit_incremental(x_max, self.Y[-1])

        # if (self.acq['name'] == 'ei_regularizerH') or (self.acq['name'] == 'ei_regularizerQ'):
        #     self.scalebounds[:, 0] = self.scalebounds[:, 0] + 1
        #     self.scalebounds[:, 1] = self.scalebounds[:, 1] - 1
        #     self.acq['scalebounds'] = self.scalebounds

        self.experiment_num = self.experiment_num + 1
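# Hedged usage sketch, not from the original source: roughly how PradaBayOptFn
# is driven end to end, assuming a 2-D toy objective. The gp_params/acq_params
# keys below are inferred from __init__ and maximize; treat every value
# ('theta', 'noise_delta', 'iteration_factor', ...) as a placeholder.
def _prada_bayopt_usage_sketch():
    def sphere(x):
        x = np.atleast_2d(x)
        return -np.sum(np.square(x), axis=1)  # maximise the negated sphere

    gp_params = {'theta': 0.1, 'noise_delta': 1e-6}  # assumed kernel parameters
    func_params = {'f': sphere, 'bounds': np.array([[0, 1], [0, 1]])}
    acq_params = {'acq_func': {'name': 'ucb', 'kappa': 2, 'dim': 2,
                               'scalebounds': np.array([[0., 1.], [0., 1.]])},
                  'bb_function': None, 'random_initial_bound': 0,
                  'iteration_factor': 50, 'expandSS': 'volumeDoubling'}
    bo = PradaBayOptFn(gp_params, func_params, acq_params, experiment_num=1, seed=1)
    bo.init(gp_params, n_init_points=3)
    for _ in range(10):
        bo.maximize(gp_params)
    print("best y found: {}".format(np.max(bo.Y_original)))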
def maximize(self, init_points=5, n_iter=25, acq='ucb', kappa=2.576, **gp_params):
    """
    Main optimization method.

    Parameters
    ----------
    :param init_points: number of randomly chosen points to sample the target
        function before fitting the GP.
    :param n_iter: total number of times the process is repeated. Note that this
        method currently has no stopping criteria (for a number of reasons), so
        the total number of points to be sampled must be specified up front.
    :param acq: acquisition function to be used; defaults to Upper Confidence
        Bound ('ucb').
    :param gp_params: parameters to be passed to the scikit-learn Gaussian
        Process object.

    Returns
    -------
    :return: Nothing
    """
    # Reset timer
    # self.plog.reset_timer()

    # Set acquisition function
    self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa)

    # Initialize x, y and find current y_max
    if not self.initialized:
        self.init(init_points)

    y_max = self.Y.max()

    self.theta = gp_params['theta']

    # Set parameters if any were passed
    self.gp = PradaMultipleGaussianProcess(**gp_params)

    # Find unique rows of X to avoid the GP from breaking
    ur = unique_rows(self.X)
    self.gp.fit(self.X[ur], self.Y[ur])

    # Find the argmax of the acquisition function
    x_max = acq_max(ac=self.acq_func.acq_kind, gp=self.gp, y_max=y_max, bounds=self.bounds)
    # Alternative optimiser (kept for reference):
    # x_max, f_max = acq_max_nlopt(f=self.acq_func.acq_kind, gp=self.gp,
    #                              y_max=y_max, bounds=self.bounds)

    # Test if x_max is repeated; if it is, draw another point at random
    if np.any((self.X - x_max).sum(axis=1) == 0):
        x_max = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1],
                                  size=self.bounds.shape[0])

    self.X = np.vstack((self.X, x_max.reshape((1, -1))))
    self.Y = np.append(self.Y, self.f(x_max))

    # Update the GP
    ur = unique_rows(self.X)
    self.gp.fit(self.X[ur], self.Y[ur])

    # Update the maximum value to search for the next probe point
    if self.Y[-1] > y_max:
        y_max = self.Y[-1]
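# Side note (illustration, not from the original source): the repeat test above,
# np.any((self.X - x_max).sum(axis=1) == 0), can fire spuriously because positive
# and negative coordinate differences may cancel out. An exact row match is a
# stricter check:
def _is_repeated_row(X, x_max):
    """Return True only if x_max exactly matches an existing row of X."""
    return np.any(np.all(X == x_max, axis=1))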