def __init__(self, model: Model, options: dict) -> None:
    """Set up the EI acquisition and the non-linear optimizer that drives it.

    Args:
        model: Surrogate model; its ``dim`` attribute is read here.
        options: Configuration container. Only ``options.optimization`` is
            read (``Nrestarts``, ``algo_name``, ``disp_info_scipy_opti``).
            NOTE(review): annotated as ``dict`` but accessed by attribute;
            confirm the real type with the caller.
    """
    # Initialize the parent classes in the following order.
    ExpectedImprovement.__init__(self, model=model, best_f=0.0, maximize=False)
    opti_cfg = options.optimization
    AcquisitionBaseTools.__init__(
        self,
        model=model,
        Nrestarts_eta=opti_cfg.Nrestarts,
    )

    logger.info("Starting EI ...")

    self.dim = model.dim
    self.Nrestarts = opti_cfg.Nrestarts
    self.algo_name = opti_cfg.algo_name

    # The acquisition is optimized over the unit hypercube [0, 1]^dim.
    lower_corner = [0.0] * self.dim
    upper_corner = [1.0] * self.dim
    self.constrained_opt = OptimizationNonLinear(
        dim=self.dim,
        fun_obj=self.forward,
        algo_str=self.algo_name,
        bounds=[lower_corner, upper_corner],
        minimize=False,
        what2optimize_str="EI acquisition",
    )

    self.disp_info_scipy_opti = opti_cfg.disp_info_scipy_opti
    self.method = "L-BFGS-B"

    # Filled in later; nothing has been proposed yet at construction time.
    self.x_next = None
    self.alpha_next = None
def test_expected_improvement_batch(self, cuda=False):
    """Check analytic EI on t-batches, with and without a scalarized objective.

    For each of ``torch.float`` and ``torch.double`` the test covers:
      * a batch of three single-output posteriors against known EI values,
      * the ``UnsupportedError`` raised for a multi-output model,
      * a ``ScalarizedObjective`` on single- and multi-output posteriors,
      * the ``UnsupportedError`` raised for an MC objective.

    Args:
        cuda: If True, tensors are created on the ``"cuda"`` device,
            otherwise on ``"cpu"``.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([-0.5, 0.0, 0.5], device=device, dtype=dtype).view(
            3, 1, 1
        )
        variance = torch.ones(3, 1, 1, device=device, dtype=dtype)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(3, 1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(
            [0.19780, 0.39894, 0.69780], device=device, dtype=dtype
        )
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        # check for proper error if multi-output model
        mean2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
        variance2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
        mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
        module2 = ExpectedImprovement(model=mm2, best_f=0.0)
        with self.assertRaises(UnsupportedError):
            module2(X)

        # test objective (single-output)
        mean = torch.tensor([[[0.5]], [[0.25]]], device=device, dtype=dtype)
        covar = torch.tensor(
            [[[[0.16]]], [[[0.125]]]], device=device, dtype=dtype
        )
        mvn = MultivariateNormal(mean, covar)
        p = GPyTorchPosterior(mvn)
        mm = MockModel(p)
        weights = torch.tensor([0.5], device=device, dtype=dtype)
        obj = ScalarizedObjective(weights)
        ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
        X = torch.rand(2, 1, 2, device=device, dtype=dtype)
        ei_expected = torch.tensor(
            [[0.2601], [0.1500]], device=device, dtype=dtype
        )
        # The comparison result used to be discarded, so this check could
        # never fail; it is now asserted.
        self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))

        # test objective (multi-output)
        mean = torch.tensor(
            [[[-0.25, 0.5]], [[0.2, -0.1]]], device=device, dtype=dtype
        )
        covar = torch.tensor(
            [[[0.5, 0.125], [0.125, 0.5]], [[0.25, -0.1], [-0.1, 0.25]]],
            device=device,
            dtype=dtype,
        )
        mvn = MultitaskMultivariateNormal(mean, covar)
        p = GPyTorchPosterior(mvn)
        mm = MockModel(p)
        weights = torch.tensor([2.0, 1.0], device=device, dtype=dtype)
        obj = ScalarizedObjective(weights)
        ei = ExpectedImprovement(model=mm, best_f=0.0, objective=obj)
        X = torch.rand(2, 1, 2, device=device, dtype=dtype)
        ei_expected = torch.tensor([0.6910, 0.5371], device=device, dtype=dtype)
        self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))

        # test bad objective class
        with self.assertRaises(UnsupportedError):
            ExpectedImprovement(
                model=mm, best_f=0.0, objective=IdentityMCObjective()
            )
def optimize_loop(model, loss, X_train, y_train, X_test, y_test, bounds):
    """Run one pool-based EI step: pick the best test point and retrain.

    EI (maximizing, with ``y_train.max()`` as incumbent) is evaluated on every
    row of ``X_test``. The arg-max row is popped from the test pool via
    ``tensor_pop``, passed to ``get_gpr_model`` together with ``model``, and
    appended to the training data.

    ``loss`` and ``bounds`` are accepted but not used here.

    Returns:
        A dict with keys ``'model'``, ``'loss'``, ``'X_train'``, ``'y_train'``,
        ``'X_test'``, ``'y_test'``, ``'X_new'`` and ``'y_new'``.
    """
    incumbent = y_train.max()
    ei = ExpectedImprovement(model, best_f=incumbent, maximize=True)

    # Insert a singleton q-dimension so each pool row is scored on its own.
    n_rows, n_cols = X_test.shape[0], X_test.shape[1]
    scores = ei(X_test.view((n_rows, 1, n_cols)))

    # Index of the pool point with the highest acquisition value.
    winner = scores.argmax()

    # Remove the winning point from the pool; the second return value is
    # used as the newly selected point.
    X_test_new, X_new = tensor_pop(
        inp_tensor=X_test, to_pop=winner.cpu().numpy()
    )
    y_test_new, y_new = tensor_pop(
        inp_tensor=y_test, to_pop=winner.cpu().numpy()
    )

    # Condition the model on the new observation.
    gpr_model, gpr_mll = get_gpr_model(X_new, y_new, model=model)

    result = {'model': gpr_model, 'loss': gpr_mll}
    result['X_train'] = torch.cat((X_train, X_new))
    result['y_train'] = torch.cat((y_train, y_new))
    result['X_test'] = X_test_new
    result['y_test'] = y_test_new
    result['X_new'] = X_new
    result['y_new'] = y_new
    return result
def test_expected_improvement(self):
    """Check analytic EI on a single point, including posterior transforms.

    For each of ``torch.float`` and ``torch.double`` the test covers:
      * the default (maximizing) EI value for a mock posterior,
      * the value obtained with ``maximize=False``,
      * the ``UnsupportedError`` raised by ``set_X_pending``,
      * a ``ScalarizedPosteriorTransform`` on single- and multi-output
        posteriors.
    """
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([[-0.5]], device=self.device, dtype=dtype)
        variance = torch.ones(1, 1, device=self.device, dtype=dtype)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))

        # basic test
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.19780, device=self.device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        # test maximize
        module = ExpectedImprovement(model=mm, best_f=0.0, maximize=False)
        X = torch.empty(1, 1, device=self.device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.6978, device=self.device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
        with self.assertRaises(UnsupportedError):
            module.set_X_pending(None)

        # test posterior transform (single-output)
        mean = torch.tensor([0.5], device=self.device, dtype=dtype)
        covar = torch.tensor([[0.16]], device=self.device, dtype=dtype)
        mvn = MultivariateNormal(mean, covar)
        p = GPyTorchPosterior(mvn)
        mm = MockModel(p)
        weights = torch.tensor([0.5], device=self.device, dtype=dtype)
        transform = ScalarizedPosteriorTransform(weights)
        ei = ExpectedImprovement(
            model=mm, best_f=0.0, posterior_transform=transform
        )
        X = torch.rand(1, 2, device=self.device, dtype=dtype)
        ei_expected = torch.tensor(0.2601, device=self.device, dtype=dtype)
        # The comparison result used to be discarded, so this check could
        # never fail; it is now asserted.
        self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))

        # test posterior transform (multi-output)
        mean = torch.tensor([[-0.25, 0.5]], device=self.device, dtype=dtype)
        covar = torch.tensor(
            [[[0.5, 0.125], [0.125, 0.5]]], device=self.device, dtype=dtype
        )
        mvn = MultitaskMultivariateNormal(mean, covar)
        p = GPyTorchPosterior(mvn)
        mm = MockModel(p)
        weights = torch.tensor([2.0, 1.0], device=self.device, dtype=dtype)
        transform = ScalarizedPosteriorTransform(weights)
        ei = ExpectedImprovement(
            model=mm, best_f=0.0, posterior_transform=transform
        )
        X = torch.rand(1, 2, device=self.device, dtype=dtype)
        ei_expected = torch.tensor(0.6910, device=self.device, dtype=dtype)
        self.assertTrue(torch.allclose(ei(X), ei_expected, atol=1e-4))
def test_expected_improvement(self, cuda=False):
    """Compare analytic EI with reference values for both directions.

    A mock posterior with mean -0.5 and unit variance is evaluated with
    ``best_f=0.0``; the default setting and ``maximize=False`` are each
    compared with their expected value (``atol=1e-4``).

    Args:
        cuda: If True, tensors are created on ``"cuda"``, else on ``"cpu"``.
    """
    device = torch.device("cuda" if cuda else "cpu")

    # (extra constructor kwargs, expected EI value)
    scenarios = (
        ({}, 0.19780),
        ({"maximize": False}, 0.6978),
    )

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        posterior = MockPosterior(
            mean=torch.tensor([[-0.5]], **tkwargs),
            variance=torch.ones(1, 1, **tkwargs),
        )
        mm = MockModel(posterior)

        for extra_kwargs, reference in scenarios:
            acqf = ExpectedImprovement(model=mm, best_f=0.0, **extra_kwargs)
            # The mock model ignores its input, so the content is irrelevant.
            X = torch.empty(1, 1, **tkwargs)
            actual = acqf(X)
            expected = torch.tensor(reference, **tkwargs)
            self.assertTrue(torch.allclose(actual, expected, atol=1e-4))
def test_expected_improvement_batch(self, cuda=False):
    """Compare batched analytic EI with reference values.

    Three single-output mock posteriors (means -0.5, 0.0, 0.5; unit variance)
    are evaluated in one call. A two-output mock posterior must then raise
    ``UnsupportedError``.

    Args:
        cuda: If True, tensors are created on ``"cuda"``, else on ``"cpu"``.
    """
    device = torch.device("cuda" if cuda else "cpu")

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}

        # Valid case: batch of three single-output posteriors.
        batch_mean = torch.tensor([-0.5, 0.0, 0.5], **tkwargs).view(3, 1, 1)
        batch_variance = torch.ones(3, 1, 1, **tkwargs)
        single_output_model = MockModel(
            MockPosterior(mean=batch_mean, variance=batch_variance)
        )
        acqf = ExpectedImprovement(model=single_output_model, best_f=0.0)
        X = torch.empty(3, 1, 1, **tkwargs)  # dummy input
        actual = acqf(X)
        expected = torch.tensor([0.19780, 0.39894, 0.69780], **tkwargs)
        self.assertTrue(torch.allclose(actual, expected, atol=1e-4))

        # Invalid case: a multi-output model must be rejected at call time.
        multi_mean = torch.rand(3, 1, 2, **tkwargs)
        multi_variance = torch.rand(3, 1, 2, **tkwargs)
        multi_output_model = MockModel(
            MockPosterior(mean=multi_mean, variance=multi_variance)
        )
        bad_acqf = ExpectedImprovement(model=multi_output_model, best_f=0.0)
        with self.assertRaises(UnsupportedError):
            bad_acqf(X)
def test_penalized_acquisition_function(self):
    """Check ``PenalizedAcquisitionFunction`` values and X_pending handling.

    The penalized value must equal ``raw - lambda * penalty`` exactly. Setting
    pending points must raise ``UnsupportedError`` when the wrapped function
    is analytic EI, and must be stored when it is the MC-based qEI.
    """
    lmbda = 0.1

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        mock_model = MockModel(
            MockPosterior(
                mean=torch.tensor([1.0]), variance=torch.tensor([1.0])
            )
        )

        # --- analytic EI with a group-lasso penalty ---
        init_point = torch.tensor([0.5, 0.5, 0.5], **tkwargs)
        groups = [[0, 2], [1]]
        raw_acqf = ExpectedImprovement(model=mock_model, best_f=1.0)
        penalty = GroupLassoPenalty(init_point=init_point, groups=groups)
        acqf = PenalizedAcquisitionFunction(
            raw_acqf=raw_acqf,
            penalty_func=penalty,
            regularization_parameter=lmbda,
        )

        sample_point = torch.tensor([[1.0, 2.0, 3.0]], **tkwargs)
        raw_value = raw_acqf(sample_point)
        penalty_value = penalty(sample_point)
        reference = raw_value - lmbda * penalty_value
        penalized = acqf(sample_point)
        self.assertTrue(torch.equal(reference, penalized))

        # Pending points are not supported when the raw acqf is analytic.
        X_pending = torch.tensor([0.1, 0.2, 0.3], **tkwargs)
        with self.assertRaises(UnsupportedError):
            acqf.set_X_pending(X_pending)

        # --- MC-based qEI with an L2 penalty ---
        sampler = IIDNormalSampler(num_samples=2)
        raw_acqf_2 = qExpectedImprovement(
            model=mock_model, best_f=0, sampler=sampler
        )
        init_point = torch.tensor([1.0, 1.0, 1.0], **tkwargs)
        l2_module = L2Penalty(init_point=init_point)
        acqf_2 = PenalizedAcquisitionFunction(
            raw_acqf=raw_acqf_2,
            penalty_func=l2_module,
            regularization_parameter=lmbda,
        )

        # Here the pending points must be accepted and exposed unchanged.
        X_pending = torch.tensor([0.1, 0.2, 0.3], **tkwargs)
        acqf_2.set_X_pending(X_pending)
        self.assertTrue(torch.equal(acqf_2.X_pending, X_pending))
def ei_or_nei(
    model: Union[ALEBOGP, ModelListGP],
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]],
    X_observed: Tensor,
    X_pending: Optional[Tensor],
    q: int,
    noiseless: bool,
) -> AcquisitionFunction:
    """Pick analytic EI when it applies, otherwise fall back to MC NEI.

    Analytic EI is chosen only if every one of these holds: a single
    outcome, no outcome constraints, no pending points, ``q == 1`` and
    noiseless evaluations.

    Args:
        model: GP.
        objective_weights: Weights on each outcome for the objective.
        outcome_constraints: Outcome constraints.
        X_observed: Observed points for NEI.
        X_pending: Pending points.
        q: Batch size.
        noiseless: True if evaluations are noiseless.

    Returns:
        An AcquisitionFunction, either analytic EI or MC NEI.
    """
    analytic_applicable = (
        len(objective_weights) == 1
        and outcome_constraints is None
        and X_pending is None
        and q == 1
        and noiseless
    )

    if not analytic_applicable:
        # The NEI construction runs with the max Cholesky size set to 2000.
        with gpytorch.settings.max_cholesky_size(2000):
            return get_NEI(
                model=model,
                objective_weights=objective_weights,
                outcome_constraints=outcome_constraints,
                X_observed=X_observed,
                X_pending=X_pending,
            )

    # The sign of the single weight decides the optimization direction; the
    # incumbent is the best training target in that direction.
    maximize = objective_weights[0] > 0
    if maximize:
        incumbent = model.train_targets.max()
    else:
        incumbent = model.train_targets.min()
    return ExpectedImprovement(model=model, best_f=incumbent, maximize=maximize)
def optimize_loop(model, loss, X_train, y_train, X_test, y_test, bounds):
    """Run one EI-driven step: propose a point, evaluate it, retrain, plot.

    EI (minimizing, with ``y_train.min()`` as incumbent) is optimized within
    ``bounds``. The proposed point is evaluated with ``eval_true_func`` and
    standardized with the module-level ``y_mean`` / ``y_std``. The new pair is
    appended to the training set and passed to ``get_gpr_model`` together
    with ``model``. ``loss`` is accepted but not used here.

    Returns:
        Tuple ``(gpr_model, gpr_mll, X_train_new, y_train_new)``.
    """
    incumbent = y_train.min()
    ei = ExpectedImprovement(model, best_f=incumbent, maximize=False)

    # Only the candidate is needed; the acquisition value is discarded.
    candidate, _ = optimize_acqf(
        ei,
        bounds=bounds,
        q=1,
        num_restarts=100,
        raw_samples=100,
    )
    X_new = candidate.view((1, 1))

    # Evaluate the true function and standardize the response.
    raw_response = eval_true_func(X_new)
    y_new = (raw_response - y_mean) / y_std

    # Grow the training set with the new observation.
    X_train_new = torch.cat((X_train, X_new))
    y_train_new = torch.cat((y_train, y_new))

    plot_acq_func(ei, X_test=X_test, X_train=X_train_new, X_new=X_new)

    # Condition the model on the new observation.
    gpr_model, gpr_mll = get_gpr_model(X_new, y_new, model=model)

    plot_testing(
        gpr_model,
        X_test=X_test,
        X_train=X_train_new,
        y_train=y_train_new,
        y_test=y_test,
        X_new=X_new,
        y_new=y_new,
    )

    return gpr_model, gpr_mll, X_train_new, y_train_new
def plot_testing(model, X_train, y_train, X_test, target, x_dim):
    """Plot the surrogate's posterior along one input dimension.

    Draws the posterior mean, the band returned by
    ``posterior.mvn.confidence_region()`` and the training points, all
    against column ``x_dim`` of the inputs. It then builds a maximizing EI
    with ``y_train.max()`` as incumbent and hands it to ``plot_acq_func``.

    Side effects: updates global matplotlib rc settings, puts ``model`` into
    eval mode and shows the figure via ``plt.show()``.

    Args:
        model: Surrogate exposing ``eval()`` and ``posterior(X)``.
        X_train: Training inputs, indexed as ``X_train[:, x_dim]``.
        y_train: Training targets.
        X_test: Test inputs, indexed as ``X_test[:, x_dim]``.
        target: Text used for the x-axis label.
        x_dim: Index of the input column used as x-axis.
    """
    # Initialize plot
    font = {'size': 20}
    matplotlib.rc('font', **font)
    matplotlib.rcParams['axes.linewidth'] = 1.5
    matplotlib.rcParams['xtick.major.size'] = 8
    matplotlib.rcParams['xtick.major.width'] = 2
    matplotlib.rcParams['ytick.major.size'] = 8
    matplotlib.rcParams['ytick.major.width'] = 2

    _, ax = plt.subplots(figsize=(12, 6))

    # set up model in eval mode
    model.eval()

    with torch.no_grad():
        posterior = model.posterior(X_test)
        # Lower and upper bounds of the confidence band.
        lower, upper = posterior.mvn.confidence_region()

        # Use the same x-axis (column x_dim, on CPU) for the mean line and
        # the band. The mean used to be plotted against the full X_test
        # tensor, which disagrees with the band for multi-column inputs.
        x_axis = X_test[:, x_dim].cpu().numpy()

        ax.plot(x_axis, posterior.mean.cpu().numpy(),
                'b', label='Posterior Mean')
        # Shade between the lower and upper confidence bounds
        ax.fill_between(x_axis, lower.cpu().numpy(), upper.cpu().numpy(),
                        alpha=0.5, label='95% Credibility')

    # Plot training points as black stars
    ax.scatter(X_train[:, x_dim].cpu().numpy(), y_train.cpu().numpy(),
               s=120, c='k', marker='*', label='Training Data')
    ax.set_xlabel(f'{target}')
    ax.set_ylabel('E/Z')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.tight_layout()
    plt.show()

    acq_func = ExpectedImprovement(model, best_f=y_train.max(), maximize=True)
    plot_acq_func(acq_func, X_train, x_dim=x_dim)
def get_new_points_acq_func_vals(model, acq_fn_label, new_points,
                                 best_response, acq_fn_hyperparams=None):
    """Evaluate the selected acquisition function on each row of ``new_points``.

    Args:
        model: Model handed to the acquisition function.
        acq_fn_label: ``'expected_improvement'`` or ``'ucb'``.
        new_points: Tensor indexed by ``shape[0]`` and ``shape[1]``; it is
            viewed as ``(shape[0], 1, shape[1])`` before evaluation.
        best_response: Incumbent passed to EI as ``best_f`` (unused for UCB).
        acq_fn_hyperparams: Optional overrides merged into the UCB defaults
            (``beta=2``).

    Returns:
        The acquisition values returned by the acquisition function.

    Raises:
        NotImplementedError: If ``acq_fn_label`` is not a supported label.
    """
    if acq_fn_label not in ('expected_improvement', 'ucb'):
        raise NotImplementedError(f'acq_fn_label {acq_fn_label} does not '
                                  'match implemented types')

    if acq_fn_label == 'expected_improvement':
        acqf = ExpectedImprovement(model, best_f=best_response, maximize=True)
    else:
        ucb_kwargs = dict(beta=2)
        if acq_fn_hyperparams is not None:
            ucb_kwargs.update(acq_fn_hyperparams)
        acqf = UpperConfidenceBound(model, **ucb_kwargs)

    # Insert a singleton q-dimension so every row is scored independently.
    n_points, n_dims = new_points.shape[0], new_points.shape[1]
    batched_points = new_points.view((n_points, 1, n_dims))
    return acqf(batched_points)
def test_acquisition_functions(self):
    """Check output shapes of several acquisition functions on one model.

    The model from ``_get_data_and_model`` is fitted with a short NUTS run.
    Each acquisition function is then evaluated on random inputs of shape
    ``batch_shape x 1 x 4`` and must return a tensor of shape ``batch_shape``.
    """
    tkwargs = {"device": self.device, "dtype": torch.double}
    train_X, train_Y, train_Yvar, model = self._get_data_and_model(
        infer_noise=True, **tkwargs
    )
    fit_fully_bayesian_model_nuts(
        model,
        warmup_steps=8,
        num_samples=5,
        thinning=2,
        disable_progbar=True,
    )
    sampler = IIDNormalSampler(num_samples=2)

    analytic_acqfs = [
        ExpectedImprovement(model=model, best_f=train_Y.max()),
        ProbabilityOfImprovement(model=model, best_f=train_Y.max()),
        PosteriorMean(model=model),
        UpperConfidenceBound(model=model, beta=4),
    ]
    mc_acqfs = [
        qExpectedImprovement(
            model=model, best_f=train_Y.max(), sampler=sampler
        ),
        qNoisyExpectedImprovement(
            model=model, X_baseline=train_X, sampler=sampler
        ),
        qProbabilityOfImprovement(
            model=model, best_f=train_Y.max(), sampler=sampler
        ),
        qSimpleRegret(model=model, sampler=sampler),
        qUpperConfidenceBound(model=model, beta=4, sampler=sampler),
    ]
    # The multi-objective functions use the same model twice as two outcomes.
    multi_objective_acqfs = [
        qNoisyExpectedHypervolumeImprovement(
            model=ModelListGP(model, model),
            X_baseline=train_X,
            ref_point=torch.zeros(2, **tkwargs),
            sampler=sampler,
        ),
        qExpectedHypervolumeImprovement(
            model=ModelListGP(model, model),
            ref_point=torch.zeros(2, **tkwargs),
            sampler=sampler,
            partitioning=NondominatedPartitioning(
                ref_point=torch.zeros(2, **tkwargs),
                Y=train_Y.repeat([1, 2]),
            ),
        ),
    ]

    batch_shapes = ([5], [6, 5, 2])
    for acqf in [*analytic_acqfs, *mc_acqfs, *multi_objective_acqfs]:
        for batch_shape in batch_shapes:
            test_X = torch.rand(*batch_shape, 1, 4, **tkwargs)
            values = acqf(test_X)
            self.assertEqual(values.shape, torch.Size(batch_shape))
def test_proximal(self):
    """Check ``ProximalAcquisitionFunction`` values, shapes and validation.

    The proximal value must equal the wrapped acquisition value multiplied
    by a normal-density ratio centered on the last training point. The test
    covers a single point, a t-batch, an MC acquisition function and a
    backward pass, followed by the configurations that must be rejected.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        train_X = torch.rand(5, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(**tkwargs).eval()
        EI = ExpectedImprovement(model, best_f=0.0)

        proximal_weights = torch.ones(3, **tkwargs)
        last_train_point = train_X[-1]

        def reference_weight(points):
            # Density ratio relative to the last training point, used as
            # the expected proximal weighting.
            mv_normal = MultivariateNormal(
                last_train_point, torch.diag(proximal_weights)
            )
            numerator = torch.exp(mv_normal.log_prob(points))
            denominator = torch.exp(mv_normal.log_prob(last_train_point))
            return numerator / denominator

        # --- single point ---
        test_X = torch.rand(1, 3, **tkwargs)
        EI_prox = ProximalAcquisitionFunction(
            EI, proximal_weights=proximal_weights
        )
        ei = EI(test_X)
        expected_weight = reference_weight(test_X)
        ei_prox = EI_prox(test_X)
        self.assertTrue(torch.allclose(ei_prox, ei * expected_weight))
        self.assertTrue(ei_prox.shape == torch.Size([1]))

        # --- t-batch with broadcasting ---
        test_X = torch.rand(4, 1, 3, **tkwargs)
        ei = EI(test_X)
        expected_weight = reference_weight(test_X)
        ei_prox = EI_prox(test_X)
        self.assertTrue(
            torch.allclose(ei_prox, ei * expected_weight.flatten())
        )
        self.assertTrue(ei_prox.shape == torch.Size([4]))

        # --- MC acquisition function ---
        qEI = qExpectedImprovement(model, best_f=0.0)
        test_X = torch.rand(4, 1, 3, **tkwargs)
        qEI_prox = ProximalAcquisitionFunction(
            qEI, proximal_weights=proximal_weights
        )
        qei = qEI(test_X)
        expected_weight = reference_weight(test_X)
        qei_prox = qEI_prox(test_X)
        self.assertTrue(
            torch.allclose(qei_prox, qei * expected_weight.flatten())
        )
        self.assertTrue(qei_prox.shape == torch.Size([4]))

        # --- gradient: the backward pass must run without error ---
        test_X = torch.rand(1, 3, requires_grad=True, **tkwargs)
        ei_prox = EI_prox(test_X)
        ei_prox.backward()

        # --- model without train_inputs ---
        bad_model = DummyModel()
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(bad_model, 0.0), proximal_weights
            )

        # --- proximal weights that do not match the training inputs ---
        train_X = torch.rand(5, 1, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        with self.assertRaises(ValueError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(model, 0.0), proximal_weights[:1]
            )
        with self.assertRaises(ValueError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(model, 0.0),
                torch.rand(3, 3, **tkwargs),
            )

        # --- acquisition function that already has pending points ---
        pending_acq = DummyAcquisitionFunction(model)
        pending_acq.set_X_pending(torch.rand(3, 3, **tkwargs))
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(pending_acq, proximal_weights)

        # --- model with multi-batch training inputs ---
        train_X = torch.rand(5, 2, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        bad_single_task = (
            SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        )
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(bad_single_task, 0.0), proximal_weights
            )
def main(argv):
    """Run the EI-based Bayesian-optimization baseline and write CSV results.

    Command-line options parsed from ``argv``:
      * ``-h``: print a usage line and exit.
      * ``-d`` / ``--dataset``: integer dataset id (default 1).
      * ``-n`` / ``--nodeid``: integer node id (default 1).

    For each of ``N_TRIALS`` trials the function generates initial data,
    runs ``N_BATCH`` rounds of fit / EI optimization / data update, and then
    writes a two-row CSV file: the posterior-mean maximizer with its value,
    and the best observed input with its objective value.

    Relies on module-level ``N_TRIALS``, ``N_BATCH``, ``bounds`` and
    ``folder_name``.

    Args:
        argv: Argument list without the program name.
    """
    dataset = 1
    node_id = 1
    try:
        opts, _ = getopt.getopt(argv, "hd:n:", ["dataset=", "nodeid="])
    except getopt.GetoptError:
        print('random parallel with input dataset')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('random parallel with input dataset')
            sys.exit()
        elif opt in ("-d", "--dataset"):
            dataset = int(arg)
        elif opt in ("-n", "--nodeid"):
            node_id = int(arg)

    # Imported once here instead of on every trial iteration.
    from botorch.acquisition import PosteriorMean

    # average over multiple trials
    for trial in range(1, N_TRIALS + 1):
        print(f"\nTrial {trial:>2} of {N_TRIALS} ", end="")
        # NOTE: collected for progress tracking; not consumed in this function.
        best_observed_ei = []

        # call helper functions to generate initial training data and
        # initialize model
        train_x_ei, train_obj_ei, best_observed_value_ei, _ = (
            generate_initial_data(dataset, node_id)
        )
        mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
        best_observed_ei.append(best_observed_value_ei)

        # run N_BATCH rounds of BayesOpt after the initial random batch
        for _iteration in range(1, N_BATCH + 1):
            # fit the models
            fit_gpytorch_model(mll_ei)

            # for best_f, we use the best observed noisy values as an
            # approximation
            EI = ExpectedImprovement(
                model=model_ei,
                best_f=train_obj_ei.max(),
            )

            # optimize and get new observation
            new_x_ei, new_obj_ei = optimize_acqf_and_get_observation(
                EI, dataset, node_id)

            # update training points
            train_x_ei = torch.cat([train_x_ei, new_x_ei])
            train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

            # update progress
            best_observed_ei.append(train_obj_ei.max().item())

            # reinitialize the models so they are ready for fitting on the
            # next iteration; use the current state dict to speed up fitting
            mll_ei, model_ei = initialize_model(
                train_x_ei,
                train_obj_ei,
                model_ei.state_dict(),
            )

        # best observed configuration and its objective value
        best_tensor_ei, indices_ei = torch.max(train_obj_ei, 0)
        train_best_x_ei = train_x_ei[indices_ei].cpu().numpy()

        # maximizer of the posterior mean (model prediction)
        argmax_pmean_ei, max_pmean_ei = optimize_acqf(
            acq_function=PosteriorMean(model_ei),
            bounds=bounds,
            q=1,
            num_restarts=20,
            raw_samples=2048,
        )

        csv_file_name = (
            f'/home/jjshi/modes/botorch/{folder_name}'
            f'/base-line/hp-gp-ei-dataset-{dataset}'
            f'-node-{node_id + 1}-trail{trial}.csv'
        )
        # newline='' is what the csv module expects for files it writes; the
        # with-block closes the file, so no explicit close() is needed.
        with open(csv_file_name, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            # ei prediction
            writer.writerow([
                str(argmax_pmean_ei.cpu().numpy()),
                str(max_pmean_ei.cpu().numpy()),
            ])
            # ei observation
            writer.writerow(
                [str(train_best_x_ei), str(best_tensor_ei.cpu().numpy())])
def acf_constructor(model, objective_weights, outcome_constraints,
                    X_observed, X_pending, **kwargs):
    """Build an analytic Expected Improvement acquisition function.

    Only ``model`` and ``X_observed`` are used; the other arguments are
    accepted and ignored.

    NOTE(review): the incumbent ``best_f`` is the maximum over ``X_observed``
    (the observed inputs), not over observed outcomes. Confirm that this is
    intentional.
    """
    incumbent = torch.max(X_observed)
    return ExpectedImprovement(model, best_f=incumbent)