def test_q_expected_improvement(self, cuda=False):
    """Check qExpectedImprovement on a mocked posterior with all-zero samples.

    For each dtype the test verifies that
    * the acquisition value is 0.0 for ``best_f=0`` and 1.0 for ``best_f=-1``;
    * the sampler's base samples have shape ``2 x 1 x 1 x 1``;
    * base samples are kept between calls unless the sampler was created
      with ``resample=True``.

    Args:
        cuda: If True, run the test on the CUDA device instead of the CPU.
    """
    device = torch.device("cuda" if cuda else "cpu")
    expected_base_shape = torch.Size([2, 1, 1, 1])
    # (sampler class, constructor kwargs, base samples must stay fixed?)
    # Samplers are built lazily inside the loop so they are created right
    # before the acquisition function that uses them.
    sampler_cases = (
        (IIDNormalSampler, {"num_samples": 2, "seed": 12345}, True),
        (SobolQMCNormalSampler, {"num_samples": 2}, True),
        (SobolQMCNormalSampler, {"num_samples": 2, "resample": True}, False),
    )
    for dtype in (torch.float, torch.double):
        tensor_opts = {"device": device, "dtype": dtype}
        # the event shape is `b x q x t` = 1 x 1 x 1
        zero_samples = torch.zeros(1, 1, 1, **tensor_opts)
        mock_model = MockModel(MockPosterior(samples=zero_samples))
        # X is `q x d` = 1 x 1; it is a dummy and unused because of mocking
        X = torch.zeros(1, 1, **tensor_opts)

        # basic value test, then the same with a shifted best_f
        iid_sampler = IIDNormalSampler(num_samples=2)
        for best_f, expected_value in ((0, 0.0), (-1, 1.0)):
            acqf = qExpectedImprovement(
                model=mock_model, best_f=best_f, sampler=iid_sampler
            )
            self.assertEqual(acqf(X).item(), expected_value)

        # base-sample behaviour for IID / QMC / QMC-with-resampling
        for sampler_cls, sampler_kwargs, frozen in sampler_cases:
            acqf = qExpectedImprovement(
                model=mock_model,
                best_f=0,
                sampler=sampler_cls(**sampler_kwargs),
            )
            self.assertEqual(acqf(X).item(), 0.0)
            self.assertEqual(
                acqf.sampler.base_samples.shape, expected_base_shape
            )
            snapshot = acqf.sampler.base_samples.clone()
            acqf(X)
            check = self.assertTrue if frozen else self.assertFalse
            check(torch.equal(acqf.sampler.base_samples, snapshot))
def test_q_expected_improvement(self, cuda=False):
    """Exercise qExpectedImprovement against a mock model returning zeros.

    Verifies the acquisition value for two ``best_f`` settings and that the
    sampler's base samples are reused (IID with seed, Sobol QMC) or redrawn
    (Sobol QMC with ``resample=True``) across repeated evaluations.

    Args:
        cuda: Selects the CUDA device when True, the CPU otherwise.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")

    def _check_base_samples(model, X, sampler, expect_fixed):
        # Evaluate twice and compare the sampler's base samples in between.
        acqf = qExpectedImprovement(model=model, best_f=0, sampler=sampler)
        value = acqf(X)
        self.assertEqual(value.item(), 0.0)
        self.assertEqual(
            acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])
        )
        before = acqf.sampler.base_samples.clone()
        acqf(X)
        unchanged = torch.equal(acqf.sampler.base_samples, before)
        if expect_fixed:
            self.assertTrue(unchanged)
        else:
            self.assertFalse(unchanged)

    for dtype in (torch.float, torch.double):
        # the event shape is `b x q x t` = 1 x 1 x 1
        posterior_samples = torch.zeros(1, 1, 1, device=device, dtype=dtype)
        mm = MockModel(MockPosterior(samples=posterior_samples))
        # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, device=device, dtype=dtype)

        # basic test
        plain_sampler = IIDNormalSampler(num_samples=2)
        ei_at_zero = qExpectedImprovement(
            model=mm, best_f=0, sampler=plain_sampler
        )
        self.assertEqual(ei_at_zero(X).item(), 0.0)

        # test shifting best_f value
        ei_shifted = qExpectedImprovement(
            model=mm, best_f=-1, sampler=plain_sampler
        )
        self.assertEqual(ei_shifted(X).item(), 1.0)

        # basic test, no resample
        _check_base_samples(
            mm, X, IIDNormalSampler(num_samples=2, seed=12345), True
        )
        # basic test, qmc, no resample
        _check_base_samples(mm, X, SobolQMCNormalSampler(num_samples=2), True)
        # basic test, qmc, resample
        _check_base_samples(
            mm, X, SobolQMCNormalSampler(num_samples=2, resample=True), False
        )
def optimize_qparego_and_get_observation(model, train_obj, sampler):
    """Run one qParEGO step and evaluate the proposed candidates.

    Builds ``BATCH_SIZE`` qExpectedImprovement acquisition functions, each
    with its own randomly sampled Chebyshev scalarization of the objectives,
    optimizes them with ``optimize_acqf_list`` and evaluates ``problem`` at
    the unnormalized candidates.

    Args:
        model: Fitted surrogate model handed to every acquisition function.
        train_obj: Observed objective values; used both to build the
            scalarization and to compute the incumbent ``best_f``.
        sampler: MC sampler shared by all acquisition functions.

    Returns:
        Tuple ``(new_x, new_obj)``: the candidates in the problem's original
        bounds and the values ``problem`` returns for them.
    """

    def _make_acqf():
        # Every acquisition function draws fresh weights from the simplex.
        simplex_weights = sample_simplex(
            problem.num_objectives, **tkwargs
        ).squeeze()
        scalarized = GenericMCObjective(
            get_chebyshev_scalarization(weights=simplex_weights, Y=train_obj)
        )
        return qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=scalarized,
            best_f=scalarized(train_obj).max(),
            sampler=sampler,
        )

    acqfs = [_make_acqf() for _ in range(BATCH_SIZE)]

    # optimize
    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf_list(
        acq_function_list=acqfs,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options=optimizer_options,
    )

    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    return new_x, problem(new_x)
def select_query_point(self, batch_size=1):
    """Propose the next point(s) to evaluate.

    While fewer than ``self.initial_random_samples`` points have been
    observed, the points are drawn uniformly at random inside
    ``self.boundaries``. Afterwards they are obtained by maximizing a
    qExpectedImprovement acquisition function built on ``self.model``.

    :param batch_size (int): number of query points to return
    :return: (batch_size x d) torch tensor in the random phase; in the
        acquisition phase, whatever ``global_optimization`` returns
        (presumably also batch_size x d -- confirm against that helper)
    """
    # TODO: Make the random initialization its own function so it can be done
    # separately from the acquisition argmin

    # Initialize with random points
    if len(self.X) < self.initial_random_samples:
        lower = self.boundaries[:, 0]
        upper = self.boundaries[:, 1]
        # Draw one row per requested query point. Previously a single point
        # was returned regardless of batch_size. For batch_size == 1 the
        # shape (1 x d) and the random draws are the same as before.
        unit_draws = self.rng.uniform(
            size=(batch_size, self.boundaries.shape[0])
        )
        X_query = torch.from_numpy(unit_draws * (upper - lower) + lower)

    # Query by maximizing the acquisition function
    else:
        print("---------------------")
        print('querying')

        print("self.X.shape: {}".format(self.X.shape))
        print("self.y.shape: {}".format(self.y.shape))

        # Initialize model. This only happens on the first acquisition
        # query; later updates of the model are not visible in this method.
        if len(self.X) == self.initial_random_samples:
            self.model = ExactGaussianProcess(
                train_x=self.X.float(),
                train_y=self.y.float(),
            )

        # Acquisition function, using the best observed value as incumbent
        qEI = qExpectedImprovement(
            model=self.model,
            best_f=torch.max(self.y).item(),
        )

        print("batch_size: {}".format(batch_size))

        # Optimize for a (batch_size x d_embedding) tensor query point
        X_query = global_optimization(
            objective_function=qEI,
            boundaries=torch.from_numpy(self.boundaries).float(),
            batch_size=batch_size,  # number of query points to suggest
        )

        print("batched X_query: {}".format(X_query))
        print("batched X_query.shape: {}".format(X_query.shape))
        print("X concatenated: {}".format(self.X.shape))

    return X_query
def test_fixed_features(self, cuda=False):
    """Compare FixedFeatureAcquisitionFunction against the wrapped qEI.

    A qExpectedImprovement on a small SingleTaskGP is evaluated on full
    inputs and, through the fixed-feature wrapper, on inputs with one column
    removed and supplied as a fixed value. Covered cases: a single point, a
    list of fixed values (smoke test only), a q-batch, a broadcast t-batch,
    gradients, and the ValueError raised for inputs of the wrong width.

    Args:
        cuda: If True, run on the CUDA device instead of the CPU.
    """
    device = torch.device("cuda" if cuda else "cpu")
    train_X = torch.rand(5, 3, device=device)
    train_Y = train_X.norm(dim=-1)
    gp = SingleTaskGP(train_X, train_Y).to(device=device).eval()
    base_acqf = qExpectedImprovement(gp, best_f=0.0)

    # test single point: last column fixed
    point = torch.rand(1, 3, device=device)
    wrapped = FixedFeatureAcquisitionFunction(
        base_acqf, d=3, columns=[2], values=point[..., -1:]
    )
    full_value = base_acqf(point)
    reduced_value = wrapped(point[..., :-1])
    self.assertTrue(torch.allclose(full_value, reduced_value))

    # test list input (only checks that evaluation runs)
    wrapped = FixedFeatureAcquisitionFunction(
        base_acqf, d=3, columns=[2], values=[0.5]
    )
    reduced_value = wrapped(point[..., :-1])

    # test q-batch: middle column fixed
    q_batch = torch.rand(2, 3, device=device)
    wrapped = FixedFeatureAcquisitionFunction(
        base_acqf, d=3, columns=[1], values=q_batch[..., [1]]
    )
    full_value = base_acqf(q_batch)
    reduced_value = wrapped(q_batch[..., [0, 2]])
    self.assertTrue(torch.allclose(full_value, reduced_value))

    # test t-batch with broadcasting
    t_batch = torch.rand(2, 3, device=device).expand(4, 2, 3)
    wrapped = FixedFeatureAcquisitionFunction(
        base_acqf, d=3, columns=[2], values=t_batch[0, :, -1:]
    )
    full_value = base_acqf(t_batch)
    reduced_value = wrapped(t_batch[..., :-1])
    self.assertTrue(torch.allclose(full_value, reduced_value))

    # test gradient
    grad_X = torch.rand(1, 3, device=device, requires_grad=True)
    grad_X_reduced = grad_X[..., :-1].detach().clone().requires_grad_(True)
    full_value = base_acqf(grad_X)
    wrapped = FixedFeatureAcquisitionFunction(
        base_acqf, d=3, columns=[2], values=grad_X[..., [2]].detach()
    )
    reduced_value = wrapped(grad_X_reduced)
    self.assertTrue(torch.allclose(full_value, reduced_value))
    full_value.backward()
    reduced_value.backward()
    self.assertTrue(
        torch.allclose(grad_X.grad[..., :-1], grad_X_reduced.grad)
    )

    # test error b/c of incompatible input shapes
    with self.assertRaises(ValueError):
        wrapped(grad_X)
def test_penalized_acquisition_function(self):
    """Test PenalizedAcquisitionFunction values and X_pending handling.

    For each dtype:
    * the penalized value must equal ``raw - lambda * penalty`` exactly for
      an analytic ExpectedImprovement with a group-lasso penalty;
    * setting ``X_pending`` on the analytic variant raises UnsupportedError;
    * setting ``X_pending`` on a qExpectedImprovement-based variant (with an
      L2 penalty) stores the tensor on the wrapper.
    """
    regularization = 0.1
    for dtype in (torch.float, torch.double):
        tensor_opts = {"device": self.device, "dtype": dtype}
        mock_model = MockModel(
            MockPosterior(
                mean=torch.tensor([1.0]), variance=torch.tensor([1.0])
            )
        )

        # analytic EI with a group-lasso penalty
        analytic_ei = ExpectedImprovement(model=mock_model, best_f=1.0)
        group_lasso = GroupLassoPenalty(
            init_point=torch.tensor([0.5, 0.5, 0.5], **tensor_opts),
            groups=[[0, 2], [1]],
        )
        penalized_ei = PenalizedAcquisitionFunction(
            raw_acqf=analytic_ei,
            penalty_func=group_lasso,
            regularization_parameter=regularization,
        )

        query = torch.tensor([[1.0, 2.0, 3.0]], **tensor_opts)
        raw_value = analytic_ei(query)
        penalty_value = group_lasso(query)
        expected = raw_value - regularization * penalty_value
        self.assertTrue(torch.equal(expected, penalized_ei(query)))

        # testing X_pending for analytic raw_acqfn (EI)
        pending = torch.tensor([0.1, 0.2, 0.3], **tensor_opts)
        with self.assertRaises(UnsupportedError):
            penalized_ei.set_X_pending(pending)

        # testing X_pending for non-analytic raw_acqfn (qEI)
        mc_ei = qExpectedImprovement(
            model=mock_model,
            best_f=0,
            sampler=IIDNormalSampler(num_samples=2),
        )
        l2_penalty = L2Penalty(
            init_point=torch.tensor([1.0, 1.0, 1.0], **tensor_opts)
        )
        penalized_qei = PenalizedAcquisitionFunction(
            raw_acqf=mc_ei,
            penalty_func=l2_penalty,
            regularization_parameter=regularization,
        )
        pending = torch.tensor([0.1, 0.2, 0.3], **tensor_opts)
        penalized_qei.set_X_pending(pending)
        self.assertTrue(torch.equal(penalized_qei.X_pending, pending))
def optimize_qparego_and_get_observation(model, train_obj, train_con,
                                         sampler, obj_func, time_list,
                                         global_start_time):
    """Run one constrained qParEGO step and evaluate the new candidates.

    A single qExpectedImprovement acquisition function is built from a
    randomly weighted Chebyshev scalarization wrapped in a constrained MC
    objective, optimized with ``optimize_acqf_list``, and every resulting
    candidate is evaluated with ``obj_func``.

    Args:
        model: Surrogate model passed to the acquisition function.
        train_obj: Observed objective values.
        train_con: Observed constraint values.
        sampler: MC sampler for the acquisition function.
        obj_func: Callable evaluated on each candidate; its result is indexed
            with the keys ``'objs'`` and ``'constraints'``.
        time_list: List that receives the elapsed wall-clock time (relative
            to ``global_start_time``) after each evaluation.
        global_start_time: Reference timestamp from ``time.time()``.

    Returns:
        Tuple ``(new_x, new_obj, new_con)`` with one row per candidate.
    """
    # sample random weights
    weights = sample_simplex(problem.num_objs, **tkwargs).squeeze()
    # construct augmented Chebyshev scalarization
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)
    # initialize ConstrainedMCObjective
    constrained_objective = get_constrained_mc_objective(
        train_obj=train_obj,
        train_con=train_con,
        scalarization=scalarization,
    )
    # the incumbent is computed on objectives and constraints jointly
    train_y = torch.cat([train_obj, train_con], dim=-1)
    acq_func = qExpectedImprovement(  # pyre-ignore: [28]
        model=model,
        objective=constrained_objective,
        best_f=constrained_objective(train_y).max(),
        sampler=sampler,
    )

    # optimize
    candidates, _ = optimize_acqf_list(
        acq_function_list=[acq_func],
        bounds=standard_bounds,
        num_restarts=20,
        raw_samples=1024,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200},
    )

    # observe new values
    new_x = candidates.detach()
    n_new = new_x.shape[0]
    observed_objs, observed_cons = [], []
    for candidate in new_x:
        outcome = obj_func(candidate)
        observed_objs.append(outcome['objs'])
        observed_cons.append(outcome['constraints'])
        time_list.append(time.time() - global_start_time)

    new_obj = torch.tensor(observed_objs, **tkwargs).reshape(n_new, -1)
    new_con = torch.tensor(observed_cons, **tkwargs).reshape(n_new, -1)
    print(f'evaluate {new_x.shape[0]} configs on real objective')
    return new_x, new_obj, new_con
def test_acquisition_functions(self):
    """Check output shapes of acquisition functions on a fully Bayesian model.

    After a short NUTS fit, a range of analytic, Monte-Carlo and
    multi-objective acquisition functions is evaluated on random inputs of
    shape ``batch_shape x 1 x 4``; each result must have shape
    ``batch_shape``.
    """
    tkwargs = {"device": self.device, "dtype": torch.double}
    train_X, train_Y, _train_Yvar, model = self._get_data_and_model(
        infer_noise=True, **tkwargs
    )
    fit_fully_bayesian_model_nuts(
        model, warmup_steps=8, num_samples=5, thinning=2, disable_progbar=True
    )
    sampler = IIDNormalSampler(num_samples=2)

    def _origin():
        # fresh 2-d reference point for the multi-objective functions
        return torch.zeros(2, **tkwargs)

    analytic = [
        ExpectedImprovement(model=model, best_f=train_Y.max()),
        ProbabilityOfImprovement(model=model, best_f=train_Y.max()),
        PosteriorMean(model=model),
        UpperConfidenceBound(model=model, beta=4),
    ]
    monte_carlo = [
        qExpectedImprovement(
            model=model, best_f=train_Y.max(), sampler=sampler
        ),
        qNoisyExpectedImprovement(
            model=model, X_baseline=train_X, sampler=sampler
        ),
        qProbabilityOfImprovement(
            model=model, best_f=train_Y.max(), sampler=sampler
        ),
        qSimpleRegret(model=model, sampler=sampler),
        qUpperConfidenceBound(model=model, beta=4, sampler=sampler),
    ]
    multi_objective = [
        qNoisyExpectedHypervolumeImprovement(
            model=ModelListGP(model, model),
            X_baseline=train_X,
            ref_point=_origin(),
            sampler=sampler,
        ),
        qExpectedHypervolumeImprovement(
            model=ModelListGP(model, model),
            ref_point=_origin(),
            sampler=sampler,
            partitioning=NondominatedPartitioning(
                ref_point=_origin(), Y=train_Y.repeat([1, 2])
            ),
        ),
    ]

    for acqf in analytic + monte_carlo + multi_objective:
        for batch_shape in ([5], [6, 5, 2]):
            test_X = torch.rand(*batch_shape, 1, 4, **tkwargs)
            output_shape = acqf(test_X).shape
            self.assertEqual(output_shape, torch.Size(batch_shape))
def test_q_expected_improvement_batch(self):
    """Test qExpectedImprovement in batch mode on a mocked posterior.

    The mocked posterior samples are zero except for a single 1.0 in the
    first batch element. The test checks the per-batch values for two
    ``best_f`` settings, the (batch-collapsed) base-sample shape
    ``2 x 1 x 2 x 1``, and that base samples persist across calls unless the
    sampler was built with ``resample=True``.
    """
    collapsed_shape = torch.Size([2, 1, 2, 1])

    def _assert_pair(result, first, second):
        # per-batch acquisition values
        self.assertEqual(result[0].item(), first)
        self.assertEqual(result[1].item(), second)

    def _assert_base_samples(acqf, inputs, expect_fixed):
        # evaluate, check values and base-sample shape, then re-evaluate and
        # compare the base samples against the snapshot
        _assert_pair(acqf(inputs), 1.0, 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape, collapsed_shape)
        snapshot = acqf.sampler.base_samples.clone()
        acqf(inputs)
        same = torch.equal(acqf.sampler.base_samples, snapshot)
        if expect_fixed:
            self.assertTrue(same)
        else:
            self.assertFalse(same)

    for dtype in (torch.float, torch.double):
        # the event shape is `b x q x t` = 2 x 2 x 1
        samples = torch.zeros(2, 2, 1, device=self.device, dtype=dtype)
        samples[0, 0, 0] = 1.0
        mm = MockModel(MockPosterior(samples=samples))
        # X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, 1, device=self.device, dtype=dtype)

        # test batch mode
        sampler = IIDNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        _assert_pair(acqf(X), 1.0, 0.0)

        # test shifting best_f value
        acqf = qExpectedImprovement(model=mm, best_f=-1, sampler=sampler)
        _assert_pair(acqf(X), 2.0, 1.0)

        # test batch mode, no resample
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        _assert_base_samples(acqf, X, True)  # 1-dim batch
        # 2-dim batch: the base samples should have the batch dim collapsed
        _assert_base_samples(acqf, X.expand(2, 1, 1), True)

        # test batch mode, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        _assert_base_samples(acqf, X, True)

        # test batch mode, qmc, resample
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        _assert_base_samples(acqf, X, False)  # 1-dim batch
        # 2-dim batch: the base samples should have the batch dim collapsed
        _assert_base_samples(acqf, X.expand(2, 1, 1), False)
def optimize():
    """Compare qEI, qNEI and random sampling over several BayesOpt trials.

    For each of ``N_TRIALS`` trials the function draws initial data, then
    runs ``N_BATCH`` rounds in which a qExpectedImprovement and a
    qNoisyExpectedImprovement acquisition function are optimized and a
    random baseline is sampled. Observations are stored normalized and
    negated; ``_denormalize`` maps them back (``-(y * std_y + mean_y)``),
    which the log messages report as lap / traverse times.

    Side effects:
        * prints progress for every trial and batch;
        * if ``SAVE_RESULTS``: writes an ``.npz`` archive under ``results/``;
        * if ``PLOT_RESULTS``: saves and shows a plot of the mean best value
          with 95% confidence bands under ``results/``.
    """
    import os  # local import: only needed to create the output directory

    verbose = True

    best_observed_all_ei, best_observed_all_nei, best_random_all = [], [], []
    train_x_all_ei, train_x_all_nei, train_x_all_random = [], [], []
    train_y_all_ei, train_y_all_nei, train_y_all_random = [], [], []

    # statistics over multiple trials
    for trial in range(1, N_TRIALS + 1):

        print('\nTrial {} of {}'.format(trial, N_TRIALS))
        best_observed_ei, best_observed_nei = [], []
        best_random = []

        # generate initial training data and initialize model
        print('\nGenerating {} random samples'.format(N_INITIAL_SAMPLES))
        train_x_ei, train_y_ei, best_y_ei, mean_y, std_y = \
            generate_initial_data(n_samples=N_INITIAL_SAMPLES)

        def _denormalize(value):
            # undo the normalization and the sign flip of the objective
            return -(value * std_y + mean_y)

        def _first_value(new_y):
            # Move to host memory first: Tensor.numpy() only works on CPU
            # tensors, and the rest of this function uses .cpu().numpy().
            return _denormalize(new_y.cpu().numpy().ravel()[0])

        mll_ei, model_ei = initialize_model(train_x_ei, train_y_ei)

        # all three strategies start from the same initial data
        train_x_nei, train_y_nei, best_y_nei = \
            train_x_ei, train_y_ei, best_y_ei
        mll_nei, model_nei = initialize_model(train_x_nei, train_y_nei)

        train_x_random, train_y_random, best_y_random = \
            train_x_ei, train_y_ei, best_y_ei

        best_observed_ei.append(_denormalize(best_y_ei))
        best_observed_nei.append(_denormalize(best_y_nei))
        best_random.append(_denormalize(best_y_random))

        # run N_BATCH rounds of BayesOpt after the initial random batch
        for iteration in range(1, N_BATCH + 1):

            print('\nBatch {} of {}\n'.format(iteration, N_BATCH))
            t0 = time.time()

            # fit the models
            fit_gpytorch_model(mll_ei)
            fit_gpytorch_model(mll_nei)

            # update acquisition functions
            qEI = qExpectedImprovement(
                model=model_ei,
                best_f=train_y_ei.max(),
                sampler=qmc_sampler,
            )
            qNEI = qNoisyExpectedImprovement(
                model=model_nei,
                X_baseline=train_x_nei,
                sampler=qmc_sampler,
            )

            # optimize acquisition function and evaluate new sample
            new_x_ei, new_y_ei = optimize_acqf_and_get_observation(
                qEI, mean_y=mean_y, std_y=std_y)
            print('EI: time to traverse is {:.4f}s'.format(
                _first_value(new_y_ei)))

            new_x_nei, new_y_nei = optimize_acqf_and_get_observation(
                qNEI, mean_y=mean_y, std_y=std_y)
            print('NEI: time to traverse is {:.4f}s'.format(
                _first_value(new_y_nei)))

            new_x_random, new_y_random = sample_random_observations(
                mean_y=mean_y, std_y=std_y)
            print('Random: time to traverse is {:.4f}s'.format(
                _first_value(new_y_random)))

            # update training points
            train_x_ei = torch.cat([train_x_ei, new_x_ei])
            train_y_ei = torch.cat([train_y_ei, new_y_ei])

            train_x_nei = torch.cat([train_x_nei, new_x_nei])
            train_y_nei = torch.cat([train_y_nei, new_y_nei])

            train_x_random = torch.cat([train_x_random, new_x_random])
            train_y_random = torch.cat([train_y_random, new_y_random])

            # update progress
            best_value_ei = _denormalize(train_y_ei.max().item())
            best_value_nei = _denormalize(train_y_nei.max().item())
            best_value_random = _denormalize(train_y_random.max().item())

            best_observed_ei.append(best_value_ei)
            best_observed_nei.append(best_value_nei)
            best_random.append(best_value_random)

            # reinitialize the models so they are ready for fitting on next
            # iteration; use the current state dict to speed up fitting
            mll_ei, model_ei = initialize_model(
                train_x_ei,
                train_y_ei,
                model_ei.state_dict(),
            )
            mll_nei, model_nei = initialize_model(
                train_x_nei,
                train_y_nei,
                model_nei.state_dict(),
            )
            t1 = time.time()

            if verbose:
                print(
                    'best lap time (random, qEI, qNEI) = {:.2f}, {:.2f}, {:.2f}, time to compute = {:.2f}s'
                    .format(best_value_random, best_value_ei, best_value_nei,
                            t1 - t0))
            else:
                print(".")

        best_observed_all_ei.append(best_observed_ei)
        best_observed_all_nei.append(best_observed_nei)
        best_random_all.append(best_random)

        train_x_all_ei.append(train_x_ei.cpu().numpy())
        train_x_all_nei.append(train_x_nei.cpu().numpy())
        train_x_all_random.append(train_x_random.cpu().numpy())

        train_y_all_ei.append(_denormalize(train_y_ei.cpu().numpy()))
        train_y_all_nei.append(_denormalize(train_y_nei.cpu().numpy()))
        train_y_all_random.append(_denormalize(train_y_random.cpu().numpy()))

    iters = np.arange(N_BATCH + 1) * BATCH_SIZE
    y_ei = np.asarray(best_observed_all_ei)
    y_nei = np.asarray(best_observed_all_nei)
    y_rnd = np.asarray(best_random_all)
    savestr = time.strftime('%Y%m%d%H%M%S')

    # Make sure the output directory exists so a long run is not lost to a
    # FileNotFoundError when saving.
    if SAVE_RESULTS or PLOT_RESULTS:
        os.makedirs('results', exist_ok=True)

    #####################################################################
    # save results

    if SAVE_RESULTS:
        np.savez(
            'results/{}_raceline_data-{}.npz'.format('UCB', savestr),
            y_ei=y_ei,
            y_nei=y_nei,
            y_rnd=y_rnd,
            iters=iters,
            train_x_all_ei=np.asarray(train_x_all_ei),
            train_x_all_nei=np.asarray(train_x_all_nei),
            train_x_all_random=np.asarray(train_x_all_random),
            train_y_all_ei=np.asarray(train_y_all_ei),
            train_y_all_nei=np.asarray(train_y_all_nei),
            train_y_all_random=np.asarray(train_y_all_random),
            SEED=SEED,
        )

    #####################################################################
    # plot results

    if PLOT_RESULTS:

        def ci(y):
            # half-width of a 95% confidence interval across trials
            return 1.96 * y.std(axis=0) / np.sqrt(N_TRIALS)

        plt.figure()
        plt.gca().set_prop_cycle(None)
        plt.plot(iters, y_rnd.mean(axis=0), linewidth=1.5)
        plt.plot(iters, y_ei.mean(axis=0), linewidth=1.5)
        plt.plot(iters, y_nei.mean(axis=0), linewidth=1.5)
        # restart the colour cycle so each band matches its line
        plt.gca().set_prop_cycle(None)
        plt.fill_between(iters,
                         y_rnd.mean(axis=0) - ci(y_rnd),
                         y_rnd.mean(axis=0) + ci(y_rnd),
                         label='random', alpha=0.2)
        plt.fill_between(iters,
                         y_ei.mean(axis=0) - ci(y_ei),
                         y_ei.mean(axis=0) + ci(y_ei),
                         label='qEI', alpha=0.2)
        plt.fill_between(iters,
                         y_nei.mean(axis=0) - ci(y_nei),
                         y_nei.mean(axis=0) + ci(y_nei),
                         label='qNEI', alpha=0.2)
        plt.xlabel('number of observations (beyond initial points)')
        plt.ylabel('best lap times')
        plt.grid(True)
        plt.legend(loc=0)
        plt.savefig('results/{}_laptimes-{}.png'.format('UCB', savestr),
                    dpi=600)
        plt.show()
def bo_qei(config):
    """Optimize designs x in an offline optimization problem using
    Bayesian optimization with the qExpectedImprovement acquisition function.

    Pipeline: build the task (optionally normalizing x/y and, for discrete
    tasks, either mapping to logits or encoding with a VAE), train an
    ensemble of forward models, seed a GP with the top ``bo_gp_samples``
    dataset points, run ``bo_iterations`` rounds of qEI-based BayesOpt
    against either the ground truth or the learned ensemble, and finally
    save and score the top ``solver_samples`` designs.

    Args:
        config: dict
            a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(task,
                                  hidden_size=config['vae_hidden_size'],
                                  latent_size=config['vae_latent_size'],
                                  activation=config['vae_activation'],
                                  kernel_size=config['vae_kernel_size'],
                                  num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build the data pipeline for the VAE
        train_data, val_data = build_pipeline(
            x=x, y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the VAE
        vae_trainer.launch(train_data, val_data, logger,
                           config['vae_epochs'])

        # map the x values to latent space and standardize them
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # build the data pipeline for the ensemble
    train_data, val_data = build_pipeline(
        x=x, y=y,
        bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make one forward model per bootstrap
    forward_models = [
        ForwardModel(input_shape,
                     hidden_size=config['hidden_size'],
                     num_layers=config['num_layers'],
                     initial_max_std=config['initial_max_std'],
                     initial_min_std=config['initial_min_std'])
        for _ in range(config['bootstraps'])
    ]

    # create a trainer for the ensemble of forward models
    ensemble = Ensemble(forward_models,
                        forward_model_optim=tf.keras.optimizers.Adam,
                        forward_model_lr=config['ensemble_lr'])

    # train the ensemble
    ensemble.launch(train_data, val_data, logger,
                    config['ensemble_epochs'])

    # select the top `bo_gp_samples` initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['bo_gp_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    initial_y = tf.gather(y, indices, axis=0)

    # BoTorch / torch are imported lazily, only when this baseline runs
    from botorch.models import FixedNoiseGP, ModelListGP
    from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.optim import optimize_acqf
    from botorch import fit_gpytorch_model
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.exceptions import BadInitialCandidatesWarning

    import torch
    import time
    import warnings

    warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
    warnings.filterwarnings('ignore', category=RuntimeWarning)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32

    def objective(input_x):
        """Score a batch of flattened designs with the TF task or ensemble."""
        original_x = input_x

        # convert the tensor into numpy before using a TF model
        # (.cpu() is a no-op for tensors that already live on the CPU)
        input_x = input_x.detach().cpu().numpy()
        batch_shape = input_x.shape[:-1]

        # pass the input into a TF model
        input_x = tf.reshape(input_x, [-1, *input_shape])

        # optimize the ground truth or the learned model
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(
                    vae_model.decoder_cnn.predict(
                        input_x * standard_dev + mean),
                    axis=2, output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()

        ys = value.numpy()
        # ndarray.reshape returns a new array, so the result must be
        # assigned (the previous code discarded it, making it a no-op)
        ys = ys.reshape(list(batch_shape) + [1])

        # convert the scores back to pytorch tensors
        return torch.tensor(ys).type_as(original_x).to(device, dtype=dtype)

    NOISE_SE = config['bo_noise_se']
    train_yvar = torch.tensor(NOISE_SE ** 2, device=device, dtype=dtype)

    def initialize_model(train_x, train_obj, state_dict=None):
        """Build the GP and its marginal log likelihood, optionally warm-started."""
        # define models for objective
        model_obj = FixedNoiseGP(train_x, train_obj,
                                 train_yvar.expand_as(train_obj)).to(train_x)

        # combine into a multi-output GP model
        model = ModelListGP(model_obj)
        mll = SumMarginalLogLikelihood(model.likelihood, model)

        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
        return mll, model

    def obj_callable(Z):
        # the objective is the first (and only) model output
        return Z[..., 0]

    # define the MC objective used by the acquisition function
    obj = GenericMCObjective(obj_callable)

    BATCH_SIZE = config['bo_batch_size']

    # per-dimension box bounds taken from the (possibly encoded) dataset
    bounds = torch.tensor([
        np.min(x, axis=0).reshape([input_size]).tolist(),
        np.max(x, axis=0).reshape([input_size]).tolist()
    ], device=device, dtype=dtype)

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns a new candidate
        and a noisy observation, or None if the optimizer raised a
        RuntimeError."""
        # optimize
        try:
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=bounds,
                q=BATCH_SIZE,
                num_restarts=config['bo_num_restarts'],
                raw_samples=config['bo_raw_samples'],  # used for initialization heuristic
                options={
                    "batch_limit": config['bo_batch_limit'],
                    "maxiter": config['bo_maxiter']
                })
        except RuntimeError:
            # signalled to the caller, which stops the BayesOpt loop
            return None

        # observe new values
        new_x = candidates.detach()
        exact_obj = objective(candidates)
        new_obj = exact_obj + NOISE_SE * torch.randn_like(exact_obj)
        return new_x, new_obj

    N_BATCH = config['bo_iterations']
    MC_SAMPLES = config['bo_mc_samples']

    best_observed_ei = []

    # convert the initial designs to flat torch tensors and initialize model
    train_x_ei = initial_x.numpy().reshape([initial_x.shape[0], input_size])
    train_x_ei = torch.tensor(train_x_ei).to(device, dtype=dtype)

    train_obj_ei = initial_y.numpy().reshape([initial_y.shape[0], 1])
    train_obj_ei = torch.tensor(train_obj_ei).to(device, dtype=dtype)

    best_observed_value_ei = train_obj_ei.max().item()
    mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
    best_observed_ei.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation
        qEI = qExpectedImprovement(model=model_ei,
                                   best_f=train_obj_ei.max(),
                                   sampler=qmc_sampler,
                                   objective=obj)

        # optimize and get new observation
        result = optimize_acqf_and_get_observation(qEI)
        if result is None:
            print("RuntimeError was encountered, most likely a "
                  "'symeig_cpu: the algorithm failed to converge'")
            break
        new_x_ei, new_obj_ei = result

        # update training points
        train_x_ei = torch.cat([train_x_ei, new_x_ei])
        train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

        # update progress: best observed objective value so far (the
        # previous code applied `obj` to the inputs, which reported the
        # largest first input coordinate instead)
        best_value_ei = obj(train_obj_ei).max().item()
        best_observed_ei.append(best_value_ei)

        # reinitialize the models so they are ready for fitting on next
        # iteration; use the current state dict to speed up fitting
        mll_ei, model_ei = initialize_model(
            train_x_ei, train_obj_ei, model_ei.state_dict())

        t1 = time.time()
        print(
            f"Batch {iteration:>2}: best_value = "
            f"({best_value_ei:>4.2f}), "
            f"time = {t1 - t0:>4.2f}.", end="")

    x_sol = train_x_ei.detach().cpu().numpy()
    y_sol = train_obj_ei.detach().cpu().numpy()

    # select the top `solver_samples` designs found so far
    indices = tf.math.top_k(y_sol[:, 0], k=config['solver_samples'])[1]
    solution = tf.gather(x_sol, indices, axis=0)
    solution = tf.reshape(solution, [-1, *input_shape])

    if task.is_discrete and config["use_vae"]:
        # undo the latent standardization and decode back to tokens
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"], "solution.npy"),
            solution.numpy())

    # evaluate the found solution and record the score
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, N_BATCH, percentile=True)
def test_q_expected_improvement(self):
    """Test qExpectedImprovement values, base samples and X_pending.

    With a mocked all-zero posterior the test checks, per dtype:
    * acquisition values for ``best_f=0`` (0.0) and ``best_f=-1`` (1.0);
    * base-sample shape and whether base samples persist across calls for
      IID, Sobol QMC and Sobol QMC with ``resample=True``;
    * ``set_X_pending`` with no argument, ``None`` and a tensor, and that a
      pending tensor requiring grad triggers exactly one BotorchWarning in
      debug mode.
    """
    base_shape = torch.Size([2, 1, 1, 1])
    # (sampler factory, base samples expected to stay fixed?) -- factories
    # keep sampler construction right before its acquisition function
    base_sample_cases = (
        (lambda: IIDNormalSampler(num_samples=2, seed=12345), True),
        (lambda: SobolQMCNormalSampler(num_samples=2), True),
        (lambda: SobolQMCNormalSampler(num_samples=2, resample=True), False),
    )
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        # the event shape is `b x q x t` = 1 x 1 x 1
        mm = MockModel(MockPosterior(samples=torch.zeros(1, 1, 1, **tkwargs)))
        # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, **tkwargs)

        # basic test
        sampler = IIDNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler)
        self.assertEqual(acqf(X).item(), 0.0)

        # test shifting best_f value
        acqf = qExpectedImprovement(model=mm, best_f=-1, sampler=sampler)
        self.assertEqual(acqf(X).item(), 1.0)

        # TODO: Test batched best_f, batched model, batched evaluation

        # no resample / qmc no resample / qmc resample
        for make_sampler, keeps_samples in base_sample_cases:
            acqf = qExpectedImprovement(
                model=mm, best_f=0, sampler=make_sampler()
            )
            self.assertEqual(acqf(X).item(), 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, base_shape)
            previous = acqf.sampler.base_samples.clone()
            acqf(X)
            identical = torch.equal(acqf.sampler.base_samples, previous)
            if keeps_samples:
                self.assertTrue(identical)
            else:
                self.assertFalse(identical)

        # basic test for X_pending and warning; `acqf` is the last
        # acquisition function built above (qmc with resampling)
        acqf.set_X_pending()
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(None)
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(X)
        self.assertEqual(acqf.X_pending, X)
        # the mock posterior now has q = 2 samples
        mm._posterior._samples = torch.zeros(1, 2, 1, **tkwargs)
        acqf(X)
        X2 = torch.zeros(1, 1, 1, **tkwargs, requires_grad=True)
        with warnings.catch_warnings(record=True) as caught, \
                settings.debug(True):
            acqf.set_X_pending(X2)
            self.assertEqual(acqf.X_pending, X2)
            self.assertEqual(len(caught), 1)
            self.assertTrue(issubclass(caught[-1].category, BotorchWarning))
def test_q_expected_improvement_batch(self, cuda=False):
    """Check qExpectedImprovement in batch mode against a mocked posterior.

    The mocked samples are zero except for entry ``[0, 0, 0]``, which is 1,
    so the two batch entries have different expected values. Also checks the
    shape of the sampler's base samples and whether they are kept or redrawn
    between calls.

    Args:
        cuda: If True, run on the "cuda" device, otherwise on "cpu".
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    base_shape = torch.Size([2, 1, 2, 1])

    def check_values(res, first, second):
        # the two batch entries are compared one at a time
        self.assertEqual(res[0].item(), first)
        self.assertEqual(res[1].item(), second)

    def probe(acqf, inputs, keeps_samples):
        # evaluate, check values and base-sample shape, then evaluate again
        # and check whether the base samples were kept or redrawn
        check_values(acqf(inputs), 1.0, 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape, base_shape)
        snapshot = acqf.sampler.base_samples.clone()
        acqf(inputs)
        check = self.assertTrue if keeps_samples else self.assertFalse
        check(torch.equal(acqf.sampler.base_samples, snapshot))

    for dtype in (torch.float, torch.double):
        # the event shape is `b x q x t` = 2 x 2 x 1
        samples = torch.zeros(2, 2, 1, device=device, dtype=dtype)
        samples[0, 0, 0] = 1.0
        mock_model = MockModel(MockPosterior(samples=samples))
        # X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, 1, device=device, dtype=dtype)

        # test batch mode
        sampler = IIDNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mock_model, best_f=0, sampler=sampler)
        check_values(acqf(X), 1.0, 0.0)

        # test shifting best_f value
        acqf = qExpectedImprovement(model=mock_model, best_f=-1, sampler=sampler)
        check_values(acqf(X), 2.0, 1.0)

        # test batch mode, no resample
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = qExpectedImprovement(model=mock_model, best_f=0, sampler=sampler)
        probe(acqf, X, True)  # 1-dim batch
        # 2-dim batch; the base samples should have the batch dim collapsed
        probe(acqf, X.expand(2, 1, 1), True)

        # test batch mode, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mock_model, best_f=0, sampler=sampler)
        probe(acqf, X, True)

        # test batch mode, qmc, resample
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
        acqf = qExpectedImprovement(model=mock_model, best_f=0, sampler=sampler)
        probe(acqf, X, False)  # 1-dim batch
        # 2-dim batch; the base samples should have the batch dim collapsed
        probe(acqf, X.expand(2, 1, 1), False)
# run N_BATCH rounds of BayesOpt after the initial random batch for iteration in range(1, N_BATCH + 1): t0 = time.time() # fit the models fit_gpytorch_model(mll_ei) fit_gpytorch_model(mll_nei) # define the qEI and qNEI acquisition modules using a QMC sampler qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES) # for best_f, we use the best observed noisy values as an approximation qEI = qExpectedImprovement( model=model_ei, best_f=(train_obj_ei * (train_con_ei <= 0).to(train_obj_ei)).max(), sampler=qmc_sampler, objective=constrained_obj, ) qNEI = qNoisyExpectedImprovement( model=model_nei, X_baseline=train_x_nei, sampler=qmc_sampler, objective=constrained_obj, ) # optimize and get new observation new_x_ei, new_obj_ei, new_con_ei = optimize_acqf_and_get_observation( qEI) new_x_nei, new_obj_nei, new_con_nei = optimize_acqf_and_get_observation( qNEI)
def test_proximal(self):
    """Check ProximalAcquisitionFunction values, shapes, gradients and errors.

    The wrapped acquisition value is compared against the base value times a
    weight built from a MultivariateNormal centred on the last training point.
    The error cases cover a model without train inputs, mismatched proximal
    weights, pending points and multi-batch training inputs.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        train_X = torch.rand(5, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(**tkwargs).eval()
        EI = ExpectedImprovement(model, best_f=0.0)
        proximal_weights = torch.ones(3, **tkwargs)

        def expected_weight(points, center=train_X[-1], weights=proximal_weights):
            # density at `points` relative to the density at the centre point
            mv_normal = MultivariateNormal(center, torch.diag(weights))
            return torch.exp(mv_normal.log_prob(points)) / torch.exp(
                mv_normal.log_prob(center)
            )

        # test single point
        test_X = torch.rand(1, 3, **tkwargs)
        EI_prox = ProximalAcquisitionFunction(EI, proximal_weights=proximal_weights)
        base_val = EI(test_X)
        weight = expected_weight(test_X)
        prox_val = EI_prox(test_X)
        self.assertTrue(torch.allclose(prox_val, base_val * weight))
        self.assertTrue(prox_val.shape == torch.Size([1]))

        # test t-batch with broadcasting
        test_X = torch.rand(4, 1, 3, **tkwargs)
        base_val = EI(test_X)
        weight = expected_weight(test_X)
        prox_val = EI_prox(test_X)
        self.assertTrue(torch.allclose(prox_val, base_val * weight.flatten()))
        self.assertTrue(prox_val.shape == torch.Size([4]))

        # test MC acquisition function
        qEI = qExpectedImprovement(model, best_f=0.0)
        test_X = torch.rand(4, 1, 3, **tkwargs)
        qEI_prox = ProximalAcquisitionFunction(
            qEI, proximal_weights=proximal_weights
        )
        base_val = qEI(test_X)
        weight = expected_weight(test_X)
        prox_val = qEI_prox(test_X)
        self.assertTrue(torch.allclose(prox_val, base_val * weight.flatten()))
        self.assertTrue(prox_val.shape == torch.Size([4]))

        # test gradient: backward() must run through the wrapped function
        test_X = torch.rand(1, 3, requires_grad=True, **tkwargs)
        EI_prox(test_X).backward()

        # test model without train_inputs
        bad_model = DummyModel()
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(bad_model, 0.0), proximal_weights
            )

        # test proximal weights that do not match training_inputs
        train_X = torch.rand(5, 1, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        mismatched = (
            lambda: proximal_weights[:1],
            lambda: torch.rand(3, 3, **tkwargs),
        )
        for make_weights in mismatched:
            with self.assertRaises(ValueError):
                ProximalAcquisitionFunction(
                    ExpectedImprovement(model, 0.0), make_weights()
                )

        # test for x_pending points
        pending_acq = DummyAcquisitionFunction(model)
        pending_acq.set_X_pending(torch.rand(3, 3, **tkwargs))
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(pending_acq, proximal_weights)

        # test model with multi-batch training inputs
        train_X = torch.rand(5, 2, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        bad_single_task = (
            SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        )
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(bad_single_task, 0.0), proximal_weights
            )
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second rows correspond to the
            lower and upper bounds for each parameter respectively.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.

    Raises:
        ValueError: If ``train_obj`` has more than one objective column.
    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")

    if train_con is not None:
        # Model the objective and the constraints jointly: objective first,
        # constraint columns after it.
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            best_f = torch.zeros(())
        else:
            best_f = train_obj_feas.max()

        n_constraints = train_con.size(1)
        # `i=i` binds the column index at definition time; a plain closure would
        # make every callable read the last value of `i`.
        constraints = [
            (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    # Fit the GP on inputs rescaled with `bounds`; candidates are mapped back below.
    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # Row 0 stays 0 (lower bounds) and row 1 is set to 1 (upper bounds).
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
def evaluate(mth, run_i, seed):
    """Run one seeded constrained qEI optimization and return its history.

    Args:
        mth: Method label; only printed.
        run_i: Run index; only printed.
        seed: Seed passed to both ``np.random.seed`` and ``torch.manual_seed``.

    Returns:
        A tuple ``(X, perf_list, time_list)``: the unnormalized inputs as a
        float64 numpy array, the negated objective values as a flat list
        (rows with any positive constraint value are reported as
        ``INFEASIBLE_OBJ_VALUE``), and the time list that was handed to the
        data-generation/observation helpers.
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        raw_x = unnormalize(x, bounds=problem_bounds)
        raw_x = raw_x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(raw_x)
        # negate the objectives, since the optimizer maximizes
        res['objs'] = [-y for y in res['objs']]
        return res  # Caution: negative values imply feasibility in botorch

    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj, train_con = generate_initial_data(
        initial_runs, objective_function, time_list, global_start_time
    )
    mll, model = initialize_model(train_x, train_obj, train_con)

    # run (max_runs - initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(initial_runs + 1, max_runs + 1):
        round_start = time.time()

        # fit the models
        fit_gpytorch_model(mll)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed feasible value; when every row
        # has a positive constraint value there is no feasible data
        every_row_violates = (train_con > 0).any(dim=-1).all()
        best_f = (
            -INFEASIBLE_OBJ_VALUE
            if every_row_violates
            else train_obj[(train_con <= 0).all(dim=-1)].max()
        )

        qEI = qExpectedImprovement(
            model=model,
            best_f=best_f,
            sampler=qmc_sampler,
            objective=constrained_obj,
        )

        # optimize and get new observation
        new_x, new_obj, new_con = optimize_acqf_and_get_observation(
            qEI, objective_function, time_list, global_start_time
        )

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        train_con = torch.cat([train_con, new_con])

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        mll, model = initialize_model(
            train_x, train_obj, train_con, model.state_dict()
        )

        round_end = time.time()
        report = (
            iteration,
            unnormalize(new_x, bounds=problem_bounds),
            -new_obj,
            new_con,
            round_end - round_start,
            time_list[-1],
        )
        print("Iter %d: x=%s, perf=%s, con=%s, time=%.2f, global_time=%.2f"
              % report, flush=True)

    # Save result
    X = unnormalize(train_x, bounds=problem_bounds)
    X = X.cpu().numpy().astype(np.float64)  # caution
    violated = (train_con > 0).any(dim=-1)
    train_obj[violated] = -INFEASIBLE_OBJ_VALUE  # set infeasible
    flat_obj = train_obj.reshape(-1).cpu().numpy().astype(np.float64)
    perf_list = (-1 * flat_obj).tolist()
    return X, perf_list, time_list
def test_sample_points_around_best(self):
    """Check `sample_points_around_best` for several acquisition functions.

    Covers NEI with an explicit baseline, a model with a batched mean, EI with
    and without train inputs, a minimizing PosteriorMean, an acquisition
    function without a model, constrained NEHVI, and the d >= 21 path that
    goes through `sample_perturbed_subset_dims`.

    Shape checks use ``assertEqual``. ``assertTrue(shape, size)`` would treat
    the expected size as the failure message and never compare the shapes.
    """
    tkwargs = {"device": self.device}
    _bounds = torch.ones(2, 2)
    _bounds[1] = 2
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        bounds = _bounds.to(**tkwargs)
        X_train = 1 + torch.rand(20, 2, **tkwargs)
        model = MockModel(
            MockPosterior(mean=(2 * X_train + 1).sum(dim=-1, keepdim=True))
        )
        # test NEI with X_baseline
        acqf = qNoisyExpectedImprovement(model, X_baseline=X_train)
        with mock.patch(
            "botorch.optim.initializers.sample_perturbed_subset_dims"
        ) as mock_subset_dims:
            X_rnd = sample_points_around_best(
                acq_function=acqf,
                n_discrete_points=4,
                sigma=1e-3,
                bounds=bounds,
            )
            # d = 2 here, so the subset-dims sampler must not be used
            mock_subset_dims.assert_not_called()
        self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
        self.assertTrue((X_rnd >= 1).all())
        self.assertTrue((X_rnd <= 2).all())
        # test model that returns a batched mean
        model = MockModel(
            MockPosterior(
                mean=(2 * X_train + 1).sum(dim=-1, keepdim=True).unsqueeze(0)
            )
        )
        acqf = qNoisyExpectedImprovement(model, X_baseline=X_train)
        X_rnd = sample_points_around_best(
            acq_function=acqf,
            n_discrete_points=4,
            sigma=1e-3,
            bounds=bounds,
        )
        self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
        self.assertTrue((X_rnd >= 1).all())
        self.assertTrue((X_rnd <= 2).all())

        # test EI without X_baseline
        acqf = qExpectedImprovement(model, best_f=0.0)

        with warnings.catch_warnings(record=True) as w, settings.debug(True):
            X_rnd = sample_points_around_best(
                acq_function=acqf,
                n_discrete_points=4,
                sigma=1e-3,
                bounds=bounds,
            )
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, BotorchWarning))
            self.assertIsNone(X_rnd)

        # set train inputs
        model.train_inputs = (X_train,)
        X_rnd = sample_points_around_best(
            acq_function=acqf,
            n_discrete_points=4,
            sigma=1e-3,
            bounds=bounds,
        )
        self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
        self.assertTrue((X_rnd >= 1).all())
        self.assertTrue((X_rnd <= 2).all())

        # test an acquisition function that has objective=None
        # and maximize=False
        pm = PosteriorMean(model, maximize=False)
        self.assertIsNone(pm.objective)
        self.assertFalse(pm.maximize)
        X_rnd = sample_points_around_best(
            acq_function=pm,
            n_discrete_points=4,
            sigma=0,
            bounds=bounds,
            best_pct=1e-8,  # ensures that we only use best value
        )
        idx = (-model.posterior(X_train).mean).argmax()
        self.assertTrue((X_rnd == X_train[idx : idx + 1]).all(dim=-1).all())

        # test acquisition function that has no model
        ff = FixedFeatureAcquisitionFunction(pm, d=2, columns=[0], values=[0])
        # set X_baseline for testing purposes
        ff.X_baseline = X_train
        with warnings.catch_warnings(record=True) as w, settings.debug(True):
            X_rnd = sample_points_around_best(
                acq_function=ff,
                n_discrete_points=4,
                sigma=1e-3,
                bounds=bounds,
            )
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, BotorchWarning))
            self.assertIsNone(X_rnd)

        # test constraints with NEHVI
        constraints = [lambda Y: Y[..., 0]]
        ref_point = torch.zeros(2, **tkwargs)
        # test cases when there are and are not any feasible points
        for any_feas in (True, False):
            Y_train = torch.stack(
                [
                    torch.linspace(-0.5, 0.5, X_train.shape[0], **tkwargs)
                    if any_feas
                    else torch.ones(X_train.shape[0], **tkwargs),
                    X_train.sum(dim=-1),
                ],
                dim=-1,
            )
            moo_model = MockModel(MockPosterior(mean=Y_train, samples=Y_train))
            acqf = qNoisyExpectedHypervolumeImprovement(
                moo_model,
                ref_point=ref_point,
                X_baseline=X_train,
                constraints=constraints,
                cache_root=False,
            )
            X_rnd = sample_points_around_best(
                acq_function=acqf,
                n_discrete_points=4,
                sigma=0.0,
                bounds=bounds,
            )
            self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
            # this should be true since sigma=0
            # and we should only be returning feasible points
            violation = constraints[0](Y_train)
            neg_violation = -violation.clamp_min(0.0)
            feas = neg_violation == 0
            # n_train x n_rnd tensor of booleans marking exact row matches
            eq_mask = (X_train.unsqueeze(1) == X_rnd.unsqueeze(0)).all(dim=-1)
            if feas.any():
                # check that all X_rnd correspond to feasible points
                self.assertEqual(eq_mask[feas].sum(), 4)
            else:
                # no feasible points: compare against the two rows with the
                # largest neg_violation
                idcs = torch.topk(neg_violation, k=2).indices
                self.assertEqual(eq_mask[idcs].sum(), 4)
            self.assertTrue((X_rnd >= 1).all())
            self.assertTrue((X_rnd <= 2).all())
        # test that subset_dims is called if d>=21
        X_train = 1 + torch.rand(20, 21, **tkwargs)
        model = MockModel(
            MockPosterior(mean=(2 * X_train + 1).sum(dim=-1, keepdim=True))
        )
        bounds = torch.ones(2, 21, **tkwargs)
        bounds[1] = 2
        # test NEI with X_baseline
        acqf = qNoisyExpectedImprovement(model, X_baseline=X_train)
        with mock.patch(
            "botorch.optim.initializers.sample_perturbed_subset_dims",
            wraps=sample_perturbed_subset_dims,
        ) as mock_subset_dims:
            X_rnd = sample_points_around_best(
                acq_function=acqf, n_discrete_points=5, sigma=1e-3, bounds=bounds
            )
        # the inputs and bounds are 21-dimensional here
        self.assertEqual(X_rnd.shape, torch.Size([5, 21]))
        self.assertTrue((X_rnd >= 1).all())
        self.assertTrue((X_rnd <= 2).all())
        mock_subset_dims.assert_called_once()
        # test tiny prob_perturb to make sure we perturb at least one dimension
        X_rnd = sample_points_around_best(
            acq_function=acqf,
            n_discrete_points=5,
            sigma=1e-3,
            bounds=bounds,
            prob_perturb=1e-8,
        )
        self.assertTrue(
            ((X_rnd.unsqueeze(0) == X_train.unsqueeze(1)).all(dim=-1)).sum() == 0
        )
def main(argv):
    """Run N_TRIALS qEI optimization trials and write one CSV per trial.

    Args:
        argv: Command-line arguments without the program name. ``-h`` prints a
            usage line and exits; ``-d``/``--dataset`` takes an integer dataset
            id (default 1).

    Each trial's CSV has two rows: the posterior-mean maximizer with its value,
    then the best observed input with its objective value.
    """
    dataset = 1
    try:
        opts, _ = getopt.getopt(argv, "hd:", ["dataset="])
    except getopt.GetoptError:
        print('random parallel with input dataset')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('random parallel with input dataset')
            sys.exit()
        elif opt in ("-d", "--dataset"):
            dataset = int(arg)

    # Imported once here rather than on every trial; kept below argument
    # parsing so that `-h` and usage errors do not need botorch.
    from botorch.acquisition import PosteriorMean

    # average over multiple trials
    for trial in range(1, N_TRIALS + 1):

        print(f"\nTrial {trial:>2} of {N_TRIALS} ", end="")
        best_observed_ei = []

        # call helper functions to generate initial training data and initialize model
        train_x_ei, train_obj_ei, best_observed_value_ei, _ = generate_initial_data(
            dataset)
        mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)

        best_observed_ei.append(best_observed_value_ei)

        # run N_BATCH rounds of BayesOpt after the initial random batch
        for iteration in range(1, N_BATCH + 1):

            # fit the models
            fit_gpytorch_model(mll_ei)

            # define the qEI acquisition module using a QMC sampler
            qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

            # for best_f, we use the best observed noisy values as an approximation
            qEI = qExpectedImprovement(
                model=model_ei,
                best_f=train_obj_ei.max(),
                sampler=qmc_sampler,
            )

            # optimize and get new observation
            new_x_ei, new_obj_ei = optimize_acqf_and_get_observation(
                qEI, dataset)

            # update training points
            train_x_ei = torch.cat([train_x_ei, new_x_ei])
            train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

            # update progress
            best_observed_ei.append(train_obj_ei.max().item())

            # reinitialize the models so they are ready for fitting on next iteration
            # use the current state dict to speed up fitting
            mll_ei, model_ei = initialize_model(
                train_x_ei,
                train_obj_ei,
                model_ei.state_dict(),
            )

        # return the best configuration
        best_tensor_ei, indices_ei = torch.max(train_obj_ei, 0)
        train_best_x_ei = train_x_ei[indices_ei].cpu().numpy()

        argmax_pmean_ei, max_pmean_ei = optimize_acqf(
            acq_function=PosteriorMean(model_ei),
            bounds=bounds,
            q=1,
            num_restarts=20,
            raw_samples=2048,
        )

        # NOTE: the output directory is hard-coded; the '-trail' spelling is
        # kept so that existing output file names do not change.
        csv_file_name = (
            f'/home/junjie/modes/botorch/{folder_name}'
            f'/modes-i/hp-gp-qei-dataset-{dataset}-trail{trial}.csv'
        )

        # newline="" lets the csv writer control line endings; the with-block
        # closes the file, so no explicit close() is needed.
        with open(csv_file_name, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow([
                str(argmax_pmean_ei.cpu().numpy()),
                str(max_pmean_ei.cpu().numpy())
            ])  # ei prediction
            writer.writerow(
                [str(train_best_x_ei),
                 str(best_tensor_ei.cpu().numpy())])  # ei observation
def test_get_X_baseline(self):
    """Check `get_X_baseline` across acquisition functions and model types.

    Covers an explicit ``X_baseline``, the fallback to the model's train
    inputs, the warning plus ``None`` result when neither is available,
    multi-objective acquisition functions, a model list, MESMO and a model
    with an input transform.
    """

    def assert_baseline(acq, expected):
        # the extracted baseline must equal `expected` exactly
        extracted = get_X_baseline(acq_function=acq)
        self.assertTrue(torch.equal(extracted, expected))

    def assert_warns_and_returns_none(acq):
        # exactly one BotorchWarning is recorded and nothing is returned
        with warnings.catch_warnings(record=True) as caught, settings.debug(True):
            extracted = get_X_baseline(acq_function=acq)
            self.assertEqual(len(caught), 1)
            self.assertTrue(issubclass(caught[-1].category, BotorchWarning))
            self.assertIsNone(extracted)

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        X_train = torch.rand(20, 2, **tkwargs)
        model = MockModel(
            MockPosterior(mean=(2 * X_train + 1).sum(dim=-1, keepdim=True))
        )
        # test NEI with X_baseline
        acqf = qNoisyExpectedImprovement(model, X_baseline=X_train[:2])
        assert_baseline(acqf, acqf.X_baseline)

        # test EI without X_baseline
        acqf = qExpectedImprovement(model, best_f=0.0)
        assert_warns_and_returns_none(acqf)

        # set train inputs
        model.train_inputs = (X_train,)
        assert_baseline(acqf, X_train)

        # test that we fall back to train_inputs if X_baseline is an empty tensor
        acqf.register_buffer("X_baseline", X_train[:0])
        assert_baseline(acqf, X_train)

        # test acquisition function without X_baseline or model
        acqf = FixedFeatureAcquisitionFunction(acqf, d=2, columns=[0], values=[0])
        assert_warns_and_returns_none(acqf)

        Y_train = 2 * X_train[:2] + 1
        moo_model = MockModel(MockPosterior(mean=Y_train, samples=Y_train))
        ref_point = torch.zeros(2, **tkwargs)
        # test NEHVI with X_baseline
        acqf = qNoisyExpectedHypervolumeImprovement(
            moo_model,
            ref_point=ref_point,
            X_baseline=X_train[:2],
            cache_root=False,
        )
        assert_baseline(acqf, acqf.X_baseline)

        # test qEHVI without train_inputs
        # (only constructed; no baseline is extracted from this instance)
        acqf = qExpectedHypervolumeImprovement(
            moo_model,
            ref_point=ref_point,
            partitioning=FastNondominatedPartitioning(
                ref_point=ref_point,
                Y=Y_train,
            ),
        )

        # test extracting train_inputs from model list GP
        model_list = ModelListGP(
            SingleTaskGP(X_train, Y_train[:, :1]),
            SingleTaskGP(X_train, Y_train[:, 1:]),
        )
        acqf = qExpectedHypervolumeImprovement(
            model_list,
            ref_point=ref_point,
            partitioning=FastNondominatedPartitioning(
                ref_point=ref_point,
                Y=Y_train,
            ),
        )
        assert_baseline(acqf, X_train)

        # test MESMO for which we need to use
        # `acqf.mo_model`
        batched_mo_model = SingleTaskGP(X_train, Y_train)
        acqf = qMultiObjectiveMaxValueEntropy(
            batched_mo_model,
            sample_pareto_frontiers=lambda model: torch.rand(10, 2, **tkwargs),
        )
        assert_baseline(acqf, X_train)

        # test that if there is an input transform that is applied
        # to the train_inputs when the model is in eval mode, we
        # extract the untransformed train_inputs
        model = SingleTaskGP(
            X_train, Y_train[:, :1], input_transform=Warp(indices=[0, 1])
        )
        model.eval()
        self.assertFalse(torch.equal(model.train_inputs[0], X_train))
        acqf = qExpectedImprovement(model, best_f=0.0)
        assert_baseline(acqf, X_train)
def observe(self, X, y):
    """Send an observation of a suggestion back to the optimizer.

    Parameters
    ----------
    X : list of dict-like
        Places where the objective function has already been evaluated.
        Each suggestion is a dictionary where each key corresponds to a
        parameter being optimized.
    y : array-like, shape (n,)
        Corresponding values where objective has been evaluated

    Notes
    -----
    Any exception raised while processing is caught and reported with
    ``print``; nothing is re-raised.
    """
    try:
        assert len(X) == len(y)
        for slot, (x_, y_) in enumerate(zip(X, y)):
            # Archive stores all the solutions
            self.archive.append(x_)
            # As BoTorch solves a maximization problem
            self.arc_fitness.append(-y_)
            if self.iter == 1:
                # first round: every observation seeds the population
                self.population.append(x_)
                self.fitness.append(y_)
            elif y_ <= self.fitness[slot]:
                # later rounds: replace the member in the same slot when the
                # new value is not worse
                self.population[slot] = x_
                self.fitness[slot] = y_

            # Just ignore, any inf observations we got, unclear if right thing
            if np.isfinite(y_):
                self._observe(x_, y_)

        # Transform the data (seen till now) into tensors and train the model
        warped_archive = torch.from_numpy(self.search_space.warp(self.archive))
        train_x = normalize(warped_archive, bounds=self.torch_bounds)
        fitness_column = np.array(self.arc_fitness).reshape(
            len(self.arc_fitness), 1
        )
        train_y = standardize(torch.from_numpy(fitness_column))

        # Fit the GP based on the actual observed values; after the first
        # round the previous model's state dict is passed along
        if self.iter == 1:
            self.model, mll = self.make_model(train_x, train_y)
        else:
            previous_state = self.model.state_dict()
            self.model, mll = self.make_model(train_x, train_y, previous_state)

        # mll.train()
        fit_gpytorch_model(mll)

        # define the sampler, then the acquisition function built on it
        sampler = SobolQMCNormalSampler(num_samples=512)
        self.acquisition = qExpectedImprovement(
            model=self.model, best_f=train_y.max(), sampler=sampler
        )
    except Exception as e:
        print('Error: {} in observe()'.format(e))
def test_q_expected_improvement(self):
    """Check qExpectedImprovement on a mocked model, including input validation.

    Beyond the value and base-sample checks, this asserts that a ``best_f``
    tensor with two elements raises ``ValueError`` and that a
    ``ScalarizedObjective`` raises ``UnsupportedError``.
    """
    for dtype in (torch.float, torch.double):
        opts = {"device": self.device, "dtype": dtype}
        # the event shape is `b x q x t` = 1 x 1 x 1
        zero_samples = torch.zeros(1, 1, 1, **opts)
        mock_model = MockModel(MockPosterior(samples=zero_samples))
        # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, **opts)

        # basic test
        sampler = IIDNormalSampler(num_samples=2)
        acqf = qExpectedImprovement(model=mock_model, best_f=0, sampler=sampler)
        self.assertEqual(acqf(X).item(), 0.0)

        # test shifting best_f value
        acqf = qExpectedImprovement(model=mock_model, best_f=-1, sampler=sampler)
        self.assertEqual(acqf(X).item(), 1.0)

        # test size verification of best_f
        oversized_best_f = torch.zeros(2, **opts)
        with self.assertRaises(ValueError):
            qExpectedImprovement(model=mock_model, best_f=oversized_best_f)

        def run_sampler_case(case_sampler, keeps_samples):
            # evaluate twice and check whether the base samples were kept
            case_acqf = qExpectedImprovement(
                model=mock_model, best_f=0, sampler=case_sampler
            )
            self.assertEqual(case_acqf(X).item(), 0.0)
            self.assertEqual(
                case_acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])
            )
            snapshot = case_acqf.sampler.base_samples.clone()
            case_acqf(X)
            same = torch.equal(case_acqf.sampler.base_samples, snapshot)
            if keeps_samples:
                self.assertTrue(same)
            else:
                self.assertFalse(same)
            return case_acqf

        # basic test, no resample
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = run_sampler_case(sampler, True)

        # basic test, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = run_sampler_case(sampler, True)

        # basic test, qmc, resample
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
        acqf = run_sampler_case(sampler, False)

        # basic test for X_pending and warning
        acqf.set_X_pending()
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(None)
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(X)
        self.assertEqual(acqf.X_pending, X)
        acqf(X)
        X_grad = torch.zeros(1, 1, 1, requires_grad=True, **opts)
        with warnings.catch_warnings(record=True) as caught, settings.debug(True):
            acqf.set_X_pending(X_grad)
            self.assertEqual(acqf.X_pending, X_grad)
            self.assertEqual(len(caught), 1)
            self.assertTrue(issubclass(caught[-1].category, BotorchWarning))

        # test bad objective type
        bad_objective = ScalarizedObjective(weights=torch.rand(2, **opts))
        with self.assertRaises(UnsupportedError):
            qExpectedImprovement(
                model=mock_model, best_f=0, sampler=sampler, objective=bad_objective
            )
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Convenience function for initializing botorch acquisition functions.

    Args:
        acquisition_function_name: Name of the acquisition function. One of
            "qEI", "qPI", "qNEI", "qSR", "qUCB" or "qEHVI".
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility. Used when constraint_transforms are not passed
            as part of the objective. In this function only the "qEHVI"
            branch reads it.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).
        **kwargs: Acquisition-function specific options:
            `tau` (qPI, defaults to 1e-3), `prune_baseline` (qNEI, defaults to
            False), `beta` (qUCB, required), `ref_point` and `Y` (qEHVI, both
            required) and `alpha` (qEHVI, defaults to 0.0).

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If a required kwarg (`beta` for qUCB; `ref_point` or `Y`
            for qEHVI) is missing.
        NotImplementedError: If `acquisition_function_name` is not recognized.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # initialize the sampler
    if qmc:
        sampler = SobolQMCNormalSampler(num_samples=mc_samples, seed=seed)
    else:
        sampler = IIDNormalSampler(num_samples=mc_samples, seed=seed)
    # instantiate and return the requested acquisition function
    if acquisition_function_name == "qEI":
        # best_f is the largest objective value of the posterior mean at the
        # observed points
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qExpectedImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qPI":
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qProbabilityOfImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            tau=kwargs.get("tau", 1e-3),
        )
    elif acquisition_function_name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_observed,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            prune_baseline=kwargs.get("prune_baseline", False),
        )
    elif acquisition_function_name == "qSR":
        return monte_carlo.qSimpleRegret(
            model=model, sampler=sampler, objective=objective, X_pending=X_pending
        )
    elif acquisition_function_name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(
            model=model,
            beta=kwargs["beta"],
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qEHVI":
        # Required kwargs are validated the same way as `beta` above, so the
        # ValueError is raised directly rather than from inside a KeyError
        # handler.
        if "ref_point" not in kwargs:
            raise ValueError("`ref_point` must be specified in kwargs for qEHVI")
        if "Y" not in kwargs:
            raise ValueError("`Y` must be specified in kwargs for qEHVI")
        ref_point = kwargs["ref_point"]
        Y = kwargs["Y"]
        # get feasible points
        if constraints is not None:
            feas = torch.stack([c(Y) <= 0 for c in constraints], dim=-1).all(dim=-1)
            Y = Y[feas]
        obj = objective(Y)
        partitioning = NondominatedPartitioning(
            ref_point=torch.as_tensor(ref_point, dtype=Y.dtype, device=Y.device),
            Y=obj,
            alpha=kwargs.get("alpha", 0.0),
        )
        return moo_monte_carlo.qExpectedHypervolumeImprovement(
            model=model,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=objective,
            constraints=constraints,
            X_pending=X_pending,
        )
    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}"
    )
def test_fixed_features(self):
    """Compare `FixedFeatureAcquisitionFunction` against the wrapped qEI.

    For every scenario the full-input qEI value must match the value of the
    fixed-feature wrapper evaluated on the remaining (free) columns. Fixed
    values are supplied as a tensor, as a list of floats, and as a mixed
    list of tensors and floats, for q-batch and t-batch inputs. Gradients
    with respect to the free columns are compared as well, and an input of
    the wrong width must raise `ValueError`.
    """
    dev = self.device

    def assert_close(lhs, rhs):
        # single place for the allclose-based comparison used throughout
        self.assertTrue(torch.allclose(lhs, rhs))

    observed_X = torch.rand(5, 3, device=dev)
    observed_Y = observed_X.norm(dim=-1, keepdim=True)
    gp = SingleTaskGP(observed_X, observed_Y).to(device=dev).eval()
    qEI = qExpectedImprovement(gp, best_f=0.0)

    for q in (1, 2):
        # last column fixed to a tensor of values
        X_full = torch.rand(q, 3, device=dev)
        wrapped = FixedFeatureAcquisitionFunction(
            qEI, d=3, columns=[2], values=X_full[..., -1:]
        )
        full_val = qEI(X_full)
        wrapped_val = wrapped(X_full[..., :-1])
        assert_close(full_val, wrapped_val)

        # last column fixed to a float given in a list
        wrapped = FixedFeatureAcquisitionFunction(
            qEI, d=3, columns=[2], values=[0.5]
        )
        wrapped_val = wrapped(X_full[..., :-1])
        X_half = X_full.clone()
        X_half[..., 2] = 0.5
        full_val = qEI(X_half)
        assert_close(full_val, wrapped_val)

        # first column fixed to a tensor, last column to a float;
        # `full_val` from the previous scenario is the reference here
        wrapped = FixedFeatureAcquisitionFunction(
            qEI, d=3, columns=[0, 2], values=[X_full[..., [0]], 0.5]
        )
        wrapped_val = wrapped(X_full[..., [1]])
        assert_close(full_val, wrapped_val)

        # t-batch input, fixed tensor broadcast over the batch dimension
        X_full = torch.rand(q, 3, device=dev).expand(4, q, 3)
        wrapped = FixedFeatureAcquisitionFunction(
            qEI, d=3, columns=[2], values=X_full[0, :, -1:]
        )
        full_val = qEI(X_full)
        wrapped_val = wrapped(X_full[..., :-1])
        assert_close(full_val, wrapped_val)

        # t-batch input, mixed tensor / float fixed values
        wrapped = FixedFeatureAcquisitionFunction(
            qEI, d=3, columns=[0, 2], values=[X_full[0, :, [0]], 0.5]
        )
        X_half = X_full.clone()
        X_half[..., 2] = 0.5
        full_val = qEI(X_half)
        wrapped_val = wrapped(X_full[..., [1]])
        assert_close(full_val, wrapped_val)

    # gradients w.r.t. the free columns, last column fixed
    X_full = torch.rand(1, 3, device=dev, requires_grad=True)
    X_free = X_full[..., :-1].detach().clone().requires_grad_(True)
    full_val = qEI(X_full)
    wrapped = FixedFeatureAcquisitionFunction(
        qEI, d=3, columns=[2], values=X_full[..., [2]].detach()
    )
    wrapped_val = wrapped(X_free)
    assert_close(full_val, wrapped_val)
    full_val.backward()
    wrapped_val.backward()
    assert_close(X_full.grad[..., :-1], X_free.grad)

    # gradients w.r.t. the middle column, outer columns fixed
    X_full = X_full.detach().clone()
    X_free = X_full[..., [1]].detach().clone().requires_grad_(True)
    X_full[..., 2] = 0.5
    X_full.requires_grad_(True)
    full_val = qEI(X_full)
    wrapped = FixedFeatureAcquisitionFunction(
        qEI, d=3, columns=[0, 2], values=[X_full[..., [0]].detach(), 0.5]
    )
    wrapped_val = wrapped(X_free)
    full_val.backward()
    wrapped_val.backward()
    assert_close(X_full.grad[..., [1]], X_free.grad)

    # passing the full-width input to the wrapper is a shape error
    with self.assertRaises(ValueError):
        wrapped(X_full)
def main(
        benchmark_name,
        dataset_name,
        dimensions,
        method_name,
        num_runs,
        run_start,
        num_iterations,
        acquisition_name,
        gamma,
        num_random_init,
        mc_samples,
        batch_size,
        num_fantasies,
        num_restarts,
        raw_samples,
        noise_variance_init,
        standardize_targets,
        input_dir,
        output_dir):
    """Run batched Bayesian-optimization experiments and log them to CSV.

    The run options are dumped to ``options.yaml`` under
    ``<output_dir>/<name>/<method_name>``. For every run id in
    ``range(run_start, num_runs)`` the loop proposes ``num_iterations``
    batches, evaluates each candidate on the benchmark and writes the
    collected rows to ``<run_id:03d>.csv`` in the same directory.

    Args:
        benchmark_name, dataset_name, dimensions, input_dir: forwarded to
            ``make_benchmark`` / ``make_name``.
        method_name: sub-directory name for the outputs.
        num_runs: exclusive upper bound of the run ids.
        run_start: first run id.
        num_iterations: number of batches per run.
        acquisition_name: ``"q-KG"`` or ``"q-EI"``.
        gamma: the ``1 - gamma`` quantile of the targets is used as the
            incumbent value ``best_f`` for q-EI.
        num_random_init: while fewer targets than this have been observed,
            candidates are drawn uniformly at random from the unit cube.
        mc_samples: number of Sobol QMC samples (q-EI only).
        batch_size: number of candidates per batch.
        num_fantasies: number of fantasies (q-KG only).
        num_restarts, raw_samples: forwarded to ``optimize_acqf``.
        noise_variance_init: fixed observation-noise variance of the GP.
        standardize_targets: if truthy, targets are standardized before
            fitting the model.
        output_dir: root directory for all outputs.

    Returns:
        Always ``0``.

    Raises:
        ValueError: if a model-based iteration is reached and
            ``acquisition_name`` is neither ``"q-KG"`` nor ``"q-EI"``.
    """
    # TODO(LT): Turn into options
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(gamma=gamma,
                   num_random_init=num_random_init,
                   acquisition_name=acquisition_name,
                   mc_samples=mc_samples,
                   batch_size=batch_size,
                   num_restarts=num_restarts,
                   raw_samples=raw_samples,
                   num_fantasies=num_fantasies,
                   noise_variance_init=noise_variance_init,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(),
                           device=device, dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """
        Wrapper that receives and returns torch.Tensor
        """
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        res = -benchmark.evaluate(config).value
        return torch.tensor(res, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):

        run_begin_t = batch_end_t_adj = batch_end_t = datetime.now()

        frames = []
        features = []
        targets = []

        noise_variance = torch.tensor(noise_variance_init,
                                      device=device, dtype=dtype)
        state_dict = None

        with trange(num_iterations) as iterations:

            for batch in iterations:

                if len(targets) < num_random_init:
                    # TODO(LT): support random seed
                    X_batch = torch.rand(size=(batch_size, input_dim),
                                         device=device, dtype=dtype)
                else:
                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(dim=-1)
                    y = standardize(y) if standardize_targets else y

                    # construct model
                    model = FixedNoiseGP(X, y, noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)

                    # warm-start from the previous iteration's parameters
                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1 - gamma)
                    iterations.set_postfix(tau=tau.item())

                    if acquisition_name == "q-KG":
                        assert num_fantasies is not None and num_fantasies > 0
                        acq = qKnowledgeGradient(model,
                                                 num_fantasies=num_fantasies)
                    elif acquisition_name == "q-EI":
                        assert mc_samples is not None and mc_samples > 0
                        qmc_sampler = SobolQMCNormalSampler(
                            num_samples=mc_samples)
                        acq = qExpectedImprovement(model=model, best_f=tau,
                                                   sampler=qmc_sampler)
                    else:
                        # Without this branch `acq` would be unbound on the
                        # first model-based iteration.
                        raise ValueError(
                            f"Unknown acquisition function "
                            f"'{acquisition_name}'; expected 'q-KG' or "
                            f"'q-EI'.")

                    # optimize acquisition function
                    X_batch, _ = optimize_acqf(
                        acq_function=acq,
                        bounds=bounds,
                        q=batch_size,
                        num_restarts=num_restarts,
                        raw_samples=raw_samples,
                        options=dict(batch_limit=5, maxiter=200))

                    state_dict = model.state_dict()

                # begin batch evaluation
                batch_begin_t = datetime.now()
                decision_duration = batch_begin_t - batch_end_t
                # the adjusted clock advances by the decision time only;
                # evaluation time is added per candidate below
                batch_begin_t_adj = batch_end_t_adj + decision_duration

                eval_end_times = []

                # TODO(LT): Deliberately not doing broadcasting for now since
                # batch sizes are so small anyway. Can revisit later if there
                # is a compelling reason to do it.
                rows = []
                for x_next in X_batch:

                    # eval begin time
                    eval_begin_t = datetime.now()

                    # evaluate blackbox objective
                    y_next = func(x_next)

                    # eval end time
                    eval_end_t = datetime.now()

                    # eval duration
                    eval_duration = eval_end_t - eval_begin_t

                    # adjusted eval end time is the duration added to the
                    # time at which batch eval was started
                    eval_end_t_adj = batch_begin_t_adj + eval_duration

                    eval_end_times.append(eval_end_t_adj)
                    elapsed = eval_end_t_adj - run_begin_t

                    # update dataset
                    features.append(x_next)
                    targets.append(y_next)

                    row = dict_from_tensor(x_next, cs=config_space)
                    # undo the sign flip applied in `func`
                    row["loss"] = -y_next.item()
                    row["cost_eval"] = eval_duration.total_seconds()
                    row["finished"] = elapsed.total_seconds()
                    rows.append(row)

                batch_end_t = datetime.now()
                # the batch's adjusted end is its latest adjusted eval end
                batch_end_t_adj = max(eval_end_times)

                frame = pd.DataFrame(data=rows) \
                          .assign(batch=batch,
                                  cost_decision=decision_duration.total_seconds())
                frames.append(frame)

        data = pd.concat(frames, axis="index", ignore_index=True)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
def select_query_point(self, batch_size=1):
    """
    Propose the next query in the embedded space and map it back to the
    original space.

    :param batch_size (int): number of query points requested from the
        acquisition optimizer; the random-initialization branch always
        draws a single point
    :return: tuple ``(X_query, X_query_embedded)``: the query mapped to the
        original space and clipped to ``self.original_boundaries`` (numpy
        array), and the query in the embedded space (torch tensor)
    """
    setting = self.embedding_boundaries_setting
    if setting not in ("auto", "constant"):
        raise NotImplementedError("embedding_boundaries_setting must be "
                                  "'auto' or 'constant'.")

    # Build the (d_embedding, 2) box of the embedded search space
    if setting == "auto":
        # Approximately compute boundaries on embedded space
        box = self._compute_boundaries_embedding(self.original_boundaries)
    else:
        # As described in the original paper. This is default.
        box = np.array(
            [[-np.sqrt(self.d_embedding), np.sqrt(self.d_embedding)]]
            * self.d_embedding)

    # TODO: Make the random initialization its own function so it can be done separately from the acquisition argmin
    if len(self.X) >= self.initial_random_samples:
        # Query by maximizing the acquisition function
        print("---------------------")
        print('querying')
        print("self.X_embedded.shape: {}".format(self.X_embedded.shape))
        print("self.y.shape: {}".format(self.y.shape))

        # The model is created only on the first acquisition-based query
        if len(self.X) == self.initial_random_samples:
            self.model = ExactGaussianProcess(
                train_x=self.X_embedded.float(),
                train_y=self.y.float(),
            )

        incumbent = torch.max(self.y).item()
        acquisition = qExpectedImprovement(model=self.model,
                                           best_f=incumbent)

        print("batch_size: {}".format(batch_size))

        # Optimize for a (batch_size x d_embedding) tensor query point
        X_query_embedded = global_optimization(
            objective_function=acquisition,
            boundaries=torch.from_numpy(box).float(),
            batch_size=batch_size,  # number of query points to suggest
        )

        print("batched X_query_embedded: {}".format(X_query_embedded))
        print("batched X_query_embedded.shape: {}".format(
            X_query_embedded.shape))
        print("X_embedded concatenated: {}".format(self.X_embedded.shape))
    else:
        # Still initializing: draw one point uniformly inside the box
        lower = box[:, 0]
        upper = box[:, 1]
        unit_draw = self.rng.uniform(size=box.shape[0])
        sampled = unit_draw * (upper - lower) + lower
        X_query_embedded = torch.from_numpy(sampled).unsqueeze(0)
        print("X_query_embedded.shape: {}".format(X_query_embedded.shape))

    # Map to higher dimensional space and clip to the original boundaries
    lifted = self._manifold_to_dataspace(X_query_embedded.numpy())
    X_query = np.clip(a=lifted,
                      a_min=self.original_boundaries[:, 0],
                      a_max=self.original_boundaries[:, 1])

    return X_query, X_query_embedded
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    The objectives are collapsed into a single value with a Chebyshev
    scalarization whose weights are sampled from the simplex. A
    ``SingleTaskGP`` is fitted on the normalized inputs, and one candidate
    is obtained by optimizing ``qExpectedImprovement`` on the unit cube
    before mapping it back to ``bounds``.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """
    n_objectives = train_obj.size(-1)

    simplex_weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=simplex_weights, Y=train_obj)

    if train_con is None:
        train_y = train_obj
        objective = GenericMCObjective(scalarization)
    else:
        # Model outputs are laid out as [objectives..., constraints...]
        train_y = torch.cat([train_obj, train_con], dim=-1)
        n_constraints = train_con.size(1)

        # `i=i` binds the index at definition time (no late binding)
        constraint_fns = [
            (lambda Z, i=i: Z[..., -n_constraints + i]) for i in range(n_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraint_fns,
        )

    train_x = normalize(train_x, bounds=bounds)

    n_outputs = train_y.size(-1)
    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=n_outputs))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    incumbent = objective(train_y).max()
    acqf = qExpectedImprovement(
        model=model,
        best_f=incumbent,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # Inputs were normalized above, so optimize over the unit cube
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options=optimizer_options,
        sequential=True,
    )

    return unnormalize(candidates.detach(), bounds=bounds)
def test_q_expected_improvement(self, cuda=False):
    """Exercise `qExpectedImprovement` on a mocked all-zero posterior.

    Checks the acquisition value for two `best_f` settings, whether the
    sampler's base samples are kept or redrawn between calls for each
    sampler configuration, and the `X_pending` setter, including the
    warning issued for a pending tensor that requires grad.
    """
    device = torch.device("cuda" if cuda else "cpu")
    base_shape = torch.Size([2, 1, 1, 1])

    # (sampler factory, assertion applied to "base samples unchanged").
    # Factories keep sampler construction in its original position.
    scenarios = (
        # iid, no resample
        (lambda: IIDNormalSampler(num_samples=2, seed=12345), self.assertTrue),
        # qmc, no resample
        (lambda: SobolQMCNormalSampler(num_samples=2), self.assertTrue),
        # qmc, resample
        (
            lambda: SobolQMCNormalSampler(num_samples=2, resample=True),
            self.assertFalse,
        ),
    )

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        # the event shape is `b x q x t` = 1 x 1 x 1
        zero_samples = torch.zeros(1, 1, 1, **tkwargs)
        mm = MockModel(MockPosterior(samples=zero_samples))
        # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
        X = torch.zeros(1, 1, **tkwargs)

        # basic test, then the same sampler with a shifted best_f
        sampler = IIDNormalSampler(num_samples=2)
        for best_f, expected in ((0, 0.0), (-1, 1.0)):
            acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler)
            self.assertEqual(acqf(X).item(), expected)

        # base-sample behaviour across repeated calls
        for make_sampler, check_unchanged in scenarios:
            acqf = qExpectedImprovement(
                model=mm, best_f=0, sampler=make_sampler()
            )
            self.assertEqual(acqf(X).item(), 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, base_shape)
            before = acqf.sampler.base_samples.clone()
            acqf(X)
            check_unchanged(torch.equal(acqf.sampler.base_samples, before))

        # X_pending handling (uses the last acqf built above) and warning
        acqf.set_X_pending()
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(None)
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(X)
        self.assertEqual(acqf.X_pending, X)
        acqf(X)
        X2 = torch.zeros(1, 1, 1, requires_grad=True, **tkwargs)
        with warnings.catch_warnings(record=True) as caught:
            acqf.set_X_pending(X2)
            self.assertEqual(acqf.X_pending, X2)
            self.assertEqual(len(caught), 1)
            self.assertTrue(issubclass(caught[-1].category, BotorchWarning))
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Build one of the supported MC acquisition functions by name.

    Supported names are "qEI", "qPI", "qNEI", "qSR" and "qUCB".

    Args:
        acquisition_function_name: Name of the acquisition function.
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).
        **kwargs: Name-specific options: `tau` (qPI, default `1e-3`),
            `prune_baseline` (qNEI, default `False`), `beta` (qUCB, required).

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If "qUCB" is requested without `beta` in kwargs.
        NotImplementedError: If the name is not one of the supported ones.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    name = acquisition_function_name

    # The sampler is built first, whatever acquisition function is requested
    sampler_cls = SobolQMCNormalSampler if qmc else IIDNormalSampler
    sampler = sampler_cls(num_samples=mc_samples, seed=seed)

    # Keyword arguments shared by every supported acquisition function
    common = dict(
        model=model,
        sampler=sampler,
        objective=objective,
        X_pending=X_pending,
    )

    if name in ("qEI", "qPI"):
        # Both use the best objective value of the posterior mean at the
        # observed points as the incumbent
        best_f = objective(model.posterior(X_observed).mean).max().item()
        if name == "qEI":
            return monte_carlo.qExpectedImprovement(best_f=best_f, **common)
        return monte_carlo.qProbabilityOfImprovement(
            best_f=best_f, tau=kwargs.get("tau", 1e-3), **common
        )

    if name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            X_baseline=X_observed,
            prune_baseline=kwargs.get("prune_baseline", False),
            **common,
        )

    if name == "qSR":
        return monte_carlo.qSimpleRegret(**common)

    if name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(beta=kwargs["beta"], **common)

    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}")
def train_loop(self):
    """Tune three hyper-parameters of `self.train` with Bayesian optimization.

    Starting from one hand-picked configuration, each of the `N_BATCH`
    iterations fits a `SingleTaskGP` on the normalized inputs and
    standardized objectives, optimizes `qExpectedImprovement` over the unit
    cube for a single candidate, evaluates it with `self.train` and adds the
    result to the training data. The best objective seen after each step is
    collected and printed at the end; nothing is returned.

    NOTE(review): `normalize`, `unnormalize` and `standardize` are not
    imported here and are assumed to be available at module scope.
    NOTE(review): `self.train` is assumed to return a 1-element tensor (its
    result is unsqueezed to shape `(1, 1)`) — confirm against its definition.
    """
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_model
    from gpytorch.mlls import ExactMarginalLogLikelihood
    from botorch.optim import optimize_acqf
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler

    seed = 1
    torch.manual_seed(seed)
    dt, d = torch.float32, 3
    lb, ub = [1e-4, 0.1, 0.1], [3e-3, 1 - 1e-3, 1 - 1e-3]
    bounds = torch.tensor([lb, ub], dtype=dt)

    def gen_initial_data():
        """Evaluate the hand-picked starting point; returns `(x, obj)`."""
        # auto
        # x = unnormalize(torch.rand(1, 3, dtype=dt), bounds=bounds)
        # manual
        x = torch.tensor([[1e-3, 0.9, 0.999]])
        print('BO Initialization: \n')
        print('Initial Hyper-parameter: ' + str(x))
        obj = self.train(x.view(-1))
        print('Initial Error: ' + str(obj))
        return x, obj.unsqueeze(1)

    def get_fitted_model(x, obj, state_dict=None):
        """Fit a `SingleTaskGP`, optionally warm-started from `state_dict`."""
        # initialize and fit model
        fitted_model = SingleTaskGP(train_X=x, train_Y=obj)
        if state_dict is not None:
            fitted_model.load_state_dict(state_dict)
        mll = ExactMarginalLogLikelihood(fitted_model.likelihood, fitted_model)
        mll.to(x)
        fit_gpytorch_model(mll)
        return fitted_model

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns a new candidate
        and its observed objective value."""
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=torch.stack([
                torch.zeros(d, dtype=dt),
                torch.ones(d, dtype=dt),
            ]),
            q=1,
            num_restarts=10,
            raw_samples=200,
        )
        # candidates live in the unit cube; map back to the real ranges
        x = unnormalize(candidates.detach(), bounds=bounds)
        print('Hyper-parameter: ' + str(x))
        obj = self.train(x.view(-1)).unsqueeze(-1)
        # the original nested print(print(...)) also emitted a stray "None"
        print('Error: ' + str(obj))
        return x, obj

    N_BATCH = 500
    MC_SAMPLES = 2000
    best_observed = []
    train_x, train_obj = gen_initial_data()  # (1,3), (1,1)
    # store plain floats throughout so the history has a single type
    best_observed.append(train_obj.max().item())
    print("\nRunning BO......\n ", end='')
    state_dict = None
    for _ in range(N_BATCH):
        # standardize once; used for both the model and the incumbent
        std_obj = standardize(train_obj)

        # fit the model
        model = get_fitted_model(
            normalize(train_x, bounds=bounds),
            std_obj,
            state_dict=state_dict,
        )

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES, seed=seed)
        qEI = qExpectedImprovement(model=model,
                                   sampler=qmc_sampler,
                                   best_f=std_obj.max())

        # optimize and get new observation
        new_x, new_obj = optimize_acqf_and_get_observation(qEI)

        # update training points
        train_x = torch.cat((train_x, new_x))
        train_obj = torch.cat((train_obj, new_obj))

        # update progress
        best_observed.append(train_obj.max().item())

        # warm-start the next fit from the current model parameters
        state_dict = model.state_dict()
        print(".", end='')

    print(best_observed)