def test_constrained_q_expected_hypervolume_improvement(self):
    """Check qEHVI under outcome constraints: zero-slack, feasible, infeasible.

    The expected values are hand-computed from the fixed pareto front below:
    the sample [6.5, 4.5] adds a hypervolume improvement of 1.5, which is
    scaled by the (sigmoid) feasibility weight of the constraint.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        ref_point = [0.0, 0.0]
        pareto_Y = torch.tensor(
            [[4.0, 5.0], [5.0, 5.0], [8.5, 3.5], [8.5, 3.0], [9.0, 1.0]], **tkwargs)
        partitioning = NondominatedPartitioning(num_outcomes=2)
        partitioning.update(Y=pareto_Y)
        # test q=1
        # the event shape is `b x q x m` = 1 x 1 x 2
        samples = torch.tensor([[[6.5, 4.5]]], **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        sampler = IIDNormalSampler(num_samples=1)
        X = torch.zeros(1, 1, **tkwargs)
        # test zero slack: constraint value 0 -> sigmoid weight 0.5,
        # independent of eta, so the HVI of 1.5 is halved.
        for eta in (1e-1, 1e-2):
            acqf = qExpectedHypervolumeImprovement(
                model=mm,
                ref_point=ref_point,
                partitioning=partitioning,
                sampler=sampler,
                constraints=[lambda Z: torch.zeros_like(Z[..., -1])],
                eta=eta,
            )
            res = acqf(X)
            self.assertAlmostEqual(res.item(), 0.5 * 1.5, places=4)
        # test feasible: large negative slack -> weight ~1, full HVI.
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            constraints=[lambda Z: -100.0 * torch.ones_like(Z[..., -1])],
            eta=1e-3,
        )
        res = acqf(X)
        self.assertAlmostEqual(res.item(), 1.5, places=4)
        # test infeasible: large positive slack -> weight ~0, no HVI.
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            constraints=[lambda Z: 100.0 * torch.ones_like(Z[..., -1])],
            eta=1e-3,
        )
        res = acqf(X)
        self.assertAlmostEqual(res.item(), 0.0, places=4)
def test_expected_hypervolume_improvement(self):
    """Check the analytic EHVI: constructor validation, a basic forward pass,
    and the cached cell-bound / cross-product-index buffers."""
    tkwargs = {"device": self.device}
    for dtype in (torch.float, torch.double):
        ref_point = [0.0, 0.0]
        tkwargs["dtype"] = dtype
        pareto_Y = torch.tensor(
            [[4.0, 5.0], [5.0, 5.0], [8.5, 3.5], [8.5, 3.0], [9.0, 1.0]], **tkwargs)
        partitioning = NondominatedPartitioning(num_outcomes=2)
        # the event shape is `b x q x m` = 1 x 1 x 2
        mean = torch.zeros(1, 1, 2, **tkwargs)
        variance = torch.zeros(1, 1, 2, **tkwargs)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        # test error if there is not pareto_Y initialized in partitioning
        with self.assertRaises(BotorchError):
            ExpectedHypervolumeImprovement(model=mm, ref_point=ref_point,
                                           partitioning=partitioning)
        partitioning.update(Y=pareto_Y)
        # test error if ref point has wrong shape
        with self.assertRaises(ValueError):
            ExpectedHypervolumeImprovement(model=mm, ref_point=ref_point[:1],
                                           partitioning=partitioning)
        with self.assertRaises(ValueError):
            # test error if no pareto_Y point is better than ref_point
            ExpectedHypervolumeImprovement(model=mm, ref_point=[10.0, 10.0],
                                           partitioning=partitioning)
        X = torch.zeros(1, 1, **tkwargs)
        # basic test: zero-mean, zero-variance posterior yields zero EHVI
        acqf = ExpectedHypervolumeImprovement(model=mm, ref_point=ref_point,
                                              partitioning=partitioning)
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # check ref point
        self.assertTrue(
            torch.equal(acqf.ref_point, torch.tensor(ref_point, **tkwargs)))
        # check bounds
        self.assertTrue(hasattr(acqf, "cell_lower_bounds"))
        self.assertTrue(hasattr(acqf, "cell_upper_bounds"))
        # check cached indices
        expected_indices = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]],
                                        dtype=torch.long, device=self.device)
        self.assertTrue(
            torch.equal(acqf._cross_product_indices, expected_indices))
def test_non_dominated_partitioning(self):
    """Exercise NondominatedPartitioning: eps defaults, pareto-set updates,
    hypercell bounds and hypervolume for m=2 (plain and batched) and m=3.

    Fix: the original used ``self.assertTrue(num_matches, 9)``, which passes
    ``9`` as the (unused) ``msg`` argument and only checks truthiness. The
    effective check — at least one expected cell matches — is now explicit.
    """
    tkwargs = {"device": self.device}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        partitioning = NondominatedPartitioning(num_outcomes=2)
        # assert error is raised if pareto_Y has not been computed
        with self.assertRaises(BotorchError):
            partitioning.pareto_Y
        # test eps
        # no pareto_Y
        self.assertEqual(partitioning.eps, 1e-6)
        partitioning = NondominatedPartitioning(num_outcomes=2, eps=1.0)
        # eps set
        self.assertEqual(partitioning.eps, 1.0)
        # set pareto_Y
        partitioning = NondominatedPartitioning(num_outcomes=2)
        Y = torch.zeros(1, 2, **tkwargs)
        partitioning.update(Y=Y)
        # default eps depends on the dtype's precision
        self.assertEqual(partitioning.eps, 1e-6 if dtype == torch.float else 1e-8)
        # test _update_pareto_Y: a dominated update must report no change
        partitioning.Y = -Y
        self.assertFalse(partitioning._update_pareto_Y())
        # test m=2
        arange = torch.arange(3, 9, **tkwargs)
        pareto_Y = torch.stack([arange, 11 - arange], dim=-1)
        Y = torch.cat(
            [
                pareto_Y,
                torch.tensor(
                    [[8.0, 2.0], [7.0, 1.0]], **tkwargs
                ),  # add some non-pareto elements
            ],
            dim=0,
        )
        partitioning = NondominatedPartitioning(num_outcomes=2, Y=Y)
        sorting = torch.argsort(pareto_Y[:, 0], descending=True)
        self.assertTrue(torch.equal(pareto_Y[sorting], partitioning.pareto_Y))
        ref_point = torch.zeros(2, **tkwargs)
        inf = float("inf")
        expected_cell_bounds = torch.tensor(
            [
                [
                    [8.0, 0.0],
                    [7.0, 3.0],
                    [6.0, 4.0],
                    [5.0, 5.0],
                    [4.0, 6.0],
                    [3.0, 7.0],
                    [0.0, 8.0],
                ],
                [
                    [inf, inf],
                    [8.0, inf],
                    [7.0, inf],
                    [6.0, inf],
                    [5.0, inf],
                    [4.0, inf],
                    [3.0, inf],
                ],
            ],
            **tkwargs,
        )
        cell_bounds = partitioning.get_hypercell_bounds(ref_point)
        self.assertTrue(torch.equal(cell_bounds, expected_cell_bounds))
        # test compute hypervolume
        hv = partitioning.compute_hypervolume(ref_point)
        self.assertEqual(hv.item(), 49.0)
        # test error when reference is not worse than all pareto_Y
        with self.assertRaises(ValueError):
            partitioning.compute_hypervolume(pareto_Y.max(dim=0).values)
        # test batched, m=2 case
        Y = torch.rand(3, 10, 2, **tkwargs)
        partitioning = NondominatedPartitioning(num_outcomes=2, Y=Y)
        cell_bounds = partitioning.get_hypercell_bounds(ref_point)
        partitionings = []
        for i in range(Y.shape[0]):
            partitioning_i = NondominatedPartitioning(num_outcomes=2, Y=Y[i])
            partitionings.append(partitioning_i)
            # check pareto_Y (order may differ, so compare as sets)
            pareto_set1 = {tuple(x) for x in partitioning_i.pareto_Y.tolist()}
            pareto_set2 = {tuple(x) for x in partitioning.pareto_Y[i].tolist()}
            self.assertEqual(pareto_set1, pareto_set2)
            expected_cell_bounds_i = partitioning_i.get_hypercell_bounds(ref_point)
            # remove padding (degenerate cells with equal lower/upper bounds)
            no_padding_cell_bounds_i = cell_bounds[:, i][
                :, ((cell_bounds[1, i] - cell_bounds[0, i]) != 0).all(dim=-1)
            ]
            self.assertTrue(
                torch.equal(expected_cell_bounds_i, no_padding_cell_bounds_i)
            )
        # test batch ref point
        cell_bounds2 = partitioning.get_hypercell_bounds(
            ref_point.unsqueeze(0).expand(3, 2)
        )
        self.assertTrue(torch.equal(cell_bounds, cell_bounds2))
        # test improper batch shape
        with self.assertRaises(BotorchTensorDimensionError):
            partitioning.get_hypercell_bounds(ref_point.unsqueeze(0).expand(4, 2))
        # test improper Y shape (too many batch dims)
        with self.assertRaises(NotImplementedError):
            NondominatedPartitioning(num_outcomes=2, Y=Y.unsqueeze(0))
        # test batched compute_hypervolume, m=2
        hvs = partitioning.compute_hypervolume(ref_point)
        hvs_non_batch = torch.stack(
            [
                partitioning_i.compute_hypervolume(ref_point)
                for partitioning_i in partitionings
            ],
            dim=0,
        )
        self.assertTrue(torch.allclose(hvs, hvs_non_batch))
        # test batched m>2
        with self.assertRaises(NotImplementedError):
            NondominatedPartitioning(
                num_outcomes=3, Y=torch.cat([Y, Y[..., :1]], dim=-1)
            )
        # test error with partition_non_dominated_space_2d for m=3
        partitioning = NondominatedPartitioning(
            num_outcomes=3, Y=torch.zeros(1, 3, **tkwargs)
        )
        with self.assertRaises(BotorchTensorDimensionError):
            partitioning.partition_non_dominated_space_2d()
        # test m=3
        pareto_Y = torch.tensor(
            [[1.0, 6.0, 8.0], [2.0, 4.0, 10.0], [3.0, 5.0, 7.0]], **tkwargs
        )
        partitioning = NondominatedPartitioning(num_outcomes=3, Y=pareto_Y)
        sorting = torch.argsort(pareto_Y[:, 0], descending=True)
        self.assertTrue(torch.equal(pareto_Y[sorting], partitioning.pareto_Y))
        ref_point = torch.tensor([-1.0, -2.0, -3.0], **tkwargs)
        expected_cell_bounds = torch.tensor(
            [
                [
                    [1.0, 4.0, 7.0],
                    [-1.0, -2.0, 10.0],
                    [-1.0, 4.0, 8.0],
                    [1.0, -2.0, 10.0],
                    [1.0, 4.0, 8.0],
                    [-1.0, 6.0, -3.0],
                    [1.0, 5.0, -3.0],
                    [-1.0, 5.0, 8.0],
                    [2.0, -2.0, 7.0],
                    [2.0, 4.0, 7.0],
                    [3.0, -2.0, -3.0],
                    [2.0, -2.0, 8.0],
                    [2.0, 5.0, -3.0],
                ],
                [
                    [2.0, 5.0, 8.0],
                    [1.0, 4.0, inf],
                    [1.0, 5.0, inf],
                    [2.0, 4.0, inf],
                    [2.0, 5.0, inf],
                    [1.0, inf, 8.0],
                    [2.0, inf, 8.0],
                    [2.0, inf, inf],
                    [3.0, 4.0, 8.0],
                    [3.0, 5.0, 8.0],
                    [inf, 5.0, 8.0],
                    [inf, 5.0, inf],
                    [inf, inf, inf],
                ],
            ],
            **tkwargs,
        )
        cell_bounds = partitioning.get_hypercell_bounds(ref_point)
        # cell bounds can have different order
        num_matches = (
            (cell_bounds.unsqueeze(0) == expected_cell_bounds.unsqueeze(1))
            .all(dim=-1)
            .any(dim=0)
            .sum()
        )
        # FIX: `assertTrue(num_matches, 9)` passed 9 as the unused `msg` arg
        # and only checked truthiness; assert the effective condition directly.
        # TODO(review): consider an exact, order-insensitive full-match check.
        self.assertGreater(num_matches.item(), 0)
        # test compute hypervolume
        hv = partitioning.compute_hypervolume(ref_point)
        self.assertEqual(hv.item(), 358.0)
def test_q_expected_hypervolume_improvement(self):
    """Exercise qEHVI end-to-end: constructor validation, cached q-subset
    indices, sampler base-sample caching/resampling, X_pending handling, and
    hand-computed hypervolume improvements for various q and m.
    """
    tkwargs = {"device": self.device}
    for dtype in (torch.float, torch.double):
        ref_point = [0.0, 0.0]
        tkwargs["dtype"] = dtype
        pareto_Y = torch.tensor(
            [[4.0, 5.0], [5.0, 5.0], [8.5, 3.5], [8.5, 3.0], [9.0, 1.0]], **tkwargs)
        partitioning = NondominatedPartitioning(num_outcomes=2)
        # the event shape is `b x q x m` = 1 x 1 x 2
        samples = torch.zeros(1, 1, 2, **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        # test error if there is not pareto_Y initialized in partitioning
        with self.assertRaises(BotorchError):
            qExpectedHypervolumeImprovement(model=mm, ref_point=ref_point,
                                            partitioning=partitioning)
        partitioning.update(Y=pareto_Y)
        # test error if ref point has wrong shape
        with self.assertRaises(ValueError):
            qExpectedHypervolumeImprovement(model=mm, ref_point=ref_point[:1],
                                            partitioning=partitioning)
        X = torch.zeros(1, 1, **tkwargs)
        # basic test
        sampler = IIDNormalSampler(num_samples=1)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # check ref point
        self.assertTrue(
            torch.equal(acqf.ref_point, torch.tensor(ref_point, **tkwargs)))
        # check cached indices
        self.assertTrue(hasattr(acqf, "q_subset_indices"))
        self.assertIn("q_choose_1", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_1"],
                torch.tensor([[0]], device=self.device),
            ))
        # test q=2
        X2 = torch.zeros(2, 1, **tkwargs)
        samples2 = torch.zeros(1, 2, 2, **tkwargs)
        mm2 = MockModel(MockPosterior(samples=samples2))
        acqf.model = mm2
        res = acqf(X2)
        self.assertEqual(res.item(), 0.0)
        # check cached indices (now for subsets of q=2 candidates)
        self.assertTrue(hasattr(acqf, "q_subset_indices"))
        self.assertIn("q_choose_1", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_1"],
                torch.tensor([[0], [1]], device=self.device),
            ))
        self.assertIn("q_choose_2", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_2"],
                torch.tensor([[0, 1]], device=self.device),
            ))
        self.assertNotIn("q_choose_3", acqf.q_subset_indices)
        # now back to 1 and sure all caches were cleared
        acqf.model = mm
        res = acqf(X)
        self.assertNotIn("q_choose_2", acqf.q_subset_indices)
        self.assertIn("q_choose_1", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_1"],
                torch.tensor([[0]], device=self.device),
            ))
        X = torch.zeros(1, 1, **tkwargs)
        samples = torch.zeros(1, 1, 2, **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        # basic test, no resample: base samples must be reused across calls
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape,
                         torch.Size([2, 1, 1, 2]))
        bs = acqf.sampler.base_samples.clone()
        res = acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))
        # basic test, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape,
                         torch.Size([2, 1, 1, 2]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))
        # basic test, qmc, resample: base samples must change across calls
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape,
                         torch.Size([2, 1, 1, 2]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertFalse(torch.equal(acqf.sampler.base_samples, bs))
        # basic test for X_pending and warning
        acqf.set_X_pending()
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(None)
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(X)
        self.assertEqual(acqf.X_pending, X)
        res = acqf(X)
        # a pending X that requires grad should trigger a BotorchWarning
        X2 = torch.zeros(1, 1, 1, requires_grad=True, **tkwargs)
        with warnings.catch_warnings(
                record=True) as ws, settings.debug(True):
            acqf.set_X_pending(X2)
            self.assertEqual(acqf.X_pending, X2)
            self.assertEqual(len(ws), 1)
            self.assertTrue(issubclass(ws[-1].category, BotorchWarning))
        # test objective
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=IdentityMCMultiOutputObjective(),
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # Test that the hypervolume improvement is correct for given sample
        # test q = 1
        X = torch.zeros(1, 1, **tkwargs)
        # basic test
        samples = torch.tensor([[[6.5, 4.5]]], **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        sampler = IIDNormalSampler(num_samples=1)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 1.5)
        # test q = 1, does not contribute
        samples = torch.tensor([0.0, 1.0], **tkwargs).view(1, 1, 2)
        sampler = IIDNormalSampler(1)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # test q = 2, both points contribute
        X = torch.zeros(2, 1, **tkwargs)
        samples = torch.tensor([[6.5, 4.5], [7.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 1.75)
        # test q = 2, only 1 point contributes
        samples = torch.tensor([[6.5, 4.5], [6.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 1.5)
        # test q = 2, neither contributes
        samples = torch.tensor([[2.0, 2.0], [0.0, 0.1]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # test q = 2, test point better than current best second objective
        samples = torch.tensor([[6.5, 4.5], [6.0, 6.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 8.0)
        # test q = 2, test point better than current-best first objective
        samples = torch.tensor([[6.5, 4.5], [9.0, 2.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 2.0)
        # test q = 3, all contribute
        X = torch.zeros(3, 1, **tkwargs)
        samples = torch.tensor([[6.5, 4.5], [9.0, 2.0], [7.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 2.25)
        # test q = 3, not all contribute
        samples = torch.tensor([[6.5, 4.5], [9.0, 2.0], [7.0, 5.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 3.5)
        # test q = 3, none contribute
        samples = torch.tensor([[0.0, 4.5], [1.0, 2.0], [3.0, 0.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # test m = 3, q=1
        pareto_Y = torch.tensor(
            [[4.0, 2.0, 3.0], [3.0, 5.0, 1.0], [2.0, 4.0, 2.0],
             [1.0, 3.0, 4.0]],
            **tkwargs,
        )
        partitioning = NondominatedPartitioning(num_outcomes=3, Y=pareto_Y)
        samples = torch.tensor([[1.0, 2.0, 6.0]], **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        ref_point = [-1.0] * 3
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        X = torch.zeros(1, 2, **tkwargs)
        res = acqf(X)
        self.assertEqual(res.item(), 12.0)
        # change reference point
        ref_point = [0.0] * 3
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 4.0)
        # test m = 3, no contribution
        ref_point = [1.0] * 3
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # test m = 3, q = 2
        pareto_Y = torch.tensor(
            [[4.0, 2.0, 3.0], [3.0, 5.0, 1.0], [2.0, 4.0, 2.0]], **tkwargs)
        samples = torch.tensor([[1.0, 2.0, 6.0], [1.0, 3.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        ref_point = [-1.0] * 3
        partitioning = NondominatedPartitioning(num_outcomes=3, Y=pareto_Y)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        X = torch.zeros(2, 2, **tkwargs)
        res = acqf(X)
        self.assertEqual(res.item(), 22.0)
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in
    :class:`~optuna.integration.BoTorchSampler` with multi-objective
    optimization when the number of objectives is three or less.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument
        and return value descriptions.
    """

    num_obj = train_obj.size(-1)

    if train_con is None:
        targets = train_obj
        feasible_obj = train_obj
        extra_qehvi_kwargs = {}
    else:
        # Model objectives and constraints jointly; constraint columns last.
        targets = torch.cat([train_obj, train_con], dim=-1)
        feasible_mask = (train_con <= 0).all(dim=-1)
        feasible_obj = train_obj[feasible_mask]
        num_con = train_con.size(1)
        # Bind the loop index as a default argument so each callable selects
        # its own constraint column from the model output.
        slack_fns = [
            (lambda Z, idx=idx: Z[..., -num_con + idx]) for idx in range(num_con)
        ]
        extra_qehvi_kwargs = {
            "objective": IdentityMCMultiOutputObjective(outcomes=list(range(num_obj))),
            "constraints": slack_fns,
        }

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(
        train_x, targets, outcome_transform=Standardize(m=targets.shape[-1])
    )
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    alpha = 10 ** (-8 + num_obj) if num_obj > 2 else 0.0
    partitioning = NondominatedPartitioning(num_outcomes=num_obj, Y=feasible_obj, alpha=alpha)

    # Reference point: slightly worse than the worst observed objective values.
    ref_point_list = (train_obj.min(dim=0).values - 1e-8).tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **extra_qehvi_kwargs,
    )

    # Optimize over the unit cube; inputs were normalized above.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    return unnormalize(candidates.detach(), bounds=bounds)
def evaluate(mth, run_i, seed):
    """Run one constrained multi-objective BO trial with qEHVI.

    Relies on module-level globals defined elsewhere in the file:
    ``problem``, ``problem_bounds``, ``initial_runs``, ``max_runs``, ``hv``,
    ``refit``, ``plot_mode``, ``MC_SAMPLES``, ``INFEASIBLE_OBJ_VALUE``,
    ``problem_str`` and the helper functions it calls.

    Returns (hv_diffs, pf, X, Y, time_list).
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        # negate objectives: the problem minimizes, BoTorch maximizes
        res['objs'] = [-y for y in res['objs']]
        return res  # Caution: negative values imply feasibility in botorch

    hv_diffs = []
    time_list = []
    global_start_time = time.time()
    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    # call helper functions to generate initial training data and initialize model
    train_x, train_obj, train_con = generate_initial_data(
        initial_runs, objective_function, time_list, global_start_time)
    # fix bug: find feasible — keep expanding the initial design until at
    # least one point is both feasible and better than the reference point
    real_initial_runs = initial_runs
    while real_initial_runs < max_runs:
        # compute feasible observations
        is_feas = (train_con <= 0).all(dim=-1)
        # compute points that are better than the known reference point
        better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
        if (is_feas & better_than_ref).any():
            break
        train_x, train_obj, train_con = expand_initial_data(
            train_x, train_obj, train_con, objective_function, time_list,
            global_start_time)
        real_initial_runs += 1
        print('=== Expand initial data to find feasible. Iter =', real_initial_runs)
    mll, model = initialize_model(train_x, train_obj, train_con)
    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)
    # calculate hypervolume of init data (one value per prefix of the design)
    for i in range(real_initial_runs):
        train_obj_i = train_obj[:i + 1]
        train_con_i = train_con[:i + 1]
        # compute pareto front
        is_feas_i = (train_con_i <= 0).all(dim=-1)
        feas_train_obj_i = train_obj_i[is_feas_i]
        pareto_mask = is_non_dominated(feas_train_obj_i)
        pareto_y = feas_train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)
    # run (max_runs - real_initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(real_initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use observations that are better than the specified reference point and feasible
                Y=train_obj[better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraint is on the last outcome
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:  # handle numeric problem
            # retry with a thinned (every 2nd point) training set
            step = 2
            print(
                '===== Exception in optimization loop, restart with 1/%d of training data: %s'
                % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step],
                                              train_obj[::step],
                                              train_con[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    train_con[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con[::step] <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj[::step] > problem.ref_point).all(
                dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use observations that are better than the specified reference point and feasible
                Y=train_obj[::step][better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraint is on the last outcome
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        assert len(time_list) == iteration
        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        train_con = torch.cat([train_con, new_con])
        # update progress
        # compute pareto front
        is_feas = (train_con <= 0).all(dim=-1)
        feas_train_obj = train_obj[is_feas]
        pareto_mask = is_non_dominated(feas_train_obj)
        pareto_y = feas_train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)
        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        # Note: they find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj, train_con)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                train_con,
                model.state_dict(),
            )
        t1 = time.time()
        print(
            "Iter %d: x=%s, perf=%s, con=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
            % (iteration, unnormalize(new_x, bounds=problem_bounds), -new_obj,
               new_con, hv_diff, t1 - t0, time_list[-1]),
            flush=True)
    # compute pareto front
    is_feas = (train_con <= 0).all(dim=-1)
    feas_train_obj = train_obj[is_feas]
    pareto_mask = is_non_dominated(feas_train_obj)
    pareto_y = feas_train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)
    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(
        np.float64)  # caution
    train_obj[~is_feas] = -INFEASIBLE_OBJ_VALUE  # set infeasible
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)
    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)
    return hv_diffs, pf, X, Y, time_list
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Convenience function for initializing botorch acquisition functions.

    Args:
        acquisition_function_name: Name of the acquisition function.
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility. Used when constraint_transforms are not passed
            as part of the objective.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If a required kwarg for the requested acquisition function
            is missing.
        NotImplementedError: If ``acquisition_function_name`` is unknown.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # initialize the sampler
    if qmc:
        sampler = SobolQMCNormalSampler(num_samples=mc_samples, seed=seed)
    else:
        sampler = IIDNormalSampler(num_samples=mc_samples, seed=seed)
    # instantiate and return the requested acquisition function
    if acquisition_function_name == "qEI":
        # incumbent: best posterior-mean objective over observed points
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qExpectedImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qPI":
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qProbabilityOfImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            tau=kwargs.get("tau", 1e-3),
        )
    elif acquisition_function_name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_observed,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            prune_baseline=kwargs.get("prune_baseline", False),
        )
    elif acquisition_function_name == "qSR":
        return monte_carlo.qSimpleRegret(
            model=model, sampler=sampler, objective=objective, X_pending=X_pending
        )
    elif acquisition_function_name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(
            model=model,
            beta=kwargs["beta"],
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qEHVI":
        # pyre-fixme [16]: `Model` has no attribute `train_targets`
        if "ref_point" not in kwargs:
            raise ValueError("`ref_point` must be specified in kwargs for qEHVI")
        if "Y" not in kwargs:
            raise ValueError("`Y` must be specified in kwargs for qEHVI")
        ref_point = kwargs["ref_point"]
        Y = kwargs.get("Y")
        # get feasible points (negative constraint values imply feasibility)
        if constraints is not None:
            feas = torch.stack([c(Y) <= 0 for c in constraints], dim=-1).all(dim=-1)
            Y = Y[feas]
        obj = objective(Y)
        partitioning = NondominatedPartitioning(
            num_outcomes=obj.shape[-1], Y=obj, alpha=kwargs.get("alpha", 0.0)
        )
        return moo_monte_carlo.qExpectedHypervolumeImprovement(
            model=model,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=objective,
            constraints=constraints,
        )
    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}"
    )
def evaluate(mth, run_i, seed):
    """Run one unconstrained multi-objective BO trial with qEHVI.

    Relies on module-level globals defined elsewhere in the file:
    ``problem``, ``problem_bounds``, ``initial_runs``, ``max_runs``, ``hv``,
    ``refit``, ``plot_mode``, ``MC_SAMPLES``, ``problem_str`` and the helper
    functions it calls.

    Returns (hv_diffs, pf, X, Y, time_list).
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        # negate objectives: the problem minimizes, BoTorch maximizes
        objs = [-y for y in res['objs']]
        return objs

    hv_diffs = []
    time_list = []
    global_start_time = time.time()
    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    # call helper functions to generate initial training data and initialize model
    train_x, train_obj = generate_initial_data(initial_runs,
                                               objective_function, time_list,
                                               global_start_time)
    mll, model = initialize_model(train_x, train_obj)
    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)
    # calculate hypervolume of init data (one value per prefix of the design)
    for i in range(initial_runs):
        train_obj_i = train_obj[:i + 1]
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj_i)
        pareto_y = train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)
    # run (max_runs - initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj)
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:
            # handle numeric problem: retry with a thinned training set
            step = 2
            print(
                '===== Exception in optimization loop, restart with 1/%d of training data: %s'
                % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step],
                                              train_obj[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj[::step])
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        assert len(time_list) == iteration
        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        # update progress
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj)
        pareto_y = train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)
        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        # Note: they find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                model.state_dict(),
            )
        t1 = time.time()
        print(
            "Iter %d: x=%s, perf=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
            % (iteration, unnormalize(new_x, bounds=problem_bounds), -new_obj,
               hv_diff, t1 - t0, time_list[-1]),
            flush=True)
    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(
        np.float64)  # caution
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)
    # compute pareto front
    pareto_mask = is_non_dominated(train_obj)
    pareto_y = train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)
    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)
    return hv_diffs, pf, X, Y, time_list
def test_non_dominated_partitioning(self):
    """Exercise NondominatedPartitioning: eps defaults, pareto-front
    extraction, hypercell bounds, and hypervolume for m=2 and m=3, in both
    float and double precision."""
    tkwargs = {"device": self.device}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        partitioning = NondominatedPartitioning(num_outcomes=2)
        # assert error is raised if pareto_Y has not been computed
        with self.assertRaises(BotorchError):
            partitioning.pareto_Y
        # test eps
        # no pareto_Y: eps falls back to the 1e-6 default
        self.assertEqual(partitioning.eps, 1e-6)
        partitioning = NondominatedPartitioning(num_outcomes=2, eps=1.0)
        # eps set explicitly via constructor
        self.assertEqual(partitioning.eps, 1.0)
        # set pareto_Y; eps then depends on the dtype of Y
        partitioning = NondominatedPartitioning(num_outcomes=2)
        Y = torch.zeros(1, 2, **tkwargs)
        partitioning.update(Y=Y)
        self.assertEqual(partitioning.eps,
                         1e-6 if dtype == torch.float else 1e-8)
        # test _update_pareto_Y: dominated update returns False
        partitioning.Y = -Y
        self.assertFalse(partitioning._update_pareto_Y())
        # test m=2: pareto points on the line y = 11 - x, x in [3, 8]
        arange = torch.arange(3, 9, **tkwargs)
        pareto_Y = torch.stack([arange, 11 - arange], dim=-1)
        Y = torch.cat(
            [
                pareto_Y,
                # add some non-pareto elements
                torch.tensor([[8.0, 2.0], [7.0, 1.0]], **tkwargs),
            ],
            dim=0,
        )
        partitioning = NondominatedPartitioning(num_outcomes=2, Y=Y)
        # pareto_Y is stored sorted descending by the first objective
        sorting = torch.argsort(pareto_Y[:, 0], descending=True)
        self.assertTrue(
            torch.equal(pareto_Y[sorting], partitioning.pareto_Y))
        ref_point = torch.zeros(2, **tkwargs)
        inf = float("inf")
        # expected bounds: index 0 = lower corners, index 1 = upper corners
        expected_cell_bounds = torch.tensor([
            [
                [8.0, 0.0],
                [7.0, 3.0],
                [6.0, 4.0],
                [5.0, 5.0],
                [4.0, 6.0],
                [3.0, 7.0],
                [0.0, 8.0],
            ],
            [
                [inf, inf],
                [8.0, inf],
                [7.0, inf],
                [6.0, inf],
                [5.0, inf],
                [4.0, inf],
                [3.0, inf],
            ],
        ], **tkwargs)
        cell_bounds = partitioning.get_hypercell_bounds(ref_point)
        self.assertTrue(torch.equal(cell_bounds, expected_cell_bounds))
        # test compute hypervolume
        hv = partitioning.compute_hypervolume(ref_point)
        self.assertEqual(hv, 49.0)
        # test error when reference is not worse than all pareto_Y
        with self.assertRaises(ValueError):
            partitioning.compute_hypervolume(pareto_Y.max(dim=0).values)
        # test error with partition_non_dominated_space_2d for m=3
        partitioning = NondominatedPartitioning(num_outcomes=3,
                                                Y=torch.zeros(
                                                    1, 3, **tkwargs))
        with self.assertRaises(BotorchTensorDimensionError):
            partitioning.partition_non_dominated_space_2d()
        # test m=3
        pareto_Y = torch.tensor(
            [[1.0, 6.0, 8.0], [2.0, 4.0, 10.0], [3.0, 5.0, 7.0]], **tkwargs)
        partitioning = NondominatedPartitioning(num_outcomes=3, Y=pareto_Y)
        sorting = torch.argsort(pareto_Y[:, 0], descending=True)
        self.assertTrue(
            torch.equal(pareto_Y[sorting], partitioning.pareto_Y))
        ref_point = torch.tensor([-1.0, -2.0, -3.0], **tkwargs)
        expected_cell_bounds = torch.tensor([
            [
                [1.0, 4.0, 7.0],
                [-1.0, -2.0, 10.0],
                [-1.0, 4.0, 8.0],
                [1.0, -2.0, 10.0],
                [1.0, 4.0, 8.0],
                [-1.0, 6.0, -3.0],
                [1.0, 5.0, -3.0],
                [-1.0, 5.0, 8.0],
                [2.0, -2.0, 7.0],
                [2.0, 4.0, 7.0],
                [3.0, -2.0, -3.0],
                [2.0, -2.0, 8.0],
                [2.0, 5.0, -3.0],
            ],
            [
                [2.0, 5.0, 8.0],
                [1.0, 4.0, inf],
                [1.0, 5.0, inf],
                [2.0, 4.0, inf],
                [2.0, 5.0, inf],
                [1.0, inf, 8.0],
                [2.0, inf, 8.0],
                [2.0, inf, inf],
                [3.0, 4.0, 8.0],
                [3.0, 5.0, 8.0],
                [inf, 5.0, 8.0],
                [inf, 5.0, inf],
                [inf, inf, inf],
            ],
        ], **tkwargs)
        cell_bounds = partitioning.get_hypercell_bounds(ref_point)
        # cell bounds can have different order
        num_matches = ((cell_bounds.unsqueeze(0) == expected_cell_bounds.
                        unsqueeze(1)).all(dim=-1).any(dim=0).sum())
        # NOTE(review): assertTrue(num_matches, 9) passes 9 as the failure
        # *message*, so this only asserts num_matches != 0 — presumably
        # assertEqual with a specific count was intended; the correct count
        # cannot be determined here, so confirm before tightening this.
        self.assertTrue(num_matches, 9)
        # test compute hypervolume
        hv = partitioning.compute_hypervolume(ref_point)
        self.assertEqual(hv, 358.0)
def __init__(
    self,
    model: Model,
    ref_point: List[float],
    partitioning: NondominatedPartitioning,
    sampler: Optional[MCSampler] = None,
    objective: Optional[MCMultiOutputObjective] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    X_pending: Optional[Tensor] = None,
    eta: float = 1e-3,
) -> None:
    r"""q-Expected Hypervolume Improvement supporting m>=2 outcomes.

    See [Daulton2020qehvi]_ for details.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> ref_point = [0.0, 0.0]
        >>> qEHVI = qExpectedHypervolumeImprovement(model, ref_point, partitioning)
        >>> qehvi = qEHVI(test_X)

    Args:
        model: A fitted model.
        ref_point: A list with `m` elements giving the reference point (in
            the outcome space) w.r.t. which the hypervolume is computed. This
            reference point applies to the objective values (i.e. after
            `objective` has been applied to the samples).
        partitioning: A `NondominatedPartitioning` module providing the
            non-dominated front and a partitioning of the non-dominated
            space into hyper-rectangles. If constraints are present, this
            partitioning must only include feasible points.
        sampler: The sampler used to draw base samples. Defaults to
            `SobolQMCNormalSampler(num_samples=512, collapse_batch_dims=True)`.
        objective: The MCMultiOutputObjective under which the samples are
            evaluated. Defaults to `IdentityMultiOutputObjective()`.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility. The acquisition function will compute the expected
            feasible hypervolume.
        X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points
            that have been submitted for function evaluation but have not
            yet been evaluated. Concatenated into `X` upon forward call.
            Copied and set to have no gradient.
        eta: The temperature parameter for the sigmoid function used for the
            differentiable approximation of the constraints.
    """
    # Validate that the reference point dimensionality matches the
    # partitioning's outcome dimensionality before doing anything else.
    num_outcomes = partitioning.num_outcomes
    if len(ref_point) != num_outcomes:
        raise ValueError(
            "The length of the reference point must match the number of outcomes. "
            f"Got ref_point with {len(ref_point)} elements, but expected "
            f"{num_outcomes}.")
    # Materialize the reference point on the same device/dtype as the
    # partitioning's pareto front.
    pareto_Y = partitioning.pareto_Y
    ref_point_tensor = torch.tensor(
        ref_point,
        dtype=pareto_Y.dtype,
        device=pareto_Y.device,
    )
    super().__init__(
        model=model,
        sampler=sampler,
        objective=objective,
        X_pending=X_pending,
    )
    self.register_buffer("ref_point", ref_point_tensor)
    # Cache the hyper-rectangle decomposition of the non-dominated space;
    # the first entry holds the lower corners, the second the upper corners.
    lower_bounds, upper_bounds = partitioning.get_hypercell_bounds(
        ref_point=self.ref_point)
    self.register_buffer("cell_lower_bounds", lower_bounds)
    self.register_buffer("cell_upper_bounds", upper_bounds)
    self.constraints = constraints
    self.eta = eta
    # q = -1 marks the cached cross-product indices as stale so they are
    # (re)built on the first forward call.
    self.q = -1
    self.q_subset_indices = BufferDict()