def optimize_qparego_and_get_observation(model, train_obj, sampler):
    """Propose one qParEGO batch and observe the problem at that batch.

    For each of the ``BATCH_SIZE`` slots a fresh weight vector is sampled from
    the simplex, a Chebyshev scalarization of ``train_obj`` is built from it,
    and a ``qExpectedImprovement`` acquisition function is created on top of
    that scalarized objective. The resulting list is optimized with
    ``optimize_acqf_list`` over ``standard_bounds``.

    Args:
        model: Model handed to every acquisition function.
        train_obj: Observed objective values; used both to build the
            scalarization and to compute the incumbent ``best_f``.
        sampler: MC sampler shared by all acquisition functions.

    Returns:
        A tuple ``(new_x, new_obj)``: the candidates unnormalized to
        ``problem.bounds`` and ``problem`` evaluated at those candidates.
    """

    def make_acqf():
        # every slot in the batch gets its own random scalarization weights
        simplex_weights = sample_simplex(
            problem.num_objectives, **tkwargs
        ).squeeze()
        scalarized = GenericMCObjective(
            get_chebyshev_scalarization(weights=simplex_weights, Y=train_obj)
        )
        incumbent = scalarized(train_obj).max()
        return qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=scalarized,
            best_f=incumbent,
            sampler=sampler,
        )

    acqfs = [make_acqf() for _ in range(BATCH_SIZE)]

    # optimize the whole list of acquisition functions
    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf_list(
        acq_function_list=acqfs,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options=optimizer_options,
    )

    # map back to the problem's native scale and observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    return new_x, problem(new_x)
def test_1d_query(self):
    """Check max/min/predict/inverse-query on a 1-d classification strategy."""
    seed = 1
    torch.manual_seed(seed)
    np.random.seed(seed)

    n_init, n_opt = 150, 1
    lb, ub = -4.0, 4.0
    target = 0.5
    tol = 0.5

    def obj(x):
        return -((Normal(0, 1).cdf(x[..., 0]) - target) ** 2)

    # Latent test function: sin(pi * x / 4), which is 1 at x=2 and -1 at x=-2
    def test_fun(x):
        return np.sin(np.pi * x / 4)

    def near(value, expected):
        return np.abs(value - expected) < tol

    # Sobol initialization phase
    sobol_generator = SobolGenerator(lb=lb, ub=ub, seed=seed)
    init_strategy = Strategy(
        lb=lb, ub=ub, n_trials=n_init, generator=sobol_generator
    )

    # model-based phase
    classifier = GPClassificationModel(lb=lb, ub=ub, inducing_size=10)
    acqf_generator = OptimizeAcqfGenerator(
        qUpperConfidenceBound,
        acqf_kwargs={"beta": 1.96, "objective": GenericMCObjective(obj)},
    )
    opt_strategy = Strategy(
        lb=lb,
        ub=ub,
        model=classifier,
        generator=acqf_generator,
        n_trials=n_opt,
    )

    strat = SequentialStrategy([init_strategy, opt_strategy])

    for _ in range(n_init + n_opt):
        next_x = strat.gen()
        response = bernoulli.rvs(norm.cdf(test_fun(next_x)))
        strat.add_data(next_x, [response])

    # We expect the global max to be at (2, 1), the min at (-2, -1)
    fmax, argmax = strat.get_max()
    self.assertTrue(near(fmax, 1))
    self.assertTrue(near(argmax[0], 2))

    fmin, argmin = strat.get_min()
    self.assertTrue(near(fmin, -1))
    self.assertTrue(near(argmin[0], -2))

    # Query at x=2 should be f=1
    prediction = strat.predict(torch.tensor([2]))[0]
    self.assertTrue(near(prediction, 1))

    # Inverse query at val 1 should return (1,[2])
    val, loc = strat.inv_query(1.0, constraints={})
    self.assertTrue(near(val, 1))
    self.assertTrue(near(loc[0], 2))
def get_PosteriorMean(
    model: Model,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates a posterior-mean style acquisition function.

    The returned object is always a ``qSimpleRegret`` built on an MC
    objective, so that arbitrary objective functions can be handled. Without
    outcome constraints the objective is a ``GenericMCObjective``; with
    outcome constraints it is a ``ConstrainedMCObjective``.

    Args:
        model: The model the acquisition function is built on.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any). Must not be None.
        X_pending: A tensor containing points whose evaluation is pending.
            It is accepted for interface compatibility and is not read by
            this function.
        **kwargs: If ``chebyshev_scalarization`` is truthy, the objective is
            a Chebyshev scalarization and ``Ys`` must hold the sequence of
            tensors it is computed from.

    Returns:
        The instantiated ``qSimpleRegret`` acquisition function.

    Raises:
        ValueError: If ``X_observed`` is None.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")

    # pick the transform that maps model outputs to a scalar objective
    use_chebyshev = kwargs.get("chebyshev_scalarization", False)
    if not use_chebyshev:
        obj_tf = get_objective_weights_transform(objective_weights)
    else:
        stacked_Ys = torch.stack(kwargs.get("Ys")).transpose(0, 1).squeeze(-1)
        obj_tf = get_chebyshev_scalarization(
            weights=objective_weights, Y=stacked_Ys
        )

    def obj_fn(samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
        # X is part of the MC objective call signature but is not needed here
        return obj_tf(samples)

    if outcome_constraints is not None:
        constraint_transforms = get_outcome_constraint_transforms(
            outcome_constraints
        )
        infeasible_cost = get_infeasible_cost(
            X=X_observed, model=model, objective=obj_fn
        )
        objective = ConstrainedMCObjective(
            objective=obj_fn,
            constraints=constraint_transforms or [],
            infeasible_cost=infeasible_cost,
        )
    else:
        objective = GenericMCObjective(objective=obj_fn)

    # Use qSimpleRegret, not analytic posterior, to handle arbitrary objective fns.
    return qSimpleRegret(model, objective=objective)
def test_prune_inferior_points(self):
    """Exercise error handling and pruning behaviour of prune_inferior_points."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        X = torch.rand(3, 2, **tkwargs)

        def prune_with_patched_rsample(patched_samples, **prune_kwargs):
            # MockPosterior.rsample is replaced so the given samples are
            # returned as-is
            with mock.patch.object(
                MockPosterior, "rsample", return_value=patched_samples
            ):
                patched_model = MockModel(
                    MockPosterior(samples=patched_samples)
                )
                return prune_inferior_points(
                    model=patched_model, X=X, **prune_kwargs
                )

        # the event shape is `q x t` = 3 x 1
        samples = torch.tensor([[-1.0], [0.0], [1.0]], **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))

        # test that a batched X raises errors
        with self.assertRaises(UnsupportedError):
            prune_inferior_points(model=mm, X=X.expand(2, 3, 2))

        # test that a batched model raises errors (event shape is `q x t` = 3 x 1)
        batched_model = MockModel(
            MockPosterior(samples=samples.expand(2, 3, 1))
        )
        with self.assertRaises(UnsupportedError):
            prune_inferior_points(model=batched_model, X=X)

        # test that invalid max_frac is checked properly
        with self.assertRaises(ValueError):
            prune_inferior_points(model=mm, X=X, max_frac=1.1)

        # test basic behaviour
        kept = prune_inferior_points(model=mm, X=X)
        self.assertTrue(torch.equal(kept, X[[-1]]))

        # test custom objective
        neg_id_obj = GenericMCObjective(lambda X: -X.squeeze(-1))
        kept = prune_inferior_points(model=mm, X=X, objective=neg_id_obj)
        self.assertTrue(torch.equal(kept, X[[0]]))

        # test non-repeated samples (requires mocking out MockPosterior's rsample)
        samples = torch.tensor(
            [
                [[3.0], [0.0], [0.0]],
                [[0.0], [2.0], [0.0]],
                [[0.0], [0.0], [1.0]],
            ],
            **tkwargs,
        )
        kept = prune_with_patched_rsample(samples)
        self.assertTrue(torch.equal(kept, X))

        # test max_frac limiting
        kept = prune_with_patched_rsample(samples, max_frac=2 / 3)
        self.assertTrue(torch.equal(kept, X[:2]))

        # test that zero-probability is in fact pruned
        samples[2, 0, 0] = 10
        kept = prune_with_patched_rsample(samples)
        self.assertTrue(torch.equal(kept, X[:2]))
def test_generic_mc_objective(self):
    """GenericMCObjective must return exactly what the wrapped callable does."""
    sample_shapes = ((1,), (2,), (3, 1), (3, 2))
    for dtype in (torch.float, torch.double):
        obj = GenericMCObjective(generic_obj)
        for shape in sample_shapes:
            samples = torch.randn(*shape, device=self.device, dtype=dtype)
            expected = generic_obj(samples)
            self.assertTrue(torch.equal(obj(samples), expected))
def test_generic_mc_objective(self, cuda=False):
    """GenericMCObjective must return exactly what the wrapped callable does.

    Args:
        cuda: If True the samples are created on the CUDA device, otherwise
            on the CPU.
    """
    device = torch.device("cuda" if cuda else "cpu")
    sample_shapes = ((1,), (2,), (3, 1), (3, 2))
    for dtype in (torch.float, torch.double):
        obj = GenericMCObjective(generic_obj)
        for shape in sample_shapes:
            samples = torch.randn(*shape, device=device, dtype=dtype)
            expected = generic_obj(samples)
            self.assertTrue(torch.equal(obj(samples), expected))
def test_evaluate_kg(self):
    """Evaluate qKnowledgeGradient on real SingleTaskGP models."""
    # a thorough test using real model and dtype double
    d = 2
    dtype = torch.double
    tkwargs = {"device": self.device, "dtype": dtype}
    bounds = torch.tensor([[0], [1]], **tkwargs).repeat(1, d)
    train_X = torch.rand(3, d, **tkwargs)
    train_Y = torch.rand(3, 1, **tkwargs)
    model = SingleTaskGP(train_X, train_Y)
    X_pending = torch.rand(2, d, **tkwargs)
    current_value = torch.rand(1, **tkwargs)
    qKG = qKnowledgeGradient(
        model=model,
        num_fantasies=2,
        objective=None,
        X_pending=X_pending,
        current_value=current_value,
    )
    X = torch.rand(4, 3, d, **tkwargs)
    inner_options = {"num_inner_restarts": 2, "raw_inner_samples": 3}
    val = qKG.evaluate(
        X, bounds=bounds, num_restarts=2, raw_samples=3, options=inner_options
    )
    # verify output shape
    self.assertEqual(val.size(), torch.Size([4]))
    # verify dtype
    self.assertEqual(val.dtype, dtype)

    # test i) no dimension is squeezed out, ii) dtype float, iii) MC objective,
    # and iv) t_batch_mode_transform
    dtype = torch.float
    tkwargs = {"device": self.device, "dtype": dtype}
    bounds = torch.tensor([[0], [1]], **tkwargs)
    train_X = torch.rand(1, 1, **tkwargs)
    train_Y = torch.rand(1, 1, **tkwargs)
    model = SingleTaskGP(train_X, train_Y)
    norm_objective = GenericMCObjective(
        objective=lambda Y, X: Y.norm(dim=-1)
    )
    qKG = qKnowledgeGradient(
        model=model, num_fantasies=1, objective=norm_objective
    )
    X = torch.rand(1, 1, **tkwargs)
    inner_options = {"num_inner_restarts": 1, "raw_inner_samples": 1}
    val = qKG.evaluate(
        X, bounds=bounds, num_restarts=1, raw_samples=1, options=inner_options
    )
    # verify output shape
    self.assertEqual(val.size(), torch.Size([1]))
    # verify dtype
    self.assertEqual(val.dtype, dtype)
def test_get_value_function(self):
    """_get_value_function picks PosteriorMean or qSimpleRegret as appropriate."""
    mock_model = MockModel(None)

    # without an objective the value function is a PosteriorMean
    analytic_vf = _get_value_function(mock_model)
    self.assertIsInstance(analytic_vf, PosteriorMean)
    self.assertIsNone(analytic_vf.objective)

    # with an MC objective and a sampler it is a qSimpleRegret
    sum_objective = GenericMCObjective(lambda Y: Y.sum(dim=-1))
    iid_sampler = IIDNormalSampler(num_samples=2)
    mc_vf = _get_value_function(
        model=mock_model, objective=sum_objective, sampler=iid_sampler
    )
    self.assertIsInstance(mc_vf, qSimpleRegret)
    self.assertEqual(mc_vf.objective, sum_objective)
    self.assertEqual(mc_vf.sampler, iid_sampler)
def test_get_value_function(self):
    """_get_value_function picks PosteriorMean or qSimpleRegret as appropriate."""
    num_outputs_patch = mock.patch(NO, new_callable=mock.PropertyMock)
    with num_outputs_patch as mock_num_outputs:
        # the patched property reports a single output
        mock_num_outputs.return_value = 1
        mock_model = MockModel(None)

        # without an objective the value function is a PosteriorMean
        analytic_vf = _get_value_function(mock_model)
        self.assertIsInstance(analytic_vf, PosteriorMean)
        self.assertIsNone(analytic_vf.objective)

        # with an MC objective and a sampler it is a qSimpleRegret
        sum_objective = GenericMCObjective(lambda Y: Y.sum(dim=-1))
        iid_sampler = IIDNormalSampler(num_samples=2)
        mc_vf = _get_value_function(
            model=mock_model, objective=sum_objective, sampler=iid_sampler
        )
        self.assertIsInstance(mc_vf, qSimpleRegret)
        self.assertEqual(mc_vf.objective, sum_objective)
        self.assertEqual(mc_vf.sampler, iid_sampler)
def test_generic_mc_objective_deprecated(self):
    """A single-argument callable triggers a DeprecationWarning but still works."""
    expected_warning = (
        "The `objective` callable of `GenericMCObjective` is expected to "
        "take two arguments. Passing a callable that expects a single "
        "argument will result in an error in future versions."
    )
    sample_shapes = ((1,), (2,), (3, 1), (3, 2))
    for dtype in (torch.float, torch.double):
        with warnings.catch_warnings(record=True) as ws, settings.debug(True):
            obj = GenericMCObjective(generic_obj_deprecated)
            categories = [w.category for w in ws]
            messages = [str(w.message) for w in ws]
            self.assertTrue(
                any(issubclass(c, DeprecationWarning) for c in categories)
            )
            self.assertTrue(any(expected_warning in m for m in messages))

        # the wrapped objective must still agree with the reference callable
        for shape in sample_shapes:
            samples = torch.randn(*shape, device=self.device, dtype=dtype)
            self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
def test_cache_root_decomposition(self):
    """_cache_root_decomposition handles multi-output and single-output posteriors."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        # test mt-mvn
        train_x = torch.rand(2, 1, **tkwargs)
        train_y = torch.rand(2, 2, **tkwargs)
        test_x = torch.rand(2, 1, **tkwargs)
        model = SingleTaskGP(train_x, train_y)
        sampler = IIDNormalSampler(1)
        with torch.no_grad():
            posterior = model.posterior(test_x)
        first_output = GenericMCObjective(lambda Y: Y[..., 0])
        acqf = DummyCachedCholeskyAcqf(
            model=model, sampler=sampler, objective=first_output
        )
        baseline_L = torch.eye(2, **tkwargs)

        # extract_batch_covar is wrapped (not replaced) so the call is
        # recorded; the Cholesky routine is stubbed to return baseline_L
        covar_patch = mock.patch(
            EXTRACT_BATCH_COVAR_PATH, wraps=extract_batch_covar
        )
        cholesky_patch = mock.patch(CHOLESKY_PATH, return_value=baseline_L)
        with covar_patch as mock_extract_batch_covar, \
                cholesky_patch as mock_cholesky:
            acqf._cache_root_decomposition(posterior=posterior)
            mock_extract_batch_covar.assert_called_once_with(posterior.mvn)
            mock_cholesky.assert_called_once()

        # test mvn
        model = SingleTaskGP(train_x, train_y[:, :1])
        with torch.no_grad():
            posterior = model.posterior(test_x)
        covar_patch = mock.patch(EXTRACT_BATCH_COVAR_PATH)
        cholesky_patch = mock.patch(CHOLESKY_PATH, return_value=baseline_L)
        with covar_patch as mock_extract_batch_covar, \
                cholesky_patch as mock_cholesky:
            acqf._cache_root_decomposition(posterior=posterior)
            mock_extract_batch_covar.assert_not_called()
            mock_cholesky.assert_called_once()
        self.assertTrue(torch.equal(acqf._baseline_L, baseline_L))
def test_get_value_function(self):
    """_get_value_function covers the analytic, MC and projected cases."""
    num_outputs_patch = mock.patch(NO, new_callable=mock.PropertyMock)
    with num_outputs_patch as mock_num_outputs:
        # the patched property reports a single output
        mock_num_outputs.return_value = 1
        mock_model = MockModel(None)

        # without an objective the value function is a PosteriorMean
        analytic_vf = _get_value_function(mock_model)
        self.assertIsInstance(analytic_vf, PosteriorMean)
        self.assertIsNone(analytic_vf.objective)

        # with an MC objective and a sampler it is a qSimpleRegret
        sum_objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
        iid_sampler = IIDNormalSampler(num_samples=2)
        mc_vf = _get_value_function(
            model=mock_model, objective=sum_objective, sampler=iid_sampler
        )
        self.assertIsInstance(mc_vf, qSimpleRegret)
        self.assertEqual(mc_vf.objective, sum_objective)
        self.assertEqual(mc_vf.sampler, iid_sampler)

        # with a projection the value function is wrapped
        projected_X = torch.ones(1, 1, 1, device=self.device)
        mock_project = mock.Mock(return_value=projected_X)
        projected_vf = _get_value_function(
            model=mock_model,
            objective=sum_objective,
            sampler=iid_sampler,
            project=mock_project,
        )
        self.assertIsInstance(projected_vf, ProjectedAcquisitionFunction)
        self.assertEqual(projected_vf.objective, sum_objective)
        self.assertEqual(projected_vf.sampler, iid_sampler)
        self.assertEqual(projected_vf.project, mock_project)

        # calling the wrapper must project first, then call the base
        # value function on the projected input
        test_X = torch.rand(1, 1, 1, device=self.device)
        base_vf_patch = mock.patch.object(
            projected_vf,
            "base_value_function",
            __class__=torch.nn.Module,
            return_value=None,
        )
        with base_vf_patch as patch_bvf:
            projected_vf(test_X)
            mock_project.assert_called_once_with(test_X)
            patch_bvf.assert_called_once_with(
                torch.ones(1, 1, 1, device=self.device)
            )
def bo_qei(config):
    """Optimizes over designs x in an offline optimization problem using
    Bayesian optimization with the qExpectedImprovement acquisition function

    A (optionally VAE-encoded) dataset is used to train an ensemble of
    forward models. A FixedNoiseGP is then fit to the top designs of the
    dataset and refined for ``bo_iterations`` rounds, where each round
    proposes ``bo_batch_size`` candidates by maximizing qEI and scores them
    with either the ground truth task or the learned ensemble. The best
    designs found are saved to ``solution.npy`` in the logging directory and
    their score is recorded with the logger.

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(
            task,
            hidden_size=config['vae_hidden_size'],
            latent_size=config['vae_latent_size'],
            activation=config['vae_activation'],
            kernel_size=config['vae_kernel_size'],
            num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build the training and validation data for the VAE
        train_data, val_data = build_pipeline(
            x=x, y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the VAE
        vae_trainer.launch(train_data, val_data,
                           logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        # standardize the latent codes; mean and standard_dev are reused
        # below to undo this before decoding
        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # build the (bootstrapped) training and validation data for the ensemble
    train_data, val_data = build_pipeline(
        x=x, y=y, bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make one forward model per bootstrap
    forward_models = [
        ForwardModel(
            input_shape,
            hidden_size=config['hidden_size'],
            num_layers=config['num_layers'],
            initial_max_std=config['initial_max_std'],
            initial_min_std=config['initial_min_std'])
        for _ in range(config['bootstraps'])]

    # create a trainer for the ensemble of forward models
    ensemble = Ensemble(forward_models,
                        forward_model_optim=tf.keras.optimizers.Adam,
                        forward_model_lr=config['ensemble_lr'])

    # train the ensemble
    ensemble.launch(train_data, val_data,
                    logger, config['ensemble_epochs'])

    # select the top bo_gp_samples initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['bo_gp_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    initial_y = tf.gather(y, indices, axis=0)

    from botorch.models import FixedNoiseGP, ModelListGP
    from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.optim import optimize_acqf
    from botorch import fit_gpytorch_model
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.exceptions import BadInitialCandidatesWarning

    import torch
    import time
    import warnings

    warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
    warnings.filterwarnings('ignore', category=RuntimeWarning)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32

    def objective(input_x):
        """Score a batch of torch designs with the task or the ensemble."""
        original_x = input_x

        # convert the tensor into numpy before using a TF model
        # (.cpu() is a no-op for tensors that already live on the CPU)
        input_x = input_x.detach().cpu().numpy()
        batch_shape = input_x.shape[:-1]

        # pass the input into a TF model
        input_x = tf.reshape(input_x, [-1, *input_shape])

        # optimize the ground truth or the learned model
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(vae_model.decoder_cnn.predict(
                    input_x * standard_dev + mean),
                    axis=2, output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()

        # ndarray.reshape returns a new array rather than reshaping in
        # place, so the result has to be assigned to take effect
        ys = value.numpy()
        ys = ys.reshape(list(batch_shape) + [1])

        # convert the scores back to pytorch tensors
        return torch.tensor(ys).type_as(
            original_x).to(device, dtype=dtype)

    NOISE_SE = config['bo_noise_se']
    train_yvar = torch.tensor(NOISE_SE ** 2, device=device, dtype=dtype)

    def initialize_model(train_x, train_obj, state_dict=None):
        """Build the GP and its marginal log likelihood for the given data."""
        # define models for objective
        model_obj = FixedNoiseGP(train_x, train_obj,
                                 train_yvar.expand_as(train_obj)).to(train_x)
        # wrap the single GP in a ModelListGP
        model = ModelListGP(model_obj)
        mll = SumMarginalLogLikelihood(model.likelihood, model)
        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
        return mll, model

    def obj_callable(Z):
        return Z[..., 0]

    # the MC objective is simply the first (and only) model output
    obj = GenericMCObjective(obj_callable)

    BATCH_SIZE = config['bo_batch_size']
    bounds = torch.tensor(
        [np.min(x, axis=0).reshape([input_size]).tolist(),
         np.max(x, axis=0).reshape([input_size]).tolist()],
        device=device, dtype=dtype)

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns a new candidate
        and a noisy observation. Returns None if the optimizer raises a
        RuntimeError."""
        # optimize
        try:
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=bounds,
                q=BATCH_SIZE,
                num_restarts=config['bo_num_restarts'],
                raw_samples=config['bo_raw_samples'],  # used for initialization heuristic
                options={"batch_limit": config['bo_batch_limit'],
                         "maxiter": config['bo_maxiter']})
        except RuntimeError:
            # signalled to the caller, which stops the BO loop
            return
        # observe new values
        new_x = candidates.detach()
        exact_obj = objective(candidates)
        new_obj = exact_obj + NOISE_SE * torch.randn_like(exact_obj)
        return new_x, new_obj

    N_BATCH = config['bo_iterations']
    MC_SAMPLES = config['bo_mc_samples']

    best_observed_ei = []

    # convert the initial designs to torch and initialize the model
    train_x_ei = initial_x.numpy().reshape([initial_x.shape[0], input_size])
    train_x_ei = torch.tensor(train_x_ei).to(device, dtype=dtype)

    train_obj_ei = initial_y.numpy().reshape([initial_y.shape[0], 1])
    train_obj_ei = torch.tensor(train_obj_ei).to(device, dtype=dtype)

    best_observed_value_ei = train_obj_ei.max().item()
    mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
    best_observed_ei.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation
        qEI = qExpectedImprovement(
            model=model_ei, best_f=train_obj_ei.max(),
            sampler=qmc_sampler, objective=obj)

        # optimize and get new observation
        result = optimize_acqf_and_get_observation(qEI)
        if result is None:
            print("RuntimeError was encountered, most likely a "
                  "'symeig_cpu: the algorithm failed to converge'")
            break
        new_x_ei, new_obj_ei = result

        # update training points
        train_x_ei = torch.cat([train_x_ei, new_x_ei])
        train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

        # update progress: the best value is taken over the observed
        # objective values, not over the designs themselves
        best_value_ei = train_obj_ei.max().item()
        best_observed_ei.append(best_value_ei)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        mll_ei, model_ei = initialize_model(
            train_x_ei, train_obj_ei, model_ei.state_dict())

        t1 = time.time()
        print(f"Batch {iteration:>2}: best_value = "
              f"({best_value_ei:>4.2f}), "
              f"time = {t1 - t0:>4.2f}.", end="")

    # (.cpu() is a no-op for tensors that already live on the CPU)
    x_sol = train_x_ei.detach().cpu().numpy()
    y_sol = train_obj_ei.detach().cpu().numpy()

    # select the top solver_samples designs found during optimization
    indices = tf.math.top_k(y_sol[:, 0], k=config['solver_samples'])[1]
    solution = tf.gather(x_sol, indices, axis=0)
    solution = tf.reshape(solution, [-1, *input_shape])

    if task.is_discrete and config["use_vae"]:
        # undo the latent standardization and decode back to tokens
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), solution.numpy())

    # evaluate the found solution and record its score
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, N_BATCH, percentile=True)
def test_evaluate_q_multi_fidelity_knowledge_gradient(self):
    """Evaluate qMultiFidelityKnowledgeGradient against a mocked fantasy model."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        n_f = 4

        # basic test
        current_value = torch.rand(1, **tkwargs)
        cau = GenericCostAwareUtility(mock_util)
        mean = torch.rand(n_f, 1, 1, **tkwargs)
        variance = torch.rand(n_f, 1, 1, **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        fantasize_patch = mock.patch.object(
            MockModel, "fantasize", return_value=mfm
        )
        num_outputs_patch = mock.patch(NO, new_callable=mock.PropertyMock)
        with fantasize_patch as patch_f, \
                num_outputs_patch as mock_num_outputs:
            mock_num_outputs.return_value = 1
            mm = MockModel(None)
            qMFKG = qMultiFidelityKnowledgeGradient(
                model=mm,
                num_fantasies=n_f,
                current_value=current_value,
                cost_aware_utility=cau,
            )
            X = torch.rand(n_f + 1, 1, **tkwargs)
            val = qMFKG(X)
            patch_f.assert_called_once()
            cargs, ckwargs = patch_f.call_args
            self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        improvement = mean.squeeze(-1) - current_value
        val_exp = mock_util(X, improvement).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(
            torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :])
        )

        # batched evaluation
        b = 2
        current_value = torch.rand(b, **tkwargs)
        cau = GenericCostAwareUtility(mock_util)
        mean = torch.rand(n_f, b, 1, **tkwargs)
        variance = torch.rand(n_f, b, 1, **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        X = torch.rand(b, n_f + 1, 1, **tkwargs)
        fantasize_patch = mock.patch.object(
            MockModel, "fantasize", return_value=mfm
        )
        num_outputs_patch = mock.patch(NO, new_callable=mock.PropertyMock)
        with fantasize_patch as patch_f, \
                num_outputs_patch as mock_num_outputs:
            mock_num_outputs.return_value = 1
            mm = MockModel(None)
            qMFKG = qMultiFidelityKnowledgeGradient(
                model=mm,
                num_fantasies=n_f,
                current_value=current_value,
                cost_aware_utility=cau,
            )
            val = qMFKG(X)
            patch_f.assert_called_once()
            cargs, ckwargs = patch_f.call_args
            self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
        improvement = mean.squeeze(-1) - current_value
        val_exp = mock_util(X, improvement).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(
            torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :])
        )

        # pending points and current value
        mean = torch.rand(n_f, 1, 1, **tkwargs)
        variance = torch.rand(n_f, 1, 1, **tkwargs)
        X_pending = torch.rand(2, 1, **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        current_value = torch.rand(1, **tkwargs)
        X = torch.rand(n_f + 1, 1, **tkwargs)
        fantasize_patch = mock.patch.object(
            MockModel, "fantasize", return_value=mfm
        )
        num_outputs_patch = mock.patch(NO, new_callable=mock.PropertyMock)
        with fantasize_patch as patch_f, \
                num_outputs_patch as mock_num_outputs:
            mock_num_outputs.return_value = 1
            mm = MockModel(None)
            qMFKG = qMultiFidelityKnowledgeGradient(
                model=mm,
                num_fantasies=n_f,
                X_pending=X_pending,
                current_value=current_value,
                cost_aware_utility=cau,
            )
            val = qMFKG(X)
            patch_f.assert_called_once()
            cargs, ckwargs = patch_f.call_args
            # one candidate point plus the two pending points
            self.assertEqual(ckwargs["X"].shape, torch.Size([1, 3, 1]))
        improvement = mean.squeeze(-1) - current_value
        val_exp = mock_util(X, improvement).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(
            torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :])
        )

        # test objective (inner MC sampling)
        objective = GenericMCObjective(objective=lambda Y: Y.norm(dim=-1))
        samples = torch.randn(3, 1, 1, **tkwargs)
        mfm = MockModel(MockPosterior(samples=samples))
        X = torch.rand(n_f + 1, 1, **tkwargs)
        fantasize_patch = mock.patch.object(
            MockModel, "fantasize", return_value=mfm
        )
        num_outputs_patch = mock.patch(NO, new_callable=mock.PropertyMock)
        with fantasize_patch as patch_f, \
                num_outputs_patch as mock_num_outputs:
            mock_num_outputs.return_value = 1
            mm = MockModel(None)
            qMFKG = qMultiFidelityKnowledgeGradient(
                model=mm,
                num_fantasies=n_f,
                objective=objective,
                current_value=current_value,
                cost_aware_utility=cau,
            )
            val = qMFKG(X)
            patch_f.assert_called_once()
            cargs, ckwargs = patch_f.call_args
            self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        improvement = objective(samples) - current_value
        val_exp = mock_util(X, improvement).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(
            torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :])
        )
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    The objectives are collapsed into a single value with a Chebyshev
    scalarization using weights sampled from the simplex. A ``SingleTaskGP``
    is fit on the normalized parameters and one candidate is obtained by
    maximizing ``qExpectedImprovement``. When ``train_con`` is given, the
    constraint values are modelled as additional outputs and the objective
    becomes a ``ConstrainedMCObjective``.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    simplex_weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(
        weights=simplex_weights, Y=train_obj
    )

    if train_con is None:
        train_y = train_obj
        objective = GenericMCObjective(scalarization)
    else:
        # objectives come first, constraints occupy the trailing outputs
        train_y = torch.cat([train_obj, train_con], dim=-1)
        n_constraints = train_con.size(1)
        # ``i=i`` binds the loop index at definition time
        constraints = [
            (lambda Z, i=i: Z[..., -n_constraints + i])
            for i in range(n_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(
        train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))
    )
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    incumbent = objective(train_y).max()
    acqf = qExpectedImprovement(
        model=model,
        best_f=incumbent,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # the model lives on the unit cube, so optimize there
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    # map the candidates back to the original parameter scale
    return unnormalize(candidates.detach(), bounds=bounds)
def test_initialize_q_knowledge_gradient(self):
    """Check argument validation and attribute wiring of qKnowledgeGradient."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        mean = torch.zeros(1, 1, **tkwargs)
        mm = MockModel(MockPosterior(mean=mean))

        # test error when specifying neither sampler nor num_fantasies
        with self.assertRaises(ValueError):
            qKnowledgeGradient(model=mm, num_fantasies=None)

        # test error when sampler and num_fantasies arg are inconsistent
        sampler = IIDNormalSampler(num_samples=16)
        with self.assertRaises(ValueError):
            qKnowledgeGradient(model=mm, num_fantasies=32, sampler=sampler)

        # test default construction
        qKG = qKnowledgeGradient(model=mm, num_fantasies=32)
        self.assertEqual(qKG.num_fantasies, 32)
        self.assertIsInstance(qKG.sampler, SobolQMCNormalSampler)
        self.assertEqual(qKG.sampler.sample_shape, torch.Size([32]))
        for unset in (
            qKG.objective,
            qKG.inner_sampler,
            qKG.X_pending,
            qKG.current_value,
        ):
            self.assertIsNone(unset)
        self.assertEqual(qKG.get_augmented_q_batch_size(q=3), 32 + 3)

        # test custom construction
        obj = GenericMCObjective(lambda Y, X: Y.mean(dim=-1))
        sampler = IIDNormalSampler(num_samples=16)
        X_pending = torch.zeros(2, 2, **tkwargs)
        qKG = qKnowledgeGradient(
            model=mm,
            num_fantasies=16,
            sampler=sampler,
            objective=obj,
            X_pending=X_pending,
        )
        self.assertEqual(qKG.num_fantasies, 16)
        self.assertEqual(qKG.sampler, sampler)
        self.assertEqual(qKG.sampler.sample_shape, torch.Size([16]))
        self.assertEqual(qKG.objective, obj)
        self.assertIsInstance(qKG.inner_sampler, SobolQMCNormalSampler)
        self.assertEqual(qKG.inner_sampler.sample_shape, torch.Size([128]))
        self.assertTrue(torch.equal(qKG.X_pending, X_pending))
        self.assertIsNone(qKG.current_value)
        self.assertEqual(qKG.get_augmented_q_batch_size(q=3), 16 + 3)

        # test assignment of num_fantasies from sampler if not provided
        qKG = qKnowledgeGradient(
            model=mm, num_fantasies=None, sampler=sampler
        )
        self.assertEqual(qKG.sampler.sample_shape, torch.Size([16]))

        # test custom construction with inner sampler and current value
        inner_sampler = SobolQMCNormalSampler(num_samples=256)
        current_value = torch.zeros(1, **tkwargs)
        qKG = qKnowledgeGradient(
            model=mm,
            num_fantasies=8,
            objective=obj,
            inner_sampler=inner_sampler,
            current_value=current_value,
        )
        self.assertEqual(qKG.num_fantasies, 8)
        self.assertEqual(qKG.sampler.sample_shape, torch.Size([8]))
        self.assertEqual(qKG.objective, obj)
        self.assertIsInstance(qKG.inner_sampler, SobolQMCNormalSampler)
        self.assertEqual(qKG.inner_sampler, inner_sampler)
        self.assertIsNone(qKG.X_pending)
        self.assertTrue(torch.equal(qKG.current_value, current_value))
        self.assertEqual(qKG.get_augmented_q_batch_size(q=3), 8 + 3)

        # test construction with non-MC objective (ScalarizedObjective)
        scalarized = ScalarizedObjective(weights=torch.rand(2))
        qKG_s = qKnowledgeGradient(
            model=mm,
            num_fantasies=16,
            sampler=sampler,
            objective=scalarized,
        )
        self.assertIsNone(qKG_s.inner_sampler)
        self.assertIsInstance(qKG_s.objective, ScalarizedObjective)

        # test error if no objective and multi-output model
        mean2 = torch.zeros(1, 2, **tkwargs)
        mm2 = MockModel(MockPosterior(mean=mean2))
        with self.assertRaises(UnsupportedError):
            qKnowledgeGradient(model=mm2)
def test_evaluate_qMFKG(self):
    """Evaluate qMultiFidelityKnowledgeGradient against mocked fantasy models.

    For float and double dtypes this covers: a basic evaluation, a batched
    evaluation, pending points with a current value, an MC objective, and
    custom `valfunc_cls` / `valfunc_argfac` arguments. `MockModel.fantasize`
    is patched to return a fixed fantasy model, so the expected values can
    be computed directly from the mocked posterior.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        # basic test
        n_f = 4
        current_value = torch.rand(1, **tkwargs)
        cau = GenericCostAwareUtility(mock_util)
        mean = torch.rand(n_f, 1, 1, **tkwargs)
        variance = torch.rand(n_f, 1, 1, **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qMFKG = qMultiFidelityKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    current_value=current_value,
                    cost_aware_utility=cau,
                )
                X = torch.rand(n_f + 1, 1, **tkwargs)
                val = qMFKG(X)
                patch_f.assert_called_once()
                _, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))

        # batched evaluation
        b = 2
        current_value = torch.rand(b, **tkwargs)
        cau = GenericCostAwareUtility(mock_util)
        mean = torch.rand(n_f, b, 1, **tkwargs)
        variance = torch.rand(n_f, b, 1, **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        X = torch.rand(b, n_f + 1, 1, **tkwargs)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qMFKG = qMultiFidelityKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    current_value=current_value,
                    cost_aware_utility=cau,
                )
                val = qMFKG(X)
                patch_f.assert_called_once()
                _, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
        val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))

        # pending points and current value
        mean = torch.rand(n_f, 1, 1, **tkwargs)
        variance = torch.rand(n_f, 1, 1, **tkwargs)
        X_pending = torch.rand(2, 1, **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, variance=variance))
        current_value = torch.rand(1, **tkwargs)
        X = torch.rand(n_f + 1, 1, **tkwargs)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qMFKG = qMultiFidelityKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    X_pending=X_pending,
                    current_value=current_value,
                    cost_aware_utility=cau,
                )
                val = qMFKG(X)
                patch_f.assert_called_once()
                _, ckwargs = patch_f.call_args
                # one candidate plus the two pending points
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 3, 1]))
        val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))

        # test objective (inner MC sampling)
        objective = GenericMCObjective(objective=lambda Y, X: Y.norm(dim=-1))
        samples = torch.randn(3, 1, 1, **tkwargs)
        mfm = MockModel(MockPosterior(samples=samples))
        X = torch.rand(n_f + 1, 1, **tkwargs)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qMFKG = qMultiFidelityKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    objective=objective,
                    current_value=current_value,
                    cost_aware_utility=cau,
                )
                val = qMFKG(X)
                patch_f.assert_called_once()
                _, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
        val_exp = mock_util(X, objective(samples) - current_value).mean(dim=0)
        self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
        self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))

        # test valfunc_cls and valfunc_argfac
        d, p, d_prime = 4, 3, 2
        samples = torch.ones(3, 1, 1, **tkwargs)
        mean = torch.tensor([[0.25], [0.5], [0.75]], **tkwargs)
        weights = torch.tensor([0.5, 1.0, 1.0], **tkwargs)
        mfm = MockModel(MockPosterior(mean=mean, samples=samples))
        X = torch.rand(n_f * d + d, d, **tkwargs)
        sample_points = torch.rand(p, d_prime, **tkwargs)
        with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                mock_num_outputs.return_value = 1
                mm = MockModel(None)
                qMFKG = qMultiFidelityKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    project=lambda X: project_to_sample_points(X, sample_points),
                    valfunc_cls=ScalarizedPosteriorMean,
                    valfunc_argfac=lambda model: {"weights": weights},
                )
                val = qMFKG(X)
                patch_f.assert_called_once()
                _, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 16, 4]))
                # The expected value must live on the same device as `val`;
                # torch.allclose does not compare tensors across devices.
                val_exp = torch.tensor([1.375], **tkwargs)
                self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))

                # Second value function; both patches must still be active.
                patch_f.reset_mock()
                qMFKG = qMultiFidelityKnowledgeGradient(
                    model=mm,
                    num_fantasies=n_f,
                    project=lambda X: project_to_sample_points(X, sample_points),
                    valfunc_cls=qExpectedImprovement,
                    valfunc_argfac=lambda model: {"best_f": 0.0},
                )
                val = qMFKG(X)
                patch_f.assert_called_once()
                _, ckwargs = patch_f.call_args
                self.assertEqual(ckwargs["X"].shape, torch.Size([1, 16, 4]))
                val_exp = torch.tensor([1.0], **tkwargs)
                self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
def test_evaluate_q_knowledge_gradient(self):
    """Evaluate qKnowledgeGradient against mocked fantasy models."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        # --- basic test ---
        n_f = 4
        mean = torch.rand(n_f, 1, 1, **tkwargs)
        variance = torch.rand(n_f, 1, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(mean=mean, variance=variance))
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as fantasize_patch:
            with mock.patch(NO, new_callable=mock.PropertyMock) as num_outputs_patch:
                num_outputs_patch.return_value = 1
                acqf = qKnowledgeGradient(model=MockModel(None), num_fantasies=n_f)
                X = torch.rand(n_f + 1, 1, **tkwargs)
                acq_value = acqf(X)
                fantasize_patch.assert_called_once()
                _, call_kwargs = fantasize_patch.call_args
                self.assertEqual(call_kwargs["X"].shape, torch.Size([1, 1, 1]))
        self.assertTrue(torch.allclose(acq_value, mean.mean(), atol=1e-4))
        self.assertTrue(torch.equal(acqf.extract_candidates(X), X[..., :-n_f, :]))

        # --- batched evaluation ---
        b = 2
        mean = torch.rand(n_f, b, 1, **tkwargs)
        variance = torch.rand(n_f, b, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(mean=mean, variance=variance))
        X = torch.rand(b, n_f + 1, 1, **tkwargs)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as fantasize_patch:
            with mock.patch(NO, new_callable=mock.PropertyMock) as num_outputs_patch:
                num_outputs_patch.return_value = 1
                acqf = qKnowledgeGradient(model=MockModel(None), num_fantasies=n_f)
                acq_value = acqf(X)
                fantasize_patch.assert_called_once()
                _, call_kwargs = fantasize_patch.call_args
                self.assertEqual(call_kwargs["X"].shape, torch.Size([b, 1, 1]))
        expected = mean.mean(dim=0).squeeze(-1)
        self.assertTrue(torch.allclose(acq_value, expected, atol=1e-4))
        self.assertTrue(torch.equal(acqf.extract_candidates(X), X[..., :-n_f, :]))

        # --- pending points and current value ---
        X_pending = torch.rand(2, 1, **tkwargs)
        mean = torch.rand(n_f, 1, 1, **tkwargs)
        variance = torch.rand(n_f, 1, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(mean=mean, variance=variance))
        current_value = torch.rand(1, **tkwargs)
        X = torch.rand(n_f + 1, 1, **tkwargs)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as fantasize_patch:
            with mock.patch(NO, new_callable=mock.PropertyMock) as num_outputs_patch:
                num_outputs_patch.return_value = 1
                acqf = qKnowledgeGradient(
                    model=MockModel(None),
                    num_fantasies=n_f,
                    X_pending=X_pending,
                    current_value=current_value,
                )
                acq_value = acqf(X)
                fantasize_patch.assert_called_once()
                _, call_kwargs = fantasize_patch.call_args
                self.assertEqual(call_kwargs["X"].shape, torch.Size([1, 3, 1]))
        expected = mean.mean() - current_value
        self.assertTrue(torch.allclose(acq_value, expected, atol=1e-4))
        self.assertTrue(torch.equal(acqf.extract_candidates(X), X[..., :-n_f, :]))

        # --- MC objective (inner MC sampling) ---
        objective = GenericMCObjective(objective=lambda Y, X: Y.norm(dim=-1))
        samples = torch.randn(3, 1, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(samples=samples))
        X = torch.rand(n_f + 1, 1, **tkwargs)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as fantasize_patch:
            with mock.patch(NO, new_callable=mock.PropertyMock) as num_outputs_patch:
                num_outputs_patch.return_value = 1
                acqf = qKnowledgeGradient(
                    model=MockModel(None), num_fantasies=n_f, objective=objective
                )
                acq_value = acqf(X)
                fantasize_patch.assert_called_once()
                _, call_kwargs = fantasize_patch.call_args
                self.assertEqual(call_kwargs["X"].shape, torch.Size([1, 1, 1]))
        expected = objective(samples).mean()
        self.assertTrue(torch.allclose(acq_value, expected, atol=1e-4))
        self.assertTrue(torch.equal(acqf.extract_candidates(X), X[..., :-n_f, :]))

        # --- non-MC objective (ScalarizedObjective); reuses X from above ---
        weights = torch.rand(2, **tkwargs)
        objective = ScalarizedObjective(weights=weights)
        mean = torch.tensor([1.0, 0.5], **tkwargs).expand(n_f, 1, 2)
        cov = torch.tensor([[1.0, 0.1], [0.1, 0.5]], **tkwargs).expand(n_f, 2, 2)
        posterior = GPyTorchPosterior(MultitaskMultivariateNormal(mean, cov))
        fantasy_model = MockModel(posterior)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as fantasize_patch:
            with mock.patch(NO, new_callable=mock.PropertyMock) as num_outputs_patch:
                num_outputs_patch.return_value = 2
                acqf = qKnowledgeGradient(
                    model=MockModel(None), num_fantasies=n_f, objective=objective
                )
                acq_value = acqf(X)
                fantasize_patch.assert_called_once()
                _, call_kwargs = fantasize_patch.call_args
                self.assertEqual(call_kwargs["X"].shape, torch.Size([1, 1, 1]))
                expected = (mean * weights).sum(-1).mean(0)
                self.assertTrue(torch.allclose(acq_value, expected))
def test_prune_inferior_points(self):
    """Check `prune_inferior_points` on mocked posteriors.

    Covers the error cases (batched X, batched model, invalid max_frac),
    default and custom objectives, non-repeated samples, `max_frac`
    limiting, zero-probability pruning, and the SamplingWarning raised in
    debug mode when the base sample shape exceeds SobolEngine.MAXDIM.
    """
    for dtype in (torch.float, torch.double):
        X = torch.rand(3, 2, device=self.device, dtype=dtype)
        # the event shape is `q x t` = 3 x 1
        samples = torch.tensor(
            [[-1.0], [0.0], [1.0]], device=self.device, dtype=dtype
        )
        mm = MockModel(MockPosterior(samples=samples))
        # test that a batched X raises errors
        with self.assertRaises(UnsupportedError):
            prune_inferior_points(model=mm, X=X.expand(2, 3, 2))
        # test that a batched model raises errors (event shape is `q x t` = 3 x 1)
        mm2 = MockModel(MockPosterior(samples=samples.expand(2, 3, 1)))
        with self.assertRaises(UnsupportedError):
            prune_inferior_points(model=mm2, X=X)
        # test that invalid max_frac is checked properly
        with self.assertRaises(ValueError):
            prune_inferior_points(model=mm, X=X, max_frac=1.1)
        # test basic behaviour
        X_pruned = prune_inferior_points(model=mm, X=X)
        self.assertTrue(torch.equal(X_pruned, X[[-1]]))
        # test custom objective
        neg_id_obj = GenericMCObjective(lambda Y, X: -(Y.squeeze(-1)))
        X_pruned = prune_inferior_points(model=mm, X=X, objective=neg_id_obj)
        self.assertTrue(torch.equal(X_pruned, X[[0]]))
        # test non-repeated samples (requires mocking out MockPosterior's rsample)
        samples = torch.tensor(
            [[[3.0], [0.0], [0.0]], [[0.0], [2.0], [0.0]], [[0.0], [0.0], [1.0]]],
            device=self.device,
            dtype=dtype,
        )
        with mock.patch.object(MockPosterior, "rsample", return_value=samples):
            mm = MockModel(MockPosterior(samples=samples))
            X_pruned = prune_inferior_points(model=mm, X=X)
        self.assertTrue(torch.equal(X_pruned, X))
        # test max_frac limiting
        with mock.patch.object(MockPosterior, "rsample", return_value=samples):
            mm = MockModel(MockPosterior(samples=samples))
            X_pruned = prune_inferior_points(model=mm, X=X, max_frac=2 / 3)
        # `X` was created on `self.device`, so `X.is_cuda` also detects indexed
        # devices such as `cuda:0`, which compare unequal to
        # `torch.device("cuda")`.
        if X.is_cuda:
            # sorting has different order on cuda
            self.assertTrue(
                torch.equal(X_pruned, torch.stack([X[2], X[1]], dim=0))
            )
        else:
            self.assertTrue(torch.equal(X_pruned, X[:2]))
        # test that zero-probability is in fact pruned
        samples[2, 0, 0] = 10
        with mock.patch.object(MockPosterior, "rsample", return_value=samples):
            mm = MockModel(MockPosterior(samples=samples))
            X_pruned = prune_inferior_points(model=mm, X=X)
        self.assertTrue(torch.equal(X_pruned, X[:2]))
        # test high-dim sampling
        with ExitStack() as es:
            mock_event_shape = es.enter_context(
                mock.patch(
                    "botorch.utils.testing.MockPosterior.base_sample_shape",
                    new_callable=mock.PropertyMock,
                )
            )
            # one dimension more than the Sobol engine supports
            mock_event_shape.return_value = torch.Size(
                [1, 1, torch.quasirandom.SobolEngine.MAXDIM + 1]
            )
            es.enter_context(
                mock.patch.object(MockPosterior, "rsample", return_value=samples)
            )
            mm = MockModel(MockPosterior(samples=samples))
            with warnings.catch_warnings(record=True) as ws, settings.debug(True):
                prune_inferior_points(model=mm, X=X)
                self.assertTrue(issubclass(ws[-1].category, SamplingWarning))
def make_bayes_opt_functions(args):
    '''
    Generates and returns functions used to run Bayesian optimization

    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file

    Raises:
        SystemExit :                        (exit code 0, after printing a hint) if the first day of the
                                            collected case data contains no positive cases
    '''
    header = []

    # depending on mode, set parameter bounds
    if args.measures_optimized:
        param_bounds = settings_measures_param_bounds
    else:
        param_bounds = settings_model_param_bounds

    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    mob_settings = args.mob
    data_area = args.area
    data_country = args.country

    # initialize mobility object to obtain information (no trace generation yet)
    # NOTE(review): `pickle.load` can execute arbitrary code; only point
    # `args.mob` at trusted mobility settings files.
    with open(mob_settings, 'rb') as fp:
        kwargs = pickle.load(fp)
    mob = MobilitySimulator(**kwargs)

    # data settings
    data_start_date = args.start
    data_end_date = args.end
    debug_simulation_days = args.endsimat

    # simulation settings
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    dynamic_tracing = not args.no_dynamic_tracing

    # set testing parameters
    # NOTE(review): this is the shared settings dict itself, not a copy, so the
    # keys assigned below are written into `settings_testing_params`.
    testing_params = settings_testing_params

    # --- Bayesian optimization pipeline ---

    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    new_cases_ = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                      start_date_string=data_start_date, end_date_string=data_end_date)
    assert len(new_cases_.shape) == 2

    if new_cases_[0].sum() == 0:
        print('No positive cases at provided start time; cannot seed simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        # `sys.exit` instead of the `site`-provided `exit`, which is not
        # guaranteed to exist (e.g. when running with `python -S`)
        sys.exit(0)

    # Scale down cases based on number of people in town, region, and downsampling
    new_cases = np.ceil(
        (new_cases_ * mob.num_people_unscaled) /
        (mob.downsample * mob.region_population))

    header.append('Downsampling : ' + str(mob.downsample))
    header.append('Town population: ' + str(mob.num_people))
    header.append('Town population (unscaled): ' + str(mob.num_people_unscaled))
    header.append('Region population : ' + str(mob.region_population))

    # Set test capacity per day as (a) command line; or (b) maximum daily positive case increase over observed period
    if args.testingcap:
        testing_params['tests_per_batch'] = (args.testingcap / mob.num_people_unscaled)
    else:
        daily_totals = new_cases.sum(axis=1)
        daily_increase = daily_totals[1:] - daily_totals[:-1]
        testing_params['tests_per_batch'] = int(daily_increase.max())

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert int(testing_params['test_reporting_lag']) % 24 == 0

    # generate initial seeds based on case numbers
    initial_seeds = gen_initial_seeds(new_cases)
    header.append('Initial seed counts : ' + str(initial_seeds))

    # in debug mode, shorten time of simulation, shorten time
    if debug_simulation_days:
        new_cases = new_cases[:debug_simulation_days]

    # Maximum time fixed by real data, init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(new_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    # NOTE(review): `dynamic_tracing` is hard-coded to True here, whereas the
    # simulation launch settings below use the flag derived from `args` -
    # confirm this is intended.
    mob.simulate(max_time=max_time, dynamic_tracing=True)

    header.append(
        'Daily test capacity in sim.: ' + str(testing_params['tests_per_batch']))
    header.append(
        'Max time T (days): ' + str(new_cases.shape[0]))
    header.append(
        'Target cases per age group at t=0: ' + str(list(map(int, new_cases[0].tolist()))))
    header.append(
        'Target cases per age group at t=T: ' + str(list(map(int, new_cases[-1].tolist()))))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # Bayesian optimization target is the (days x age groups) case array
    n_days, n_age = new_cases.shape

    sim_bounds = pdict_to_parr(param_bounds, measures_optimized=args.measures_optimized).T
    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: ' + str(parr_to_pdict(sim_bounds.T, measures_optimized=args.measures_optimized)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(args.start)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(
        settings_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(
        settings_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f' ends at : {sim_end_date}')
    header.append(f'Lockdown starts at : {lockdown_start_date}')
    header.append(f' ends at : {lockdown_end_date}')

    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        dynamic_tracing=dynamic_tracing,
        verbose=False)

    # --- Define central functions for optimization ---

    # observed cases, flattened to one row of length n_days * n_age
    G_obs = torch.tensor(new_cases).reshape(1, n_days * n_age)

    def composite_squared_loss(G):
        '''
        Objective function
        Note: in BO, objectives are maximized
        '''
        return - (G - G_obs).pow(2).sum(dim=-1)

    # select objective
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes case difference of predictions and ground truth at t=T
        '''
        return preds.reshape(n_days, n_age)[-1].sum() - torch.tensor(new_cases)[-1].sum()

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize settings based which parameters are calibrated
        kwargs = copy.deepcopy(launch_kwargs)
        if args.measures_optimized:
            # Measures are calibrated
            measure_params = parr_to_pdict(params, measures_optimized=args.measures_optimized)

            # social distancing measures: calibration is only done for `SocialDistancingForAllMeasure` for now
            measure_list_ = [
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
                SocialDistancingForAllMeasure(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    p_stay_home=measure_params['p_stay_home']),
            ]

            # close sites if specified
            if args.measures_close:
                beta_multipliers = {'education': 1.0, 'social': 1.0,
                                    'bus_stop': 1.0, 'office': 1.0, 'supermarket': 1.0}
                for category in args.measures_close:
                    if category in beta_multipliers:
                        beta_multipliers[category] = 0.0
                    else:
                        raise ValueError(f'Site type `{category}` passed in `--measures_close` is invalid.\n'
                                         f'Available are {str(list(beta_multipliers.keys()))}')

                measure_list_.append(BetaMultiplierMeasureByType(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    beta_multiplier=beta_multipliers
                ))

            kwargs['measure_list'] = MeasureList(measure_list_)

            # get optimized model parameters for this country and area
            calibrated_model_params = settings_optimized_town_params[args.country][args.area]
            if calibrated_model_params is None:
                raise ValueError(f'Cannot optimize measures for {args.country}-{args.area} because model parameters '
                                 'have not been fitted yet. Set values in `calibration_settings.py`')
            kwargs['params'] = calibrated_model_params

        else:
            # Model parameters calibrated
            kwargs['measure_list'] = MeasureList([
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
            ])

            kwargs['params'] = parr_to_pdict(params, measures_optimized=args.measures_optimized)

        # run simulation in parallel,
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS  # account for test lag

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute mean and standard error of means
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero for non-degenerateness
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        G = G.reshape(1, n_days * n_age)
        G_sem = G_sem.reshape(1, n_days * n_age)

        return G, G_sem

    def generate_initial_observations(n, logger):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter setting and black box function outputs,
        together with the best objective value and its index.
        After every evaluation the progress is logged via `logger` and the
        intermediate state is saved under `logger.filename + '_init'`.
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP needs at least one observation to be defined properly.')

        # sobol sequence
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # simulator observations
        # new_G, new_G_sem: [n, n_days * n_age] (flattened outputs)
        new_G = torch.zeros((n, n_days * n_age), dtype=torch.float)
        new_G_sem = torch.zeros((n, n_days * n_age), dtype=torch.float)

        for i in range(n):

            t0 = time.time()

            # get mean and standard error of mean (sem) of every simulation output
            G, G_sem = composite_simulation(new_thetas[i, :])
            new_G[i, :] = G
            new_G_sem[i, :] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()

            # named differently from the `case_diff` closure returned by the
            # enclosing function, to avoid shadowing it
            final_case_diff = (
                G.reshape(n_days, n_age)[-1].sum()
                - G_obs.reshape(n_days, n_age)[-1].sum())

            t1 = time.time()
            logger.log(
                i=i - n,  # initial observations are logged with negative indices
                time=t1 - t0,
                best=best,
                objective=current,
                case_diff=final_case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename + '_init')

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean)
        """

        train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units

        # standardize outputs to zero mean, unit variance to have good hyperparameter tuning
        model = FixedNoiseGP(train_x, train_y, train_ynoise, outcome_transform=Standardize(m=n_days * n_age))

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults:  num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        Note: the `args` parameter of this function (not the outer `args`)
        supplies the `acqf_opt_*` settings.
        """

        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for initialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta.squeeze())

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective,
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
def get_NEI(
    model: Model,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates a qNoisyExpectedImprovement acquisition function.

    Args:
        model: The underlying model which the acquisition function uses
            to estimate acquisition values of candidates.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        X_pending: A tensor containing points whose evaluation is pending (i.e.
            that have been submitted for evaluation) present for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        **kwargs: Optional settings, read by name:
            mc_samples: The number of MC samples to use (default: 512).
            qmc: If True, use qMC instead of MC (default: True).
            prune_baseline: If True, prune the baseline points for NEI
                (default: True).
            chebyshev_scalarization: Use augmented Chebyshev scalarization
                (default: False).
            Ys: Sequence of outcome tensors, concatenated along the last
                dimension and passed to the scalarization. Required when
                `chebyshev_scalarization` is set.

    Returns:
        qNoisyExpectedImprovement: The instantiated acquisition function.

    Raises:
        ValueError: If `X_observed` is None, or if `chebyshev_scalarization`
            is requested without `Ys`.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")
    # construct Objective module
    if kwargs.get("chebyshev_scalarization", False):
        if "Ys" not in kwargs:
            raise ValueError("Chebyshev Scalarization requires Ys argument")
        Y_tensor = torch.cat(kwargs["Ys"], dim=-1)
        obj_tf = get_chebyshev_scalarization(weights=objective_weights, Y=Y_tensor)
    else:
        obj_tf = get_objective_weights_transform(objective_weights)
    if outcome_constraints is None:
        objective = GenericMCObjective(objective=obj_tf)
    else:
        con_tfs = get_outcome_constraint_transforms(outcome_constraints)
        inf_cost = get_infeasible_cost(X=X_observed, model=model, objective=obj_tf)
        objective = ConstrainedMCObjective(
            objective=obj_tf, constraints=con_tfs or [], infeasible_cost=inf_cost
        )
    # `.item()` is typed as `Union[float, int]`; cast so the seed is an `int`
    # as the callee expects.
    seed = int(torch.randint(1, 10000, (1,)).item())
    return get_acquisition_function(
        acquisition_function_name="qNEI",
        model=model,
        objective=objective,
        X_observed=X_observed,
        X_pending=X_pending,
        prune_baseline=kwargs.get("prune_baseline", True),
        mc_samples=kwargs.get("mc_samples", 512),
        qmc=kwargs.get("qmc", True),
        seed=seed,
    )
def make_bayes_opt_functions(args):
    '''
    Generates and returns functions used to run Bayesian optimization.

    Besides building the closures, this function loads the mobility settings,
    collects and downsamples the case data, runs `mob.simulate` once and
    assembles the log-file header. It terminates the process via `sys.exit`
    if no initial seeds are generated for the start date.

    Argument:
        args: Settings namespace specifying exact settings for optimization.
              Attributes read here: multi_beta_calibration, country, area,
              mob, start, end, per_age_group_objective, rollouts, cpu_count,
              no_lazy_contacts.

    Returns:
        objective : objective maximized for BO
        generate_initial_observations : function to generate initial observations
        initialize_model : function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff : computes case difference between prediction array and ground truth at t=T
        unnormalize_theta : converts BO params to simulation params (unit cube to real parameters)
        header : header lines to be printed to log file
    '''

    header = []

    # set parameter bounds based on calibration mode (single beta vs multiple beta)
    multi_beta_calibration = args.multi_beta_calibration
    if multi_beta_calibration:
        param_bounds = calibration_model_param_bounds_multi
    else:
        param_bounds = calibration_model_param_bounds_single

    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    data_country = args.country
    data_area = args.area
    mob_settings = args.mob or calibration_mob_paths[data_country][data_area][0]  # 0: downscaled, 1: full scale

    # initialize mobility object to obtain information (no trace generation yet)
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # `mob_settings` must point to a trusted file.
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)

    # data settings
    data_start_date = args.start or calibration_start_dates[data_country][data_area]
    data_end_date = args.end or calibration_lockdown_dates[args.country]['end']
    per_age_group_objective = args.per_age_group_objective

    # simulation settings
    # NOTE(review): args.not_verbose, args.no_households, args.ninit,
    # args.niters, args.load and args.acqf_opt_num_fantasies are not consumed
    # in this function; the other acqf_opt_* settings are read from the `args`
    # passed to `optimize_acqf_and_get_observation`.
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    lazy_contacts = not args.no_lazy_contacts

    # set testing parameters
    # NOTE(review): this aliases the shared `calibration_testing_params` dict,
    # so the item assignments below also modify that shared object. Kept as in
    # the original; confirm whether a copy is intended.
    testing_params = calibration_testing_params

    # ---- Bayesian optimization pipeline ----

    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    unscaled_area_cases = collect_data_from_df(
        country=data_country, area=data_area, datatype='new',
        start_date_string=data_start_date, end_date_string=data_end_date)
    assert(len(unscaled_area_cases.shape) == 2)

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    # Generate initial seeds based on unscaled case numbers in town
    initial_seeds = gen_initial_seeds(
        sim_cases, day=0)

    if sum(initial_seeds.values()) == 0:
        print('No states seeded at start time; cannot start simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        # `sys.exit` instead of the interactive-only `exit` helper from `site`.
        # NOTE(review): exit status 0 is preserved from the original even
        # though this is a failure path; confirm before changing it.
        sys.exit(0)

    header.append('Downsampling : {}'.format(mob.downsample))
    header.append('Simulation population: {}'.format(mob.num_people))
    header.append('Simulation population (unscaled): {}'.format(mob.num_people_unscaled))
    header.append('Area population : {}'.format(mob.region_population))
    header.append('Initial seed counts : {}'.format(initial_seeds))

    scaled_test_capacity = get_test_capacity(
        country=data_country, area=data_area,
        mob_settings=mob_kwargs, end_date_string=data_end_date)

    testing_params['tests_per_batch'] = scaled_test_capacity

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert(int(testing_params['test_reporting_lag']) % 24 == 0)

    # Maximum time fixed by real data, init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(sim_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # simulate longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, lazy_contacts=True)

    header.append(
        'Target cases per age group at t=0: {} {}'.format(sim_cases[0].sum().item(), list(sim_cases[0].tolist())))
    header.append(
        'Target cases per age group at t=T: {} {}'.format(sim_cases[-1].sum().item(), list(sim_cases[-1].tolist())))
    header.append(
        'Daily test capacity in sim.: {}'.format(testing_params['tests_per_batch']))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = sim_cases.shape

    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration
    ).T

    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: {}'.format(
        parr_to_pdict(parr=sim_bounds.T, multi_beta_calibration=multi_beta_calibration)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(data_start_date)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(
        calibration_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(
        calibration_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f' ends at : {sim_end_date}')
    header.append(f'Lockdown starts at : {lockdown_start_date}')
    header.append(f' ends at : {lockdown_end_date}')
    header.append(f'Cases compared until : {pd.to_datetime(data_end_date)}')
    header.append(f' for days : {sim_cases.shape[0]}')

    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        lazy_contacts=lazy_contacts,
        verbose=False)

    # ---- Define central functions for optimization ----

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    # Objective function
    # Note: in BO and botorch, objectives are maximized
    if per_age_group_objective:
        def composite_squared_loss(G):
            return - (G - G_obs).pow(2).sum(dim=-1) / n_days
    else:
        def composite_squared_loss(G):
            return - (G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    # select objective function
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes aggregate case difference of predictions and ground truth at t=T
        '''
        if per_age_group_objective:
            return preds[-1].sum(dim=-1) - G_obs_aggregate[-1]
        else:
            return preds[-1] - G_obs_aggregate[-1]

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.

        Raises ValueError if the simulation summary contains at most one
        random repeat, since no noise estimate can be computed then.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize model parameters based on given parameters and calibration mode
        kwargs = copy.deepcopy(launch_kwargs)
        all_params = parr_to_pdict(parr=params, multi_beta_calibration=multi_beta_calibration)

        if multi_beta_calibration:
            betas = all_params['betas']
        else:
            # single-beta mode: every site type shares the same calibrated beta
            betas = {
                'education': all_params['beta_site'],
                'social': all_params['beta_site'],
                'bus_stop': all_params['beta_site'],
                'office': all_params['beta_site'],
                'supermarket': all_params['beta_site'],
            }

        model_params = {
            'betas' : betas,
            'beta_household' : all_params['beta_household'],
        }

        # set exposure parameters
        kwargs['params'] = model_params

        # set measure parameters
        kwargs['measure_list'] = MeasureList([
            # standard behavior of positively tested: full isolation
            SocialDistancingForPositiveMeasure(
                t_window=Interval(0.0, max_time), p_stay_home=1.0),
            SocialDistancingForPositiveMeasureHousehold(
                t_window=Interval(0.0, max_time), p_isolate=1.0),

            # social distancing factor during lockdown: calibrated
            SocialDistancingForAllMeasure(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                p_stay_home=all_params['p_stay_home']),

            # site specific measures: fixed in advance, outside of calibration
            BetaMultiplierMeasureByType(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                beta_multiplier=calibration_lockdown_beta_multipliers)
        ])

        # run simulation in parallel,
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS  # account for test lag in objective computation

        # original note says (random_repeats, n_days); presumably
        # (random_repeats, n_people) -- TODO confirm
        age_groups = torch.tensor(summary.people_age)

        # (random_repeats, n_days, n_age_groups)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute aggregate if not using objective per age-group
        if not per_age_group_objective:
            posi_cumulative = posi_cumulative.sum(dim=-1)

        # compute mean and standard error of means
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero for non-degenerateness
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        if per_age_group_objective:
            G = G.reshape(n_days * n_age)
            G_sem = G_sem.reshape(n_days * n_age)

        return G, G_sem

    def generate_initial_observations(n, logger, loaded_init_theta=None, loaded_init_G=None, loaded_init_G_sem=None):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter setting and black box function outputs.
        If `loaded_init_theta/G/G_sem` are specified, initialization is loaded (possibly partially, in which
        case the initialization using the Sobol random sequence is continued where left off).

        After every observation the progress is logged via `logger.log` and
        the state so far is written with `save_state(state, logger.filename)`.

        Returns `(new_thetas, new_G, new_G_sem, best_f, best_f_idx)`.
        Raises ValueError if `n <= 0`.
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP needs at least one observation to be defined properly.')

        # sobol sequence proposal points
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # check whether initial observations are loaded
        loaded = (loaded_init_theta is not None
                  and loaded_init_G is not None
                  and loaded_init_G_sem is not None)
        if loaded:
            n_loaded = loaded_init_theta.shape[0]  # loaded no. of observations total
            n_loaded_init = min(n_loaded, n)  # loaded no. of quasi-random initialization observations
            n_init = max(n_loaded, n)  # final no. of observations returned, at least quasi-random initializations

            # check whether loaded proposal points are same as without loading observations
            # (explicit check rather than `assert`, which is stripped under `python -O`)
            if not np.allclose(loaded_init_theta[:n_loaded_init], new_thetas[:n_loaded_init]):
                print(
                    '\n\n\n===> Warning: parameters of loaded inital observations '
                    'do not coincide with initialization that would have been done. '
                    'Double check simulation, ninit, and parameter bounds, which could change '
                    'the initial random Sobol sequence. \nThe loaded parameter settings are used. \n\n\n'
                )

            if n_init > n:
                new_thetas = loaded_init_theta  # size of tensor increased to `n_init`, as more than Sobol init points loaded

        else:
            n_loaded = 0  # loaded no. of observations total
            n_init = n  # final no. of observations returned, at least quasi-random initializations

        # instantiate simulator observation tensors
        if per_age_group_objective:
            # new_G, new_G_sem: [n_init, n_days * n_age] (flattened outputs)
            new_G = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
        else:
            # new_G, new_G_sem: [n_init, n_days]
            new_G = torch.zeros((n_init, n_days), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days), dtype=torch.float)

        # generate `n` initial evaluations at quasi random settings; if applicable, skip and load expensive evaluation result
        for i in range(n_init):

            # if loaded, use initial observation for this parameter settings
            if loaded and i <= n_loaded - 1:
                new_thetas[i] = loaded_init_theta[i]
                G, G_sem = loaded_init_G[i], loaded_init_G_sem[i]
                walltime = 0.0

            # if not loaded, evaluate as usual
            else:
                t0 = time.time()
                G, G_sem = composite_simulation(new_thetas[i])
                walltime = time.time() - t0

            new_G[i] = G
            new_G_sem[i] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()

            # named differently from the enclosing `case_diff` closure to avoid shadowing it
            if per_age_group_objective:
                final_case_diff = G.reshape(n_days, n_age)[-1].sum() - G_obs_aggregate[-1]
            else:
                final_case_diff = G[-1] - G_obs_aggregate[-1]

            logger.log(
                i=i - n,
                time=walltime,
                best=best,
                objective=current,
                case_diff=final_case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename)

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean).
        Returns the marginal log likelihood object and the model as `(mll, model)`.
        """

        train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units

        # standardize outputs to zero mean, unit variance to have good hyperparameter tuning
        outcome_transform = Standardize(m=n_days * n_age if per_age_group_objective else n_days)
        model = FixedNoiseGP(train_x, train_y, train_ynoise, outcome_transform=outcome_transform)

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults: num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200

        `args` must provide acqf_opt_num_restarts, acqf_opt_raw_samples,
        acqf_opt_batch_limit and acqf_opt_maxiter.
        """

        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for initialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach().squeeze()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta)

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective,
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
def test_cache_root(self):
    """Compare qNEI with `cache_root=True` against the uncached variant.

    For every combination of model batch shape, number of outputs and dtype
    the test checks that
    - a sampler built with `collapse_batch_dims=False` is rejected with
      `UnsupportedError` when `cache_root=True`,
    - values and input gradients of the cached and uncached acquisition
      functions agree, and `sample_cached_cholesky` is called exactly once
      by the cached one and never by the uncached one,
    - evaluating with a zeroed `_baseline_L` emits exactly one
      `BotorchWarning`.
    """
    cached_cholesky_target = (
        "botorch.acquisition.cached_cholesky.sample_cached_cholesky"
    )
    raw_state_dict = {
        "likelihood.noise_covar.raw_noise": torch.tensor(
            [[0.0895], [0.2594]], dtype=torch.float64
        ),
        "mean_module.constant": torch.tensor(
            [[-0.4545], [-0.1285]], dtype=torch.float64
        ),
        "covar_module.raw_outputscale": torch.tensor(
            [1.4876, 1.4897], dtype=torch.float64
        ),
        "covar_module.base_kernel.raw_lengthscale": torch.tensor(
            [[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]], dtype=torch.float64
        ),
    }
    # test batched models (e.g. for MCMC)
    model_batch_shapes = (torch.Size([]), torch.Size([3]))
    for train_batch_shape, m, dtype in product(
        model_batch_shapes, (1, 2), (torch.float, torch.double)
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        batched_model = len(train_batch_shape) > 0

        # Select the first output when m == 1, broadcast over the model batch
        # shape if there is one, then move to the target device/dtype.
        state_dict = {}
        for name, tensor in deepcopy(raw_state_dict).items():
            if m == 1:
                tensor = tensor[0]
            if batched_model:
                tensor = tensor.unsqueeze(0).expand(
                    *train_batch_shape, *tensor.shape
                )
            state_dict[name] = tensor.to(**tkwargs)

        objective = (
            GenericMCObjective(lambda Y, X: Y.sum(dim=-1)) if m == 2 else None
        )
        # looser absolute tolerance in single precision
        all_close_kwargs = {
            "atol": 1e-1 if dtype == torch.float else 1e-4,
            "rtol": 0.0,
        }

        torch.manual_seed(1234)
        train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs)
        noise = torch.randn(*train_batch_shape, 3, 2, **tkwargs)
        train_Y = standardize((torch.sin(train_X * 2 * pi) + noise)[..., :m])
        model = SingleTaskGP(train_X, train_Y)
        X_baseline = train_X[0] if batched_model else train_X
        model.load_state_dict(state_dict, strict=False)

        # constructor arguments shared by every qNEI instance below
        qnei_kwargs = {
            "model": model,
            "X_baseline": X_baseline,
            "objective": objective,
            "prune_baseline": False,
        }

        # test sampler with collapse_batch_dims=False
        sampler = IIDNormalSampler(5, seed=0, collapse_batch_dims=False)
        with self.assertRaises(UnsupportedError):
            qNoisyExpectedImprovement(
                sampler=sampler, cache_root=True, **qnei_kwargs
            )

        sampler = IIDNormalSampler(5, seed=0)
        torch.manual_seed(0)
        acqf = qNoisyExpectedImprovement(
            sampler=sampler, cache_root=True, **qnei_kwargs
        )
        orig_base_samples = acqf.base_sampler.base_samples.detach().clone()
        sampler2 = IIDNormalSampler(5, seed=0)
        sampler2.base_samples = orig_base_samples
        torch.manual_seed(0)
        acqf_no_cache = qNoisyExpectedImprovement(
            sampler=sampler2, cache_root=False, **qnei_kwargs
        )

        test_batch_shapes = (torch.Size([]), torch.Size([3]), torch.Size([4, 3]))
        for q, batch_shape in product((1, 3), test_batch_shapes):
            test_X = (
                0.3 + 0.05 * torch.randn(*batch_shape, q, 2, **tkwargs)
            ).requires_grad_(True)

            # cached path: the cached-cholesky sampler must be used once
            with mock.patch(
                cached_cholesky_target, wraps=sample_cached_cholesky
            ) as mock_sample_cached:
                torch.manual_seed(0)
                cached_val = acqf(test_X)
                mock_sample_cached.assert_called_once()
            cached_val.sum().backward()
            base_samples = acqf.sampler.base_samples.detach().clone()
            cached_grad = test_X.grad.clone()

            # uncached path on a fresh copy of the inputs, same base samples
            test_X2 = test_X.detach().clone().requires_grad_(True)
            acqf_no_cache.sampler.base_samples = base_samples
            with mock.patch(
                cached_cholesky_target, wraps=sample_cached_cholesky
            ) as mock_sample_cached:
                torch.manual_seed(0)
                uncached_val = acqf_no_cache(test_X2)
                mock_sample_cached.assert_not_called()
            self.assertTrue(
                torch.allclose(cached_val, uncached_val, **all_close_kwargs)
            )
            uncached_val.sum().backward()
            self.assertTrue(
                torch.allclose(cached_grad, test_X2.grad, **all_close_kwargs)
            )

        # test we fall back to standard sampling for
        # ill-conditioned covariances
        acqf._baseline_L = torch.zeros_like(acqf._baseline_L)
        with warnings.catch_warnings(record=True) as ws, settings.debug(True):
            with torch.no_grad():
                acqf(test_X)
        self.assertEqual(len(ws), 1)
        self.assertTrue(issubclass(ws[-1].category, BotorchWarning))
def test_evaluate_q_knowledge_gradient(self):
    """Evaluate qKnowledgeGradient against a mocked `fantasize`.

    For both float and double precision, four scenarios are covered: a basic
    evaluation, a batched evaluation, an evaluation with pending points and a
    current value, and an evaluation with an MC objective. Each scenario
    checks that `fantasize` is called once with the expected `X` shape, the
    returned value, and `extract_candidates`.
    """
    n_f = 4

    def check_fantasize_call(patched, expected_shape):
        # `fantasize` must be called exactly once with `X` of the given shape
        patched.assert_called_once()
        _, call_kwargs = patched.call_args
        self.assertEqual(call_kwargs["X"].shape, expected_shape)

    def check_candidates(acqf, X_full):
        # the candidates are everything but the trailing `n_f` fantasy points
        self.assertTrue(
            torch.equal(acqf.extract_candidates(X_full), X_full[..., :-n_f, :])
        )

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        # basic test
        mean = torch.rand(n_f, 1, **tkwargs)
        variance = torch.rand(n_f, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(mean=mean, variance=variance))
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as patch_f:
            qKG = qKnowledgeGradient(model=MockModel(None), num_fantasies=n_f)
            X = torch.rand(n_f + 1, 1, **tkwargs)
            val = qKG(X)
            check_fantasize_call(patch_f, torch.Size([1, 1]))
        self.assertTrue(torch.allclose(val, mean.mean(), atol=1e-4))
        check_candidates(qKG, X)

        # batched evaluation
        b = 2
        mean = torch.rand(n_f, b, 1, **tkwargs)
        variance = torch.rand(n_f, b, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(mean=mean, variance=variance))
        X = torch.rand(b, n_f + 1, 1, **tkwargs)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as patch_f:
            qKG = qKnowledgeGradient(model=MockModel(None), num_fantasies=n_f)
            val = qKG(X)
            check_fantasize_call(patch_f, torch.Size([b, 1, 1]))
        expected = mean.mean(dim=0).squeeze(-1)
        self.assertTrue(torch.allclose(val, expected, atol=1e-4))
        check_candidates(qKG, X)

        # pending points and current value
        mean = torch.rand(n_f, 1, **tkwargs)
        variance = torch.rand(n_f, 1, **tkwargs)
        X_pending = torch.rand(2, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(mean=mean, variance=variance))
        current_value = torch.rand(1, **tkwargs)
        X = torch.rand(n_f + 1, 1, **tkwargs)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as patch_f:
            qKG = qKnowledgeGradient(
                model=MockModel(None),
                num_fantasies=n_f,
                X_pending=X_pending,
                current_value=current_value,
            )
            val = qKG(X)
            # one candidate plus the two pending points
            check_fantasize_call(patch_f, torch.Size([3, 1]))
        expected = mean.mean() - current_value
        self.assertTrue(torch.allclose(val, expected, atol=1e-4))
        check_candidates(qKG, X)

        # test objective (inner MC sampling)
        objective = GenericMCObjective(objective=lambda Y: Y.norm(dim=-1))
        samples = torch.randn(3, 1, 1, **tkwargs)
        fantasy_model = MockModel(MockPosterior(samples=samples))
        X = torch.rand(n_f + 1, 1, **tkwargs)
        with mock.patch.object(
            MockModel, "fantasize", return_value=fantasy_model
        ) as patch_f:
            qKG = qKnowledgeGradient(
                model=MockModel(None), num_fantasies=n_f, objective=objective
            )
            val = qKG(X)
            check_fantasize_call(patch_f, torch.Size([1, 1]))
        expected = objective(samples).mean()
        self.assertTrue(torch.allclose(val, expected, atol=1e-4))
        check_candidates(qKG, X)