def _setUp(self, double=False, cuda=False):
    """Create noisy 1-d sine data and fit a SingleTaskGP and a FixedNoiseGP.

    Stores ``train_x``, ``train_y``, ``train_yvar`` and ``bounds`` on the
    instance, then builds and fits (with ``options={"maxiter": 5}``) a
    ``SingleTaskGP`` (``model_st`` / ``mll_st``) and a ``FixedNoiseGP``
    (``model_fn`` / ``mll_fn``) on that data.

    Args:
        double: If True use ``torch.double``, otherwise ``torch.float``.
        cuda: If True place tensors and models on the CUDA device.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")
    dtype = torch.double if double else torch.float
    tkwargs = {"device": device, "dtype": dtype}

    train_x = torch.linspace(0, 1, 10, **tkwargs).unsqueeze(-1)
    train_y = torch.sin(train_x * (2 * math.pi)).squeeze(-1)
    # Build the observation variance in the requested dtype too; otherwise the
    # FixedNoiseGP below is constructed from float32 noise and float64 data
    # whenever ``double=True``.
    train_yvar = torch.tensor(0.1 ** 2, **tkwargs)
    noise = torch.tensor(NOISE, **tkwargs)

    self.train_x = train_x
    self.train_y = train_y + noise
    self.train_yvar = train_yvar
    self.bounds = torch.tensor([[0.0], [1.0]], **tkwargs)

    # Model with inferred noise.
    model_st = SingleTaskGP(self.train_x, self.train_y)
    self.model_st = model_st.to(**tkwargs)
    self.mll_st = ExactMarginalLogLikelihood(
        self.model_st.likelihood, self.model_st
    )
    self.mll_st = fit_gpytorch_model(self.mll_st, options={"maxiter": 5})

    # Model with known (fixed) observation noise.
    model_fn = FixedNoiseGP(
        self.train_x, self.train_y, self.train_yvar.expand_as(self.train_y)
    )
    self.model_fn = model_fn.to(**tkwargs)
    self.mll_fn = ExactMarginalLogLikelihood(
        self.model_fn.likelihood, self.model_fn
    )
    self.mll_fn = fit_gpytorch_model(self.mll_fn, options={"maxiter": 5})
def _setUp(self, double=False, cuda=False, expand=False):
    """Prepare noisy sine training data, initial conditions and a fitted GP.

    Sets ``train_x``, ``train_y``, ``initial_conditions``, ``f_best``,
    ``model`` and ``mll`` on the instance.

    Args:
        double: If True use ``torch.double``, otherwise ``torch.float``.
        cuda: If True place tensors and the model on the CUDA device.
        expand: If True expand ``train_x`` to two columns and use
            two-dimensional initial conditions.
    """
    device = torch.device("cuda" if cuda else "cpu")
    dtype = torch.double if double else torch.float

    inputs = torch.linspace(0, 1, 10, device=device, dtype=dtype).unsqueeze(-1)
    clean_targets = torch.sin(inputs * (2 * math.pi)).squeeze(-1)
    self.train_x = inputs
    self.train_y = clean_targets + torch.tensor(NOISE, device=device, dtype=dtype)

    ic_values = [[0.5]]
    if expand:
        self.train_x = self.train_x.expand(-1, 2)
        ic_values = [[0.5, 1.0]]
    self.initial_conditions = torch.tensor(ic_values, device=device, dtype=dtype)

    # Best observed value so far.
    self.f_best = self.train_y.max().item()

    self.model = SingleTaskGP(self.train_x, self.train_y).to(
        device=device, dtype=dtype
    )
    self.mll = fit_gpytorch_model(
        ExactMarginalLogLikelihood(self.model.likelihood, self.model),
        options={"maxiter": 1},
    )
def test_noisy_expected_improvement(self, cuda=False):
    """Exercise NoisyExpectedImprovement for float and double models.

    Checks output dtype/device/shape, the magnitude of the values at two test
    points, gradient flow, evaluation under ``torch.no_grad()``, rejection of
    a SingleTaskGP model, and construction with ``maximize=False``.
    """
    for dtype in (torch.float, torch.double):
        model = self._get_model(cuda=cuda, dtype=dtype)
        observed = model.train_inputs[0]
        acqf = NoisyExpectedImprovement(model, observed, num_fantasies=5)
        query = torch.tensor(
            [[[0.25]], [[0.75]]],
            device=observed.device,
            dtype=dtype,
            requires_grad=True,
        )
        values = acqf(query)

        # test basics
        self.assertEqual(values.dtype, dtype)
        self.assertEqual(values.device.type, observed.device.type)
        self.assertEqual(values.shape, torch.Size([2]))

        # test values
        self.assertGreater(values[0].item(), 1e-4)
        self.assertLess(values[1].item(), 1e-6)

        # test gradient
        values.sum().backward()
        self.assertGreater(query.grad[0].abs().item(), 1e-4)

        # test without gradient
        with torch.no_grad():
            acqf(query)

        # test non-FixedNoiseGP model
        unsupported_model = SingleTaskGP(
            observed, model.train_targets.unsqueeze(-1)
        )
        with self.assertRaises(UnsupportedError):
            NoisyExpectedImprovement(unsupported_model, observed, num_fantasies=5)

        # Test with minimize
        acqf = NoisyExpectedImprovement(
            model, observed, num_fantasies=5, maximize=False
        )
def _sample(self, candidates: Optional[np.array] = None) -> np.array:
    """Propose the next point, using a GP once enough points are observed.

    While fewer than ``num_initial_random_draws`` points have been observed
    the call is delegated to ``initial_sampler``. Afterwards a SingleTaskGP is
    fitted to the transformed observations and the acquisition built by
    ``self.expected_improvement`` is either optimized over
    ``self.bounds_tensor`` (``candidates is None``) or evaluated on the given
    candidates, returning the one with the highest acquisition value.
    """
    if len(self.X_observed) < self.num_initial_random_draws:
        return self.initial_sampler.sample(candidates=candidates)

    targets = torch.Tensor(self.transform_outputs(self.y_observed.numpy()))

    # build and fit GP
    # special likelihood for numerical Cholesky errors, following advice from
    # https://www.gitmemory.com/issue/pytorch/botorch/179/506276521
    noise_model = GaussianLikelihood(noise_constraint=GreaterThan(1e-3))
    surrogate = SingleTaskGP(
        train_X=self.X_observed,
        train_Y=targets,
        likelihood=noise_model,
    )
    fit_gpytorch_model(ExactMarginalLogLikelihood(surrogate.likelihood, surrogate))

    acquisition = self.expected_improvement(
        model=surrogate,
        best_f=targets.min(dim=0).values,
    )

    if candidates is not None:
        # (N,)
        scores = acquisition(torch.Tensor(candidates).unsqueeze(dim=-2))
        return torch.Tensor(candidates[scores.argmax()])

    best_point, _ = optimize_acqf(
        acquisition,
        bounds=self.bounds_tensor,
        q=1,
        num_restarts=5,
        raw_samples=100,
    )
    return best_point[0]
def test_roundtrip(self):
    """Check batched -> model list -> batched conversion preserves state dicts.

    Covers SingleTaskGP, FixedNoiseGP and SingleTaskMultiFidelityGP (with and
    without the linear-truncated kernel) for float and double.
    """

    def check_roundtrip(original):
        # Convert to a model list and back, then compare the state dicts.
        recovered = model_list_to_batched(batched_to_model_list(original))
        state_before = original.state_dict()
        state_after = recovered.state_dict()
        self.assertTrue(set(state_before) == set(state_after))
        self.assertTrue(
            all(torch.equal(state_before[k], state_after[k]) for k in state_before)
        )

    for dtype in (torch.float, torch.double):
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y = torch.stack(
            [train_X.sum(dim=-1), train_X[:, 0] - train_X[:, 1]], dim=-1
        )

        # SingleTaskGP
        check_roundtrip(SingleTaskGP(train_X, train_Y))

        # FixedNoiseGP
        check_roundtrip(FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y)))

        # SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            check_roundtrip(
                SingleTaskMultiFidelityGP(
                    train_X,
                    train_Y,
                    iteration_fidelity=1,
                    linear_truncated=lin_trunc,
                )
            )
def test_cache_root_decomposition(self):
    """Check ``_cache_root_decomposition`` for multi-output and single-output posteriors.

    With a two-output model the batch covariance extraction must be called
    once with ``posterior.mvn``; with a single-output model it must not be
    called. In both cases the (mocked) Cholesky is called exactly once and its
    return value ends up in ``acqf._baseline_L``.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        # test mt-mvn
        train_x = torch.rand(2, 1, **tkwargs)
        train_y = torch.rand(2, 2, **tkwargs)
        test_x = torch.rand(2, 1, **tkwargs)
        model = SingleTaskGP(train_x, train_y)
        sampler = IIDNormalSampler(1)
        with torch.no_grad():
            posterior = model.posterior(test_x)
        acqf = DummyCachedCholeskyAcqf(
            model=model,
            sampler=sampler,
            objective=GenericMCObjective(lambda Y: Y[..., 0]),
        )
        baseline_L = torch.eye(2, **tkwargs)
        with mock.patch(
            EXTRACT_BATCH_COVAR_PATH, wraps=extract_batch_covar
        ) as extract_mock, mock.patch(
            CHOLESKY_PATH, return_value=baseline_L
        ) as cholesky_mock:
            acqf._cache_root_decomposition(posterior=posterior)
            extract_mock.assert_called_once_with(posterior.mvn)
            cholesky_mock.assert_called_once()

        # test mvn
        model = SingleTaskGP(train_x, train_y[:, :1])
        with torch.no_grad():
            posterior = model.posterior(test_x)
        with mock.patch(EXTRACT_BATCH_COVAR_PATH) as extract_mock, mock.patch(
            CHOLESKY_PATH, return_value=baseline_L
        ) as cholesky_mock:
            acqf._cache_root_decomposition(posterior=posterior)
            extract_mock.assert_not_called()
            cholesky_mock.assert_called_once()
        self.assertTrue(torch.equal(acqf._baseline_L, baseline_L))
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    The objectives are scalarized with a Chebyshev scalarization whose weights
    are sampled from the simplex; a single candidate is then obtained by
    optimizing qExpectedImprovement of that scalarization on a SingleTaskGP
    fitted to the normalized inputs.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """
    n_objectives = train_obj.size(-1)

    simplex_weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=simplex_weights, Y=train_obj)

    if train_con is None:
        train_y = train_obj
        objective = GenericMCObjective(scalarization)
    else:
        train_y = torch.cat([train_obj, train_con], dim=-1)
        n_constraints = train_con.size(1)
        # Constraint columns are the trailing ``n_constraints`` outputs; ``i``
        # is bound as a default to avoid late binding in the lambdas.
        constraints = [
            (lambda Z, i=i: Z[..., -n_constraints + i])
            for i in range(n_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(
        train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))
    )
    fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

    acqf = qExpectedImprovement(
        model=model,
        best_f=objective(train_y).max(),
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # The model works on normalized inputs, so optimize over the unit cube.
    unit_bounds = torch.zeros_like(bounds)
    unit_bounds[1] = 1

    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=unit_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options=optimizer_options,
        sequential=True,
    )

    return unnormalize(candidates.detach(), bounds=bounds)
def test_model_list_to_batched(self):
    """Exercise ``model_list_to_batched`` on supported and unsupported model lists."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}

        # basic test
        X = torch.rand(10, 2, **tkwargs)
        Y_sum = X.sum(dim=-1, keepdim=True)
        Y_diff = (X[:, 0] - X[:, 1]).unsqueeze(-1)
        base_gp = SingleTaskGP(X, Y_sum)
        other_gp = SingleTaskGP(X, Y_diff)
        model_list = ModelListGP(base_gp, other_gp)
        batched = model_list_to_batched(model_list)
        self.assertIsInstance(batched, SingleTaskGP)

        # test degenerate (single model)
        batched = model_list_to_batched(ModelListGP(base_gp))
        self.assertEqual(batched._num_outputs, 1)

        # test different model classes
        other_gp = FixedNoiseGP(X, Y_sum, torch.ones_like(Y_sum))
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(base_gp, other_gp))

        # test non-batched models
        simple_a = SimpleGPyTorchModel(X, Y_sum)
        simple_b = SimpleGPyTorchModel(X, Y_diff)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(simple_a, simple_b))

        # test list of multi-output models
        Y_both = torch.cat([Y_sum, Y_diff], dim=-1)
        other_gp = SingleTaskGP(X, Y_both)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(base_gp, other_gp))

        # test different training inputs
        other_gp = SingleTaskGP(2 * X, Y_diff)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(base_gp, other_gp))

        # check scalar agreement
        other_gp = SingleTaskGP(X, Y_diff)
        other_gp.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(base_gp, other_gp))

        # check tensor shape agreement
        other_gp = SingleTaskGP(X, Y_diff)
        other_gp.covar_module.raw_outputscale = torch.nn.Parameter(
            torch.tensor([0.0], **tkwargs)
        )
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(base_gp, other_gp))

        # test HeteroskedasticSingleTaskGP
        other_gp = HeteroskedasticSingleTaskGP(X, Y_sum, torch.ones_like(Y_sum))
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(other_gp))

        # test custom likelihood
        other_gp = SingleTaskGP(X, Y_diff, likelihood=GaussianLikelihood())
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(other_gp))

        # test FixedNoiseGP
        X = torch.rand(10, 2, **tkwargs)
        Y_sum = X.sum(dim=-1, keepdim=True)
        Y_diff = (X[:, 0] - X[:, 1]).unsqueeze(-1)
        model_a = FixedNoiseGP(X, Y_sum, torch.rand_like(Y_sum))
        model_b = FixedNoiseGP(X, Y_diff, torch.rand_like(Y_diff))
        model_list = ModelListGP(model_a, model_b)
        batched = model_list_to_batched(model_list)

        # test SingleTaskMultiFidelityGP
        model_a = SingleTaskMultiFidelityGP(X, Y_sum, iteration_fidelity=1)
        model_b = SingleTaskMultiFidelityGP(X, Y_diff, iteration_fidelity=1)
        model_list = ModelListGP(model_a, model_b)
        batched = model_list_to_batched(model_list)
        model_b = SingleTaskMultiFidelityGP(X, Y_diff, iteration_fidelity=2)
        model_list = ModelListGP(model_a, model_b)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)

        # test input transform
        unit_tf = Normalize(
            d=2,
            bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], **tkwargs),
        )
        model_a = SingleTaskGP(X, Y_sum, input_transform=unit_tf)
        model_b = SingleTaskGP(X, Y_diff, input_transform=unit_tf)
        model_list = ModelListGP(model_a, model_b)
        batched = model_list_to_batched(model_list)
        self.assertIsInstance(batched.input_transform, Normalize)
        self.assertTrue(
            torch.equal(batched.input_transform.bounds, unit_tf.bounds)
        )

        # test different input transforms
        wide_tf = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], **tkwargs),
        )
        model_a = SingleTaskGP(X, Y_sum, input_transform=unit_tf)
        model_b = SingleTaskGP(X, Y_diff, input_transform=wide_tf)
        model_list = ModelListGP(model_a, model_b)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)

        # test batched input transform
        wide_tf = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], **tkwargs),
            batch_shape=torch.Size([3]),
        )
        model_a = SingleTaskGP(X, Y_sum, input_transform=wide_tf)
        model_b = SingleTaskGP(X, Y_diff, input_transform=wide_tf)
        model_list = ModelListGP(model_a, model_b)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)

        # test outcome transform
        standardize_tf = Standardize(m=1)
        model_a = SingleTaskGP(X, Y_sum, outcome_transform=standardize_tf)
        model_b = SingleTaskGP(X, Y_diff, outcome_transform=standardize_tf)
        model_list = ModelListGP(model_a, model_b)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)
def test_proximal(self):
    """Exercise ProximalAcquisitionFunction on analytic and MC acquisition functions.

    Compares the proximal value against the base acquisition value multiplied
    by a Gaussian weight centred on the last training point, checks output
    shapes and gradient flow, and verifies the error cases.
    """
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        train_X = torch.rand(5, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(**tkwargs).eval()
        EI = ExpectedImprovement(model, best_f=0.0)

        proximal_weights = torch.ones(3, **tkwargs)
        last_point = train_X[-1]

        def reference_weight(X):
            # Gaussian density at X relative to its value at the last training point.
            mv_normal = MultivariateNormal(last_point, torch.diag(proximal_weights))
            numerator = torch.exp(mv_normal.log_prob(X))
            denominator = torch.exp(mv_normal.log_prob(last_point))
            return numerator / denominator

        # test single point
        test_X = torch.rand(1, 3, **tkwargs)
        EI_prox = ProximalAcquisitionFunction(
            EI, proximal_weights=proximal_weights
        )
        ei = EI(test_X)
        weight = reference_weight(test_X)
        ei_prox = EI_prox(test_X)
        self.assertTrue(torch.allclose(ei_prox, ei * weight))
        self.assertTrue(ei_prox.shape == torch.Size([1]))

        # test t-batch with broadcasting
        test_X = torch.rand(4, 1, 3, **tkwargs)
        ei = EI(test_X)
        weight = reference_weight(test_X)
        ei_prox = EI_prox(test_X)
        self.assertTrue(torch.allclose(ei_prox, ei * weight.flatten()))
        self.assertTrue(ei_prox.shape == torch.Size([4]))

        # test MC acquisition function
        qEI = qExpectedImprovement(model, best_f=0.0)
        test_X = torch.rand(4, 1, 3, **tkwargs)
        qEI_prox = ProximalAcquisitionFunction(
            qEI, proximal_weights=proximal_weights
        )
        qei = qEI(test_X)
        weight = reference_weight(test_X)
        qei_prox = qEI_prox(test_X)
        self.assertTrue(torch.allclose(qei_prox, qei * weight.flatten()))
        self.assertTrue(qei_prox.shape == torch.Size([4]))

        # test gradient
        test_X = torch.rand(1, 3, requires_grad=True, **tkwargs)
        ei_prox = EI_prox(test_X)
        ei_prox.backward()

        # test model without train_inputs
        bad_model = DummyModel()
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(bad_model, 0.0), proximal_weights
            )

        # test proximal weights that do not match training_inputs
        train_X = torch.rand(5, 1, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        with self.assertRaises(ValueError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(model, 0.0), proximal_weights[:1]
            )
        with self.assertRaises(ValueError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(model, 0.0),
                torch.rand(3, 3, **tkwargs),
            )

        # test for x_pending points
        pending_acq = DummyAcquisitionFunction(model)
        pending_acq.set_X_pending(torch.rand(3, 3, **tkwargs))
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(pending_acq, proximal_weights)

        # test model with multi-batch training inputs
        train_X = torch.rand(5, 2, 3, **tkwargs)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        bad_single_task = (
            SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        )
        with self.assertRaises(UnsupportedError):
            ProximalAcquisitionFunction(
                ExpectedImprovement(bad_single_task, 0.0), proximal_weights
            )
def step(self, snapshot_mode: str, meta_info: dict = None):
    """Run one Bayesian-optimization iteration: fit GP, pick, train and evaluate a candidate.

    On the first call the initial policies are trained and evaluated. Then a
    SingleTaskGP is fitted to the normalized candidates and standardized
    values, the configured acquisition function is optimized over the unit
    cube, and the resulting candidate is projected back, stored, used to train
    a policy, and evaluated. Candidates, their values and the current argmax
    of the posterior mean are saved to ``self._save_dir``.

    :param snapshot_mode: forwarded to ``make_snapshot``
    :param meta_info: forwarded to ``make_snapshot``
    :raises pyrado.ValueErr: if ``self.acq_fcn_type`` is not one of 'UCB', 'EI', 'PI'
    """
    if not self.initialized:
        # Start initialization phase
        self.train_init_policies()
        self.eval_init_policies()
        self.initialized = True

    # Normalize the input data and standardize the output data
    cands_norm = self.uc_normalizer.project_to(self.cands)
    cands_values_stdized = standardize(self.cands_values).unsqueeze(1)

    # Create and fit the GP model
    gp = SingleTaskGP(cands_norm, cands_values_stdized)
    gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)
    print_cbt('Fitted the GP.', 'g')

    # Acquisition functions
    if self.acq_fcn_type == 'UCB':
        acq_fcn = UpperConfidenceBound(
            gp, beta=self.acq_param.get('beta', 0.1), maximize=True)
    elif self.acq_fcn_type == 'EI':
        acq_fcn = ExpectedImprovement(
            gp, best_f=cands_values_stdized.max().item(), maximize=True)
    elif self.acq_fcn_type == 'PI':
        acq_fcn = ProbabilityOfImprovement(
            gp, best_f=cands_values_stdized.max().item(), maximize=True)
    else:
        raise pyrado.ValueErr(given=self.acq_fcn_type,
                              eq_constraint="'UCB', 'EI', 'PI'")

    # Optimize acquisition function and get new candidate point.
    # The optimum lives in the normalized [0, 1] space the GP was fitted on.
    cand_norm, _ = optimize_acqf(
        acq_function=acq_fcn,
        bounds=to.stack([to.zeros(self.cand_dim), to.ones(self.cand_dim)]),
        q=1,
        num_restarts=self.acq_restarts,
        raw_samples=self.acq_samples)
    next_cand = self.uc_normalizer.project_back(cand_norm)
    print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
    self.cands = to.cat([self.cands, next_cand], dim=0)
    to.save(self.cands, osp.join(self._save_dir, 'candidates.pt'))

    # Train and valuate the new candidate (saves to iter_{self._curr_iter}_policy.pt)
    prefix = f'iter_{self._curr_iter}'
    wrapped_trn_fcn = until_thold_exceeded(
        self.thold_succ_subroutine.item(),
        max_iter=self.max_subroutine_rep)(self.train_policy_sim)
    # Train on the de-normalized candidate, i.e. the same point that was just
    # appended to self.cands and that the evaluated value below is paired with.
    # Passing the normalized optimum here would train on different parameters
    # than the ones recorded.
    wrapped_trn_fcn(next_cand, prefix)

    # Evaluate the current policy on the target domain
    policy = to.load(osp.join(self._save_dir, f'{prefix}_policy.pt'))
    self.curr_cand_value = self.eval_policy(
        self._save_dir, self._env_real, policy, self.montecarlo_estimator,
        prefix, self.num_eval_rollouts_real)

    self.cands_values = to.cat(
        [self.cands_values, self.curr_cand_value.view(1)], dim=0)
    to.save(self.cands_values,
            osp.join(self._save_dir, 'candidates_values.pt'))

    # Store the argmax after training and evaluating
    curr_argmax_cand = BayRn.argmax_posterior_mean(
        self.cands, self.cands_values.unsqueeze(1), self.uc_normalizer,
        self.acq_restarts, self.acq_samples)
    self.argmax_cand = to.cat([self.argmax_cand, curr_argmax_cand], dim=0)
    to.save(self.argmax_cand,
            osp.join(self._save_dir, 'candidates_argmax.pt'))

    self.make_snapshot(snapshot_mode, float(to.mean(self.cands_values)),
                       meta_info)
def _gp_fit_test_warp(values: Tensor) -> Tensor:
    """Power-transform ``values`` after dividing them by their standard deviation.

    Yeo-Johnson is used when any value is non-positive. Otherwise Box-Cox is
    used, followed by a Yeo-Johnson pass if the Box-Cox output has a standard
    deviation below 0.5. The result is a CPU float tensor.
    """
    arr = values.cpu().numpy()
    if arr.ndim == 1:
        # power_transform only accepts 2-d (n_samples, n_features) input.
        arr = arr.reshape(-1, 1)
    if arr.min() <= 0:
        return torch.FloatTensor(
            power_transform(arr / arr.std(), method='yeo-johnson'))
    warped = torch.FloatTensor(
        power_transform(arr / arr.std(), method='box-cox'))
    if warped.std() < 0.5:
        arr = warped.numpy()
        warped = torch.FloatTensor(
            power_transform(arr / arr.std(), method='yeo-johnson'))
    return warped


def _gp_fit_test_hist(path: str, series, title: str, legend: bool = True) -> None:
    """Save overlaid density histograms of ``series`` ((tensor, label) pairs) to ``path``."""
    for values, label in series:
        plt.hist(values.cpu().numpy().flatten(), bins=100, label=label,
                 alpha=0.5, density=True)
    if legend:
        plt.legend()
    plt.title(title)
    plt.savefig(path)
    plt.close()


def _gp_fit_test_scatter(path: str, y_true: Tensor, err_true: Tensor,
                         y_pred: Tensor, err_pred: Tensor, split: str) -> None:
    """Save a scatter plot of objective value against reconstruction error (true and predicted)."""
    y_sorted, order = torch.sort(y_true)
    y_pred_sorted, pred_order = torch.sort(y_pred)
    plt.scatter(y_sorted.cpu().numpy(), err_true[order].cpu().numpy(),
                label='true', marker='+')
    plt.scatter(y_pred_sorted.cpu().numpy(),
                err_pred[pred_order].cpu().numpy(),
                label='pred', marker='*')
    plt.xlabel(f'y {split} targets')
    plt.ylabel(f'recon. error {split} targets')
    plt.title(f'y_{split} vs. error_{split}')
    plt.legend()
    plt.savefig(path)
    plt.close()


def _gp_fit_test_uncertainty(path: str, truth: Tensor, pred_mean: Tensor,
                             pred_std: Tensor, true_label: str,
                             pred_label: str, title: str) -> None:
    """Save true values (sorted ascending) with predicted mean and std error bars."""
    truth_sorted, order = torch.sort(truth)
    xs = np.arange(len(order))
    mean = pred_mean[order].cpu().numpy().flatten()
    std = pred_std[order].cpu().numpy().flatten()
    plt.scatter(xs, truth_sorted.cpu().numpy(), label=true_label,
                marker='+', color='C1', s=15)
    plt.errorbar(xs, mean, yerr=std, fmt='*', alpha=0.05, label=pred_label,
                 color='C0', ecolor='C0')
    plt.scatter(xs, mean, marker='*', alpha=0.2, s=10, color='C0')
    plt.legend()
    plt.title(title)
    plt.savefig(path)
    plt.close()


def gp_fit_test(x_train: Tensor, y_train: Tensor, error_train: Tensor,
                x_test: Tensor, y_test: Tensor, error_test: Tensor,
                gp_obj_model: SingleTaskGP, gp_error_model: SingleTaskGP,
                tkwargs: Dict[str, Any], gp_test_folder: str,
                obj_out_wp: bool = False, err_out_wp: bool = False) -> None:
    """
    1) Estimates mean test error between predicted and the true objective function values.
    2) Estimates mean test error between predicted recon. error by the gp_model and the true recon. error of
        the vae_model (only when `gp_error_model` is not None).

    Per-point squared errors (each divided by the number of points), the inputs/targets and diagnostic
    plots are written to `gp_test_folder`; the summed errors are printed.

    :param x_train: normalised points at which the gps were trained
    :param y_train: objective value function corresponding to x_train that were used as targets of `gp_obj_model`
    :param error_train: reconstruction error value at points x_train that were used as targets of `gp_error_model`
    :param x_test: normalised test points
    :param y_test: objective value function corresponding to x_test
    :param error_test: reconstruction error at test points
    :param gp_obj_model: the gp model trained to predict the black box objective function values
    :param gp_error_model: the gp model trained to predict reconstruction error; if None, everything
        related to the reconstruction error is skipped
    :param tkwargs: dict of type and device
    :param gp_test_folder: folder to save test results (created if missing)
    :param obj_out_wp: if the `gp_obj_model` was trained with output warping then need to apply the same transform
    :param err_out_wp: if the `gp_error_model` was trained with output warping then need to apply the same transform
    :return: None; all results are saved to disk or printed
    """
    do_robust = gp_error_model is not None
    os.makedirs(gp_test_folder, exist_ok=True)

    def out(name: str) -> str:
        return os.path.join(gp_test_folder, name)

    gp_obj_model.eval()
    gp_obj_model.to(tkwargs['device'])
    if do_robust:
        gp_error_model.eval()
        gp_error_model.to(tkwargs['device'])

    with torch.no_grad():
        # ---- objective model: targets, predictions, errors ----
        if obj_out_wp:
            y_train = _gp_fit_test_warp(y_train)
            y_test = _gp_fit_test_warp(y_test)
        y_train = y_train.view(-1).to(**tkwargs)
        y_test = y_test.view(-1).to(**tkwargs)

        # Posteriors are computed once and reused by every metric and plot.
        obj_post_train = gp_obj_model.posterior(x_train)
        obj_post_test = gp_obj_model.posterior(x_test)
        y_pred_train = obj_post_train.mean.view(-1)
        y_pred_test = obj_post_test.mean.view(-1)
        y_std_train = obj_post_train.variance.view(-1).sqrt()
        y_std_test = obj_post_test.variance.view(-1).sqrt()

        gp_obj_val_model_mse_train = (y_pred_train - y_train).pow(2).div(len(y_train))
        gp_obj_val_model_mse_test = (y_pred_test - y_test).pow(2).div(len(y_test))
        torch.save(gp_obj_val_model_mse_train, out('gp_obj_val_model_mse_train.npz'))
        torch.save(gp_obj_val_model_mse_test, out('gp_obj_val_model_test.npz'))
        print(f'GP training fit on objective value: MSE={gp_obj_val_model_mse_train.sum().item():.5f}')
        print(f'GP testing fit on objective value: MSE={gp_obj_val_model_mse_test.sum().item():.5f}')

        # ---- error model: targets, predictions, errors ----
        if do_robust:
            if err_out_wp:
                error_train = _gp_fit_test_warp(error_train)
                error_test = _gp_fit_test_warp(error_test)
            error_train = error_train.view(-1).to(**tkwargs)
            error_test = error_test.view(-1).to(**tkwargs)

            err_post_train = gp_error_model.posterior(x_train)
            err_post_test = gp_error_model.posterior(x_test)
            pred_recon_train = err_post_train.mean.view(-1)
            pred_recon_test = err_post_test.mean.view(-1)
            recon_std_train = err_post_train.variance.view(-1).sqrt()
            recon_std_test = err_post_test.variance.view(-1).sqrt()

            gp_error_model_mse_train = (error_train - pred_recon_train).pow(2).div(len(error_train))
            gp_error_model_mse_test = (error_test - pred_recon_test).pow(2).div(len(error_test))
            torch.save(gp_error_model_mse_train, out('gp_error_model_mse_train.npz'))
            torch.save(gp_error_model_mse_test, out('gp_error_model_mse_test.npz'))
            print(f'GP training fit on reconstruction errors: MSE={gp_error_model_mse_train.sum().item():.5f}')
            print(f'GP testing fit on reconstruction errors: MSE={gp_error_model_mse_test.sum().item():.5f}')
            torch.save(error_test, out('true_rec_err_z.pt'))
            torch.save(error_train, out('error_train.pt'))

        torch.save(x_train, out('train_x.pt'))
        torch.save(x_test, out('test_x.pt'))
        torch.save(y_train, out('y_train.pt'))
        # x_test is deliberately written under both names so existing readers
        # of either file keep working.
        torch.save(x_test, out('X_test.pt'))
        torch.save(y_test, out('y_test.pt'))

        # y plots
        _gp_fit_test_hist(out('gp_obj_train.pdf'),
                          [(y_train, 'y train'), (y_pred_train, 'y pred')],
                          'Training set')
        _gp_fit_test_hist(out('gp_obj_train_mse.pdf'),
                          [(gp_obj_val_model_mse_train, None)],
                          'MSE of gp_obj_val model on training set',
                          legend=False)
        _gp_fit_test_hist(out('gp_obj_test.pdf'),
                          [(y_test, 'y true'), (y_pred_test, 'y pred')],
                          'Validation set')
        _gp_fit_test_hist(out('gp_obj_test_mse.pdf'),
                          [(gp_obj_val_model_mse_test, None)],
                          'MSE of gp_obj_val model on validation set',
                          legend=False)

        if do_robust:
            # error plots
            _gp_fit_test_hist(out('gp_error_train.pdf'),
                              [(error_train, 'error train'),
                               (pred_recon_train, 'error pred')],
                              'Training set')
            _gp_fit_test_hist(out('gp_error_train_mse.pdf'),
                              [(gp_error_model_mse_train, None)],
                              'MSE of gp_error model on training set',
                              legend=False)
            _gp_fit_test_hist(out('gp_error_test.pdf'),
                              [(error_test, 'error true'),
                               (pred_recon_test, 'error pred')],
                              'Validation set')
            _gp_fit_test_hist(out('gp_error_test_mse.pdf'),
                              [(gp_error_model_mse_test, None)],
                              'MSE of gp_error model on validation set',
                              legend=False)

            # y-error plots
            _gp_fit_test_scatter(out('scatter_obj_error_train.pdf'),
                                 y_train, error_train,
                                 y_pred_train, pred_recon_train, 'train')
            _gp_fit_test_scatter(out('scatter_obj_error_test.pdf'),
                                 y_test, error_test,
                                 y_pred_test, pred_recon_test, 'test')

            # error var plots
            _gp_fit_test_uncertainty(
                out('gp_error_train_uncertainty.pdf'),
                error_train, pred_recon_train, recon_std_train,
                'err true', 'err pred',
                'error predictions and uncertainty on train set')
            _gp_fit_test_uncertainty(
                out('gp_error_test_uncertainty.pdf'),
                error_test, pred_recon_test, recon_std_test,
                'err true', 'err pred',
                'error predictions and uncertainty on test set')

        # y var plots
        _gp_fit_test_uncertainty(
            out('gp_obj_val_train_uncertainty.pdf'),
            y_train, y_pred_train, y_std_train,
            'y true', 'y pred',
            'y predictions and uncertainty on train set')
        _gp_fit_test_uncertainty(
            out('gp_obj_val_test_uncertainty.pdf'),
            y_test, y_pred_test, y_std_test,
            'y true', 'y pred',
            'y predictions and uncertainty on test set')
def test_cache_root(self):
    """Compare qNoisyExpectedImprovement with and without ``cache_root``.

    For every combination of training batch shape, number of outputs and
    dtype, the test builds a SingleTaskGP with fixed hyperparameters,
    evaluates the acquisition function with ``cache_root=True`` and
    ``cache_root=False`` on the same inputs and base samples, and asserts
    that values and input gradients agree. It finally zeroes the cached
    baseline factor and checks that exactly one BotorchWarning is emitted.
    """
    # NOTE(review): nesting levels were reconstructed from a
    # whitespace-collapsed source - confirm against the original file.
    # Dotted path patched below so calls to the cached sampler can be counted.
    sample_cached_path = (
        "botorch.acquisition.cached_cholesky.sample_cached_cholesky")
    # Fixed raw hyperparameters; every tensor has a leading dimension of 2.
    raw_state_dict = {
        "likelihood.noise_covar.raw_noise":
        torch.tensor([[0.0895], [0.2594]], dtype=torch.float64),
        "mean_module.constant":
        torch.tensor([[-0.4545], [-0.1285]], dtype=torch.float64),
        "covar_module.raw_outputscale":
        torch.tensor([1.4876, 1.4897], dtype=torch.float64),
        "covar_module.base_kernel.raw_lengthscale":
        torch.tensor([[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]],
                     dtype=torch.float64),
    }
    # test batched models (e.g. for MCMC)
    for train_batch_shape, m, dtype in product(
            (torch.Size([]), torch.Size([3])), (1, 2),
            (torch.float, torch.double)):
        state_dict = deepcopy(raw_state_dict)
        for k, v in state_dict.items():
            if m == 1:
                # single-output case: keep the first entry along dim 0
                v = v[0]
            if len(train_batch_shape) > 0:
                # prepend the training batch shape to every parameter
                v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape)
            state_dict[k] = v
        tkwargs = {"device": self.device, "dtype": dtype}
        if m == 2:
            # two outputs are reduced to a scalar objective by summing
            objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
        else:
            objective = None
        for k, v in state_dict.items():
            state_dict[k] = v.to(**tkwargs)
        # looser absolute tolerance in single precision
        all_close_kwargs = ({
            "atol": 1e-1,
            "rtol": 0.0,
        } if dtype == torch.float else {
            "atol": 1e-4,
            "rtol": 0.0
        })
        torch.manual_seed(1234)
        train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs)
        train_Y = (
            torch.sin(train_X * 2 * pi) +
            torch.randn(*train_batch_shape, 3, 2, **tkwargs))[..., :m]
        train_Y = standardize(train_Y)
        model = SingleTaskGP(
            train_X,
            train_Y,
        )
        if len(train_batch_shape) > 0:
            X_baseline = train_X[0]
        else:
            X_baseline = train_X
        model.load_state_dict(state_dict, strict=False)
        # test sampler with collapse_batch_dims=False
        sampler = IIDNormalSampler(5, seed=0, collapse_batch_dims=False)
        with self.assertRaises(UnsupportedError):
            qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler,
                objective=objective,
                prune_baseline=False,
                cache_root=True,
            )
        sampler = IIDNormalSampler(5, seed=0)
        torch.manual_seed(0)
        acqf = qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_baseline,
            sampler=sampler,
            objective=objective,
            prune_baseline=False,
            cache_root=True,
        )
        # Give the non-cached acquisition function the same baseline base
        # samples so that the two evaluations are comparable.
        orig_base_samples = acqf.base_sampler.base_samples.detach().clone()
        sampler2 = IIDNormalSampler(5, seed=0)
        sampler2.base_samples = orig_base_samples
        torch.manual_seed(0)
        acqf_no_cache = qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_baseline,
            sampler=sampler2,
            objective=objective,
            prune_baseline=False,
            cache_root=False,
        )
        for q, batch_shape in product(
                (1, 3), (torch.Size([]), torch.Size([3]), torch.Size([4, 3]))):
            test_X = (0.3 + 0.05 * torch.randn(*batch_shape, q, 2, **tkwargs)
                      ).requires_grad_(True)
            # cached path: the patched sampler must be hit exactly once
            with mock.patch(
                    sample_cached_path,
                    wraps=sample_cached_cholesky) as mock_sample_cached:
                torch.manual_seed(0)
                val = acqf(test_X)
                mock_sample_cached.assert_called_once()
            val.sum().backward()
            base_samples = acqf.sampler.base_samples.detach().clone()
            X_grad = test_X.grad.clone()
            test_X2 = test_X.detach().clone().requires_grad_(True)
            acqf_no_cache.sampler.base_samples = base_samples
            # non-cached path: the patched sampler must never be called
            with mock.patch(
                    sample_cached_path,
                    wraps=sample_cached_cholesky) as mock_sample_cached:
                torch.manual_seed(0)
                val2 = acqf_no_cache(test_X2)
            mock_sample_cached.assert_not_called()
            self.assertTrue(torch.allclose(val, val2, **all_close_kwargs))
            val2.sum().backward()
            self.assertTrue(
                torch.allclose(X_grad, test_X2.grad, **all_close_kwargs))
        # test we fall back to standard sampling for
        # ill-conditioned covariances
        acqf._baseline_L = torch.zeros_like(acqf._baseline_L)
        with warnings.catch_warnings(
                record=True) as ws, settings.debug(True):
            with torch.no_grad():
                acqf(test_X)
        self.assertEqual(len(ws), 1)
        self.assertTrue(issubclass(ws[-1].category, BotorchWarning))
def test_gen_value_function_initial_conditions(self):
    """Exercise gen_value_function_initial_conditions in float and double.

    The float pass checks the option validation error plus output shape,
    bounds and dtype; the double pass is a minimal configuration run with
    ``frac_random=0.99`` and checks shape, bounds and dtype again.
    """
    num_fantasies = 2
    num_restarts = 4
    raw_samples = 5

    def make_problem(n_train, dim, n_fant_points, tkwargs):
        # Draw order (train_X, train_Y, fant_X) is kept fixed on purpose.
        train_X = torch.rand(n_train, dim, **tkwargs)
        train_Y = torch.rand(n_train, 1, **tkwargs)
        gp = SingleTaskGP(train_X, train_Y)
        fant_X = torch.rand(n_fant_points, 1, dim, **tkwargs)
        fantasized = gp.fantasize(fant_X, IIDNormalSampler(num_fantasies))
        box = torch.tensor([[0] * dim, [1] * dim], **tkwargs)
        return gp, PosteriorMean(fantasized), box

    def check_ics(ics, expected_shape, box, expected_dtype):
        self.assertEqual(ics.shape, expected_shape)
        # test bounds
        self.assertTrue(torch.all(ics >= box[0]))
        self.assertTrue(torch.all(ics <= box[1]))
        # test dtype
        self.assertEqual(expected_dtype, ics.dtype)

    # run a thorough test with dtype float
    dtype = torch.float
    dim = 2
    num_solutions = 3
    model, value_function, bounds = make_problem(
        n_train=6,
        dim=dim,
        n_fant_points=num_solutions,
        tkwargs={"device": self.device, "dtype": dtype},
    )
    # test option error
    with self.assertRaises(ValueError):
        gen_value_function_initial_conditions(
            acq_function=value_function,
            bounds=bounds,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            current_model=model,
            options={"frac_random": 2.0},
        )
    # test output shape
    ics = gen_value_function_initial_conditions(
        acq_function=value_function,
        bounds=bounds,
        num_restarts=num_restarts,
        raw_samples=raw_samples,
        current_model=model,
    )
    check_ics(
        ics,
        torch.Size([num_restarts, num_fantasies, num_solutions, 1, dim]),
        bounds,
        dtype,
    )

    # minimal test cases for when all raw samples are random, with dtype double
    dtype = torch.double
    dim = 1
    num_solutions = 1
    model, value_function, bounds = make_problem(
        n_train=2,
        dim=dim,
        n_fant_points=1,
        tkwargs={"device": self.device, "dtype": dtype},
    )
    ics = gen_value_function_initial_conditions(
        acq_function=value_function,
        bounds=bounds,
        num_restarts=1,
        raw_samples=1,
        current_model=model,
        options={"frac_random": 0.99},
    )
    check_ics(
        ics,
        torch.Size([1, num_fantasies, num_solutions, 1, dim]),
        bounds,
        dtype,
    )
def testSubsetModel(self):
    """Check subset_model with weights, constraints and thresholds.

    Covers the cases where the model can and cannot be reduced to a subset
    of its outputs, an unsupported model class, and a size mismatch error.
    """

    def unpack(res):
        # Read all four result fields, exactly as each scenario does.
        return (
            res.model,
            res.objective_weights,
            res.outcome_constraints,
            res.objective_thresholds,
        )

    def assert_indices(res, expected):
        self.assertTrue(torch.equal(res.indices, torch.tensor(expected)))

    def assert_tensor(actual, expected):
        self.assertTrue(torch.equal(actual, expected))

    x = torch.zeros(1, 1)
    y = torch.rand(1, 2)
    obj_t = torch.rand(2)
    model = SingleTaskGP(x, y)
    self.assertEqual(model.num_outputs, 2)

    # basic test, can subset
    obj_weights = torch.tensor([1.0, 0.0])
    results = subset_model(model, obj_weights)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    self.assertIsNone(ocs_sub)
    self.assertIsNone(obj_t_sub)
    self.assertEqual(model_sub.num_outputs, 1)
    assert_tensor(obj_weights_sub, torch.tensor([1.0]))

    # basic test, cannot subset
    obj_weights = torch.tensor([1.0, 2.0])
    results = subset_model(model, obj_weights)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    self.assertIsNone(ocs_sub)
    self.assertIsNone(obj_t_sub)
    self.assertIs(model_sub, model)  # check identity
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    assert_indices(results, [0, 1])

    # test w/ outcome constraints, can subset
    obj_weights = torch.tensor([1.0, 0.0])
    ocs = (torch.tensor([[1.0, 0.0]]), torch.tensor([1.0]))
    results = subset_model(model, obj_weights, ocs)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    self.assertEqual(model_sub.num_outputs, 1)
    self.assertIsNone(obj_t_sub)
    assert_tensor(obj_weights_sub, torch.tensor([1.0]))
    assert_tensor(ocs_sub[0], torch.tensor([[1.0]]))
    assert_tensor(ocs_sub[1], torch.tensor([1.0]))
    assert_indices(results, [0])

    # test w/ outcome constraints, cannot subset
    obj_weights = torch.tensor([1.0, 0.0])
    ocs = (torch.tensor([[0.0, 1.0]]), torch.tensor([1.0]))
    results = subset_model(model, obj_weights, ocs)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    self.assertIs(model_sub, model)  # check identity
    self.assertIsNone(obj_t_sub)
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    self.assertIs(ocs_sub, ocs)  # check identity
    assert_indices(results, [0, 1])

    # test w/ objective thresholds, cannot subset
    obj_weights = torch.tensor([1.0, 0.0])
    ocs = (torch.tensor([[0.0, 1.0]]), torch.tensor([1.0]))
    results = subset_model(model, obj_weights, ocs, obj_t)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    self.assertIs(model_sub, model)  # check identity
    self.assertIs(obj_t, obj_t_sub)
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    assert_indices(results, [0, 1])
    self.assertIs(ocs_sub, ocs)  # check identity

    # test w/ objective thresholds, can subset
    obj_weights = torch.tensor([1.0, 0.0])
    ocs = (torch.tensor([[1.0, 0.0]]), torch.tensor([1.0]))
    results = subset_model(model, obj_weights, ocs, obj_t)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    assert_indices(results, [0])
    self.assertEqual(model_sub.num_outputs, 1)
    assert_tensor(obj_weights_sub, torch.tensor([1.0]))
    assert_tensor(obj_t_sub, obj_t[:1])
    assert_tensor(ocs_sub[0], torch.tensor([[1.0]]))
    assert_tensor(ocs_sub[1], torch.tensor([1.0]))

    # test unsupported
    yvar = torch.ones(1, 2)
    model = HeteroskedasticSingleTaskGP(x, y, yvar)
    results = subset_model(model, obj_weights)
    model_sub, obj_weights_sub, ocs_sub, obj_t_sub = unpack(results)
    self.assertIsNone(ocs_sub)
    self.assertIs(model_sub, model)  # check identity
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    assert_indices(results, [0, 1])

    # test error on size inconsistency
    obj_weights = torch.ones(3)
    with self.assertRaises(RuntimeError):
        subset_model(model, obj_weights)
def fit(self, X: DataSet, y: DataSet, **kwargs):
    """Fit a SingleTaskGP to the data and draw a spectral (RFF) sample.

    Keyword options read from ``kwargs``:
        n_spectral_points: number of spectral points (default 1500).
        n_retries: maximum number of sampling attempts (default 10).

    Returns a dict with the model name, the sampled RFF object and the
    fitted lengthscales, outputscale and noise.

    Raises RuntimeError when no attempt produced a sample.
    """
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_model
    from gpytorch.mlls.exact_marginal_log_likelihood import (
        ExactMarginalLogLikelihood,
    )
    import pyrff
    import torch

    self.input_columns_ordered = X.columns

    # Numpy copies are kept for pyrff; tensors are used for the GP.
    X_np = X.to_numpy().astype(float)
    y_np = y.to_numpy().astype(float)
    X = torch.from_numpy(X_np)
    y = torch.from_numpy(y_np)

    # Fit the exact GP by maximizing the marginal log likelihood.
    self.model = SingleTaskGP(X, y)
    fit_gpytorch_model(
        ExactMarginalLogLikelihood(self.model.likelihood, self.model))

    if self.model_name is None:
        self.model_name = self.output_columns_ordered[0]

    # Extract and log the fitted hyperparameters.
    covar = self.model.covar_module
    self.lengthscales_ = covar.base_kernel.lengthscale.detach()[0].numpy()
    self.outputscale_ = covar.outputscale.detach().numpy()
    self.noise_ = self.model.likelihood.noise_covar.noise.detach().numpy()[0]
    self.logger.debug(
        f"Model {self.model_name} lengthscales: {self.lengthscales_}")
    self.logger.debug(
        f"Model {self.model_name} variance: {self.outputscale_}")
    self.logger.debug(f"Model {self.model_name} noise: {self.noise_}")

    # Spectral sampling, retried because it can fail numerically.
    n_spectral_points = kwargs.get("n_spectral_points", 1500)
    n_retries = kwargs.get("n_retries", 10)
    self.logger.debug(
        f"Spectral sampling {self.model_name} with {n_spectral_points} spectral points."
    )
    self.rff = None
    nu = self.model.covar_module.base_kernel.nu
    attempt = 0
    while attempt < n_retries:
        attempt += 1
        try:
            self.rff = pyrff.sample_rff(
                lengthscales=self.lengthscales_,
                scaling=np.sqrt(self.outputscale_),
                noise=self.noise_,
                kernel_nu=nu,
                X=X_np,
                Y=y_np[:, 0],
                M=n_spectral_points,
            )
        except (np.linalg.LinAlgError, ValueError) as e:
            self.logger.error(e)
        else:
            break

    if self.rff is None:
        raise RuntimeError(
            f"Spectral sampling failed after {n_retries} retries.")

    return {
        "name": self.model_name,
        "rff": self.rff,
        "lengthscales": self.lengthscales_,
        "outputscale": self.outputscale_,
        "noise": self.noise_,
    }
def sample_arch(self, START_BO, g, hyperparams, og_flops, empty_val_loss, full_val_loss, target_flops=0):
    """Pick the next parameterization and its per-layer budget.

    Before iteration ``START_BO`` a constant parameterization is used
    (random factor, or ``args.lower_channel`` when ``target_flops`` is
    non-zero); at ``START_BO`` an all-ones or lower-bound one; afterwards
    two GPs (loss and cost) are fit on ``self.X`` / ``self.Y`` and an
    acquisition function built from them is optimized.

    ``empty_val_loss`` and ``full_val_loss`` are not used in this body.
    Reads the module-level ``args`` and ``writer`` objects.

    Returns ``(layer_budget, parameterization, normalized sampling weights)``.
    """
    # NOTE(review): nesting levels were reconstructed from a
    # whitespace-collapsed source - confirm against the original file.
    if g < START_BO:
        if target_flops == 0:
            # random scalar in [lower_channel, upper_channel)
            f = np.random.rand(1) * (args.upper_channel-args.lower_channel) + args.lower_channel
        else:
            f = args.lower_channel
        parameterization = np.ones(hyperparams.get_dim()) * f
        layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
    elif g == START_BO:
        if target_flops == 0:
            parameterization = np.ones(hyperparams.get_dim())
        else:
            f = args.lower_channel
            parameterization = np.ones(hyperparams.get_dim()) * f
        layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
    else:
        # scalar handed to the acquisition function's setup below
        rand = torch.rand(1).cuda()

        # Column 0 of self.Y is modelled as the loss, column 1 as the cost;
        # both are standardized before fitting.
        train_X = torch.FloatTensor(self.X).cuda()
        train_Y_loss = torch.FloatTensor(np.array(self.Y)[:, 0].reshape(-1, 1)).cuda()
        train_Y_loss = standardize(train_Y_loss)

        train_Y_cost = torch.FloatTensor(np.array(self.Y)[:, 1].reshape(-1, 1)).cuda()
        train_Y_cost = standardize(train_Y_cost)

        # Kernel for the loss GP: grid-interpolated variants once g > 128
        # with args.ski set, plain Matern otherwise; each has an additive
        # and a non-additive form.
        # NOTE(review): MaternKernel is passed ``num_dims`` in the non-SKI
        # branches - confirm that the kernel actually consumes this keyword.
        covar_module = None
        if args.ski and g > 128:
            if args.additive:
                covar_module = AdditiveStructureKernel(
                    ScaleKernel(
                        GridInterpolationKernel(
                            MaternKernel(
                                nu=2.5,
                                lengthscale_prior=GammaPrior(3.0, 6.0),
                            ),
                            grid_size=128, num_dims=1, grid_bounds=[(0, 1)]
                        ),
                        outputscale_prior=GammaPrior(2.0, 0.15),
                    ),
                    num_dims=train_X.shape[1]
                )
            else:
                covar_module = ScaleKernel(
                    GridInterpolationKernel(
                        MaternKernel(
                            nu=2.5,
                            lengthscale_prior=GammaPrior(3.0, 6.0),
                        ),
                        grid_size=128, num_dims=train_X.shape[1], grid_bounds=[(0, 1) for _ in range(train_X.shape[1])]
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                )
        else:
            if args.additive:
                covar_module = AdditiveStructureKernel(
                    ScaleKernel(
                        MaternKernel(
                            nu=2.5,
                            lengthscale_prior=GammaPrior(3.0, 6.0),
                            num_dims=1
                        ),
                        outputscale_prior=GammaPrior(2.0, 0.15),
                    ),
                    num_dims=train_X.shape[1]
                )
            else:
                covar_module = ScaleKernel(
                    MaternKernel(
                        nu=2.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                        num_dims=train_X.shape[1]
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                )

        new_train_X = train_X
        gp_loss = SingleTaskGP(new_train_X, train_Y_loss, covar_module=covar_module)
        mll = ExactMarginalLogLikelihood(gp_loss.likelihood, gp_loss)
        mll = mll.to('cuda')
        fit_gpytorch_model(mll)

        # Use add-gp for cost
        covar_module = AdditiveStructureKernel(
            ScaleKernel(
                MaternKernel(
                    nu=2.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                    num_dims=1
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
            num_dims=train_X.shape[1]
        )
        gp_cost = SingleTaskGP(new_train_X, train_Y_cost, covar_module=covar_module)
        mll = ExactMarginalLogLikelihood(gp_cost.likelihood, gp_cost)
        mll = mll.to('cuda')
        fit_gpytorch_model(mll)

        # One UCB per objective, combined by RandAcquisition.
        UCB_loss = UpperConfidenceBound(gp_loss, beta=args.beta).cuda()
        UCB_cost = UpperConfidenceBound(gp_cost, beta=args.beta).cuda()
        self.mobo_obj = RandAcquisition(UCB_loss).cuda()
        self.mobo_obj.setup(UCB_loss, UCB_cost, rand)

        # Box bounds: every dimension lies in [lower_channel, upper_channel].
        lower = torch.ones(new_train_X.shape[1])*args.lower_channel
        upper = torch.ones(new_train_X.shape[1])*args.upper_channel
        self.mobo_bounds = torch.stack([lower, upper]).cuda()

        if args.pas:
            # Draw a target FLOPs ratio, then bisect on lmda (at most 10
            # steps) until the simulated ratio is within 0.02 of it.
            val = np.linspace(args.lower_flops, 1, 50)
            chosen_target_flops = np.random.choice(val, p=(self.sampling_weights/np.sum(self.sampling_weights)))

            lower_bnd, upper_bnd = 0, 1
            lmda = 0.5
            for i in range(10):
                self.mobo_obj.rand = lmda

                parameterization, acq_value = optimize_acqf(
                    self.mobo_obj, bounds=self.mobo_bounds, q=1, num_restarts=5, raw_samples=1000,
                )

                parameterization = parameterization[0].cpu().numpy()
                layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
                sim_flops = self.mask_pruner.simulate_and_count_flops(layer_budget, self.use_mem)
                ratio = sim_flops/og_flops

                if np.abs(ratio - chosen_target_flops) <= 0.02:
                    break
                # The direction in which lmda moves is flipped depending
                # on args.baseline.
                if args.baseline > 0:
                    if ratio < chosen_target_flops:
                        lower_bnd = lmda
                        lmda = (lmda + upper_bnd) / 2
                    elif ratio > chosen_target_flops:
                        upper_bnd = lmda
                        lmda = (lmda + lower_bnd) / 2
                else:
                    if ratio < chosen_target_flops:
                        upper_bnd = lmda
                        lmda = (lmda + lower_bnd) / 2
                    elif ratio > chosen_target_flops:
                        lower_bnd = lmda
                        lmda = (lmda + upper_bnd) / 2
            rand[0] = lmda
            # logs the index of the last bisection step at iteration g
            writer.add_scalar('Binary search trials', i, g)

        else:
            parameterization, acq_value = optimize_acqf(
                self.mobo_obj, bounds=self.mobo_bounds, q=1, num_restarts=5, raw_samples=1000,
            )
            parameterization = parameterization[0].cpu().numpy()

        # NOTE(review): this statement may instead belong inside the
        # non-pas branch above - the collapsed source does not show which.
        layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
    return layer_budget, parameterization, self.sampling_weights/np.sum(self.sampling_weights)
def testSubsetModel(self):
    """Check subset_model when per-outcome observations ``Ys`` are passed.

    Covers subsetting with and without outcome constraints, the cases in
    which the inputs must be returned unchanged (identity checks), an
    unsupported model class, and a size mismatch that must raise
    RuntimeError.
    """
    x = torch.zeros(1, 1)
    y = torch.rand(1, 2)
    Ys = [y[:, :1], y[:, 1:]]
    model = SingleTaskGP(x, y)
    self.assertEqual(model.num_outputs, 2)
    # basic test, can subset
    obj_weights = torch.tensor([1.0, 0.0])
    model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
        model, obj_weights, Ys=Ys
    )
    self.assertIsNone(ocs_sub)
    self.assertEqual(model_sub.num_outputs, 1)
    self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
    # Compare tensors with torch.equal: assertEqual relies on the truth
    # value of an elementwise ``==`` result, which is only defined for
    # single-element tensors.
    self.assertTrue(torch.equal(Ys_sub[0], Ys[0]))
    self.assertEqual(len(Ys_sub), 1)
    # basic test, cannot subset
    obj_weights = torch.tensor([1.0, 2.0])
    model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
        model, obj_weights, Ys=Ys
    )
    self.assertIsNone(ocs_sub)
    self.assertIs(model_sub, model)  # check identity
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    self.assertIs(Ys_sub, Ys)
    # test w/ outcome constraints, can subset
    obj_weights = torch.tensor([1.0, 0.0])
    ocs = (torch.tensor([[1.0, 0.0]]), torch.tensor([1.0]))
    model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
        model, obj_weights, ocs, Ys
    )
    self.assertEqual(model_sub.num_outputs, 1)
    self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
    self.assertTrue(torch.equal(ocs_sub[0], torch.tensor([[1.0]])))
    self.assertTrue(torch.equal(ocs_sub[1], torch.tensor([1.0])))
    self.assertIs(Ys_sub[0], Ys[0])
    self.assertEqual(len(Ys_sub), 1)
    # test w/ outcome constraints, cannot subset
    obj_weights = torch.tensor([1.0, 0.0])
    ocs = (torch.tensor([[0.0, 1.0]]), torch.tensor([1.0]))
    model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
        model, obj_weights, ocs, Ys
    )
    self.assertIs(model_sub, model)  # check identity
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    self.assertIs(ocs_sub, ocs)  # check identity
    self.assertIs(Ys_sub, Ys)
    # test unsupported
    yvar = torch.ones(1, 2)
    model = HeteroskedasticSingleTaskGP(x, y, yvar)
    model_sub, obj_weights_sub, ocs, Ys_sub = subset_model(
        model, obj_weights, Ys=Ys
    )
    self.assertIsNone(ocs)
    self.assertIs(model_sub, model)  # check identity
    self.assertIs(obj_weights_sub, obj_weights)  # check identity
    self.assertIs(Ys_sub, Ys)
    # test error on size inconsistency
    obj_weights = torch.ones(3)
    with self.assertRaises(RuntimeError):
        subset_model(model, obj_weights, Ys=Ys)
def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwargs):
    """Suggest the next experiments with single-task Bayesian optimization.

    Without previous results an LHS design of at least two points is
    returned. Otherwise the results are accumulated, a SingleTaskGP is fit
    on the transformed data and ``num_experiments`` candidates are chosen
    by optimizing a CategoricalEI acquisition function.
    """
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_model
    from botorch.optim import optimize_acqf
    from torch import tensor
    from gpytorch.mlls.exact_marginal_log_likelihood import (
        ExactMarginalLogLikelihood,
    )

    # NOTE(review): nesting levels were reconstructed from a
    # whitespace-collapsed source - confirm against the original file.
    # First call: no data yet, so fall back to a space-filling design.
    if prev_res is None:
        initial_design = LHS(self.domain)
        self.iterations += 1
        n_initial = num_experiments if num_experiments > 1 else 2
        return initial_design.suggest_experiments(n_initial)

    # Later calls: start or extend the record of all experiments.
    if self.all_experiments is None:
        self.all_experiments = prev_res
    else:
        self.all_experiments = self.all_experiments.append(prev_res)
    self.iterations += 1

    # Get inputs (decision variables) and outputs (objectives)
    inputs, output = self.transform.transform_inputs_outputs(
        self.all_experiments,
        categorical_method=self.categorical_method,
        standardize_inputs=True,
        standardize_outputs=True,
    )

    # Train model
    train_inputs = torch.tensor(inputs.data_to_numpy()).float()
    train_targets = torch.tensor(output.data_to_numpy()).float()
    model = SingleTaskGP(train_inputs, train_targets)
    fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

    # Create acquisition function around the best (scaled) value so far
    objective = self.domain.output_variables[0]
    maximize = True if objective.maximize else False
    extremes = output.max() if maximize else output.min()
    fbest_scaled = extremes[objective.name]
    ei = CategoricalEI(self.domain, model, best_f=fbest_scaled, maximize=maximize)

    # Optimize acquisition function
    results, acq_values = optimize_acqf(
        acq_function=ei,
        bounds=self._get_bounds(),
        num_restarts=20,
        q=num_experiments,
        raw_samples=100,
    )

    # Convert result to dataset and undo the input transform
    suggestions = DataSet(
        results.detach().numpy(),
        columns=inputs.data_columns,
    )
    suggestions = self.transform.un_transform(
        suggestions,
        categorical_method=self.categorical_method,
        standardize_inputs=True)

    # Add metadata
    suggestions[("strategy", "METADATA")] = "STBO"
    return suggestions
def gp_torch_train(train_x: Tensor, train_y: Tensor, n_inducing_points: int,
                   tkwargs: Dict[str, Any], init, scale: bool,
                   covar_name: str, gp_file: Optional[str], save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Dict[str, Any] = None) -> SingleTaskGP:
    """Build (or reload), fit and save an inducing-point SingleTaskGP.

    With ``init`` set, inducing points come from mini-batch K-means on
    ``train_x`` and lengthscales from per-dimension median pairwise
    distances; otherwise placeholders are used and the state is loaded
    from ``gp_file``. The model is then fit with ``fit_gpytorch_torch``,
    its state dict is written to ``save_file``, and it is returned in
    eval mode.
    """
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    n_features = train_x.shape[1]

    if init:
        # build hyp
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        kmeans = MiniBatchKMeans(n_clusters=n_inducing_points,
                                 batch_size=min(10000, train_x.shape[0]),
                                 n_init=25)
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

        output_scale = train_y.var().item() if scale else None
        # median pairwise distance per input dimension, floored at 0.01
        lscales = torch.empty(1, n_features)
        for i in range(n_features):
            column = train_x[:, i].view(-1, 1)
            lscales[0, i] = torch.pdist(column).median().clamp(min=0.01)
    else:
        # load model: these values will be overwritten when loading model
        output_scale = 1
        lscales = torch.ones(n_features)
        inducing_points = torch.empty(n_inducing_points, n_features)

    base_covar_module = query_covar(covar_name=covar_name,
                                    scale=scale,
                                    outputscale=output_scale,
                                    lscales=lscales)
    covar_module = InducingPointKernel(base_covar_module,
                                       inducing_points=inducing_points,
                                       likelihood=likelihood)

    input_warp_tf = None
    if input_wp:
        # Apply input warping; a fresh model uses CustomWarp and a
        # reloaded one uses Warp.
        warp_cls = CustomWarp if init else Warp
        input_warp_tf = warp_cls(
            indices=list(range(train_x.shape[-1])),
            # use a prior with median at 1.
            # when a=1 and b=1, the Kumaraswamy CDF is the identity function
            concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
            concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
        )

    model = SingleTaskGP(train_x,
                         train_y,
                         covar_module=covar_module,
                         likelihood=likelihood,
                         input_transform=input_warp_tf,
                         outcome_transform=outcome_transform)

    if not init:
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=bool(input_wp),
                       clip_value=10.0)
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")

    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")

    model.eval()
    return model
import torch from botorch.models import SingleTaskGP from botorch.fit import fit_gpytorch_model from botorch.utils import standardize from gpytorch.mlls import ExactMarginalLogLikelihood from botorch.acquisition import UpperConfidenceBound from botorch.optim import optimize_acqf # Training data: train_X = torch.rand(10, 2) Y = 1 - torch.norm(train_X - 0.5, dim=-1, keepdim=True) Y = Y + 0.1 * torch.randn_like(Y) # add some noise train_Y = standardize(Y) # Fir the model: gp = SingleTaskGP(train_X, train_Y) mll = ExactMarginalLogLikelihood(gp.likelihood, gp) fit_gpytorch_model(mll) print(mll) # Construct acquisition function: UCB = UpperConfidenceBound(gp, beta=0.1) print(UCB) bounds = torch.stack([torch.zeros(2), torch.ones(2)]) candidate, acq_value = optimize_acqf( UCB, bounds=bounds, q=1,
def fairBO_debiasing(model_state_dict, data, config, device):
    """Debias a model by constrained Bayesian optimization of fine-tuning.

    Hyperparameters (lr, beta1, beta2, alpha, T0) are first sampled at
    random for ``initial_budget`` evaluations, then proposed by a GP with
    constrained expected improvement for the remaining ``total_budget``.
    Each evaluation fine-tunes a fresh copy of the model on the validation
    data and picks a decision threshold; the best model by ``objective``
    is kept.

    Args:
        model_state_dict: weights loaded into every fine-tuned copy.
        data: dataset object providing the validation/test tensors.
        config: configuration dict; the ``fairBO`` section is read here.
        device: torch device for the model.

    Returns:
        ``(results_valid, results_test)`` for the best model found.
    """
    # NOTE(review): nesting levels were reconstructed from a
    # whitespace-collapsed source - confirm against the original file.

    def evaluate(lr, beta1, beta2, alpha, T0, verbose=False):
        """Fine-tune one model copy and score it on the validation set."""
        model = load_model(data.num_features,
                           config.get('hyperparameters', {}))
        model.load_state_dict(model_state_dict)
        model.to(device)

        loss_fn = torch.nn.BCELoss()
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               betas=(beta1, beta2),
                               weight_decay=alpha)
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, int(T0))

        for epoch in range(201):
            model.train()
            batch_idxs = torch.split(torch.randperm(data.X_valid.size(0)), 64)
            train_loss = 0
            for batch in batch_idxs:
                X = data.X_valid_gpu[batch, :]
                y = data.y_valid_gpu[batch]

                optimizer.zero_grad()
                loss = loss_fn(model(X)[:, 0], y)
                loss.backward()
                train_loss += loss.item()
                optimizer.step()
                scheduler.step(X.size(0))
            if epoch % 10 == 0 and verbose:
                model.eval()
                with torch.no_grad():
                    valid_loss = loss_fn(
                        model(data.X_valid_valid.to(device))[:, 0],
                        data.y_valid_valid.to(device))
                print(
                    f'=======> Epoch: {epoch} Train loss: {train_loss / len(batch_idxs)} '
                    f'Valid loss: {valid_loss}')

        model.eval()
        with torch.no_grad():
            scores = model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()

        best_thresh, _ = get_best_thresh(scores,
                                         np.linspace(0, 1, 1001),
                                         data,
                                         config,
                                         valid=False,
                                         margin=config['fairBO']['margin'])
        return get_valid_objective(scores > best_thresh,
                                   data,
                                   config,
                                   valid=False), model, best_thresh

    # Build the discrete grid used for random sampling and the continuous
    # bounds used by the acquisition optimizer.
    space = config['fairBO']['hyperparameters']
    search_space = {}
    bounds_dict = {}
    for var, spec in space.items():
        search_space[var] = np.arange(spec['start'], spec['end'],
                                      spec['step'])
        bounds_dict[var] = torch.tensor([spec['start'], spec['end']])
        if spec['log_scale']:
            # start/end/step are base-10 exponents in this case
            search_space[var] = np.exp(np.log(10) * search_space[var])
            bounds_dict[var] = torch.exp(
                float(np.log(10)) * bounds_dict[var])

    def sample_space():
        return {
            var: np.random.choice(rng)
            for var, rng in search_space.items()
        }

    # best_model holds [model, objective value, threshold]
    best_model = [None, -math.inf, -1]

    def keep_if_better(y_eval, model_candidate, thresh):
        """Store the candidate when it beats the best objective so far."""
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh

    X_hyp = []
    y_hyp = []
    for it in range(config['fairBO']['initial_budget']):
        X_hyp.append(sample_space())
        logger.info(
            f'(Iteration {it}) Evaluating fairBO with sample {X_hyp[-1]}')
        y_eval, model_candidate, thresh = evaluate(**X_hyp[-1])
        logger.info(f'Result: {y_eval}')
        keep_if_better(y_eval, model_candidate, thresh)
        y_hyp.append(y_eval)

    X_df = pd.DataFrame(X_hyp)
    X = torch.tensor(X_df.to_numpy())
    y = torch.tensor(pd.DataFrame(y_hyp)[['performance', 'bias']].to_numpy())

    for it in range(config['fairBO']['total_budget'] -
                    config['fairBO']['initial_budget']):
        xscaler = StandardScaler()
        gp = SingleTaskGP(torch.tensor(xscaler.fit_transform(X)), y)
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)

        cEI = ConstrainedExpectedImprovement(gp, y[:, 0].max().item(), 0,
                                             {1: (-0.05, 0.05)})
        # NOTE(review): the GP is fit on standardized inputs but the
        # acquisition is optimized over these raw-space bounds - confirm
        # whether they should be passed through xscaler.transform first.
        bounds = torch.stack([bounds_dict[x] for x in X_df.columns])
        candidate, _ = optimize_acqf(cEI, bounds.T, 1, 100, 1024)
        # Map the proposal back to raw hyperparameter values.
        candidate_raw = xscaler.inverse_transform(
            candidate.detach().cpu().numpy())

        hyp = {k: v.item() for k, v in zip(X_df.columns, candidate_raw[0])}
        logger.info(
            f'(Iteration {it+config["fairBO"]["initial_budget"]}) Evaluating fairBO with sample {hyp}'
        )

        # Record the point that is actually evaluated. X holds raw values
        # and is re-standardized on every iteration, so appending the
        # standardized candidate would pair the observation with the
        # wrong input.
        X = torch.cat((X, torch.as_tensor(candidate_raw, dtype=X.dtype)))

        y_eval, model_candidate, thresh = evaluate(**hyp)
        logger.info(f'Result: {y_eval}')
        keep_if_better(y_eval, model_candidate, thresh)
        y = torch.cat(
            (y, torch.tensor([[y_eval['performance'], y_eval['bias']]])))

    logger.info('Evaluating best fairBO debiased model.')
    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_valid_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)
    logger.info(f'Results: {results_valid}')

    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_test_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)
    return results_valid, results_test
class ParametricArm:
    """
    An arm backed by a GP model of a synthetic test function, sampled by
    maximizing the Knowledge Gradient over the unit cube.
    """

    def __init__(
        self,
        function: SyntheticTestFunction,
        num_init_samples: int = 10,
        retrain_gp: bool = False,
        num_restarts: int = 10,
        raw_samples: int = 1000,
    ):
        """
        Set up the arm: fit the initial model, then compute the current best
        point and the first KG candidate.
        :param function: the function of the arm to sample from
        :param num_init_samples: number of samples to initialize with
        :param retrain_gp: retrain the model after each sample if True
        :param num_restarts: number of random restarts for acquisition function optimization
        :param raw_samples: number of raw samples for acquisition function optimization
        """
        self.function = function
        self.dim = function.dim
        # rows of self.bounds: [0] lower bounds, [1] upper bounds
        self.bounds = Tensor(function._bounds).t()
        self.scale = self.bounds[1] - self.bounds[0]
        self.l_bounds = self.bounds[0]
        self.num_restarts = num_restarts
        self.raw_samples = raw_samples
        self._initialize_model(num_init_samples)
        self._update_current_best()
        self._maximize_kg()
        self.retrain_gp = retrain_gp
        self.num_samples = num_init_samples

    def _unit_bounds(self) -> Tensor:
        """
        Returns the 2 x dim bounds of the unit cube used for optimization
        """
        return Tensor([[0], [1]]).repeat(1, self.dim)

    def _maximize_kg(self) -> None:
        """
        Optimizes qKnowledgeGradient and keeps the maximizer and its value
        """
        kg = qKnowledgeGradient(
            model=self.model, current_value=self.current_best_val
        )
        candidate, value = optimize_acqf(
            kg,
            self._unit_bounds(),
            q=1,
            num_restarts=self.num_restarts,
            raw_samples=self.raw_samples,
        )
        self.next_candidate = candidate
        self.kg_value = value

    def _update_current_best(self) -> None:
        """
        Optimizes the posterior mean and keeps the maximizer and its value
        """
        solution, value = optimize_acqf(
            PosteriorMean(self.model),
            self._unit_bounds(),
            q=1,
            num_restarts=self.num_restarts,
            raw_samples=self.raw_samples,
        )
        self.current_best_sol = solution
        self.current_best_val = value

    def _function_call(self, X: Tensor) -> Tensor:
        """
        Maps unit-cube solutions to the function domain and evaluates the
        negated function there.
        :param X: Solutions from the relative scale of [0, 1]
        :return: function value
        """
        # scale and l_bounds broadcast along the last dimension of X
        X = X * self.scale + self.l_bounds
        # TODO: adjust for minimization
        values = self.function(X).unsqueeze(1)
        return -values

    def _initialize_model(self, num_init_samples: int) -> None:
        """
        Fits the GP model on num_init_samples uniformly random initial samples
        """
        self.train_X = torch.rand((num_init_samples, self.dim))
        self.train_Y = self._function_call(self.train_X)
        self.model = SingleTaskGP(
            self.train_X, self.train_Y, outcome_transform=Standardize(m=1)
        )
        fit_gpytorch_model(
            ExactMarginalLogLikelihood(self.model.likelihood, self.model)
        )

    def _update_model(self, new_sample: Tensor, new_observation: Tensor) -> None:
        """
        Adds the new observation(s) to the data and conditions the GP on them,
        refitting the hyperparameters when retrain_gp is set
        :param new_sample: sampled point
        :param new_observation: observed function value
        """
        self.train_X = torch.cat([self.train_X, new_sample], dim=0)
        self.train_Y = torch.cat([self.train_Y, new_observation], dim=0)
        self.model = self.model.condition_on_observations(
            new_sample, new_observation
        )
        if not self.retrain_gp:
            return
        fit_gpytorch_model(
            ExactMarginalLogLikelihood(self.model.likelihood, self.model)
        )

    def sample_next(self):
        """
        Evaluates the stored KG candidate, updates the model with it, then
        refreshes the current best and the next KG candidate
        """
        candidate = self.next_candidate
        observation = self._function_call(candidate)
        self._update_model(candidate, observation)
        self._update_current_best()
        self._maximize_kg()
def test_model_list_to_batched(self):
    """Exercise ``model_list_to_batched`` on convertible and rejected model lists."""
    for dtype in (torch.float, torch.double):
        # basic test
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1, keepdim=True)
        train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
        gp1 = SingleTaskGP(train_X, train_Y1)
        gp2 = SingleTaskGP(train_X, train_Y2)
        list_gp = ModelListGP(gp1, gp2)
        batch_gp = model_list_to_batched(list_gp)
        self.assertIsInstance(batch_gp, SingleTaskGP)
        # test degenerate (single model)
        batch_gp = model_list_to_batched(ModelListGP(gp1))
        self.assertEqual(batch_gp._num_outputs, 1)
        # test different model classes
        gp2 = FixedNoiseGP(train_X, train_Y1, torch.ones_like(train_Y1))
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test non-batched models
        gp1_ = SimpleGPyTorchModel(train_X, train_Y1)
        gp2_ = SimpleGPyTorchModel(train_X, train_Y2)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1_, gp2_))
        # test list of multi-output models
        train_Y = torch.cat([train_Y1, train_Y2], dim=-1)
        gp2 = SingleTaskGP(train_X, train_Y)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test different training inputs
        gp2 = SingleTaskGP(2 * train_X, train_Y2)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # check scalar agreement
        gp2 = SingleTaskGP(train_X, train_Y2)
        gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # check tensor shape agreement
        gp2 = SingleTaskGP(train_X, train_Y2)
        gp2.covar_module.raw_outputscale = torch.nn.Parameter(
            torch.tensor([0.0], device=self.device, dtype=dtype)
        )
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test HeteroskedasticSingleTaskGP
        gp2 = HeteroskedasticSingleTaskGP(
            train_X, train_Y1, torch.ones_like(train_Y1)
        )
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(gp2))
        # test custom likelihood
        gp2 = SingleTaskGP(train_X, train_Y2, likelihood=GaussianLikelihood())
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(gp2))
        # test FixedNoiseGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1, keepdim=True)
        train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
        gp1_ = FixedNoiseGP(train_X, train_Y1, torch.rand_like(train_Y1))
        gp2_ = FixedNoiseGP(train_X, train_Y2, torch.rand_like(train_Y2))
        list_gp = ModelListGP(gp1_, gp2_)
        batch_gp = model_list_to_batched(list_gp)
        # The conversion result was previously discarded; check that the
        # batched model keeps the class of the list members.
        self.assertIsInstance(batch_gp, FixedNoiseGP)
def test_qMS(self):
    """Check output shapes of ``qMultiStepLookahead`` under several configurations.

    Every tensor fed to the acquisition function is created with the test's
    device and dtype so that it matches the model.
    """
    d = 2
    q = 1
    num_data = 3
    q_batch_sizes = [1, 1, 1]
    num_fantasies = [2, 2, 1]
    t_batch_size = [2]
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        train_X = torch.rand(num_data, d, **tkwargs)
        train_Y = torch.rand(num_data, 1, **tkwargs)
        model = SingleTaskGP(train_X, train_Y)
        expected_fantasy_shape = torch.Size(
            num_fantasies[::-1]
            + t_batch_size
            + [num_data + sum(q_batch_sizes), d]
        )

        # default evaluation tests
        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            num_fantasies=num_fantasies,
        )
        q_prime = qMS.get_augmented_q_batch_size(q)
        eval_X = torch.rand(t_batch_size + [q_prime, d], **tkwargs)
        result = qMS(eval_X)
        self.assertEqual(result.shape, torch.Size(t_batch_size))

        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            valfunc_cls=[qExpectedImprovement] * 4,
            valfunc_argfacs=[make_best_f] * 4,
            num_fantasies=num_fantasies,
            inner_mc_samples=[2] * 4,
        )
        result = qMS(eval_X)
        self.assertEqual(result.shape, torch.Size(t_batch_size))

        # get induced fantasy model, with collapse_fantasy_base_samples
        fant_model = qMS.get_induced_fantasy_model(eval_X)
        self.assertEqual(fant_model.train_inputs[0].shape, expected_fantasy_shape)

        # do NOT collapse fantasy base samples
        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            valfunc_cls=[qExpectedImprovement] * 4,
            valfunc_argfacs=[make_best_f] * 4,
            num_fantasies=num_fantasies,
            inner_mc_samples=[2] * 4,
            collapse_fantasy_base_samples=False,
        )
        q_prime = qMS.get_augmented_q_batch_size(q)
        eval_X = torch.rand(t_batch_size + [q_prime, d], **tkwargs)
        result = qMS(eval_X)
        self.assertEqual(result.shape, torch.Size(t_batch_size))
        self.assertEqual(qMS.samplers[0].batch_range, (-3, -2))

        # get induced fantasy model, without collapse_fantasy_base_samples
        fant_model = qMS.get_induced_fantasy_model(eval_X)
        self.assertEqual(fant_model.train_inputs[0].shape, expected_fantasy_shape)

        # X_pending
        X_pending = torch.rand(5, d, **tkwargs)
        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            valfunc_cls=[qExpectedImprovement] * 4,
            valfunc_argfacs=[make_best_f] * 4,
            num_fantasies=num_fantasies,
            inner_mc_samples=[2] * 4,
            X_pending=X_pending,
        )
        q_prime = qMS.get_augmented_q_batch_size(q)
        eval_X = torch.rand(t_batch_size + [q_prime, d], **tkwargs)
        result = qMS(eval_X)
        self.assertEqual(result.shape, torch.Size(t_batch_size))

        # add dummy base_weights to samplers
        samplers = [
            SobolQMCNormalSampler(
                num_samples=nf, resample=False, collapse_batch_dims=True
            )
            for nf in num_fantasies
        ]
        for s in samplers:
            s.base_weights = torch.ones(s.sample_shape[0], 1, **tkwargs)

        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            samplers=samplers,
        )
        q_prime = qMS.get_augmented_q_batch_size(q)
        eval_X = torch.rand(t_batch_size + [q_prime, d], **tkwargs)
        result = qMS(eval_X)
        self.assertEqual(result.shape, torch.Size(t_batch_size))

        # extract candidates
        cand = qMS.extract_candidates(eval_X)
        self.assertEqual(cand.shape, torch.Size(t_batch_size + [q, d]))
def step(self, snapshot_mode: str = 'latest', meta_info: dict = None):
    """
    Run one iteration, resuming at the stage given by `self.curr_checkpoint`.

    Each stage ends with `self.reached_checkpoint()`, so the stages that
    follow within the same call run as soon as the counter has advanced.

    :param snapshot_mode: not used inside this method
    :param meta_info: forwarded to `pyrado.save` when storing the candidates,
                      their values, and the argmax candidates
    """
    # Save snapshot to save the correct iteration count
    self.save_snapshot()

    if self.curr_checkpoint == -2:
        # Train the initial policies in the source domain
        self.train_init_policies()
        self.reached_checkpoint()  # setting counter to -1

    if self.curr_checkpoint == -1:
        # Evaluate the initial policies in the target domain
        self.eval_init_policies()
        self.reached_checkpoint()  # setting counter to 0

    if self.curr_checkpoint == 0:
        # Normalize the input data and standardize the output data
        inputs_norm = self.ddp_projector.project_to(self.cands)
        targets_stdized = standardize(self.cands_values).unsqueeze(1)

        # Create and fit the GP model
        gp = SingleTaskGP(inputs_norm, targets_stdized)
        gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
        fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
        print_cbt('Fitted the GP.', 'g')

        # Acquisition functions
        if self.acq_fcn_type == 'UCB':
            acq_fcn = UpperConfidenceBound(gp, beta=self.acq_param.get('beta', 0.1), maximize=True)
        elif self.acq_fcn_type == 'EI':
            incumbent = targets_stdized.max().item()
            acq_fcn = ExpectedImprovement(gp, best_f=incumbent, maximize=True)
        elif self.acq_fcn_type == 'PI':
            incumbent = targets_stdized.max().item()
            acq_fcn = ProbabilityOfImprovement(gp, best_f=incumbent, maximize=True)
        else:
            raise pyrado.ValueErr(given=self.acq_fcn_type, eq_constraint="'UCB', 'EI', 'PI'")

        # Optimize acquisition function over the unit cube and get new candidate point
        lower = to.zeros(self.ddp_space.flat_dim)
        upper = to.ones(self.ddp_space.flat_dim)
        cand_norm, acq_value = optimize_acqf(
            acq_function=acq_fcn,
            bounds=to.stack([lower, upper]),
            q=1,
            num_restarts=self.acq_restarts,
            raw_samples=self.acq_samples
        )

        # Map the candidate back and append it to the stored candidates
        next_cand = self.ddp_projector.project_back(cand_norm)
        print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
        self.cands = to.cat([self.cands, next_cand], dim=0)
        pyrado.save(self.cands, 'candidates', 'pt', self.save_dir, meta_info)
        self.reached_checkpoint()  # setting counter to 1

    if self.curr_checkpoint == 1:
        # Train and evaluate a new policy, repeat if the resulting policy did not exceed the success threshold
        retry_decorator = until_thold_exceeded(self.thold_succ_subrtn.item(), self.max_subrtn_rep)
        train_until_success = retry_decorator(self.train_policy_sim)
        train_until_success(self.cands[-1, :], prefix=f'iter_{self._curr_iter}')
        self.reached_checkpoint()  # setting counter to 2

    if self.curr_checkpoint == 2:
        # Evaluate the current policy in the target domain
        policy = pyrado.load(
            self.policy, 'policy', 'pt', self.save_dir,
            meta_info=dict(prefix=f'iter_{self._curr_iter}')
        )
        self.curr_cand_value = self.eval_policy(
            self.save_dir,
            self._env_real,
            policy,
            self.mc_estimator,
            f'iter_{self._curr_iter}',
            self.num_eval_rollouts_real
        )
        self.cands_values = to.cat([self.cands_values, self.curr_cand_value.view(1)], dim=0)
        pyrado.save(self.cands_values, 'candidates_values', 'pt', self.save_dir, meta_info)

        # Store the argmax after training and evaluating
        newest_argmax = BayRn.argmax_posterior_mean(
            self.cands,
            self.cands_values.unsqueeze(1),
            self.ddp_space,
            self.acq_restarts,
            self.acq_samples
        )
        self.argmax_cand = to.cat([self.argmax_cand, newest_argmax], dim=0)
        pyrado.save(self.argmax_cand, 'candidates_argmax', 'pt', self.save_dir, meta_info)
        self.reached_checkpoint()  # setting counter to 0
def test_qMS_init(self):
    """Check argument validation of ``qMultiStepLookahead`` and warm-starting.

    The tensor handed to ``warmstart_multistep`` is created with the test's
    device and dtype so that it matches ``bounds`` and the model.
    """
    d = 2
    q = 1
    num_data = 3
    q_batch_sizes = [1, 1, 1]
    num_fantasies = [2, 2, 1]
    t_batch_size = [2]
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        bounds = torch.tensor([[0], [1]], **tkwargs)
        bounds = bounds.repeat(1, d)
        train_X = torch.rand(num_data, d, **tkwargs)
        train_Y = torch.rand(num_data, 1, **tkwargs)
        model = SingleTaskGP(train_X, train_Y)

        # exactly one of samplers or num_fantasies
        with self.assertRaises(UnsupportedError):
            qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[qExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                inner_mc_samples=[2] * 4,
            )

        # cannot use qMS as its own valfunc_cls
        with self.assertRaises(UnsupportedError):
            qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[qMultiStepLookahead] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
            )

        # construct using samplers
        samplers = [
            SobolQMCNormalSampler(
                num_samples=nf, resample=False, collapse_batch_dims=True
            )
            for nf in num_fantasies
        ]
        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            valfunc_cls=[qExpectedImprovement] * 4,
            valfunc_argfacs=[make_best_f] * 4,
            inner_mc_samples=[2] * 4,
            samplers=samplers,
        )
        self.assertEqual(qMS.num_fantasies, num_fantasies)

        # use default valfunc_cls, valfun_argfacs, inner_mc_samples
        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            samplers=samplers,
        )
        self.assertEqual(len(qMS._valfunc_cls), 4)
        self.assertEqual(len(qMS.inner_samplers), 4)
        self.assertEqual(len(qMS._valfunc_argfacs), 4)

        # _construct_inner_samplers error catching tests below
        # AnalyticAcquisitionFunction with MCAcquisitionObjective
        with self.assertRaises(UnsupportedError):
            qMultiStepLookahead(
                model=model,
                objective=IdentityMCObjective(),
                batch_sizes=q_batch_sizes,
                valfunc_cls=[ExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
            )
        # AnalyticAcquisitionFunction and q > 1
        with self.assertRaises(UnsupportedError):
            qMultiStepLookahead(
                model=model,
                batch_sizes=[2, 2, 2],
                valfunc_cls=[ExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
            )
        # AnalyticAcquisitionFunction and inner_mc_samples
        with self.assertWarns(Warning):
            qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[ExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
            )
        # MCAcquisitionFunction and non MCAcquisitionObjective
        with self.assertRaises(UnsupportedError):
            qMultiStepLookahead(
                model=model,
                objective=ScalarizedObjective(weights=torch.tensor([1.0])),
                batch_sizes=[2, 2, 2],
                valfunc_cls=[qExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
            )

        # test warmstarting
        qMS = qMultiStepLookahead(
            model=model,
            batch_sizes=q_batch_sizes,
            samplers=samplers,
        )
        q_prime = qMS.get_augmented_q_batch_size(q)
        # Match device/dtype of `bounds`; the default (CPU, float32) tensor
        # used previously mismatches on CUDA and in the double-precision pass.
        eval_X = torch.rand(t_batch_size + [q_prime, d], **tkwargs)
        warmstarted_X = warmstart_multistep(
            acq_function=qMS,
            bounds=bounds,
            num_restarts=5,
            raw_samples=10,
            full_optimizer=eval_X,
        )
        self.assertEqual(warmstarted_X.shape, torch.Size([5, q_prime, d]))
def test_batched_multi_output_to_single_output(self):
    """Exercise ``batched_multi_output_to_single_output`` on supported and rejected models."""
    for dtype in (torch.float, torch.double):
        # basic test
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y = torch.stack(
            [
                train_X.sum(dim=-1),
                (train_X[:, 0] - train_X[:, 1]),
            ],
            dim=1,
        )
        batched_mo_model = SingleTaskGP(train_X, train_Y)
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model, SingleTaskGP)
        self.assertEqual(batched_so_model.num_outputs, 1)
        # test non-batched models
        non_batch_model = SimpleGPyTorchModel(train_X, train_Y[:, :1])
        with self.assertRaises(UnsupportedError):
            batched_multi_output_to_single_output(non_batch_model)
        # test HeteroskedasticSingleTaskGP
        gp2 = HeteroskedasticSingleTaskGP(
            train_X, train_Y, torch.ones_like(train_Y)
        )
        with self.assertRaises(NotImplementedError):
            batched_multi_output_to_single_output(gp2)
        # test custom likelihood
        gp2 = SingleTaskGP(train_X, train_Y, likelihood=GaussianLikelihood())
        with self.assertRaises(NotImplementedError):
            batched_multi_output_to_single_output(gp2)
        # test FixedNoiseGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        batched_mo_model = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model, FixedNoiseGP)
        self.assertEqual(batched_so_model.num_outputs, 1)
        # test SingleTaskMultiFidelityGP
        batched_mo_model = SingleTaskMultiFidelityGP(
            train_X, train_Y, iteration_fidelity=1
        )
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model, SingleTaskMultiFidelityGP)
        self.assertEqual(batched_so_model.num_outputs, 1)
        # test input transform
        input_tf = Normalize(
            d=2,
            bounds=torch.tensor(
                [[0.0, 0.0], [1.0, 1.0]], device=self.device, dtype=dtype
            ),
        )
        batched_mo_model = SingleTaskGP(train_X, train_Y, input_transform=input_tf)
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model.input_transform, Normalize)
        self.assertTrue(
            torch.equal(batched_so_model.input_transform.bounds, input_tf.bounds)
        )
        # test batched input transform
        input_tf2 = Normalize(
            d=2,
            bounds=torch.tensor(
                [[-1.0, -1.0], [1.0, 1.0]], device=self.device, dtype=dtype
            ),
            batch_shape=torch.Size([2]),
        )
        batched_mo_model = SingleTaskGP(train_X, train_Y, input_transform=input_tf2)
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        # Assert on the model converted just above and on `input_tf2`; the
        # checks used to look at the previous section's model and transform,
        # so this case was never actually verified.
        self.assertIsInstance(batched_so_model.input_transform, Normalize)
        self.assertTrue(
            torch.equal(batched_so_model.input_transform.bounds, input_tf2.bounds)
        )
        # test outcome transform
        batched_mo_model = SingleTaskGP(
            train_X, train_Y, outcome_transform=Standardize(m=2)
        )
        with self.assertRaises(NotImplementedError):
            batched_multi_output_to_single_output(batched_mo_model)
def EI_run(seed, alpha, rho, x0=5, n0=100, iter_count=1000, mu_1=2, mu_2=5, sigma_1=1, sigma_2=1, SAA_seed=None):
    """
    Does a single run of the Expected Improvement algorithm for the simple normal problem, without derivatives.

    The first four points come from a Sobol draw on [-5, 5]; every later point is the EI maximizer of a GP
    fitted to all evaluations made so far.

    :param seed: random seed, applied to numpy's global generator
    :param alpha: risk level
    :param rho: risk measure
    :param x0: Ignored! Just to keep the same arglist as others
    :param n0: outer sample starting size
    :param iter_count: number of iterations (total number of evaluated points)
    :param mu_1: forwarded to estimate_no_grad
    :param mu_2: forwarded to estimate_no_grad
    :param sigma_1: forwarded to estimate_no_grad
    :param sigma_2: forwarded to estimate_no_grad
    :param SAA_seed: if given, an SAA version is run with this seed.
    :return: tensor of shape (iter_count, 1); row i holds the point with the smallest
        estimated value among the first i + 1 evaluations
    """
    np.random.seed(seed)
    begin = datetime.datetime.now()
    args = (n0, alpha, rho, mu_1, mu_2, sigma_1, sigma_2, SAA_seed)
    # Both buffers are uninitialised beyond the rows filled so far, so only
    # the leading slice [:i] may ever be read.
    points = torch.empty(iter_count, 1)
    values = torch.empty(points.shape)
    points[:4] = draw_sobol_samples(torch.tensor([[-5.], [5.]]), n=4, q=1).reshape(-1, 1)
    for i in range(4):
        values[i] = estimate_no_grad(points[i], *args)

    unit_bounds = torch.tensor([[0.], [1.]])
    for i in range(4, iter_count):
        # fit gp
        # this transforms the GP to unit domain - botorch priors work best there
        observed_points = points[:i] / 10. + 0.5
        observed_values = values[:i]
        model = SingleTaskGP(observed_points, observed_values, outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll)

        # optimize EI to get the candidate
        # The incumbent is the best value observed so far; taking the minimum
        # over the whole buffer would include uninitialised memory.
        acqf = ExpectedImprovement(model, best_f=observed_values.min(), maximize=False)
        best_p, _ = optimize_acqf(acqf, bounds=unit_bounds, q=1, num_restarts=10, raw_samples=50)
        # transform it back to original domain
        best_p = best_p.detach() * 10. - 5.
        points[i] = best_p
        values[i] = estimate_no_grad(points[i], *args)

    best_list = torch.empty(points.shape)
    for i in range(1, iter_count + 1):
        # pick the arg min of the history to return
        best_ind = torch.argmin(values[:i], dim=0)
        best_list[i - 1] = points[best_ind]

    x_list = best_list
    now = datetime.datetime.now()
    print('done time: %s' % (now - begin))
    print('call count: %d' % call_count)
    return x_list
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is three or less.

    A single GP is fitted to the objectives (and the constraints, if any) on inputs
    normalized with ``bounds``; one candidate is then obtained by optimizing qEHVI over
    the unit cube and mapped back to the original scale.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is None:
        train_y = train_obj
        train_obj_feas = train_obj
        additional_qehvi_kwargs = {}
    else:
        # The constraints are modelled as trailing outputs of the same GP.
        train_y = torch.cat([train_obj, train_con], dim=-1)

        feasible_rows = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[feasible_rows]

        n_constraints = train_con.size(1)
        # `i=i` pins the index for each callable at creation time.
        constraints = [
            lambda Z, i=i: Z[..., -n_constraints + i] for i in range(n_constraints)
        ]
        additional_qehvi_kwargs = {
            "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))),
            "constraints": constraints,
        }

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
    fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    alpha = 10**(-8 + n_objectives) if n_objectives > 2 else 0.0
    partitioning = NondominatedPartitioning(
        num_outcomes=n_objectives, Y=train_obj_feas, alpha=alpha
    )

    # Reference point: slightly below the per-objective minimum of all observations.
    ref_point_list = (train_obj.min(dim=0).values - 1e-8).tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **additional_qehvi_kwargs,
    )

    # Inputs were normalized above, so the search runs over [0, 1] per parameter.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    optimizer_options = {"batch_limit": 5, "maxiter": 200, "nonnegative": True}
    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options=optimizer_options,
        sequential=True,
    )

    return unnormalize(candidates.detach(), bounds=bounds)
def sample_arch(self, START_BO, g, steps, hyperparams, og_flops, full_val_loss, target_flops=0):
    """Pick the next architecture parameterization and its per-layer budget.

    Branches, in order:
      * ``args.slim``: random sample when ``target_flops == 0``, otherwise
        a constant ``args.lower_channel`` parameterization.
      * ``g < START_BO``: one random (or ``args.lower_channel``) value
        repeated over all dimensions.
      * ``g == START_BO``: all ones (or ``args.lower_channel``).
      * otherwise: fit two GPs on ``self.X`` / ``self.Y`` (loss and cost),
        combine their UCB acquisitions through ``RandAcquisition`` and
        optimize; with ``args.pas`` a target FLOPs ratio is chosen and a
        binary search over the scalarization weight is run.

    ``args`` and ``writer`` are not defined in this method; presumably they
    are module-level globals -- TODO confirm.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; the places where it is not forced by the data flow are marked.

    :param START_BO: threshold on ``g`` separating the warm-up branches from
        the GP-based branch
    :param g: compared against ``START_BO`` to select the branch
    :param steps: step value passed to ``writer.add_scalar``
    :param hyperparams: object providing ``random_sample``, ``get_dim`` and
        ``get_layer_budget_from_parameterization``
    :param og_flops: divisor applied to the simulated FLOPs to form the ratio
    :param full_val_loss: loss appended at ``args.upper_flops`` when the
        Pareto front is extended on the upper end
    :param target_flops: ``0`` means no fixed target; any other value is used
        as the target in the ``args.pas`` branch
    :return: ``(layer_budget, parameterization, sampling_weights)`` where the
        last item is ``self.sampling_weights`` divided by its sum
    """
    if args.slim:
        if target_flops == 0:
            parameterization = hyperparams.random_sample()
            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        else:
            parameterization = np.ones(hyperparams.get_dim()) * args.lower_channel
            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
    else:
        # random sample to warmup history for MOBO
        if g < START_BO:
            if target_flops == 0:
                f = np.random.rand(1) * (args.upper_channel-args.lower_channel) + args.lower_channel
            else:
                f = args.lower_channel
            # the same value f is used for every dimension
            parameterization = np.ones(hyperparams.get_dim()) * f
            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        # put the largest model into the history
        elif g == START_BO:
            if target_flops == 0:
                parameterization = np.ones(hyperparams.get_dim())
            else:
                f = args.lower_channel
                parameterization = np.ones(hyperparams.get_dim()) * f
            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        # MOBO
        else:
            # this is the scalarization (lambda_{FLOPs})
            rand = torch.rand(1).cuda()

            # standardize data for building Gaussian Processes
            train_X = torch.FloatTensor(self.X).cuda()
            # column 0 of self.Y is used as the loss, column 1 as the cost
            train_Y_loss = torch.FloatTensor(np.array(self.Y)[:, 0].reshape(-1, 1)).cuda()
            train_Y_loss = standardize(train_Y_loss)

            train_Y_cost = torch.FloatTensor(np.array(self.Y)[:, 1].reshape(-1, 1)).cuda()
            train_Y_cost = standardize(train_Y_cost)

            new_train_X = train_X
            # GP for the cross entropy loss
            gp_loss = SingleTaskGP(new_train_X, train_Y_loss)
            mll = ExactMarginalLogLikelihood(gp_loss.likelihood, gp_loss)
            mll = mll.to('cuda')
            fit_gpytorch_model(mll)

            # GP for FLOPs
            # we use add-gp since FLOPs has additive structure (not exactly though)
            # the parameters for ScaleKernel and MaternKernel simply follow the default
            covar_module = AdditiveStructureKernel(
                ScaleKernel(
                    MaternKernel(
                        nu=2.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                        num_dims=1
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                ),
                num_dims=train_X.shape[1]
            )
            gp_cost = SingleTaskGP(new_train_X, train_Y_cost, covar_module=covar_module)
            mll = ExactMarginalLogLikelihood(gp_cost.likelihood, gp_cost)
            mll = mll.to('cuda')
            fit_gpytorch_model(mll)

            # Build acquisition functions
            UCB_loss = UpperConfidenceBound(gp_loss, beta=0.1).cuda()
            UCB_cost = UpperConfidenceBound(gp_cost, beta=0.1).cuda()

            # Combine them via augmented Tchebyshev scalarization
            self.mobo_obj = RandAcquisition(UCB_loss).cuda()
            self.mobo_obj.setup(UCB_loss, UCB_cost, rand)

            # Bounds for the optimization variable (alpha)
            lower = torch.ones(new_train_X.shape[1])*args.lower_channel
            upper = torch.ones(new_train_X.shape[1])*args.upper_channel
            self.mobo_bounds = torch.stack([lower, upper]).cuda()

            # Pareto-aware sampling
            if args.pas:
                # Generate approximate Pareto front first
                costs = []
                for i in range(len(self.population_data)):
                    costs.append([self.population_data[i]['loss'], self.population_data[i]['ratio']])
                costs = np.array(costs)
                efficient_mask = is_pareto_efficient(costs)
                costs = costs[efficient_mask]
                loss = costs[:, 0]
                flops = costs[:, 1]
                # order the front by increasing FLOPs ratio
                sorted_idx = np.argsort(flops)
                loss = loss[sorted_idx]
                flops = flops[sorted_idx]
                # Extend the front to args.lower_flops if it starts above it.
                # NOTE(review): 8 is a hard-coded loss for that added endpoint;
                # its rationale is not visible here.
                if flops[0] > args.lower_flops:
                    flops = np.concatenate([[args.lower_flops], flops.reshape(-1)])
                    loss = np.concatenate([[8], loss.reshape(-1)])
                else:
                    flops = flops.reshape(-1)
                    loss = loss.reshape(-1)

                # Extend the front to args.upper_flops with full_val_loss when
                # the last point lies below that ratio and has a larger loss.
                if flops[-1] < args.upper_flops and (loss[-1] > full_val_loss):
                    flops = np.concatenate([flops.reshape(-1), [args.upper_flops]])
                    loss = np.concatenate([loss.reshape(-1), [full_val_loss]])
                else:
                    flops = flops.reshape(-1)
                    loss = loss.reshape(-1)

                # Equation (4) in paper
                areas = (flops[1:]-flops[:-1])*(loss[:-1]-loss[1:])

                # Quantize into 50 bins to sample from multinomial
                self.sampling_weights = np.zeros(50)
                # skip front points whose ratio is below args.lower_flops
                k = 0
                while k < len(flops) and flops[k] < args.lower_flops:
                    k+=1
                for i in range(50):
                    lower = i/50.
                    upper = (i+1)/50.
                    # bins not fully inside [lower_flops, upper_flops] keep weight 0
                    if upper < args.lower_flops or lower > args.upper_flops or lower < args.lower_flops:
                        continue
                    cnt = 1
                    while ((k+1) < len(flops)) and upper > flops[k+1]:
                        self.sampling_weights[i] += areas[k]
                        cnt += 1
                        k += 1
                    if k < len(areas):
                        self.sampling_weights[i] += areas[k]
                    # NOTE(review): placed after the `if` (averaging over cnt
                    # for every visited bin) -- confirm against the original.
                    self.sampling_weights[i] /= cnt
                # fall back to uniform weights when no bin received any area
                if np.sum(self.sampling_weights) == 0:
                    self.sampling_weights = np.ones(50)

                if target_flops == 0:
                    # bin centres 0.01, 0.03, ..., 0.99
                    val = np.arange(0.01, 1, 0.02)
                    chosen_target_flops = np.random.choice(val, p=(self.sampling_weights/np.sum(self.sampling_weights)))
                else:
                    chosen_target_flops = target_flops

                # Binary search is here
                lower_bnd, upper_bnd = 0, 1
                lmda = 0.5
                for i in range(10):
                    self.mobo_obj.rand = lmda

                    parameterization, acq_value = optimize_acqf(
                        self.mobo_obj,
                        bounds=self.mobo_bounds,
                        q=1,
                        num_restarts=5,
                        raw_samples=1000,
                    )

                    parameterization = parameterization[0].cpu().numpy()
                    layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
                    sim_flops = self.mask_pruner.simulate_and_count_flops(layer_budget)
                    ratio = sim_flops/og_flops

                    # stop once the simulated ratio is within 0.02 of the target
                    if np.abs(ratio - chosen_target_flops) <= 0.02:
                        break
                    # the search direction is mirrored depending on args.baseline
                    if args.baseline > 0:
                        if ratio < chosen_target_flops:
                            lower_bnd = lmda
                            lmda = (lmda + upper_bnd) / 2
                        elif ratio > chosen_target_flops:
                            upper_bnd = lmda
                            lmda = (lmda + lower_bnd) / 2
                    else:
                        if ratio < chosen_target_flops:
                            upper_bnd = lmda
                            lmda = (lmda + lower_bnd) / 2
                        elif ratio > chosen_target_flops:
                            lower_bnd = lmda
                            lmda = (lmda + upper_bnd) / 2
                # NOTE(review): these two statements are placed after the loop
                # (run once, logging the last loop index) -- confirm.
                rand[0] = lmda
                writer.add_scalar('Binary search trials', i, steps)

            else:
                parameterization, acq_value = optimize_acqf(
                    self.mobo_obj,
                    bounds=self.mobo_bounds,
                    q=1,
                    num_restarts=5,
                    raw_samples=1000,
                )
                parameterization = parameterization[0].cpu().numpy()

            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
    # self.sampling_weights is only assigned in the args.pas branch above; the
    # other branches rely on a value set outside this method.
    return layer_budget, parameterization, self.sampling_weights/np.sum(self.sampling_weights)
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` must be one for this function.
            Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints
            are involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. This function indexes it along the first axis:
            ``bounds[0]`` holds the lower and ``bounds[1]`` the upper bound of every
            parameter.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``,
        mapped back to the original (unnormalized) scale.

    Raises:
        ValueError: If ``train_obj`` has more than one objective column.
    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")

    if train_con is None:
        train_y = train_obj
        best_f = train_obj.max()
        objective = None  # Using the default identity objective.
    else:
        # The constraints are modelled as trailing outputs of the same GP.
        train_y = torch.cat([train_obj, train_con], dim=-1)

        feasible_rows = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[feasible_rows]

        if train_obj_feas.numel() != 0:
            best_f = train_obj_feas.max()
        else:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            best_f = torch.zeros(())

        n_constraints = train_con.size(1)
        # `i=i` pins the index for each callable at creation time.
        constraints = [
            lambda Z, i=i: Z[..., -n_constraints + i] for i in range(n_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # Inputs were normalized above, so the search runs over [0, 1] per parameter.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options=optimizer_options,
        sequential=True,
    )

    return unnormalize(candidates.detach(), bounds=bounds)
import torch
from botorch.test_functions import Branin
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.models.transforms import Standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from parametric_bandit.discrete_KG import DiscreteKGAlg

# Smoke script: fit a GP to noisy Branin samples and feed its posterior at the
# training points to the discrete KG algorithm.
torch.manual_seed(0)

# generate input
n = 10
noise_std = 0.1
# single source of truth for the noise level: it is used both by the test
# function and (squared) as the error passed to the algorithm below
function = Branin(noise_std=noise_std)
dim = function.dim
train_X = torch.rand((n, dim))
train_Y = function(train_X).unsqueeze(-1)

# fit model
gp = SingleTaskGP(train_X, train_Y, outcome_transform=Standardize(m=1))
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
fit_gpytorch_model(mll)

# get mu and Sigma from one posterior evaluation at the training points
posterior = gp.posterior(train_X)
mu = posterior.mean
Sigma = posterior.mvn.covariance_matrix

# initiate the algorithm for testing
dkg = DiscreteKGAlg(M=n, error=noise_std**2, mu_0=mu, Sigma_0=Sigma)

print(dkg.find_maximizer())