def _setUp(self, double=False):
    dtype = torch.double if double else torch.float
    train_x = torch.linspace(0, 1, 10, device=self.device, dtype=dtype).unsqueeze(-1)
    train_y = torch.sin(train_x * (2 * math.pi))
    train_yvar = torch.tensor(0.1**2, device=self.device)
    noise = torch.tensor(NOISE, device=self.device, dtype=dtype)
    self.train_x = train_x
    self.train_y = train_y + noise
    self.train_yvar = train_yvar
    self.bounds = torch.tensor([[0.0], [1.0]], device=self.device, dtype=dtype)
    model_st = SingleTaskGP(self.train_x, self.train_y)
    self.model_st = model_st.to(device=self.device, dtype=dtype)
    self.mll_st = ExactMarginalLogLikelihood(self.model_st.likelihood, self.model_st)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=OptimizationWarning)
        self.mll_st = fit_gpytorch_model(
            self.mll_st, options={"maxiter": 5}, max_retries=1
        )
    model_fn = FixedNoiseGP(
        self.train_x, self.train_y, self.train_yvar.expand_as(self.train_y)
    )
    self.model_fn = model_fn.to(device=self.device, dtype=dtype)
    self.mll_fn = ExactMarginalLogLikelihood(self.model_fn.likelihood, self.model_fn)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=OptimizationWarning)
        self.mll_fn = fit_gpytorch_model(
            self.mll_fn, options={"maxiter": 5}, max_retries=1
        )
def _setUp(self, double=False, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    dtype = torch.double if double else torch.float
    train_x = torch.linspace(0, 1, 10, device=device, dtype=dtype).unsqueeze(-1)
    train_y = torch.sin(train_x * (2 * math.pi)).squeeze(-1)
    train_yvar = torch.tensor(0.1**2, device=device)
    noise = torch.tensor(NOISE, device=device, dtype=dtype)
    self.train_x = train_x
    self.train_y = train_y + noise
    self.train_yvar = train_yvar
    self.bounds = torch.tensor([[0.0], [1.0]], device=device, dtype=dtype)
    model_st = SingleTaskGP(self.train_x, self.train_y)
    self.model_st = model_st.to(device=device, dtype=dtype)
    self.mll_st = ExactMarginalLogLikelihood(self.model_st.likelihood, self.model_st)
    self.mll_st = fit_gpytorch_model(self.mll_st, options={"maxiter": 5})
    model_fn = FixedNoiseGP(
        self.train_x, self.train_y, self.train_yvar.expand_as(self.train_y)
    )
    self.model_fn = model_fn.to(device=device, dtype=dtype)
    self.mll_fn = ExactMarginalLogLikelihood(self.model_fn.likelihood, self.model_fn)
    self.mll_fn = fit_gpytorch_model(self.mll_fn, options={"maxiter": 5})
def initialize_model(train_x, train_obj, state_dict=None):
    model = FixedNoiseGP(train_x, train_obj, train_yvar.expand_as(train_obj)).to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
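# Usage sketch (added for illustration; not part of the original source). It
# assumes `train_x`, `train_obj`, and the global `train_yvar` from the snippet
# above, and shows the intended warm-start pattern: refits in later BO
# iterations start from the previous iteration's hyperparameters.
mll, model = initialize_model(train_x, train_obj)
fit_gpytorch_model(mll)
# ... acquire and evaluate new points, append them to train_x / train_obj ...
mll, model = initialize_model(train_x, train_obj, state_dict=model.state_dict())
fit_gpytorch_model(mll)  # optimization starts from the loaded hyperparameters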
def run():
    train_x, train_obj, train_con, best_observed_value_nei = generate_initial_data(n=10)

    # define models for objective and constraint
    model_obj = FixedNoiseGP(train_x, train_obj, train_yvar.expand_as(train_obj)).to(train_x)
    model_con = FixedNoiseGP(train_x, train_con, train_yvar.expand_as(train_con)).to(train_x)

    # combine into a multi-output GP model
    model = ModelListGP(model_obj, model_con)
    mll = SumMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqui_gpmean_cons = GPmeanConstrained(model=model, objective=constrained_obj)

    # Forward:
    # X = torch.rand(size=(1, 6))
    # acqui_gpmean_cons.forward(X)

    method_opti = "SLSQP"  # supports constraints
    # method_opti = "COBYLA"  # supports constraints
    # method_opti = "L-BFGS-B"

    # Below, num_restarts must be equal to q; otherwise, it fails...
    options = {"batch_limit": 1, "maxiter": 200, "ftol": 1e-6, "method": method_opti}
    x_eta_c, eta_c = optimize_acqf(
        acq_function=acqui_gpmean_cons,
        bounds=bounds,
        q=1,
        num_restarts=1,
        raw_samples=500,
        return_best_only=True,
        options=options,
    )

    pdb.set_trace()
def initialize_model(train_x, train_obj, train_con, state_dict=None):
    # define models for objective and constraint
    model_obj = FixedNoiseGP(train_x, train_obj, train_yvar.expand_as(train_obj)).to(train_x)
    model_con = FixedNoiseGP(train_x, train_con, train_yvar.expand_as(train_con)).to(train_x)
    # combine into a multi-output GP model
    model = ModelListGP(model_obj, model_con)
    mll = SumMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
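# Usage sketch (illustrative, not from the original source): fit both
# sub-models jointly via the summed MLL, then query the multi-output
# posterior. `test_x` is a hypothetical (n, d) tensor of candidate points.
mll, model = initialize_model(train_x, train_obj, train_con)
fit_gpytorch_model(mll)
with torch.no_grad():
    posterior = model.posterior(test_x)
    mean = posterior.mean  # (n, 2): column 0 = objective, column 1 = constraint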
def _get_model(self, dtype=torch.float):
    state_dict = {
        "mean_module.constant": torch.tensor([-0.0066]),
        "covar_module.raw_outputscale": torch.tensor(1.0143),
        "covar_module.base_kernel.raw_lengthscale": torch.tensor([[-0.99]]),
        "covar_module.base_kernel.lengthscale_prior.concentration": torch.tensor(3.0),
        "covar_module.base_kernel.lengthscale_prior.rate": torch.tensor(6.0),
        "covar_module.outputscale_prior.concentration": torch.tensor(2.0),
        "covar_module.outputscale_prior.rate": torch.tensor(0.1500),
    }
    train_x = torch.linspace(0, 1, 10, device=self.device, dtype=dtype).unsqueeze(-1)
    train_y = torch.sin(train_x * (2 * math.pi))
    noise = torch.tensor(NEI_NOISE, device=self.device, dtype=dtype)
    train_y += noise
    train_yvar = torch.full_like(train_y, 0.25**2)
    model = FixedNoiseGP(train_X=train_x, train_Y=train_y, train_Yvar=train_yvar)
    model.load_state_dict(state_dict)
    model.to(train_x)
    model.eval()
    return model
def initialize_model_nei(train_x, train_obj, state_dict=None):
    """
    Define model for the objective.

    :param train_x: input tensor
    :param train_obj: output tensor -> g(x) + s(x)
    :param state_dict: optional model parameters
    :return: (mll, model) tuple
    """
    model = FixedNoiseGP(train_x, train_obj, train_yvar.expand_as(train_obj)).to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model
def test_roundtrip(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        train_X = torch.rand(10, 2, device=device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        # SingleTaskGP
        batch_gp = SingleTaskGP(train_X, train_Y)
        list_gp = batched_to_model_list(batch_gp)
        batch_gp_recov = model_list_to_batched(list_gp)
        sd_orig = batch_gp.state_dict()
        sd_recov = batch_gp_recov.state_dict()
        self.assertTrue(set(sd_orig) == set(sd_recov))
        self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
        # FixedNoiseGP
        batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        list_gp = batched_to_model_list(batch_gp)
        batch_gp_recov = model_list_to_batched(list_gp)
        sd_orig = batch_gp.state_dict()
        sd_recov = batch_gp_recov.state_dict()
        self.assertTrue(set(sd_orig) == set(sd_recov))
        self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
def test_roundtrip(self):
    for dtype in (torch.float, torch.double):
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        # SingleTaskGP
        batch_gp = SingleTaskGP(train_X, train_Y)
        list_gp = batched_to_model_list(batch_gp)
        batch_gp_recov = model_list_to_batched(list_gp)
        sd_orig = batch_gp.state_dict()
        sd_recov = batch_gp_recov.state_dict()
        self.assertTrue(set(sd_orig) == set(sd_recov))
        self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
        # FixedNoiseGP
        batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        list_gp = batched_to_model_list(batch_gp)
        batch_gp_recov = model_list_to_batched(list_gp)
        sd_orig = batch_gp.state_dict()
        sd_recov = batch_gp_recov.state_dict()
        self.assertTrue(set(sd_orig) == set(sd_recov))
        self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
        # SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            batch_gp = SingleTaskMultiFidelityGP(
                train_X, train_Y, iteration_fidelity=1, linear_truncated=lin_trunc
            )
            list_gp = batched_to_model_list(batch_gp)
            batch_gp_recov = model_list_to_batched(list_gp)
            sd_orig = batch_gp.state_dict()
            sd_recov = batch_gp_recov.state_dict()
            self.assertTrue(set(sd_orig) == set(sd_recov))
            self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
def _getBatchedModel(self, kind="SingleTaskGP", double=False, outcome_transform=False):
    dtype = torch.double if double else torch.float
    train_x = torch.linspace(0, 1, 10, device=self.device, dtype=dtype).unsqueeze(-1)
    noise = torch.tensor(NOISE, device=self.device, dtype=dtype)
    train_y1 = torch.sin(train_x * (2 * math.pi)) + noise
    train_y2 = torch.sin(train_x * (2 * math.pi)) + noise
    train_y = torch.cat([train_y1, train_y2], dim=-1)
    kwargs = {}
    if outcome_transform:
        kwargs["outcome_transform"] = Standardize(m=2)
    if kind == "SingleTaskGP":
        model = SingleTaskGP(train_x, train_y, **kwargs)
    elif kind == "FixedNoiseGP":
        model = FixedNoiseGP(train_x, train_y, 0.1 * torch.ones_like(train_y), **kwargs)
    elif kind == "HeteroskedasticSingleTaskGP":
        model = HeteroskedasticSingleTaskGP(
            train_x, train_y, 0.1 * torch.ones_like(train_y), **kwargs
        )
    else:
        raise NotImplementedError
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    return mll.to(device=self.device, dtype=dtype)
def initialize_model(train_x, train_y, train_y_sem):
    """
    Defines a GP given X, Y, and noise observations (standard error of mean)

    train_x (theta):          (n_observations, n_parameters)
    train_y (G_obs):          (n_observations, n_black_box_outputs)
    train_y_sem (G_obs_sem):  (n_observations, n_black_box_outputs)
    """
    train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units

    # standardize outputs to zero mean, unit variance (over n_observations dimension)
    n_output_dims = (
        (n_days * n_age if per_age_group_objective else n_days)
        if args.model_multi_output_simulator
        else 1
    )
    outcome_transform = Standardize(m=n_output_dims)
    # train_y = standardize(train_y)  (the above also normalizes noise)

    # choose model
    if args.model_noise_via_sem:
        assert args.model_multi_output_simulator
        model = FixedNoiseGP(train_x, train_y, train_ynoise, outcome_transform=outcome_transform)
    else:
        model = SingleTaskGP(train_x, train_y, outcome_transform=outcome_transform)

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    return mll, model
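# Minimal sketch (illustrative assumption, not from the original source):
# FixedNoiseGP takes observation noise in *variance* units, which is why the
# snippet above squares the standard error of the mean before passing it in.
sem = torch.full((10, 1), 0.05)  # hypothetical per-observation SEM
train_ynoise = sem.pow(2.0)      # variance = SEM ** 2
# model = FixedNoiseGP(train_x, train_y, train_ynoise, ...)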
def test_batched_to_model_list(self):
    for dtype in (torch.float, torch.double):
        # test SingleTaskGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        batch_gp = SingleTaskGP(train_X, train_Y)
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test FixedNoiseGP
        batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            batch_gp = SingleTaskMultiFidelityGP(
                train_X, train_Y, iteration_fidelity=1, linear_truncated=lin_trunc
            )
            list_gp = batched_to_model_list(batch_gp)
            self.assertIsInstance(list_gp, ModelListGP)
        # test HeteroskedasticSingleTaskGP
        batch_gp = HeteroskedasticSingleTaskGP(train_X, train_Y, torch.rand_like(train_Y))
        with self.assertRaises(NotImplementedError):
            batched_to_model_list(batch_gp)
def test_batched_to_model_list(self):
    for dtype in (torch.float, torch.double):
        # test SingleTaskGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        batch_gp = SingleTaskGP(train_X, train_Y)
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test FixedNoiseGP
        batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            batch_gp = SingleTaskMultiFidelityGP(
                train_X, train_Y, iteration_fidelity=1, linear_truncated=lin_trunc
            )
            list_gp = batched_to_model_list(batch_gp)
            self.assertIsInstance(list_gp, ModelListGP)
        # test HeteroskedasticSingleTaskGP
        batch_gp = HeteroskedasticSingleTaskGP(train_X, train_Y, torch.rand_like(train_Y))
        with self.assertRaises(NotImplementedError):
            batched_to_model_list(batch_gp)
        # test with transforms
        input_tf = Normalize(
            d=2,
            bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], device=self.device, dtype=dtype),
        )
        octf = Standardize(m=2)
        batch_gp = SingleTaskGP(
            train_X, train_Y, outcome_transform=octf, input_transform=input_tf
        )
        list_gp = batched_to_model_list(batch_gp)
        for i, m in enumerate(list_gp.models):
            self.assertIsInstance(m.input_transform, Normalize)
            self.assertTrue(torch.equal(m.input_transform.bounds, input_tf.bounds))
            self.assertIsInstance(m.outcome_transform, Standardize)
            self.assertEqual(m.outcome_transform._m, 1)
            expected_octf = octf.subset_output(idcs=[i])
            for attr_name in ["means", "stdvs", "_stdvs_sq"]:
                self.assertTrue(
                    torch.equal(
                        m.outcome_transform.__getattr__(attr_name),
                        expected_octf.__getattr__(attr_name),
                    )
                )
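# Sketch (illustrative, not from the original source) of the subset_output
# behavior the loop above compares against: slicing a fitted Standardize
# transform down to a single output dimension.
import torch
from botorch.models.transforms.outcome import Standardize

octf = Standardize(m=2)
Y = torch.randn(10, 2)
octf(Y)                               # fits per-output means and stdvs
octf0 = octf.subset_output(idcs=[0])  # transform restricted to output 0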
def initialize_model(train_x, train_y, train_y_sem):
    """
    Defines a GP given X, Y, and noise observations (standard error of mean)
    """
    train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units
    # standardize outputs to zero mean, unit variance for stable hyperparameter fitting
    model = FixedNoiseGP(
        train_x, train_y, train_ynoise, outcome_transform=Standardize(m=n_days * n_age)
    )
    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    return mll, model
def fit_gp_model(self, arm: int, alternative: int = None, update: bool = False) -> None:
    """
    Fits a GP model to the given arm

    :param arm: Arm index
    :param alternative: Last sampled arm alternative. Used when adding samples
        without refitting
    :param update: Forces GP to be fitted. Otherwise, it is fitted every
        self.gp_update_freq samples.
    :return: None
    """
    arm_sample_count = sum(len(e) for e in self.observations[arm])
    if update or arm_sample_count % self.gp_update_freq == 0:
        train_X_list = list()
        train_Y_list = list()
        for j in range(len(self.alternative_points[arm])):
            for k in range(len(self.observations[arm][j])):
                train_X_list.append(self.alternative_points[arm][j].unsqueeze(-2))
                train_Y_list.append(self.observations[arm][j][k].unsqueeze(-2))
        train_X = torch.cat(train_X_list, dim=0)
        train_Y = torch.cat(train_Y_list, dim=0)
        if self.noise_std is None:
            model = SingleTaskGP(train_X, train_Y, outcome_transform=Standardize(m=1))
        else:
            model = FixedNoiseGP(
                train_X,
                train_Y,
                train_Yvar=torch.tensor([self.noise_std**2]).expand_as(train_Y),
                outcome_transform=Standardize(m=1),
            )
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll)
        self.models[arm] = model
    else:
        last_point = self.alternative_points[arm][alternative].reshape(1, -1)
        last_observation = self.observations[arm][alternative][-1].reshape(1, -1)
        # condition_on_observations returns a new model rather than updating
        # in place, so keep the result
        self.models[arm] = self.models[arm].condition_on_observations(
            last_point, last_observation, noise=self.noise_std**2
        )
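# Sketch (illustrative, not from the original source): condition_on_observations
# returns a *new* model containing the extra data without re-optimizing
# hyperparameters -- the cheap-update path the `else` branch above relies on.
import torch
from botorch.models import SingleTaskGP

model = SingleTaskGP(torch.rand(8, 2), torch.rand(8, 1))
new_x = torch.rand(1, 2)
new_y = torch.rand(1, 1)
model = model.condition_on_observations(X=new_x, Y=new_y)  # cheap data update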
def test_batched_to_model_list(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # test SingleTaskGP
        train_X = torch.rand(10, 2, device=device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        batch_gp = SingleTaskGP(train_X, train_Y)
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test FixedNoiseGP
        batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test HeteroskedasticSingleTaskGP
        batch_gp = HeteroskedasticSingleTaskGP(train_X, train_Y, torch.rand_like(train_Y))
        with self.assertRaises(NotImplementedError):
            batched_to_model_list(batch_gp)
def test_batched_to_model_list(self):
    for dtype in (torch.float, torch.double):
        # test SingleTaskGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1)
        train_Y2 = train_X[:, 0] - train_X[:, 1]
        train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
        batch_gp = SingleTaskGP(train_X, train_Y)
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test FixedNoiseGP
        batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        list_gp = batched_to_model_list(batch_gp)
        self.assertIsInstance(list_gp, ModelListGP)
        # test SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            batch_gp = SingleTaskMultiFidelityGP(
                train_X, train_Y, iteration_fidelity=1, linear_truncated=lin_trunc
            )
            list_gp = batched_to_model_list(batch_gp)
            self.assertIsInstance(list_gp, ModelListGP)
        # test HeteroskedasticSingleTaskGP
        batch_gp = HeteroskedasticSingleTaskGP(train_X, train_Y, torch.rand_like(train_Y))
        with self.assertRaises(NotImplementedError):
            batched_to_model_list(batch_gp)
        # test input transform
        input_tf = Normalize(
            d=2,
            bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], device=self.device, dtype=dtype),
        )
        batch_gp = SingleTaskGP(train_X, train_Y, input_transform=input_tf)
        list_gp = batched_to_model_list(batch_gp)
        for m in list_gp.models:
            self.assertIsInstance(m.input_transform, Normalize)
            self.assertTrue(torch.equal(m.input_transform.bounds, input_tf.bounds))
def _getBatchedModel(self, kind="SingleTaskGP", double=False, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    dtype = torch.double if double else torch.float
    train_x = torch.linspace(0, 1, 10, device=device, dtype=dtype).unsqueeze(-1)
    noise = torch.tensor(NOISE, device=device, dtype=dtype)
    train_y1 = torch.sin(train_x * (2 * math.pi)) + noise
    train_y2 = torch.sin(train_x * (2 * math.pi)) + noise
    train_y = torch.cat([train_y1, train_y2], dim=-1)
    if kind == "SingleTaskGP":
        model = SingleTaskGP(train_x, train_y)
    elif kind == "FixedNoiseGP":
        model = FixedNoiseGP(train_x, train_y, 0.1 * torch.ones_like(train_y))
    elif kind == "HeteroskedasticSingleTaskGP":
        model = HeteroskedasticSingleTaskGP(train_x, train_y, 0.1 * torch.ones_like(train_y))
    else:
        raise NotImplementedError
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    return mll.to(device=device, dtype=dtype)
def get_fitted_model(train_X, train_Y, train_Yvar, state_dict=None):
    """
    Get a single task GP. The model will be fit unless a state_dict with model
    hyperparameters is provided.
    """
    Y_mean = train_Y.mean(dim=-2, keepdim=True)
    Y_std = train_Y.std(dim=-2, keepdim=True)
    model = FixedNoiseGP(train_X, (train_Y - Y_mean) / Y_std, train_Yvar)
    model.Y_mean = Y_mean
    model.Y_std = Y_std
    if state_dict is None:
        mll = ExactMarginalLogLikelihood(model.likelihood, model).to(train_X)
        fit_gpytorch_model(mll)
    else:
        model.load_state_dict(state_dict)
    return model
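# Usage sketch (illustrative, not from the original source): the model above
# is fit on standardized targets, so posterior means must be mapped back to
# the original scale using the Y_mean / Y_std attached to the model.
# `test_X` is a hypothetical tensor of test inputs.
model = get_fitted_model(train_X, train_Y, train_Yvar)
with torch.no_grad():
    posterior = model.posterior(test_X)
    mean_unscaled = posterior.mean * model.Y_std + model.Y_mean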
def initialize_model(X, Y, old_model=None, **kwargs):
    if old_model is None:
        covar_module = ScaleKernel(
            MaternKernel(
                nu=2.5,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                lengthscale_constraint=Interval(1e-4, 12.0),
            ),
            outputscale_prior=GammaPrior(2.0, 0.15),
            outputscale_constraint=Interval(1e-4, 12.0),
        )
        if args.fixed_noise:
            model_obj = FixedNoiseGP(X, Y, train_Yvar=noise, covar_module=covar_module)
        else:
            model_obj = SingleTaskGP(X, Y, covar_module=covar_module)
    else:
        model_obj = old_model
    mll = ExactMarginalLogLikelihood(model_obj.likelihood, model_obj)
    return model_obj, mll
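# Usage sketch (illustrative; `args.fixed_noise` and `noise` are assumed
# globals of the surrounding script): a fresh model is built on the first
# call, while later calls can pass `old_model` to skip reconstruction.
model, mll = initialize_model(X, Y)
fit_gpytorch_model(mll)
model, mll = initialize_model(X, Y, old_model=model)  # reuses the fitted model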
def CreateModel(xtrain, ytrain):
    '''
    Creates and trains a GPyTorch GP model.
    '''
    # noise_prior = GammaPrior(0.1, 0.05)
    # noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
    # MIN_INFERRED_NOISE_LEVEL = 1e-5
    # likelihood = GaussianLikelihood(
    #     noise_prior=noise_prior,
    #     noise_constraint=GreaterThan(
    #         MIN_INFERRED_NOISE_LEVEL,
    #         transform=None,
    #         initial_value=noise_prior_mode,
    #     ),
    # )
    # model = SingleTaskGP(xtrain, ytrain, likelihood=likelihood)
    model = FixedNoiseGP(xtrain, ytrain, train_Yvar=torch.full_like(ytrain, 1e-4))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)
    return model
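# Usage sketch (illustrative, not from the original source): with train_Yvar
# pinned to 1e-4, the fitted GP acts as a near-noiseless interpolator.
xtrain = torch.rand(20, 3)
ytrain = xtrain.sum(dim=-1, keepdim=True)
model = CreateModel(xtrain, ytrain)
with torch.no_grad():
    pred = model.posterior(xtrain).mean  # ~ ytrain, up to the 1e-4 noise floor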
def _get_model(self, cuda=False, dtype=torch.float):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    state_dict = {
        "mean_module.constant": torch.tensor([-0.0066]),
        "covar_module.raw_outputscale": torch.tensor(1.0143),
        "covar_module.base_kernel.raw_lengthscale": torch.tensor([[-0.99]]),
        "covar_module.base_kernel.lengthscale_prior.concentration": torch.tensor(3.0),
        "covar_module.base_kernel.lengthscale_prior.rate": torch.tensor(6.0),
        "covar_module.outputscale_prior.concentration": torch.tensor(2.0),
        "covar_module.outputscale_prior.rate": torch.tensor(0.1500),
    }
    train_x = torch.linspace(0, 1, 10, device=device, dtype=dtype)
    train_y = torch.sin(train_x * (2 * math.pi))
    noise = torch.tensor(NEI_NOISE, device=device, dtype=dtype)
    train_y += noise
    train_yvar = torch.full_like(train_y, 0.25**2)
    train_x = train_x.view(-1, 1)
    model = FixedNoiseGP(train_X=train_x, train_Y=train_y, train_Yvar=train_yvar)
    model.load_state_dict(state_dict)
    model.to(train_x)
    model.eval()
    return model
def test_model_list_to_batched(self):
    for dtype in (torch.float, torch.double):
        # basic test
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1, keepdim=True)
        train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
        gp1 = SingleTaskGP(train_X, train_Y1)
        gp2 = SingleTaskGP(train_X, train_Y2)
        list_gp = ModelListGP(gp1, gp2)
        batch_gp = model_list_to_batched(list_gp)
        self.assertIsInstance(batch_gp, SingleTaskGP)
        # test degenerate (single model)
        batch_gp = model_list_to_batched(ModelListGP(gp1))
        self.assertEqual(batch_gp._num_outputs, 1)
        # test different model classes
        gp2 = FixedNoiseGP(train_X, train_Y1, torch.ones_like(train_Y1))
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test non-batched models
        gp1_ = SimpleGPyTorchModel(train_X, train_Y1)
        gp2_ = SimpleGPyTorchModel(train_X, train_Y2)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1_, gp2_))
        # test list of multi-output models
        train_Y = torch.cat([train_Y1, train_Y2], dim=-1)
        gp2 = SingleTaskGP(train_X, train_Y)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test different training inputs
        gp2 = SingleTaskGP(2 * train_X, train_Y2)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # check scalar agreement
        gp2 = SingleTaskGP(train_X, train_Y2)
        gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # check tensor shape agreement
        gp2 = SingleTaskGP(train_X, train_Y2)
        gp2.covar_module.raw_outputscale = torch.nn.Parameter(
            torch.tensor([0.0], device=self.device, dtype=dtype)
        )
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test HeteroskedasticSingleTaskGP
        gp2 = HeteroskedasticSingleTaskGP(train_X, train_Y1, torch.ones_like(train_Y1))
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(gp2))
        # test custom likelihood
        gp2 = SingleTaskGP(train_X, train_Y2, likelihood=GaussianLikelihood())
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(gp2))
        # test FixedNoiseGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1, keepdim=True)
        train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
        gp1_ = FixedNoiseGP(train_X, train_Y1, torch.rand_like(train_Y1))
        gp2_ = FixedNoiseGP(train_X, train_Y2, torch.rand_like(train_Y2))
        list_gp = ModelListGP(gp1_, gp2_)
        batch_gp = model_list_to_batched(list_gp)
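# Minimal sketch (illustrative, not from the original source) of the
# conversion exercised above: single-output GPs sharing training inputs
# collapse into one batched GP, and batched_to_model_list inverts it.
import torch
from botorch.models import ModelListGP, SingleTaskGP
from botorch.models.converter import batched_to_model_list, model_list_to_batched

train_X = torch.rand(10, 2)
gp1 = SingleTaskGP(train_X, train_X.sum(dim=-1, keepdim=True))
gp2 = SingleTaskGP(train_X, train_X.prod(dim=-1, keepdim=True))
batch_gp = model_list_to_batched(ModelListGP(gp1, gp2))
list_gp = batched_to_model_list(batch_gp)  # round trip back to a model list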
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations,
         # acquisition_name,
         # acquisition_optimizer_name,
         gamma, num_random_init, num_restarts, raw_samples,
         noise_variance_init,
         # use_ard,
         # use_input_warping,
         standardize_targets, input_dir, output_dir):

    # TODO(LT): Turn into options
    # device = "cpu"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(gamma=gamma, num_random_init=num_random_init,
                   num_restarts=num_restarts, raw_samples=raw_samples,
                   noise_variance_init=noise_variance_init,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(), device=device, dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """Wrapper that receives and returns torch.Tensor."""
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        res = -benchmark.evaluate(config).value
        return torch.tensor(res, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):
        t_start = datetime.now()

        rows = []
        features = []
        targets = []
        noise_variance = torch.tensor(noise_variance_init, device=device, dtype=dtype)
        state_dict = None

        with trange(num_iterations) as iterations:
            for i in iterations:
                if len(targets) < num_random_init:
                    # click.echo(f"Completed {i}/{num_random_init} initial runs. "
                    #            "Suggesting random candidate...")
                    # TODO(LT): support random seed
                    x_new = torch.rand(size=(input_dim,), device=device, dtype=dtype)
                else:
                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(dim=-1)
                    y = standardize(y) if standardize_targets else y

                    # construct model
                    # model = FixedNoiseGP(X, standardize(y), noise_variance.expand_as(y),
                    model = FixedNoiseGP(X, y, noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)
                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1 - gamma)
                    iterations.set_postfix(tau=tau.item())
                    ei = ExpectedImprovement(model=model, best_f=tau)

                    # optimize acquisition function
                    X_batch, b = optimize_acqf(acq_function=ei,
                                               bounds=bounds,
                                               q=1,
                                               num_restarts=num_restarts,
                                               raw_samples=raw_samples,
                                               options=dict(batch_limit=5, maxiter=200))
                    x_new = X_batch.squeeze(dim=0)

                    state_dict = model.state_dict()

                # evaluate blackbox objective
                # t0 = datetime.now()
                y_new = func(x_new)
                t1 = datetime.now()

                delta = t1 - t_start

                # update dataset
                features.append(x_new)
                targets.append(y_new)

                row = dict_from_tensor(x_new, cs=config_space)
                row["loss"] = -y_new.item()
                row["finished"] = delta.total_seconds()
                rows.append(row)

        data = pd.DataFrame(data=rows)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, acquisition_name,
         # acquisition_optimizer_name,
         gamma, num_random_init, mc_samples, batch_size, num_fantasies,
         num_restarts, raw_samples, noise_variance_init,
         # use_ard,
         # use_input_warping,
         standardize_targets, input_dir, output_dir):

    # TODO(LT): Turn into options
    # device = "cpu"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    options = dict(gamma=gamma, num_random_init=num_random_init,
                   acquisition_name=acquisition_name,
                   mc_samples=mc_samples, batch_size=batch_size,
                   num_restarts=num_restarts, raw_samples=raw_samples,
                   num_fantasies=num_fantasies,
                   noise_variance_init=noise_variance_init,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(), device=device, dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """Wrapper that receives and returns torch.Tensor."""
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        res = -benchmark.evaluate(config).value
        return torch.tensor(res, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):
        run_begin_t = batch_end_t_adj = batch_end_t = datetime.now()

        frames = []
        features = []
        targets = []
        noise_variance = torch.tensor(noise_variance_init, device=device, dtype=dtype)
        state_dict = None

        with trange(num_iterations) as iterations:
            for batch in iterations:
                if len(targets) < num_random_init:
                    # click.echo(f"Completed {i}/{num_random_init} initial runs. "
                    #            "Suggesting random candidate...")
                    # TODO(LT): support random seed
                    X_batch = torch.rand(size=(batch_size, input_dim),
                                         device=device, dtype=dtype)
                else:
                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(dim=-1)
                    y = standardize(y) if standardize_targets else y

                    # construct model
                    # model = FixedNoiseGP(X, standardize(y), noise_variance.expand_as(y),
                    model = FixedNoiseGP(X, y, noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)
                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1 - gamma)
                    iterations.set_postfix(tau=tau.item())

                    if acquisition_name == "q-KG":
                        assert num_fantasies is not None and num_fantasies > 0
                        acq = qKnowledgeGradient(model, num_fantasies=num_fantasies)
                    elif acquisition_name == "q-EI":
                        assert mc_samples is not None and mc_samples > 0
                        qmc_sampler = SobolQMCNormalSampler(num_samples=mc_samples)
                        acq = qExpectedImprovement(model=model, best_f=tau,
                                                   sampler=qmc_sampler)

                    # optimize acquisition function
                    X_batch, b = optimize_acqf(acq_function=acq,
                                               bounds=bounds,
                                               q=batch_size,
                                               num_restarts=num_restarts,
                                               raw_samples=raw_samples,
                                               options=dict(batch_limit=5, maxiter=200))

                    state_dict = model.state_dict()

                # begin batch evaluation
                batch_begin_t = datetime.now()
                decision_duration = batch_begin_t - batch_end_t
                batch_begin_t_adj = batch_end_t_adj + decision_duration

                eval_end_times = []

                # TODO(LT): Deliberately not doing broadcasting for now since
                # batch sizes are so small anyway. Can revisit later if there
                # is a compelling reason to do it.
                rows = []
                for j, x_next in enumerate(X_batch):
                    # eval begin time
                    eval_begin_t = datetime.now()

                    # evaluate blackbox objective
                    y_next = func(x_next)

                    # eval end time
                    eval_end_t = datetime.now()

                    # eval duration
                    eval_duration = eval_end_t - eval_begin_t

                    # adjusted eval end time is the duration added to the
                    # time at which batch eval was started
                    eval_end_t_adj = batch_begin_t_adj + eval_duration
                    eval_end_times.append(eval_end_t_adj)

                    elapsed = eval_end_t_adj - run_begin_t

                    # update dataset
                    features.append(x_next)
                    targets.append(y_next)

                    row = dict_from_tensor(x_next, cs=config_space)
                    row["loss"] = -y_next.item()
                    row["cost_eval"] = eval_duration.total_seconds()
                    row["finished"] = elapsed.total_seconds()
                    rows.append(row)

                batch_end_t = datetime.now()
                batch_end_t_adj = max(eval_end_times)

                frame = pd.DataFrame(data=rows) \
                    .assign(batch=batch,
                            cost_decision=decision_duration.total_seconds())
                frames.append(frame)

        data = pd.concat(frames, axis="index", ignore_index=True)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
def test_model_list_to_batched(self):
    for dtype in (torch.float, torch.double):
        # basic test
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1, keepdim=True)
        train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
        gp1 = SingleTaskGP(train_X, train_Y1)
        gp2 = SingleTaskGP(train_X, train_Y2)
        list_gp = ModelListGP(gp1, gp2)
        batch_gp = model_list_to_batched(list_gp)
        self.assertIsInstance(batch_gp, SingleTaskGP)
        # test degenerate (single model)
        batch_gp = model_list_to_batched(ModelListGP(gp1))
        self.assertEqual(batch_gp._num_outputs, 1)
        # test different model classes
        gp2 = FixedNoiseGP(train_X, train_Y1, torch.ones_like(train_Y1))
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test non-batched models
        gp1_ = SimpleGPyTorchModel(train_X, train_Y1)
        gp2_ = SimpleGPyTorchModel(train_X, train_Y2)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1_, gp2_))
        # test list of multi-output models
        train_Y = torch.cat([train_Y1, train_Y2], dim=-1)
        gp2 = SingleTaskGP(train_X, train_Y)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test different training inputs
        gp2 = SingleTaskGP(2 * train_X, train_Y2)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # check scalar agreement
        gp2 = SingleTaskGP(train_X, train_Y2)
        gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # check tensor shape agreement
        gp2 = SingleTaskGP(train_X, train_Y2)
        gp2.covar_module.raw_outputscale = torch.nn.Parameter(
            torch.tensor([0.0], device=self.device, dtype=dtype)
        )
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(ModelListGP(gp1, gp2))
        # test HeteroskedasticSingleTaskGP
        gp2 = HeteroskedasticSingleTaskGP(train_X, train_Y1, torch.ones_like(train_Y1))
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(gp2))
        # test custom likelihood
        gp2 = SingleTaskGP(train_X, train_Y2, likelihood=GaussianLikelihood())
        with self.assertRaises(NotImplementedError):
            model_list_to_batched(ModelListGP(gp2))
        # test FixedNoiseGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y1 = train_X.sum(dim=-1, keepdim=True)
        train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
        gp1_ = FixedNoiseGP(train_X, train_Y1, torch.rand_like(train_Y1))
        gp2_ = FixedNoiseGP(train_X, train_Y2, torch.rand_like(train_Y2))
        list_gp = ModelListGP(gp1_, gp2_)
        batch_gp = model_list_to_batched(list_gp)
        # test SingleTaskMultiFidelityGP
        gp1_ = SingleTaskMultiFidelityGP(train_X, train_Y1, iteration_fidelity=1)
        gp2_ = SingleTaskMultiFidelityGP(train_X, train_Y2, iteration_fidelity=1)
        list_gp = ModelListGP(gp1_, gp2_)
        batch_gp = model_list_to_batched(list_gp)
        gp2_ = SingleTaskMultiFidelityGP(train_X, train_Y2, iteration_fidelity=2)
        list_gp = ModelListGP(gp1_, gp2_)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(list_gp)
        # test input transform
        input_tf = Normalize(
            d=2,
            bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], device=self.device, dtype=dtype),
        )
        gp1_ = SingleTaskGP(train_X, train_Y1, input_transform=input_tf)
        gp2_ = SingleTaskGP(train_X, train_Y2, input_transform=input_tf)
        list_gp = ModelListGP(gp1_, gp2_)
        batch_gp = model_list_to_batched(list_gp)
        self.assertIsInstance(batch_gp.input_transform, Normalize)
        self.assertTrue(torch.equal(batch_gp.input_transform.bounds, input_tf.bounds))
        # test different input transforms
        input_tf2 = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], device=self.device, dtype=dtype),
        )
        gp1_ = SingleTaskGP(train_X, train_Y1, input_transform=input_tf)
        gp2_ = SingleTaskGP(train_X, train_Y2, input_transform=input_tf2)
        list_gp = ModelListGP(gp1_, gp2_)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(list_gp)
        # test batched input transform
        input_tf2 = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], device=self.device, dtype=dtype),
            batch_shape=torch.Size([3]),
        )
        gp1_ = SingleTaskGP(train_X, train_Y1, input_transform=input_tf2)
        gp2_ = SingleTaskGP(train_X, train_Y2, input_transform=input_tf2)
        list_gp = ModelListGP(gp1_, gp2_)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(list_gp)
        # test outcome transform
        octf = Standardize(m=1)
        gp1_ = SingleTaskGP(train_X, train_Y1, outcome_transform=octf)
        gp2_ = SingleTaskGP(train_X, train_Y2, outcome_transform=octf)
        list_gp = ModelListGP(gp1_, gp2_)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(list_gp)
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(args.data_loc, args.num_init, args.num_total)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    covar_module = ScaleKernel(
        MaternKernel(
            ard_num_dims=2,
            nu=0.5,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        ),
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    if not args.exact:
        covar_module = GridInterpolationKernel(
            base_kernel=covar_module,
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        )

    model = FixedNoiseGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        covar_module=covar_module,
    ).to(device)
    model.mean_module = ZeroMean()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_cholesky_size(args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(train_x)
        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)
    rmse_initial = ((pred_mean.view(-1) - train_y.view(-1)) ** 2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_cholesky_size(args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(*model.train_inputs), model.train_targets).sum()
        loss.backward()
        mll_time = time.time() - start  # store positive duration

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()

        start = time.time()
        if not args.reset_training_data:
            with torch.no_grad():
                model.eval()
                model.posterior(train_x[i].unsqueeze(0))
                model = model.condition_on_observations(
                    X=train_x[i].unsqueeze(0),
                    Y=train_y[i].view(1, 1),
                    noise=train_y_var[i].view(-1, 1),
                )
        else:
            model.set_train_data(train_x[:i], train_y[:i], strict=False)
            model.likelihood.noise = train_y_var[:i].t()
        fantasy_time = time.time() - start  # store positive duration

        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()
            with detach_test_caches(), max_cholesky_size(10000):
                pred_dist = model(train_x)
            end = time.time()

            rmse = ((pred_dist.mean - train_y.view(-1)) ** 2).mean().sqrt().item()
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            # print("Outputscale: ", model.covar_module.base_kernel.raw_outputscale)
            # print("Lengthscale: ", model.covar_module.base_kernel.base_kernel.raw_lengthscale)
            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({"training": mll_time_list, "predictions": rmse_list}, args.output)
def test_batched_multi_output_to_single_output(self):
    for dtype in (torch.float, torch.double):
        # basic test
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        train_Y = torch.stack(
            [train_X.sum(dim=-1), (train_X[:, 0] - train_X[:, 1])],
            dim=1,
        )
        batched_mo_model = SingleTaskGP(train_X, train_Y)
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model, SingleTaskGP)
        self.assertEqual(batched_so_model.num_outputs, 1)
        # test non-batched models
        non_batch_model = SimpleGPyTorchModel(train_X, train_Y[:, :1])
        with self.assertRaises(UnsupportedError):
            batched_multi_output_to_single_output(non_batch_model)
        gp2 = HeteroskedasticSingleTaskGP(train_X, train_Y, torch.ones_like(train_Y))
        with self.assertRaises(NotImplementedError):
            batched_multi_output_to_single_output(gp2)
        # test custom likelihood
        gp2 = SingleTaskGP(train_X, train_Y, likelihood=GaussianLikelihood())
        with self.assertRaises(NotImplementedError):
            batched_multi_output_to_single_output(gp2)
        # test FixedNoiseGP
        train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
        batched_mo_model = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model, FixedNoiseGP)
        self.assertEqual(batched_so_model.num_outputs, 1)
        # test SingleTaskMultiFidelityGP
        batched_mo_model = SingleTaskMultiFidelityGP(train_X, train_Y, iteration_fidelity=1)
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batched_so_model, SingleTaskMultiFidelityGP)
        self.assertEqual(batched_so_model.num_outputs, 1)
        # test input transform
        input_tf = Normalize(
            d=2,
            bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], device=self.device, dtype=dtype),
        )
        batched_mo_model = SingleTaskGP(train_X, train_Y, input_transform=input_tf)
        batch_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batch_so_model.input_transform, Normalize)
        self.assertTrue(torch.equal(batch_so_model.input_transform.bounds, input_tf.bounds))
        # test batched input transform
        input_tf2 = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], device=self.device, dtype=dtype),
            batch_shape=torch.Size([2]),
        )
        batched_mo_model = SingleTaskGP(train_X, train_Y, input_transform=input_tf2)
        batched_so_model = batched_multi_output_to_single_output(batched_mo_model)
        self.assertIsInstance(batch_so_model.input_transform, Normalize)
        self.assertTrue(torch.equal(batch_so_model.input_transform.bounds, input_tf.bounds))
        # test outcome transform
        batched_mo_model = SingleTaskGP(train_X, train_Y, outcome_transform=Standardize(m=2))
        with self.assertRaises(NotImplementedError):
            batched_multi_output_to_single_output(batched_mo_model)
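# Minimal sketch (illustrative, not from the original source): collapsing a
# batched multi-output GP to a single-output model, as the test above checks.
import torch
from botorch.models import SingleTaskGP
from botorch.models.converter import batched_multi_output_to_single_output

train_X = torch.rand(10, 2)
train_Y = torch.rand(10, 2)
so_model = batched_multi_output_to_single_output(SingleTaskGP(train_X, train_Y))
assert so_model.num_outputs == 1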
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc,
        args.num_init,
        args.num_total,
        test_is_year=False,
        seed=args.seed,
    )
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    if args.model == "wiski":
        model = FixedNoiseOnlineSKIGP(
            init_x,
            init_y.view(-1, 1),
            init_y_var.view(-1, 1),
            GridInterpolationKernel(
                base_kernel=ScaleKernel(
                    MaternKernel(
                        ard_num_dims=2,
                        nu=0.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                ),
                grid_size=30,
                num_dims=2,
                grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
            ),
            learn_additional_noise=False,
        ).to(device)

        mll_type = lambda x, y: BatchedWoodburyMarginalLogLikelihood(
            x, y, clear_caches_every_iteration=True
        )
    elif args.model == "exact":
        model = FixedNoiseGP(
            init_x,
            init_y.view(-1, 1),
            init_y_var.view(-1, 1),
            ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
        ).to(device)
        mll_type = ExactMarginalLogLikelihood

    mll = mll_type(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    model.train()
    model.zero_grad()
    # with max_cholesky_size(args.cholesky_size), skip_logdet_forward(True), \
    #         use_toeplitz(args.toeplitz), max_root_decomposition_size(args.sketch_size):
    fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    print("Named parameters: ", list(model.named_parameters()))

    print("--- Now computing initial RMSE")
    model.eval()
    with gpytorch.settings.skip_posterior_variances(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean - test_y) ** 2).mean().sqrt()
    print("---- Initial RMSE: ", pred_rmse.item())

    all_outputs = []
    start_ind = init_x.shape[0]
    end_ind = int(start_ind + args.batch_size)
    for step in range(args.num_steps):
        if step > 0 and step % 25 == 0:
            print("Beginning step ", step)

        total_time_step_start = time.time()

        if step > 0:
            print("---- Fitting model ----")
            start = time.time()
            model.train()
            model.zero_grad()
            mll = mll_type(model.likelihood, model)
            # with skip_logdet_forward(True), max_root_decomposition_size(args.sketch_size), \
            #         max_cholesky_size(args.cholesky_size), use_toeplitz(args.toeplitz):
            fit_gpytorch_torch(mll, options={"lr": 0.01 * (0.99**step), "maxiter": 300})
            model.zero_grad()
            end = time.time()
            print("Elapsed fitting time: ", end - start)
            print("Named parameters: ", list(model.named_parameters()))

        if not args.random:
            if args.model == "wiski":
                botorch_model = OnlineSKIBotorchModel(model=model)
            else:
                botorch_model = model
            # qmc_sampler = SobolQMCNormalSampler(num_samples=4)

            bounds = torch.stack([torch.zeros(2), torch.ones(2)]).to(device)
            qnipv = qNIPV(
                model=botorch_model,
                mc_points=test_x,
                # sampler=qmc_sampler,
            )

            # with use_toeplitz(args.toeplitz), root_pred_var(True), fast_pred_var(True):
            candidates, acq_value = optimize_acqf(
                acq_function=qnipv,
                bounds=bounds,
                q=args.batch_size,
                num_restarts=1,
                raw_samples=10,  # used for initialization heuristic
                options={"batch_limit": 5, "maxiter": 200},
            )
        else:
            candidates = torch.rand(
                args.batch_size, train_x.shape[-1], device=device, dtype=train_x.dtype
            )
            acq_value = torch.zeros(1)

        model.eval()
        _ = model(test_x[:10])  # to init caches

        print("---- Finished optimizing; now querying dataset ---- ")
        with torch.no_grad():
            covar_dists = model.covar_module(candidates, train_x)
            nearest_points = covar_dists.evaluate().argmax(dim=-1)
            new_x = train_x[nearest_points]
            new_y = train_y[nearest_points]
            new_y_var = train_y_var[nearest_points]

            todrop = torch.tensor([x in nearest_points for x in range(train_x.shape[0])])
            train_x, train_y, train_y_var = train_x[~todrop], train_y[~todrop], train_y_var[~todrop]
            print("New train_x shape", train_x.shape)

            print("--- Now updating model with simulator ----")
            model = model.condition_on_observations(
                X=new_x, Y=new_y.view(-1, 1), noise=new_y_var.view(-1, 1)
            )

        print("--- Now computing updated RMSE")
        model.eval()
        # with gpytorch.settings.fast_pred_var(True), detach_test_caches(True), \
        #         max_root_decomposition_size(args.sketch_size), \
        #         max_cholesky_size(args.cholesky_size), \
        #         use_toeplitz(args.toeplitz), root_pred_var(True):
        test_pred = model(test_x)
        pred_rmse = ((test_pred.mean.view(-1) - test_y.view(-1)) ** 2).mean().sqrt()
        pred_avg_variance = test_pred.variance.mean()

        total_time_step_elapsed_time = time.time() - total_time_step_start
        step_output_list = [
            total_time_step_elapsed_time,
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item(),
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            "x": model.cpu().train_inputs[0],
            "y": model.cpu().train_targets,
        },
        "results": DataFrame(all_outputs),
    }
    torch.save(output_dict, args.output)