def get_gpr_model(X, y, model=None):
    """
    Fit a GPR model to the data or update the model with new data

    Params ::
    X: (sx1) Tensor: Covariates
    y: (sx1) Tensor: Observations
    model: PyTorch SingleTaskGP model: If model is passed, X and y are used to
        update it. If None then model is trained on X and y. Default is None

    Return ::
    model: PyTorch SingleTaskGP model: Trained or updated model.
        Returned in train mode
    mll: PyTorch MarginalLogLikelihood object: Returned in train mode
    """
    if model is None:
        # set up model
        model = SingleTaskGP(X, y)
    else:
        # update model with new observations
        model = model.condition_on_observations(X, y)
    mll = ExactMarginalLogLikelihood(model.likelihood, model).to(X)
    # begin training
    model.train()
    mll.train()
    fit_gpytorch_model(mll)
    return model, mll

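# Added usage sketch for get_gpr_model (illustrative, not part of the original
# source); assumes torch plus the BoTorch names used above are imported.
# Note: condition_on_observations requires that a posterior has been computed
# at least once, so we query the model before updating it.
def _demo_get_gpr_model():
    X = torch.rand(8, 1)
    y = torch.sin(6.0 * X)
    model, mll = get_gpr_model(X, y)  # initial fit
    X_new = torch.rand(2, 1)
    y_new = torch.sin(6.0 * X_new)
    model.posterior(X_new)  # populate caches needed for conditioning
    model, mll = get_gpr_model(X_new, y_new, model=model)  # update
    return model
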
def learn_projections(base_kernels,
                      xs,
                      ys,
                      max_projections=10,
                      mse_threshold=0.0001,
                      post_fit=False,
                      backfit_iters=5,
                      **optim_kwargs):
    n, d = xs.shape
    pred_means = torch.zeros(max_projections, n)
    models = []
    for bf_iter in range(backfit_iters):
        # After the first pass, only revisit the projections actually created
        # (the MSE early stop below may leave fewer than max_projections).
        n_projections = max_projections if bf_iter == 0 else len(models)
        for i in range(n_projections):
            # Residuals against all other components (note the i+1: slice).
            residuals = ys - pred_means[:i, :].sum(dim=0) \
                - pred_means[i + 1:, :].sum(dim=0)
            if bf_iter == 0:
                with torch.no_grad():
                    coef = torch.pinverse(xs).matmul(residuals).reshape(1, -1)
                base_kernel = base_kernels[i]
                projection = torch.nn.Linear(d, 1, bias=False).to(xs)
                projection.weight.data = coef
                kernel = ScaledProjectionKernel(projection, base_kernel)
                model = ExactGPModel(xs, residuals, GaussianLikelihood(),
                                     kernel).to(xs)
                models.append(model)
            else:
                model = models[i]
            mll = ExactMarginalLogLikelihood(model.likelihood, model).to(xs)
            # mll.train()
            model.train()
            train_to_convergence(model, xs, residuals, objective=mll,
                                 **optim_kwargs)

            model.eval()
            with torch.no_grad():
                pred_mean = model(xs).mean
                pred_means[i, :] = pred_mean
                residuals = residuals - pred_mean
                mse = (residuals**2).mean()
            print(mse.item(), end='; ')
            if mse < mse_threshold:
                break
        print()

    joint_kernel = AdditiveKernel(*[model.covar_module for model in models])
    joint_model = ExactGPModel(xs, ys, GaussianLikelihood(), joint_kernel).to(xs)
    if post_fit:
        mll = ExactMarginalLogLikelihood(joint_model.likelihood,
                                         joint_model).to(xs)
        train_to_convergence(joint_model, xs, ys, objective=mll, **optim_kwargs)
    return joint_model

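# Added hypothetical call for learn_projections; ScaledProjectionKernel,
# ExactGPModel, and train_to_convergence are this module's own helpers, and
# RBFKernel comes from gpytorch.kernels. xs is (n, d), ys is (n,).
# base_kernels = [RBFKernel() for _ in range(10)]
# joint_model = learn_projections(base_kernels, xs, ys,
#                                 max_projections=10, post_fit=True)
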
def get_fitted_model(x, obj, state_dict=None):
    # initialize and fit model
    fitted_model = SingleTaskGP(train_X=x, train_Y=obj)
    if state_dict is not None:
        fitted_model.load_state_dict(state_dict)
    mll = ExactMarginalLogLikelihood(fitted_model.likelihood, fitted_model)
    mll.to(x)
    fit_gpytorch_model(mll)
    return fitted_model

def initialize_model(train_x, train_y, train_y_sem):
    """
    Defines a GP given X, Y, and noise observations (standard error of mean)

    train_x (theta):         (n_observations, n_parameters)
    train_y (G_obs):         (n_observations, n_black_box_outputs)
    train_y_sem (G_obs_sem): (n_observations, n_black_box_outputs)
    """
    train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units

    # standardize outputs to zero mean, unit variance (over n_observations dimension)
    n_output_dims = (n_days * n_age if per_age_group_objective else n_days) \
        if args.model_multi_output_simulator else 1
    outcome_transform = Standardize(m=n_output_dims)
    # train_y = standardize(train_y) (the above also normalizes noise)

    # choose model
    if args.model_noise_via_sem:
        assert args.model_multi_output_simulator
        model = FixedNoiseGP(train_x, train_y, train_ynoise,
                             outcome_transform=outcome_transform)
    else:
        model = SingleTaskGP(train_x, train_y,
                             outcome_transform=outcome_transform)

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(model.likelihood, model)

    return mll, model

def train_gp(train_x, train_y, use_ard, num_steps, hypers={}):
    """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized."""
    assert train_x.ndim == 2
    assert train_y.ndim == 1
    assert train_x.shape[0] == train_y.shape[0]

    # Create hyper parameter bounds
    noise_constraint = Interval(5e-4, 0.2)
    if use_ard:
        lengthscale_constraint = Interval(0.005, 2.0)
    else:
        lengthscale_constraint = Interval(
            0.005, math.sqrt(train_x.shape[1]))  # [0.005, sqrt(dim)]
    outputscale_constraint = Interval(0.05, 20.0)

    # Create models
    likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(
        device=train_x.device, dtype=train_y.dtype)
    ard_dims = train_x.shape[1] if use_ard else None
    model = GP(
        train_x=train_x,
        train_y=train_y,
        likelihood=likelihood,
        lengthscale_constraint=lengthscale_constraint,
        outputscale_constraint=outputscale_constraint,
        ard_dims=ard_dims,
    ).to(device=train_x.device, dtype=train_x.dtype)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    # Initialize model hypers
    if hypers:
        model.load_state_dict(hypers)
    else:
        hypers = {}
        hypers["covar_module.outputscale"] = 1.0
        hypers["covar_module.base_kernel.lengthscale"] = 0.5
        hypers["likelihood.noise"] = 0.005
        model.initialize(**hypers)

    # Use the adam optimizer
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Switch to eval mode
    model.eval()
    likelihood.eval()

    return model

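# Added usage sketch for train_gp (illustrative only): inputs must already lie
# in the unit cube and targets must be standardized, per the docstring above.
# Assumes torch and the module's custom GP class are in scope.
def _demo_train_gp():
    train_x = torch.rand(20, 3)  # in [0, 1]^3
    raw_y = torch.sin(train_x.sum(dim=-1))
    train_y = (raw_y - raw_y.mean()) / raw_y.std()  # standardized targets
    return train_gp(train_x, train_y, use_ard=True, num_steps=50)
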
def test_optimizing(self):
    # This test should pass so long as nothing breaks.
    torch.random.manual_seed(1)
    data = torch.randn(40, 4)
    target = torch.sin(data).sum(dim=-1)
    d = 4
    AddK = NewtonGirardAdditiveKernel(RBFKernel(ard_num_dims=d), d, max_degree=3)

    class TestGPModel(ExactGP):
        def __init__(self, train_x, train_y, likelihood, kernel):
            super().__init__(train_x, train_y, likelihood)
            self.mean_module = ConstantMean()
            self.covar_module = kernel

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return MultivariateNormal(mean_x, covar_x)

    model = TestGPModel(data, target, GaussianLikelihood(), ScaleKernel(AddK))
    optim = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    model.train()
    for i in range(2):
        optim.zero_grad()
        out = model(data)
        loss = -mll(out, target)
        loss.backward()
        optim.step()

def optimize_EI(gp, best_f, n_dim):
    """
    Reference: https://botorch.org/api/optim.html

    bounds: 2d-ndarray (2, D)
        The lower and upper bound of each parameter.
    q: int
        The number of candidates to sample.
    num_restarts: int
        The number of starting points for multistart optimization.
    raw_samples: int
        The number of initial points.

    The return shape of joint_optimize is (num_restarts, q, D).
    """
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)
    ei = ExpectedImprovement(gp, best_f=best_f, maximize=False)
    bounds = torch.from_numpy(np.array([[0.] * n_dim, [1.] * n_dim]))
    x = joint_optimize(ei,
                       bounds=bounds,
                       q=1,
                       num_restarts=3,
                       raw_samples=15)
    return np.array(x[0])

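# Added hypothetical driver for optimize_EI: the acquisition is built with
# maximize=False, so best_f should be the incumbent minimum, and the bounds in
# the function assume inputs scaled to [0, 1]^n_dim. (joint_optimize is the
# legacy BoTorch entry point, superseded by optimize_acqf in newer releases.)
def _demo_optimize_EI(train_x, train_y):
    gp = SingleTaskGP(train_x, train_y)
    return optimize_EI(gp, best_f=train_y.min().item(),
                       n_dim=train_x.shape[-1])
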
def initialize_model(train_x, train_obj, state_dict=None):
    # train_yvar (known observation-noise variance) is assumed to be defined
    # at module scope
    model = FixedNoiseGP(train_x, train_obj,
                         train_yvar.expand_as(train_obj)).to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model

def fit_uncertainty_estimator(self, features=None, targets=None):
    if self.no_deup:
        return None
    self.e_predictor = SingleTaskGP(features, targets)
    mll = ExactMarginalLogLikelihood(self.e_predictor.likelihood,
                                     self.e_predictor)
    fit_gpytorch_model(mll)

def setUp(self):
    super().setUp()
    torch.random.manual_seed(0)
    train_x = torch.rand(2, 10, 1, device=self.device)
    train_y = torch.randn(2, 10, 3, 5, device=self.device)

    self.model = HigherOrderGP(train_x, train_y)

    # check that we can assign different kernels and likelihoods
    model_2 = HigherOrderGP(
        train_X=train_x,
        train_Y=train_y,
        covar_modules=[RBFKernel(), RBFKernel(), RBFKernel()],
        likelihood=GaussianLikelihood(),
    )

    model_3 = HigherOrderGP(
        train_X=train_x,
        train_Y=train_y,
        covar_modules=[RBFKernel(), RBFKernel(), RBFKernel()],
        likelihood=GaussianLikelihood(),
        latent_init="gp",
    )

    for m in [self.model, model_2, model_3]:
        mll = ExactMarginalLogLikelihood(m.likelihood, m)
        fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})

def setUp(self):
    super().setUp()
    manual_seed(0)
    train_x = rand(2, 10, 1)
    train_y = randn(2, 10, 3, 5)
    train_x = train_x.to(device=self.device)
    train_y = train_y.to(device=self.device)

    self.model = HigherOrderGP(train_x, train_y, first_dim_is_batch=True)

    # check that we can assign different kernels and likelihoods
    model_2 = HigherOrderGP(
        train_x,
        train_y,
        first_dim_is_batch=True,
        covar_modules=[RBFKernel(), RBFKernel(), RBFKernel()],
        likelihood=GaussianLikelihood(),
    )

    for m in [self.model, model_2]:
        mll = ExactMarginalLogLikelihood(m.likelihood, m)
        fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})

def argmax_posterior_mean(cands: to.Tensor, cands_values: to.Tensor,
                          ddp_space: BoxSpace, num_restarts: int,
                          num_samples: int) -> to.Tensor:
    """
    Compute the GP input with the maximal posterior mean.

    :param cands: candidates a.k.a. x
    :param cands_values: observed values a.k.a. y
    :param ddp_space: space of the domain distribution parameters, indicates the lower and upper bound
    :param num_restarts: number of restarts for the optimization of the acquisition function
    :param num_samples: number of samples for the optimization of the acquisition function
    :return: un-normalized candidate with maximum posterior value a.k.a. x
    """
    if not isinstance(cands, to.Tensor):
        raise pyrado.TypeErr(given=cands, expected_type=to.Tensor)
    if not isinstance(cands_values, to.Tensor):
        raise pyrado.TypeErr(given=cands_values, expected_type=to.Tensor)
    if not isinstance(ddp_space, BoxSpace):
        raise pyrado.TypeErr(given=ddp_space, expected_type=BoxSpace)

    # Normalize the input data and standardize the output data
    uc_projector = UnitCubeProjector(
        to.from_numpy(ddp_space.bound_lo).to(dtype=to.get_default_dtype()),
        to.from_numpy(ddp_space.bound_up).to(dtype=to.get_default_dtype()),
    )
    cands_norm = uc_projector.project_to(cands)
    cands_values_stdized = standardize(cands_values)

    if cands_norm.shape[0] > cands_values.shape[0]:
        print_cbt(
            f"There are {cands.shape[0]} candidates but only {cands_values.shape[0]} evaluations. Ignoring "
            f"the candidates without evaluation for computing the argmax.",
            "y",
        )
        cands_norm = cands_norm[:cands_values.shape[0], :]

    # Create and fit the GP model
    gp = SingleTaskGP(cands_norm, cands_values_stdized)
    gp.likelihood.noise_covar.register_constraint("raw_noise", GreaterThan(1e-5))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)

    # Find position with maximal posterior mean
    cand_norm, _ = optimize_acqf(
        acq_function=PosteriorMean(gp),
        bounds=to.stack(
            [to.zeros(ddp_space.flat_dim),
             to.ones(ddp_space.flat_dim)]).to(dtype=to.float32),
        q=1,
        num_restarts=num_restarts,
        raw_samples=num_samples,
    )

    cand_norm = cand_norm.to(dtype=to.get_default_dtype())
    cand = uc_projector.project_back(cand_norm.detach())
    print_cbt(f"Converged to argmax of the posterior mean: {cand.numpy()}",
              "g", bright=True)
    return cand

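# Added hypothetical call (pyrado-specific; assumes a BoxSpace instance plus
# candidates and values collected elsewhere):
# cand = argmax_posterior_mean(cands, cands_values, ddp_space,
#                              num_restarts=10, num_samples=256)
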
def test_transforms(self):
    train_x = torch.rand(10, 3, device=self.device)
    train_y = torch.randn(10, 4, 5, device=self.device)

    # test handling of Standardize
    with self.assertWarns(RuntimeWarning):
        model = HigherOrderGP(train_X=train_x, train_Y=train_y,
                              outcome_transform=Standardize(m=5))
    self.assertIsInstance(model.outcome_transform, FlattenedStandardize)
    self.assertEqual(model.outcome_transform.output_shape, train_y.shape[1:])
    self.assertEqual(model.outcome_transform.batch_shape, torch.Size())

    model = HigherOrderGP(
        train_X=train_x,
        train_Y=train_y,
        input_transform=Normalize(d=3),
        outcome_transform=FlattenedStandardize(train_y.shape[1:]),
    )
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_torch(mll, options={"maxiter": 1, "disp": False})

    test_x = torch.rand(2, 5, 3, device=self.device)
    test_y = torch.randn(2, 5, 4, 5, device=self.device)
    posterior = model.posterior(test_x)
    self.assertIsInstance(posterior, TransformedPosterior)

    conditioned_model = model.condition_on_observations(test_x, test_y)
    self.assertIsInstance(conditioned_model, HigherOrderGP)

    self.check_transform_forward(model)
    self.check_transform_untransform(model)

def initialize_model(train_x, train_obj, state_dict=None):
    model = SingleTaskGP(train_x, train_obj)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model

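# Added sketch of the loop pattern these initialize_model helpers support:
# refit every round, warm-starting hyperparameters from the previous round.
# mll, model = initialize_model(train_x, train_obj, state_dict=state_dict)
# fit_gpytorch_model(mll)          # fast when warm-started
# state_dict = model.state_dict()  # carry over to the next BO iteration
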
def fit(self, target, input, nb_iter=100, lr=1e-1,
        verbose=True, preprocess=True):

    if input.ndim == 1:
        input = input.reshape(-1, self.input_size)

    if preprocess:
        if self.input_trans is None and self.target_trans is None:
            self.init_preprocess(target, input)
        target = transform(target[:, None], self.target_trans).squeeze()
        input = transform(input, self.input_trans)

    target = target.to(self.device)
    input = input.to(self.device)

    self.model.set_train_data(input, target, strict=False)

    self.model.train().to(self.device)
    self.likelihood.train().to(self.device)

    optimizer = Adam([{'params': self.parameters()}], lr=lr)
    mll = ExactMarginalLogLikelihood(self.likelihood, self.model)

    for i in range(nb_iter):
        optimizer.zero_grad()
        _output = self.model(input)
        loss = -mll(_output, target)
        loss.backward()
        if verbose:
            print('Iter %d/%d - Loss: %.3f' % (i + 1, nb_iter, loss.item()))
        optimizer.step()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

def one_step_acquisition_gp(oracle, full_train_X, full_train_Y, acq, q, bounds,
                            dim, domain, domain_image, state_dict=None,
                            plot_stuff=False):
    model = SingleTaskGP(full_train_X, full_train_Y)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    if state_dict is not None:
        model.load_state_dict(state_dict)
    fit_gpytorch_model(mll)

    candidate, EI = get_candidate(model, acq, full_train_Y, q, bounds, dim)

    if acq == 'EI' and dim == 1 and plot_stuff:
        plot_util(oracle, model, EI, domain, domain_image, None,
                  full_train_X, full_train_Y, candidate)

    candidate_image = oracle(candidate)
    full_train_X = torch.cat([full_train_X, candidate])
    full_train_Y = torch.cat([full_train_Y, candidate_image])
    state_dict = model.state_dict()
    return full_train_X, full_train_Y, model, candidate, candidate_image, state_dict

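# Added sketch of the outer BO loop around one_step_acquisition_gp
# (get_candidate, plot_util, oracle, bounds, etc. come from this module):
# state_dict = None
# for _ in range(n_steps):
#     full_train_X, full_train_Y, model, cand, cand_y, state_dict = \
#         one_step_acquisition_gp(oracle, full_train_X, full_train_Y, 'EI', 1,
#                                 bounds, dim, domain, domain_image, state_dict)
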
def fit(self):
    if not self.fitted:
        mll = ExactMarginalLogLikelihood(self.gp_model.likelihood,
                                         self.gp_model)
        fit_gpytorch_model(mll)
        if self.domain is not None and self.postprocessor is not None:
            self.fit_postprocessor_on_domain(self.domain)

def _set_mll(self, mll_conf):
    """Instantiate the mll specified by self._mll."""
    # instantiate the mll
    if self._mll in exactmarginalloglikelihood:
        return ExactMarginalLogLikelihood(self.likelihood, self.model)
    else:
        raise ValueError(f'mll={self._mll} is not supported')

def CreateModel(xtrain, ytrain):
    '''
    Creates and trains a GPyTorch GP model.
    '''
    model = SingleTaskGP(xtrain, ytrain)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)
    return model

def initialize_model(train_x, train_obj, train_con, state_dict=None):
    # define models for objective and constraint
    train_y = torch.cat([train_obj, train_con], dim=-1)
    model = SingleTaskGP(train_x, train_y,
                         outcome_transform=Standardize(m=train_y.shape[-1]))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model

def initialize_model(train_x, train_y, state_dict=None):
    """initialize GP model with/without initial states"""
    model = SingleTaskGP(train_x, train_y).to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # load state dict if it is passed
    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model

def __init__(self, stem, init_x, init_y, lr, **kwargs):
    super().__init__()
    self.stem = stem.to(init_x.device)
    if init_y.t().shape[0] != 1:
        _batch_shape = init_y.t().shape[:-1]
    else:
        _batch_shape = torch.Size()
    features = self.stem(init_x)
    self.gp = SingleTaskGP(
        features,
        init_y,
        covar_module=ScaleKernel(
            RBFKernel(batch_shape=_batch_shape, ard_num_dims=stem.output_dim),
            batch_shape=_batch_shape,
        ),
    )
    self.mll = ExactMarginalLogLikelihood(self.gp.likelihood, self.gp)
    self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    self._raw_inputs = [init_x]
    self._target_batch_shape = _batch_shape
    self.target_dim = init_y.size(-1)

def optimise_adam(model, max_iter=50):
    model.train()
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    opt = torch.optim.Adam([{'params': model.parameters()}], lr=0.1)
    for _ in range(max_iter):
        opt.zero_grad()
        output = model(model.train_inputs[0])
        loss = -mll(output, model.train_targets)
        loss.backward()
        opt.step()

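# Added example for optimise_adam (not from the original source): any
# ExactGP-style model with train_inputs and train_targets set works,
# e.g. a BoTorch SingleTaskGP.
def _demo_optimise_adam():
    x = torch.rand(15, 2)
    y = torch.sin(x.sum(dim=-1, keepdim=True))
    model = SingleTaskGP(x, y)
    optimise_adam(model, max_iter=50)
    return model
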
def initialize_model(x0, y0, n=5):
    # initialize botorch GP model
    # generate prior xs and ys for GP
    train_x = 2 * torch.rand(n, latent_dim, device=device).float() - 1
    if not args.inf_norm:
        train_x = latent_proj(train_x, args.eps)
    train_obj = obj_func(train_x, x0, y0)
    mean, std = train_obj.mean(), train_obj.std()
    if args.standardize:
        train_obj = (train_obj - train_obj.mean()) / train_obj.std()
    best_observed_value = train_obj.max().item()

    # define models for objective and constraint
    model = SingleTaskGP(train_X=train_x, train_Y=train_obj[:, None])
    model = model.to(train_x)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    mll = mll.to(train_x)
    return train_x, train_obj, mll, model, best_observed_value, mean, std

def get_gpr_model(X, y, model=None):
    """Fit a GPR model to the data or update the model with new data.

    Parameters
    ----------
    X: (sx1) Tensor
        Covariates
    y: (sx1) Tensor
        Observations
    model: PyTorch SingleTaskGP model
        If model is passed, X and y are used to update it.
        If None then model is trained on X and y. Default is None.

    Returns
    -------
    model: PyTorch SingleTaskGP model
        Trained or updated model. Returned in train mode.
    mll: PyTorch MarginalLogLikelihood object
        This is the loss used to train hyperparameters. Returned in train mode.
    """
    if model is None:
        # set up model
        print('X', X.shape)  # debug: log input shapes
        print('y', y.shape)
        model = SingleTaskGP(X, y)
    else:
        # update model with new observations
        model = model.condition_on_observations(X, y)
    mll = ExactMarginalLogLikelihood(model.likelihood, model).to(X)
    # begin training
    model.train()
    mll.train()
    fit_gpytorch_model(mll)
    return model, mll

def _initialize_model(self, num_init_samples: int) -> None:
    """initialize the GP model with num_init_samples of initial samples"""
    self.train_X = torch.rand((num_init_samples, self.dim))
    self.train_Y = self._function_call(self.train_X)
    self.model = SingleTaskGP(
        self.train_X, self.train_Y, outcome_transform=Standardize(m=1)
    )
    mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
    fit_gpytorch_model(mll)

def initialize_model():
    # generate synthetic data
    X = torch.rand(20, 2)
    Y = torch.stack([torch.sin(X[:, 0]), torch.cos(X[:, 1])], -1)

    # construct and fit the multi-output model
    gp = SingleTaskGP(X, Y)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_model(mll)

    return gp

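# Added follow-up for the multi-output demo above: querying the fitted model
# returns one posterior column per output.
def _demo_query_model():
    gp = initialize_model()
    test_X = torch.rand(5, 2)
    posterior = gp.posterior(test_X)
    return posterior.mean, posterior.variance  # each of shape (5, 2)
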
def fit(self, x_train, y_train):
    # normalize parameter (=input) data
    x_train_norm = self.param_normalizer.project_to(x_train)
    # standardize the output data
    y_train_norm = self.data_normalizer.standardize(y_train)
    self.gp = SingleTaskGP(x_train_norm, y_train_norm)
    self.gp.likelihood.noise_covar.register_constraint(
        "raw_noise", GreaterThan(1e-5))
    mll = ExactMarginalLogLikelihood(self.gp.likelihood, self.gp)
    fit_gpytorch_model(mll)
    return self.gp

def _update_model(self, new_sample: Tensor, new_observation: Tensor) -> None:
    """
    Update the GP model with the new observation(s)

    :param new_sample: sampled point
    :param new_observation: observed function value
    """
    self.train_X = torch.cat((self.train_X, new_sample), 0)
    self.train_Y = torch.cat((self.train_Y, new_observation), 0)

    self.model = self.model.condition_on_observations(new_sample, new_observation)
    if self.retrain_gp:
        mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
        fit_gpytorch_model(mll)

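# Added sketch of how _initialize_model and _update_model compose, assuming
# both are methods of the same BO class with an acquisition step in between:
# self._initialize_model(num_init_samples=10)
# for _ in range(budget):
#     x_next = ...  # optimize an acquisition function over self.model
#     self._update_model(x_next, self._function_call(x_next))
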
def optimise_lbfgs(model):
    model.train()
    # model.likelihood.train()
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    opt = torch.optim.LBFGS([{'params': model.parameters()}], max_iter=1000)

    def closure():
        output = model(model.train_inputs[0])
        loss = -mll(output, model.train_targets)
        opt.zero_grad()
        loss.backward()
        return loss

    opt.step(closure)
