def test_setters(self):
    likelihood = MultitaskGaussianLikelihood(num_tasks=3, rank=0)

    a = torch.randn(3, 2)
    mat = a.matmul(a.transpose(-1, -2))

    # test rank 0 setters
    likelihood.noise = 0.5
    self.assertAlmostEqual(0.5, likelihood.noise.item())

    likelihood.task_noises = torch.tensor([0.04, 0.04, 0.04])
    for i in range(3):
        self.assertAlmostEqual(0.04, likelihood.task_noises[i].item())

    with self.assertRaises(AttributeError) as context:
        likelihood.task_noise_covar = mat
    self.assertTrue("task noises" in str(context.exception))

    # test low rank setters
    likelihood = MultitaskGaussianLikelihood(num_tasks=3, rank=2)

    likelihood.noise = 0.5
    self.assertAlmostEqual(0.5, likelihood.noise.item())

    likelihood.task_noise_covar = mat
    self.assertAllClose(mat, likelihood.task_noise_covar)

    with self.assertRaises(AttributeError) as context:
        likelihood.task_noises = torch.tensor([0.04, 0.04, 0.04])
    self.assertTrue("task noises" in str(context.exception))
def __init__(self, train_X, train_Y):
    d = train_X.shape[-1]
    likelihood = MultitaskGaussianLikelihood(num_tasks=1 + d)
    super(GPWithDerivatives, self).__init__(train_X, train_Y, likelihood)
    self.mean_module = gpytorch.means.ConstantMeanGrad()
    self.base_kernel = gpytorch.kernels.RBFKernelGrad(ard_num_dims=d)
    self.covar_module = gpytorch.kernels.ScaleKernel(self.base_kernel)
def __init__(self, latent_dimensions, output_dimensions, n_observations,
             projection_dimensions=None, n_inducing=50, **kwargs):
    if "likelihood" in kwargs:
        raise Exception("Likelihood should not be set for the GP-LVM")
    kwargs["likelihood"] = MultitaskGaussianLikelihood(num_tasks=output_dimensions)
    super().__init__(**kwargs)

    self.Q = latent_dimensions
    self.D = output_dimensions
    self.N = n_observations
    self.K = projection_dimensions or self.D

    if projection_dimensions is not None:
        L = torch.zeros(self.K, self.D) + 0.1
        self.register_parameter("L", nn.Parameter(L))
    else:
        self.L = None

    svgp = SVGP(self.Q, self.K, n_inducing=n_inducing, collapsed=False)
    self.add_gp(svgp)

    self.latent_layer = LatentLayer(self.N, self.Q)
def test_train_and_eval(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = MultitaskGaussianLikelihood(num_tasks=4)
    model = LMCModel()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ], lr=0.01)

    # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

    # We use more CG iterations here because the preconditioner introduced in the NeurIPS paper
    # seems to be less effective for VI.
    for i in range(400):
        # Within each iteration, we will go over each minibatch of data
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
        batch_predictions = likelihood(model(train_x))
        preds1 = batch_predictions.mean[:, 0]
        preds2 = batch_predictions.mean[:, 1]
        preds3 = batch_predictions.mean[:, 2]
        preds4 = batch_predictions.mean[:, 3]
        mean_abs_error1 = torch.mean(torch.abs(train_y[..., 0] - preds1))
        mean_abs_error2 = torch.mean(torch.abs(train_y[..., 1] - preds2))
        mean_abs_error3 = torch.mean(torch.abs(train_y[..., 2] - preds3))
        mean_abs_error4 = torch.mean(torch.abs(train_y[..., 3] - preds4))
        self.assertLess(mean_abs_error1.squeeze().item(), 0.15)
        self.assertLess(mean_abs_error2.squeeze().item(), 0.15)
        self.assertLess(mean_abs_error3.squeeze().item(), 0.15)
        self.assertLess(mean_abs_error4.squeeze().item(), 0.15)

        # Smoke test for getting predictive uncertainties
        lower, upper = batch_predictions.confidence_region()
        self.assertEqual(lower.shape, train_y.shape)
        self.assertEqual(upper.shape, train_y.shape)
def test_train_on_single_set_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = MultitaskGaussianLikelihood(
        log_noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1), scale=torch.ones(1), log_transform=True),
        num_tasks=2,
    )
    gp_model = ExactGPModel(train_x1, train_y1, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    # (gp_model.parameters() already includes the likelihood's parameters)
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def get_model(*, train_x, train_y, rank, num_mixtures, X_scaler):
    likelihood = MultitaskGaussianLikelihood(
        num_tasks=train_y.shape[1],
        noise_constraint=Interval(1e-10, 1.0),
    )
    model = MultitaskGPModel(
        train_x,
        train_y,
        likelihood,
        rank=rank,
        num_mixtures=num_mixtures,
        X_scaler=X_scaler,
    )
    if IS_CUDA:
        model = model.cuda(device=DEVICE)
        likelihood = likelihood.cuda(device=DEVICE)
    return model, likelihood
def __init__(self, dim):
    # squeeze output dim before passing train_Y to ExactGP
    # super().__init__(train_X, train_Y.squeeze(-1), GaussianLikelihood())
    # super().__init__(train_X, train_Y, MultitaskGaussianLikelihood(num_tasks=1 + train_X.shape[-1]))
    self.likelihood = MultitaskGaussianLikelihood(num_tasks=1 + dim)
    self.mean_module = ConstantMeanGrad()
    base_kernel = RBFKernelGrad(ard_num_dims=dim)
    self.covar_module = ScaleKernel(base_kernel=base_kernel)
    # self.to(train_X)  # make sure we're on the right device/dtype
    self.dim = dim
def test_multitask_gp_mean_abs_error(self, cuda=False):
    train_x, train_y = self._get_data(cuda=cuda)
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model = MultitaskGPModel(train_x, train_y, likelihood)
    if cuda:
        model.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 50
    for _ in range(n_iter):
        # Zero previously backpropped gradients
        optimizer.zero_grad()
        # Make predictions from training data
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Test the model
    model.eval()
    likelihood.eval()

    test_x = torch.linspace(0, 1, 51, device=torch.device("cuda") if cuda else torch.device("cpu"))
    test_y1 = torch.sin(test_x * (2 * pi))
    test_y2 = torch.cos(test_x * (2 * pi))
    test_preds = likelihood(model(test_x)).mean
    mean_abs_error_task_1 = torch.mean(torch.abs(test_y1 - test_preds[:, 0]))
    mean_abs_error_task_2 = torch.mean(torch.abs(test_y2 - test_preds[:, 1]))

    self.assertLess(mean_abs_error_task_1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error_task_2.squeeze().item(), 0.05)
def __init__(self, input_size, target_size, device='cpu'):
    if device == 'gpu' and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    else:
        self.device = torch.device('cpu')

    self.input_size = input_size
    self.target_size = target_size

    _likelihood = MultitaskGaussianLikelihood(num_tasks=self.target_size)
    super(MultiTaskGPRegressor, self).__init__(train_inputs=None,
                                               train_targets=None,
                                               likelihood=_likelihood)

    self.mean_module = MultitaskMean(ZeroMean(), num_tasks=self.target_size)
    self.covar_module = MultitaskKernel(RBFKernel(), num_tasks=self.target_size, rank=1)

    self.input_trans = None
    self.target_trans = None
def __init__(self, input_size, target_size, device='cpu'):
    if device == 'gpu' and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    else:
        self.device = torch.device('cpu')

    self.input_size = input_size
    self.target_size = target_size

    _likelihood = MultitaskGaussianLikelihood(num_tasks=self.target_size)
    super(GPListRegressor, self).__init__(train_inputs=None,
                                          train_targets=None,
                                          likelihood=_likelihood)

    self.mean_module = ConstantMean(batch_shape=torch.Size([self.target_size]))
    self.covar_module = ScaleKernel(RBFKernel(batch_shape=torch.Size([self.target_size])),
                                    batch_shape=torch.Size([self.target_size]))

    self.input_trans = None
    self.target_trans = None
def __init__(
    self,
    model: Optional[ApproximateGP] = None,
    likelihood: Optional[Likelihood] = None,
    num_outputs: int = 1,
    *args,
    **kwargs,
) -> None:
    r"""Botorch wrapper class for various (variational) approximate GP models in
    GPyTorch. This can either include stochastic variational GPs (SVGPs) or
    variational implementations of weight space approximate GPs.

    Args:
        model: Instance of GPyTorch's approximate GP models. If omitted,
            constructs a `_SingleTaskVariationalGP`.
        likelihood: Instance of a GPyTorch likelihood. If omitted, uses either a
            `GaussianLikelihood` (if `num_outputs=1`) or a
            `MultitaskGaussianLikelihood` (if `num_outputs>1`).
        num_outputs: Number of outputs expected for the GP model.
        args: Optional positional arguments passed to the
            `_SingleTaskVariationalGP` constructor if no model is provided.
        kwargs: Optional keyword arguments passed to the
            `_SingleTaskVariationalGP` constructor if no model is provided.
    """
    super().__init__()

    if model is None:
        model = _SingleTaskVariationalGP(num_outputs=num_outputs, *args, **kwargs)

    if likelihood is None:
        if num_outputs == 1:
            likelihood = GaussianLikelihood()
        else:
            likelihood = MultitaskGaussianLikelihood(num_tasks=num_outputs)

    self.model = model
    self.likelihood = likelihood
    self._desired_num_outputs = num_outputs
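# The constructor above documents a default-likelihood rule: a GaussianLikelihood for a
# single output, a MultitaskGaussianLikelihood otherwise. The helper below is a minimal,
# self-contained sketch of that rule; the name `default_likelihood` is illustrative and
# not part of the original code.
from gpytorch.likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood


def default_likelihood(num_outputs: int):
    # Mirrors the `likelihood is None` branch of the wrapper's __init__.
    if num_outputs == 1:
        return GaussianLikelihood()
    return MultitaskGaussianLikelihood(num_tasks=num_outputs)


assert isinstance(default_likelihood(1), GaussianLikelihood)
assert isinstance(default_likelihood(3), MultitaskGaussianLikelihood)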
def test_train_and_eval(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    gp_model = ExactGPModel(train_x, train_y12, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(75):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y12).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
        batch_predictions = likelihood(gp_model(test_x))
        preds1 = batch_predictions.mean[:, 0]
        preds2 = batch_predictions.mean[:, 1]
        mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
        mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
        self.assertLess(mean_abs_error1.squeeze().item(), 0.01)
        self.assertLess(mean_abs_error2.squeeze().item(), 0.01)

        # Smoke test for getting predictive uncertainties
        lower, upper = batch_predictions.confidence_region()
        self.assertEqual(lower.shape, test_y12.shape)
        self.assertEqual(upper.shape, test_y12.shape)
def test_multitask_low_rank_noise_covar(self):
    likelihood = MultitaskGaussianLikelihood(n_tasks=2, rank=1)
    model = MultitaskGPModel(train_x, train_y, likelihood)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(
        [{"params": model.parameters()}],  # Includes GaussianLikelihood parameters
        lr=0.1,
    )

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 50
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Test the model
    model.eval()
    likelihood.eval()

    # Reconstruct the inter-task noise covariance from the learned low-rank factor and the
    # learned (log) homoskedastic noise, then check that the off-diagonal entry is nontrivial.
    n_tasks = 2
    task_noise_covar_factor = likelihood.task_noise_covar_factor
    log_noise = likelihood.log_noise
    task_noise_covar = task_noise_covar_factor.matmul(
        task_noise_covar_factor.transpose(-1, -2)
    ) + log_noise.exp() * torch.eye(n_tasks)

    self.assertGreater(task_noise_covar[0, 1].data.squeeze().item(), 0.05)
def test_lcm_icm_equivalence(self):
    # 100 evenly spaced training points in [0, 1] (note that they're the same for both tasks)
    train_x = torch.linspace(0, 1, 100)
    # y1 function is sin(2*pi*x) with noise N(0, 0.04)
    train_y1 = torch.sin(train_x.data * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
    # y2 function is cos(2*pi*x) with noise N(0, 0.04)
    train_y2 = torch.cos(train_x.data * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
    # Create a train_y which interleaves the two
    train_y = torch.stack([train_y1, train_y2], -1)

    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model = MultitaskGPModel(train_x, train_y, likelihood)

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

    model.train()
    likelihood.train()
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 50
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    model.eval()
    likelihood.eval()

    # Make predictions for LCM
    with torch.no_grad():
        test_x = torch.linspace(0, 1, 51)
        observed_pred = likelihood(model(test_x))
        mean = observed_pred.mean

    # Train an ICM model on the same data, with a fresh likelihood
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model_icm = MultitaskGPModel_ICM(train_x, train_y, likelihood)
    model_icm.train()
    likelihood.train()
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model_icm)
    optimizer = torch.optim.Adam(model_icm.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = model_icm(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    model_icm.eval()
    likelihood.eval()

    # Make predictions for ICM
    with torch.no_grad():
        test_x = torch.linspace(0, 1, 51)
        observed_pred_icm = likelihood(model_icm(test_x))
        mean_icm = observed_pred_icm.mean

    # Make sure predictions from LCM with one base kernel and ICM are the same.
    self.assertLess((mean - mean_icm).pow(2).mean(), 1e-2)
def create_likelihood(self):
    return MultitaskGaussianLikelihood(num_tasks=4, rank=2, batch_shape=torch.Size([2, 3]))
def create_likelihood(self):
    return MultitaskGaussianLikelihood(num_tasks=4, rank=2)
def create_model(self, train_x, train_y):
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model = ExactMultiTaskGPModel(train_x, train_y, likelihood)
    return model
def test_KroneckerMultiTaskGP_custom(self):
    for batch_shape, dtype in itertools.product(
        (torch.Size(),),  # torch.Size([3])), TODO: Fix and test batch mode
        (torch.float, torch.double),
    ):
        tkwargs = {"device": self.device, "dtype": dtype}

        # initialization with custom settings
        likelihood = MultitaskGaussianLikelihood(
            num_tasks=2,
            rank=1,
            batch_shape=batch_shape,
        )
        data_covar_module = MaternKernel(
            nu=1.5,
            lengthscale_prior=GammaPrior(2.0, 4.0),
        )
        task_covar_prior = LKJCovariancePrior(
            n=2,
            eta=0.5,
            sd_prior=SmoothedBoxPrior(math.exp(-3), math.exp(2), 0.1),
        )
        model_kwargs = {
            "likelihood": likelihood,
            "data_covar_module": data_covar_module,
            "task_covar_prior": task_covar_prior,
            "rank": 1,
        }
        model, train_X, _ = _get_kronecker_model_and_training_data(
            model_kwargs=model_kwargs, batch_shape=batch_shape, **tkwargs
        )
        self.assertIsInstance(model, KroneckerMultiTaskGP)
        self.assertEqual(model.num_outputs, 2)
        self.assertIsInstance(model.likelihood, MultitaskGaussianLikelihood)
        self.assertEqual(model.likelihood.rank, 1)
        self.assertIsInstance(model.mean_module, MultitaskMean)
        self.assertIsInstance(model.covar_module, MultitaskKernel)
        base_kernel = model.covar_module
        self.assertIsInstance(base_kernel.data_covar_module, MaternKernel)
        self.assertIsInstance(base_kernel.task_covar_module, IndexKernel)
        task_covar_prior = base_kernel.task_covar_module.IndexKernelPrior
        self.assertIsInstance(task_covar_prior, LKJCovariancePrior)
        self.assertEqual(task_covar_prior.correlation_prior.eta, 0.5)
        lengthscale_prior = base_kernel.data_covar_module.lengthscale_prior
        self.assertIsInstance(lengthscale_prior, GammaPrior)
        self.assertEqual(lengthscale_prior.concentration, 2.0)
        self.assertEqual(lengthscale_prior.rate, 4.0)
        self.assertEqual(base_kernel.task_covar_module.covar_factor.shape[-1], 1)

        # test model fitting
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=OptimizationWarning)
            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)

        # test posterior
        test_x = torch.rand(2, 2, **tkwargs)
        posterior_f = model.posterior(test_x)
        self.assertIsInstance(posterior_f, GPyTorchPosterior)
        self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
        self.assertEqual(posterior_f.mean.shape, torch.Size([2, 2]))
        self.assertEqual(posterior_f.variance.shape, torch.Size([2, 2]))

        # test observation noise
        posterior_noisy = model.posterior(test_x, observation_noise=True)
        self.assertTrue(
            torch.allclose(posterior_noisy.variance, model.likelihood(posterior_f.mvn).variance)
        )

        # test posterior (batch eval)
        test_x = torch.rand(3, 2, 2, **tkwargs)
        posterior_f = model.posterior(test_x)
        self.assertIsInstance(posterior_f, GPyTorchPosterior)
        self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
        self.assertEqual(posterior_f.mean.shape, torch.Size([3, 2, 2]))
        self.assertEqual(posterior_f.variance.shape, torch.Size([3, 2, 2]))
def fit(self, Xc: Tensor, Xe: Tensor, y: Tensor):
    Xc, Xe, y = filter_nan(Xc, Xe, y, 'all')
    self.fit_scaler(Xc, Xe, y)
    Xc, Xe, y = self.xtrans(Xc, Xe, y)

    assert (Xc.shape[1] == self.num_cont)
    assert (Xe.shape[1] == self.num_enum)
    assert (y.shape[1] == self.num_out)

    self.Xc = Xc
    self.Xe = Xe
    self.y = y

    n_constr = GreaterThan(self.noise_lb)
    n_prior = LogNormalPrior(-4.63, 0.5)
    if self.num_out == 1:
        self.lik = GaussianLikelihood(noise_constraint=n_constr, noise_prior=n_prior)
    else:
        self.lik = MultitaskGaussianLikelihood(num_tasks=self.num_out,
                                               noise_constraint=n_constr,
                                               noise_prior=n_prior)
    self.gp = GPyTorchModel(self.Xc, self.Xe, self.y, self.lik, **self.conf)

    if self.num_out == 1:  # XXX: only tuned for single-output BO
        if self.num_cont > 0:
            self.gp.kern.outputscale = self.y.var()
            lscales = self.gp.kern.base_kernel.lengthscale.detach().clone().view(1, -1)
            for i in range(self.num_cont):
                lscales[0, i] = torch.pdist(self.Xc[:, i].view(-1, 1)).median().clamp(min=0.02)
            self.gp.kern.base_kernel.lengthscale = lscales
        if self.noise_free:
            self.gp.likelihood.noise = self.noise_lb * 1.1
            self.gp.likelihood.raw_noise.requires_grad = False
        else:
            self.gp.likelihood.noise = max(1e-2, self.noise_lb)

    self.gp.train()
    self.lik.train()

    opt = torch.optim.LBFGS(self.gp.parameters(),
                            lr=self.lr,
                            max_iter=5,
                            line_search_fn='strong_wolfe')
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.lik, self.gp)
    for epoch in range(self.num_epochs):
        def closure():
            dist = self.gp(self.Xc, self.Xe)
            loss = -1 * mll(dist, self.y.squeeze())
            opt.zero_grad()
            loss.backward()
            return loss
        opt.step(closure)
        if self.verbose and ((epoch + 1) % self.print_every == 0 or epoch == 0):
            print('After %d epochs, loss = %g' % (epoch + 1, closure().item()), flush=True)
    self.gp.eval()
    self.lik.eval()
def gprTorch_multiTask():
    """ Multi-Task GPR + heteroscedastic noise level """
    # synthetic data
    train_x = torch.linspace(0, 1, 75)
    sem_y1 = 0.05 + (0.55 - 0.05) * torch.linspace(0, 1, 75)
    sem_y2 = 0.75 - (0.75 - 0.05) * torch.linspace(0, 1, 75)
    train_y = torch.stack([
        torch.sin(train_x * (2 * math.pi)) + sem_y1 * torch.randn(train_x.size()),
        torch.cos(train_x * (2 * math.pi)) + sem_y2 * torch.randn(train_x.size()),
    ], -1)
    train_y_log_var = torch.stack([(s**2).log() for s in (sem_y1, sem_y2)], -1)

    # construct the GPR
    numTasks = 2
    log_noise_model = MultitaskGPModel(
        train_x,
        train_y_log_var,
        MultitaskGaussianLikelihood(num_tasks=numTasks),
        num_tasks=numTasks,
    )
    likelihood = _MultitaskGaussianLikelihoodBase(
        num_tasks=numTasks,
        noise_covar=HeteroskedasticNoise(log_noise_model),
    )
    model = MultitaskGPModel(train_x, train_y, likelihood, num_tasks=numTasks, rank=numTasks)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(
        [{'params': model.parameters()}],  # Includes GaussianLikelihood parameters
        lr=0.1)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 75
    for i in range(n_iter):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y, train_x)
        loss.backward()
        if (i + 1) % 10 == 0:
            print('Iter %d/%d - Loss: %.3f' % (i + 1, n_iter, loss.item()))
        optimizer.step()

    model.eval()
    likelihood.eval()

    with torch.no_grad():
        test_x = torch.linspace(0, 1, 35)
        post_f = model(test_x)
        post_obs = likelihood(post_f, test_x)

    with torch.no_grad():
        f, axs = plt.subplots(1, 2, figsize=(14, 6))
        lower_f, upper_f = post_f.confidence_region()
        lower_obs, upper_obs = post_obs.confidence_region()
        for i, ax in enumerate(axs):
            ax.plot(train_x.numpy(), train_y[:, i].numpy(), 'k*')
            ax.plot(test_x.numpy(), post_f.mean[:, i].numpy(), 'b')
            ax.fill_between(test_x.numpy(), lower_f[:, i].numpy(), upper_f[:, i].numpy(), alpha=0.5)
            ax.fill_between(test_x.numpy(), lower_obs[:, i].numpy(), upper_obs[:, i].numpy(),
                            alpha=0.25, color='r')
            ax.set_ylim([-3, 3])
            ax.legend(['Observed Data', 'Mean', 'Confidence (f)', 'Confidence (obs)'])
        plt.title('Multi-Task GP + Heteroscedastic Noise')
        plt.show()
def train_gp(train_x, train_y, use_ard, num_steps, hypers={}):
    """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized."""
    assert train_x.ndim == 2
    assert train_y.ndim == 2
    assert train_x.shape[0] == train_y.shape[0]

    # Create hyperparameter bounds
    noise_constraint = Interval(5e-4, 0.2)
    if use_ard:
        lengthscale_constraint = Interval(0.005, 2.0)
    else:
        lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1]))  # [0.005, sqrt(dim)]
    outputscale_constraint = Interval(0.05, 20.0)

    # Create models
    likelihood = MultitaskGaussianLikelihood(
        num_tasks=train_y.size(-1),
        noise_constraint=noise_constraint,
    ).to(device=train_x.device, dtype=train_y.dtype)
    ard_dims = train_x.shape[1] if use_ard else None
    model = GP(
        train_x=train_x,
        train_y=train_y,
        likelihood=likelihood,
        lengthscale_constraint=lengthscale_constraint,
        outputscale_constraint=outputscale_constraint,
        ard_dims=ard_dims,
    ).to(device=train_x.device, dtype=train_x.dtype)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    # Initialize model hypers
    if hypers:
        model.load_state_dict(hypers)
    else:
        hypers = {}
        hypers["covar_module.outputscale"] = 1.0
        hypers["covar_module.base_kernel.lengthscale"] = 0.5
        hypers["likelihood.noise"] = 0.005
        model.initialize(**hypers)

    # Use the adam optimizer
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Switch to eval mode
    model.eval()
    likelihood.eval()

    return model
def __init__(
    self,
    train_X: Tensor,
    train_Y: Optional[Tensor] = None,
    likelihood: Optional[Likelihood] = None,
    num_outputs: int = 1,
    learn_inducing_points: bool = True,
    covar_module: Optional[Kernel] = None,
    mean_module: Optional[Mean] = None,
    variational_distribution: Optional[_VariationalDistribution] = None,
    variational_strategy: Type[_VariationalStrategy] = VariationalStrategy,
    inducing_points: Optional[Union[Tensor, int]] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
    input_transform: Optional[InputTransform] = None,
) -> None:
    r"""A single-task stochastic variational Gaussian process (SVGP) model as
    described by [hensman2013svgp]_. We use pivoted Cholesky initialization
    [burt2020svgp]_ to initialize the inducing points of the model.

    Args:
        train_X: Training inputs (due to the ability of the SVGP to sub-sample
            this does not have to be all of the training inputs).
        train_Y: Training targets (optional).
        likelihood: Instance of a GPyTorch likelihood. If omitted, uses either a
            `GaussianLikelihood` (if `num_outputs=1`) or a
            `MultitaskGaussianLikelihood` (if `num_outputs>1`).
        num_outputs: Number of output responses per input (default: 1).
        covar_module: Kernel function. If omitted, uses a `MaternKernel`.
        mean_module: Mean of GP model. If omitted, uses a `ConstantMean`.
        variational_distribution: Type of variational distribution to use
            (default: CholeskyVariationalDistribution); the properties of the
            variational distribution will encourage scalability or ease of
            optimization.
        variational_strategy: Type of variational strategy to use (default:
            VariationalStrategy). The default setting uses "whitening" of the
            variational distribution to make training easier.
        inducing_points: The number or specific locations of the inducing points.
    """
    with torch.no_grad():
        transformed_X = self.transform_inputs(X=train_X, input_transform=input_transform)
    if train_Y is not None:
        if outcome_transform is not None:
            train_Y, _ = outcome_transform(train_Y)
        self._validate_tensor_args(X=transformed_X, Y=train_Y)
        validate_input_scaling(train_X=transformed_X, train_Y=train_Y)
        if train_Y.shape[-1] != num_outputs:
            num_outputs = train_Y.shape[-1]

    self._num_outputs = num_outputs
    self._input_batch_shape = train_X.shape[:-2]
    aug_batch_shape = copy.deepcopy(self._input_batch_shape)
    if num_outputs > 1:
        aug_batch_shape += torch.Size([num_outputs])
    self._aug_batch_shape = aug_batch_shape

    if likelihood is None:
        if num_outputs == 1:
            noise_prior = GammaPrior(1.1, 0.05)
            noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
            likelihood = GaussianLikelihood(
                noise_prior=noise_prior,
                batch_shape=self._aug_batch_shape,
                noise_constraint=GreaterThan(
                    MIN_INFERRED_NOISE_LEVEL,
                    transform=None,
                    initial_value=noise_prior_mode,
                ),
            )
        else:
            likelihood = MultitaskGaussianLikelihood(num_tasks=num_outputs)
    else:
        self._is_custom_likelihood = True

    model = _SingleTaskVariationalGP(
        train_X=transformed_X,
        train_Y=train_Y,
        num_outputs=num_outputs,
        learn_inducing_points=learn_inducing_points,
        covar_module=covar_module,
        mean_module=mean_module,
        variational_distribution=variational_distribution,
        variational_strategy=variational_strategy,
        inducing_points=inducing_points,
    )

    super().__init__(model=model, likelihood=likelihood, num_outputs=num_outputs)

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    if input_transform is not None:
        self.input_transform = input_transform

    # for model fitting utilities
    # TODO: make this a flag?
    self.model.train_inputs = [transformed_X]
    if train_Y is not None:
        self.model.train_targets = train_Y.squeeze(-1)
    self.to(train_X)
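# Hedged usage sketch for the constructor above: judging by the signature and the
# [hensman2013svgp]_ reference, this appears to be BoTorch's SingleTaskVariationalGP,
# but that class name and the synthetic data below are assumptions, not part of the
# snippet. With num_outputs > 1 and no explicit likelihood, the constructor falls back
# to a MultitaskGaussianLikelihood.
import torch
from botorch.models import SingleTaskVariationalGP
from gpytorch.likelihoods import MultitaskGaussianLikelihood

train_X = torch.rand(20, 3, dtype=torch.double)
train_Y = torch.stack([train_X.sum(-1), train_X.prod(-1)], dim=-1)  # two outputs

model = SingleTaskVariationalGP(train_X=train_X, train_Y=train_Y, num_outputs=2)
assert isinstance(model.likelihood, MultitaskGaussianLikelihood)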
def run(obs, params_true, device='cpu'):
    device = safe_cast(torch.device, device)
    dx, NX = PARAM_DX, PARAM_MESH_RES_SPACE
    ts = torch.arange(PARAM_MESH_RES_TIME, device=device)
    priors_uniform = priors()
    y = torch.tensor(obs['Ss'], device=device)

    def simulate(params):
        _theta = {'a': params[0], 'b': params[1], 'k': params[2]}
        sim_pde = LandauCahnHilliard(params=_theta, M=PARAM_DT, dx=dx, device=device)
        loss_fn = Evaluator(sim_pde, loss)
        return loss_fn

    pgd = lhs(3, samples=PARAM_INIT_EVAL)  # calculate initial samples from latin hypercube
    xs, ys = [], []
    for j in range(PARAM_INIT_EVAL):
        xk = torch.stack(
            [(priors_uniform[k][1] - priors_uniform[k][0])
             * torch.tensor(pgd[j, i], device=device, dtype=torch.float32)
             + priors_uniform[k][0]
             for i, k in enumerate(('a', 'b', 'k'))], 0)
        xs.append(xk)
        # ell, params = simulate(params)

    phi0 = (0.2 * torch.rand((NX, NX), device=device)).view(-1, 1, NX, NX)
    # with torch.no_grad():
    for j in range(PARAM_INIT_EVAL):
        params = xs[j]
        loss_fn = simulate(params)
        # print(loss_fn._pde)
        # for n, p in loss_fn.named_parameters():
        #     print(n + '->' + str(p))
        ell = loss_fn(phi0, ts, y, dx)
        ell.backward()
        grads = (loss_fn._pde._a.grad, loss_fn._pde._b.grad, loss_fn._pde._k.grad)
        ys.append(torch.stack([ell.detach(), *grads]).unsqueeze(0))
        print('init sample %d/%d' % (j, PARAM_INIT_EVAL))
    x_init, y_init = torch.stack(xs), torch.cat(ys, 0)
    # print(y_init)

    N = PARAM_SEARCH_RES
    x_eval = torch.cat([x.reshape(-1, 1) for x in torch.meshgrid(
        *[torch.linspace(priors_uniform[k][0], priors_uniform[k][1], N)
          for k in priors_uniform]
    )], 1)
    print(x_init.shape)
    print(x_eval.shape)

    x_train = x_init
    y_train = y_init
    print(x_init)
    print(y_init)

    jit = 1e-2
    lik = MultitaskGaussianLikelihood(num_tasks=4)
    lik.noise_covar.noise = jit * torch.ones(4)
    lik.noise = torch.tensor(jit).sqrt()

    for i in range(PARAM_MAX_EVAL - PARAM_INIT_EVAL):
        for ntry in range(5):
            model = ExactGPModel(x_train, y_train, lik)
            try:
                optimise(model, method='adam', max_iter=1000)
                break
            except Exception as err:
                print('attempt %d failed' % ntry)
                if ntry == 4:
                    raise err
        u = acq(y_train[:, 0].min(), model, x_eval)
        idx = u.argmax()
        xn = x_eval[idx, :]
        loss_fn = simulate(xn)
        ell = loss_fn(phi0, ts, y, dx)
        ell.backward()
        grads = (loss_fn._pde._a.grad, loss_fn._pde._b.grad, loss_fn._pde._k.grad)
        # ys.append(torch.stack([ell.detach(), *grads]).unsqueeze(0))
        yn = torch.stack([ell.detach(), *grads], -1).unsqueeze(0)
        x_eval = torch.cat([x_eval[0:idx, :], x_eval[idx + 1:, :]], 0)
        x_train = torch.cat([x_train, xn.reshape(1, -1)])
        # y_train = torch.stack([*y_train, yn.detach()])
        y_train = torch.cat([y_train, yn], 0)
        print(x_train)
        print(y_train)
        print(i)

    # return (x_train, y_train)
class GP(BaseModel):
    support_grad = True
    support_multi_output = True

    def __init__(self, num_cont, num_enum, num_out, **conf):
        super().__init__(num_cont, num_enum, num_out, **conf)
        self.lr = conf.get('lr', 3e-2)
        self.num_epochs = conf.get('num_epochs', 100)
        self.verbose = conf.get('verbose', False)
        self.print_every = conf.get('print_every', 10)
        self.noise_free = conf.get('noise_free', False)
        self.pred_likeli = conf.get('pred_likeli', True)
        self.noise_lb = conf.get('noise_lb', 1e-5)
        self.xscaler = TorchMinMaxScaler((-1, 1))
        self.yscaler = TorchStandardScaler()

    def fit_scaler(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        if Xc is not None and Xc.shape[1] > 0:
            self.xscaler.fit(Xc)
        self.yscaler.fit(y)

    def xtrans(self, Xc: Tensor, Xe: Tensor, y: Tensor = None):
        if Xc is not None and Xc.shape[1] > 0:
            Xc_t = self.xscaler.transform(Xc)
        else:
            Xc_t = torch.zeros(Xe.shape[0], 0)

        if Xe is None:
            Xe_t = torch.zeros(Xc.shape[0], 0).long()
        else:
            Xe_t = Xe.long()

        if y is not None:
            y_t = self.yscaler.transform(y)
            return Xc_t, Xe_t, y_t
        else:
            return Xc_t, Xe_t

    def fit(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        Xc, Xe, y = filter_nan(Xc, Xe, y, 'all')
        self.fit_scaler(Xc, Xe, y)
        Xc, Xe, y = self.xtrans(Xc, Xe, y)

        assert (Xc.shape[1] == self.num_cont)
        assert (Xe.shape[1] == self.num_enum)
        assert (y.shape[1] == self.num_out)

        self.Xc = Xc
        self.Xe = Xe
        self.y = y

        n_constr = GreaterThan(self.noise_lb)
        n_prior = LogNormalPrior(-4.63, 0.5)
        if self.num_out == 1:
            self.lik = GaussianLikelihood(noise_constraint=n_constr, noise_prior=n_prior)
        else:
            self.lik = MultitaskGaussianLikelihood(num_tasks=self.num_out,
                                                   noise_constraint=n_constr,
                                                   noise_prior=n_prior)
        self.gp = GPyTorchModel(self.Xc, self.Xe, self.y, self.lik, **self.conf)

        if self.num_out == 1:  # XXX: only tuned for single-output BO
            if self.num_cont > 0:
                self.gp.kern.outputscale = self.y.var()
                lscales = self.gp.kern.base_kernel.lengthscale.detach().clone().view(1, -1)
                for i in range(self.num_cont):
                    lscales[0, i] = torch.pdist(self.Xc[:, i].view(-1, 1)).median().clamp(min=0.02)
                self.gp.kern.base_kernel.lengthscale = lscales
            if self.noise_free:
                self.gp.likelihood.noise = self.noise_lb * 1.1
                self.gp.likelihood.raw_noise.requires_grad = False
            else:
                self.gp.likelihood.noise = max(1e-2, self.noise_lb)

        self.gp.train()
        self.lik.train()

        opt = torch.optim.LBFGS(self.gp.parameters(),
                                lr=self.lr,
                                max_iter=5,
                                line_search_fn='strong_wolfe')
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.lik, self.gp)
        for epoch in range(self.num_epochs):
            def closure():
                dist = self.gp(self.Xc, self.Xe)
                loss = -1 * mll(dist, self.y.squeeze())
                opt.zero_grad()
                loss.backward()
                return loss
            opt.step(closure)
            if self.verbose and ((epoch + 1) % self.print_every == 0 or epoch == 0):
                print('After %d epochs, loss = %g' % (epoch + 1, closure().item()), flush=True)
        self.gp.eval()
        self.lik.eval()

    def predict(self, Xc, Xe):
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
            pred = self.gp(Xc, Xe)
            if self.pred_likeli:
                pred = self.lik(pred)
            mu_ = pred.mean.reshape(-1, self.num_out)
            var_ = pred.variance.reshape(-1, self.num_out)
        mu = self.yscaler.inverse_transform(mu_)
        var = var_ * self.yscaler.std**2
        return mu, var

    def sample_y(self, Xc, Xe, n_samples=1) -> FloatTensor:
        """Should return (n_samples, Xc.shape[0], self.num_out)"""
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.debug(False):
            if self.pred_likeli:
                pred = self.lik(self.gp(Xc, Xe))
            else:
                pred = self.gp(Xc, Xe)
            samp = pred.rsample(torch.Size((n_samples,))).view(n_samples, Xc.shape[0], self.num_out)
            return self.yscaler.inverse_transform(samp)

    def sample_f(self):
        raise NotImplementedError('Thompson sampling is not supported for GP, use `sample_y` instead')

    @property
    def noise(self):
        if self.num_out == 1:
            return (self.gp.likelihood.noise * self.yscaler.std**2).view(self.num_out).detach()
        else:
            return (self.gp.likelihood.noise_covar.noise * self.yscaler.std**2).view(self.num_out).detach()