def test_indexed_train_and_eval(self):
    likelihood = GaussianLikelihood()
    model = LMCModel()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.01,
    )

    # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

    # Create some random task indices (4 tasks)
    arange = torch.arange(train_x.size(0))
    train_i = torch.rand(train_x.size(0)).mul(4).floor().long()

    for _ in range(400):
        optimizer.zero_grad()
        output = model(train_x, task_indices=train_i)
        loss = -mll(output, train_y[arange, train_i])
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    model.eval()
    likelihood.eval()

    # Make predictions for the indexed training points, and check the MAE
    with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
        predictions = likelihood(model(train_x, task_indices=train_i))
        mean_abs_error = torch.mean(torch.abs(train_y[arange, train_i] - predictions.mean))
        self.assertLess(mean_abs_error.squeeze().item(), 0.15)

        # Smoke test for getting predictive uncertainties
        lower, upper = predictions.confidence_region()
        self.assertEqual(lower.shape, train_i.shape)
        self.assertEqual(upper.shape, train_i.shape)
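# For context: a minimal sketch of the kind of LMCModel the test above assumes.
# The number of latents/tasks, the inducing-point sizes, and all names here are
# illustrative assumptions following gpytorch's LMCVariationalStrategy pattern,
# not necessarily the test suite's actual fixture.
class LMCModel(gpytorch.models.ApproximateGP):
    def __init__(self):
        # 3 latent GPs, each with 16 inducing points in 1d
        inducing_points = torch.rand(3, 16, 1)
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([3])
        )
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=4,  # matches the 4-valued task indices drawn in the test
            num_latents=3,
            latent_dim=-1,
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([3]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([3])),
            batch_shape=torch.Size([3]),
        )

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))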
def test_sgpr_mean_abs_error_cuda(self):
    if torch.cuda.is_available():
        train_x, train_y, test_x, test_y = make_data(cuda=True)
        likelihood = GaussianLikelihood().cuda()
        gp_model = GPRegressionModel(train_x, train_y, likelihood).cuda()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        for _ in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_preds = likelihood(gp_model(test_x)).mean
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
        self.assertLess(mean_abs_error.squeeze().item(), 0.02)
def test_regression_error_shared_inducing_locations(self):
    train_x, train_y = train_data()
    likelihood = GaussianLikelihood()
    inducing_points = torch.linspace(0, 1, 25).unsqueeze(-1)
    model = SVGPRegressionModel(inducing_points)
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(-1))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y).sum()
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y[0, :] - test_preds[0, :]) / 2)
    mean_abs_error2 = torch.mean(torch.abs(train_y[1, :] - test_preds[1, :]) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
    self.assertLess(mean_abs_error2.item(), 1e-1)
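# For context: a minimal sketch of the single-output SVGPRegressionModel used
# throughout these variational tests, following gpytorch's standard ApproximateGP
# pattern. The exact fixture varies per test (some pass a variational distribution
# class, a learn_locs flag, or two inducing-point sets); the structure and names
# here are illustrative assumptions.
class SVGPRegressionModel(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(0)
        )
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))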
def test_fantasy_updates_batch(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)

    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15)
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    with gpytorch.settings.fast_pred_var():
        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))

        # Cut the training data down, then add the rest back via the fantasy interface
        gp_model.set_train_data(train_x[:5], train_y[:5], strict=False)
        likelihood(gp_model(test_x))

        fantasy_x = train_x[5:].clone().unsqueeze(0).unsqueeze(-1).repeat(3, 1, 1).requires_grad_(True)
        fantasy_y = train_y[5:].unsqueeze(0).repeat(3, 1)
        fant_model = gp_model.get_fantasy_model(fantasy_x, fantasy_y)
        fant_function_predictions = likelihood(fant_model(test_x))

        self.assertTrue(approx_equal(test_function_predictions.mean, fant_function_predictions.mean[0]))

        fant_function_predictions.mean.sum().backward()
        self.assertTrue(fantasy_x.grad is not None)
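# For context: a minimal sketch of the ExactGPModel these exact-GP tests assume.
# The ConstantMean / ScaleKernel(RBFKernel) structure is inferred from the
# `mean_module.initialize(constant=0)` and `covar_module.base_kernel.initialize(...)`
# calls above; treat the specifics as illustrative assumptions.
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))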
def test_kissgp_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood()
    gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    with gpytorch.settings.max_preconditioner_size(10), gpytorch.settings.max_cg_iterations(30):
        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.2)
        for _ in range(20):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_preds = likelihood(gp_model(test_x)).mean
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
        self.assertLess(mean_abs_error.squeeze().item(), 0.15)
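# For context: a minimal sketch of the KISS-GP GPRegressionModel these tests
# assume, using gpytorch's SKI/grid-interpolation kernel. The grid size and
# dimensionality are illustrative assumptions, and some later tests construct a
# zero-argument variant of this class that wraps fixed data.
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        # SKI: interpolate an RBF kernel onto a dense 1d grid for fast MVMs
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()),
            grid_size=128,
            num_dims=1,
        )

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))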
def test_regression_error(self, cuda=False, skip_logdet_forward=False, cholesky=False):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25))
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)

    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    with gpytorch.settings.max_cholesky_size(math.inf if cholesky else 0), \
            gpytorch.settings.skip_logdet_forward(skip_logdet_forward), \
            warnings.catch_warnings(record=True) as w, \
            patch("gpytorch.utils.linear_cg", new=_wrapped_cg) as linear_cg_mock:
        for _ in range(150):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Make sure CG was called (or not), and no warnings were thrown
        self.assertEqual(len(w), 0)
        if cholesky:
            self.assertFalse(linear_cg_mock.called)
        else:
            self.assertTrue(linear_cg_mock.called)

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def test_regression_error_cuda(self):
    if torch.cuda.is_available():
        train_x, train_y = train_data(cuda=True)
        likelihood = GaussianLikelihood().cuda()
        model = SVGPRegressionModel(torch.linspace(0, 1, 25)).cuda()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
        for _ in range(150):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-1)
def test_regression_error_full(self, skip_logdet_forward=False):
    train_x, train_y = train_data()
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(inducing_points=train_x, learn_locs=False)
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    with gpytorch.settings.skip_logdet_forward(skip_logdet_forward):
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def test_kissgp_gp_mean_abs_error(self):
    train_x, train_y, test_x, test_y = make_data()
    train_dataset = TensorDataset(train_x, train_y)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)

    model = GPRegressionModel()
    likelihood = GaussianLikelihood()

    # Optimizer over both model and likelihood parameters
    optimizer = torch.optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.01,
    )

    # Our loss object: the VariationalMarginalLogLikelihood
    mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=train_y.size(0))

    # The training loop
    def train(n_epochs=15):
        # A learning rate scheduler drops the learning rate by a factor of 10
        # after 3/4 of training, which helps the model converge to a minimum
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[int(0.75 * n_epochs)], gamma=0.1
        )
        for _ in range(n_epochs):
            for x_batch, y_batch in train_loader:
                x_batch = x_batch.float()
                y_batch = y_batch.float()
                optimizer.zero_grad()
                output = model(x_batch)
                loss = -mll(output, y_batch)
                loss.backward()
                optimizer.step()
            scheduler.step()

    train()

    for _, param in model.named_parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(test_x)).mean
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    self.assertLess(mean_abs_error.squeeze().item(), 0.1)
def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to something they shouldn't be
        likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(1))

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood noise to something huge
        # This will make it easy to calculate the variance
        likelihood.noise_covar.raw_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.noise_covar.noise
        var_diff = (test_function_predictions.variance - noise).abs()
        self.assertLess(torch.max(var_diff / noise), 0.05)
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 0.014)

    if distribution_cls is gpytorch.variational.CholeskyVariationalDistribution:
        # Finally, test fantasization. We only check that tossing the entire
        # training set into the model keeps the MAE small.
        model.likelihood = likelihood
        fant_model = model.get_fantasy_model(train_x, train_y)
        fant_preds = fant_model.likelihood(fant_model(train_x)).mean.squeeze()
        updated_abs_error = torch.mean(torch.abs(train_y - fant_preds) / 2)
        # TODO: figure out why this error is worse than before
        self.assertLess(updated_abs_error.item(), 0.15)
def test_kissgp_gp_mean_abs_error(self):
    train_x, train_y, test_x, test_y = make_data()
    train_dataset = TensorDataset(train_x, train_y)
    loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
    gp_model = GPRegressionModel()
    likelihood = GaussianLikelihood()
    mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, gp_model, n_data=len(train_y))

    # Optimize the model
    gp_model.train()
    likelihood.train()
    with gpytorch.beta_features.diagonal_correction():
        optimizer = optim.SGD(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15], gamma=0.1)
        for _ in range(20):
            for x_batch, y_batch in loader:
                x_batch = x_batch.float()
                y_batch = y_batch.float()
                optimizer.zero_grad()
                output = gp_model(x_batch)
                loss = -mll(output, y_batch)
                loss.backward()
                optimizer.step()
            scheduler.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    self.assertLess(mean_abs_error.data.squeeze().item(), 0.1)
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x1.data, train_y1.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1)
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Create data batches
    train_x12 = torch.cat((train_x1.unsqueeze(0), train_x2.unsqueeze(0)), dim=0).contiguous()
    train_y12 = torch.cat((train_y1.unsqueeze(0), train_y2.unsqueeze(0)), dim=0).contiguous()
    test_x12 = torch.cat((test_x1.unsqueeze(0), test_x2.unsqueeze(0)), dim=0).contiguous()

    # Update the GP model to use both the sine and cosine training data
    gp_model.set_train_data(train_x12, train_y12, strict=False)

    # Make predictions for both sets of test points, and check MAEs
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.data.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.data.squeeze().item(), 0.05)
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)

    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
    with warnings.catch_warnings(record=True) as ws, _cg_mock as cg_mock:
        for _ in range(150):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-1)

        # Make sure CG was never called, and no ExtraComputationWarnings were thrown
        self.assertFalse(cg_mock.called)
        self.assertFalse(any(issubclass(w.category, ExtraComputationWarning) for w in ws))
def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False, checkpoint=0):
    train_x, test_x, train_y, test_y = self._get_data(
        cuda=cuda,
        num_data=(1000 if checkpoint else 11),
        add_noise=bool(checkpoint),
    )

    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15)
    with gpytorch.beta_features.checkpoint_kernel(checkpoint), gpytorch.settings.fast_pred_var():
        for _ in range(20 if checkpoint else 50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()
    with gpytorch.settings.skip_posterior_variances(True):
        test_function_predictions = likelihood(gp_model(test_x))
    mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean))
    self.assertLess(mean_abs_error.item(), 0.05)
def test_multitask_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood(log_noise_bounds=(-6, 6))
    gp_model = MultitaskGPModel(
        (
            torch.cat([train_x.data, train_x.data]),
            torch.cat([y1_inds.data, y2_inds.data]),
        ),
        torch.cat([train_y1.data, train_y2.data]),
        likelihood,
    )
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    for _ in range(100):
        optimizer.zero_grad()
        output = gp_model(torch.cat([train_x, train_x]), torch.cat([y1_inds, y2_inds]))
        loss = -mll(output, torch.cat([train_y1, train_y2]))
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds_task_1 = likelihood(gp_model(test_x, y1_inds_test)).mean()
    mean_abs_error_task_1 = torch.mean(torch.abs(test_y1 - test_preds_task_1))
    self.assertLess(mean_abs_error_task_1.data.squeeze().item(), 0.05)
    test_preds_task_2 = likelihood(gp_model(test_x, y2_inds_test)).mean()
    mean_abs_error_task_2 = torch.mean(torch.abs(test_y2 - test_preds_task_2))
    self.assertLess(mean_abs_error_task_2.data.squeeze().item(), 0.05)
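# For context: a minimal sketch of a Hadamard-style MultitaskGPModel consistent
# with how the model above is called (inputs plus task indices). It follows
# gpytorch's standard Hadamard multitask example with an IndexKernel over tasks;
# the names and rank are illustrative assumptions, and the actual fixture uses an
# older API than shown here.
class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_inputs, train_y, likelihood):
        super().__init__(train_inputs, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.RBFKernel()
        # Learns a low-rank covariance across the two tasks
        self.task_covar_module = gpytorch.kernels.IndexKernel(num_tasks=2, rank=1)

    def forward(self, x, i):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        covar_i = self.task_covar_module(i)
        # Hadamard product of the data covariance and the task covariance
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x.mul(covar_i))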
def test_posterior_latent_gp_and_likelihood_fast_pred_var(self):
    with gpytorch.fast_pred_var():
        # We're manually going to set the hyperparameters to something they shouldn't be
        likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
        gp_model = ExactGPModel(train_x.data, train_y.data, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.rbf_covar_module.initialize(log_lengthscale=1)
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(log_noise=1)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood noise to something huge
        # This will make it easy to calculate the variance
        likelihood.log_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.log_noise.exp()
        var_diff = (test_function_predictions.var() - noise).abs()
        self.assertLess(torch.max(var_diff.data / noise.data), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)

    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.rbf_covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    with gpytorch.settings.debug(False):
        for _ in range(75):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean))
        self.assertLess(mean_abs_error.squeeze().item(), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_function_predictions = likelihood(gp_model(test_x))
    mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean()))
    self.assertLess(mean_abs_error.data.squeeze().item(), 0.05)
def test_kissgp_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood()
    gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.2)
    for _ in range(20):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    self.assertLess(mean_abs_error.data.squeeze().item(), 0.1)
def test_spectral_mixture_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood(log_noise_bounds=(-5, 5))
    gp_model = SpectralMixtureGPModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    with gpytorch.settings.num_trace_samples(100):
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    # The spectral mixture kernel should be able to extrapolate the sine function trivially
    self.assertLess(mean_abs_error.data.squeeze().item(), 0.15)
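# For context: a minimal sketch of a SpectralMixtureGPModel along the lines this
# test assumes, following gpytorch's standard spectral-mixture example. The number
# of mixtures and the data-driven initialization are illustrative assumptions.
class SpectralMixtureGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.SpectralMixtureKernel(num_mixtures=4)
        # Initialize mixture weights/means/scales from the empirical data spectrum
        self.covar_module.initialize_from_data(train_x, train_y)

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))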
def test_kissgp_gp_fast_pred_var():
    with gpytorch.fast_pred_var():
        train_x, train_y, test_x, test_y = make_data()
        likelihood = GaussianLikelihood()
        gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        for _ in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood noise to something huge
        # This will make it easy to calculate the variance
        likelihood.log_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.log_noise.exp()
        var_diff = (test_function_predictions.var() - noise).abs()
        assert torch.max(var_diff.data / noise.data) < 0.05
def test_train_on_batch_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)),
        batch_shape=torch.Size([2]),
    )
    gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_shape=torch.Size([2]))
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x12)
        loss = -mll(output, train_y12, train_x12).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # First test on non-batch data
    non_batch_predictions = likelihood(gp_model(test_x1))
    preds1 = non_batch_predictions.mean
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1[0]))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)

    # Make predictions for both sets of test points, and check MAEs
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.1)

    # Smoke test for batch-mode derivatives
    test_x_param = torch.nn.Parameter(test_x12.data)
    batch_predictions = likelihood(gp_model(test_x_param))
    batch_predictions.mean.sum().backward()
    self.assertTrue(test_x_param.grad is not None)

    # Smoke test for non-batch-mode derivatives
    test_x_param = torch.nn.Parameter(test_x1.data)
    batch_predictions = likelihood(gp_model(test_x_param))
    batch_predictions.mean.sum().backward()
    self.assertTrue(test_x_param.grad is not None)
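# For context: a minimal sketch of a batch-mode ExactGPModel matching the
# batch_shape keyword used above, so the two sine/cosine datasets get independent
# hyperparameters. The mean/kernel structure is an illustrative assumption.
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, batch_shape=torch.Size()):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape)
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=batch_shape),
            batch_shape=batch_shape,
        )

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))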
def test_kissgp_gp_mean_abs_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y, test_x, test_y = make_data(cuda=True)
        likelihood = GaussianLikelihood().cuda()
        gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood).cuda()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        for _ in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_preds = likelihood(gp_model(test_x)).mean()
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
        assert mean_abs_error.data.squeeze().item() < 0.02
def test_regression_error(
    self,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    train_x, train_y = train_data()
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 128), torch.linspace(0, 1, 16))
    mll = mll_cls(likelihood, model, num_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)

    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
    with _cg_mock as cg_mock:
        for _ in range(75):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-1)

        # Make sure CG was never called
        self.assertFalse(cg_mock.called)
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam([{"params": model.parameters()}, {"params": likelihood.parameters()}], lr=0.01)
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def test_kissgp_gp_fast_pred_var(self):
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
        train_x, train_y, test_x, test_y = make_data()
        likelihood = GaussianLikelihood()
        gp_model = GPRegressionModel(train_x, train_y, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        for _ in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood noise to something huge
        # This will make it easy to calculate the variance
        likelihood.log_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.log_noise.exp()
        var_diff = (test_function_predictions.variance - noise).abs()
        self.assertLess(torch.max(var_diff / noise), 0.05)
def test_train_on_single_set_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        log_noise_prior=gpytorch.priors.NormalPrior(
            loc=torch.zeros(1), scale=torch.ones(1), log_transform=True
        )
    )
    gp_model = ExactGPModel(train_x1, train_y1, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.1)
def test_train_on_batch_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=2)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(log_lengthscale=-1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=0)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x12)
        loss = -mll(output, train_y12).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def svgp(args, dataloader, test_x, kernel=None):
    N = len(dataloader.dataset)
    inducing_points, _ = kmeans2(dataloader.dataset.train_x.numpy(), args.n_inducing, minit="points")
    inducing_points = torch.from_numpy(inducing_points).squeeze(-1)
    model = SVGP(inducing_points, kernel)

    # p(y|f)
    likelihood = GaussianLikelihood()

    model.train()
    likelihood.train()
    optimizer = optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=args.learning_rate,
    )
    mll = VariationalELBO(likelihood, model, N, combine_terms=False)

    for epoch in range(args.n_iters):
        for train_x, train_y in dataloader:
            train_x, train_y = train_x.squeeze(), train_y.squeeze()
            optimizer.zero_grad()
            output = model(train_x)
            log_ll, kl_div, log_prior = mll(output, train_y)
            loss = -(log_ll - kl_div + log_prior)
            loss.backward()
            optimizer.step()
        if epoch % 50 == 0:
            print("Iter {}, lower bound = {:.4f}, obs_var = {:.4f}".format(
                epoch, -loss.item(), likelihood.noise.item()))

    test_stats = TestStats(None, None)
    model.eval()
    likelihood.eval()
    with torch.no_grad():
        observed_pred = likelihood(model(test_x))
        test_y_mean = observed_pred.mean
        test_y_var = observed_pred.variance
        test_stats = test_stats._replace(test_y_mean=test_y_mean, test_y_var=test_y_var)
    return test_stats
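# For context: the TestStats container and SVGP model used by the helper above
# are not defined in this excerpt. Below is a minimal sketch consistent with how
# they are used (the namedtuple fields match the _replace call; the model follows
# the standard gpytorch ApproximateGP pattern, and the kernel default is an
# illustrative assumption).
from collections import namedtuple

TestStats = namedtuple("TestStats", ["test_y_mean", "test_y_var"])

class SVGP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points, kernel=None):
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(0)
        )
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        # Fall back to a scaled RBF kernel if no kernel is supplied
        self.covar_module = kernel if kernel is not None else gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))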