def test_fantasy_updates_batch(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1)) gp_model.mean_module.initialize(constant=0) likelihood.initialize(noise=exp(1)) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15) optimizer.n_iter = 0 for _ in range(50): optimizer.zero_grad() with gpytorch.settings.debug(False): output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() with gpytorch.settings.fast_pred_var(): # Test the model gp_model.eval() likelihood.eval() test_function_predictions = likelihood(gp_model(test_x)) # Cut data down, and then add back via the fantasy interface gp_model.set_train_data(train_x[:5], train_y[:5], strict=False) likelihood(gp_model(test_x)) fantasy_x = train_x[5:].clone().unsqueeze(0).unsqueeze(-1).repeat( 3, 1, 1).requires_grad_(True) fantasy_y = train_y[5:].unsqueeze(0).repeat(3, 1) fant_model = gp_model.get_fantasy_model(fantasy_x, fantasy_y) fant_function_predictions = likelihood(fant_model(test_x)) self.assertTrue( approx_equal(test_function_predictions.mean, fant_function_predictions.mean[0])) fant_function_predictions.mean.sum().backward() self.assertTrue(fantasy_x.grad is not None)
def test_posterior_latent_gp_and_likelihood_without_optimization(self):
    """Posterior with near-zero lengthscale/noise should interpolate the data.

    With extreme hyperparameters (log values of -10) the posterior mean
    must match the training targets almost exactly, while a far-away test
    point should revert to the prior (mean 0, variance 1).

    NOTE(review): uses the legacy ``Variable``/bounds API — assumes an old
    gpytorch/pytorch version; confirm before modernizing.
    """
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x.data, train_y.data, likelihood)
    # Update bounds to accommodate extreme parameters
    gp_model.covar_module.set_bounds(log_lengthscale=(-10, 10))
    likelihood.set_bounds(log_noise=(-10, 10))
    # Update parameters
    gp_model.covar_module.initialize(log_lengthscale=-10)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=-10)

    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # Let's see how our model does, conditioned with weird hyperparams
    # The posterior should fit all the data
    function_predictions = likelihood(gp_model(train_x))
    self.assertLess(
        torch.norm(function_predictions.mean().data - train_y.data),
        1e-3,
    )
    self.assertLess(torch.norm(function_predictions.var().data), 1e-3)

    # It shouldn't fit much else though
    test_function_predictions = gp_model(Variable(torch.Tensor([1.1])))
    self.assertLess(
        torch.norm(test_function_predictions.mean().data - 0),
        1e-4,
    )
    self.assertLess(torch.norm(test_function_predictions.var().data - 1), 1e-4)
def test_prior(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1), noise_constraint=Positive(), # Prior for this test is looser than default bound ) gp_model = ExactGPModel(None, None, likelihood) # Update lengthscale prior to accommodate extreme parameters gp_model.covar_module.base_kernel.register_prior( "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale" ) gp_model.mean_module.initialize(constant=1.5) gp_model.covar_module.base_kernel.initialize(lengthscale=1) likelihood.initialize(noise=0) if cuda: gp_model.cuda() likelihood.cuda() # Compute posterior distribution gp_model.eval() likelihood.eval() # The model should predict in prior mode function_predictions = likelihood(gp_model(train_x)) correct_variance = gp_model.covar_module.outputscale + likelihood.noise self.assertAllClose(function_predictions.mean, torch.full_like(function_predictions.mean, fill_value=1.5)) self.assertAllClose( function_predictions.variance, correct_variance.squeeze().expand_as(function_predictions.variance) )
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    """Train hyperparameters from a bad initialization; expect small test MAE.

    Starts from deliberately wrong hyperparameters, optimizes the exact
    marginal log likelihood for 50 Adam steps, then requires mean absolute
    error on the held-out points to be below 0.05.

    NOTE(review): uses the legacy ``log_noise``/``.mean()`` API — assumes
    an old gpytorch version; confirm before modernizing.
    """
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(
        list(gp_model.parameters()) + list(likelihood.parameters()),
        lr=0.1,
    )
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_function_predictions = likelihood(gp_model(test_x))
    mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean()))
    self.assertLess(mean_abs_error.data.squeeze()[0], 0.05)
def test_posterior_latent_gp_and_likelihood_without_optimization(self):
    """Posterior with near-zero lengthscale/noise should interpolate the data.

    With log-lengthscale and log-noise forced to -10, the posterior must
    fit the training targets almost exactly while a far-away test point
    reverts to the prior (mean 0, variance 1).

    NOTE(review): uses the legacy prior API (``set_parameter_priors``,
    ``log_transform``) — assumes an old gpytorch version.
    """
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(log_noise_prior=SmoothedBoxPrior(
        exp(-3), exp(3), sigma=0.1, log_transform=True))
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    # Update lengthscale prior to accommodate extreme parameters
    gp_model.covar_module.set_parameter_priors(
        log_lengthscale=SmoothedBoxPrior(
            exp(-10), exp(10), sigma=0.5, log_transform=True))
    gp_model.covar_module.initialize(log_lengthscale=-10)
    likelihood.initialize(log_noise=-10)

    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # Let's see how our model does, conditioned with weird hyperparams
    # The posterior should fit all the data
    with gpytorch.settings.debug(False):
        function_predictions = likelihood(gp_model(train_x))
    self.assertLess(torch.norm(function_predictions.mean() - train_y), 1e-3)
    self.assertLess(torch.norm(function_predictions.var()), 1e-3)

    # It shouldn't fit much else though
    test_function_predictions = gp_model(torch.Tensor([1.1]))
    self.assertLess(torch.norm(test_function_predictions.mean() - 0), 1e-4)
    self.assertLess(torch.norm(test_function_predictions.var() - 1), 1e-4)
def test_prior(self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(None, None, likelihood) # Update lengthscale prior to accommodate extreme parameters gp_model.covar_module.base_kernel.register_prior( "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale") gp_model.mean_module.initialize(constant=1.5) gp_model.covar_module.base_kernel.initialize(lengthscale=1) likelihood.initialize(noise=0) if cuda: gp_model.cuda() likelihood.cuda() # Compute posterior distribution gp_model.eval() likelihood.eval() # The model should predict in prior mode function_predictions = likelihood(gp_model(train_x)) correct_variance = gp_model.covar_module.outputscale + likelihood.noise self.assertLess(torch.norm(function_predictions.mean - 1.5), 1e-3) self.assertLess( torch.norm(function_predictions.variance - correct_variance), 1e-3)
def test_posterior_latent_gp_and_likelihood_without_optimization( self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(-15)) likelihood.initialize(noise=exp(-15)) if cuda: gp_model.cuda() likelihood.cuda() # Compute posterior distribution gp_model.eval() likelihood.eval() # Let's see how our model does, conditioned with weird hyperparams # The posterior should fit all the data with gpytorch.settings.debug(False): function_predictions = likelihood(gp_model(train_x)) self.assertLess(torch.norm(function_predictions.mean - train_y), 1e-3) self.assertLess(torch.norm(function_predictions.variance), 1e-3) # It shouldn't fit much else though test_function_predictions = gp_model( torch.tensor([1.1]).type_as(test_x)) self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4) self.assertLess( torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4)
def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False):
    """With fast predictive variances, variance at train points ~= noise.

    Trains the model, then inflates the likelihood noise so the predictive
    variance at the training inputs is dominated by (and should closely
    match) that noise. Order of the two prediction calls matters: the
    first populates the fast_pred_var cache.

    Args:
        cuda: if True, run the model and likelihood on GPU.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to
        # something they shouldn't be
        likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(1))

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # After training, every parameter should have received a nonzero gradient
        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood to something huge
        # This will make it easy to calculate the variance
        likelihood.noise_covar.raw_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.noise_covar.noise
        var_diff = (test_function_predictions.variance - noise).abs()
        # Relative variance error should be under 5%
        self.assertLess(torch.max(var_diff / noise), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    """Train on one dataset, then predict on a batch of two via set_train_data.

    Optimizes hyperparameters on (train_x1, train_y1), then stacks the
    sine and cosine datasets into a batch dimension and requires MAE
    below 0.05 on both batch elements.

    NOTE(review): uses the legacy ``log_noise``/``.mean()`` API and
    module-level fixtures (train_x1, train_x2, ...) defined elsewhere.
    """
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    gp_model = ExactGPModel(train_x1.data, train_y1.data, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # After training, every parameter should have received a nonzero gradient
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Create data batches
    train_x12 = torch.cat((train_x1.unsqueeze(0), train_x2.unsqueeze(0)), dim=0).contiguous()
    train_y12 = torch.cat((train_y1.unsqueeze(0), train_y2.unsqueeze(0)), dim=0).contiguous()
    test_x12 = torch.cat((test_x1.unsqueeze(0), test_x2.unsqueeze(0)), dim=0).contiguous()

    # Update gp model to use both sine and cosine training data as train data
    gp_model.set_train_data(train_x12, train_y12, strict=False)

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.data.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.data.squeeze().item(), 0.05)
class GPSurrogate(Surrogate):
    """Gaussian-process surrogate model backed by gpytorch.

    Hyperparameters (overridable via keyword args):
        gp_noise:  initial Gaussian likelihood noise.
        opt_iters: default number of Adam iterations in ``fit``.
        lr:        Adam learning rate.
    """

    def __init__(self, **hps):
        super().__init__()
        # Start from the defaults, letting caller-supplied values win.
        self.hps = {'gp_noise': 1e-4, 'opt_iters': 200, 'lr': 0.05, **hps}
        self.likelihood = GaussianLikelihood()
        self.likelihood.initialize(noise=self.hps['gp_noise'])

    def fit(self, x, y, iters=None):
        """Fit a GP regressor to numpy arrays ``x``/``y``; returns the model.

        Args:
            x, y:  training inputs/targets as numpy arrays (cast to float32).
            iters: optimization steps; falls back to hps['opt_iters'] when
                   falsy (None or 0).
        """
        if not iters:
            iters = self.hps['opt_iters']
        xs = torch.from_numpy(x.astype(np.float32))
        ys = torch.from_numpy(y.astype(np.float32))
        self.regressor = GPRegressionModel(xs, ys, self.likelihood)

        # Put model and likelihood into training mode before optimizing.
        self.regressor.train()
        self.likelihood.train()

        opt = torch.optim.Adam(self.regressor.parameters(), lr=self.hps['lr'])
        mll = ExactMarginalLogLikelihood(self.likelihood, self.regressor)

        for _ in range(iters):
            opt.zero_grad()
            # Minimize the negative exact marginal log likelihood.
            (-mll(self.regressor(xs), ys)).backward()
            opt.step()
        return self.regressor

    def predict(self, x, var=False):
        """Predict at numpy inputs ``x``; optionally return variances too.

        Returns the predictive mean as a numpy array, or a
        (mean, variance) pair when ``var`` is True.
        """
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            query = torch.from_numpy(x.astype(np.float32))
            # Switch to evaluation mode for posterior predictions.
            self.regressor.eval()
            self.likelihood.eval()
            posterior = self.likelihood(self.regressor(query))
            if var:
                return posterior.mean.numpy(), posterior.variance.numpy()
            return posterior.mean.numpy()
def test_posterior_latent_gp_and_likelihood_with_optimization( self, cuda=False, checkpoint=0): train_x, test_x, train_y, test_y = self._get_data( cuda=cuda, num_data=(1000 if checkpoint else 11), add_noise=bool(checkpoint), ) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1)) gp_model.mean_module.initialize(constant=0) likelihood.initialize(noise=exp(1)) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15) optimizer.n_iter = 0 with gpytorch.beta_features.checkpoint_kernel( checkpoint), gpytorch.settings.fast_pred_var(): for _ in range(20 if checkpoint else 50): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() # Test the model gp_model.eval() likelihood.eval() with gpytorch.settings.skip_posterior_variances(True): test_function_predictions = likelihood(gp_model(test_x)) mean_abs_error = torch.mean( torch.abs(test_y - test_function_predictions.mean)) self.assertLess(mean_abs_error.item(), 0.05)
def test_posterior_latent_gp_and_likelihood_fast_pred_var(self):
    """With fast predictive variances, variance at train points ~= noise.

    Trains the model, then inflates the likelihood noise so the predictive
    variance at the training inputs should closely match that noise. The
    first prediction call exists to populate the fast_pred_var cache.

    NOTE(review): uses the legacy ``gpytorch.fast_pred_var``/``log_noise``
    API and a model with ``rbf_covar_module`` — assumes an old gpytorch
    version; confirm before modernizing.
    """
    with gpytorch.fast_pred_var():
        # We're manually going to set the hyperparameters to
        # something they shouldn't be
        likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
        gp_model = ExactGPModel(train_x.data, train_y.data, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.rbf_covar_module.initialize(log_lengthscale=1)
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(log_noise=1)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # After training, every parameter should have received a nonzero gradient
        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood to something huge
        # This will make it easy to calculate the variance
        likelihood.log_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.log_noise.exp()
        var_diff = (test_function_predictions.var() - noise).abs()
        # Relative variance error should be under 5%
        self.assertLess(torch.max(var_diff.data / noise.data), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization( self, cuda=False): train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood( noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.rbf_covar_module.initialize(log_lengthscale=1) gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) if cuda: gp_model.cuda() likelihood.cuda() # Find optimal model hyperparameters gp_model.train() likelihood.train() optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1) optimizer.n_iter = 0 with gpytorch.settings.debug(False): for _ in range(75): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) loss.backward() optimizer.n_iter += 1 optimizer.step() for param in gp_model.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) for param in likelihood.parameters(): self.assertTrue(param.grad is not None) self.assertGreater(param.grad.norm().item(), 0) optimizer.step() # Test the model gp_model.eval() likelihood.eval() test_function_predictions = likelihood(gp_model(test_x)) mean_abs_error = torch.mean( torch.abs(test_y - test_function_predictions.mean)) self.assertLess(mean_abs_error.squeeze().item(), 0.05)
def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self):
    """GPU variant of the optimization test; no-op when CUDA is unavailable.

    Trains hyperparameters on CUDA tensors and requires MAE on held-out
    points below 0.05.

    NOTE(review): the optimizer is built from ``gp_model.parameters()``
    only, yet likelihood parameter gradients are asserted nonzero —
    presumably the likelihood is registered as a submodule of the model;
    confirm against ExactGPModel.
    """
    if torch.cuda.is_available():
        # We're manually going to set the hyperparameters to
        # something they shouldn't be
        likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3)).cuda()
        gp_model = ExactGPModel(
            train_x.data.cuda(), train_y.data.cuda(), likelihood
        ).cuda()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.covar_module.initialize(log_lengthscale=1)
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(log_noise=1)

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x.cuda())
            loss = -mll(output, train_y.cuda())
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # After training, every parameter should have received a nonzero gradient
        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x.cuda()))
        mean_abs_error = torch.mean(
            torch.abs(test_y.cuda() - test_function_predictions.mean())
        )
        self.assertLess(mean_abs_error.data.squeeze().item(), 0.05)
def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    """Posterior with near-zero lengthscale/noise should interpolate the data.

    With lengthscale and noise forced to exp(-10) the posterior must fit
    the training targets, while a far-away test point reverts to the prior
    (mean 0, variance == outputscale).

    Args:
        cuda: if True, run the model and likelihood on GPU.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    with gpytorch.settings.debug(False):
        # We're manually going to set the hyperparameters to be ridiculous
        likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.25),
            noise_constraint=Positive(),
        )
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        # Update lengthscale prior to accommodate extreme parameters
        gp_model.rbf_covar_module.register_prior(
            "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale")
        gp_model.rbf_covar_module.initialize(lengthscale=exp(-10))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(-10))

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Compute posterior distribution
        gp_model.eval()
        likelihood.eval()

        # Let's see how our model does, conditioned with weird hyperparams
        # The posterior should fit all the data
        function_predictions = likelihood(gp_model(train_x))

        self.assertLess(torch.norm(function_predictions.mean - train_y), 1e-3)
        self.assertLess(torch.norm(function_predictions.variance), 5e-3)

        # It shouldn't fit much else though
        # type_as keeps the probe point on the same device/dtype as the data
        test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))

        self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4)
        self.assertLess(
            torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4)
def test_train_on_batch_test_on_batch(self):
    """Train and predict with a batch (size 2) GP model; check both MAEs.

    Optimizes the summed batch marginal log likelihood for 50 Adam steps,
    then requires MAE below 0.05 on each batch element.

    NOTE(review): relies on module-level batch fixtures (train_x12,
    test_x12, test_y1, test_y2) defined elsewhere in the file.
    """
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=2)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(log_lengthscale=-1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=0)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x12)
        # Sum over the batch dimension to get a scalar loss
        loss = -mll(output, train_y12).sum()
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # After training, every parameter should have received a nonzero gradient
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
    """Posterior with near-zero lengthscale/noise should interpolate the data.

    With lengthscale and noise set to exp(-15) the posterior must match
    the training targets (variance ~0) while a far-away test point reverts
    to the prior (mean 0, variance == outputscale). Numerical warnings
    from the tiny noise are deliberately suppressed.

    Args:
        cuda: if True, run the model and likelihood on GPU.
    """
    warnings.simplefilter("ignore", gpytorch.utils.warnings.NumericalWarning)
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(noise_constraint=Positive())  # This test actually wants a noise < 1e-4
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(-15))
    likelihood.initialize(noise=exp(-15))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # Let's see how our model does, conditioned with weird hyperparams
    # The posterior should fit all the data
    with gpytorch.settings.debug(False):
        function_predictions = likelihood(gp_model(train_x))

    self.assertAllClose(function_predictions.mean, train_y)
    self.assertAllClose(function_predictions.variance, torch.zeros_like(function_predictions.variance))

    # It shouldn't fit much else though
    # type_as keeps the probe point on the same device/dtype as the data
    test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))

    self.assertAllClose(test_function_predictions.mean, torch.zeros_like(test_function_predictions.mean))
    self.assertAllClose(
        test_function_predictions.variance,
        gp_model.covar_module.outputscale.expand_as(test_function_predictions.variance),
    )
def test_prior(self):
    """A model with no training data should predict from the GP prior.

    With ``train_inputs=None`` the predictive mean must be the constant
    mean (1.5) and the predictive variance must be 2 (kernel variance 1
    plus likelihood noise 1, given log values of 0 — TODO confirm against
    the kernel used by ExactGPModel).

    NOTE(review): uses the legacy prior API (``set_parameter_priors``,
    ``log_transform``, ``.mean()``/``.var()``) — assumes an old gpytorch
    version.
    """
    # We're manually going to set the hyperparameters to be ridiculous
    likelihood = GaussianLikelihood(log_noise_prior=SmoothedBoxPrior(
        exp(-3), exp(3), sigma=0.1, log_transform=True))
    # No training data -> prior mode
    gp_model = ExactGPModel(None, None, likelihood)
    # Update lengthscale prior to accommodate extreme parameters
    gp_model.covar_module.set_parameter_priors(
        log_lengthscale=SmoothedBoxPrior(
            exp(-10), exp(10), sigma=0.5, log_transform=True))
    gp_model.mean_module.initialize(constant=1.5)
    gp_model.covar_module.initialize(log_lengthscale=0)
    likelihood.initialize(log_noise=0)

    # Compute posterior distribution
    gp_model.eval()
    likelihood.eval()

    # The model should predict in prior mode
    function_predictions = likelihood(gp_model(train_x))

    self.assertLess(torch.norm(function_predictions.mean() - 1.5), 1e-3)
    self.assertLess(torch.norm(function_predictions.var() - 2), 1e-3)
def test_fantasy_updates(self, cuda=False):
    """Check fantasy-model predictions AND their input gradients match the full model.

    Trains on all data, records the gradient of the predictive mean w.r.t.
    the last training points, then rebuilds those points through
    ``get_fantasy_model`` and asserts both the predictions and the
    gradients agree (the gradient check is loose; see note at the end).

    Args:
        cuda: if True, run the model and likelihood on GPU.
    """
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.15)
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # After training, every parameter should have received a nonzero gradient
    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    optimizer.step()

    # Track gradients w.r.t. the training inputs themselves
    train_x.requires_grad = True
    gp_model.set_train_data(train_x, train_y)
    with gpytorch.settings.fast_pred_var(), gpytorch.settings.detach_test_caches(False):
        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_function_predictions = likelihood(gp_model(test_x))
        test_function_predictions.mean.sum().backward()

        # Reference gradient for the points we will later re-add as fantasies
        real_fant_x_grad = train_x.grad[5:].clone()
        train_x.grad = None
        train_x.requires_grad = False
        gp_model.set_train_data(train_x, train_y)

        # Cut data down, and then add back via the fantasy interface
        gp_model.set_train_data(train_x[:5], train_y[:5], strict=False)
        # Extra forward pass populates prediction caches before fantasizing
        likelihood(gp_model(test_x))

        fantasy_x = train_x[5:].clone().detach().requires_grad_(True)
        fant_model = gp_model.get_fantasy_model(fantasy_x, train_y[5:])
        fant_function_predictions = likelihood(fant_model(test_x))

        self.assertAllClose(test_function_predictions.mean, fant_function_predictions.mean, atol=1e-4)

        fant_function_predictions.mean.sum().backward()
        self.assertTrue(fantasy_x.grad is not None)

        relative_error = torch.norm(real_fant_x_grad - fantasy_x.grad) / fantasy_x.grad.norm()
        self.assertLess(relative_error, 15e-1)  # This was only passing by a hair before