def test_regression_error(self, cuda=False):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    inducing_points = torch.linspace(0, 1, 25).unsqueeze(-1).repeat(2, 1, 1)
    model = SVGPRegressionModel(inducing_points)
    if cuda:
        likelihood.cuda()
        model.cuda()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(-1))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.01,
    )
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss = loss.sum()  # reduce over the batch dimension
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y[0, :] - test_preds[0, :]) / 2)
    mean_abs_error2 = torch.mean(torch.abs(train_y[1, :] - test_preds[1, :]) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
    self.assertLess(mean_abs_error2.item(), 1e-1)
def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False, checkpoint=0):
    train_x, test_x, train_y, test_y = self._get_data(
        cuda=cuda,
        num_data=(1000 if checkpoint else 11),
        add_noise=bool(checkpoint),
    )
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(lengthscale=exp(1))
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(noise=exp(1))

    if cuda:
        gp_model.cuda()
        likelihood.cuda()

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.15)
    with gpytorch.beta_features.checkpoint_kernel(checkpoint), gpytorch.settings.fast_pred_var():
        for _ in range(20 if checkpoint else 50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Test the model
        gp_model.eval()
        likelihood.eval()
        with gpytorch.settings.skip_posterior_variances(True):
            test_function_predictions = likelihood(gp_model(test_x))
        mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean))

    self.assertLess(mean_abs_error.item(), 0.05)
def __init__(self):
    likelihood = GaussianLikelihood(log_noise_bounds=(-3, 3))
    super(KissGPModel, self).__init__(likelihood)
    self.mean_module = ConstantMean(constant_bounds=(-1, 1))
    covar_module = RBFKernel(log_lengthscale_bounds=(-100, 100))
    covar_module.log_lengthscale.data = torch.FloatTensor([-2])
    self.grid_covar_module = GridInterpolationKernel(covar_module)
    self.initialize_interpolation_grid(300, grid_bounds=[(0, 1)])
def init():
    # Note: `init` reads `self` from the enclosing scope (it is defined inside a test method).
    r_lik = GaussianLikelihood()
    r_kernel = GridInterpolationKernelWithFantasy(
        RBFKernel(), grid_size=self.grid_size, grid_bounds=[(-4.0, 14.0)]
    ).double()
    r_model = RegularExactGP(self.xs, self.labels, r_lik, r_kernel, ZeroMean())

    lik = GaussianLikelihood()
    kernel = GridInterpolationKernelWithFantasy(
        RBFKernel(), grid_size=self.grid_size, grid_bounds=[(-4.0, 14.0)]
    ).double()
    model = OnlineWoodburyGP(self.xs, self.labels, lik, kernel, ZeroMean())
    return r_model, model
def test_kissgp_gp_mean_abs_error(self):
    train_x, train_y, test_x, test_y = make_data()
    train_dataset = TensorDataset(train_x, train_y)
    loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
    gp_model = GPRegressionModel()
    likelihood = GaussianLikelihood()
    mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, gp_model, n_data=len(train_y))

    # Optimize the model
    gp_model.train()
    likelihood.train()
    with gpytorch.beta_features.diagonal_correction():
        optimizer = optim.SGD(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15], gamma=0.1)
        for _ in range(20):
            for x_batch, y_batch in loader:
                x_batch = x_batch.float()
                y_batch = y_batch.float()
                optimizer.zero_grad()
                output = gp_model(x_batch)
                loss = -mll(output, y_batch)
                loss.backward()
                optimizer.step()
            scheduler.step()  # step the LR schedule once per epoch, after the optimizer steps

        for param in gp_model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    self.assertLess(mean_abs_error.squeeze().item(), 0.1)
def test_regression_error(
    self,
    cuda=False,
    mll_cls=gpytorch.mlls.VariationalELBO,
    distribution_cls=gpytorch.variational.CholeskyVariationalDistribution,
):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(torch.linspace(0, 1, 25), distribution_cls)
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.01,
    )

    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
    with _cg_mock as cg_mock:
        for _ in range(150):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-1)

        # Make sure CG was not called
        self.assertFalse(cg_mock.called)
def __init__(self, train_X, train_Y):
    self._validate_tensor_args(train_X, train_Y)
    train_Y = train_Y.squeeze(-1)
    likelihood = GaussianLikelihood()
    super().__init__(train_X, train_Y, likelihood)
    self.mean_module = ConstantMean()
    self.covar_module = ScaleKernel(RBFKernel())
    self.to(train_X)
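For orientation, here is a minimal usage sketch for a model with this constructor. `SimpleGP` is a hypothetical name for the class defined above, and the toy data and training loop follow the same exact-marginal-log-likelihood pattern used throughout these tests; none of these names come from the original snippet.

# Usage sketch (assumption: `SimpleGP` is the class whose constructor appears above;
# inputs are (n, d), targets (n, 1) before the internal squeeze).
import torch
import gpytorch

train_X = torch.linspace(0, 1, 20).unsqueeze(-1)              # (20, 1) inputs
train_Y = torch.sin(6 * train_X) + 0.1 * torch.randn(20, 1)   # (20, 1) noisy targets

model = SimpleGP(train_X, train_Y)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)

model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
for _ in range(50):
    optimizer.zero_grad()
    loss = -mll(model(train_X), train_Y.squeeze(-1))
    loss.backward()
    optimizer.step()

model.eval()
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    posterior = model.likelihood(model(train_X))  # predictive distribution with observation noise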
def test_train_on_batch_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)),
        batch_shape=torch.Size([2]),
    )
    gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_shape=torch.Size([2]))
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x12)
        loss = -mll(output, train_y12, train_x12).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # First test on non-batch
    non_batch_predictions = likelihood(gp_model(test_x1))
    preds1 = non_batch_predictions.mean
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1[0]))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.1)

    # Smoke test for batch mode derivatives failing
    test_x_param = torch.nn.Parameter(test_x12.data)
    batch_predictions = likelihood(gp_model(test_x_param))
    batch_predictions.mean.sum().backward()
    self.assertTrue(test_x_param.grad is not None)

    # Smoke test for non-batch mode derivatives failing
    test_x_param = torch.nn.Parameter(test_x1.data)
    batch_predictions = likelihood(gp_model(test_x_param))
    batch_predictions.mean.sum().backward()
    self.assertTrue(test_x_param.grad is not None)
def test_simple_model_list_gp_regression(self, cuda=False):
    train_x1 = torch.linspace(0, 0.95, 25) + 0.05 * torch.rand(25)
    train_x2 = torch.linspace(0, 0.95, 15) + 0.05 * torch.rand(15)
    train_y1 = torch.sin(train_x1 * (2 * math.pi)) + 0.2 * torch.randn_like(train_x1)
    train_y2 = torch.cos(train_x2 * (2 * math.pi)) + 0.2 * torch.randn_like(train_x2)

    likelihood1 = GaussianLikelihood()
    model1 = ExactGPModel(train_x1, train_y1, likelihood1)
    likelihood2 = GaussianLikelihood()
    model2 = ExactGPModel(train_x2, train_y2, likelihood2)
    model = IndependentModelList(model1, model2)
    likelihood = LikelihoodList(model1.likelihood, model2.likelihood)
    if cuda:
        model = model.cuda()

    model.train()
    likelihood.train()
    mll = SumMarginalLogLikelihood(likelihood, model)
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)
    for _ in range(10):
        optimizer.zero_grad()
        output = model(*model.train_inputs)
        loss = -mll(output, model.train_targets)
        loss.backward()
        optimizer.step()

    model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        test_x = torch.linspace(0, 1, 10, device=torch.device("cuda") if cuda else torch.device("cpu"))
        outputs_f = model(test_x, test_x)
        predictions_obs_noise = likelihood(*outputs_f)
    self.assertIsInstance(outputs_f, list)
    self.assertEqual(len(outputs_f), 2)
    self.assertIsInstance(predictions_obs_noise, list)
    self.assertEqual(len(predictions_obs_noise), 2)
def test_kissgp_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood()
    gp_model = GPRegressionModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    with gpytorch.settings.max_preconditioner_size(10), gpytorch.settings.max_cg_iterations(50):
        with gpytorch.beta_features.fast_pred_var():
            # Optimize the model
            gp_model.train()
            likelihood.train()
            optimizer = optim.Adam(gp_model.parameters(), lr=0.01)
            optimizer.n_iter = 0
            for _ in range(15):
                optimizer.zero_grad()
                output = gp_model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)

            # Test the model
            gp_model.eval()
            likelihood.eval()
            test_preds = likelihood(gp_model(test_x)).mean()
            mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    self.assertLess(mean_abs_error.squeeze().item(), 0.2)
def test_train_on_single_set_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood()
    gp_model = ExactGPModel(train_x1, train_y1, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.base_kernel.initialize(log_lengthscale=-1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=0)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Update gp model to use both sine and cosine training data as train data
    gp_model.set_train_data(train_x12, train_y12, strict=False)

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean()[0]
    preds2 = batch_predictions.mean()[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def test_sgpr_mean_abs_error(self):
    train_x, train_y, test_x, test_y = make_data()
    likelihood = GaussianLikelihood()
    gp_model = GPRegressionModel(train_x, train_y, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(30):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    self.assertLess(mean_abs_error.squeeze().item(), 0.05)
def test_spectral_mixture_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood(log_noise_bounds=(-5, 5))
    gp_model = SpectralMixtureGPModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    with gpytorch.settings.num_trace_samples(100):
        for _ in range(50):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))

    # The spectral mixture kernel should be trivially able to
    # extrapolate the sine function.
    self.assertLess(mean_abs_error.data.squeeze()[0], 0.15)
def test_kissgp_gp_mean_abs_error(self):
    likelihood = GaussianLikelihood()
    gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Optimize the model
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.2)
    optimizer.n_iter = 0
    for _ in range(20):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_preds = likelihood(gp_model(test_x)).mean()
    mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
    self.assertLess(mean_abs_error.data.squeeze()[0], 0.1)
def test_kissgp_gp_fast_pred_var():
    with gpytorch.fast_pred_var():
        train_x, train_y, test_x, test_y = make_data()
        likelihood = GaussianLikelihood()
        gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0
        for i in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()

        # Set the cache
        test_function_predictions = likelihood(gp_model(train_x))

        # Now bump up the likelihood to something huge
        # This will make it easy to calculate the variance
        likelihood.log_noise.data.fill_(3)
        test_function_predictions = likelihood(gp_model(train_x))

        noise = likelihood.log_noise.exp()
        var_diff = (test_function_predictions.var() - noise).abs()
        assert torch.max(var_diff.data / noise.data) < 0.05
def test_kissgp_gp_mean_abs_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y, test_x, test_y = make_data(cuda=True)
        likelihood = GaussianLikelihood().cuda()
        gp_model = GPRegressionModel(train_x.data, train_y.data, likelihood).cuda()
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        # Optimize the model
        gp_model.train()
        likelihood.train()
        optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
        optimizer.n_iter = 0
        for i in range(25):
            optimizer.zero_grad()
            output = gp_model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Test the model
        gp_model.eval()
        likelihood.eval()
        test_preds = likelihood(gp_model(test_x)).mean()
        mean_abs_error = torch.mean(torch.abs(test_y - test_preds))
        assert mean_abs_error.data.squeeze()[0] < 0.02
def reset(self, x, y, var):
    self.set_train_data(x, y, var)
    # self.likelihood = GaussianLikelihood(learn_noise=self.learn_likelihood_noise)
    self.likelihood = GaussianLikelihood()
    self.model = ExactGPModel(
        self._train_x,
        self._zero_mean_train_y,
        self.likelihood,
        self._train_var,
        self.latent,
        self.kernel_params,
        self.latent_params,
    )
    self.optimizer = torch.optim.Adam(
        [{'params': self.model.parameters()}],
        lr=self.lr,
    )
    self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)
    self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode='min', patience=50, verbose=True
    )
def __init__(self):
    super(ExactGPObservationModel, self).__init__(GaussianLikelihood())
    self.mean_module = ConstantMean()
    self.covar_module = RBFKernel()
    self.params = MLEParameterGroup(
        constant_mean=Parameter(torch.Tensor([0])),
        log_noise=Parameter(torch.Tensor([0])),
        log_lengthscale=Parameter(torch.Tensor([0])),
    )
def __init__(self):
    likelihood = GaussianLikelihood(log_noise_bounds=(-6, 6))
    super(MultitaskGPModel, self).__init__(likelihood)
    self.mean_module = ConstantMean(constant_bounds=(-1, 1))
    self.covar_module = RBFKernel(log_lengthscale_bounds=(-6, 6))
    self.task_covar_module = IndexKernel(
        n_tasks=2,
        rank=1,
        covar_factor_bounds=(-6, 6),
        log_var_bounds=(-6, 6),
    )
def __init__(self):
    super(SpectralMixtureGPModel, self).__init__(GaussianLikelihood())
    self.mean_module = ConstantMean()
    self.covar_module = SpectralMixtureKernel()
    self.params = MLEParameterGroup(
        log_noise=Parameter(torch.Tensor([-2])),
        log_mixture_weights=Parameter(torch.zeros(3)),
        log_mixture_means=Parameter(torch.zeros(3)),
        log_mixture_scales=Parameter(torch.zeros(3)),
    )
def test_recursive_initialize(self, cuda=False):
    train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)

    likelihood_1 = GaussianLikelihood()
    gp_model_1 = ExactGPModel(train_x, train_y, likelihood_1)
    likelihood_2 = GaussianLikelihood()
    gp_model_2 = ExactGPModel(train_x, train_y, likelihood_2)

    gp_model_1.initialize(**{"likelihood.noise": 1e-2, "covar_module.base_kernel.lengthscale": 1e-1})
    gp_model_2.likelihood.initialize(noise=1e-2)
    gp_model_2.covar_module.base_kernel.initialize(lengthscale=1e-1)

    self.assertTrue(torch.equal(gp_model_1.likelihood.noise, gp_model_2.likelihood.noise))
    self.assertTrue(
        torch.equal(
            gp_model_1.covar_module.base_kernel.lengthscale,
            gp_model_2.covar_module.base_kernel.lengthscale,
        )
    )
def test_train_on_single_set_test_on_batch(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        log_noise_prior=gpytorch.priors.NormalPrior(
            loc=torch.zeros(1), scale=torch.ones(1), log_transform=True
        )
    )
    gp_model = ExactGPModel(train_x1, train_y1, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.1)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.1)
def fit_model(self):
    """
    If no state_dict exists, fits the model and saves the state_dict.
    Otherwise, constructs the model but uses the fit given by the state_dict.
    """
    # read the data
    data_list = list()
    for i in range(1, 31):
        data_file = os.path.join(script_dir, "port_evals", "port_n=100_seed=%d" % i)
        data_list.append(torch.load(data_file))

    # join the data together
    X = torch.cat([data_list[i]["X"] for i in range(len(data_list))], dim=0).squeeze(-2)
    Y = torch.cat([data_list[i]["Y"] for i in range(len(data_list))], dim=0).squeeze(-2)

    # fit GP
    noise_prior = GammaPrior(1.1, 0.5)
    noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
    likelihood = GaussianLikelihood(
        noise_prior=noise_prior,
        batch_shape=[],
        noise_constraint=GreaterThan(
            0.000005,  # minimum observation noise assumed in the GP model
            transform=None,
            initial_value=noise_prior_mode,
        ),
    )

    # We save the state dict to avoid fitting the GP every time, which takes ~3 mins
    try:
        state_dict = torch.load(os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"))
        model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
        model.load_state_dict(state_dict)
    except FileNotFoundError:
        model = SingleTaskGP(X, Y, likelihood, outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        from time import time

        start = time()
        fit_gpytorch_model(mll)
        print("fitting took %s seconds" % (time() - start))
        torch.save(
            model.state_dict(),
            os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"),
        )
    self.model = model
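The `noise_prior_mode` line above seeds the noise at the mode of the Gamma prior: for Gamma(α, β) with α > 1, the density peaks at (α − 1)/β, so GammaPrior(1.1, 0.5) gives a mode of 0.2. A quick standalone sanity check of that formula (illustrative only, not part of the original):

# Verify (1.1 - 1) / 0.5 = 0.2 is where the prior's log-density peaks.
import torch
from gpytorch.priors import GammaPrior

prior = GammaPrior(1.1, 0.5)
mode = (prior.concentration - 1) / prior.rate  # analytic mode, valid for concentration > 1
grid = torch.linspace(1e-4, 2.0, 2001)
empirical_mode = grid[prior.log_prob(grid).argmax()]
assert torch.isclose(mode, torch.tensor(0.2))
assert abs(empirical_mode - mode) < 1e-2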
def __init__(self):
    likelihood = GaussianLikelihood(log_noise_bounds=(-5, 5))
    super(SpectralMixtureGPModel, self).__init__(likelihood)
    self.mean_module = ConstantMean(constant_bounds=(-1, 1))
    self.covar_module = SpectralMixtureKernel(
        n_mixtures=3,
        log_mixture_weight_bounds=(-5, 5),
        log_mixture_mean_bounds=(-5, 5),
        log_mixture_scale_bounds=(-5, 5),
    )
def __init__(self, likelihood=None, add_input=False):
    super().__init__()
    # Avoid the mutable-default pitfall: constructing GaussianLikelihood() in the
    # signature would share one likelihood instance across every model.
    self.likelihood = likelihood if likelihood is not None else GaussianLikelihood()
    self.X_scaler = TorchScaler()
    self.Y_scaler = TorchScaler()
    self.latent_layer = None
    self.add_input = add_input
def test_regression_error_full(self, skip_logdet_forward=False, cuda=False):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = GaussianLikelihood()
    model = SVGPRegressionModel(inducing_points=train_x, learn_locs=False)
    if cuda:
        likelihood.cuda()
        model.cuda()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.01,
    )
    with gpytorch.settings.skip_logdet_forward(skip_logdet_forward):
        for _ in range(200):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.item(), 1e-1)
def __init__(self, input_dim, feature_dim, label_dim, hidden_width, hidden_depth,
             n_inducing, batch_size, max_epochs_since_update, **kwargs):
    """
    Args:
        input_dim (int)
        feature_dim (int): dimension of deep kernel features
        label_dim (int)
        hidden_depth (int)
        hidden_width (int or list)
        n_inducing (int): number of inducing points for variational approximation
        batch_size (int)
        max_epochs_since_update (int)
    """
    params = locals()
    del params['self']
    self.__dict__ = params
    super().__init__()

    noise_constraint = GreaterThan(1e-4)
    self.likelihood = GaussianLikelihood(
        batch_shape=torch.Size([label_dim]),
        noise_constraint=noise_constraint,
    )
    self.nn = FCNet(
        input_dim,
        output_dim=label_dim,
        hidden_width=hidden_width,
        hidden_depth=hidden_depth,
        batch_norm=True,
    )
    self.batch_norm = torch.nn.BatchNorm1d(feature_dim)

    self.mean_module = ConstantMean(batch_shape=torch.Size([label_dim]))
    base_kernel = RBFKernel(batch_shape=torch.Size([label_dim]), ard_num_dims=feature_dim)
    self.covar_module = ScaleKernel(base_kernel, batch_shape=torch.Size([label_dim]))

    variational_dist = MeanFieldVariationalDistribution(
        num_inducing_points=n_inducing,
        batch_shape=torch.Size([label_dim]),
    )
    inducing_points = torch.randn(n_inducing, feature_dim)
    self.variational_strategy = VariationalStrategy(
        self, inducing_points, variational_dist, learn_inducing_locations=True
    )

    # initialize preprocessers
    self.register_buffer("input_mean", torch.zeros(input_dim))
    self.register_buffer("input_std", torch.ones(input_dim))
    self.register_buffer("label_mean", torch.zeros(label_dim))
    self.register_buffer("label_std", torch.ones(label_dim))

    self._train_ckpt = deepcopy(self.state_dict())
    self._eval_ckpt = deepcopy(self.state_dict())
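A constructor like this is usually paired with a `__call__`/`forward` pair in which raw inputs are featurized before reaching the variational strategy, since the inducing points above are initialized directly in feature space. The sketch below shows that standard gpytorch deep-kernel pattern under stated assumptions: the original snippet shows only the constructor, and the assumption that the feature pipeline (`self.nn` plus `self.batch_norm`) emits `feature_dim`-dimensional features is mine, not the source's.

# Sketch only: methods such a deep-kernel ApproximateGP typically defines.
def __call__(self, x, **kwargs):
    # Featurize first so the data lives in the same space as the inducing
    # points, which were initialized as torch.randn(n_inducing, feature_dim).
    # Assumption: the feature network outputs feature_dim-dimensional features.
    features = self.batch_norm(self.nn(x))
    return self.variational_strategy(features, **kwargs)

def forward(self, features):
    # Plain GP layer over the already-featurized inputs
    mean = self.mean_module(features)
    covar = self.covar_module(features)
    return gpytorch.distributions.MultivariateNormal(mean, covar)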
def test_posterior_latent_gp_and_likelihood_with_optimization(self):
    # We're manually going to set the hyperparameters to something they shouldn't be
    likelihood = GaussianLikelihood(
        log_noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1, log_transform=True)
    )
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
    gp_model.covar_module.initialize(log_lengthscale=1)
    gp_model.mean_module.initialize(constant=0)
    likelihood.initialize(log_noise=1)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(50):
        optimizer.zero_grad()
        with gpytorch.settings.debug(False):
            output = gp_model(train_x)
            loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()
    test_function_predictions = likelihood(gp_model(test_x))
    mean_abs_error = torch.mean(torch.abs(test_y - test_function_predictions.mean()))
    self.assertLess(mean_abs_error.item(), 0.05)
def create_rp_model(data, y, proj_ratio=1):
    n, d = data.shape
    kernel = ScaleKernel(
        RPPolyKernel(
            round(proj_ratio * d), 1, d, MaternKernel,
            nu=2.5, weighted=True, space_proj=True,
        )
    )
    model = ExactGPModel(data, y, GaussianLikelihood(), kernel)
    return model
def __init__(self, train_X, train_Y):
    self._validate_tensor_args(train_X, train_Y)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)
    likelihood = GaussianLikelihood(batch_shape=self._aug_batch_shape)
    super().__init__(train_X, train_Y, likelihood)
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
    self.covar_module = ScaleKernel(
        RBFKernel(batch_shape=self._aug_batch_shape),
        batch_shape=self._aug_batch_shape,
    )
    self.to(train_X)