def test_posterior(self):
    for dtype in [torch.float, torch.double]:
        for mcs in [800, 10]:
            torch.random.manual_seed(0)
            with max_cholesky_size(mcs):
                test_x = torch.rand(2, 12, 1).to(device=self.device, dtype=dtype)
                self.model.to(dtype)
                # clear caches
                self.model.train()
                self.model.eval()
                # test that the posterior works
                posterior = self.model.posterior(test_x)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                # test that the posterior works with observation noise
                posterior = self.model.posterior(test_x, observation_noise=True)
                self.assertIsInstance(posterior, GPyTorchPosterior)
                # test that the posterior works with no variances
                # (some funkiness in MVN registration makes the variance non-zero)
                with skip_posterior_variances():
                    posterior = self.model.posterior(test_x)
                    self.assertIsInstance(posterior, GPyTorchPosterior)
                    self.assertLessEqual(posterior.variance.max(), 1e-6)
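# A minimal sketch of the posterior calls exercised above, outside the test
# harness. The concrete model class is an assumption here (the test's
# `self.model` is not shown); botorch's SingleTaskGP stands in for it.
import torch
from botorch.models import SingleTaskGP
from gpytorch.settings import skip_posterior_variances

train_X = torch.rand(20, 1, dtype=torch.double)
train_Y = torch.sin(6 * train_X)
model = SingleTaskGP(train_X, train_Y).eval()

test_X = torch.rand(2, 12, 1, dtype=torch.double)
posterior = model.posterior(test_X)  # posterior over the latent f
posterior_noisy = model.posterior(test_X, observation_noise=True)  # y = f + noise
with skip_posterior_variances():
    # variances are skipped and come back as (near-)zero placeholders
    posterior_cheap = model.posterior(test_X)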
def test_root_inv_decomposition_no_cholesky(self):
    with settings.max_cholesky_size(0):
        lazy_tensor = self.create_lazy_tensor()
        test_mat = torch.randn(*lazy_tensor.batch_shape, lazy_tensor.size(-1), 5)
        # check that Cholesky is not called
        with mock.patch.object(lazy_tensor, "cholesky") as chol_mock:
            root_approx = lazy_tensor.root_inv_decomposition()
            res = root_approx.matmul(test_mat)
            actual = torch.solve(test_mat, lazy_tensor.evaluate()).solution
            self.assertAllClose(res, actual, rtol=0.05, atol=0.02)
            chol_mock.assert_not_called()
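# For reference, a minimal sketch of the behavior this test pins down: with
# max_cholesky_size(0), root_inv_decomposition must take the iterative
# (Lanczos-based) path instead of a Cholesky factorization. Assumes the
# legacy gpytorch.lazy API that this test suite targets.
import torch
from gpytorch.lazy import lazify
from gpytorch.settings import max_cholesky_size

mat = torch.randn(20, 20)
mat = mat @ mat.t() + 20 * torch.eye(20)  # symmetric positive definite
with max_cholesky_size(0):  # 0 disables Cholesky at any size
    root_inv = lazify(mat).root_inv_decomposition()
    # the returned lazy root R satisfies R R^T ~= mat^{-1}, so matmul
    # approximates the inverse action on a right-hand side
    approx_inv = root_inv.matmul(torch.eye(20))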
def test_lanczos_fantasy_model(self):
    lanczos_thresh = 10
    n = lanczos_thresh + 1
    n_dims = 2
    with settings.max_cholesky_size(lanczos_thresh):
        x = torch.ones((n, n_dims))
        y = torch.randn(n)
        likelihood = GaussianLikelihood()
        model = ExactGPModel(x, y, likelihood=likelihood)
        mll = ExactMarginalLogLikelihood(likelihood, model)
        mll.train()
        mll.eval()
        # get a posterior to fill in caches
        model(torch.randn((1, n_dims)))
        new_n = 2
        new_x = torch.randn((new_n, n_dims))
        new_y = torch.randn(new_n)
        # just check that this can run without error
        model.get_fantasy_model(new_x, new_y)
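# `ExactGPModel` is referenced but not defined in this excerpt. A minimal
# stand-in consistent with how the test uses it (a hypothetical definition,
# following the standard GPyTorch exact-GP pattern):
import torch
import gpytorch

class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        # return the prior MVN at the query points
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )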
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item(),
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            "x": model.cpu().train_inputs[0],
            "y": model.cpu().train_targets,
        },
        "results": DataFrame(all_outputs),
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    with fast_pred_var(True), \
            use_toeplitz(args.toeplitz), \
            detach_test_caches(True), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            root_pred_var(True):
        main(args)
def test_GPyTorchPosterior(self):
    for dtype in (torch.float, torch.double):
        n = 3
        mean = torch.rand(n, dtype=dtype, device=self.device)
        variance = 1 + torch.rand(n, dtype=dtype, device=self.device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, self.device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([n, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, n, 1]))
        for sample_shape in ([4], [4, 2]):
            samples = posterior.rsample(sample_shape=torch.Size(sample_shape))
            self.assertEqual(samples.shape, torch.Size(sample_shape + [n, 1]))
        # check enabling of approximate root decomposition
        with ExitStack() as es:
            mock_func = es.enter_context(
                mock.patch(ROOT_DECOMP_PATH, return_value=torch.cholesky(covar))
            )
            es.enter_context(gpt_settings.max_cholesky_size(0))
            es.enter_context(
                gpt_settings.fast_computations(covar_root_decomposition=True)
            )
            # need to clear caches, cannot re-use previous objects
            mvn = MultivariateNormal(mean, lazify(covar))
            posterior = GPyTorchPosterior(mvn=mvn)
            posterior.rsample(sample_shape=torch.Size([4]))
            mock_func.assert_called_once()
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=self.device, dtype=dtype)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        # ensure consistent results
        for sample_shape in ([4], [4, 2]):
            base_samples = torch.randn(
                *sample_shape, 3, 1, device=self.device, dtype=dtype
            )
            samples = [
                posterior.rsample(
                    sample_shape=torch.Size(sample_shape), base_samples=base_samples
                )
                for _ in range(2)
            ]
            self.assertTrue(torch.allclose(*samples))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=self.device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=self.device)
        b_covar = torch.diag_embed(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 1, 3, 1, device=self.device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc, args.num_init, args.num_total, test_is_year=False
    )
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    model = FixedNoiseOnlineSKIGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        GridInterpolationKernel(
            base_kernel=ScaleKernel(
                MaternKernel(
                    ard_num_dims=2,
                    nu=0.5,
                    lengthscale_prior=GammaPrior(3.0, 6.0),
                ),
                outputscale_prior=GammaPrior(2.0, 0.15),
            ),
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        ),
        learn_additional_noise=False,
    ).to(device)

    mll = BatchedWoodburyMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), max_root_decomposition_size(
        args.sketch_size
    ), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    model.zero_grad()
    model.eval()

    print("---- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_root_decomposition_size(
        args.sketch_size
    ), max_cholesky_size(args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(test_x)
        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - test_y.view(-1)) ** 2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_root_decomposition_size(
            args.sketch_size
        ), max_cholesky_size(args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(train_x[:i]), train_y[:i]).sum()
            loss.backward()
        mll_time = time.time() - start  # elapsed MLL step time in seconds

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()

        start = time.time()
        with torch.no_grad():
            model.condition_on_observations(
                train_x[i].unsqueeze(0),
                train_y[i].view(1, 1),
                train_y_var[i].view(-1, 1),
                inplace=True,
            )
        fantasy_time = time.time() - start  # elapsed online-update time
        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()
            with detach_test_caches(), max_root_decomposition_size(
                args.sketch_size
            ), max_cholesky_size(args.cholesky_size):
                pred_dist = model(test_x)
            end = time.time()

            rmse = ((pred_dist.mean - test_y.view(-1)) ** 2).mean().sqrt().item()
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            print("Outputscale: ", model.covar_module.base_kernel.raw_outputscale)
            print(
                "Lengthscale: ",
                model.covar_module.base_kernel.base_kernel.raw_lengthscale,
            )
            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({"training": mll_time_list, "predictions": rmse_list}, args.output)
    for key in y_means:
        y_means[key] = y_means[key].cpu()

    output_dict = {
        "observations": {
            "x": train_x.cpu(),
            "y": train_y.cpu(),
            "means": y_means,
            "latent_y": latent_y.cpu(),
        },
        "results": DataFrame(all_outputs),
        "args": args,
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    use_fast_pred_var = not args.use_exact
    with use_toeplitz(args.toeplitz), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            cholesky_jitter(1e-3), \
            fast_pred_var(use_fast_pred_var), \
            fast_pred_samples(True):
        main(args)
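# The deeply nested `with` above can also be written by entering each setting
# on a contextlib.ExitStack, which reads more easily as the list of settings
# grows. A sketch under the same settings (the wrapper name is hypothetical):
from contextlib import ExitStack
import gpytorch.settings as gs

def run_with_settings(fn, args):
    with ExitStack() as stack:
        stack.enter_context(gs.use_toeplitz(args.toeplitz))
        stack.enter_context(gs.max_cholesky_size(args.cholesky_size))
        stack.enter_context(gs.max_root_decomposition_size(args.sketch_size))
        stack.enter_context(gs.cholesky_jitter(1e-3))
        stack.enter_context(gs.fast_pred_var(not args.use_exact))
        stack.enter_context(gs.fast_pred_samples(True))
        return fn(args)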
def test_KroneckerMultiTaskGP_custom(self):
    for batch_shape, dtype in itertools.product(
        (torch.Size(),),  # TODO: fix and test batch mode with torch.Size([3])
        (torch.float, torch.double),
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        # initialization with custom settings
        likelihood = MultitaskGaussianLikelihood(
            num_tasks=2,
            rank=1,
            batch_shape=batch_shape,
        )
        data_covar_module = MaternKernel(
            nu=1.5,
            lengthscale_prior=GammaPrior(2.0, 4.0),
        )
        task_covar_prior = LKJCovariancePrior(
            n=2,
            eta=torch.tensor(0.5, **tkwargs),
            sd_prior=SmoothedBoxPrior(math.exp(-3), math.exp(2), 0.1),
        )
        model_kwargs = {
            "likelihood": likelihood,
            "data_covar_module": data_covar_module,
            "task_covar_prior": task_covar_prior,
            "rank": 1,
        }
        model, train_X, _ = _get_kronecker_model_and_training_data(
            model_kwargs=model_kwargs, batch_shape=batch_shape, **tkwargs
        )
        self.assertIsInstance(model, KroneckerMultiTaskGP)
        self.assertEqual(model.num_outputs, 2)
        self.assertIsInstance(model.likelihood, MultitaskGaussianLikelihood)
        self.assertEqual(model.likelihood.rank, 1)
        self.assertIsInstance(model.mean_module, MultitaskMean)
        self.assertIsInstance(model.covar_module, MultitaskKernel)
        base_kernel = model.covar_module
        self.assertIsInstance(base_kernel.data_covar_module, MaternKernel)
        self.assertIsInstance(base_kernel.task_covar_module, IndexKernel)
        task_covar_prior = base_kernel.task_covar_module.IndexKernelPrior
        self.assertIsInstance(task_covar_prior, LKJCovariancePrior)
        self.assertEqual(task_covar_prior.correlation_prior.eta, 0.5)
        lengthscale_prior = base_kernel.data_covar_module.lengthscale_prior
        self.assertIsInstance(lengthscale_prior, GammaPrior)
        self.assertEqual(lengthscale_prior.concentration, 2.0)
        self.assertEqual(lengthscale_prior.rate, 4.0)
        self.assertEqual(base_kernel.task_covar_module.covar_factor.shape[-1], 1)
        # test model fitting
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=OptimizationWarning)
            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
        # test posterior
        max_cholesky_sizes = [1, 800]
        for max_cholesky in max_cholesky_sizes:
            model.train()
            test_x = torch.rand(2, 2, **tkwargs)
            # small root decomposition to enforce zero padding
            with max_cholesky_size(max_cholesky), max_root_decomposition_size(3):
                posterior_f = model.posterior(test_x)
                self.assertIsInstance(posterior_f, GPyTorchPosterior)
                self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
                self.assertEqual(posterior_f.mean.shape, torch.Size([2, 2]))
                self.assertEqual(posterior_f.variance.shape, torch.Size([2, 2]))
        # test observation noise
        posterior_noisy = model.posterior(test_x, observation_noise=True)
        self.assertTrue(
            torch.allclose(
                posterior_noisy.variance,
                model.likelihood(posterior_f.mvn).variance,
            )
        )
        # test posterior (batch eval)
        test_x = torch.rand(3, 2, 2, **tkwargs)
        posterior_f = model.posterior(test_x)
        self.assertIsInstance(posterior_f, GPyTorchPosterior)
        self.assertIsInstance(posterior_f.mvn, MultitaskMultivariateNormal)
        self.assertEqual(posterior_f.mean.shape, torch.Size([3, 2, 2]))
        self.assertEqual(posterior_f.variance.shape, torch.Size([3, 2, 2]))
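# For contrast with the custom construction tested above, the default
# KroneckerMultiTaskGP path is just two tensors in and a posterior out.
# A minimal sketch, assuming botorch's public KroneckerMultiTaskGP API:
import torch
from botorch.models import KroneckerMultiTaskGP

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.randn(10, 2, dtype=torch.double)  # two tasks
model = KroneckerMultiTaskGP(train_X, train_Y)
posterior = model.posterior(torch.rand(5, 2, dtype=torch.double))
assert posterior.mean.shape == torch.Size([5, 2])  # one column per task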