def _get_given_covar_module_model(**tkwargs):
    train_X, train_Y = _get_random_mt_data(**tkwargs)
    model = MultiTaskGP(
        train_X,
        train_Y,
        task_feature=1,
        covar_module=RBFKernel(lengthscale_prior=LogNormalPrior(0.0, 1.0)),
    )
    return model.to(**tkwargs)
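# A minimal usage sketch for the helper above (hypothetical; assumes torch and
# _get_random_mt_data are in scope in the test module). It builds the model on
# CPU in double precision and checks that the custom kernel was passed through.
def _check_given_covar_module_model():
    tkwargs = {"device": torch.device("cpu"), "dtype": torch.double}
    model = _get_given_covar_module_model(**tkwargs)
    # the user-supplied covar_module should be used as-is
    assert isinstance(model.covar_module, RBFKernel)
    assert model.covar_module.lengthscale.dtype == torch.double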
def test_pyro_sampling(self):
    try:
        import pyro  # noqa
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:
        return
    train_x, test_x, train_y, test_y = self._get_data(cuda=False)
    likelihood = GaussianLikelihood(noise_constraint=gpytorch.constraints.Positive())
    gp_model = ExactGPModel(train_x, train_y, likelihood)
    # Register normal GPyTorch priors
    gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
    gp_model.covar_module.base_kernel.register_prior(
        "lengthscale_prior", UniformPrior(0.01, 0.2), "lengthscale"
    )
    gp_model.covar_module.register_prior("outputscale_prior", UniformPrior(1, 2), "outputscale")
    likelihood.register_prior("noise_prior", LogNormalPrior(-1.5, 0.1), "noise")
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    def pyro_model(x, y):
        gp_model.pyro_sample_from_prior()
        output = gp_model(x)
        loss = mll.pyro_factor(output, y)
        return y

    nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
    mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20)
    mcmc_run.run(train_x, train_y)
    gp_model.pyro_load_from_samples(mcmc_run.get_samples())
    gp_model.eval()
    expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
    output = gp_model(expanded_test_x)
    self.assertEqual(output.mean.size(0), 3)
    # All 3 samples should do reasonably well on a noiseless dataset.
    self.assertLess(torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
    self.assertLess(torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
    self.assertLess(torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
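# The test above assumes an ExactGPModel defined elsewhere in the test module.
# A minimal sketch of the standard GPyTorch exact-GP pattern it relies on
# (ConstantMean plus a scaled RBF kernel, matching the priors registered above;
# the exact definition in the original module may differ):
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        # return the prior/posterior distribution at x
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)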
def test_warp_transform(self):
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        # basic init
        indices = [0, 2]
        warp_tf = get_test_warp(indices).to(**tkwargs)
        self.assertTrue(warp_tf.training)
        k = Kumaraswamy(warp_tf.concentration1, warp_tf.concentration0)
        self.assertEqual(warp_tf.indices.tolist(), indices)
        # basic usage
        for batch_shape in (torch.Size(), torch.Size([3])):
            X = torch.rand(*batch_shape, 4, 3, **tkwargs)
            with torch.no_grad():
                warp_tf = get_test_warp(indices=indices).to(**tkwargs)
                X_tf = warp_tf(X)
            expected_X_tf = X.clone()
            expected_X_tf[..., indices] = k.cdf(
                expected_X_tf[..., indices].clamp_min(warp_tf.eps).clamp_max(1 - warp_tf.eps)
            )
            # check that values at non-transformed indices are unchanged
            self.assertTrue(torch.equal(expected_X_tf, X_tf))
            # test untransform
            untransformed_X = warp_tf.untransform(X_tf)
            self.assertTrue(torch.allclose(untransformed_X, X, rtol=1e-3, atol=1e-3))
            # test no transform on eval
            warp_tf = get_test_warp(indices, transform_on_eval=False).to(**tkwargs)
            X_tf = warp_tf(X)
            self.assertFalse(torch.equal(X, X_tf))
            warp_tf.eval()
            X_tf = warp_tf(X)
            self.assertTrue(torch.equal(X, X_tf))
            # test no transform on train
            warp_tf = get_test_warp(indices=indices, transform_on_train=False).to(**tkwargs)
            X_tf = warp_tf(X)
            self.assertTrue(torch.equal(X, X_tf))
            warp_tf.eval()
            X_tf = warp_tf(X)
            self.assertFalse(torch.equal(X, X_tf))
            # test equals
            warp_tf2 = get_test_warp(indices=indices, transform_on_train=False).to(**tkwargs)
            self.assertTrue(warp_tf.equals(warp_tf2))
            # test different transform_on_train
            warp_tf2 = get_test_warp(indices=indices)
            self.assertFalse(warp_tf.equals(warp_tf2))
            # test different indices
            warp_tf2 = get_test_warp(indices=[0, 1], transform_on_train=False).to(**tkwargs)
            self.assertFalse(warp_tf.equals(warp_tf2))
            # test preprocess_transform
            self.assertTrue(torch.equal(warp_tf.preprocess_transform(X), X))
            warp_tf.transform_on_preprocess = True
            self.assertTrue(torch.equal(warp_tf.preprocess_transform(X), X_tf))
            # test _set_concentration
            warp_tf._set_concentration(0, warp_tf.concentration0.shape)
            warp_tf._set_concentration(1, warp_tf.concentration1.shape)
            # test concentration prior
            prior0 = LogNormalPrior(0.0, 0.75).to(**tkwargs)
            prior1 = LogNormalPrior(0.0, 0.5).to(**tkwargs)
            warp_tf = get_test_warp(
                indices=[0, 1],
                concentration0_prior=prior0,
                concentration1_prior=prior1,
            )
            for i, (name, _, p, _, _) in enumerate(warp_tf.named_priors()):
                self.assertEqual(name, f"concentration{i}_prior")
                self.assertIsInstance(p, LogNormalPrior)
                self.assertEqual(p.base_dist.scale, 0.75 if i == 0 else 0.5)
            # test gradients
            X = 1 + 5 * torch.rand(*batch_shape, 4, 3, **tkwargs)
            warp_tf = get_test_warp(indices=indices).to(**tkwargs)
            X_tf = warp_tf(X)
            X_tf.sum().backward()
            for grad in (warp_tf.concentration0.grad, warp_tf.concentration1.grad):
                self.assertIsNotNone(grad)
                self.assertFalse(torch.isnan(grad).any())
                self.assertFalse(torch.isinf(grad).any())
                self.assertFalse((grad == 0).all())
def gp_torch_train(
    train_x: Tensor,
    train_y: Tensor,
    n_inducing_points: int,
    tkwargs: Dict[str, Any],
    init: bool,
    scale: bool,
    covar_name: str,
    gp_file: Optional[str],
    save_file: str,
    input_wp: bool,
    outcome_transform: Optional[OutcomeTransform] = None,
    options: Optional[Dict[str, Any]] = None,
) -> SingleTaskGP:
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # build hyperparameters
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        kmeans = MiniBatchKMeans(
            n_clusters=n_inducing_points,
            batch_size=min(10000, train_x.shape[0]),
            n_init=25,
        )
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K-means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())

        output_scale = None
        if scale:
            output_scale = train_y.var().item()
        lscales = torch.empty(1, train_x.shape[1])
        for i in range(train_x.shape[1]):
            lscales[0, i] = torch.pdist(train_x[:, i].view(-1, 1)).median().clamp(min=0.01)
        base_covar_module = query_covar(
            covar_name=covar_name, scale=scale, outputscale=output_scale, lscales=lscales
        )
        covar_module = InducingPointKernel(
            base_covar_module, inducing_points=inducing_points, likelihood=likelihood
        )

        input_warp_tf = None
        if input_wp:
            # Apply input warping: initialize the input-warping transformation.
            input_warp_tf = CustomWarp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1:
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(
            train_x,
            train_y,
            covar_module=covar_module,
            likelihood=likelihood,
            input_transform=input_warp_tf,
            outcome_transform=outcome_transform,
        )
    else:
        # load model
        output_scale = 1  # will be overwritten when loading the model
        lscales = torch.ones(train_x.shape[1])  # will be overwritten when loading the model
        base_covar_module = query_covar(
            covar_name=covar_name, scale=scale, outputscale=output_scale, lscales=lscales
        )
        covar_module = InducingPointKernel(
            base_covar_module,
            inducing_points=torch.empty(n_inducing_points, train_x.shape[1]),
            likelihood=likelihood,
        )

        input_warp_tf = None
        if input_wp:
            # Apply input warping: initialize the input-warping transformation.
            input_warp_tf = Warp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1:
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(
            train_x,
            train_y,
            covar_module=covar_module,
            likelihood=likelihood,
            input_transform=input_warp_tf,
            outcome_transform=outcome_transform,
        )
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    # fit_gpytorch_model(mll, optimizer=fit_gpytorch_torch, approx_mll=False, options=options)
    fit_gpytorch_torch(
        mll,
        options=options,
        approx_mll=False,
        clip_by_value=input_wp,
        clip_value=10.0,
    )
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")
    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")
    model.eval()
    return model
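# A hypothetical invocation sketch for gp_torch_train. The covar_name value and
# optimizer options below are illustrative assumptions, not values taken from
# this module; query_covar and CustomWarp are assumed to be defined alongside it.
# tkwargs = {"device": torch.device("cuda:0"), "dtype": torch.float}
# model = gp_torch_train(
#     train_x=train_x,              # (n, d) tensor of inputs
#     train_y=train_y.view(-1, 1),  # (n, 1); train_y must be 2-D (see the assert)
#     n_inducing_points=500,
#     tkwargs=tkwargs,
#     init=True,                    # fit from scratch instead of loading gp_file
#     scale=True,
#     covar_name="matern-5/2",      # hypothetical key understood by query_covar
#     gp_file=None,
#     save_file="gp_state_dict.pt",
#     input_wp=True,
#     options={"lr": 0.1, "maxiter": 500},  # assumed fit_gpytorch_torch options
# )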
def test_warp_transform(self):
    for dtype, batch_shape, warp_batch_shape in itertools.product(
        (torch.float, torch.double),
        (torch.Size(), torch.Size([3])),
        (torch.Size(), torch.Size([2])),
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        eps = 1e-6 if dtype == torch.double else 1e-5
        # basic init
        indices = [0, 2]
        warp_tf = get_test_warp(indices, batch_shape=warp_batch_shape, eps=eps).to(**tkwargs)
        self.assertTrue(warp_tf.training)
        k = Kumaraswamy(warp_tf.concentration1, warp_tf.concentration0)
        self.assertEqual(warp_tf.indices.tolist(), indices)
        # We don't want these data points to end up all the way near zero, since
        # this would cause numerical issues and thus result in a flaky test.
        X = 0.025 + 0.95 * torch.rand(*batch_shape, 4, 3, **tkwargs)
        X = X.unsqueeze(-3) if len(warp_batch_shape) > 0 else X
        with torch.no_grad():
            warp_tf = get_test_warp(
                indices=indices, batch_shape=warp_batch_shape, eps=eps
            ).to(**tkwargs)
            X_tf = warp_tf(X)
        expected_X_tf = expand_and_copy_tensor(X, batch_shape=warp_tf.batch_shape)
        expected_X_tf[..., indices] = k.cdf(
            expected_X_tf[..., indices] * warp_tf._X_range + warp_tf._X_min
        )
        self.assertTrue(torch.equal(expected_X_tf, X_tf))
        # test untransform
        untransformed_X = warp_tf.untransform(X_tf)
        self.assertTrue(
            torch.allclose(
                untransformed_X,
                expand_and_copy_tensor(X, batch_shape=warp_tf.batch_shape),
                rtol=1e-3,
                atol=1e-3 if self.device == torch.device("cpu") else 1e-2,
            )
        )
        if len(warp_batch_shape) > 0:
            with self.assertRaises(BotorchTensorDimensionError):
                warp_tf.untransform(X_tf.unsqueeze(-3))
        # test no transform on eval
        warp_tf = get_test_warp(
            indices,
            transform_on_eval=False,
            batch_shape=warp_batch_shape,
            eps=eps,
        ).to(**tkwargs)
        X_tf = warp_tf(X)
        self.assertFalse(torch.equal(X, X_tf))
        warp_tf.eval()
        X_tf = warp_tf(X)
        self.assertTrue(torch.equal(X, X_tf))
        # test no transform on train
        warp_tf = get_test_warp(
            indices=indices,
            transform_on_train=False,
            batch_shape=warp_batch_shape,
            eps=eps,
        ).to(**tkwargs)
        X_tf = warp_tf(X)
        self.assertTrue(torch.equal(X, X_tf))
        warp_tf.eval()
        X_tf = warp_tf(X)
        self.assertFalse(torch.equal(X, X_tf))
        # test equals
        warp_tf2 = get_test_warp(
            indices=indices,
            transform_on_train=False,
            batch_shape=warp_batch_shape,
            eps=eps,
        ).to(**tkwargs)
        self.assertTrue(warp_tf.equals(warp_tf2))
        # test different transform_on_train
        warp_tf2 = get_test_warp(indices=indices, batch_shape=warp_batch_shape, eps=eps)
        self.assertFalse(warp_tf.equals(warp_tf2))
        # test different indices
        warp_tf2 = get_test_warp(
            indices=[0, 1],
            transform_on_train=False,
            batch_shape=warp_batch_shape,
            eps=eps,
        ).to(**tkwargs)
        self.assertFalse(warp_tf.equals(warp_tf2))
        # test preprocess_transform
        warp_tf.transform_on_train = False
        self.assertTrue(torch.equal(warp_tf.preprocess_transform(X), X))
        warp_tf.transform_on_train = True
        self.assertTrue(torch.equal(warp_tf.preprocess_transform(X), X_tf))
        # test _set_concentration
        warp_tf._set_concentration(0, warp_tf.concentration0)
        warp_tf._set_concentration(1, warp_tf.concentration1)
        # test concentration prior
        prior0 = LogNormalPrior(0.0, 0.75).to(**tkwargs)
        prior1 = LogNormalPrior(0.0, 0.5).to(**tkwargs)
        warp_tf = get_test_warp(
            indices=[0, 1],
            concentration0_prior=prior0,
            concentration1_prior=prior1,
            batch_shape=warp_batch_shape,
            eps=eps,
        )
        for i, (name, _, p, _, _) in enumerate(warp_tf.named_priors()):
            self.assertEqual(name, f"concentration{i}_prior")
            self.assertIsInstance(p, LogNormalPrior)
            self.assertEqual(p.base_dist.scale, 0.75 if i == 0 else 0.5)
        # test gradients
        X = 1 + 5 * torch.rand(*batch_shape, 4, 3, **tkwargs)
        X = X.unsqueeze(-3) if len(warp_batch_shape) > 0 else X
        warp_tf = get_test_warp(
            indices=indices, batch_shape=warp_batch_shape, eps=eps
        ).to(**tkwargs)
        X_tf = warp_tf(X)
        X_tf.sum().backward()
        for grad in (warp_tf.concentration0.grad, warp_tf.concentration1.grad):
            self.assertIsNotNone(grad)
            self.assertFalse(torch.isnan(grad).any())
            self.assertFalse(torch.isinf(grad).any())
            self.assertFalse((grad == 0).all())
        # test set with scalar
        warp_tf._set_concentration(i=0, value=2.0)
        self.assertTrue((warp_tf.concentration0 == 2.0).all())
        warp_tf._set_concentration(i=1, value=3.0)
        self.assertTrue((warp_tf.concentration1 == 3.0).all())
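# Both warp tests above rely on a get_test_warp helper that is not shown here.
# A plausible minimal sketch (hypothetical; the real helper may choose different
# concentration values): it builds a Warp input transform over the given indices,
# forwarding any extra kwargs such as batch_shape and eps, and moves the
# concentrations away from their default of 1 so the transform is not the identity.
def get_test_warp(indices, **kwargs):
    warp_tf = Warp(indices=indices, **kwargs)
    warp_tf._set_concentration(i=0, value=2.0)
    warp_tf._set_concentration(i=1, value=3.0)
    return warp_tf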