def test_degree2(self):
    AddK = NewtonGirardAdditiveKernel(RBFKernel(ard_num_dims=3), 3, 2)
    self.assertEqual(AddK.base_kernel.lengthscale.numel(), 3)
    self.assertEqual(AddK.outputscale.numel(), 2)

    testvals = torch.tensor([[1, 2, 3], [7, 5, 2]], dtype=torch.float)
    add_k_val = AddK(testvals, testvals).evaluate()

    manual_k1 = ScaleKernel(
        AdditiveKernel(RBFKernel(active_dims=0), RBFKernel(active_dims=1),
                       RBFKernel(active_dims=2)))
    manual_k1.initialize(outputscale=1 / 2)
    manual_k2 = ScaleKernel(
        AdditiveKernel(RBFKernel(active_dims=[0, 1]), RBFKernel(active_dims=[1, 2]),
                       RBFKernel(active_dims=[0, 2])))
    manual_k2.initialize(outputscale=1 / 2)
    manual_k = AdditiveKernel(manual_k1, manual_k2)
    manual_add_k_val = manual_k(testvals, testvals).evaluate()

    # np.testing.assert_allclose(add_k_val.detach().numpy(), manual_add_k_val.detach().numpy(), atol=1e-5)
    self.assertTrue(torch.allclose(add_k_val, manual_add_k_val, atol=1e-5))
def __init__(self, train_X, train_Y, outcome_transform=None):
    if outcome_transform is not None:
        train_Y, _ = outcome_transform(train_Y)
    self._validate_tensor_args(train_X, train_Y)
    train_Y = train_Y.squeeze(-1)
    likelihood = GaussianLikelihood()
    super().__init__(train_X, train_Y, likelihood)
    self.mean_module = ConstantMean()
    self.covar_module = ScaleKernel(RBFKernel())
    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    self._num_outputs = 1
    self.to(train_X)
def __init__(self, density=16):
    super().__init__()
    self.density = density
    self.psi = ScaleKernel(RBFKernel())
    self.phi = PowerFunction(K=1)

    self.cnn = nn.Sequential(
        nn.Conv1d(3, 16, 5, 1, 2), nn.ReLU(),
        nn.Conv1d(16, 32, 5, 1, 2), nn.ReLU(),
        nn.Conv1d(32, 16, 5, 1, 2), nn.ReLU(),
        nn.Conv1d(16, 2, 5, 1, 2))

    def weights_init(m):
        if isinstance(m, nn.Conv1d):
            torch.nn.init.xavier_uniform_(m.weight)
            torch.nn.init.zeros_(m.bias)

    self.cnn.apply(weights_init)
    self.pos = nn.Softplus()
    self.psi_rho = ScaleKernel(RBFKernel())
def __init__(self, train_inputs, train_targets, inducing_points, likelihood):
    super().__init__(train_inputs, train_targets, likelihood)
    if train_inputs.ndim == 2:
        dims = train_inputs.shape[1]
    else:
        dims = 1
    self.mean_module = gpytorch.means.ZeroMean()
    # self.base_covar_module = ScaleKernel(RBFKernel(ard_num_dims=dims))
    self.base_covar_module = ScaleKernel(MaternKernel(ard_num_dims=dims, nu=1.5))
    self.covar_module = InducingPointKernel(self.base_covar_module,
                                            inducing_points, likelihood)
def __init__(self, grid_size=20, grid_bounds=[(-0.1, 1.1)]):
    variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
        num_inducing_points=int(pow(grid_size, len(grid_bounds))))
    variational_strategy = gpytorch.variational.GridInterpolationVariationalStrategy(
        self, grid_size=grid_size, grid_bounds=grid_bounds,
        variational_distribution=variational_distribution)
    super(GPRegressionModel, self).__init__(variational_strategy)
    self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-10, 10))
    self.covar_module = ScaleKernel(
        RBFKernel(log_lengthscale_prior=SmoothedBoxPrior(
            exp(-3), exp(6), sigma=0.1, log_transform=True)))
def __init__(self, train_x, train_y, likelihood):
    """Use InducingPointKernel in order to handle large data sets."""
    super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
    self.mean_module = gpytorch.means.ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    rank = 10
    X = train_x.numpy()
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the valid dtype.
    induced_points = np.linspace(0, X.shape[0] - 1, num=rank, dtype=int)
    self.covar_module = InducingPointKernel(
        self.base_covar_module,
        inducing_points=train_x[induced_points, :],
        likelihood=likelihood)
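# --- Hedged usage sketch (not from the original source) ---
# A minimal example of how a model like the GPRegressionModel above could be
# trained. The synthetic data, iteration count, and learning rate are
# illustrative assumptions; InducingPointKernel models are fit against the
# standard exact marginal log likelihood.
import torch
import gpytorch

train_x = torch.linspace(0, 1, 1000).unsqueeze(-1)
train_y = torch.sin(6 * train_x).squeeze(-1) + 0.1 * torch.randn(1000)

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(train_x, train_y, likelihood)

model.train()
likelihood.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for _ in range(100):
    optimizer.zero_grad()
    loss = -mll(model(train_x), train_y)
    loss.backward()
    optimizer.step()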
def __init__(self, train_X, train_Y, outcome_transform=None):
    self._validate_tensor_args(train_X, train_Y)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)
    likelihood = GaussianLikelihood(batch_shape=self._aug_batch_shape)
    super().__init__(train_X, train_Y, likelihood)
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
    self.covar_module = ScaleKernel(
        RBFKernel(batch_shape=self._aug_batch_shape),
        batch_shape=self._aug_batch_shape,
    )
    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    self.to(train_X)
def __init__(self):
    super(GPClassificationModel, self).__init__(grid_size=16,
                                                grid_bounds=[(-1, 1)],
                                                num_dim=2)
    self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-1e-5, 1e-5))
    self.covar_module = ScaleKernel(
        RBFKernel(ard_num_dims=2,
                  log_lengthscale_prior=SmoothedBoxPrior(
                      exp(-5), exp(6), sigma=0.1, log_transform=True)),
        log_outputscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1,
                                               log_transform=True),
    )
def test_get_deterministic_model_multi_samples(self):
    tkwargs = {"device": self.device}
    n_samples = 5
    for dtype, m in product((torch.float, torch.double), (1, 2)):
        tkwargs["dtype"] = dtype
        for batch_shape_w, batch_shape_x in product(
                [torch.Size([]), torch.Size([3])], repeat=2):
            weights = []
            bases = []
            for i in range(m):
                num_rff = 2 * (i + 2)
                # we require weights to be of shape
                # `n_samples x (batch_shape) x num_rff`
                weights.append(
                    torch.rand(*batch_shape_w, n_samples, num_rff, **tkwargs))
                kernel = ScaleKernel(RBFKernel(ard_num_dims=2)).to(**tkwargs)
                kernel.outputscale = 0.3 + torch.rand(1, **tkwargs).view(
                    kernel.outputscale.shape)
                kernel.base_kernel.lengthscale = 0.3 + torch.rand(
                    2, **tkwargs).view(kernel.base_kernel.lengthscale.shape)
                bases.append(
                    RandomFourierFeatures(
                        kernel=kernel,
                        input_dim=2,
                        num_rff_features=num_rff,
                        sample_shape=torch.Size([n_samples]),
                    ))
            model = get_deterministic_model_multi_samples(weights=weights,
                                                          bases=bases)
            self.assertIsInstance(model, DeterministicModel)
            self.assertEqual(model.num_outputs, m)
            X = torch.rand(*batch_shape_x, n_samples, 1, 2, **tkwargs)
            Y = model(X)
            for i in range(m):
                wi = weights[i]
                for _ in range(len(batch_shape_x)):
                    wi = wi.unsqueeze(-3)
                wi = wi.expand(*batch_shape_w, *batch_shape_x, *wi.shape[-2:])
                expected_Yi = (bases[i](X) @ wi.unsqueeze(-1)).squeeze(-1)
                self.assertTrue(torch.allclose(Y[..., i], expected_Yi))
            self.assertEqual(
                Y.shape,
                torch.Size([*batch_shape_w, *batch_shape_x, n_samples, 1, m]),
            )
def test_forward(self):
    a = torch.Tensor([4, 2, 8]).view(3, 1)
    b = torch.Tensor([0, 2]).view(2, 1)
    lengthscale = 2

    base_kernel = RBFKernel().initialize(log_lengthscale=math.log(lengthscale))
    kernel = ScaleKernel(base_kernel)
    kernel.initialize(log_outputscale=torch.Tensor([3]).log())
    kernel.eval()

    actual = torch.Tensor([[16, 4], [4, 0], [64, 36]]).mul_(-0.5).div_(lengthscale ** 2).exp()
    actual = actual * 3
    res = kernel(a, b).evaluate()
    self.assertLess(torch.norm(res - actual), 1e-5)
def test_forward_batch_mode(self):
    a = torch.Tensor([4, 2, 8]).view(1, 3, 1).repeat(4, 1, 1)
    b = torch.Tensor([0, 2]).view(1, 2, 1).repeat(4, 1, 1)
    lengthscale = 2

    base_kernel = RBFKernel().initialize(log_lengthscale=math.log(lengthscale))
    kernel = ScaleKernel(base_kernel, batch_size=4)
    kernel.initialize(log_outputscale=torch.Tensor([1, 2, 3, 4]).log())
    kernel.eval()

    base_actual = torch.Tensor([[16, 4], [4, 0], [64, 36]]).mul_(-0.5).div_(lengthscale ** 2).exp()
    actual = base_actual.unsqueeze(0).mul(torch.Tensor([1, 2, 3, 4]).view(4, 1, 1))
    res = kernel(a, b).evaluate()
    self.assertLess(torch.norm(res - actual), 1e-5)
def test_scale_kernel(self):
    kernel = ScaleKernel(StrictlyAdditiveKernel(2, RBFKernel))
    lik = gpytorch.likelihoods.GaussianLikelihood()
    trainX = torch.tensor([[0, 0]], dtype=torch.float)
    trainY = torch.tensor([2], dtype=torch.float)
    testXequi = torch.tensor([[1, 1]], dtype=torch.float)
    testXdiff = torch.tensor([[1, 0]], dtype=torch.float)
    kernel(trainX, testXdiff).evaluate()

    model = AdditiveExactGPModel(trainX, trainY, lik, kernel)
    model.eval()
    equi_pred = model.additive_pred(testXequi)
    diff_pred = model.additive_pred(testXdiff)
    self.assertEqual(equi_pred[0].mean[0], equi_pred[1].mean[0])
    self.assertNotEqual(diff_pred[0].mean[0], diff_pred[1].mean[0])

    combined = diff_pred[0] + diff_pred[1]
    total = model(testXdiff)
    self.assertEqual(combined.mean, total.mean)
    # self.assertEqual(combined.covariance_matrix, total.covariance_matrix)

    testXlarger = torch.tensor([[1, 0], [1.1, 1.2]], dtype=torch.float)
    pred_larger = model.additive_pred(testXlarger)

    trainX = torch.rand(200, 10, dtype=torch.float)
    trainY = torch.sin(trainX[:, 0]) + torch.sin(
        trainX[:, 1]) + torch.randn(200, dtype=torch.float) * 0.5
    kernel = ScaleKernel(StrictlyAdditiveKernel(2, RBFKernel))
    lik = gpytorch.likelihoods.GaussianLikelihood().to(dtype=torch.float)
    kernel = kernel.to(dtype=torch.float)
    model = AdditiveExactGPModel(trainX, trainY, lik, kernel)
    testXmuchlarger = torch.rand(2000, 10, dtype=torch.float)
    preds = model.additive_pred(testXmuchlarger)
    for p in preds:
        p.sample(torch.Size([1]))
def __init__(self, stem, init_x, init_y, lr, max_data_per_model,
             share_covar=True, **kwargs):
    stem = stem.to(init_x.device)
    if init_y.t().shape[0] != 1:
        _batch_shape = init_y.t().shape[:-1]
    else:
        _batch_shape = torch.Size()
    features = stem(init_x)
    covar_module = ScaleKernel(
        RBFKernel(batch_shape=_batch_shape, ard_num_dims=stem.output_dim),
        batch_shape=_batch_shape,
    )
    if init_x.shape[-2] < max_data_per_model:
        model_assignments = torch.zeros(init_x.shape[-2]).to(init_x.device)
        model_list = torch.nn.ModuleList(
            [SingleTaskGP(features, init_y, covar_module=covar_module)])
    else:
        num_models = math.ceil(init_x.shape[-2] / max_data_per_model)
        model_assignments = torch.randint(
            num_models, (init_x.shape[-2],)).to(init_x.device)
        model_list = torch.nn.ModuleList([])
        for i in range(num_models):
            idx = (model_assignments == i)
            model_list.append(
                SingleTaskGP(features[idx], init_y[idx],
                             covar_module=covar_module))
    super().__init__(*model_list)
    self.stem = stem
    self.covar_module = covar_module
    self.update_model_caches()
    self.max_data_per_model = max_data_per_model
    self._raw_inputs = [init_x]
    self._raw_targets = init_y
    self.input_dim = init_x.size(-1)
    self.target_dim = init_y.size(-1)
    self._assignments = model_assignments
    self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    self.mll = mlls.SumMarginalLogLikelihood(self.models[0].likelihood, self)
def __init__(self, train_x, train_y, likelihood, Z_init):
    """Sparse GP regression with the collapsed bound; q*(u) is implicit."""
    super(SparseGPR, self).__init__(train_x, train_y, likelihood)
    self.train_x = train_x
    self.train_y = train_y
    self.inducing_points = Z_init
    self.num_inducing = len(Z_init)
    self.likelihood = likelihood
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = InducingPointKernel(self.base_covar_module,
                                            inducing_points=Z_init,
                                            likelihood=self.likelihood)
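# --- Hedged usage sketch (not from the original source) ---
# Illustrative construction of the SparseGPR above; the synthetic data and
# the random selection of Z_init from the training inputs are assumptions.
# Because q*(u) is collapsed out analytically, training can proceed with the
# usual gpytorch.mlls.ExactMarginalLogLikelihood objective.
import torch
from gpytorch.likelihoods import GaussianLikelihood

train_x = torch.rand(500, 2)
train_y = torch.sin(train_x.sum(-1))
Z_init = train_x[torch.randperm(train_x.size(0))[:50]].clone()  # 50 inducing points

model = SparseGPR(train_x, train_y, GaussianLikelihood(), Z_init)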
def __init__(self, grid_size=16, grid_bounds=([-1, 1],)):
    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=16, batch_size=2)
    variational_strategy = AdditiveGridInterpolationVariationalStrategy(
        self,
        grid_size=grid_size,
        grid_bounds=grid_bounds,
        num_dim=2,
        variational_distribution=variational_distribution,
    )
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-1e-5, 1e-5))
    self.covar_module = ScaleKernel(
        RBFKernel(ard_num_dims=1,
                  lengthscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1)),
        outputscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1),
    )
def create_dpa_gp_ard_model(data, y, J):
    n, d = data.shape
    kernel = ScaleKernel(
        create_additive_rp_kernel(d, J, learn_proj=False, kernel_type='RBF',
                                  space_proj=True, prescale=True,
                                  batch_kernel=False, ard=True,
                                  proj_dist='sphere', mem_efficient=True))
    model = ExactGPModel(data, y, GaussianLikelihood(), kernel)
    return model
def __init__(self, train_x, train_y, likelihood, outputscale=10,
             transform_input_fn=None):
    super().__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.kernel = ScaleKernel(MaternKernel(nu=2.5))
    self.likelihood.noise_covar.noise = 1e-8
    self.kernel.outputscale = outputscale
    self.transform_input_fn = transform_input_fn
def __init__(self, grid_size=32, grid_bounds=[(0, 1)]):
    variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
        num_inducing_points=int(pow(grid_size, len(grid_bounds))))
    variational_strategy = gpytorch.variational.GridInterpolationVariationalStrategy(
        self, grid_size=grid_size, grid_bounds=grid_bounds,
        variational_distribution=variational_distribution)
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-5, 5))
    self.covar_module = ScaleKernel(
        RBFKernel(lengthscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1)),
        outputscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1),
    )
def __init__(self, inducing_points):
    '''
    A more extreme way to reduce parameters is to get rid of S entirely.
    This corresponds to learning a delta distribution u = m rather than a
    multivariate Normal distribution for u. In other words, it corresponds
    to performing MAP estimation rather than variational inference.
    '''
    variational_distribution = DeltaVariationalDistribution(
        inducing_points.size(-2))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
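# --- Hedged illustration (not from the original source) ---
# A quick check of the claim above: DeltaVariationalDistribution carries
# only a variational mean, so m inducing points contribute just m
# variational parameters, with no covariance factor at all.
from gpytorch.variational import DeltaVariationalDistribution

dist = DeltaVariationalDistribution(16)
for name, param in dist.named_parameters():
    print(name, tuple(param.shape))  # a single (16,) mean parameter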
def __init__(self, train_inputs, train_targets, likelihood, batch_size=1):
    super(ExactGPModel, self).__init__(train_inputs, train_targets, likelihood)
    self.mean_module = ConstantMean(
        batch_size=batch_size,
        prior=gpytorch.priors.SmoothedBoxPrior(-1, 1))
    self.covar_module = ScaleKernel(
        RBFKernel(
            batch_size=batch_size,
            lengthscale_prior=gpytorch.priors.NormalPrior(
                loc=torch.zeros(batch_size, 1, 1),
                scale=torch.ones(batch_size, 1, 1)),
        ),
        batch_size=batch_size,
        outputscale_prior=gpytorch.priors.SmoothedBoxPrior(-2, 2),
    )
def __init__(self, inducing_points):
    '''
    By default, we use the VariationalStrategy class with a
    CholeskyVariationalDistribution. The CholeskyVariationalDistribution
    class allows S to be any positive semidefinite matrix. This is the
    most general/expressive option for approximate GPs.
    '''
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
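# --- Hedged usage sketch (not from the original source) ---
# How an approximate GP built from the __init__ above is typically trained.
# `SVGPModel` is a stand-in name for the surrounding class (its forward
# method is assumed to return a MultivariateNormal built from self.mean and
# self.covar); the data and hyperparameters are illustrative assumptions.
import torch
import gpytorch

train_x = torch.rand(256, 2)
train_y = torch.sin(train_x.sum(-1))

model = SVGPModel(train_x[:32].clone())  # 32 inducing points
likelihood = gpytorch.likelihoods.GaussianLikelihood()

model.train()
likelihood.train()
optimizer = torch.optim.Adam(
    list(model.parameters()) + list(likelihood.parameters()), lr=0.01)
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

for _ in range(200):
    optimizer.zero_grad()
    loss = -mll(model(train_x), train_y)
    loss.backward()
    optimizer.step()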
def main():
    x_data = torch.linspace(0, 1, 100)
    y_data = torch.sin(3.7 * x_data * (2 * math.pi)) + torch.randn(x_data.size()) * 0.1
    # Shift points 35-39 upward to inject an anomaly for the detector.
    for i in range(5):
        y_data[i + 35] = y_data[i + 35] + 0.5

    myDetector = GpnsDetector(ConstantMean(), ScaleKernel(RBFKernel()),
                              GaussianLikelihood(), x_data, y_data)
    optimizer = torch.optim.Adam
    optimizer_kwargs = {'lr': 0.1}
    mll = gpytorch.mlls.ExactMarginalLogLikelihood
    myDetector.train(5000, 10, mll, optimizer, **optimizer_kwargs)
def __init__(self, inducing_points, use_fast_strategy):
    inducing_points = torch.from_numpy(inducing_points).float()
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(0))
    variational_strategy = UnwhitenedVariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super().__init__(variational_strategy)
    self.mean_module = ConstantMean()
    dims = inducing_points.shape[1]
    # self.covar_module = ScaleKernel(MaternKernel(ard_num_dims=dims)) + ScaleKernel(LinearKernel(ard_num_dims=dims))
    self.covar_module = ScaleKernel(RBFKernel())
def __init__(self, num_dim, grid_bounds=(-10.0, 10.0), grid_size=64):
    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=grid_size, batch_shape=torch.Size([num_dim]))
    base_strategy = GridInterpolationVariationalStrategy(
        self, grid_size=grid_size, grid_bounds=[grid_bounds],
        variational_distribution=variational_distribution)
    variational_strategy = MultitaskVariationalStrategy(base_strategy,
                                                        num_tasks=num_dim)
    super().__init__(variational_strategy)
    self.covar = ScaleKernel(
        RBFKernel(lengthscale_prior=SmoothedBoxPrior(
            math.exp(-1), math.exp(1), sigma=0.1, transform=torch.exp)))
    self.mean = ConstantMean()
    self.grid_bounds = grid_bounds
def __init__(self, inducing_points):
    '''
    One way to reduce the number of parameters is to restrict S to be
    diagonal. This is less expressive, but the number of parameters is now
    linear in m instead of quadratic. All we have to do is take the previous
    example and change the CholeskyVariationalDistribution to a
    MeanFieldVariationalDistribution.
    '''
    variational_distribution = MeanFieldVariationalDistribution(
        inducing_points.size(-2))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
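# --- Hedged illustration (not from the original source) ---
# Counting variational parameters for m = 100 inducing points confirms the
# linear-vs-quadratic claim in the docstring above; the expected totals in
# the comments assume a mean plus a full Cholesky factor (resp. a diagonal).
from gpytorch.variational import (
    CholeskyVariationalDistribution,
    MeanFieldVariationalDistribution,
)

m = 100
chol = CholeskyVariationalDistribution(m)
mean_field = MeanFieldVariationalDistribution(m)
print(sum(p.numel() for p in chol.parameters()))        # m + m*m = 10100
print(sum(p.numel() for p in mean_field.parameters()))  # m + m = 200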
def __init__(self, train_X, train_Y, likelihood, dim, lengthscale_constraint,
             outputscale_constraint, ard_dims):
    # squeeze output dim before passing train_Y to ExactGP
    super().__init__(train_X, train_Y, likelihood)
    self.dim = dim
    self.mean_module = ConstantMean()
    self.covar_module = ScaleKernel(CylindricalKernel(
        num_angular_weights=ard_dims,
        alpha_prior=KumaAlphaPrior(),
        alpha_constraint=gpytorch.constraints.constraints.Interval(
            lower_bound=0.5, upper_bound=1.),
        beta_prior=KumaBetaPrior(),
        beta_constraint=gpytorch.constraints.constraints.Interval(
            lower_bound=1., upper_bound=2.),
        radial_base_kernel=MaternKernel(
            lengthscale_constraint=lengthscale_constraint,
            ard_num_dims=1, nu=2.5),
        # angular_weights_constraint=gpytorch.constraints.constraints.Interval(
        #     lower_bound=np.exp(-12.), upper_bound=np.exp(20.)),
        angular_weights_prior=AngularWeightsPrior(),
    ))
    self.to(train_X)  # make sure we're on the right device/dtype
def __init__(self, input_dims, output_dims, num_inducing=128,
             mean_type='constant'):
    if output_dims is None:
        inducing_points = torch.randn(num_inducing, input_dims)
        batch_shape = torch.Size([])
    else:
        inducing_points = torch.randn(output_dims, num_inducing, input_dims)
        batch_shape = torch.Size([output_dims])

    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=num_inducing, batch_shape=batch_shape)
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super(DGPHiddenLayer, self).__init__(variational_strategy, input_dims,
                                         output_dims)

    if mean_type == 'constant':
        self.mean_module = ConstantMean(batch_shape=batch_shape)
    else:  # 'linear'
        self.mean_module = LinearMean(input_dims)

    # lengthscale_constraint = gpytorch.constraints.Interval(0.0001, 10.0)  # bounds need to be floats
    lengthscale_prior = gpytorch.priors.NormalPrior(0.1, 2.0)
    outputscale_prior = gpytorch.priors.NormalPrior(1.0, 3.0)
    lengthscale_constraint = None
    # lengthscale_prior = None
    self.covar_module = ScaleKernel(
        RBFKernel(batch_shape=batch_shape,
                  ard_num_dims=input_dims,
                  # active_dims=(0),
                  lengthscale_constraint=lengthscale_constraint,
                  lengthscale_prior=lengthscale_prior),
        outputscale_prior=outputscale_prior,
        batch_shape=batch_shape,
        ard_num_dims=input_dims)
def __init__(self, inducing_points, kernel=None):
    # q(u)
    # NOTE: size(-1) counts the inducing points only for a 1-D tensor;
    # for an (m, d) tensor of inducing points, use size(-2) instead.
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-1))
    # q(f|x) = ∫ q(f, u) du = ∫ q(f|u, x) q(u) du
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super().__init__(variational_strategy)
    self.mean_module = ConstantMean()
    if kernel is None:
        kernel = RBFKernel()
    self.covar_module = ScaleKernel(kernel)
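# --- Hedged usage sketch (not from the original source) ---
# At prediction time, calling the model evaluates the marginal
# q(f|x) = ∫ q(f|u, x) q(u) du noted above. `ApproxGPModel` is a stand-in
# name for the surrounding class, whose forward method is assumed to return
# a MultivariateNormal; the 1-D inducing points match the size(-1) note.
import torch

model = ApproxGPModel(torch.randn(20))  # 1-D inducing points, see note above
model.eval()
with torch.no_grad():
    qf = model(torch.linspace(-1, 1, 5))
    print(qf.mean.shape, qf.variance.shape)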
def __init__(self, input_size, device='cpu'):
    if device == 'gpu' and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    else:
        self.device = torch.device('cpu')
    self.input_size = input_size
    _likelihood = GaussianLikelihood()
    super(GPRegressor, self).__init__(train_inputs=None, train_targets=None,
                                      likelihood=_likelihood)
    self.mean_module = ZeroMean()
    self.covar_module = ScaleKernel(RBFKernel())
    self.input_trans = None
    self.target_trans = None
def create_bayesian_quadrature_iso_gauss():
    x1 = torch.from_numpy(np.array([[-1, 1], [0, 0], [-2, 0.1]]))
    x2 = torch.from_numpy(np.array([[-1, 1], [0, 0], [-2, 0.1], [-3, 3]]))
    M1 = x1.size()[0]
    M2 = x2.size()[0]
    D = x1.size()[1]

    prior_mean = torch.from_numpy(np.arange(D))[None, :]
    prior_variance = 2.

    rbf = RBFKernel()
    rbf.lengthscale = 1.
    kernel = ScaleKernel(rbf)
    kernel.outputscale = 1.

    bqkernel = QuadratureRBFGaussPrior(kernel, prior_mean, prior_variance)
    return bqkernel, x1, x2, M1, M2, D