def test_forward(self):
    """Compare the dense kernel matrix with a directly computed reference.

    The reference is ``exp(-mean(delta / lengthscale))``, where ``delta`` is
    the element-wise "values differ" indicator between every pair of rows
    and the mean is taken over the feature dimension.
    """
    ls = 2
    pts_a = torch.tensor([[4, 2], [3, 1], [8, 5], [7, 6]], dtype=torch.float)
    pts_b = torch.tensor([[4, 2], [3, 0], [4, 4]], dtype=torch.float)

    kernel = CategoricalKernel().initialize(lengthscale=ls)
    kernel.eval()

    # Pairwise mismatch indicator via broadcasting: (4, 1, 2) vs (1, 3, 2).
    mismatch = pts_a.unsqueeze(-2) != pts_b.unsqueeze(-3)
    expected = torch.exp(-(mismatch / ls).mean(-1))

    computed = kernel(pts_a, pts_b).evaluate()
    self.assertTrue(torch.allclose(computed, expected))
def test_active_dims(self):
    """Check that `active_dims=[0]` restricts the kernel to the first column.

    The reference value is computed from column 0 of both inputs only, using
    the same ``exp(-mean(delta / lengthscale))`` form that the other tests in
    this file use for this kernel.
    """
    x1 = torch.tensor([[4, 2], [3, 1], [8, 5], [7, 6]], dtype=torch.float)
    x2 = torch.tensor([[4, 2], [3, 0], [4, 4]], dtype=torch.float)
    lengthscale = 2
    kernel = CategoricalKernel(active_dims=[0]).initialize(lengthscale=lengthscale)
    kernel.eval()

    # Only the active column contributes to the mismatch indicator.
    dists = x1[:, :1].unsqueeze(-2) != x2[:, :1].unsqueeze(-3)
    # The indicator is scaled by the lengthscale directly (not squared); a
    # squared scaling would disagree with `test_forward` for the same kernel
    # and lengthscale.
    sc_dists = dists / lengthscale
    actual = torch.exp(-sc_dists.mean(-1))

    res = kernel(x1, x2).evaluate()
    self.assertTrue(torch.allclose(res, actual))
def test_ard(self):
    """Check the kernel with two lengthscales (`ard_num_dims=2`).

    Four evaluation modes are compared against directly computed references:
    the dense matrix, its diagonal, the `last_dim_is_batch=True` matrix, and
    the diagonal of the latter.
    """
    pts_a = torch.tensor([[4, 2], [3, 1], [8, 5]], dtype=torch.float)
    pts_b = torch.tensor([[4, 2], [3, 0], [4, 4]], dtype=torch.float)
    ls = torch.tensor([[[1, 2]]], dtype=torch.float)  # shape (1, 1, 2)

    kernel = CategoricalKernel(ard_num_dims=2)
    kernel.initialize(lengthscale=ls)
    kernel.eval()

    # Per-dimension mismatch indicator, scaled by each dimension's lengthscale.
    mismatch = pts_a.unsqueeze(-2) != pts_b.unsqueeze(-3)
    scaled = mismatch / ls.unsqueeze(-2)

    # Dense kernel matrix.
    expected_full = torch.exp(-scaled.mean(-1))
    out = kernel(pts_a, pts_b).evaluate()
    self.assertTrue(torch.allclose(out, expected_full))

    # Diagonal only.
    out = kernel(pts_a, pts_b).diag()
    expected_diag = torch.diagonal(expected_full, dim1=-1, dim2=-2)
    self.assertTrue(torch.allclose(out, expected_diag))

    # last_dim_is_batch: no mean over features; the feature axis is moved
    # to position -3 of the reference instead.
    expected_batch = torch.exp(-scaled).transpose(-1, -3)
    out = kernel(pts_a, pts_b, last_dim_is_batch=True).evaluate()
    self.assertTrue(torch.allclose(out, expected_batch))

    # last_dim_is_batch combined with the diagonal.
    out = kernel(pts_a, pts_b, last_dim_is_batch=True).diag()
    expected_batch_diag = torch.diagonal(expected_batch, dim1=-1, dim2=-2)
    self.assertTrue(torch.allclose(out, expected_batch_diag))
def test_ard_separate_batch(self):
    """Check a batch of two kernels, each with its own three lengthscales.

    Covers the dense matrix, its diagonal, the `last_dim_is_batch=True`
    matrix, and the diagonal of the latter.
    """
    pts_a = torch.tensor(
        [
            [[4, 2, 1], [3, 1, 5]],
            [[3, 2, 3], [6, 1, 7]],
        ],
        dtype=torch.float,
    )
    pts_b = torch.tensor([[[4, 2, 1], [6, 0, 0]]], dtype=torch.float)
    # One row of lengthscales per batch element.
    ls = torch.tensor([[[1, 2, 1]], [[2, 1, 0.5]]], dtype=torch.float)

    kernel = CategoricalKernel(batch_shape=torch.Size([2]), ard_num_dims=3)
    kernel.initialize(lengthscale=ls)
    kernel.eval()

    # Per-dimension mismatch indicator, scaled by the matching lengthscale.
    mismatch = pts_a.unsqueeze(-2) != pts_b.unsqueeze(-3)
    scaled = mismatch / ls.unsqueeze(-2)

    # Dense kernel matrix.
    expected_full = torch.exp(-scaled.mean(-1))
    out = kernel(pts_a, pts_b).evaluate()
    self.assertTrue(torch.allclose(out, expected_full))

    # Diagonal only.
    out = kernel(pts_a, pts_b).diag()
    expected_diag = torch.diagonal(expected_full, dim1=-1, dim2=-2)
    self.assertTrue(torch.allclose(out, expected_diag))

    # last_dim_is_batch: no mean over features; the feature axis is moved
    # to position -3 of the reference instead.
    expected_batch = torch.exp(-scaled).transpose(-1, -3)
    out = kernel(pts_a, pts_b, last_dim_is_batch=True).evaluate()
    self.assertTrue(torch.allclose(out, expected_batch))

    # last_dim_is_batch combined with the diagonal.
    out = kernel(pts_a, pts_b, last_dim_is_batch=True).diag()
    expected_batch_diag = torch.diagonal(expected_batch, dim1=-1, dim2=-2)
    self.assertTrue(torch.allclose(out, expected_batch_diag))
def test_ard_batch(self):
    """Check a batch of two kernels initialized from one set of lengthscales.

    The lengthscale tensor has a leading dimension of size 1 while the
    kernel's `batch_shape` is 2; the reference relies on broadcasting.
    """
    pts_a = torch.tensor(
        [
            [[4, 2, 1], [3, 1, 5]],
            [[3, 2, 3], [6, 1, 7]],
        ],
        dtype=torch.float,
    )
    pts_b = torch.tensor([[[4, 2, 1], [6, 0, 0]]], dtype=torch.float)
    ls = torch.tensor([[[1, 2, 1]]], dtype=torch.float)

    kernel = CategoricalKernel(batch_shape=torch.Size([2]), ard_num_dims=3)
    kernel.initialize(lengthscale=ls)
    kernel.eval()

    # Reference: exp(-mean over features of (mismatch / lengthscale)).
    mismatch = pts_a.unsqueeze(-2) != pts_b.unsqueeze(-3)
    scaled = mismatch / ls.unsqueeze(-2)
    expected = torch.exp(-scaled.mean(-1))

    computed = kernel(pts_a, pts_b).evaluate()
    self.assertTrue(torch.allclose(computed, expected))
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    cat_dims: List[int],
    cont_kernel_factory: Optional[
        Callable[[torch.Size, int, List[int]], Kernel]
    ] = None,
    likelihood: Optional[Likelihood] = None,
    outcome_transform: Optional[OutcomeTransform] = None,  # TODO
    input_transform: Optional[InputTransform] = None,  # TODO
) -> None:
    r"""A single-task exact GP model supporting categorical parameters.

    If every input column is categorical, the covariance is
    `ScaleKernel(CategoricalKernel)`. Otherwise it is the sum of two terms:
    `ScaleKernel(cont + ScaleKernel(cat))` and `ScaleKernel(cont * cat)`,
    where `cont` is a kernel over the non-categorical ("ordinal") columns
    and `cat` is a `CategoricalKernel` over the categorical columns. Each
    term gets its own kernel instances.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        cat_dims: A list of indices corresponding to the columns of
            the input `X` that should be considered categorical features.
            Must be non-empty.
        cont_kernel_factory: A method that accepts `batch_shape`,
            `ard_num_dims`, and `active_dims` arguments (passed by keyword)
            and returns an instantiated GPyTorch `Kernel` object to be used
            as the base kernel for the continuous dimensions. If omitted,
            this model uses a Matern-2.5 kernel as the kernel for the
            ordinal parameters.
        likelihood: A likelihood. If omitted, use a standard
            GaussianLikelihood with inferred noise level.
        outcome_transform: Not yet supported; must be `None`.
        input_transform: Not yet supported; must be `None`.

    Raises:
        UnsupportedError: If `outcome_transform` or `input_transform` is
            not `None`.
        ValueError: If `cat_dims` is empty.

    Example:
        >>> train_X = torch.cat(
        ...     [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
        ... )
        >>> train_Y = (
        ...     torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
        ...     + train_X[..., -1:]
        ... )
        >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
    """
    if outcome_transform is not None:
        raise UnsupportedError("outcome transforms not yet supported")
    if input_transform is not None:
        raise UnsupportedError("input transforms not yet supported")
    if len(cat_dims) == 0:
        raise ValueError(
            "Must specify categorical dimensions for MixedSingleTaskGP"
        )
    # Only the augmented batch shape is needed for building the kernels
    # and the default likelihood.
    _, aug_batch_shape = self.get_batch_dimensions(
        train_X=train_X, train_Y=train_Y
    )

    if cont_kernel_factory is None:

        def cont_kernel_factory(
            batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
        ) -> MaternKernel:
            return MaternKernel(
                nu=2.5,
                batch_shape=batch_shape,
                ard_num_dims=ard_num_dims,
                active_dims=active_dims,
            )

    if likelihood is None:
        # This Gamma prior is quite close to the Horseshoe prior
        min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
        likelihood = GaussianLikelihood(
            batch_shape=aug_batch_shape,
            noise_constraint=GreaterThan(
                min_noise, transform=None, initial_value=1e-3
            ),
            noise_prior=GammaPrior(0.9, 10.0),
        )

    d = train_X.shape[-1]
    # NOTE(review): presumably maps negative indices (e.g. `-1`, as in the
    # docstring example) to their non-negative equivalents - confirm
    # against `normalize_indices`.
    cat_dims = normalize_indices(indices=cat_dims, d=d)
    # Every column that is not categorical is treated as ordinal/continuous.
    ord_dims = sorted(set(range(d)) - set(cat_dims))
    if len(ord_dims) == 0:
        # All columns are categorical: a single scaled categorical kernel.
        covar_module = ScaleKernel(
            CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
            )
        )
    else:
        # Additive term: continuous kernel plus a scaled categorical kernel.
        sum_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            + ScaleKernel(
                CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                    active_dims=cat_dims,
                )
            )
        )
        # Multiplicative term, built from fresh kernel instances so it does
        # not share kernel objects with the additive term.
        prod_kernel = ScaleKernel(
            cont_kernel_factory(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(ord_dims),
                active_dims=ord_dims,
            )
            * CategoricalKernel(
                batch_shape=aug_batch_shape,
                ard_num_dims=len(cat_dims),
                active_dims=cat_dims,
            )
        )
        covar_module = sum_kernel + prod_kernel

    # Both transforms are guaranteed to be `None` here (checked above); they
    # are forwarded unchanged to the parent constructor.
    super().__init__(
        train_X=train_X,
        train_Y=train_Y,
        likelihood=likelihood,
        covar_module=covar_module,
        outcome_transform=outcome_transform,
        input_transform=input_transform,
    )
def test_initialize_lengthscale_batch(self):
    """Initialize a batch-of-two kernel from a two-element lengthscale tensor.

    The stored lengthscale must equal the initial values (reshaped to the
    kernel's own lengthscale shape) up to a norm of 1e-5.
    """
    init_ls = torch.tensor([1.0, 2.0])
    kernel = CategoricalKernel(batch_shape=torch.Size([2]))
    kernel.initialize(lengthscale=init_ls)

    expected = init_ls.view_as(kernel.lengthscale)
    deviation = torch.norm(kernel.lengthscale - expected)
    self.assertLess(deviation, 1e-5)
def test_initialize_lengthscale(self):
    """Initialize the lengthscale from a scalar and read the same value back.

    The stored lengthscale must equal 1.0 (reshaped to the kernel's own
    lengthscale shape) up to a norm of 1e-5.
    """
    kernel = CategoricalKernel()
    kernel.initialize(lengthscale=1)

    expected = torch.tensor(1.0).view_as(kernel.lengthscale)
    deviation = torch.norm(kernel.lengthscale - expected)
    self.assertLess(deviation, 1e-5)
def create_kernel_no_ard(self, **kwargs):
    """Build a `CategoricalKernel`, forwarding all keyword arguments."""
    kernel = CategoricalKernel(**kwargs)
    return kernel