import torch

from botorch.models.kernels.categorical import CategoricalKernel


def test_forward(self):
    x1 = torch.tensor([[4, 2], [3, 1], [8, 5], [7, 6]], dtype=torch.float)
    x2 = torch.tensor([[4, 2], [3, 0], [4, 4]], dtype=torch.float)
    lengthscale = 2
    kernel = CategoricalKernel().initialize(lengthscale=lengthscale)
    kernel.eval()
    sc_dists = (x1.unsqueeze(-2) != x2.unsqueeze(-3)) / lengthscale
    actual = torch.exp(-sc_dists.mean(-1))
    res = kernel(x1, x2).evaluate()
    self.assertTrue(torch.allclose(res, actual))
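For a single pair of points the quantity under test has a simple closed form: the kernel is the exponential of the negative mean of the per-dimension mismatch indicators, each scaled by the lengthscale. A minimal hand-computed sketch (not part of the test suite):

import torch

x1 = torch.tensor([4.0, 2.0])
x2 = torch.tensor([3.0, 2.0])  # the points disagree in the first dimension only
lengthscale = 2.0
mismatch = (x1 != x2).float()  # [1., 0.] -- per-dimension Hamming indicator
k_val = torch.exp(-(mismatch / lengthscale).mean())
print(k_val)  # exp(-0.25) ~= 0.7788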
Example #2
def test_active_dims(self):
    x1 = torch.tensor([[4, 2], [3, 1], [8, 5], [7, 6]], dtype=torch.float)
    x2 = torch.tensor([[4, 2], [3, 0], [4, 4]], dtype=torch.float)
    lengthscale = 2
    kernel = CategoricalKernel(active_dims=[0]).initialize(
        lengthscale=lengthscale
    )
    kernel.eval()
    # Only the active column (column 0) enters the expected distance.
    dists = x1[:, :1].unsqueeze(-2) != x2[:, :1].unsqueeze(-3)
    sc_dists = dists / lengthscale
    actual = torch.exp(-sc_dists.mean(-1))
    res = kernel(x1, x2).evaluate()
    self.assertTrue(torch.allclose(res, actual))
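To make the effect of active_dims concrete, here is a hypothetical snippet (not from the test suite): with active_dims=[0] the kernel only sees column 0, so a mismatch in any other column is ignored and rows that agree in column 0 score exactly 1.

import torch
from botorch.models.kernels.categorical import CategoricalKernel

kernel = CategoricalKernel(active_dims=[0])
kernel.eval()
x1 = torch.tensor([[4.0, 2.0]])
x2 = torch.tensor([[4.0, 9.0]])  # differs only in the inactive column 1
print(kernel(x1, x2).evaluate())  # tensor([[1.]]) -- column 1 is ignored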
    def test_ard(self):
        x1 = torch.tensor([[4, 2], [3, 1], [8, 5]], dtype=torch.float)
        x2 = torch.tensor([[4, 2], [3, 0], [4, 4]], dtype=torch.float)
        lengthscales = torch.tensor([1, 2], dtype=torch.float).view(1, 1, 2)

        kernel = CategoricalKernel(ard_num_dims=2)
        kernel.initialize(lengthscale=lengthscales)
        kernel.eval()

        sc_dists = x1.unsqueeze(-2) != x2.unsqueeze(-3)
        sc_dists = sc_dists / lengthscales.unsqueeze(-2)
        actual = torch.exp(-sc_dists.mean(-1))
        res = kernel(x1, x2).evaluate()
        self.assertTrue(torch.allclose(res, actual))

        # diag
        res = kernel(x1, x2).diag()
        actual = torch.diagonal(actual, dim1=-1, dim2=-2)
        self.assertTrue(torch.allclose(res, actual))

        # batch_dims
        actual = torch.exp(-sc_dists).transpose(-1, -3)
        res = kernel(x1, x2, last_dim_is_batch=True).evaluate()
        self.assertTrue(torch.allclose(res, actual))

        # batch_dims + diag
        res = kernel(x1, x2, last_dim_is_batch=True).diag()
        self.assertTrue(torch.allclose(res, torch.diagonal(actual, dim1=-1, dim2=-2)))
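Spelled out for a single pair of points, the ARD expectation the test mirrors divides each dimension's mismatch indicator by that dimension's own lengthscale before averaging. A minimal sketch under that assumption:

import torch

x1 = torch.tensor([3.0, 1.0])
x2 = torch.tensor([3.0, 0.0])  # mismatch in the second dimension only
lengthscales = torch.tensor([1.0, 2.0])
k_val = torch.exp(-((x1 != x2) / lengthscales).mean())
print(k_val)  # exp(-(0/1 + 1/2) / 2) = exp(-0.25) ~= 0.7788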
    def test_ard_separate_batch(self):
        x1 = torch.tensor(
            [
                [[4, 2, 1], [3, 1, 5]],
                [[3, 2, 3], [6, 1, 7]],
            ],
            dtype=torch.float,
        )
        x2 = torch.tensor([[[4, 2, 1], [6, 0, 0]]], dtype=torch.float)
        lengthscales = torch.tensor([[[1, 2, 1]], [[2, 1, 0.5]]], dtype=torch.float)

        kernel = CategoricalKernel(batch_shape=torch.Size([2]), ard_num_dims=3)
        kernel.initialize(lengthscale=lengthscales)
        kernel.eval()

        sc_dists = x1.unsqueeze(-2) != x2.unsqueeze(-3)
        sc_dists = sc_dists / lengthscales.unsqueeze(-2)
        actual = torch.exp(-sc_dists.mean(-1))
        res = kernel(x1, x2).evaluate()
        self.assertTrue(torch.allclose(res, actual))

        # diag
        res = kernel(x1, x2).diag()
        actual = torch.diagonal(actual, dim1=-1, dim2=-2)
        self.assertTrue(torch.allclose(res, actual))

        # batch_dims
        actual = torch.exp(-sc_dists).transpose(-1, -3)
        res = kernel(x1, x2, last_dim_is_batch=True).evaluate()
        self.assertTrue(torch.allclose(res, actual))

        # batch_dims + diag
        res = kernel(x1, x2, last_dim_is_batch=True).diag()
        self.assertTrue(torch.allclose(res, torch.diagonal(actual, dim1=-1, dim2=-2)))
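In the batched ARD setup above, the kernel keeps one lengthscale per (batch, dimension) pair. A small sketch of the resulting parameter shape (inferred from the test; GPyTorch stores lengthscales as batch_shape x 1 x ard_num_dims):

import torch
from botorch.models.kernels.categorical import CategoricalKernel

kernel = CategoricalKernel(batch_shape=torch.Size([2]), ard_num_dims=3)
print(kernel.lengthscale.shape)  # torch.Size([2, 1, 3])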
    def test_ard_batch(self):
        x1 = torch.tensor(
            [
                [[4, 2, 1], [3, 1, 5]],
                [[3, 2, 3], [6, 1, 7]],
            ],
            dtype=torch.float,
        )
        x2 = torch.tensor([[[4, 2, 1], [6, 0, 0]]], dtype=torch.float)
        lengthscales = torch.tensor([[[1, 2, 1]]], dtype=torch.float)

        kernel = CategoricalKernel(batch_shape=torch.Size([2]), ard_num_dims=3)
        kernel.initialize(lengthscale=lengthscales)
        kernel.eval()

        sc_dists = x1.unsqueeze(-2) != x2.unsqueeze(-3)
        sc_dists = sc_dists / lengthscales.unsqueeze(-2)
        actual = torch.exp(-sc_dists.mean(-1))
        res = kernel(x1, x2).evaluate()
        self.assertTrue(torch.allclose(res, actual))
Example #6
    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        cat_dims: List[int],
        cont_kernel_factory: Optional[
            Callable[[torch.Size, int, List[int]], Kernel]
        ] = None,
        likelihood: Optional[Likelihood] = None,
        outcome_transform: Optional[OutcomeTransform] = None,  # TODO
        input_transform: Optional[InputTransform] = None,  # TODO
    ) -> None:
        r"""A single-task exact GP model supporting categorical parameters.

        Args:
            train_X: A `batch_shape x n x d` tensor of training features.
            train_Y: A `batch_shape x n x m` tensor of training observations.
            cat_dims: A list of indices corresponding to the columns of
                the input `X` that should be considered categorical features.
            cont_kernel_factory: A method that accepts `batch_shape`,
                `ard_num_dims`, and `active_dims` arguments and returns an
                instantiated GPyTorch `Kernel` object to be used as the base
                kernel for the continuous dimensions. If omitted, this model
                uses a Matern-2.5 kernel as the kernel for the ordinal
                parameters.
            likelihood: A likelihood. If omitted, use a standard
                GaussianLikelihood with inferred noise level.
            # outcome_transform: An outcome transform that is applied to the
            #     training data during instantiation and to the posterior during
            #     inference (that is, the `Posterior` obtained by calling
            #     `.posterior` on the model will be on the original scale).
            # input_transform: An input transform that is applied in the model's
            #     forward pass.

        Example:
            >>> train_X = torch.cat(
                    [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
                )
            >>> train_Y = (
                    torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
                    + train_X[..., -1:]
                )
            >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
        """
        if outcome_transform is not None:
            raise UnsupportedError("outcome transforms not yet supported")
        if input_transform is not None:
            raise UnsupportedError("input transforms not yet supported")
        if len(cat_dims) == 0:
            raise ValueError(
                "Must specify categorical dimensions for MixedSingleTaskGP"
            )
        input_batch_shape, aug_batch_shape = self.get_batch_dimensions(
            train_X=train_X, train_Y=train_Y
        )

        if cont_kernel_factory is None:

            def cont_kernel_factory(
                batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
            ) -> MaternKernel:
                return MaternKernel(
                    nu=2.5,
                    batch_shape=batch_shape,
                    ard_num_dims=ard_num_dims,
                    active_dims=active_dims,
                )

        if likelihood is None:
            # This Gamma prior is quite close to the Horseshoe prior
            min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
            likelihood = GaussianLikelihood(
                batch_shape=aug_batch_shape,
                noise_constraint=GreaterThan(
                    min_noise, transform=None, initial_value=1e-3
                ),
                noise_prior=GammaPrior(0.9, 10.0),
            )

        d = train_X.shape[-1]
        cat_dims = normalize_indices(indices=cat_dims, d=d)
        ord_dims = sorted(set(range(d)) - set(cat_dims))
        if len(ord_dims) == 0:
            covar_module = ScaleKernel(
                CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                )
            )
        else:
            sum_kernel = ScaleKernel(
                cont_kernel_factory(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(ord_dims),
                    active_dims=ord_dims,
                )
                + ScaleKernel(
                    CategoricalKernel(
                        batch_shape=aug_batch_shape,
                        ard_num_dims=len(cat_dims),
                        active_dims=cat_dims,
                    )
                )
            )
            prod_kernel = ScaleKernel(
                cont_kernel_factory(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(ord_dims),
                    active_dims=ord_dims,
                )
                * CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                    active_dims=cat_dims,
                )
            )
            covar_module = sum_kernel + prod_kernel
        super().__init__(
            train_X=train_X,
            train_Y=train_Y,
            likelihood=likelihood,
            covar_module=covar_module,
            outcome_transform=outcome_transform,
            input_transform=input_transform,
        )
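A hypothetical end-to-end sketch of the cont_kernel_factory hook (the factory name rbf_factory and the toy data are illustrative; MixedSingleTaskGP, RBFKernel, and fit_gpytorch_model are the standard BoTorch/GPyTorch names): it swaps an RBF base kernel in for the default Matern-2.5 on the continuous dimensions.

import torch
from botorch.fit import fit_gpytorch_model
from botorch.models import MixedSingleTaskGP
from gpytorch.kernels import RBFKernel
from gpytorch.mlls import ExactMarginalLogLikelihood


def rbf_factory(batch_shape, ard_num_dims, active_dims):
    # Same signature the model expects; returns an RBF kernel restricted to
    # the continuous (ordinal) dimensions.
    return RBFKernel(
        batch_shape=batch_shape,
        ard_num_dims=ard_num_dims,
        active_dims=active_dims,
    )


train_X = torch.cat(
    [torch.rand(20, 2), torch.randint(3, (20, 1)).to(torch.float)], dim=-1
)
train_Y = torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True) + train_X[..., -1:]
model = MixedSingleTaskGP(
    train_X, train_Y, cat_dims=[-1], cont_kernel_factory=rbf_factory
)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)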
def test_initialize_lengthscale_batch(self):
    kernel = CategoricalKernel(batch_shape=torch.Size([2]))
    ls_init = torch.tensor([1.0, 2.0])
    kernel.initialize(lengthscale=ls_init)
    actual_value = ls_init.view_as(kernel.lengthscale)
    self.assertLess(torch.norm(kernel.lengthscale - actual_value), 1e-5)

def test_initialize_lengthscale(self):
    kernel = CategoricalKernel()
    kernel.initialize(lengthscale=1)
    actual_value = torch.tensor(1.0).view_as(kernel.lengthscale)
    self.assertLess(torch.norm(kernel.lengthscale - actual_value), 1e-5)

def create_kernel_no_ard(self, **kwargs):
    return CategoricalKernel(**kwargs)
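For context, create_kernel_no_ard is the hook a generic kernel test harness calls to construct the kernel under test. A sketch of the surrounding class, assuming GPyTorch's BaseKernelTestCase harness (the class name here is illustrative):

import unittest

from botorch.models.kernels.categorical import CategoricalKernel
from gpytorch.test.base_kernel_test_case import BaseKernelTestCase


class TestCategoricalKernel(unittest.TestCase, BaseKernelTestCase):
    # The harness builds kernels through these hooks and runs its generic
    # shape and batch-handling checks against them.
    def create_kernel_no_ard(self, **kwargs):
        return CategoricalKernel(**kwargs)

    def create_kernel_ard(self, num_dims, **kwargs):
        return CategoricalKernel(ard_num_dims=num_dims, **kwargs)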