def _create_marginal_input(self, batch_shape=torch.Size([])):
     mat = torch.randn(*batch_shape, 5, 5)
     mat2 = torch.randn(*batch_shape, 4, 4)
     covar = KroneckerProductLazyTensor(RootLazyTensor(mat),
                                        RootLazyTensor(mat2))
     return MultitaskMultivariateNormal(torch.randn(*batch_shape, 5, 4),
                                        covar)
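For context: RootLazyTensor(L) lazily represents the positive semi-definite matrix L Lᵀ without forming it, and KroneckerProductLazyTensor combines such factors. A minimal sketch (gpytorch.lazy API as used throughout these examples; sizes match the snippet above, double precision for tight tolerances):

import torch
from gpytorch.lazy import KroneckerProductLazyTensor, RootLazyTensor

mat = torch.randn(5, 5, dtype=torch.float64)
mat2 = torch.randn(4, 4, dtype=torch.float64)

# RootLazyTensor(L) lazily represents L @ L.T
root = RootLazyTensor(mat)
assert torch.allclose(root.evaluate(), mat @ mat.transpose(-1, -2), atol=1e-10)

# The Kronecker product of the 5x5 and 4x4 outer products gives the 20x20 covariance
covar = KroneckerProductLazyTensor(RootLazyTensor(mat), RootLazyTensor(mat2))
assert covar.size() == torch.Size([20, 20])
assert torch.allclose(covar.evaluate(), torch.kron(mat @ mat.T, mat2 @ mat2.T), atol=1e-10)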
Example #2
    def test_batch_mode_matmul_batch_mat_with_five_matrices(self):
        mats = make_random_mat(6, rank=4, batch_size=30)
        vec = torch.randn(5, 6, 7, requires_grad=True)
        mats_copy = mats.clone().detach().requires_grad_(True)
        vec_copy = vec.clone().detach().requires_grad_(True)

        # Forward
        res = RootLazyTensor(mats).mul_batch(mul_batch_size=6).matmul(vec)
        reshaped_mats_copy = mats_copy.view(5, 6, 6, 4)
        actual = prod(
            [
                (reshaped_mats_copy[:, 0].matmul(reshaped_mats_copy[:, 0].transpose(-1, -2)).view(5, 6, 6)),
                (reshaped_mats_copy[:, 1].matmul(reshaped_mats_copy[:, 1].transpose(-1, -2)).view(5, 6, 6)),
                (reshaped_mats_copy[:, 2].matmul(reshaped_mats_copy[:, 2].transpose(-1, -2)).view(5, 6, 6)),
                (reshaped_mats_copy[:, 3].matmul(reshaped_mats_copy[:, 3].transpose(-1, -2)).view(5, 6, 6)),
                (reshaped_mats_copy[:, 4].matmul(reshaped_mats_copy[:, 4].transpose(-1, -2)).view(5, 6, 6)),
                (reshaped_mats_copy[:, 5].matmul(reshaped_mats_copy[:, 5].transpose(-1, -2)).view(5, 6, 6)),
            ]
        ).matmul(vec_copy)
        self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

        # Backward
        res.sum().backward()
        actual.sum().backward()
        self.assertLess(torch.max(((mats.grad - mats_copy.grad) / mats_copy.grad).abs()), 0.05)
        self.assertLess(torch.max(((vec.grad - vec_copy.grad) / vec_copy.grad).abs()), 0.05)
    def test_batch_get_indices(self):
        root = torch.randn(2, 5, 1)
        actual = root.matmul(root.transpose(-1, -2))
        res = RootLazyTensor(root)

        batch_indices = torch.tensor([0, 1, 0, 1], dtype=torch.long)
        left_indices = torch.tensor([1, 2, 4, 0], dtype=torch.long)
        right_indices = torch.tensor([0, 1, 3, 2], dtype=torch.long)

        self.assertTrue(
            approx_equal(
                actual[batch_indices, left_indices, right_indices],
                res._batch_get_indices(batch_indices, left_indices,
                                       right_indices),
            ))

        batch_indices = torch.tensor(
            [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
            dtype=torch.long)
        left_indices = torch.tensor(
            [1, 2, 4, 0, 1, 2, 3, 1, 2, 2, 1, 1, 0, 0, 4, 4, 4, 4],
            dtype=torch.long)
        right_indices = torch.tensor(
            [0, 1, 3, 2, 3, 4, 2, 2, 1, 1, 2, 1, 2, 4, 4, 3, 3, 0],
            dtype=torch.long)

        self.assertTrue(
            approx_equal(
                actual[batch_indices, left_indices, right_indices],
                res._batch_get_indices(batch_indices, left_indices,
                                       right_indices),
            ))
Example #4
 def create_lazy_tensor(self):
     mat1 = make_random_mat(40, rank=5, batch_size=2)
     mat2 = make_random_mat(40, rank=5, batch_size=2)
     mat3 = make_random_mat(40, rank=5, batch_size=2)
     mat4 = make_random_mat(40, rank=5, batch_size=2)
     mat5 = make_random_mat(40, rank=5, batch_size=2)
     res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2),
                         RootLazyTensor(mat3), RootLazyTensor(mat4),
                         RootLazyTensor(mat5))
     return res.add_diag(torch.tensor(0.5))
Example #5
 def create_lazy_tensor(self):
     mat1 = make_random_mat(30, 3)
     mat2 = make_random_mat(30, 3)
     mat3 = make_random_mat(30, 3)
     mat4 = make_random_mat(30, 3)
     mat5 = make_random_mat(30, 3)
     res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2),
                         RootLazyTensor(mat3), RootLazyTensor(mat4),
                         RootLazyTensor(mat5))
     return res.add_diag(torch.tensor(1.0))
Example #6
    def forward(self, x1, x2):
        if x1.size() == x2.size() and torch.equal(x1, x2):
            # Use RootLazyTensor when x1 == x2 for efficiency when composing
            # with other kernels
            prod = RootLazyTensor(x1 - self.offset)
        else:
            prod = MatmulLazyTensor(x1 - self.offset,
                                    (x2 - self.offset).transpose(2, 1))

        return prod + self.variance.expand(prod.size())
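When x1 and x2 coincide, the RootLazyTensor branch above is simply a cheaper representation of the same product as the MatmulLazyTensor branch. A small sketch verifying that (the offset value is hypothetical; gpytorch.lazy API assumed):

import torch
from gpytorch.lazy import MatmulLazyTensor, RootLazyTensor

torch.manual_seed(0)
x = torch.randn(10, 3, dtype=torch.float64)
offset = 0.5  # hypothetical value standing in for self.offset

symmetric = RootLazyTensor(x - offset)                                  # x1 == x2 branch
general = MatmulLazyTensor(x - offset, (x - offset).transpose(-1, -2))  # generic branch
assert torch.allclose(symmetric.evaluate(), general.evaluate(), atol=1e-10)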
    def _test_inv_quad_logdet(self, inv_quad_rhs=None, logdet=False, improper_logdet=False):
        # Set up
        mat = torch.randn(*self.__class__.matrix_shape).requires_grad_(True)
        mat_clone = mat.detach().clone().requires_grad_(True)

        if inv_quad_rhs is not None:
            inv_quad_rhs.requires_grad_(True)
            inv_quad_rhs_clone = inv_quad_rhs.detach().clone().requires_grad_(True)

        # Compute actual values
        actual_tensor = mat_clone @ mat_clone.transpose(-1, -2)
        if inv_quad_rhs is not None:
            actual_inv_quad = actual_tensor.inverse().matmul(inv_quad_rhs_clone).mul(inv_quad_rhs_clone)
            actual_inv_quad = actual_inv_quad.sum([-1, -2]) if inv_quad_rhs.dim() >= 2 else actual_inv_quad.sum()
        if logdet:
            flattened_tensor = actual_tensor.view(-1, *actual_tensor.shape[-2:])
            logdets = torch.cat([mat.logdet().unsqueeze(0) for mat in flattened_tensor])
            if actual_tensor.dim() > 2:
                actual_logdet = logdets.view(*actual_tensor.shape[:-2])
            else:
                actual_logdet = logdets.squeeze()

        # Compute values with LazyTensor
        _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
        with gpytorch.settings.num_trace_samples(2000), \
                gpytorch.settings.max_cholesky_size(0), \
                gpytorch.settings.cg_tolerance(1e-5), \
                gpytorch.settings.skip_logdet_forward(improper_logdet), \
                patch("gpytorch.utils.linear_cg", new=_wrapped_cg) as linear_cg_mock:
            lazy_tensor = RootLazyTensor(mat)
            res_inv_quad, res_logdet = lazy_tensor.inv_quad_logdet(inv_quad_rhs=inv_quad_rhs, logdet=logdet)

        # Compare forward pass
        if inv_quad_rhs is not None:
            self.assertAllClose(res_inv_quad, actual_inv_quad, rtol=1e-2)
        if logdet:
            if improper_logdet:
                self.assertAlmostEqual(res_logdet.norm().item(), 0)
            else:
                self.assertAllClose(res_logdet, actual_logdet, rtol=1e-1, atol=2e-1)

        # Backward
        if inv_quad_rhs is not None:
            actual_inv_quad.sum().backward(retain_graph=True)
            res_inv_quad.sum().backward(retain_graph=True)
        if logdet:
            actual_logdet.sum().backward()
            res_logdet.sum().backward()

        self.assertAllClose(mat_clone.grad, mat.grad, rtol=1e-1, atol=2e-1)
        if inv_quad_rhs is not None:
            self.assertAllClose(inv_quad_rhs.grad, inv_quad_rhs_clone.grad, rtol=1e-2)

        # Make sure CG was called
        self.assertTrue(linear_cg_mock.called)
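The same quantities can be checked directly against dense linear algebra for a small, well-conditioned matrix. A minimal sketch, assuming default gpytorch settings (small matrices take the exact Cholesky path) and double precision for a tight tolerance:

import torch
from gpytorch.lazy import RootLazyTensor

torch.manual_seed(0)
mat = torch.randn(5, 5, dtype=torch.float64)
rhs = torch.randn(5, 3, dtype=torch.float64)

lt = RootLazyTensor(mat)
res_inv_quad, res_logdet = lt.inv_quad_logdet(inv_quad_rhs=rhs, logdet=True)

dense = mat @ mat.transpose(-1, -2)
actual_inv_quad = (torch.linalg.solve(dense, rhs) * rhs).sum()
actual_logdet = torch.logdet(dense)
assert torch.allclose(res_inv_quad, actual_inv_quad, rtol=1e-6, atol=1e-6)
assert torch.allclose(res_logdet, actual_logdet, rtol=1e-6, atol=1e-6)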
Example #8
 def create_lazy_tensor(self):
     mat1 = make_random_mat(6, rank=6, batch_shape=torch.Size((2, 3)))
     mat2 = make_random_mat(6, rank=6, batch_shape=torch.Size((2, 3)))
     res = RootLazyTensor(mat1) * RootLazyTensor(mat2)
     return res.add_diag(torch.tensor(0.5))
    def test_batch_diag(self):
        root = torch.randn(4, 5, 3)
        actual = root.matmul(root.transpose(-1, -2))
        actual_diag = torch.cat([
            actual[0].diag().unsqueeze(0),
            actual[1].diag().unsqueeze(0),
            actual[2].diag().unsqueeze(0),
            actual[3].diag().unsqueeze(0),
        ])

        res = RootLazyTensor(root)
        self.assertTrue(approx_equal(actual_diag, res.diag()))
Example #10
    def _get_covariance(self, x1, x2):
        k_ux1 = delazify(self.base_kernel(x1, self.inducing_points))
        if torch.equal(x1, x2):
            covar = RootLazyTensor(k_ux1.matmul(self._inducing_inv_root))

            # Diagonal correction for predictive posterior
            correction = (self.base_kernel(x1, x2, diag=True) -
                          covar.diag()).clamp(0, math.inf)
            covar = PsdSumLazyTensor(covar, DiagLazyTensor(correction))
        else:
            k_ux2 = delazify(self.base_kernel(x2, self.inducing_points))
            covar = MatmulLazyTensor(
                k_ux1.matmul(self._inducing_inv_root),
                k_ux2.matmul(self._inducing_inv_root).transpose(-1, -2))

        return covar
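The RootLazyTensor branch above encodes the Nyström term K_xu K_uu^{-1} K_ux as R Rᵀ with R = K_xu K_uu^{-1/2}. A dense sketch of that identity with a hypothetical RBF kernel and inducing set (not the class's own attributes):

import torch

def rbf(a, b):
    # simple RBF kernel, a stand-in for self.base_kernel
    return torch.exp(-0.5 * torch.cdist(a, b) ** 2)

torch.manual_seed(0)
x = torch.randn(20, 2, dtype=torch.float64)
u = torch.randn(5, 2, dtype=torch.float64)  # hypothetical inducing points

k_uu = rbf(u, u) + 1e-8 * torch.eye(5, dtype=torch.float64)  # jitter for stability
k_xu = rbf(x, u)
inv_root = torch.linalg.cholesky(torch.linalg.inv(k_uu))     # one valid K_uu^{-1/2}

nystrom = k_xu @ torch.linalg.inv(k_uu) @ k_xu.T
root = k_xu @ inv_root  # plays the role of k_ux1 @ self._inducing_inv_root
assert torch.allclose(root @ root.T, nystrom, atol=1e-8)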
Example #11
    def exact_predictive_covar(self, test_test_covar, test_train_covar):
        """
        Computes the posterior predictive covariance of a GP
        Args:
            test_train_covar (:obj:`gpytorch.lazy.LazyTensor`): Covariance matrix between test and train inputs
            test_test_covar (:obj:`gpytorch.lazy.LazyTensor`): Covariance matrix between test inputs
        Returns:
            :obj:`gpytorch.lazy.LazyTensor`: A LazyTensor representing the predictive posterior covariance of the
                test points
        """
        if settings.fast_pred_var.on():
            self._last_test_train_covar = test_train_covar

        if settings.skip_posterior_variances.on():
            return ZeroLazyTensor(*test_test_covar.size())

        if settings.fast_pred_var.off():
            return super().exact_predictive_covar(test_test_covar, test_train_covar)
        else:
            features_xstar = test_train_covar.evaluate_kernel().get_root(
                dim=-2)

            # compute J^T Cache as our root tensor
            j_star_covar = features_xstar.t() @ self.covar_cache

            covar_expanded = RootLazyTensor(j_star_covar)
            return self.noise * covar_expanded
Example #12
    def block_logdet(self, var, cov_mat_root):
        var = flatten(var)

        cov_mat_lt = RootLazyTensor(cov_mat_root.t())
        var_lt = DiagLazyTensor(var + 1e-6)
        covar_lt = AddedDiagLazyTensor(var_lt, cov_mat_lt)

        return covar_lt.log_det()
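A minimal sketch (gpytorch.lazy API assumed; the factor and variance shapes are hypothetical) checking that the lazy log-determinant of this low-rank-plus-diagonal structure matches the dense result:

import torch
from gpytorch.lazy import AddedDiagLazyTensor, DiagLazyTensor, RootLazyTensor

torch.manual_seed(0)
cov_mat_root = torch.randn(10, 50, dtype=torch.float64)  # hypothetical rank-10 factor (K x D)
var = torch.rand(50, dtype=torch.float64)                # hypothetical flattened variances

cov_mat_lt = RootLazyTensor(cov_mat_root.t())
var_lt = DiagLazyTensor(var + 1e-6)
covar_lt = AddedDiagLazyTensor(var_lt, cov_mat_lt)

dense = cov_mat_root.t() @ cov_mat_root + torch.diag(var + 1e-6)
assert torch.allclose(covar_lt.logdet(), torch.logdet(dense), rtol=1e-6, atol=1e-6)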
Example #13
    def root_inv_decomposition(self, method=None, initial_vectors=None, test_vectors=None):
        from gpytorch.lazy import RootLazyTensor

        # return a dense root decomposition if the matrix is small
        if self.shape[-1] <= settings.max_cholesky_size.value():
            return super().root_inv_decomposition()

        root_list = [lt.root_inv_decomposition().root for lt in self.lazy_tensors]
        kronecker_root = KroneckerProductLazyTensor(*root_list)
        return RootLazyTensor(kronecker_root)
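This works because the Kronecker product of per-factor (inverse) roots is itself an (inverse) root of the Kronecker product: by the mixed-product property, (A ⊗ B)(A ⊗ B)ᵀ = (A Aᵀ) ⊗ (B Bᵀ). A small dense check with plain torch:

import torch

torch.manual_seed(0)
a = torch.randn(3, 3, dtype=torch.float64)
b = torch.randn(4, 4, dtype=torch.float64)

lhs = torch.kron(a, b) @ torch.kron(a, b).T
rhs = torch.kron(a @ a.T, b @ b.T)
assert torch.allclose(lhs, rhs, atol=1e-10)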
Example #14
    def test_diag(self):
        mat1 = make_random_mat(20, rank=4)
        mat2 = make_random_mat(20, rank=4)
        mat3 = make_random_mat(20, rank=4)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        mat3_copy = mat3.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3)).diag()
        actual = prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            ]
        ).diag()
        self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)
    def test_matmul(self):
        root = torch.randn(5, 3, requires_grad=True)
        covar = RootLazyTensor(root)
        mat = torch.eye(5)
        res = covar.matmul(mat)

        root_clone = root.clone().detach()
        root_clone.requires_grad = True
        mat_clone = mat.clone().detach()
        mat_clone.requires_grad = True
        actual = root_clone.matmul(root_clone.transpose(-1, -2)).matmul(mat_clone)

        self.assertTrue(approx_equal(res, actual))

        gradient = torch.randn(5, 5)
        actual.backward(gradient=gradient)
        res.backward(gradient=gradient)

        self.assertTrue(approx_equal(root.grad, root_clone.grad))
Example #16
    def test_mul_adding_another_variable(self):
        mat1 = make_random_mat(20, rank=4, batch_size=5)
        mat2 = make_random_mat(20, rank=4, batch_size=5)
        mat3 = make_random_mat(20, rank=4, batch_size=5)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        mat3_copy = mat3.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2))
        res = res * RootLazyTensor(mat3)
        actual = prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            ]
        )
        self.assertLess(torch.max(((res.evaluate() - actual) / actual).abs()), 0.01)
 def _get_covariance(self, x1, x2):
     k_ux1 = self.base_kernel_module(x1, self.inducing_points).evaluate()
     if torch.equal(x1, x2):
         covar = RootLazyTensor(k_ux1.matmul(self._inducing_inv_root))
     else:
         k_ux2 = self.base_kernel_module(x2,
                                         self.inducing_points).evaluate()
         covar = MatmulLazyTensor(
             k_ux1.matmul(self._inducing_inv_root),
             k_ux2.matmul(self._inducing_inv_root).transpose(-1, -2))
     return covar
Example #18
    def test_batch_diag(self):
        mat1 = make_random_mat(20, rank=4, batch_size=5)
        mat2 = make_random_mat(20, rank=4, batch_size=5)
        mat3 = make_random_mat(20, rank=4, batch_size=5)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        mat3_copy = mat3.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3)).diag()
        actual = prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            ]
        )
        actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(5)])
        self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)
Example #19
    def test_precond_solve(self):
        seed = 4
        torch.random.manual_seed(seed)

        tensor = torch.randn(1000, 800)
        diag = torch.abs(torch.randn(1000))

        standard_lt = AddedDiagLazyTensor(RootLazyTensor(tensor),
                                          DiagLazyTensor(diag))
        evals, evecs = standard_lt.symeig(eigenvectors=True)

        # this preconditioner is a simple example of near deflation
        def nonstandard_preconditioner(self):
            top_100_evecs = evecs[:, :100]
            top_100_evals = evals[:100] + 0.2 * torch.randn(100)

            precond_lt = RootLazyTensor(
                top_100_evecs @ torch.diag(top_100_evals**0.5))
            logdet = top_100_evals.log().sum()

            def precond_closure(rhs):
                rhs2 = top_100_evecs.t() @ rhs
                return top_100_evecs @ torch.diag(1.0 / top_100_evals) @ rhs2

            return precond_closure, precond_lt, logdet

        overrode_lt = AddedDiagLazyTensor(
            RootLazyTensor(tensor),
            DiagLazyTensor(diag),
            preconditioner_override=nonstandard_preconditioner)

        # compute a solve - mostly to make sure that we can actually perform the solve
        rhs = torch.randn(1000, 1)
        standard_solve = standard_lt.inv_matmul(rhs)
        overrode_solve = overrode_lt.inv_matmul(rhs)

        # gut checking that our preconditioner is not breaking anything
        self.assertEqual(standard_solve.shape, overrode_solve.shape)
        self.assertLess(
            torch.norm(standard_solve - overrode_solve) /
            standard_solve.norm(), 1.0)
Example #20
    def compute_ll_for_block(self, vec, mean, var, cov_mat_root):
        vec = flatten(vec)
        mean = flatten(mean)
        var = flatten(var)

        cov_mat_lt = RootLazyTensor(cov_mat_root.t())
        var_lt = DiagLazyTensor(var + 1e-6)
        covar_lt = AddedDiagLazyTensor(var_lt, cov_mat_lt)
        qdist = MultivariateNormal(mean, covar_lt)

        with gpytorch.settings.num_trace_samples(1), gpytorch.settings.max_cg_iterations(25):
            return qdist.log_prob(vec)
Example #22
    def test_batch_matmul_mat_with_five_matrices(self):
        mat1 = make_random_mat(20, rank=4, batch_size=5)
        mat2 = make_random_mat(20, rank=4, batch_size=5)
        mat3 = make_random_mat(20, rank=4, batch_size=5)
        mat4 = make_random_mat(20, rank=4, batch_size=5)
        mat5 = make_random_mat(20, rank=4, batch_size=5)
        vec = torch.randn(5, 20, 7, requires_grad=True)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        mat3_copy = mat3.clone().detach().requires_grad_(True)
        mat4_copy = mat4.clone().detach().requires_grad_(True)
        mat5_copy = mat5.clone().detach().requires_grad_(True)
        vec_copy = vec.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(
            RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3), RootLazyTensor(mat4), RootLazyTensor(mat5)
        ).matmul(vec)
        actual = prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
                mat4_copy.matmul(mat4_copy.transpose(-1, -2)),
                mat5_copy.matmul(mat5_copy.transpose(-1, -2)),
            ]
        ).matmul(vec_copy)
        self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

        # Backward
        res.sum().backward()
        actual.sum().backward()
        self.assertLess(torch.max(((mat1.grad - mat1_copy.grad) / mat1_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((mat2.grad - mat2_copy.grad) / mat2_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((mat3.grad - mat3_copy.grad) / mat3_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((mat4.grad - mat4_copy.grad) / mat4_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((mat5.grad - mat5_copy.grad) / mat5_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((vec.grad - vec_copy.grad) / vec_copy.grad).abs()), 0.01)
Example #23
    def test_matmul_mat_with_two_matrices(self):
        mat1 = make_random_mat(20, 5)
        mat2 = make_random_mat(20, 5)
        vec = torch.randn(20, 7, requires_grad=True)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        vec_copy = vec.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2)).matmul(vec)
        actual = prod(
            [mat1_copy.matmul(mat1_copy.transpose(-1, -2)), mat2_copy.matmul(mat2_copy.transpose(-1, -2))]
        ).matmul(vec_copy)
        self.assertLess(torch.max(((res - actual) / actual).abs()), 0.01)

        # Backward
        res.sum().backward()
        actual.sum().backward()
        self.assertLess(torch.max(((mat1.grad - mat1_copy.grad) / mat1_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((mat2.grad - mat2_copy.grad) / mat2_copy.grad).abs()), 0.01)
        self.assertLess(torch.max(((vec.grad - vec_copy.grad) / vec_copy.grad).abs()), 0.01)
Example #24
    def test_getitem(self):
        mat1 = make_random_mat(20, rank=4)
        mat2 = make_random_mat(20, rank=4)
        mat3 = make_random_mat(20, rank=4)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        mat3_copy = mat3.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3))
        actual = prod(
            [
                mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
            ]
        )

        self.assertLess(torch.max(((res[5, 3:5] - actual[5, 3:5]) / actual[5, 3:5]).abs()), 0.01)
        self.assertLess(torch.max(((res[3:5, 2:].evaluate() - actual[3:5, 2:]) / actual[3:5, 2:]).abs()), 0.01)
        self.assertLess(torch.max(((res[2:, 3:5].evaluate() - actual[2:, 3:5]) / actual[2:, 3:5]).abs()), 0.01)
    def test_base_sample_shape(self):
        a = torch.randn(5, 10)
        lazy_square_a = RootLazyTensor(lazify(a))
        dist = MultivariateNormal(torch.zeros(5), lazy_square_a)

        # check that providing the base samples is okay
        samples = dist.rsample(torch.Size((16, )),
                               base_samples=torch.randn(16, 10))
        self.assertEqual(samples.shape, torch.Size((16, 5)))

        # check that base samples with the wrong trailing dimension (the event size, not the root's rank) fail
        self.assertRaises(RuntimeError,
                          dist.rsample,
                          torch.Size((16, )),
                          base_samples=torch.randn(16, 5))

        # check that the proper event shape of base samples is okay for
        # a non-root lazy tensor
        nonlazy_square_a = lazify(lazy_square_a.evaluate())
        dist = MultivariateNormal(torch.zeros(5), nonlazy_square_a)

        samples = dist.rsample(torch.Size((16, )),
                               base_samples=torch.randn(16, 5))
        self.assertEqual(samples.shape, torch.Size((16, 5)))
Example #26
    def root_decomposition(self, method: Optional[str] = None):
        from gpytorch.lazy import RootLazyTensor

        if method == "symeig" or method is None:
            evals, evecs = self._symeig(eigenvectors=True,
                                        return_evals_as_lazy=True)
            # TODO: only use non-zero evals (req. dealing w/ batches...)
            f_list = [
                evec * eval.diag().clamp(0.0).sqrt().unsqueeze(-2)
                for eval, evec in zip(evals.lazy_tensors, evecs.lazy_tensors)
            ]
            F = KroneckerProductLazyTensor(*f_list)
            return RootLazyTensor(F)
        else:
            return super().root_decomposition(method=method)
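The symeig branch relies on Q diag(λ)^{1/2} being a root of Q diag(λ) Qᵀ, which is the same construction used for f_list above. A dense sketch of that identity with plain torch:

import torch

torch.manual_seed(0)
m = torch.randn(6, 6, dtype=torch.float64)
k = m @ m.T                                          # a PSD matrix
evals, evecs = torch.linalg.eigh(k)
f = evecs * evals.clamp(0.0).sqrt().unsqueeze(-2)    # columns of Q scaled by sqrt(eigenvalues)
assert torch.allclose(f @ f.T, k, atol=1e-8)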
Example #27
    def _make_predictive_covar(self, qmatrix=None, Kuu=None, Kuu_Lmat=None):
        if qmatrix is None:
            qmatrix = self.current_qmatrix
        if Kuu is None:
            Kuu = self.Kuu
        if Kuu_Lmat is None:
            Kuu_Lmat = self.current_inducing_compression_matrix.evaluate()

        if fast_pred_var.on():
            qmat_inv_root = qmatrix.root_inv_decomposition()
            # to lazify you have to evaluate the inverse root which is slow
            # otherwise, you can't backprop your way through it
            inner_cache = RootLazyTensor(
                Kuu_Lmat.matmul(qmat_inv_root.root.evaluate()))
        else:
            inner_cache = Kuu_Lmat.matmul(
                qmatrix.inv_matmul(Kuu_Lmat.transpose(-1, -2)))

        predictive_covar_cache = Kuu - inner_cache
        return predictive_covar_cache
Example #28
    def forward(self, input):
        """
        Adds the log task noises to the diagonal of the covariance matrix of the supplied
        :obj:`gpytorch.random_variables.GaussianRandomVariable` or
        :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable` when `rank` == 0.
        Otherwise, adds a rank `rank` covariance matrix to it.

        To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`,
        an identity matrix with size equal to the data, and a (not necessarily diagonal) matrix containing the task
        noises :math:`D_{t}`.

        We also incorporate a shared `log_noise` parameter from the base
        :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend.

        The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`.

        Args:
            input (:obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`): Random variable whose covariance
                matrix is a :obj:`gpytorch.lazy.LazyTensor` we intend to augment.
        Returns:
            :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`: A new random variable whose covariance
            matrix is a :obj:`gpytorch.lazy.LazyTensor` with :math:`D_{t} \otimes I_{n}` and :math:`\sigma^{2}I_{nt}`
            added.
        """
        mean, covar = input.representation()
        eye_lv = DiagLazyTensor(
            torch.ones(covar.size(-1) // self.n_tasks,
                       device=self.log_noise.device))
        if hasattr(self, "log_task_noises"):
            task_var_lv = DiagLazyTensor(self.log_task_noises.exp())
        else:
            task_var_lv = RootLazyTensor(self.task_noise_covar_factor)
        covar_kron_lv = KroneckerProductLazyTensor(task_var_lv, eye_lv)
        noise = covar + covar_kron_lv
        noise = add_diag(noise, self.log_noise.exp())
        return input.__class__(mean, noise)
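A small dense sketch of the task-noise structure described in the docstring, showing that D_t ⊗ I_n places each task's variance on its own n x n diagonal block (plain torch; the sizes and variances are hypothetical):

import torch

n, t = 3, 2                            # hypothetical data count and task count
task_vars = torch.tensor([0.5, 2.0])   # exp(log_task_noises) in the rank-0 case
d_t = torch.diag(task_vars)

noise = torch.kron(d_t, torch.eye(n))  # D_t \otimes I_n, shape (n*t, n*t)
assert noise.shape == (t * n, t * n)
assert torch.allclose(noise[:n, :n], 0.5 * torch.eye(n))  # task-0 block
assert torch.allclose(noise[n:, n:], 2.0 * torch.eye(n))  # task-1 block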
Example #29
    def test_mul_adding_constant_mul(self):
        mat1 = make_random_mat(20, rank=4, batch_size=5)
        mat2 = make_random_mat(20, rank=4, batch_size=5)
        mat3 = make_random_mat(20, rank=4, batch_size=5)
        const = torch.ones(1, requires_grad=True)

        mat1_copy = mat1.clone().detach().requires_grad_(True)
        mat2_copy = mat2.clone().detach().requires_grad_(True)
        mat3_copy = mat3.clone().detach().requires_grad_(True)
        const_copy = const.clone().detach().requires_grad_(True)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3))
        res = res * const
        actual = (
            prod(
                [
                    mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                    mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                    mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
                ]
            )
            * const_copy
        )
        self.assertLess(torch.max(((res.evaluate() - actual) / actual).abs()), 0.01)

        # Forward
        res = MulLazyTensor(RootLazyTensor(mat1), RootLazyTensor(mat2), RootLazyTensor(mat3))
        res = res * 2.5
        actual = (
            prod(
                [
                    mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
                    mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
                    mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
                ]
            )
            * 2.5
        )
        self.assertLess(torch.max(((res.evaluate() - actual) / actual).abs()), 0.01)
Example #30
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        posterior_transform: Optional[PosteriorTransform] = None,
        **kwargs: Any,
    ) -> MultitaskGPPosterior:
        self.eval()

        if posterior_transform is not None:
            # this could be very costly, disallow for now
            raise NotImplementedError(
                "Posterior transforms currently not supported for "
                f"{self.__class__.__name__}")

        X = self.transform_inputs(X)
        train_x = self.transform_inputs(self.train_inputs[0])

        # construct Ktt
        task_covar = self._task_covar_matrix
        task_rootlt = self._task_covar_matrix.root_decomposition(
            method="diagonalization")
        task_root = task_rootlt.root
        if task_covar.batch_shape != X.shape[:-2]:
            task_covar = BatchRepeatLazyTensor(task_covar,
                                               batch_repeat=X.shape[:-2])
            task_root = BatchRepeatLazyTensor(lazify(task_root),
                                              batch_repeat=X.shape[:-2])

        task_covar_rootlt = RootLazyTensor(task_root)

        # construct RR' \approx Kxx
        data_data_covar = self.train_full_covar.lazy_tensors[0]
        # populate the diagonalization caches for the root and inverse root
        # decomposition
        data_data_evals, data_data_evecs = data_data_covar.diagonalization()

        # pad the eigenvalues and eigenvectors with zeros if we are using Lanczos
        if data_data_evecs.shape[-1] < data_data_evecs.shape[-2]:
            cols_to_add = data_data_evecs.shape[-2] - data_data_evecs.shape[-1]
            zero_evecs = torch.zeros(
                *data_data_evecs.shape[:-1],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            zero_evals = torch.zeros(
                *data_data_evecs.shape[:-2],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            data_data_evecs = CatLazyTensor(
                data_data_evecs,
                lazify(zero_evecs),
                dim=-1,
                output_device=data_data_evals.device,
            )
            data_data_evals = torch.cat((data_data_evals, zero_evals), dim=-1)

        # construct K_{xt, x}
        test_data_covar = self.covar_module.data_covar_module(X, train_x)
        # construct K_{xt, xt}
        test_test_covar = self.covar_module.data_covar_module(X)

        # now update root so that \tilde{R}\tilde{R}' \approx K_{(x,xt), (x,xt)}
        # cloning preserves the gradient history
        updated_lazy_tensor = data_data_covar.cat_rows(
            cross_mat=test_data_covar.clone(),
            new_mat=test_test_covar,
            method="diagonalization",
        )
        updated_root = updated_lazy_tensor.root_decomposition().root
        # occasionally there are device errors, so make sure this ends up on the right device
        updated_root = updated_root.to(data_data_covar.device)

        # build a root decomposition of the joint train/test covariance matrix
        # construct (\tilde{R} \otimes M)(\tilde{R} \otimes M)' \approx
        # (K_{(x,xt), (x,xt)} \otimes Ktt)
        joint_covar = RootLazyTensor(
            KroneckerProductLazyTensor(updated_root,
                                       task_covar_rootlt.root.detach()))

        # construct K_{xt, x} \otimes Ktt
        test_obs_kernel = KroneckerProductLazyTensor(test_data_covar,
                                                     task_covar)

        # collect y - \mu(x) and \mu(X)
        train_diff = self.train_targets - self.mean_module(train_x)
        if detach_test_caches.on():
            train_diff = train_diff.detach()
        test_mean = self.mean_module(X)

        train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
        diagonal_noise = isinstance(train_noise, DiagLazyTensor)
        if detach_test_caches.on():
            train_noise = train_noise.detach()
        test_noise = (self.likelihood._shaped_noise_covar(X.shape)
                      if observation_noise else None)

        # predictive mean and variance for the mvn
        # first the predictive mean
        pred_mean = (test_obs_kernel.matmul(
            self.predictive_mean_cache).reshape_as(test_mean) + test_mean)
        # next the predictive variance, assume diagonal noise
        test_var_term = KroneckerProductLazyTensor(test_test_covar,
                                                   task_covar).diag()

        if diagonal_noise:
            task_evals, task_evecs = self._task_covar_matrix.diagonalization()
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(data_data_evals), DiagLazyTensor(task_evals)) +
                                   train_noise).inverse()
            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_covar.matmul(data_data_evecs).evaluate()**2,
                task_covar.matmul(task_evecs).evaluate()**2,
            )
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)
        else:
            # if non-diagonal noise (but still kronecker structured), we have to pull
            # across the noise because the inverse is not closed form
            # should be a kronecker lt, R = \Sigma_X^{-1/2} \kron \Sigma_T^{-1/2}
            # TODO: enforce the diagonalization to return a KPLT for all shapes in
            # gpytorch or dense linear algebra for small shapes
            data_noise, task_noise = train_noise.lazy_tensors
            data_noise_root = data_noise.root_inv_decomposition(
                method="diagonalization")
            task_noise_root = task_noise.root_inv_decomposition(
                method="diagonalization")

            # ultimately we need to compute the diagonal of
            # (K_{x* X} \kron K_T)(K_{XX} \kron K_T + \Sigma_X \kron \Sigma_T)^{-1}
            #                           (K_{x* X} \kron K_T)^T
            # = (K_{x* X} \Sigma_X^{-1/2} Q_R)(\Lambda_R + I)^{-1}
            #                       (K_{x* X} \Sigma_X^{-1/2} Q_R)^T
            # where R = (\Sigma_X^{-1/2T}K_{XX}\Sigma_X^{-1/2} \kron
            #                   \Sigma_T^{-1/2T}K_{T}\Sigma_T^{-1/2})
            # first we construct the components of R's eigen-decomposition
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            whitened_data_covar = (data_noise_root.transpose(
                -1, -2).matmul(data_data_covar).matmul(data_noise_root))
            w_data_evals, w_data_evecs = whitened_data_covar.diagonalization()
            whitened_task_covar = (task_noise_root.transpose(-1, -2).matmul(
                self._task_covar_matrix).matmul(task_noise_root))
            w_task_evals, w_task_evecs = whitened_task_covar.diagonalization()

            # we add one to the eigenvalues as above (not just for stability)
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(w_data_evals),
                DiagLazyTensor(w_task_evals)).add_jitter(1.0).inverse())

            test_data_comp = (test_data_covar.matmul(data_noise_root).matmul(
                w_data_evecs).evaluate()**2)
            task_comp = (task_covar.matmul(task_noise_root).matmul(
                w_task_evecs).evaluate()**2)

            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_comp, task_comp)
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)

        pred_variance = test_var_term - data_var_term
        specialized_mvn = MultitaskMultivariateNormal(
            pred_mean, DiagLazyTensor(pred_variance))
        if observation_noise:
            specialized_mvn = self.likelihood(specialized_mvn)

        posterior = MultitaskGPPosterior(
            mvn=specialized_mvn,
            joint_covariance_matrix=joint_covar,
            test_train_covar=test_obs_kernel,
            train_diff=train_diff,
            test_mean=test_mean,
            train_train_covar=self.train_full_covar,
            train_noise=train_noise,
            test_noise=test_noise,
        )

        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior