def kernel2(self, Kxx, V, U):
     # TODO: Make this a separate module
     # if M1, M2 = (0, 0)
     #    [ k_** ⊗ B ] ⊗ A
     Kij_xx_22 = KroneckerProductLazyTensor(
         KroneckerProductLazyTensor(Kxx, V), U)
     return Kij_xx_22
    def test_matmul_vec_random_rectangular(self):
        ax = torch.randn(4, 2, 3, requires_grad=True)
        bx = torch.randn(4, 5, 2, requires_grad=True)
        cx = torch.randn(4, 6, 4, requires_grad=True)
        rhsx = torch.randn(4, 3 * 2 * 4, 1)
        rhsx = (rhsx / torch.norm(rhsx)).requires_grad_(True)
        ax_copy = ax.clone().detach().requires_grad_(True)
        bx_copy = bx.clone().detach().requires_grad_(True)
        cx_copy = cx.clone().detach().requires_grad_(True)
        rhsx_copy = rhsx.clone().detach().requires_grad_(True)

        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(ax),
                                                 NonLazyTensor(bx),
                                                 NonLazyTensor(cx))
        res = kp_lazy_var.matmul(rhsx)

        actual_mat = kron(kron(ax_copy, bx_copy), cx_copy)
        actual = actual_mat.matmul(rhsx_copy)

        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(ax_copy.grad, ax.grad))
        self.assertTrue(approx_equal(bx_copy.grad, bx.grad))
        self.assertTrue(approx_equal(cx_copy.grad, cx.grad))
        self.assertTrue(approx_equal(rhsx_copy.grad, rhsx.grad))
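The `kron` used as the dense reference in these tests is not shown in this excerpt; a minimal sketch of such a helper, assuming it is a dense, batched Kronecker product over the trailing two dimensions:

import torch

def kron(a, b):
    # Dense reference (hypothetical helper): batched Kronecker product.
    # a: (..., m, n), b: (..., p, q) -> (..., m * p, n * q)
    *batch, m, n = a.shape
    p, q = b.shape[-2], b.shape[-1]
    res = a.unsqueeze(-1).unsqueeze(-3) * b.unsqueeze(-2).unsqueeze(-4)
    return res.reshape(*batch, m * p, n * q)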
 def kernel1(self, Kxx, H1, H2, V, U):
     # TODO: Make this a separate module
     # If M1, M2 = (1, 1)
     #    H₁ᵀ [ K ⊗ B ] H₂ ⊗ A
     Kij_xx_11 = KroneckerProductLazyTensor(
         H1 @ KroneckerProductLazyTensor(Kxx, V) @ H2.t(), U)
     return Kij_xx_11
 def correlation_kernel_12(self, Kxx, H1, V, U):
     # TODO: Make this a separate module
     # elif M1, M2 = (1, 0)
     #    H₁ᵀ [ k_x* ⊗ B ] ⊗ A
     Kij_xx_12 = KroneckerProductLazyTensor(
         H1 @ KroneckerProductLazyTensor(Kxx, V), U)
     return Kij_xx_12
 def correlation_kernel_12(self, Kxx, H1, Sigma):
     # TODO: Make this a separate module
     # elif M1, M2 = (1, 0)
     #    (H₁ᵀ ⊗ Iₙ) [ k_x* ⊗ Σ ]
     _, x_dim, u_dim_p_1 = self.decoder.sizes
     In = torch.eye(x_dim, dtype=H1.dtype, device=H1.device)
     Kij_xx_12 = KroneckerProductLazyTensor(
         H1, In) @ KroneckerProductLazyTensor(Kxx, Sigma)
     return Kij_xx_12
 def kernel1(self, Kxx, H1, H2, Sigma):
     # TODO: Make this a separate module
     # If M1, M2 = (1, 1)
     #    (H₁ᵀ ⊗ Iₙ)[ K ⊗ Σ ] (H₂ ⊗ Iₙ)
     _, x_dim, u_dim_p_1 = self.decoder.sizes
     In = torch.eye(x_dim, dtype=H1.dtype, device=H1.device)
     Kij_xx_11 = (
         KroneckerProductLazyTensor(H1, In) @ KroneckerProductLazyTensor(
             Kxx, Sigma) @ KroneckerProductLazyTensor(H2.t(), In))
     return Kij_xx_11
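These helpers keep the factors lazy, but by the Kronecker mixed-product identity the construction above collapses to (H1 K H2ᵀ) ⊗ Σ whenever Σ is n x n. A quick dense check with illustrative shapes (torch.kron requires PyTorch >= 1.8):

import torch

H1, H2 = torch.randn(2, 4), torch.randn(2, 4)
K, Sigma, In = torch.randn(4, 4), torch.randn(3, 3), torch.eye(3)
# (H1 ⊗ I_n) (K ⊗ Σ) (H2ᵀ ⊗ I_n) == (H1 K H2ᵀ) ⊗ Σ
lhs = torch.kron(H1, In) @ torch.kron(K, Sigma) @ torch.kron(H2.t(), In)
rhs = torch.kron(H1 @ K @ H2.t(), Sigma)
assert torch.allclose(lhs, rhs, atol=1e-5)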
    def create_lazy_tensor(self):
        a = torch.tensor([[4, 0, 2], [0, 3, -1], [2, -1, 3]], dtype=torch.float)
        b = torch.tensor([[2, 1], [1, 2]], dtype=torch.float)
        c = torch.tensor([[4, 0.5, 1], [0.5, 4, -1], [1, -1, 3]], dtype=torch.float)
        d = torch.tensor([[1.2, 0.75], [0.75, 1.2]], dtype=torch.float)

        a.requires_grad_(True)
        b.requires_grad_(True)
        c.requires_grad_(True)
        d.requires_grad_(True)
        kp_lt_1 = KroneckerProductLazyTensor(NonLazyTensor(a), NonLazyTensor(b))
        kp_lt_2 = KroneckerProductLazyTensor(NonLazyTensor(c), NonLazyTensor(d))

        return SumKroneckerLazyTensor(kp_lt_1, kp_lt_2)
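As a sanity check, the lazily represented sum above should match its dense counterpart; a hedged sketch re-declaring the same factors (assumes torch.kron from PyTorch >= 1.8 and that these classes are importable from gpytorch.lazy as in recent 1.x releases):

import torch
from gpytorch.lazy import (KroneckerProductLazyTensor, NonLazyTensor,
                           SumKroneckerLazyTensor)

a = torch.tensor([[4., 0., 2.], [0., 3., -1.], [2., -1., 3.]])
b = torch.tensor([[2., 1.], [1., 2.]])
c = torch.tensor([[4., 0.5, 1.], [0.5, 4., -1.], [1., -1., 3.]])
d = torch.tensor([[1.2, 0.75], [0.75, 1.2]])
lt = SumKroneckerLazyTensor(
    KroneckerProductLazyTensor(NonLazyTensor(a), NonLazyTensor(b)),
    KroneckerProductLazyTensor(NonLazyTensor(c), NonLazyTensor(d)))
assert torch.allclose(lt.evaluate(), torch.kron(a, b) + torch.kron(c, d))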
 def _create_marginal_input(self, batch_shape=torch.Size([])):
     mat = torch.randn(*batch_shape, 5, 5)
     mat2 = torch.randn(*batch_shape, 4, 4)
     covar = KroneckerProductLazyTensor(RootLazyTensor(mat),
                                        RootLazyTensor(mat2))
     return MultitaskMultivariateNormal(torch.randn(*batch_shape, 5, 4),
                                        covar)
Example #9
    def forward(self, input):
        # Here I should calculate the (final) weight first using tensor products and the rest is exactly the same
        # w = nn.BatchNorm2d(self.weight_leafs.shape[1]).cuda()(self.weight_leafs)
        w = self.weight_leafs
        if self.lazy:
            self.weight = KroneckerProductLazyTensor(*NonLazyTensor(w)).sum(
                dim=0)  # get the sum of the batch of product
            logging.debug('self.weight.shape: ' + str(self.weight.shape))
            if input.dim() == 1:
                # See https://github.com/cornellius-gp/gpytorch/pull/871
                return self.weight[input].base_lazy_tensor.evaluate().sum(
                    dim=-3)[:, :self.embedding_dim]
            elif input.dim() == 2:
                input_1d = input.contiguous().view(1, -1)
                # TODO: Not sure if this selection (self.embedding_dim) is correct here.
                # See https://github.com/cornellius-gp/gpytorch/pull/871
                result = self.weight[input_1d[0]].base_lazy_tensor.evaluate().sum(
                    dim=-3)[:, :self.embedding_dim]
                return result.view(input.shape[0], input.shape[1], -1)
            else:
                raise Exception('This input dimension is not yet implemented')
        else:
            weight_leafs_product = w[0]
            for i in range(1, self.order):
                weight_leafs_product = self.knocker_product(
                    weight_leafs_product, w[i])
            self.weight = weight_leafs_product.sum(dim=0)

            return F.embedding(input, self.weight, self.padding_idx,
                               self.max_norm, self.norm_type,
                               self.scale_grad_by_freq, self.sparse)
Example #10
 def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params):
     if last_dim_is_batch:
         raise RuntimeError(
             "MultitaskKernel does not accept the last_dim_is_batch argument."
         )
     covar_i = self.task_covar_module.covar_matrix
     if len(x1.shape[:-2]):
         covar_i = covar_i.repeat(*x1.shape[:-2], 1, 1)
     if self.bias_only:
         covar_i = lazify(
             torch.ones_like(covar_i.evaluate())
          )  # task covariance is now all ones, so every task shares the data
          # covariance while keeping the multitask mean
     covar_x = lazify(self.data_covar_module.forward(x1, x2, **params))
     res = KroneckerProductLazyTensor(covar_x, covar_i)
     return res.diag() if diag else res
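For comparison, the stock gpytorch.kernels.MultitaskKernel builds the same covar_x ⊗ covar_i structure (the snippet above is a custom variant that adds a bias_only switch); a short usage sketch with illustrative shapes:

import torch
import gpytorch

kernel = gpytorch.kernels.MultitaskKernel(
    gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1)
x = torch.randn(10, 3)
covar = kernel(x)      # lazily evaluated (n * t) x (n * t) covariance
print(covar.shape)     # torch.Size([20, 20])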
Example #11
    def forward(self, X: Tensor) -> MultivariateNormal:
        X = self.transform_inputs(X)

        covariance_list = []
        covariance_list.append(self.covar_modules[0](X))

        for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
            covariance_list.append(cm(param))

        # check batch_shapes
        if covariance_list[0].batch_shape != covariance_list[1].batch_shape:
            for i in range(1, len(covariance_list)):
                cm = covariance_list[i]
                covariance_list[i] = BatchRepeatLazyTensor(
                    cm, covariance_list[0].batch_shape
                )
        kronecker_covariance = KroneckerProductLazyTensor(*covariance_list)

        # TODO: expand options for the mean module via batch shaping?
        mean = torch.zeros(
            *covariance_list[0].batch_shape,
            kronecker_covariance.shape[-1],
            device=kronecker_covariance.device,
            dtype=kronecker_covariance.dtype,
        )
        return MultivariateNormal(mean, kronecker_covariance)
 def create_lazy_tensor(self):
     a = torch.tensor([[4, 0, 2], [0, 3, -1], [2, -1, 3]],
                      dtype=torch.float)
     b = torch.tensor([[2, 1], [1, 2]], dtype=torch.float)
     c = torch.tensor(
         [[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]],
         dtype=torch.float)
     d = torch.tensor([2], dtype=torch.float)
     e = torch.tensor([5], dtype=torch.float)
     f = torch.tensor([2.5], dtype=torch.float)
     a.requires_grad_(True)
     b.requires_grad_(True)
     c.requires_grad_(True)
     d.requires_grad_(True)
     e.requires_grad_(True)
     f.requires_grad_(True)
     kp_lazy_tensor = KroneckerProductLazyTensor(NonLazyTensor(a),
                                                 NonLazyTensor(b),
                                                 NonLazyTensor(c))
     diag_lazy_tensor = KroneckerProductDiagLazyTensor(
         ConstantDiagLazyTensor(d, diag_shape=3),
         ConstantDiagLazyTensor(e, diag_shape=2),
         ConstantDiagLazyTensor(f, diag_shape=4),
     )
     return KroneckerProductAddedDiagLazyTensor(kp_lazy_tensor,
                                                diag_lazy_tensor)
Example #13
 def create_lazy_tensor(self):
     a = torch.randn(2, 3, requires_grad=True)
     b = torch.randn(5, 2, requires_grad=True)
     c = torch.randn(6, 4, requires_grad=True)
     kp_lazy_tensor = KroneckerProductLazyTensor(NonLazyTensor(a),
                                                 NonLazyTensor(b),
                                                 NonLazyTensor(c))
     return kp_lazy_tensor
 def create_lazy_tensor(self):
     a = torch.tensor([[4, 0, 2], [0, 3, -1], [2, -1, 3]], dtype=torch.float)
     b = torch.tensor([[2, 1], [1, 2]], dtype=torch.float)
     c = torch.tensor([[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]], dtype=torch.float)
     a.requires_grad_(True)
     b.requires_grad_(True)
     c.requires_grad_(True)
     kp_lazy_tensor = KroneckerProductLazyTensor(NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c))
     return kp_lazy_tensor
 def create_lazy_tensor(self):
     a = torch.tensor([[4, 0, 2], [0, 3, -1], [2, -1, 3]], dtype=torch.float)
     b = torch.tensor([[2, 1], [1, 2]], dtype=torch.float)
     c = torch.tensor([[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]], dtype=torch.float)
     a.requires_grad_(True)
     b.requires_grad_(True)
     c.requires_grad_(True)
     kp_lazy_tensor = KroneckerProductLazyTensor(NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c))
     diag_lazy_tensor = ConstantDiagLazyTensor(
         torch.tensor([0.25], dtype=torch.float, requires_grad=True), kp_lazy_tensor.shape[-1],
     )
     return KroneckerProductAddedDiagLazyTensor(kp_lazy_tensor, diag_lazy_tensor)
 def create_lazy_tensor(self):
     a = torch.tensor([[4, 0, 2], [0, 3, -1], [2, -1, 3]], dtype=torch.float)
     b = torch.tensor([[2, 1], [1, 2]], dtype=torch.float)
     c = torch.tensor([[4, 0.5, 1, 0], [0.5, 4, -1, 0], [1, -1, 3, 0], [0, 0, 0, 4]], dtype=torch.float)
     d = 0.5 * torch.rand(24, dtype=torch.float)
     a.requires_grad_(True)
     b.requires_grad_(True)
     c.requires_grad_(True)
     d.requires_grad_(True)
     kp_lazy_tensor = KroneckerProductLazyTensor(NonLazyTensor(a), NonLazyTensor(b), NonLazyTensor(c))
     diag_lazy_tensor = DiagLazyTensor(d)
     return KroneckerProductAddedDiagLazyTensor(kp_lazy_tensor, diag_lazy_tensor)
Example #17
    def make_posterior_variances(self, joint_covariance_matrix: LazyTensor) -> Tensor:
        r"""
        Computes the posterior variances given the data points X. As currently
        implemented, it performs another forward call on the stacked data to obtain
        the joint covariance across all data points.
        """
        # TODO: use the exposed joint covariances from the prediction strategy
        data_joint_covariance = joint_covariance_matrix.lazy_tensors[
            0
        ].evaluate_kernel()
        num_train = self.train_inputs[0].shape[-2]
        test_train_covar = data_joint_covariance[..., num_train:, :num_train]
        train_train_covar = data_joint_covariance[..., :num_train, :num_train]
        test_test_covar = data_joint_covariance[..., num_train:, num_train:]

        full_train_train_covar = KroneckerProductLazyTensor(
            train_train_covar, *joint_covariance_matrix.lazy_tensors[1:]
        )
        full_test_test_covar = KroneckerProductLazyTensor(
            test_test_covar, *joint_covariance_matrix.lazy_tensors[1:]
        )
        full_test_train_covar_list = [test_train_covar] + [
            *joint_covariance_matrix.lazy_tensors[1:]
        ]

        train_evals, train_evecs = full_train_train_covar.symeig(eigenvectors=True)
        # (\kron \Lambda_i + \sigma^2 I)^{-1}
        train_inv_evals = DiagLazyTensor(1.0 / (train_evals + self.likelihood.noise))

        # compute K_i S_i \hadamard K_i S_i
        test_train_hadamard = KroneckerProductLazyTensor(
            *[
                lt1.matmul(lt2).evaluate() ** 2
                for lt1, lt2 in zip(
                    full_test_train_covar_list, train_evecs.lazy_tensors
                )
            ]
        )

        # and compute the column sums of
        #  (\kron K_i S_i * K_i S_i) \tilde{\Lambda}^{-1}
        test_train_pred_covar = test_train_hadamard.matmul(train_inv_evals).sum(dim=-1)

        pred_variances = full_test_test_covar.diag() - test_train_pred_covar
        return pred_variances
    def test_matmul_batch_mat(self):
        avar = a.repeat(3, 1, 1).requires_grad_(True)
        bvar = b.repeat(3, 1, 1).requires_grad_(True)
        cvar = c.repeat(3, 1, 1).requires_grad_(True)
        mat = torch.randn(3, 24, 5, requires_grad=True)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.matmul(mat)

        avar_copy = avar.clone().detach().requires_grad_(True)
        bvar_copy = bvar.clone().detach().requires_grad_(True)
        cvar_copy = cvar.clone().detach().requires_grad_(True)
        mat_copy = mat.clone().detach().requires_grad_(True)
        actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(mat_copy)
        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(avar_copy.grad, avar.grad))
        self.assertTrue(approx_equal(bvar_copy.grad, bvar.grad))
        self.assertTrue(approx_equal(cvar_copy.grad, cvar.grad))
        self.assertTrue(approx_equal(mat_copy.grad, mat.grad))
    def test_matmul_mat_random_rectangular(self):
        a = torch.randn(4, 2, 3, requires_grad=True)
        b = torch.randn(4, 5, 2, requires_grad=True)
        c = torch.randn(4, 6, 4, requires_grad=True)
        rhs = torch.randn(4, 3 * 2 * 4, 2, requires_grad=True)
        a_copy = a.clone().detach().requires_grad_(True)
        b_copy = b.clone().detach().requires_grad_(True)
        c_copy = c.clone().detach().requires_grad_(True)
        rhs_copy = rhs.clone().detach().requires_grad_(True)

        actual = kron(kron(a_copy, b_copy), c_copy).matmul(rhs_copy)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(a),
                                                 NonLazyTensor(b),
                                                 NonLazyTensor(c))
        res = kp_lazy_var.matmul(rhs)

        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(a_copy.grad, a.grad))
        self.assertTrue(approx_equal(b_copy.grad, b.grad))
        self.assertTrue(approx_equal(c_copy.grad, c.grad))
        self.assertTrue(approx_equal(rhs_copy.grad, rhs.grad))
    def test_matmul_vec(self):
        avar = a.clone().requires_grad_(True)
        bvar = b.clone().requires_grad_(True)
        cvar = c.clone().requires_grad_(True)
        vec = torch.randn(24, requires_grad=True)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.matmul(vec)

        avar_copy = a.clone().requires_grad_(True)
        bvar_copy = b.clone().requires_grad_(True)
        cvar_copy = c.clone().requires_grad_(True)
        vec_copy = vec.clone().detach().requires_grad_(True)
        actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(vec_copy)

        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(avar_copy.grad, avar.grad))
        self.assertTrue(approx_equal(bvar_copy.grad, bvar.grad))
        self.assertTrue(approx_equal(cvar_copy.grad, cvar.grad))
        self.assertTrue(approx_equal(vec_copy.grad, vec.grad))
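These matmuls never materialize the full Kronecker matrix: (A ⊗ B) v can be computed with two small matrix products and a reshape. A minimal illustration of that identity (not the library's actual implementation):

import torch

def kron_matvec(A, B, v):
    # (A ⊗ B) @ v without forming the Kronecker product.
    # A: (m, n), B: (p, q), v: (n * q,) -> (m * p,)
    n, q = A.shape[-1], B.shape[-1]
    V = v.reshape(n, q)
    return (A @ V @ B.transpose(-1, -2)).reshape(-1)

A, B = torch.randn(3, 3), torch.randn(2, 2)
v = torch.randn(6)
assert torch.allclose(kron_matvec(A, B, v), torch.kron(A, B) @ v, atol=1e-5)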
Example #21
    def forward(self, input):
        """
        Adds the log task noises to the diagonal of the covariance matrix of the supplied
        :obj:`gpytorch.random_variables.GaussianRandomVariable` or
        :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`, in case of
        `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it.

        To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`,
        an identity matrix sized to the number of data points, and a (not necessarily diagonal) matrix containing
        the task noises :math:`D_{t}`.

        We also incorporate a shared `log_noise` parameter from the base
        :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend.

        The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`.

        Args:
            input (:obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`): Random variable whose covariance
                matrix is a :obj:`gpytorch.lazy.LazyTensor` we intend to augment.
        Returns:
            :obj:`gpytorch.random_variables.MultitaskGaussianRandomVariable`: A new random variable whose covariance
            matrix is a :obj:`gpytorch.lazy.LazyTensor` with :math:`D_{t} \otimes I_{n}` and :math:`\sigma^{2}I_{nt}`
            added.
        """
        mean, covar = input.representation()
        eye_lv = DiagLazyTensor(
            torch.ones(covar.size(-1) // self.n_tasks,
                       device=self.log_noise.device))
        if hasattr(self, "log_task_noises"):
            task_var_lv = DiagLazyTensor(self.log_task_noises.exp())
        else:
            task_var_lv = RootLazyTensor(self.task_noise_covar_factor)
        covar_kron_lv = KroneckerProductLazyTensor(task_var_lv, eye_lv)
        noise = covar + covar_kron_lv
        noise = add_diag(noise, self.log_noise.exp())
        return input.__class__(mean, noise)
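A tiny dense illustration of the D_t ⊗ I_n term described in the docstring, using a hypothetical 2-task, 3-point example:

import torch

D_t = torch.diag(torch.tensor([0.1, 0.4]))   # per-task noises (rank 0 case)
I_n = torch.eye(3)                           # one block per data point
print(torch.kron(D_t, I_n))                  # 6 x 6 diagonal task-noise term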
    def test_evaluate(self):
        avar = a
        bvar = b
        cvar = c
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.evaluate()
        actual = kron(kron(avar, bvar), cvar)
        self.assertTrue(approx_equal(res, actual))

        avar = a.repeat(3, 1, 1)
        bvar = b.repeat(3, 1, 1)
        cvar = c.repeat(3, 1, 1)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.evaluate()
        actual = kron(kron(avar, bvar), cvar)
        self.assertTrue(approx_equal(res, actual))
    def test_diag(self):
        avar = a
        bvar = b
        cvar = c
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.diag()
        actual = kron(kron(avar, bvar), cvar).diag()
        self.assertTrue(approx_equal(res, actual))

        avar = a.repeat(3, 1, 1)
        bvar = b.repeat(3, 1, 1)
        cvar = c.repeat(3, 1, 1)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.diag()
        actual_mat = kron(kron(avar, bvar), cvar)
        actual = torch.stack(
            [actual_mat[0].diag(), actual_mat[1].diag(), actual_mat[2].diag()])
        self.assertTrue(approx_equal(res, actual))
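The diagonal of a Kronecker product of square matrices is the Kronecker product of the factors' diagonals, which is why KroneckerProductLazyTensor can return diag() cheaply. A quick dense check:

import torch

A, B = torch.randn(3, 3), torch.randn(4, 4)
assert torch.allclose(torch.kron(A, B).diag(), torch.kron(A.diag(), B.diag()))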
Example #24
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        posterior_transform: Optional[PosteriorTransform] = None,
        **kwargs: Any,
    ) -> MultitaskGPPosterior:
        self.eval()

        if posterior_transform is not None:
            # this could be very costly, disallow for now
            raise NotImplementedError(
                "Posterior transforms currently not supported for "
                f"{self.__class__.__name__}")

        X = self.transform_inputs(X)
        train_x = self.transform_inputs(self.train_inputs[0])

        # construct Ktt
        task_covar = self._task_covar_matrix
        task_rootlt = self._task_covar_matrix.root_decomposition(
            method="diagonalization")
        task_root = task_rootlt.root
        if task_covar.batch_shape != X.shape[:-2]:
            task_covar = BatchRepeatLazyTensor(task_covar,
                                               batch_repeat=X.shape[:-2])
            task_root = BatchRepeatLazyTensor(lazify(task_root),
                                              batch_repeat=X.shape[:-2])

        task_covar_rootlt = RootLazyTensor(task_root)

        # construct RR' \approx Kxx
        data_data_covar = self.train_full_covar.lazy_tensors[0]
        # populate the diagonalization caches for the root and inverse root
        # decomposition
        data_data_evals, data_data_evecs = data_data_covar.diagonalization()

        # pad the eigenvalue and eigenvectors with zeros if we are using lanczos
        if data_data_evecs.shape[-1] < data_data_evecs.shape[-2]:
            cols_to_add = data_data_evecs.shape[-2] - data_data_evecs.shape[-1]
            zero_evecs = torch.zeros(
                *data_data_evecs.shape[:-1],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            zero_evals = torch.zeros(
                *data_data_evecs.shape[:-2],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            data_data_evecs = CatLazyTensor(
                data_data_evecs,
                lazify(zero_evecs),
                dim=-1,
                output_device=data_data_evals.device,
            )
            data_data_evals = torch.cat((data_data_evals, zero_evals), dim=-1)

        # construct K_{xt, x}
        test_data_covar = self.covar_module.data_covar_module(X, train_x)
        # construct K_{xt, xt}
        test_test_covar = self.covar_module.data_covar_module(X)

        # now update root so that \tilde{R}\tilde{R}' \approx K_{(x,xt), (x,xt)}
        # cloning preserves the gradient history
        updated_lazy_tensor = data_data_covar.cat_rows(
            cross_mat=test_data_covar.clone(),
            new_mat=test_test_covar,
            method="diagonalization",
        )
        updated_root = updated_lazy_tensor.root_decomposition().root
        # occasionally there are device errors, so enforce the expected device here
        updated_root = updated_root.to(data_data_covar.device)

        # build a root decomposition of the joint train/test covariance matrix
        # construct (\tilde{R} \otimes M)(\tilde{R} \otimes M)' \approx
        # (K_{(x,xt), (x,xt)} \otimes Ktt)
        joint_covar = RootLazyTensor(
            KroneckerProductLazyTensor(updated_root,
                                       task_covar_rootlt.root.detach()))

        # construct K_{xt, x} \otimes Ktt
        test_obs_kernel = KroneckerProductLazyTensor(test_data_covar,
                                                     task_covar)

        # collect y - \mu(x) and \mu(X)
        train_diff = self.train_targets - self.mean_module(train_x)
        if detach_test_caches.on():
            train_diff = train_diff.detach()
        test_mean = self.mean_module(X)

        train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
        diagonal_noise = isinstance(train_noise, DiagLazyTensor)
        if detach_test_caches.on():
            train_noise = train_noise.detach()
        test_noise = (self.likelihood._shaped_noise_covar(X.shape)
                      if observation_noise else None)

        # predictive mean and variance for the mvn
        # first the predictive mean
        pred_mean = (test_obs_kernel.matmul(
            self.predictive_mean_cache).reshape_as(test_mean) + test_mean)
        # next the predictive variance, assume diagonal noise
        test_var_term = KroneckerProductLazyTensor(test_test_covar,
                                                   task_covar).diag()

        if diagonal_noise:
            task_evals, task_evecs = self._task_covar_matrix.diagonalization()
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(data_data_evals), DiagLazyTensor(task_evals)) +
                                   train_noise).inverse()
            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_covar.matmul(data_data_evecs).evaluate()**2,
                task_covar.matmul(task_evecs).evaluate()**2,
            )
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)
        else:
            # if non-diagonal noise (but still kronecker structured), we have to pull
            # across the noise because the inverse is not closed form
            # should be a kronecker lt, R = \Sigma_X^{-1/2} \kron \Sigma_T^{-1/2}
            # TODO: enforce the diagonalization to return a KPLT for all shapes in
            # gpytorch or dense linear algebra for small shapes
            data_noise, task_noise = train_noise.lazy_tensors
            data_noise_root = data_noise.root_inv_decomposition(
                method="diagonalization")
            task_noise_root = task_noise.root_inv_decomposition(
                method="diagonalization")

            # ultimately we need to compute the diagonal of
            # (K_{x* X} \kron K_T)(K_{XX} \kron K_T + \Sigma_X \kron \Sigma_T)^{-1}
            #                           (K_{x* X} \kron K_T)^T
            # = (K_{x* X} \Sigma_X^{-1/2} Q_R)(\Lambda_R + I)^{-1}
            #                       (K_{x* X} \Sigma_X^{-1/2} Q_R)^T
            # where R = (\Sigma_X^{-1/2T}K_{XX}\Sigma_X^{-1/2} \kron
            #                   \Sigma_T^{-1/2T}K_{T}\Sigma_T^{-1/2})
            # first we construct the components of R's eigen-decomposition
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            whitened_data_covar = (data_noise_root.transpose(
                -1, -2).matmul(data_data_covar).matmul(data_noise_root))
            w_data_evals, w_data_evecs = whitened_data_covar.diagonalization()
            whitened_task_covar = (task_noise_root.transpose(-1, -2).matmul(
                self._task_covar_matrix).matmul(task_noise_root))
            w_task_evals, w_task_evecs = whitened_task_covar.diagonalization()

            # we add one to the eigenvalues as above (not just for stability)
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(w_data_evals),
                DiagLazyTensor(w_task_evals)).add_jitter(1.0).inverse())

            test_data_comp = (test_data_covar.matmul(data_noise_root).matmul(
                w_data_evecs).evaluate()**2)
            task_comp = (task_covar.matmul(task_noise_root).matmul(
                w_task_evecs).evaluate()**2)

            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_comp, task_comp)
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)

        pred_variance = test_var_term - data_var_term
        specialized_mvn = MultitaskMultivariateNormal(
            pred_mean, DiagLazyTensor(pred_variance))
        if observation_noise:
            specialized_mvn = self.likelihood(specialized_mvn)

        posterior = MultitaskGPPosterior(
            mvn=specialized_mvn,
            joint_covariance_matrix=joint_covar,
            test_train_covar=test_obs_kernel,
            train_diff=train_diff,
            test_mean=test_mean,
            train_train_covar=self.train_full_covar,
            train_noise=train_noise,
            test_noise=test_noise,
        )

        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
Example #25
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        self.eval()  # make sure we're calling a posterior
        # input transforms are applied at `posterior` in `eval` mode, and at
        # `model.forward()` at training time
        X = self.transform_inputs(X)
        no_pred_variance = skip_posterior_variances._state

        with ExitStack() as es:
            es.enter_context(gpt_posterior_settings())
            es.enter_context(fast_pred_var(True))

            # we need to skip posterior variances here
            es.enter_context(skip_posterior_variances(True))
            mvn = self(X)
            if observation_noise is not False:
                # TODO: ensure that this still works for structured noise solves.
                mvn = self.likelihood(mvn, X)

            # lazy covariance matrix includes the interpolated version of the full
            # covariance matrix so we can actually grab that instead.
            if X.ndimension() > self.train_inputs[0].ndimension():
                X_batch_shape = X.shape[:-2]
                train_inputs = self.train_inputs[0].reshape(
                    *[1] * len(X_batch_shape), *self.train_inputs[0].shape
                )
                train_inputs = train_inputs.repeat(
                    *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
                )
            else:
                train_inputs = self.train_inputs[0]

            # we now compute the data covariances for the training data, the testing
            # data, the joint covariances, and the test train cross-covariance
            train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
            base_train_train_covar = train_train_covar.lazy_tensor

            data_train_covar = base_train_train_covar.lazy_tensors[0]
            data_covar = self.covar_modules[0]
            data_train_test_covar = data_covar(X, train_inputs)
            data_test_test_covar = data_covar(X)
            data_joint_covar = data_train_covar.cat_rows(
                cross_mat=data_train_test_covar,
                new_mat=data_test_test_covar,
            )

            # we detach the latents so that they don't cause gradient errors
            # TODO: Can we enable backprop through the latent covariances?
            batch_shape = data_train_test_covar.batch_shape
            latent_covar_list = []
            for latent_covar in base_train_train_covar.lazy_tensors[1:]:
                if latent_covar.batch_shape != batch_shape:
                    latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
                latent_covar_list.append(latent_covar.detach())

            joint_covar = KroneckerProductLazyTensor(
                data_joint_covar, *latent_covar_list
            )
            test_train_covar = KroneckerProductLazyTensor(
                data_train_test_covar, *latent_covar_list
            )

            # compute the posterior variance if necessary
            if no_pred_variance:
                pred_variance = mvn.variance
            else:
                pred_variance = self.make_posterior_variances(joint_covar)

            # mean and variance get reshaped into the target shape
            new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
            if not no_pred_variance:
                new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
                new_variance = DiagLazyTensor(new_variance)
            else:
                new_variance = ZeroLazyTensor(
                    *X.shape[:-1], *self.target_shape, self.target_shape[-1]
                )

            mvn = MultivariateNormal(new_mean, new_variance)

            # return a specialized Posterior to allow for sampling
            # cloning the full covar allows backpropagation through it
            posterior = HigherOrderGPPosterior(
                mvn=mvn,
                train_targets=self.train_targets.unsqueeze(-1),
                train_train_covar=train_train_covar,
                test_train_covar=test_train_covar,
                joint_covariance_matrix=joint_covar.clone(),
                output_shape=X.shape[:-1] + self.target_shape,
                num_outputs=self._num_outputs,
            )
            if hasattr(self, "outcome_transform"):
                posterior = self.outcome_transform.untransform_posterior(posterior)

            return posterior
Example #26
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        self.eval()  # make sure we're calling a posterior

        no_pred_variance = skip_posterior_variances._state

        with ExitStack() as es:
            es.enter_context(gpt_posterior_settings())
            es.enter_context(fast_pred_var(True))

            # we need to skip posterior variances here
            es.enter_context(skip_posterior_variances(True))
            mvn = self(X)
            if observation_noise is not False:
                # TODO: implement Kronecker + diagonal solves so that this is possible.
                # if torch.is_tensor(observation_noise):
                #     # TODO: Validate noise shape
                #     # make observation_noise `batch_shape x q x n`
                #     obs_noise = observation_noise.transpose(-1, -2)
                #     mvn = self.likelihood(mvn, X, noise=obs_noise)
                # elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                #     noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                #     mvn = self.likelihood(mvn, X, noise=noise)
                # else:
                mvn = self.likelihood(mvn, X)

            # lazy covariance matrix includes the interpolated version of the full
            # covariance matrix so we can actually grab that instead.
            if X.ndimension() > self.train_inputs[0].ndimension():
                X_batch_shape = X.shape[:-2]
                train_inputs = self.train_inputs[0].reshape(
                    *[1] * len(X_batch_shape), *self.train_inputs[0].shape
                )
                train_inputs = train_inputs.repeat(
                    *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
                )
            else:
                train_inputs = self.train_inputs[0]
            full_covar = self.covar_modules[0](torch.cat((train_inputs, X), dim=-2))

            if no_pred_variance:
                pred_variance = mvn.variance
            else:
                joint_covar = self._get_joint_covariance([X])
                pred_variance = self.make_posterior_variances(joint_covar)

                full_covar = KroneckerProductLazyTensor(
                    full_covar, *joint_covar.lazy_tensors[1:]
                )

            joint_covar_list = [self.covar_modules[0](X, train_inputs)]
            batch_shape = joint_covar_list[0].batch_shape
            for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
                covar = cm(param)
                if covar.batch_shape != batch_shape:
                    covar = BatchRepeatLazyTensor(covar, batch_shape)
                joint_covar_list.append(covar)

            test_train_covar = KroneckerProductLazyTensor(*joint_covar_list)

            # mean and variance get reshaped into the target shape
            new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
            if not no_pred_variance:
                new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
                new_variance = DiagLazyTensor(new_variance)
            else:
                new_variance = ZeroLazyTensor(
                    *X.shape[:-1], *self.target_shape, self.target_shape[-1]
                )

            mvn = MultivariateNormal(new_mean, new_variance)

            # return a specialized Posterior to allow for sampling
            posterior = HigherOrderGPPosterior(
                mvn=mvn,
                train_targets=self.train_targets.unsqueeze(-1),
                train_train_covar=self.prediction_strategy.lik_train_train_covar,
                test_train_covar=test_train_covar,
                joint_covariance_matrix=full_covar,
                output_shape=Size(
                    (
                        *X.shape[:-1],
                        *self.target_shape,
                    )
                ),
                num_outputs=self._num_outputs,
            )
            if hasattr(self, "outcome_transform"):
                posterior = self.outcome_transform.untransform_posterior(posterior)

            return posterior
 def covar_matrix(self):
     U = self.U.covar_matrix
     V = self.V.covar_matrix
     return KroneckerProductLazyTensor(V, U)
 def kernel2(self, Kxx, Sigma):
     # TODO: Make this a separate module
     # if M1, M2 = (0, 0)
     #    [ k_** ⊗ Σ ]
     Kij_xx_22 = KroneckerProductLazyTensor(Kxx, Sigma)
     return Kij_xx_22