    def test_matmul_vec_random_rectangular(self):
        ax = torch.randn(4, 2, 3, requires_grad=True)
        bx = torch.randn(4, 5, 2, requires_grad=True)
        cx = torch.randn(4, 6, 4, requires_grad=True)
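        # shape note: the Kronecker product of the (4, 2, 3), (4, 5, 2) and
        # (4, 6, 4) factors has shape (4, 2 * 5 * 6, 3 * 2 * 4) = (4, 60, 24),
        # so the right-hand side below needs 3 * 2 * 4 = 24 rows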
        rhsx = torch.randn(4, 3 * 2 * 4, 1)
        rhsx = (rhsx / torch.norm(rhsx)).requires_grad_(True)
        ax_copy = ax.clone().detach().requires_grad_(True)
        bx_copy = bx.clone().detach().requires_grad_(True)
        cx_copy = cx.clone().detach().requires_grad_(True)
        rhsx_copy = rhsx.clone().detach().requires_grad_(True)

        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(ax),
                                                 NonLazyTensor(bx),
                                                 NonLazyTensor(cx))
        res = kp_lazy_var.matmul(rhsx)

        actual_mat = kron(kron(ax_copy, bx_copy), cx_copy)
        actual = actual_mat.matmul(rhsx_copy)

        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(ax_copy.grad, ax.grad))
        self.assertTrue(approx_equal(bx_copy.grad, bx.grad))
        self.assertTrue(approx_equal(cx_copy.grad, cx.grad))
        self.assertTrue(approx_equal(rhsx_copy.grad, rhsx.grad))
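
The tests in these examples compare KroneckerProductLazyTensor against a dense, batch-aware kron helper that is not shown in this snippet. A minimal sketch of what such a helper could look like (an assumption for illustration, not the actual test fixture):

import torch

def kron(a, b):
    """Dense Kronecker product of two (optionally batched) matrices.

    For inputs of shape (..., m, n) and (..., p, q) the result has shape
    (..., m * p, n * q), which is what the dense comparisons in these tests
    rely on.
    """
    res = a.unsqueeze(-1).unsqueeze(-3) * b.unsqueeze(-2).unsqueeze(-4)
    return res.reshape(*res.shape[:-4],
                       a.shape[-2] * b.shape[-2],
                       a.shape[-1] * b.shape[-1])
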
Example #2
    def make_posterior_variances(self, joint_covariance_matrix: LazyTensor) -> Tensor:
        r"""
        Computes the posterior variances given the data points X. As currently
        implemented, it computes another forwards call with the stacked data to get out
        the joint covariance across all data points.
        """
        # TODO: use the exposed joint covariances from the prediction strategy
        data_joint_covariance = joint_covariance_matrix.lazy_tensors[
            0
        ].evaluate_kernel()
        num_train = self.train_inputs[0].shape[-2]
        test_train_covar = data_joint_covariance[..., num_train:, :num_train]
        train_train_covar = data_joint_covariance[..., :num_train, :num_train]
        test_test_covar = data_joint_covariance[..., num_train:, num_train:]

        full_train_train_covar = KroneckerProductLazyTensor(
            train_train_covar, *joint_covariance_matrix.lazy_tensors[1:]
        )
        full_test_test_covar = KroneckerProductLazyTensor(
            test_test_covar, *joint_covariance_matrix.lazy_tensors[1:]
        )
        full_test_train_covar_list = [test_train_covar] + [
            *joint_covariance_matrix.lazy_tensors[1:]
        ]

        train_evals, train_evecs = full_train_train_covar.symeig(eigenvectors=True)
        # (\kron \Lambda_i + \sigma^2 I)^{-1}
        train_inv_evals = DiagLazyTensor(1.0 / (train_evals + self.likelihood.noise))

        # compute K_i S_i \hadamard K_i S_i
        test_train_hadamard = KroneckerProductLazyTensor(
            *[
                lt1.matmul(lt2).evaluate() ** 2
                for lt1, lt2 in zip(
                    full_test_train_covar_list, train_evecs.lazy_tensors
                )
            ]
        )

        # and compute the column sums of
        #  (\kron K_i S_i * K_i S_i) \tilde{\Lambda}^{-1}
        test_train_pred_covar = test_train_hadamard.matmul(train_inv_evals).sum(dim=-1)

        pred_variances = full_test_test_covar.diag() - test_train_pred_covar
        return pred_variances
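
The column-sum computation above exploits a standard identity: with a symmetric eigendecomposition K_XX = S diag(lam) S^T, the posterior variance correction satisfies

    diag(K_*X (K_XX + sigma^2 I)^{-1} K_X*) = ((K_*X S) ** 2) @ (1 / (lam + sigma^2)),

which is what the squared (Hadamard) matmuls summed against the inverse eigenvalues implement, factor by factor, for the Kronecker structure. A small dense check of the identity (illustrative only; the kernel and shapes here are made up):

import torch

torch.manual_seed(0)
n_train, n_test, sigma2 = 6, 4, 0.1
X, X_test = torch.randn(n_train, 3), torch.randn(n_test, 3)
rbf = lambda A, B: torch.exp(-torch.cdist(A, B) ** 2)  # toy RBF-style kernel
K_xx, K_sx = rbf(X, X), rbf(X_test, X)

lam, S = torch.linalg.eigh(K_xx)
direct = torch.diag(K_sx @ torch.linalg.inv(K_xx + sigma2 * torch.eye(n_train)) @ K_sx.T)
via_eig = ((K_sx @ S) ** 2) @ (1.0 / (lam + sigma2))
assert torch.allclose(direct, via_eig, atol=1e-4)
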
    def test_matmul_batch_mat(self):
        avar = a.repeat(3, 1, 1).requires_grad_(True)
        bvar = b.repeat(3, 1, 1).requires_grad_(True)
        cvar = c.repeat(3, 1, 1).requires_grad_(True)
        mat = torch.randn(3, 24, 5, requires_grad=True)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.matmul(mat)

        avar_copy = avar.clone().detach().requires_grad_(True)
        bvar_copy = bvar.clone().detach().requires_grad_(True)
        cvar_copy = cvar.clone().detach().requires_grad_(True)
        mat_copy = mat.clone().detach().requires_grad_(True)
        actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(mat_copy)
        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(avar_copy.grad, avar.grad))
        self.assertTrue(approx_equal(bvar_copy.grad, bvar.grad))
        self.assertTrue(approx_equal(cvar_copy.grad, cvar.grad))
        self.assertTrue(approx_equal(mat_copy.grad, mat.grad))

    def test_matmul_vec(self):
        avar = a.clone().requires_grad_(True)
        bvar = b.clone().requires_grad_(True)
        cvar = c.clone().requires_grad_(True)
        vec = torch.randn(24, requires_grad=True)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(avar),
                                                 NonLazyTensor(bvar),
                                                 NonLazyTensor(cvar))
        res = kp_lazy_var.matmul(vec)

        avar_copy = a.clone().requires_grad_(True)
        bvar_copy = b.clone().requires_grad_(True)
        cvar_copy = c.clone().requires_grad_(True)
        vec_copy = vec.clone().detach().requires_grad_(True)
        actual = kron(kron(avar_copy, bvar_copy), cvar_copy).matmul(vec_copy)

        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(avar_copy.grad, avar.grad))
        self.assertTrue(approx_equal(bvar_copy.grad, bvar.grad))
        self.assertTrue(approx_equal(cvar_copy.grad, cvar.grad))
        self.assertTrue(approx_equal(vec_copy.grad, vec.grad))

    def test_matmul_mat_random_rectangular(self):
        a = torch.randn(4, 2, 3, requires_grad=True)
        b = torch.randn(4, 5, 2, requires_grad=True)
        c = torch.randn(4, 6, 4, requires_grad=True)
        rhs = torch.randn(4, 3 * 2 * 4, 2, requires_grad=True)
        a_copy = a.clone().detach().requires_grad_(True)
        b_copy = b.clone().detach().requires_grad_(True)
        c_copy = c.clone().detach().requires_grad_(True)
        rhs_copy = rhs.clone().detach().requires_grad_(True)

        actual = kron(kron(a_copy, b_copy), c_copy).matmul(rhs_copy)
        kp_lazy_var = KroneckerProductLazyTensor(NonLazyTensor(a),
                                                 NonLazyTensor(b),
                                                 NonLazyTensor(c))
        res = kp_lazy_var.matmul(rhs)

        self.assertTrue(approx_equal(res, actual))

        actual.sum().backward()
        res.sum().backward()
        self.assertTrue(approx_equal(a_copy.grad, a.grad))
        self.assertTrue(approx_equal(b_copy.grad, b.grad))
        self.assertTrue(approx_equal(c_copy.grad, c.grad))
        self.assertTrue(approx_equal(rhs_copy.grad, rhs.grad))
Example #6
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        posterior_transform: Optional[PosteriorTransform] = None,
        **kwargs: Any,
    ) -> MultitaskGPPosterior:
        self.eval()

        if posterior_transform is not None:
            # this could be very costly, disallow for now
            raise NotImplementedError(
                "Posterior transforms currently not supported for "
                f"{self.__class__.__name__}")

        X = self.transform_inputs(X)
        train_x = self.transform_inputs(self.train_inputs[0])

        # construct Ktt
        task_covar = self._task_covar_matrix
        task_rootlt = self._task_covar_matrix.root_decomposition(
            method="diagonalization")
        task_root = task_rootlt.root
        if task_covar.batch_shape != X.shape[:-2]:
            task_covar = BatchRepeatLazyTensor(task_covar,
                                               batch_repeat=X.shape[:-2])
            task_root = BatchRepeatLazyTensor(lazify(task_root),
                                              batch_repeat=X.shape[:-2])

        task_covar_rootlt = RootLazyTensor(task_root)

        # construct RR' \approx Kxx
        data_data_covar = self.train_full_covar.lazy_tensors[0]
        # populate the diagonalization caches for the root and inverse root
        # decompositions
        data_data_evals, data_data_evecs = data_data_covar.diagonalization()

        # pad the eigenvalues and eigenvectors with zeros if we are using Lanczos
        if data_data_evecs.shape[-1] < data_data_evecs.shape[-2]:
            cols_to_add = data_data_evecs.shape[-2] - data_data_evecs.shape[-1]
            zero_evecs = torch.zeros(
                *data_data_evecs.shape[:-1],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            zero_evals = torch.zeros(
                *data_data_evecs.shape[:-2],
                cols_to_add,
                dtype=data_data_evals.dtype,
                device=data_data_evals.device,
            )
            data_data_evecs = CatLazyTensor(
                data_data_evecs,
                lazify(zero_evecs),
                dim=-1,
                output_device=data_data_evals.device,
            )
            data_data_evals = torch.cat((data_data_evals, zero_evals), dim=-1)
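            # note: Lanczos-based diagonalization may return fewer than n
            # eigenpairs, so the zero padding keeps the eigenvector matrix
            # square and the eigenvalue vector full length for the Kronecker
            # algebra below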

        # construct K_{xt, x}
        test_data_covar = self.covar_module.data_covar_module(X, train_x)
        # construct K_{xt, xt}
        test_test_covar = self.covar_module.data_covar_module(X)

        # now update root so that \tilde{R}\tilde{R}' \approx K_{(x,xt), (x,xt)}
        # cloning preserves the gradient history
        updated_lazy_tensor = data_data_covar.cat_rows(
            cross_mat=test_data_covar.clone(),
            new_mat=test_test_covar,
            method="diagonalization",
        )
        updated_root = updated_lazy_tensor.root_decomposition().root
        # occasionally there are device mismatches, so make sure this ends up
        # on the right device
        updated_root = updated_root.to(data_data_covar.device)

        # build a root decomposition of the joint train/test covariance matrix
        # construct (\tilde{R} \otimes M)(\tilde{R} \otimes M)' \approx
        # (K_{(x,xt), (x,xt)} \otimes Ktt)
        joint_covar = RootLazyTensor(
            KroneckerProductLazyTensor(updated_root,
                                       task_covar_rootlt.root.detach()))

        # construct K_{xt, x} \otimes Ktt
        test_obs_kernel = KroneckerProductLazyTensor(test_data_covar,
                                                     task_covar)

        # collect y - \mu(x) and \mu(X)
        train_diff = self.train_targets - self.mean_module(train_x)
        if detach_test_caches.on():
            train_diff = train_diff.detach()
        test_mean = self.mean_module(X)

        train_noise = self.likelihood._shaped_noise_covar(train_x.shape)
        diagonal_noise = isinstance(train_noise, DiagLazyTensor)
        if detach_test_caches.on():
            train_noise = train_noise.detach()
        test_noise = (self.likelihood._shaped_noise_covar(X.shape)
                      if observation_noise else None)

        # predictive mean and variance for the mvn
        # first the predictive mean
        pred_mean = (test_obs_kernel.matmul(
            self.predictive_mean_cache).reshape_as(test_mean) + test_mean)
        # next the predictive variance; the correction term depends on the
        # noise structure handled below
        test_var_term = KroneckerProductLazyTensor(test_test_covar,
                                                   task_covar).diag()

        if diagonal_noise:
            task_evals, task_evecs = self._task_covar_matrix.diagonalization()
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(data_data_evals), DiagLazyTensor(task_evals)) +
                                   train_noise).inverse()
            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_covar.matmul(data_data_evecs).evaluate()**2,
                task_covar.matmul(task_evecs).evaluate()**2,
            )
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)
        else:
            # if the noise is non-diagonal (but still Kronecker structured), we
            # have to pull the noise across (whiten) because the inverse has no
            # closed form
            # should be a kronecker lt, R = \Sigma_X^{-1/2} \kron \Sigma_T^{-1/2}
            # TODO: enforce the diagonalization to return a KPLT for all shapes in
            # gpytorch or dense linear algebra for small shapes
            data_noise, task_noise = train_noise.lazy_tensors
            data_noise_root = data_noise.root_inv_decomposition(
                method="diagonalization")
            task_noise_root = task_noise.root_inv_decomposition(
                method="diagonalization")

            # ultimately we need to compute the diagonal of
            # (K_{x* X} \kron K_T)(K_{XX} \kron K_T + \Sigma_X \kron \Sigma_T)^{-1}
            #                           (K_{x* X} \kron K_T)^T
            # = (K_{x* X} \Sigma_X^{-1/2} Q_R)(\Lambda_R + I)^{-1}
            #                       (K_{x* X} \Sigma_X^{-1/2} Q_R)^T
            # where R = (\Sigma_X^{-1/2T}K_{XX}\Sigma_X^{-1/2} \kron
            #                   \Sigma_T^{-1/2T}K_{T}\Sigma_T^{-1/2})
            # first we construct the components of R's eigen-decomposition
            # TODO: make this be the default KPMatmulLT diagonal method in gpytorch
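            # the final column-sum uses diag(U D U^T) = (U \hadamard U) d for a
            # diagonal D with entries d, applied per Kronecker factor, which is
            # why the squared matmuls below are summed against
            # full_data_inv_evals along dim=-1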
            whitened_data_covar = (data_noise_root.transpose(
                -1, -2).matmul(data_data_covar).matmul(data_noise_root))
            w_data_evals, w_data_evecs = whitened_data_covar.diagonalization()
            whitened_task_covar = (task_noise_root.transpose(-1, -2).matmul(
                self._task_covar_matrix).matmul(task_noise_root))
            w_task_evals, w_task_evecs = whitened_task_covar.diagonalization()

            # add one to the eigenvalues, matching the (\Lambda_R + I)^{-1}
            # term in the derivation above (not just for numerical stability)
            full_data_inv_evals = (KroneckerProductDiagLazyTensor(
                DiagLazyTensor(w_data_evals),
                DiagLazyTensor(w_task_evals)).add_jitter(1.0).inverse())

            test_data_comp = (test_data_covar.matmul(data_noise_root).matmul(
                w_data_evecs).evaluate()**2)
            task_comp = (task_covar.matmul(task_noise_root).matmul(
                w_task_evecs).evaluate()**2)

            test_train_hadamard = KroneckerProductLazyTensor(
                test_data_comp, task_comp)
            data_var_term = test_train_hadamard.matmul(
                full_data_inv_evals).sum(dim=-1)

        pred_variance = test_var_term - data_var_term
        specialized_mvn = MultitaskMultivariateNormal(
            pred_mean, DiagLazyTensor(pred_variance))
        if observation_noise:
            specialized_mvn = self.likelihood(specialized_mvn)

        posterior = MultitaskGPPosterior(
            mvn=specialized_mvn,
            joint_covariance_matrix=joint_covar,
            test_train_covar=test_obs_kernel,
            train_diff=train_diff,
            test_mean=test_mean,
            train_train_covar=self.train_full_covar,
            train_noise=train_noise,
            test_noise=test_noise,
        )

        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
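
For context, a minimal usage sketch of a posterior method like this one. The surrounding class is assumed to be BoTorch's KroneckerMultiTaskGP (the snippet itself does not name the class), and the data below is made up:

import torch
from botorch.models import KroneckerMultiTaskGP

train_X = torch.rand(10, 2)
train_Y = torch.randn(10, 3)  # three correlated tasks
model = KroneckerMultiTaskGP(train_X, train_Y)

test_X = torch.rand(5, 2)
with torch.no_grad():
    posterior = model.posterior(test_X)  # a MultitaskGPPosterior
    mean, variance = posterior.mean, posterior.variance

In practice the model's hyperparameters would be fit before querying the posterior; that step is omitted here to keep the sketch short.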