Example 1
    def forward(self, x):
        weighted_means = []
        weighted_covars = []
        # filter out models with zero weights
        # weights on covariance matrices are weight**2
        non_zero_weight_indices = (self.weights**2 > 0).nonzero()
        non_zero_weights = self.weights[non_zero_weight_indices]
        # re-normalize
        non_zero_weights /= non_zero_weights.sum()

        for non_zero_weight_idx in range(non_zero_weight_indices.shape[0]):
            raw_idx = non_zero_weight_indices[non_zero_weight_idx].item()
            model = self.models[raw_idx]
            posterior = model.posterior(x)
            # unstandardize predictions
            posterior_mean = posterior.mean.squeeze(-1) * model.Y_std + model.Y_mean
            posterior_cov = posterior.mvn.lazy_covariance_matrix * model.Y_std.pow(2)
            # apply weight
            weight = non_zero_weights[non_zero_weight_idx]
            weighted_means.append(weight * posterior_mean)
            weighted_covars.append(posterior_cov * weight**2)
        # set mean and covariance to be the rank-weighted sum of the means and
        # covariances of the base models and the target model
        mean_x = torch.stack(weighted_means).sum(dim=0)
        covar_x = PsdSumLazyTensor(*weighted_covars)
        return MultivariateNormal(mean_x, covar_x)
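
A minimal, self-contained sketch of the same combination step is below. The means, covariances, and weights are made-up stand-ins; only NonLazyTensor, PsdSumLazyTensor, and MultivariateNormal are real GPyTorch names (assuming a GPyTorch version that still exposes gpytorch.lazy). Means are weighted linearly and covariances by weight**2, then the covariances are wrapped in PsdSumLazyTensor so the sum is treated as positive semi-definite.

import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.lazy import NonLazyTensor, PsdSumLazyTensor

# Illustrative per-model posteriors over 5 test points.
means = [torch.zeros(5), torch.ones(5)]
covars = [NonLazyTensor(torch.eye(5)), NonLazyTensor(2.0 * torch.eye(5))]
weights = torch.tensor([0.25, 0.75])  # already normalized

# Weight means linearly and covariances by weight**2, as in forward() above.
weighted_means = [w * m for w, m in zip(weights, means)]
weighted_covars = [c * w**2 for w, c in zip(weights, covars)]

mean_x = torch.stack(weighted_means).sum(dim=0)
covar_x = PsdSumLazyTensor(*weighted_covars)
mixture = MultivariateNormal(mean_x, covar_x)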
Example 2
    def create_lazy_tensor(self):
        # A batch of two 4x4 symmetric Toeplitz matrices, each defined by its first column.
        c1 = torch.tensor([[2, 0.5, 0, 0], [5, 1, 2, 0]],
                          dtype=torch.float,
                          requires_grad=True)
        t1 = ToeplitzLazyTensor(c1)
        c2 = torch.tensor([[2, 0.5, 0, 0], [6, 0, 1, -1]],
                          dtype=torch.float,
                          requires_grad=True)
        t2 = ToeplitzLazyTensor(c2)
        return PsdSumLazyTensor(t1, t2)
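
This fixture builds a batch of two 4x4 symmetric Toeplitz matrices and sums them lazily. As a quick sanity check (not part of the original test case, and assuming a GPyTorch version that still exposes gpytorch.lazy), the lazy sum should evaluate to the dense sum of its parts:

import torch
from gpytorch.lazy import PsdSumLazyTensor, ToeplitzLazyTensor

c1 = torch.tensor([[2, 0.5, 0, 0], [5, 1, 2, 0]], dtype=torch.float)
c2 = torch.tensor([[2, 0.5, 0, 0], [6, 0, 1, -1]], dtype=torch.float)
t1, t2 = ToeplitzLazyTensor(c1), ToeplitzLazyTensor(c2)

lazy_sum = PsdSumLazyTensor(t1, t2)
dense_sum = t1.evaluate() + t2.evaluate()
assert torch.allclose(lazy_sum.evaluate(), dense_sum)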
Example 3
    def _get_covariance(self, x1, x2):
        k_ux1 = delazify(self.base_kernel(x1, self.inducing_points))
        if torch.equal(x1, x2):
            covar = RootLazyTensor(k_ux1.matmul(self._inducing_inv_root))

            # Diagonal correction for predictive posterior
            correction = (self.base_kernel(x1, x2, diag=True) -
                          covar.diag()).clamp(0, math.inf)
            covar = PsdSumLazyTensor(covar, DiagLazyTensor(correction))
        else:
            k_ux2 = delazify(self.base_kernel(x2, self.inducing_points))
            covar = MatmulLazyTensor(
                k_ux1.matmul(self._inducing_inv_root),
                k_ux2.matmul(self._inducing_inv_root).transpose(-1, -2))

        return covar
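
When x1 == x2, the low-rank RootLazyTensor approximation can underestimate the exact kernel diagonal, so the clamped difference is added back through DiagLazyTensor and PsdSumLazyTensor. A stripped-down sketch of just that correction, with made-up dense stand-ins for the exact diagonal and the low-rank covariance (assuming gpytorch.lazy is available):

import math
import torch
from gpytorch.lazy import DiagLazyTensor, NonLazyTensor, PsdSumLazyTensor

exact_diag = torch.ones(3)                  # stand-in for base_kernel(x1, x2, diag=True)
approx = NonLazyTensor(0.9 * torch.eye(3))  # stand-in for the low-rank covariance

# Add back whatever variance the low-rank approximation lost on the diagonal.
correction = (exact_diag - approx.diag()).clamp(0, math.inf)
corrected = PsdSumLazyTensor(approx, DiagLazyTensor(correction))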
Example 4
    def create_lazy_tensor(self):
        # Two random batches (batch shape 2 x 3) of 4x4 PSD matrices of the form M @ M.T.
        mat1 = torch.randn(2, 3, 4, 4)
        lt1 = NonLazyTensor(mat1 @ mat1.transpose(-1, -2))
        mat2 = torch.randn(2, 3, 4, 4)
        lt2 = NonLazyTensor(mat2 @ mat2.transpose(-1, -2))
        return PsdSumLazyTensor(lt1, lt2)
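
Each summand is of the form M @ M.T and hence positive semi-definite (almost surely positive definite for random 4x4 matrices), so solves against the lazy sum are well defined. A hedged sketch of exercising that, with a made-up right-hand side and again assuming gpytorch.lazy is available:

import torch
from gpytorch.lazy import NonLazyTensor, PsdSumLazyTensor

mat1 = torch.randn(2, 3, 4, 4)
mat2 = torch.randn(2, 3, 4, 4)
lazy_sum = PsdSumLazyTensor(
    NonLazyTensor(mat1 @ mat1.transpose(-1, -2)),
    NonLazyTensor(mat2 @ mat2.transpose(-1, -2)),
)

rhs = torch.randn(2, 3, 4, 1)
solve = lazy_sum.inv_matmul(rhs)  # batched solve against the lazy sum

# Multiplying back should recover the right-hand side up to numerical error.
dense = mat1 @ mat1.transpose(-1, -2) + mat2 @ mat2.transpose(-1, -2)
print(torch.allclose(dense @ solve, rhs, atol=1e-3))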
Example 5
    def forward(self, x):
        """Forward propagate the module.

        This method determines how to marginalize out the inducing function values.
        Specifically, forward defines how to transform a variational distribution over
        the inducing point values, q(u), into a variational distribution over
        the function values at specified locations x, q(f|x), by integrating
        ∫ p(f|x, u) q(u) du.

        Parameters
        ----------
        x (torch.Tensor):
            Locations x at which to evaluate the variational posterior over the function values.

        Returns
        -------
            The distribution q(f|x)
        """
        variational_dist = self.variational_distribution.approx_variational_distribution
        inducing_points = self.inducing_points
        inducing_batch_shape = inducing_points.shape[:-2]
        if inducing_batch_shape < x.shape[:-2] or len(
                inducing_batch_shape) < len(x.shape[:-2]):
            batch_shape = _mul_broadcast_shape(inducing_points.shape[:-2],
                                               x.shape[:-2])
            inducing_points = inducing_points.expand(
                *batch_shape, *inducing_points.shape[-2:])
            x = x.expand(*batch_shape, *x.shape[-2:])
            variational_dist = variational_dist.expand(batch_shape)

        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            return variational_dist

        # Otherwise, we have to marginalize
        else:
            num_induc = inducing_points.size(-2)
            full_inputs = torch.cat([inducing_points, x], dim=-2)
            full_output = self.model.forward(full_inputs)
            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

            # Mean terms
            test_mean = full_mean[..., num_induc:]
            induc_mean = full_mean[..., :num_induc]
            mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

            # Covariance terms
            induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter()
            induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
            data_data_covar = full_covar[..., num_induc:, num_induc:]
            aux = variational_dist.lazy_covariance_matrix.root_decomposition()
            root_variational_covar = aux.root.evaluate()

            # If we had to expand the inducing points,
            # shrink the inducing mean and induc_induc_covar dimension
            # This makes everything more computationally efficient
            if len(inducing_batch_shape) < len(induc_induc_covar.batch_shape):
                index = tuple(
                    0 for _ in range(len(induc_induc_covar.batch_shape)
                                     - len(inducing_batch_shape))
                )
                repeat_size = torch.Size(
                    tuple(induc_induc_covar.batch_shape[:len(index)])
                    + tuple(1 for _ in induc_induc_covar.batch_shape[len(index):])
                )
                induc_induc_covar = BatchRepeatLazyTensor(
                    induc_induc_covar.__getitem__(index), repeat_size)

            # If we're less than a certain size, we'll compute the Cholesky
            # decomposition of induc_induc_covar
            cholesky = False
            if settings.fast_computations.log_prob.off() or (
                    num_induc <= settings.max_cholesky_size.value()):
                induc_induc_covar = CholLazyTensor(
                    induc_induc_covar.cholesky())
                cholesky = True

            # If we are making predictions and don't need variances, we can do things
            # very quickly.
            if not self.training and settings.skip_posterior_variances.on():
                if not hasattr(self, "_mean_cache"):
                    self._mean_cache = induc_induc_covar.inv_matmul(
                        mean_diff).detach()

                predictive_mean = torch.add(
                    test_mean,
                    induc_data_covar.transpose(-2, -1).matmul(
                        self._mean_cache).squeeze(-1))

                predictive_covar = ZeroLazyTensor(test_mean.size(-1),
                                                  test_mean.size(-1))

                return MultivariateNormal(predictive_mean, predictive_covar)

            # Cache the CG results
            # For now: run variational inference without a preconditioner
            # The preconditioner screws things up for some reason
            with settings.max_preconditioner_size(0):
                # Cache the CG results
                left_tensors = torch.cat([mean_diff, root_variational_covar],
                                         -1)
                with torch.no_grad():
                    eager_rhs = torch.cat([left_tensors, induc_data_covar], -1)
                    solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = \
                        CachedCGLazyTensor.precompute_terms(
                            induc_induc_covar, eager_rhs.detach(),
                            logdet_terms=(not cholesky),
                            include_tmats=(not settings.skip_logdet_forward.on() and
                                           not cholesky)
                        )
                    eager_rhss = [
                        eager_rhs.detach(),
                        eager_rhs[..., left_tensors.size(-1):].detach(),
                        eager_rhs[..., :left_tensors.size(-1)].detach()
                    ]
                    solves = [
                        solve.detach(),
                        solve[..., left_tensors.size(-1):].detach(),
                        solve[..., :left_tensors.size(-1)].detach(),
                    ]
                    if settings.skip_logdet_forward.on():
                        eager_rhss.append(
                            torch.cat([probe_vecs, left_tensors], -1))
                        solves.append(
                            torch.cat([
                                probe_vec_solves,
                                solve[..., :left_tensors.size(-1)]
                            ], -1))
                induc_induc_covar = CachedCGLazyTensor(
                    induc_induc_covar,
                    eager_rhss=eager_rhss,
                    solves=solves,
                    probe_vectors=probe_vecs,
                    probe_vector_norms=probe_vec_norms,
                    probe_vector_solves=probe_vec_solves,
                    probe_vector_tmats=tmats,
                )

            if self.training:
                self._memoize_cache["prior_distribution_memo"] = MultivariateNormal(
                    induc_mean, induc_induc_covar)

            # Compute predictive mean/covariance
            inv_products = induc_induc_covar.inv_matmul(
                induc_data_covar, left_tensors.transpose(-1, -2))
            predictive_mean = torch.add(test_mean, inv_products[..., 0, :])
            predictive_covar = RootLazyTensor(
                inv_products[..., 1:, :].transpose(-1, -2))
            if self.training:
                interp_data_data_var, _ = induc_induc_covar.inv_quad_logdet(
                    induc_data_covar, logdet=False, reduce_inv_quad=False)
                data_covariance = DiagLazyTensor(
                    (data_data_covar.diag() - interp_data_data_var).clamp(
                        0, math.inf))
            else:
                neg_induc_data_data_covar = torch.matmul(
                    induc_data_covar.transpose(-1, -2).mul(-1),
                    induc_induc_covar.inv_matmul(induc_data_covar))
                data_covariance = data_data_covar + neg_induc_data_data_covar
            predictive_covar = PsdSumLazyTensor(predictive_covar,
                                                data_covariance)

            return MultivariateNormal(predictive_mean, predictive_covar)
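
The piece of this strategy that involves PsdSumLazyTensor is the final combination: a low-rank RootLazyTensor built from the solves against the variational root, plus a residual data covariance (a clamped DiagLazyTensor during training, a dense correction otherwise). A minimal sketch of that last step, with made-up shapes and values; only the GPyTorch class names are real (assuming gpytorch.lazy is available):

import torch
from gpytorch.distributions import MultivariateNormal
from gpytorch.lazy import NonLazyTensor, PsdSumLazyTensor, RootLazyTensor

n, rank = 6, 2
root = torch.randn(n, rank)                   # stand-in for the interpolated variational root
explained = RootLazyTensor(root)              # low-rank PSD term, root @ root.T
residual = NonLazyTensor(0.1 * torch.eye(n))  # stand-in for the residual data covariance

predictive_covar = PsdSumLazyTensor(explained, residual)
q_f = MultivariateNormal(torch.zeros(n), predictive_covar)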