Example #1
def main():

    sample_sizes = [250, 500, 1000, 2000, 10000]
    rhos = [0, 0.3, 0.6, 0.9]
    r = 2
    s = 2
    K = 50

    time_results = {
        rho: [{ssize: []
               for ssize in sample_sizes}, None]
        for rho in rhos
    }
    all_results = []

    for rho in rhos:
        for sample_size in sample_sizes:

            cov = np.array([[1., rho], [rho, 1.]])
            dist = MultivariateNormal(mean=np.zeros(2), cov=cov)

            t_ci, t_nad, t_ml = [], [], []

            delta = lambda x: chi2.ppf(0.97, x**2 - 1)

            print(f"Timing samples {sample_size} for r = {rho}")

            for k in range(K):
                xy_sample = dist.sample(sample_size)

                plane = Plane(xy_sample)

                # Adaptive algorithm
                t0_ad = time.time()
                ad = AdaptiveAlgorithm(xy_sample, delta, r, s).run()
                t_ci.append(time.time() - t0_ad)

                t0_nad = time.time()
                nad = NonAdaptivePartition(xy_sample, bins=[50, 50]).run()
                t_nad.append(time.time() - t0_nad)

                t0_ml = time.time()
                ml = -np.log(
                    1 - pearsonr(xy_sample[:, 0], xy_sample[:, 1])[0]**2) / 2
                t_ml.append(time.time() - t0_ml)

                all_results.append((ad, nad, ml))

            time_results[rho][0][sample_size] = [
                np.mean(t_ml), np.mean(t_ci),
                np.mean(t_nad)
            ]

            print(
                f"Times: ML: {np.mean(t_ml)}, CI: {np.mean(t_ci)}, NAD: {np.mean(t_nad)}"
            )

    generate_timing_table(time_results)

    print(len(all_results))
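
The ML baseline timed above is the closed-form plug-in estimate for a bivariate Gaussian, I(X; Y) = -0.5 log(1 - rho^2). A minimal, self-contained sketch of that estimator (NumPy/SciPy only, independent of the Plane/AdaptiveAlgorithm helpers used above):

import numpy as np
from scipy.stats import pearsonr

rng = np.random.default_rng(0)
rho = 0.6
cov = np.array([[1.0, rho], [rho, 1.0]])
xy = rng.multivariate_normal(mean=np.zeros(2), cov=cov, size=2000)

true_mi = -0.5 * np.log(1 - rho**2)          # exact MI of the bivariate Gaussian
rho_hat = pearsonr(xy[:, 0], xy[:, 1])[0]    # sample correlation
ml_mi = -0.5 * np.log(1 - rho_hat**2)        # ML plug-in estimate
print(f"true MI: {true_mi:.4f}, ML plug-in: {ml_mi:.4f}")
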
    def forward(self,
                x,
                inducing_points,
                inducing_values,
                variational_inducing_covar=None):
        if variational_inducing_covar is not None:
            raise NotImplementedError(
                "OrthogonallyDecoupledVariationalStrategy currently works with DeltaVariationalDistribution"
            )

        num_data = x.size(-2)
        full_output = self.model(torch.cat([x, inducing_points], dim=-2))
        full_mean = full_output.mean
        full_covar = full_output.lazy_covariance_matrix

        if self.training:
            induc_mean = full_mean[..., num_data:]
            induc_induc_covar = full_covar[..., num_data:, num_data:]
            self._memoize_cache[
                "prior_distribution_memo"] = MultivariateNormal(
                    induc_mean, induc_induc_covar)

        test_mean = full_mean[..., :num_data]
        data_induc_covar = full_covar[..., :num_data, num_data:]
        predictive_mean = (data_induc_covar @ inducing_values.unsqueeze(-1)
                           ).squeeze(-1).add(test_mean)
        predictive_covar = full_covar[..., :num_data, :num_data]

        # Return the distribution
        return MultivariateNormal(predictive_mean, predictive_covar)
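
Reading the predictive mean and covariance directly off the code above, the (delta) predictive distribution returned by this strategy is

\[
q(f(X)) = \mathcal{N}\big(\mu_X + K_{XZ}\,m,\; K_{XX}\big),
\]

where \(m\) is inducing_values, \(K_{XZ}\) is data_induc_covar, and \(K_{XX}\) is the data block of the joint prior covariance.
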
Example #3
    def forward(self,
                x,
                inducing_points,
                inducing_values,
                variational_inducing_covar=None):
        if variational_inducing_covar is None:
            raise RuntimeError(
                "GridInterpolationVariationalStrategy is only compatible with Gaussian variational "
                f"distributions. Got ({self.variational_distribution.__class__.__name__}."
            )

        variational_distribution = self.variational_distribution

        # Get interpolations
        interp_indices, interp_values = self._compute_grid(x)

        # Compute test mean
        # Left multiply samples by interpolation matrix
        predictive_mean = left_interp(interp_indices, interp_values,
                                      inducing_values.unsqueeze(-1))
        predictive_mean = predictive_mean.squeeze(-1)

        # Compute test covar
        predictive_covar = InterpolatedLazyTensor(
            variational_distribution.lazy_covariance_matrix,
            interp_indices,
            interp_values,
            interp_indices,
            interp_values,
        )
        output = MultivariateNormal(predictive_mean, predictive_covar)
        return output
    def forward(self,
                x,
                inducing_points,
                inducing_values,
                variational_inducing_covar=None):
        # Compute full prior distribution
        full_inputs = torch.cat([inducing_points, x], dim=-2)
        full_output = self.model.forward(full_inputs)
        full_covar = full_output.lazy_covariance_matrix

        # Covariance terms
        num_induc = inducing_points.size(-2)
        test_mean = full_output.mean[..., num_induc:]
        induc_induc_covar = full_covar[
            ..., :num_induc, :num_induc].add_jitter()
        induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
        data_data_covar = full_covar[..., num_induc:, num_induc:]

        # Compute interpolation terms
        # K_ZZ^{-1/2} K_ZX
        # K_ZZ^{-1/2} \mu_Z
        L = self._cholesky_factor(induc_induc_covar)
        if L.shape != induc_induc_covar.shape:
            # Aggressive caching can cause nasty shape incompatibilities when evaluating with different batch shapes
            del self._memoize_cache["cholesky_factor"]
            L = self._cholesky_factor(induc_induc_covar)
        interp_term = torch.triangular_solve(induc_data_covar.double(),
                                             L,
                                             upper=False)[0].to(
                                                 full_inputs.dtype)

        # Compute the mean of q(f)
        # k_XZ K_ZZ^{-1/2} (m - K_ZZ^{-1/2} \mu_Z) + \mu_X
        predictive_mean = (torch.matmul(
            interp_term.transpose(-1, -2),
            (inducing_values -
             self.prior_distribution.mean).unsqueeze(-1)).squeeze(-1) +
                           test_mean)

        # Compute the covariance of q(f)
        # K_XX + k_XZ K_ZZ^{-1/2} (S - I) K_ZZ^{-1/2} k_ZX
        middle_term = self.prior_distribution.lazy_covariance_matrix.mul(-1)
        if variational_inducing_covar is not None:
            middle_term = SumLazyTensor(variational_inducing_covar,
                                        middle_term)

        if trace_mode.on():
            predictive_covar = (data_data_covar.add_jitter(1e-4).evaluate() +
                                interp_term.transpose(-1, -2)
                                @ middle_term.evaluate() @ interp_term)
        else:
            predictive_covar = SumLazyTensor(
                data_data_covar.add_jitter(1e-4),
                MatmulLazyTensor(interp_term.transpose(-1, -2),
                                 middle_term @ interp_term),
            )

        # Return the distribution
        return MultivariateNormal(predictive_mean, predictive_covar)
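
For reference, the inline comments above correspond to the whitened predictive equations

\[
\mu_{q(f)}(X) = K_{XZ} K_{ZZ}^{-1/2}\big(m - K_{ZZ}^{-1/2}\mu_Z\big) + \mu_X,
\qquad
\Sigma_{q(f)}(X) = K_{XX} + K_{XZ} K_{ZZ}^{-1/2}\,(S - I)\,K_{ZZ}^{-1/2} K_{ZX},
\]

where \(L\) with \(L L^\top = K_{ZZ}\) is the Cholesky factor from _cholesky_factor, interp_term is \(L^{-1} K_{ZX}\), \(m\) is inducing_values, and \(S\) is variational_inducing_covar.
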
    def forward(self):
        # TODO: if we don't multiply self._variational_stddev by a mask of ones, Pyro models fail
        # not sure where this bug is occurring (in Pyro or PyTorch)
        # throwing this in as a hotfix for now - we should investigate later
        mask = torch.ones_like(self._variational_stddev)
        variational_covar = DiagLazyTensor(
            self._variational_stddev.mul(mask).pow(2))
        return MultivariateNormal(self.variational_mean, variational_covar)
    def forward(self, x1, x2, diag=False, **kwargs):
        covar = self._get_covariance(x1, x2)

        if self.training:
            if not torch.equal(x1, x2):
                raise RuntimeError("x1 should equal x2 in training mode")
            zero_mean = torch.zeros_like(x1.select(-1, 0))
            new_added_loss_term = InducingPointKernelAddedLossTerm(
                MultivariateNormal(zero_mean, self._covar_diag(x1)),
                MultivariateNormal(zero_mean, covar),
                self.likelihood,
            )
            self.update_added_loss_term("inducing_point_loss_term",
                                        new_added_loss_term)

        if diag:
            return covar.diag()
        else:
            return covar
    def prior_distribution(self):
        """
        If desired, models can compare the input to forward to inducing_points and use a GridKernel for space
        efficiency.

        However, when using a default VariationalDistribution, which has O(m^2) space complexity anyway, we find
        that GridKernel is typically not worth it due to the moderate slowdown of using FFTs.
        """
        out = super(AdditiveGridInterpolationVariationalStrategy, self).prior_distribution
        mean = out.mean.repeat(self.num_dim, 1)
        covar = out.lazy_covariance_matrix.repeat(self.num_dim, 1, 1)
        return MultivariateNormal(mean, covar)
Example #8
    def forward(self):
        chol_variational_covar = self.chol_variational_covar
        dtype = chol_variational_covar.dtype
        device = chol_variational_covar.device

        # First make sure the Cholesky factor is lower triangular (zero out the upper triangle)
        lower_mask = torch.ones(self.chol_variational_covar.shape[-2:],
                                dtype=dtype,
                                device=device).tril(0)
        chol_variational_covar = chol_variational_covar.mul(lower_mask)

        # Now construct the actual matrix
        variational_covar = CholLazyTensor(chol_variational_covar)
        return MultivariateNormal(self.variational_mean, variational_covar)
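
A minimal plain-PyTorch sketch of the same idea (not the gpytorch API): keep only the lower triangle of the unconstrained factor and parameterize the covariance as S = L L^T, which is positive semi-definite by construction; a small jitter keeps it strictly positive definite.

import torch

n = 4
raw_chol = torch.randn(n, n)
L = raw_chol.tril()                                 # zero out the upper triangle
S = L @ L.transpose(-1, -2) + 1e-4 * torch.eye(n)   # S = L L^T (+ jitter)
mean = torch.zeros(n)
q_u = torch.distributions.MultivariateNormal(mean, covariance_matrix=S)
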
    def forward(self, x, inducing_points, inducing_values, variational_inducing_covar=None):
        if x.ndimension() == 1:
            x = x.unsqueeze(-1)
        elif x.ndimension() != 2:
            raise RuntimeError("AdditiveGridInterpolationVariationalStrategy expects a 2d tensor.")

        num_data, num_dim = x.size()
        if num_dim != self.num_dim:
            raise RuntimeError("The number of dims should match the number specified.")

        output = super().forward(x, inducing_points, inducing_values, variational_inducing_covar)
        if self.sum_output:
            if variational_inducing_covar is not None:
                mean = output.mean.sum(0)
                covar = output.lazy_covariance_matrix.sum(-3)
                return MultivariateNormal(mean, covar)
            else:
                return Delta(output.mean.sum(0))
        else:
            return output
    def forward(self, x):
        r"""
        The :func:`~gpytorch.variational.VariationalStrategy.forward` method determines how to marginalize out the
        inducing point function values. Specifically, forward defines how to transform a variational distribution
        over the inducing point values, :math:`q(u)`, in to a variational distribution over the function values at
        specified locations x, :math:`q(f|x)`, by integrating :math:`\int p(f|x, u)q(u)du`

        :param torch.Tensor x: Locations x to get the variational posterior of the function values at.
        :rtype: ~gpytorch.distributions.MultivariateNormal
        :return: The distribution :math:`q(f|x)`
        """
        variational_dist = self.variational_distribution
        inducing_points = self.inducing_points
        if inducing_points.dim() < x.dim():
            inducing_points = inducing_points.expand(
                *x.shape[:-2], *inducing_points.shape[-2:])
        if len(variational_dist.batch_shape) < x.dim() - 2:
            variational_dist = variational_dist.expand(x.shape[:-2])

        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            # De-whiten the prior covar
            prior_covar = self.prior_distribution.lazy_covariance_matrix
            if isinstance(variational_dist.lazy_covariance_matrix,
                          RootLazyTensor):
                predictive_covar = RootLazyTensor(
                    prior_covar
                    @ variational_dist.lazy_covariance_matrix.root.evaluate())
            else:
                predictive_covar = MatmulLazyTensor(
                    prior_covar @ variational_dist.covariance_matrix,
                    prior_covar)

            # Cache some values for the KL divergence
            if self.training:
                self._mean_diff_inv_quad_memo, self._logdet_memo = prior_covar.inv_quad_logdet(
                    (variational_dist.mean - self.prior_distribution.mean),
                    logdet=True)

            return MultivariateNormal(variational_dist.mean, predictive_covar)

        # Otherwise, we have to marginalize
        else:
            num_induc = inducing_points.size(-2)
            full_inputs = torch.cat([inducing_points, x], dim=-2)
            full_output = self.model.forward(full_inputs)
            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

            # Mean terms
            test_mean = full_mean[..., num_induc:]
            induc_mean = full_mean[..., :num_induc]
            mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1)

            # Covariance terms
            induc_induc_covar = full_covar[
                ..., :num_induc, :num_induc].add_jitter()
            induc_data_covar = full_covar[..., :num_induc,
                                          num_induc:].evaluate()
            data_data_covar = full_covar[..., num_induc:, num_induc:]

            # If we're less than a certain size, we'll compute the Cholesky decomposition of induc_induc_covar
            cholesky = False
            if settings.fast_computations.log_prob.off() or (
                    num_induc <= settings.max_cholesky_size.value()):
                induc_induc_covar = CholLazyTensor(
                    induc_induc_covar.cholesky())
                cholesky = True

            # Cache the CG results
            # Do not use preconditioning for whitened VI, as it does not seem to improve performance.
            with settings.max_preconditioner_size(0):
                with torch.no_grad():
                    eager_rhs = torch.cat([induc_data_covar, mean_diff], -1)
                    solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = CachedCGLazyTensor.precompute_terms(
                        induc_induc_covar,
                        eager_rhs.detach(),
                        logdet_terms=(not cholesky),
                        include_tmats=(not settings.skip_logdet_forward.on()
                                       and not cholesky),
                    )
                    eager_rhss = [eager_rhs.detach()]
                    solves = [solve.detach()]
                    if settings.skip_logdet_forward.on() and self.training:
                        eager_rhss.append(
                            torch.cat([probe_vecs, eager_rhs], -1))
                        solves.append(
                            torch.cat([
                                probe_vec_solves,
                                solve[..., :eager_rhs.size(-1)]
                            ], -1))
                    elif not self.training:
                        eager_rhss.append(eager_rhs[..., :-1])
                        solves.append(solve[..., :-1])

                induc_induc_covar = CachedCGLazyTensor(
                    induc_induc_covar,
                    eager_rhss=eager_rhss,
                    solves=solves,
                    probe_vectors=probe_vecs,
                    probe_vector_norms=probe_vec_norms,
                    probe_vector_solves=probe_vec_solves,
                    probe_vector_tmats=tmats,
                )

            # Compute some terms that will be necessary for the predictive covariance and KL divergence
            if self.training:
                interp_data_data_var_plus_mean_diff_inv_quad, logdet = induc_induc_covar.inv_quad_logdet(
                    torch.cat([induc_data_covar, mean_diff], -1),
                    logdet=True,
                    reduce_inv_quad=False)
                interp_data_data_var = interp_data_data_var_plus_mean_diff_inv_quad[
                    ..., :-1]
                mean_diff_inv_quad = interp_data_data_var_plus_mean_diff_inv_quad[
                    ..., -1]

            # Compute predictive mean
            predictive_mean = torch.add(
                test_mean,
                induc_induc_covar.inv_matmul(
                    mean_diff,
                    left_tensor=induc_data_covar.transpose(-1,
                                                           -2)).squeeze(-1),
            )

            # Compute the predictive covariance
            is_root_lt = isinstance(variational_dist.lazy_covariance_matrix,
                                    RootLazyTensor)
            is_repeated_root_lt = isinstance(
                variational_dist.lazy_covariance_matrix,
                BatchRepeatLazyTensor) and isinstance(
                    variational_dist.lazy_covariance_matrix.base_lazy_tensor,
                    RootLazyTensor)
            if is_root_lt:
                predictive_covar = RootLazyTensor(
                    induc_data_covar.transpose(-1, -2)
                    @ variational_dist.lazy_covariance_matrix.root.evaluate())
            elif is_repeated_root_lt:
                predictive_covar = RootLazyTensor(
                    induc_data_covar.transpose(
                        -1, -2) @ variational_dist.lazy_covariance_matrix.
                    root_decomposition().root.evaluate())
            else:
                predictive_covar = MatmulLazyTensor(
                    induc_data_covar.transpose(-1, -2),
                    variational_dist.lazy_covariance_matrix @ induc_data_covar)

            if self.training:
                data_covariance = DiagLazyTensor(
                    (data_data_covar.diag() - interp_data_data_var).clamp(
                        0, math.inf))
            else:
                neg_induc_data_data_covar = torch.matmul(
                    induc_data_covar.transpose(-1, -2).mul(-1),
                    induc_induc_covar.inv_matmul(induc_data_covar))
                data_covariance = data_data_covar + neg_induc_data_data_covar
            predictive_covar = PsdSumLazyTensor(predictive_covar,
                                                data_covariance)

            # Save the logdet, mean_diff_inv_quad, prior distribution for the ELBO
            if self.training:
                self._memoize_cache[
                    "prior_distribution_memo"] = MultivariateNormal(
                        induc_mean, induc_induc_covar)
                self._memoize_cache["logdet_memo"] = -logdet
                self._memoize_cache[
                    "mean_diff_inv_quad_memo"] = mean_diff_inv_quad

            return MultivariateNormal(predictive_mean, predictive_covar)
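
The Cholesky-vs-CG branch above is gated by gpytorch's global settings. A usage sketch, assuming gpytorch is installed and `model`/`x` are an approximate GP and its inputs (both assumed here, not defined above):

import gpytorch

# Force the Cholesky branch whenever the number of inducing points is <= 2000,
# or set the threshold to 0 to always fall back to CG.
with gpytorch.settings.max_cholesky_size(2000):
    output = model(x)   # `model` and `x` are assumed to exist
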
Example #11
    def prior_distribution(self):
        out = self.model.forward(self.inducing_points)
        res = MultivariateNormal(out.mean,
                                 out.lazy_covariance_matrix.add_jitter())
        return res
Example #12
def main():

    sample_sizes = [250, 500, 1000, 2000, 10000]
    rhos = [0, 0.3, 0.6, 0.9]
    K = 20

    rs_2 = []
    rs_4 = []
    rs_5 = []
    rs_10 = []

    results = {
        rho: [{ssize: []
               for ssize in sample_sizes}, None]
        for rho in rhos
    }

    for rho in rhos:

        rs_2_std = []
        rs_4_std = []
        rs_5_std = []
        rs_10_std = []

        real_mi = -np.log(1 - rho**2) / 2

        results[rho][1] = real_mi

        for sample_size in sample_sizes:

            cov = np.array([[1., rho], [rho, 1.]])
            dist = MultivariateNormal(mean=np.zeros(2), cov=cov)

            rs_2_l, rs_4_l, rs_5_l, rs_10_l = [], [], [], []

            delta = lambda x: chi2.ppf(0.97, x**2 - 1)

            for k in range(K):

                xy_sample = dist.sample(sample_size)

                # Adaptive algorithm
                plane = Plane(xy_sample)
                rs_2_l.append(
                    kl_estimate(
                        plane,
                        AdaptiveAlgorithm(xy_sample, delta, 2, 2).run()))
                plane = Plane(xy_sample)
                rs_4_l.append(
                    kl_estimate(
                        plane,
                        AdaptiveAlgorithm(xy_sample, delta, 4, 4).run()))
                plane = Plane(xy_sample)
                rs_5_l.append(
                    kl_estimate(
                        plane,
                        AdaptiveAlgorithm(xy_sample, delta, 5, 5).run()))
                plane = Plane(xy_sample)
                rs_10_l.append(
                    kl_estimate(
                        plane,
                        AdaptiveAlgorithm(xy_sample, delta, 10, 10).run()))

            results[rho][0][sample_size] = [
                np.mean(rs_2_l),
                np.mean(rs_4_l),
                np.mean(rs_5_l),
                np.mean(rs_10_l)
            ]

            print(
                "---------------------------------------------------------------------------------------------"
            )
            print("rho: %.2f, Sample Size: %d, Real MI: %.4f" %
                  (rho, sample_size, real_mi))
            print("r=s=2: %.4f, r=s=4: %.4f, r=s=5: %.4f, r=s=10: %.4f" %
                  (np.mean(rs_2_l), np.mean(rs_4_l), np.mean(rs_5_l),
                   np.mean(rs_5_l)))

            rs_2_std.append(np.std(rs_2_l))
            rs_4_std.append(np.std(rs_4_l))
            rs_5_std.append(np.std(rs_5_l))
            rs_10_std.append(np.std(rs_10_l))

        rs_2.append(rs_2_std)
        rs_4.append(rs_4_std)
        rs_5.append(rs_5_std)
        rs_10.append(rs_10_std)

    generate_rs_table(results)

    all_std = [rs_2, rs_4, rs_5, rs_10]
    for i, _ in enumerate(["r=s=2", "r=s=4", "r=s=5", "r=s=10"]):

        plt.figure()
        plt.semilogx(sample_sizes,
                     all_std[i][0],
                     '-o',
                     label=r'$\rho$ =' + f'{0.0}')
        plt.semilogx(sample_sizes,
                     all_std[i][1],
                     '-o',
                     label=r'$\rho$ =' + f'{0.3}')
        plt.semilogx(sample_sizes,
                     all_std[i][2],
                     '-o',
                     label=r'$\rho$ =' + f'{0.6}')
        plt.semilogx(sample_sizes,
                     all_std[i][3],
                     '-o',
                     label=r'$\rho$ =' + f'{0.9}')
        plt.xlabel(r'$\log_{10}$ of sample size')

        if i == 0:
            plt.ylabel("std($\hat{I}_{CI}^{r=s=2}$)")
            plt.title(
                "Standard deviation of MI estimator $I_{CI}$ with $r=s=2$")
        elif i == 1:
            plt.ylabel("std($\hat{I}_{CI}^{r=s=4}$)")
            plt.title(
                "Standard deviation of MI estimator $I_{CI}$ with $r=s=4$")
        elif i == 2:
            plt.ylabel("std($\hat{I}_{CI}^{r=s=5}$)")
            plt.title(
                "Standard deviation of MI estimator $I_{CI}$ with $r=s=5$")
        else:
            plt.ylabel("std($\hat{I}_{CI}^{r=s=10}$)")
            plt.title(
                "Standard deviation of MI estimator $I_{CI}$ with $r=s=10$")

        plt.legend()
        plt.show()
    def marginal(self, function_dist: MultivariateNormal, *params: Any,
                 **kwargs: Any) -> MultivariateNormal:
        mean, covar = function_dist.mean, function_dist.lazy_covariance_matrix
        noise_covar = self._shaped_noise_covar(mean.shape, *params, **kwargs)
        full_covar = covar + noise_covar
        return function_dist.__class__(mean, full_covar)
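
In plain PyTorch terms, `marginal` just adds the likelihood's noise covariance to the latent covariance, i.e. p(y | x) = N(mu, K + Sigma_noise). A toy sketch with stand-in values (the mean, covariance, and noise variance below are made up for illustration):

import torch

mu = torch.zeros(5)       # stand-in latent mean
K = torch.eye(5)          # stand-in latent covariance
sigma2 = 0.1              # stand-in homoskedastic noise variance
p_y = torch.distributions.MultivariateNormal(mu, K + sigma2 * torch.eye(5))
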
def main():

    sample_sizes = [250, 500, 1000, 2000, 10000]
    rhos = [0, 0.3, 0.6, 0.9]
    r = 2
    s = 2
    K = 50

    ci_mi_all_std = []
    na_mi_all_std = []
    ml_mi_all_std = []

    results = {
        rho: [{ssize: []
               for ssize in sample_sizes}, None]
        for rho in rhos
    }

    for rho in rhos:

        ci_mi_std = []
        na_mi_std = []
        ml_mi_std = []

        real_mi = -np.log(1 - rho**2) / 2

        results[rho][1] = real_mi

        for sample_size in sample_sizes:

            cov = np.array([[1., rho], [rho, 1.]])
            dist = MultivariateNormal(mean=np.zeros(2), cov=cov)

            ci_mi_l, ml_mi_l, na_mi_l = [], [], []

            delta = lambda x: chi2.ppf(0.97, x**2 - 1)

            for k in range(K):

                xy_sample = dist.sample(sample_size)

                plane = Plane(xy_sample)

                # Adaptive algorithm
                ci_mi_l.append(
                    kl_estimate(
                        plane,
                        AdaptiveAlgorithm(xy_sample, delta, r, s).run()))

                na_mi_l.append(
                    kl_estimate(
                        plane,
                        NonAdaptivePartition(xy_sample, bins=[50, 50]).run()))

                ml_mi_l.append(
                    -np.log(1 -
                            pearsonr(xy_sample[:, 0], xy_sample[:, 1])[0]**2) /
                    2)

            results[rho][0][sample_size] = [
                np.mean(ml_mi_l),
                np.mean(ci_mi_l),
                np.mean(na_mi_l)
            ]

            print(
                "---------------------------------------------------------------------------------------------"
            )
            print("rho: %.2f, Sample Size: %d, Real MI: %.4f" %
                  (rho, sample_size, real_mi))
            print(
                "Adaptive Partition MI: %.4f, NA Partition MI: %.4f, ML MI: %.4f"
                % (np.mean(ci_mi_l), np.mean(na_mi_l), np.mean(ml_mi_l)))

            ci_mi_std.append(np.std(ci_mi_l))
            na_mi_std.append(np.std(na_mi_l))
            ml_mi_std.append(np.std(ml_mi_l))

        ci_mi_all_std.append(ci_mi_std)
        na_mi_all_std.append(na_mi_std)
        ml_mi_all_std.append(ml_mi_std)

    generate_table(results)

    all_std = [ci_mi_all_std, na_mi_all_std, ml_mi_all_std]
    for i, _ in enumerate(["CI", "NA", "ML"]):

        plt.figure()
        plt.semilogx(sample_sizes,
                     all_std[i][0],
                     '-o',
                     label=r'$\rho$ =' + f'{0.0}')
        plt.semilogx(sample_sizes,
                     all_std[i][1],
                     '-o',
                     label=r'$\rho$ =' + f'{0.3}')
        plt.semilogx(sample_sizes,
                     all_std[i][2],
                     '-o',
                     label=r'$\rho$ =' + f'{0.6}')
        plt.semilogx(sample_sizes,
                     all_std[i][3],
                     '-o',
                     label=r'$\rho$ =' + f'{0.9}')
        plt.xlabel(r'$\log_{10}$ of sample size')

        if i == 0:
            plt.ylabel("std($\hat{I}_{CI}$)")
            plt.title("Standard deviation of MI estimator $I_{CI}$")
        elif i == 1:
            plt.ylabel("std($\hat{I}_{NA}$)")
            plt.title("Standard deviation of MI estimator $I_{NA}$")
        else:
            plt.ylabel("std($\hat{I}_{ML}$)")
            plt.title("Standard deviation of MI estimator $I_{ML}$")

        plt.legend()
        plt.show()
Example #15
def main():
    # Load image
    im = Image.open(image_file).convert('RGB')
    width, height = im.size

    # Convenience function to build image band-by-band from array data
    def image_from_array(dat):
        bands = [Image.new('L', (width, height)) for n in range(3)]
        for i in range(3):
            bands[i].putdata(dat[:, i])
        return Image.merge('RGB', bands)

    # Resize image
    width, height = int(width / image_rescale), int(height / image_rescale)
    im = im.resize((width, height))

    # Summary image
    summary = Image.new('RGB', (width * 2 + 40, height * 2 + 60),
                        (255, 255, 255))
    draw = ImageDraw.Draw(summary)
    draw.text((5, height + 10), 'Original', fill=(0, 0, 0))
    draw.text((width + 25, height + 10),
              'Noise V = %.2f, C = %.2f' % (noise_var, noise_cov),
              fill=(0, 0, 0))
    draw.text((5, 2 * height + 40), 'Blocked Gamma', fill=(0, 0, 0))
    draw.text((width + 25, 2 * height + 40), 'Dists', fill=(0, 0, 0))
    del draw
    summary.paste(im, (10, 10))

    # Flatten to emissions
    real_emissions = list(im.getdata())
    num_data = len(real_emissions)
    real_emissions = np.array(real_emissions)

    # Block emissions
    width_blocks = np.array_split(np.arange(width), block_splits)
    height_blocks = np.array_split(np.arange(height), block_splits)
    idx = np.arange(num_data)
    idx.resize((height, width))
    blocks = []
    for hb in height_blocks:
        for wb in width_blocks:
            block = [idx[h, w] for h in hb for w in wb]
            blocks.append(np.array(block))

    # Generate noise
    v, c = noise_var, noise_cov
    cov = [[v, c, c], [c, v, c], [c, c, v]]
    noise = np.random.multivariate_normal([0, 0, 0], cov, width * height)
    noisy_emissions = real_emissions + noise

    # Generate noisy image
    noisy = image_from_array(noisy_emissions)
    summary.paste(noisy, (30 + width, 10))

    # Use K-means to initialize components
    results = kmeans(noisy_emissions, num_comps)
    init_gamma = results['best']
    means = results['means']

    # Analyze color space
    if do_colormap:
        col = {'R': 0, 'G': 1, 'B': 2}
        plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            plt.subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]],
                       d[:, col[c2]],
                       gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.plot(means[:, col[c1]], means[:, col[c2]], '.k')
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        plt.savefig('image_test_color_colormap.png')
        plt.show()

    # Do EM
    results = em(noisy_emissions,
                 [MultivariateNormal() for n in range(num_comps)],
                 count_restart=count_restart,
                 blocks=blocks,
                 max_reps=100,
                 init_gamma=init_gamma,
                 trace=True,
                 pi_max=pi_max)
    dists = results['dists']
    dists_trace = results['dists_trace']
    pi = results['pi']
    print('Iterations: %(reps)d' % results)

    gamma = np.transpose(results['gamma'])
    means = np.array([d.mean() for d in dists])
    covs = np.array([d.cov() for d in dists])

    # Reconstruct with blocked gamma
    rec_blocked_gamma = np.array(
        [np.average(means, weights=g, axis=0) for g in gamma])
    im_blocked_gamma = image_from_array(rec_blocked_gamma)
    summary.paste(im_blocked_gamma, (10, 40 + height))

    # Reconstruct from distributions alone
    pi_opt = pi_maximize(noisy_emissions, dists)
    phi = np.empty((num_data, num_comps))
    for c in range(num_comps):
        phi[:, c] = dists[c].density(noisy_emissions)
    phi = np.matrix(phi)
    for i, pi in enumerate(pi_opt):
        phi[:, i] *= pi
    gamma_dists = phi / np.sum(phi, axis=1)
    rec_dists = np.array(np.dot(gamma_dists, means))
    im_dists = image_from_array(rec_dists)
    summary.paste(im_dists, (30 + width, 40 + height))

    # Show summary image
    if show_summary:
        summary.show()
    summary.save('image_test_color_reconstruction.png')

    # Compare RMSE between reconstructions
    def rmse(x):
        return np.sqrt(np.mean((x - real_emissions)**2))

    print('Raw RMSE: %.1f' % rmse(noisy_emissions))
    print('Blocked Gamma RMSE: %.1f' % rmse(rec_blocked_gamma))
    print('Dists RMSE: %.1f' % rmse(rec_dists))

    # Visualize variance components
    if do_variance_viz:
        temp_files = []
        col = {'R': 0, 'G': 1, 'B': 2}
        fig = plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            ax = fig.add_subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]],
                       d[:, col[c2]],
                       gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        for idx, dists in enumerate(dists_trace):
            ells = []
            for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                             (real_emissions, 'R', 'B'),
                                             (real_emissions, 'G', 'B'),
                                             (noisy_emissions, 'R', 'G'),
                                             (noisy_emissions, 'R', 'B'),
                                             (noisy_emissions, 'G', 'B')]):
                for dist in dists:
                    m, c = dist.mean(), dist.cov()
                    cm = (c[[col[c1], col[c2]]])[:, [col[c1], col[c2]]]
                    e, v = la.eigh(cm)
                    ell = Ellipse(xy=[m[col[c1]], m[col[c2]]],
                                  width=np.sqrt(e[0]),
                                  height=np.sqrt(e[1]),
                                  angle=(180.0 / np.pi) * np.arccos(v[0, 0]))
                    ells.append(ell)
                    ax = fig.add_subplot(2, 3, i + 1)
                    ax.add_artist(ell)
                    ell.set_clip_box(ax.bbox)
                    ell.set_alpha(0.9)
                    ell.set_facecolor(np.fmax(np.fmin(m / 255, 1), 0))
            file_name = 'tmp_%03d.png' % idx
            temp_files.append(file_name)
            plt.savefig(file_name, dpi=100)
            for ell in ells:
                ell.remove()
        command = ('mencoder', 'mf://tmp_*.png', '-mf',
                   'type=png:w=800:h=600:fps=5', '-ovc', 'lavc', '-lavcopts',
                   'vcodec=mpeg4', '-oac', 'copy', '-o',
                   'image_test_color_components.avi')
        os.spawnvp(os.P_WAIT, 'mencoder', command)
        for temp_file in temp_files:
            os.unlink(temp_file)

    # Find common variance components
    print('True noise:')
    print(cov)
    chols = [la.cholesky(c) for c in covs]
    chol_recon = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            if j > i: continue
            chol_recon[i, j] = np.Inf
            for chol in chols:
                if abs(chol[i, j]) < abs(chol_recon[i, j]):
                    chol_recon[i, j] = chol[i, j]
    cov_recon = np.dot(chol_recon, np.transpose(chol_recon))
    print('Reconstructed noise:')
    print(cov_recon)
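
Both reconstructions above reduce to a responsibility-weighted average of the component means, rec = gamma @ means. A toy NumPy sketch with made-up responsibilities and means:

import numpy as np

gamma = np.array([[0.9, 0.1], [0.2, 0.8]])                 # per-pixel responsibilities (toy values)
means = np.array([[255.0, 0.0, 0.0], [0.0, 0.0, 255.0]])   # component means in RGB (toy values)
rec = gamma @ means                                        # shape: (num_pixels, 3)
print(rec)
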
    def prior_distribution(self):
        zeros = torch.zeros_like(self.variational_distribution.mean)
        ones = torch.ones_like(zeros)
        res = MultivariateNormal(zeros, DiagLazyTensor(ones))
        return res
Example #17
    def forward(self,
                x,
                inducing_points,
                inducing_values,
                variational_inducing_covar=None):
        # If our points equal the inducing points, we're done
        if torch.equal(x, inducing_points):
            if variational_inducing_covar is None:
                raise RuntimeError
            else:
                return MultivariateNormal(inducing_values,
                                          variational_inducing_covar)

        # Otherwise, we have to marginalize
        num_induc = inducing_points.size(-2)
        full_inputs = torch.cat([inducing_points, x], dim=-2)
        full_output = self.model.forward(full_inputs)
        full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

        # Mean terms
        test_mean = full_mean[..., num_induc:]
        induc_mean = full_mean[..., :num_induc]
        mean_diff = (inducing_values - induc_mean).unsqueeze(-1)

        # Covariance terms
        induc_induc_covar = full_covar[
            ..., :num_induc, :num_induc].add_jitter()
        induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate()
        data_data_covar = full_covar[..., num_induc:, num_induc:]

        # If we're less than a certain size, we'll compute the Cholesky decomposition of induc_induc_covar
        cholesky = False
        if settings.fast_computations.log_prob.off() or (
                num_induc <= settings.max_cholesky_size.value()):
            induc_induc_covar = CholLazyTensor(
                self._cholesky_factor(induc_induc_covar))
            cholesky = True

        # If we are making predictions and don't need variances, we can do things very quickly.
        if not self.training and settings.skip_posterior_variances.on():
            if not hasattr(self, "_mean_cache"):
                # For now: run variational inference without a preconditioner
                # The preconditioner screws things up for some reason
                with settings.max_preconditioner_size(0):
                    self._mean_cache = induc_induc_covar.inv_matmul(
                        mean_diff).detach()
            predictive_mean = torch.add(
                test_mean,
                induc_data_covar.transpose(-2, -1).matmul(
                    self._mean_cache).squeeze(-1))
            predictive_covar = ZeroLazyTensor(test_mean.size(-1),
                                              test_mean.size(-1))
            return MultivariateNormal(predictive_mean, predictive_covar)

        # Expand everything to the right size
        shapes = [
            mean_diff.shape[:-1], induc_data_covar.shape[:-1],
            induc_induc_covar.shape[:-1]
        ]
        if variational_inducing_covar is not None:
            root_variational_covar = variational_inducing_covar.root_decomposition(
            ).root.evaluate()
            shapes.append(root_variational_covar.shape[:-1])
        shape = _mul_broadcast_shape(*shapes)
        mean_diff = mean_diff.expand(*shape, mean_diff.size(-1))
        induc_data_covar = induc_data_covar.expand(*shape,
                                                   induc_data_covar.size(-1))
        induc_induc_covar = induc_induc_covar.expand(
            *shape, induc_induc_covar.size(-1))
        if variational_inducing_covar is not None:
            root_variational_covar = root_variational_covar.expand(
                *shape, root_variational_covar.size(-1))

        # Cache the CG results
        # For now: run variational inference without a preconditioner
        # The preconditioner screws things up for some reason
        with settings.max_preconditioner_size(0):
            # Cache the CG results
            if variational_inducing_covar is None:
                left_tensors = mean_diff
            else:
                left_tensors = torch.cat([mean_diff, root_variational_covar],
                                         -1)

            with torch.no_grad():
                eager_rhs = torch.cat([left_tensors, induc_data_covar], -1)
                solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = CachedCGLazyTensor.precompute_terms(
                    induc_induc_covar,
                    eager_rhs.detach(),
                    logdet_terms=(not cholesky),
                    include_tmats=(not settings.skip_logdet_forward.on()
                                   and not cholesky),
                )
                eager_rhss = [
                    eager_rhs.detach(),
                    eager_rhs[..., left_tensors.size(-1):].detach(),
                    eager_rhs[..., :left_tensors.size(-1)].detach(),
                ]
                solves = [
                    solve.detach(),
                    solve[..., left_tensors.size(-1):].detach(),
                    solve[..., :left_tensors.size(-1)].detach(),
                ]
                if settings.skip_logdet_forward.on():
                    eager_rhss.append(torch.cat([probe_vecs, left_tensors],
                                                -1))
                    solves.append(
                        torch.cat([
                            probe_vec_solves,
                            solve[..., :left_tensors.size(-1)]
                        ], -1))
            induc_induc_covar = CachedCGLazyTensor(
                induc_induc_covar,
                eager_rhss=eager_rhss,
                solves=solves,
                probe_vectors=probe_vecs,
                probe_vector_norms=probe_vec_norms,
                probe_vector_solves=probe_vec_solves,
                probe_vector_tmats=tmats,
            )

        # Cache the kernel matrix with the cached CG calls
        if self.training:
            self._memoize_cache[
                "prior_distribution_memo"] = MultivariateNormal(
                    induc_mean, induc_induc_covar)

        # Compute predictive mean
        inv_products = induc_induc_covar.inv_matmul(
            induc_data_covar, left_tensors.transpose(-1, -2))
        predictive_mean = torch.add(test_mean, inv_products[..., 0, :])

        # Compute covariance
        if self.training:
            interp_data_data_var, _ = induc_induc_covar.inv_quad_logdet(
                induc_data_covar, logdet=False, reduce_inv_quad=False)
            data_covariance = DiagLazyTensor(
                (data_data_covar.diag() - interp_data_data_var).clamp(
                    0, math.inf))
        else:
            neg_induc_data_data_covar = torch.matmul(
                induc_data_covar.transpose(-1, -2).mul(-1),
                induc_induc_covar.inv_matmul(induc_data_covar))
            data_covariance = data_data_covar + neg_induc_data_data_covar
        predictive_covar = PsdSumLazyTensor(
            RootLazyTensor(inv_products[..., 1:, :].transpose(-1, -2)),
            data_covariance)

        # Done!
        return MultivariateNormal(predictive_mean, predictive_covar)
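
The mean-only fast path near the top of this method (the ZeroLazyTensor branch) is gated by a global setting. A usage sketch, assuming gpytorch is installed and `model`/`x_test` are an approximate GP and its test inputs (both assumed here, not defined above):

import torch
import gpytorch

model.eval()
# Skip posterior variance computation for fast mean-only predictions,
# which triggers the ZeroLazyTensor shortcut above.
with torch.no_grad(), gpytorch.settings.skip_posterior_variances(True):
    preds = model(x_test)   # `model` and `x_test` are assumed to exist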