Example #1
import tensorflow as tf
from tensorflow import linalg

def jitter_cholesky(A):
    try:
        # First attempt: add a small jitter to the diagonal for numerical stability.
        jitter1 = linalg.diag(1e-7 * tf.ones(A.shape[-1], dtype='float64'))
        return linalg.cholesky(A + jitter1)
    except Exception:
        # Retry with a larger jitter if the first factorization fails.
        jitter2 = linalg.diag(1e-5 * tf.ones(A.shape[-1], dtype='float64'))
        return linalg.cholesky(A + jitter2)
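
A quick illustration of why the fallback jitter matters (a minimal sketch, assuming TensorFlow 2.x eager execution and the jitter_cholesky defined above): a rank-deficient Gram matrix typically makes the plain Cholesky fail, while the jittered call succeeds.

import tensorflow as tf

# Rank-1 (singular) Gram matrix: plain linalg.cholesky(A) would normally fail here.
X = tf.constant([[1.0], [2.0], [3.0]], dtype='float64')
A = tf.matmul(X, X, transpose_b=True)

L = jitter_cholesky(A)  # succeeds thanks to the diagonal jitter
# Reconstruction error is on the order of the jitter (~1e-7).
print(tf.reduce_max(tf.abs(tf.matmul(L, L, transpose_b=True) - A)))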
Example #2
    def _build_interim_vals(self, train_inputs, train_outputs):
        _, var = self.lik(0, variances=0)
        # kxx (num_train, num_train)
        kxx = self.cov[0](train_inputs) + var * tf.eye(
            tf.shape(input=train_inputs)[-2])

        jitter = JITTER * tf.eye(tf.shape(input=train_inputs)[-2])
        # chol (same size as kxx); jitter must be added for numerical stability
        chol = tfl.cholesky(kxx + jitter)
        # alpha = chol.T \ (chol \ train_outputs)
        alpha = tfl.cholesky_solve(chol, train_outputs)
        return chol, alpha
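
For reference, tfl.cholesky_solve(chol, train_outputs) returns kxx^{-1} @ train_outputs given the lower Cholesky factor of kxx, which is exactly the alpha = chol.T \ (chol \ train_outputs) described in the comment. A small self-contained check (hypothetical 3x3 kernel, TensorFlow 2.x assumed):

import tensorflow as tf

kxx = tf.constant([[2.0, 0.5, 0.1],
                   [0.5, 2.0, 0.5],
                   [0.1, 0.5, 2.0]], dtype='float64')
y = tf.constant([[1.0], [2.0], [3.0]], dtype='float64')

chol = tf.linalg.cholesky(kxx)
alpha = tf.linalg.cholesky_solve(chol, y)     # two triangular solves
direct = tf.linalg.solve(kxx, y)              # dense solve, for comparison
print(tf.reduce_max(tf.abs(alpha - direct)))  # ~0 up to round-off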
Example #3
    def _transform_variables(self, inputs=1):
        """Transorm variables that were stored in a more compact form.

        Doing it like this allows us to put certain constraints on the variables.
        """
        # Use softmax(raw_weights) to keep all weights normalized.
        weights, chol_covars, means, inducing_inputs = self.store(inputs)

        # Build the matrices of covariances between inducing inputs.
        kernel_mat = tf.stack([
            self.cov[i](inducing_inputs[i, :, :])
            for i in range(self.num_latents)
        ], 0)
        jitter = JITTER * tf.eye(tf.shape(input=inducing_inputs)[-2])

        kernel_chol = tfl.cholesky(kernel_mat + jitter)
        return weights, chol_covars, kernel_chol, means, inducing_inputs
Example #4
def solve_det_conditional(x, sigma, A, Q):
    """
    Use matrix inversion lemma for the solve:

    .. math::
        (\\Sigma - AQ^{-1}A^T)^{-1} X =\\
        (\\Sigma^{-1} + \\Sigma^{-1} A (Q -
        A^T \\Sigma^{-1} A)^{-1} A^T \\Sigma^{-1}) X

    Use matrix determinant lemma for determinant:

    .. math::
        \\log|(\\Sigma - AQ^{-1}A^T)| =
        \\log|Q - A^T \\Sigma^{-1} A| - \\log|Q| + \\log|\\Sigma|

    Parameters
    ----------
    x: tf.Tensor
        Tensor to multiply the solve by
    sigma: brainiak.matnormal.CovBase
        Covariance object implementing solve and logdet
    A: tf.Tensor
        Factor multiplying the variable we conditioned on
    Q: brainiak.matnormal.CovBase
        Covariance object of conditioning variable,
        implementing solve and logdet

    """

    # (Q - A^T Sigma^{-1} A)
    lemma_factor = tlinalg.cholesky(
        Q._cov - tf.matmul(A, sigma.solve(A), transpose_a=True))

    logdet = (-Q.logdet + sigma.logdet + 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor))))

    # A^T Sigma^{-1}
    Atrp_Sinv = tf.matmul(A, sigma._prec, transpose_a=True)
    # (Q - A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}
    prod_term = tlinalg.cholesky_solve(lemma_factor, Atrp_Sinv)

    solve = tf.matmul(
        sigma.solve(scaled_I(1.0, sigma.size) + tf.matmul(A, prod_term)), x)

    return solve, logdet
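
The determinant identity in the docstring is easy to sanity-check numerically. A minimal NumPy sketch with made-up matrices standing in for the CovBase objects:

import numpy as np

rng = np.random.default_rng(0)
sigma = np.diag([2.0, 3.0, 4.0])   # stand-in for Sigma (3x3)
Q = np.diag([5.0, 6.0])            # stand-in for Q (2x2)
A = rng.normal(size=(3, 2))

lhs = np.linalg.slogdet(sigma - A @ np.linalg.inv(Q) @ A.T)[1]
rhs = (np.linalg.slogdet(Q - A.T @ np.linalg.inv(sigma) @ A)[1]
       - np.linalg.slogdet(Q)[1]
       + np.linalg.slogdet(sigma)[1])
print(np.isclose(lhs, rhs))  # True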
Example #5
    def _build_entropy(self, weights, means, chol_covars):
        """Construct entropy.

        Args:
            weights: shape: (num_components)
            means: shape: (num_components, num_latents, num_inducing)
            chol_covars: shape: (num_components, num_latents, num_inducing[, num_inducing])
        Returns:
            Entropy (scalar)
        """

        # The block below (disabled, kept for reference) computes the parameters
        # of the product of the normal pdfs across latent functions.
        """
        chol_component_covar = []
        component_mean = []
        component_covar = []
        covar_shape = tf.shape(chol_covars)[-2:]
        mean_shape = tf.shape(means)[-1:]

        # \Sigma_new = (\sum_{i=1}^{num_latents}( \Sigma_i^-1) )^{-1}
        # \Mu_new = \Sigma_new * (\sum_{i=1}^{num_latents} \Sigma_i^{-1} * \mu_i)
        for i in range(self.num_components):
            temp_cov = tf.zeros(covar_shape)
            temp_mean = tf.zeros(mean_shape)[..., tf.newaxis]

            for k in range(self.num_latents):
                # Compute the sum of (\Sigma_i)^{-1}
                temp_cov += tf.cholesky_solve(chol_covars[i, k, :, :], tf.eye(covar_shape[0]))
                # Compute the sum of (\Sigma_i)^{-1} * \mu_i
                temp_mean += tf.cholesky_solve(chol_covars[i, k, :, :],
                                               means[i, k, :, tf.newaxis])

            # Compute \Sigma_new = temp_cov^{-1}
            temp_chol_covar = tf.cholesky(temp_cov)
            temp_component_covar = tf.cholesky_solve(temp_chol_covar, tf.eye(covar_shape[0]))
            component_covar.append(temp_component_covar)
            # Compute \Mu_new = \Sigma_new * (\sum_{i=1}^{num_latents} \Sigma_i^{-1} * \mu_i)
            temp_component_mean = temp_component_covar @ temp_mean
            component_mean.append(temp_component_mean)

            # Some functions need cholesky of \Sigma_new
            chol_component_covar.append(tf.cholesky(temp_component_covar))

        chol_component_covar = tf.stack(chol_component_covar, 0)
        component_covar = tf.stack(component_covar, 0)
        component_mean = tf.squeeze(tf.stack(component_mean, 0), -1)
        """
        # First build a square matrix of normals.
        if self.args['diag_post']:
            # construct normal distributions for all combinations of components
            variational_dist = tfd.MultivariateNormalDiag(
                means,
                tf.sqrt(chol_covars[tf.newaxis, ...] +
                        chol_covars[:, tf.newaxis, ...]))
        else:
            if self.args['num_components'] == 1:
                # Use the fact that chol(S + S) = sqrt(2) * chol(S)
                chol_covars_sum = tf.sqrt(2.) * chol_covars[tf.newaxis, ...]
            else:
                # Here we use the original component_covar directly
                # TODO: Can we just stay in cholesky space somehow?
                component_covar = util.mat_square(chol_covars)
                chol_covars_sum = tfl.cholesky(
                    component_covar[tf.newaxis, ...] +
                    component_covar[:, tf.newaxis, ...])
            # The class MultivariateNormalTriL only accepts cholesky decompositions of covariances
            variational_dist = tfd.MultivariateNormalTriL(
                means[tf.newaxis, ...], chol_covars_sum)

        # compute log probability of all means in all normal distributions
        # then sum over all latent functions
        # shape of log_normal_probs: (num_components, num_components)
        log_normal_probs = tf.reduce_sum(
            input_tensor=variational_dist.log_prob(means[:, tf.newaxis, ...]),
            axis=-1)

        # Now compute the entropy.
        # broadcast `weights` into dimension 1, then do `logsumexp` in that dimension
        weighted_logsumexp_probs = tf.reduce_logsumexp(
            input_tensor=tfm.log(weights) + log_normal_probs, axis=1)
        # multiply with weights again and then sum over it all
        return -util.mul_sum(weights, weighted_logsumexp_probs)
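
The single-component shortcut above relies on chol(S + S) = sqrt(2) * chol(S), which holds because (sqrt(2) L)(sqrt(2) L)^T = 2 L L^T = 2S and sqrt(2) L is still lower triangular with a positive diagonal. A quick NumPy check with an arbitrary positive-definite S:

import numpy as np

S = np.array([[4.0, 1.0],
              [1.0, 3.0]])
L = np.linalg.cholesky(S)
print(np.allclose(np.linalg.cholesky(S + S), np.sqrt(2.0) * L))  # True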
Example #6
def solve_det_marginal(x, sigma, A, Q):
    """
    Use matrix inversion lemma for the solve:

    .. math::
        (\\Sigma + AQA^T)^{-1} X =\\
        (\\Sigma^{-1} - \\Sigma^{-1} A (Q^{-1} +
        A^T \\Sigma^{-1} A)^{-1} A^T \\Sigma^{-1}) X

    Use matrix determinant lemma for determinant:

    .. math::
        \\log|(\\Sigma + AQA^T)| = \\log|Q^{-1} + A^T \\Sigma^{-1} A|
        + \\log|Q| + \\log|\\Sigma|

    Parameters
    ----------
    x: tf.Tensor
        Tensor to multiply the solve by
    sigma: brainiak.matnormal.CovBase
        Covariance object implementing solve and logdet
    A: tf.Tensor
        Factor multiplying the variable we marginalized out
    Q: brainiak.matnormal.CovBase
        Covariance object of marginalized variable,
        implementing solve and logdet
    """

    # For diagnostics, we want to check condition numbers
    # of things we invert. This includes Q and Sigma, as well
    # as the "lemma factor" for lack of a better definition
    logging.log(logging.DEBUG, "Printing diagnostics for solve_det_marginal")
    lemma_cond = _condition(Q._prec +
                            tf.matmul(A, sigma.solve(A), transpose_a=True))
    logging.log(
        logging.DEBUG,
        f"lemma_factor condition={lemma_cond}",
    )
    logging.log(logging.DEBUG, f"Q condition={_condition(Q._cov)}")
    logging.log(logging.DEBUG, f"sigma condition={_condition(sigma._cov)}")
    logging.log(
        logging.DEBUG,
        f"sigma max={tf.reduce_max(input_tensor=A)}," +
        f"sigma min={tf.reduce_min(input_tensor=A)}",
    )

    # cholesky of (Qinv + A^T Sigma^{-1} A), which looks sort of like
    # a schur complement but isn't, so we call it the "lemma factor"
    # since we use it in woodbury and matrix determinant lemmas
    lemma_factor = tlinalg.cholesky(
        Q._prec + tf.matmul(A, sigma.solve(A), transpose_a=True))

    logdet = (Q.logdet + sigma.logdet + 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor))))

    logging.log(logging.DEBUG, f"Log-determinant of Q={Q.logdet}")
    logging.log(logging.DEBUG, f"sigma logdet={sigma.logdet}")
    lemma_logdet = 2 * \
        tf.reduce_sum(input_tensor=tf.math.log(
            tlinalg.diag_part(lemma_factor)))
    logging.log(
        logging.DEBUG,
        f"lemma factor logdet={lemma_logdet}",
    )

    # A^T Sigma^{-1}
    Atrp_Sinv = tf.matmul(A, sigma._prec, transpose_a=True)
    # (Qinv + A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}
    prod_term = tlinalg.cholesky_solve(lemma_factor, Atrp_Sinv)

    solve = tf.matmul(
        sigma.solve(scaled_I(1.0, sigma.size) - tf.matmul(A, prod_term)), x)

    return solve, logdet
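
As with solve_det_conditional, the Woodbury-based solve can be checked against a dense solve on small matrices. A minimal NumPy sketch, with explicit arrays standing in for the CovBase objects:

import numpy as np

rng = np.random.default_rng(1)
sigma = np.diag([2.0, 3.0, 4.0])
Q = np.diag([5.0, 6.0])
A = rng.normal(size=(3, 2))
x = rng.normal(size=(3, 1))

# Direct solve of (Sigma + A Q A^T) y = x.
direct = np.linalg.solve(sigma + A @ Q @ A.T, x)

# Woodbury form: (Sigma^{-1} - Sigma^{-1} A (Q^{-1} + A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}) x
sig_inv = np.linalg.inv(sigma)
lemma_inv = np.linalg.inv(np.linalg.inv(Q) + A.T @ sig_inv @ A)
woodbury = (sig_inv - sig_inv @ A @ lemma_inv @ A.T @ sig_inv) @ x
print(np.allclose(direct, woodbury))  # True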