def inference(self, features, outputs, is_train):
        """Build graph for computing predictive mean and variance and negative log probability.

        Args:
            train_inputs: inputs
            train_outputs: targets
            is_train: whether we're training
        Returns:
            negative log marginal likelihood
        """
        inputs = features['input']
        if is_train:
            # During training, we have to store the training data to compute predictions later on
            train_inputs, train_outputs = self.store(inputs)
            train_inputs.assign(inputs)
            train_outputs.assign(outputs)

        chol, alpha = self._build_interim_vals(inputs, outputs)
        # precision = inv(kxx)
        precision = tfl.cholesky_solve(chol,
                                       tf.eye(tf.shape(input=inputs)[-2]))
        precision_diag = tfl.diag_part(precision)

        loo_fmu = outputs - alpha / precision_diag  # LOO mean, GPML book eq. 5.12
        loo_fs2 = 1.0 / precision_diag  # LOO variance, GPML book eq. 5.12

        # log probability (lp), also called the log pseudo-likelihood
        lp = self._build_loo(outputs, loo_fmu, loo_fs2)

        return {'loss': -lp, 'LP': lp}
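A quick way to sanity-check the leave-one-out identities used above (mean y_i - [K^{-1}y]_i / [K^{-1}]_ii and variance 1 / [K^{-1}]_ii) is to compare them against brute-force leave-one-out prediction on a toy kernel. The sketch below is standalone NumPy, not part of the class above; the RBF kernel and noise level are arbitrary choices for illustration.

import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(0.0, 1.0, 8)
y = np.sin(2 * np.pi * x) + 0.1 * rng.standard_normal(8)

# toy RBF kernel with a small noise term on the diagonal
K = np.exp(-0.5 * (x[:, None] - x[None, :]) ** 2) + 1e-2 * np.eye(8)
Kinv = np.linalg.inv(K)
alpha = Kinv @ y                       # analogue of `alpha` above

loo_mu = y - alpha / np.diag(Kinv)     # analogue of loo_fmu
loo_s2 = 1.0 / np.diag(Kinv)           # analogue of loo_fs2

# brute force: condition on every point except i and predict at x[i]
for i in range(len(x)):
    keep = np.arange(len(x)) != i
    k_star = K[i, keep]
    K_rest = K[np.ix_(keep, keep)]
    mu_i = k_star @ np.linalg.solve(K_rest, y[keep])
    s2_i = K[i, i] - k_star @ np.linalg.solve(K_rest, k_star)
    assert np.isclose(mu_i, loo_mu[i]) and np.isclose(s2_i, loo_s2[i])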
Example 2
def solve_det_conditional(x, sigma, A, Q):
    """
    Use matrix inversion lemma for the solve:

    .. math::
        (\\Sigma - AQ^{-1}A^T)^{-1} X =\\
        (\\Sigma^{-1} + \\Sigma^{-1} A (Q -
        A^T \\Sigma^{-1} A)^{-1} A^T \\Sigma^{-1}) X

    Use matrix determinant lemma for determinant:

    .. math::
        \\log|(\\Sigma - AQ^{-1}A^T)| =
        \\log|Q - A^T \\Sigma^{-1} A| - \\log|Q| + \\log|\\Sigma|

    Parameters
    ----------
    x: tf.Tensor
        Tensor that the inverse is applied to (the right-hand side of the solve)
    sigma: brainiak.matnormal.CovBase
        Covariance object implementing solve and logdet
    A: tf.Tensor
        Factor multiplying the variable we conditioned on
    Q: brainiak.matnormal.CovBase
        Covariance object of conditioning variable,
        implementing solve and logdet

    Returns
    -------
    solve: tf.Tensor
        :math:`(\\Sigma - AQ^{-1}A^T)^{-1} x`
    logdet: tf.Tensor
        :math:`\\log|\\Sigma - AQ^{-1}A^T|`
    """

    # (Q - A^T Sigma^{-1} A)
    lemma_factor = tlinalg.cholesky(
        Q._cov - tf.matmul(A, sigma.solve(A), transpose_a=True))

    logdet = (-Q.logdet + sigma.logdet + 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor))))

    # A^T Sigma^{-1}
    Atrp_Sinv = tf.matmul(A, sigma._prec, transpose_a=True)
    # (Q - A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}
    prod_term = tlinalg.cholesky_solve(lemma_factor, Atrp_Sinv)

    solve = tf.matmul(
        sigma.solve(scaled_I(1.0, sigma.size) + tf.matmul(A, prod_term)), x)

    return solve, logdet
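The identities in the docstring are easy to verify numerically with dense matrices, without the brainiak CovBase objects. The following standalone NumPy sketch (sizes and scaling chosen only so that Sigma - A Q^{-1} A^T stays positive definite) checks both the solve and the log-determinant against direct computation.

import numpy as np

rng = np.random.default_rng(0)
n, k = 6, 3
Sigma = 2.0 * np.eye(n) + 0.1 * np.ones((n, n))   # SPD covariance
Q = 10.0 * np.eye(k)                              # SPD conditioning covariance
A = 0.5 * rng.standard_normal((n, k))
x = rng.standard_normal((n, 2))

M = Sigma - A @ np.linalg.inv(Q) @ A.T
Sinv = np.linalg.inv(Sigma)
lemma = Q - A.T @ Sinv @ A

# matrix inversion lemma: M^{-1} x == (Sinv + Sinv A lemma^{-1} A^T Sinv) x
lhs = np.linalg.solve(M, x)
rhs = (Sinv + Sinv @ A @ np.linalg.solve(lemma, A.T @ Sinv)) @ x
assert np.allclose(lhs, rhs)

# matrix determinant lemma: log|M| == log|lemma| - log|Q| + log|Sigma|
logdet_M = np.linalg.slogdet(M)[1]
logdet_rhs = (np.linalg.slogdet(lemma)[1]
              - np.linalg.slogdet(Q)[1]
              + np.linalg.slogdet(Sigma)[1])
assert np.isclose(logdet_M, logdet_rhs)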
Example 3
def mpgm_loss(target, prediction, l_A=1., l_E=1., l_F=1.):
    """
    Loss function using max-pooling graph matching as describes in the GraphVAE paper.
    Lets see if backprop works. Args obvly the same as above!
    """
    A, E, F = target
    A_hat, E_hat, F_hat = prediction
    n = A.shape[1]
    k = A_hat.shape[1]
    mpgm = MPGM()
    X = tf.cast(mpgm.call(A, A_hat, E, E_hat, F, F_hat), dtype=tf.float64)

    # now comes the loss part from the paper:
    A_t = tf.transpose(X, perm=[0, 2, 1]) @ A @ X  # X^T A X, shape (bs, k, k)
    E_hat_t = tf.transpose(batch_dot(batch_dot(X, E_hat, axes=(-1, 1)),
                                     X,
                                     axes=(-2, 1)),
                           perm=[0, 1, 3, 2])
    F_hat_t = tf.matmul(X, F_hat)
    # add_e7 adds a small epsilon so the logs below stay finite.
    A_hat_4log = add_e7(A_hat)

    term_1 = (1 / k) * tf.math.reduce_sum(
        diag_part(A_t) * tf.math.log(diag_part(A_hat_4log)), [1],
        keepdims=True)

    term_2 = tf.reduce_sum(
        (tf.ones_like(diag_part(A_t)) - diag_part(A_t)) *
        (tf.ones_like(diag_part(A_hat)) - tf.math.log(diag_part(A_hat_4log))),
        [1],
        keepdims=True)

    # TODO: unclear whether the off-diagonal factor is 1/(k*(1-k)) or (1-k)/k.
    # The second sum in the paper is also ambiguous; it is interpreted here as an
    # element-wise product summed over all off-diagonal entries.
    term_31 = set_diag(A_t, tf.zeros_like(diag_part(A_t))) * set_diag(
        tf.math.log(A_hat_4log), tf.zeros_like(diag_part(A_hat)))
    term_31 = replace_nan(term_31)  # masked entries can produce NaNs; zero them out

    term_32 = (tf.ones_like(A_t) - set_diag(A_t, tf.zeros_like(
        diag_part(A_t)))) * tf.math.log(
            tf.ones_like(A_t) -
            set_diag(A_hat_4log, tf.zeros_like(diag_part(A_hat))))
    term_32 = replace_nan(term_32)  # log(1 - A_hat) is NaN wherever A_hat >= 1
    term_3 = (1 / k * (1 - k)) * tf.expand_dims(
        tf.math.reduce_sum(term_31 + term_32, [1, 2]), -1)
    log_p_A = term_1 + term_2 + term_3

    # TODO: the paper is unclear about whether the log applies to F, F_hat, or their product.
    F = tf.cast(F, dtype=tf.float64)
    A = tf.cast(A, dtype=tf.float64)
    E = tf.cast(E, dtype=tf.float64)
    log_p_F = (1 / n) * tf.math.log(
        tf.expand_dims(tf.math.reduce_sum(add_e7(F * F_hat_t), [1, 2]), -1))

    log_p_E = tf.math.log(
        tf.expand_dims((1 / (tf.norm(A, ord='fro', axis=[-2, -1]) - n)) *
                       tf.math.reduce_sum(add_e7(E * E_hat_t), [1, 2, 3]), -1))

    loss = -l_A * log_p_A - l_F * log_p_F - l_E * log_p_E
    return loss
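The diag_part / set_diag pattern above is how the node (diagonal) and edge (off-diagonal) Bernoulli terms of the adjacency log-likelihood get separated. Below is a minimal standalone TensorFlow sketch of just that split, with made-up tensors and an explicit epsilon standing in for add_e7; it is an illustration of the masking pattern, not the full GraphVAE loss.

import tensorflow as tf

# toy batch of 2 graphs with k = 3 nodes: "matched" target adjacency A_t in {0,1}
# and predicted node/edge probabilities A_hat in (0,1)
A_t = tf.constant([[[1., 1., 0.], [1., 1., 1.], [0., 1., 1.]]] * 2)
A_hat = tf.random.uniform((2, 3, 3), minval=0.05, maxval=0.95)
eps = 1e-7  # stand-in for add_e7

# diagonal entries carry node existence
diag_t = tf.linalg.diag_part(A_t)
diag_hat = tf.linalg.diag_part(A_hat)
ll_nodes = tf.reduce_sum(
    diag_t * tf.math.log(diag_hat + eps)
    + (1. - diag_t) * tf.math.log(1. - diag_hat + eps), axis=1)

# off-diagonal entries carry edges: zero the diagonals, then mask and sum
zeros = tf.zeros_like(diag_t)
off_t = tf.linalg.set_diag(A_t, zeros)
off_hat = tf.linalg.set_diag(A_hat, zeros)
mask = 1. - tf.eye(3, batch_shape=[2])  # keeps only off-diagonal terms
ll_edges = tf.reduce_sum(
    mask * (off_t * tf.math.log(off_hat + eps)
            + (1. - off_t) * tf.math.log(1. - off_hat + eps)), axis=[1, 2])

print(ll_nodes.numpy(), ll_edges.numpy())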
Example 4
def solve_det_marginal(x, sigma, A, Q):
    """
    Use matrix inversion lemma for the solve:

    .. math::
        (\\Sigma + AQA^T)^{-1} X =\\
        (\\Sigma^{-1} - \\Sigma^{-1} A (Q^{-1} +
        A^T \\Sigma^{-1} A)^{-1} A^T \\Sigma^{-1}) X

    Use matrix determinant lemma for determinant:

    .. math::
        \\log|(\\Sigma + AQA^T)| = \\log|Q^{-1} + A^T \\Sigma^{-1} A|
        + \\log|Q| + \\log|\\Sigma|

    Parameters
    ----------
    x: tf.Tensor
        Tensor that the inverse is applied to (the right-hand side of the solve)
    sigma: brainiak.matnormal.CovBase
        Covariance object implementing solve and logdet
    A: tf.Tensor
        Factor multiplying the variable we marginalized out
    Q: brainiak.matnormal.CovBase
        Covariance object of marginalized variable,
        implementing solve and logdet

    Returns
    -------
    solve: tf.Tensor
        :math:`(\\Sigma + AQA^T)^{-1} x`
    logdet: tf.Tensor
        :math:`\\log|\\Sigma + AQA^T|`
    """

    # For diagnostics, we want to check the condition numbers of the things we
    # invert: Q and Sigma, as well as (Q^{-1} + A^T Sigma^{-1} A), which looks
    # sort of like a Schur complement but isn't, so we call it the "lemma
    # factor" since we use it in the Woodbury and matrix determinant lemmas.
    logging.log(logging.DEBUG, "Printing diagnostics for solve_det_marginal")
    lemma_inner = Q._prec + tf.matmul(A, sigma.solve(A), transpose_a=True)
    logging.log(logging.DEBUG,
                f"lemma_factor condition={_condition(lemma_inner)}")
    logging.log(logging.DEBUG, f"Q condition={_condition(Q._cov)}")
    logging.log(logging.DEBUG, f"sigma condition={_condition(sigma._cov)}")
    logging.log(
        logging.DEBUG,
        f"A max={tf.reduce_max(input_tensor=A)}, "
        f"A min={tf.reduce_min(input_tensor=A)}",
    )

    # Cholesky of the lemma factor, reused for both the logdet and the solve
    lemma_factor = tlinalg.cholesky(lemma_inner)

    lemma_logdet = 2 * tf.reduce_sum(
        input_tensor=tf.math.log(tlinalg.diag_part(lemma_factor)))
    logdet = Q.logdet + sigma.logdet + lemma_logdet

    logging.log(logging.DEBUG, f"Q logdet={Q.logdet}")
    logging.log(logging.DEBUG, f"sigma logdet={sigma.logdet}")
    logging.log(logging.DEBUG, f"lemma factor logdet={lemma_logdet}")

    # A^T Sigma^{-1}
    Atrp_Sinv = tf.matmul(A, sigma._prec, transpose_a=True)
    # (Qinv + A^T Sigma^{-1} A)^{-1} A^T Sigma^{-1}
    prod_term = tlinalg.cholesky_solve(lemma_factor, Atrp_Sinv)

    solve = tf.matmul(
        sigma.solve(scaled_I(1.0, sigma.size) - tf.matmul(A, prod_term)), x)

    return solve, logdet
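As with solve_det_conditional, the Woodbury solve and determinant-lemma logdet computed here can be checked against dense linear algebra. The NumPy sketch below mirrors the structure of the function (lemma-factor Cholesky, 2 * sum(log(diag)) logdet) using plain matrices in place of the CovBase objects; the shapes and values are arbitrary.

import numpy as np

rng = np.random.default_rng(1)
n, k = 6, 3
Sigma = 2.0 * np.eye(n)          # stand-in for sigma._cov
Q = 0.5 * np.eye(k)              # stand-in for Q._cov
A = rng.standard_normal((n, k))
x = rng.standard_normal((n, 2))

M = Sigma + A @ Q @ A.T
Sinv = np.linalg.inv(Sigma)

# "lemma factor": chol(Q^{-1} + A^T Sigma^{-1} A)
lemma_factor = np.linalg.cholesky(np.linalg.inv(Q) + A.T @ Sinv @ A)

# matrix determinant lemma, with the logdet read off the Cholesky diagonal
logdet = (np.linalg.slogdet(Q)[1] + np.linalg.slogdet(Sigma)[1]
          + 2 * np.sum(np.log(np.diag(lemma_factor))))
assert np.isclose(logdet, np.linalg.slogdet(M)[1])

# Woodbury identity for the solve (cholesky_solve emulated via L @ L.T)
Atrp_Sinv = A.T @ Sinv
prod_term = np.linalg.solve(lemma_factor @ lemma_factor.T, Atrp_Sinv)
solve = Sinv @ (np.eye(n) - A @ prod_term) @ x
assert np.allclose(solve, np.linalg.solve(M, x))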
Example 5
def log_cholesky_det(chol):
    """Log-determinant of a positive-definite matrix given its Cholesky factor."""
    return 2 * tf.reduce_sum(input_tensor=tfm.log(tfl.diag_part(chol)),
                             axis=-1)
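A one-liner like this is easy to check against tf.linalg.logdet; here is a minimal verification, written with explicit tf.linalg / tf.math calls rather than the tfl / tfm aliases assumed above:

import tensorflow as tf

M = tf.constant([[4.0, 1.0], [1.0, 3.0]], dtype=tf.float64)
L = tf.linalg.cholesky(M)
# log|M| = 2 * sum(log(diag(L))) when M = L L^T
via_chol = 2.0 * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(L)), axis=-1)
tf.debugging.assert_near(via_chol, tf.linalg.logdet(M))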