Example #1
    def build_prior_KL(self):

        KL = tf.Variable(0, name='KL', trainable=False, dtype=float_type)

        for d in range(self.D):
            K = self.kerns[d].K(self.X)
            K_alpha = tf.matmul(K, self.q_alpha[d, :, :])
            f_mean = K_alpha + self.mean_functions[d](self.X)

            # compute the variance for each of the outputs
            I = tf.tile(tf.expand_dims(eye(self.num_data), 0),
                        [self.num_latent.value, 1, 1])
            A = I + tf.expand_dims(tf.transpose(self.q_lambda[d,:,:]), 1) * \
                tf.expand_dims(tf.transpose(self.q_lambda[d,:,:]), 2) * K
            L = tf.cholesky(A)
            Li = tf.matrix_triangular_solve(L, I)
            tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda[d, :, :]), 1)
            f_var = 1. / tf.square(self.q_lambda[d, :, :]) - tf.transpose(
                tf.reduce_sum(tf.square(tmp), 1))

            # some statistics about A are used in the KL
            A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
            trAi = tf.reduce_sum(tf.square(Li))

            KL += 0.5 * (A_logdet + trAi -
                         self.num_data.value * self.num_latent.value +
                         tf.reduce_sum(K_alpha * self.q_alpha[d, :, :]))
        return KL
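
For reference, the quantity the loop above accumulates is, per output d and per latent column, a KL term built from A = I + diag(lambda) K diag(lambda). The following is a minimal NumPy sketch of that single-column term (the names single_column_kl, K, alpha and lam are stand-ins for one slice of self.kerns[d].K(self.X), self.q_alpha[d] and self.q_lambda[d]; this is an illustration under those assumptions, not the method itself):

import numpy as np

def single_column_kl(K, alpha, lam):
    # A = I + diag(lam) K diag(lam), matching the batched construction above
    N = K.shape[0]
    A = np.eye(N) + np.outer(lam, lam) * K
    _, logdet_A = np.linalg.slogdet(A)      # corresponds to A_logdet
    tr_A_inv = np.trace(np.linalg.inv(A))   # corresponds to trAi
    mahalanobis = alpha @ K @ alpha         # one column of sum(K_alpha * q_alpha)
    return 0.5 * (logdet_A + tr_A_inv - N + mahalanobis)

Summing this over the latent columns and outputs recovers the value returned by build_prior_KL.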
Example #2
def gauss_kl_diag(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[1], float_type)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.size(q_sqrt), float_type)  # constant term
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))  # Log-det of q-cov
    L_inv = tf.matrix_triangular_solve(L, eye(tf.shape(L)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(tf.transpose(L), L_inv, lower=False)
    KL += 0.5 * tf.reduce_sum(
        tf.expand_dims(tf.diag_part(K_inv), 1) *
        tf.square(q_sqrt))  # Trace term.
    return KL
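
The function sums the standard closed-form Gaussian KL over the independent columns. A minimal NumPy reference for a single column, assuming mu and s are one column of q_mu and q_sqrt (kl_diag_reference is a hypothetical name for this sketch):

import numpy as np

def kl_diag_reference(mu, s, K):
    # KL( N(mu, diag(s^2)) || N(0, K) ), closed form for one column
    N = K.shape[0]
    K_inv = np.linalg.inv(K)
    _, logdet_K = np.linalg.slogdet(K)
    return 0.5 * (mu @ K_inv @ mu                     # Mahalanobis term
                  + logdet_K                          # prior log-det term
                  - N                                 # constant term
                  - np.sum(np.log(s ** 2))            # log-det of the q covariance
                  + np.sum(np.diag(K_inv) * s ** 2))  # trace term

The TensorFlow version above computes the same terms via the Cholesky factor of K rather than an explicit inverse, which is both cheaper and numerically safer.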
Example #3
    def build_prior_KL(self):

        KL = tf.Variable(0, name='KL', trainable=False, dtype=float_type)
        for i in range(self.D):
            if self.whiten:
                if self.q_diag:
                    KL += gauss_kl_white_diag(self.q_mu[i], self.q_sqrt[i])
                else:
                    KL += gauss_kl_white(self.q_mu[i], self.q_sqrt[i])
            else:
                K = self.kerns[i].K(self.Zs[self.f_indices[i]]) + eye(
                    self.num_inducing[i]) * jitter_level
                if self.q_diag:
                    KL += gauss_kl_diag(self.q_mu[i], self.q_sqrt[i], K)
                else:
                    KL += gauss_kl(self.q_mu[i], self.q_sqrt[i], K)
        return KL
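
In the whitened branches the variational distribution is placed over whitened variables with prior N(0, I), so the kernel matrix drops out of the KL entirely. A minimal sketch of what the diagonal whitened case reduces to, assuming the same column convention as gauss_kl_diag above (kl_white_diag_reference is a hypothetical name):

import numpy as np

def kl_white_diag_reference(mu, s):
    # KL( N(mu, diag(s^2)) || N(0, I) ) for one column
    return 0.5 * np.sum(s ** 2 + mu ** 2 - 1.0 - np.log(s ** 2))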
Example #4
    def build_prior_KL(self):
        S = np.square(self.s) * eye(self.D)  # diagonal prior covariance v*I with v = s^2
        KL = gauss_kl_diag(self.q_A_mu, self.q_A_sqrt, S)
        return KL
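
Because the prior covariance here is isotropic, the solves inside gauss_kl_diag reduce to elementwise scaling. A minimal sketch of the resulting per-column KL, assuming mu and q_s are one column of q_A_mu and q_A_sqrt and self.s plays the role of the prior standard deviation (kl_isotropic_prior_reference is a hypothetical name):

import numpy as np

def kl_isotropic_prior_reference(mu, q_s, s):
    # KL( N(mu, diag(q_s^2)) || N(0, s^2 * I) ) for one column
    D = mu.shape[0]
    return 0.5 * (np.sum((q_s ** 2 + mu ** 2) / s ** 2)  # trace + Mahalanobis terms
                  - D                                    # constant term
                  + D * np.log(s ** 2)                   # prior log-det term
                  - np.sum(np.log(q_s ** 2)))            # log-det of the q covariance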
Example #5
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F, as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can either return the diagonals of the covariance matrix for
    each output (full_cov=False) or the full covariance matrix (full_cov=True).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

     - Xnew is a data matrix, size N x D
     - X are data points, size M x D
     - kern is a GPflow kernel
     - f is a data matrix, M x K, representing the function values at X, for K functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size M x K or M x M x K
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.

    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * jitter_level
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
        shape = tf.stack([tf.shape(f)[1], 1, 1])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        shape = tf.stack([tf.shape(f)[1], 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # D x N x N or D x N

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # D x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # D x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([tf.shape(f)[1], 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # D x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # D x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # D x N
    fvar = tf.transpose(fvar)  # N x D or N x N x D

    return fmean, fvar
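
For the common unwhitened path with full_cov=False and q_sqrt=None, the TensorFlow ops above implement the usual GP conditional mean Knm Kmm^{-1} f and marginal variance diag(Knn) - diag(Knm Kmm^{-1} Kmn). A minimal NumPy sketch of that path, where conditional_reference, kernel_fn and jitter are assumed stand-ins for this function, kern.K/kern.Kdiag and jitter_level:

import numpy as np

def conditional_reference(Xnew, X, kernel_fn, f, jitter=1e-6):
    Kmn = kernel_fn(X, Xnew)                                    # M x N cross-covariance
    Kmm = kernel_fn(X, X) + jitter * np.eye(X.shape[0])         # M x M with jitter
    Lm = np.linalg.cholesky(Kmm)
    A = np.linalg.solve(Lm, Kmn)                                # Lm^{-1} Kmn
    fvar = np.diag(kernel_fn(Xnew, Xnew)) - np.sum(A ** 2, 0)   # variance reduced by conditioning
    A = np.linalg.solve(Lm.T, A)                                # backsubstitution: Kmm^{-1} Kmn
    fmean = A.T @ f                                             # N x K conditional mean
    return fmean, fvar

In the whitened case the second solve is skipped, so the mean uses A = Lm^{-1} Kmn directly, matching the p(v) = N(0, I), f = L v parameterisation described in the docstring.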