Example #1
    def build_predict(self, Xnew, full_cov=False):
        """
        The posterior approximation of F is given by

            q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)

        Here we project this to F*, the values of the GP at Xnew, which are
        given by

           q(F*) = N ( F* | K_{*f} alpha + mean,
                       K_{**} - K_{*f} [K_{ff} + diag(lambda**-2)]^-1 K_{f*} )

        """

        # compute kernel things
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X)

        # predictive mean
        f_mean = tf.matmul(tf.transpose(Kx), self.q_alpha) + self.mean_function(Xnew)

        # predictive var
        A = K + tf.batch_matrix_diag(tf.transpose(1./tf.square(self.q_lambda)))
        L = tf.batch_cholesky(A)
        Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
        LiKx = tf.batch_matrix_triangular_solve(L, Kx_tiled)
        if full_cov:
            f_var = self.kern.K(Xnew) - tf.batch_matmul(LiKx, LiKx, adj_x=True)
        else:
            f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
        return f_mean, tf.transpose(f_var)
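The predictive variance above never forms [K_{ff} + diag(lambda**-2)]^-1 explicitly: it factors A = K + diag(lambda**-2) with a Cholesky and turns K_{*f} A^-1 K_{f*} into a sum of squares of a triangular solve. Below is a minimal NumPy sketch (not part of the example above; the rbf kernel and the shapes are illustrative assumptions) that checks this identity against a direct solve.

# NumPy check of the triangular-solve identity used for the predictive variance.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(0)
m, n = 5, 3
X = rng.randn(m, 1)
Xnew = rng.randn(n, 1)

def rbf(A, B):
    # simple squared-exponential kernel, a stand-in for self.kern.K
    return np.exp(-0.5 * (A - B.T) ** 2)

K = rbf(X, X)
Kx = rbf(X, Xnew)                       # m x n, matches kern.K(self.X, Xnew)
lam = rng.rand(m) + 0.5                 # one column of q_lambda

A = K + np.diag(1.0 / lam ** 2)
L = cholesky(A, lower=True)
LiKx = solve_triangular(L, Kx, lower=True)

# full covariance vs. marginal variances, as in the full_cov branch
f_cov = rbf(Xnew, Xnew) - LiKx.T @ LiKx
f_var = np.diag(rbf(Xnew, Xnew)) - np.sum(LiKx ** 2, axis=0)

direct = rbf(Xnew, Xnew) - Kx.T @ np.linalg.solve(A, Kx)
assert np.allclose(f_cov, direct)
assert np.allclose(f_var, np.diag(direct))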
Example #2
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], tf.float64)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    Lq = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                   0)  # force lower triangle
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.batch_matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
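The five terms accumulated in KL correspond to the closed-form KL divergence between Gaussians, 0.5 * (tr(K^-1 S) + mu^T K^-1 mu - d + log|K| - log|S|), evaluated entirely through Cholesky factors and triangular solves. Below is a small NumPy check of that decomposition for a single latent function (illustrative shapes, not part of the library code).

# NumPy check that the Cholesky-based terms reproduce the closed-form KL.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(1)
d = 4
mu = rng.randn(d, 1)
Lq = np.tril(rng.randn(d, d), -1) + np.diag(rng.rand(d) + 1.0)  # lower-triangular sqrt of S
S = Lq @ Lq.T
A = rng.randn(d, d)
K = A @ A.T + d * np.eye(d)

L = cholesky(K, lower=True)
alpha = solve_triangular(L, mu, lower=True)
LiLq = solve_triangular(L, Lq, lower=True)

kl_chol = (0.5 * np.sum(alpha ** 2)              # Mahalanobis term
           + np.sum(np.log(np.diag(L)))          # prior log-det
           - 0.5 * d                             # constant term
           - np.sum(np.log(np.diag(Lq)))         # q log-det
           + 0.5 * np.sum(LiLq ** 2))            # trace term

Kinv = np.linalg.inv(K)
kl_direct = 0.5 * (np.trace(Kinv @ S) + (mu.T @ Kinv @ mu).item() - d
                   + np.linalg.slogdet(K)[1] - np.linalg.slogdet(S)[1])
assert np.allclose(kl_chol, kl_direct)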
Example #3
    def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None):
        for np_type in [np.float32, np.float64]:
            a = x.astype(np_type)
            b = y.astype(np_type)
            # For numpy.solve we have to explicitly zero out the strictly
            # upper or lower triangle.
            if lower and a.size > 0:
                a_np = np.tril(a)
            elif a.size > 0:
                a_np = np.triu(a)
            else:
                a_np = a
            if adjoint:
                a_np = np.conj(np.transpose(a_np))

            if batch_dims is not None:
                a = np.tile(a, batch_dims + [1, 1])
                a_np = np.tile(a_np, batch_dims + [1, 1])
                b = np.tile(b, batch_dims + [1, 1])
            with self.test_session():
                if a.ndim == 2:
                    tf_ans = tf.matrix_triangular_solve(
                        a, b, lower=lower, adjoint=adjoint).eval()
                else:
                    tf_ans = tf.batch_matrix_triangular_solve(
                        a, b, lower=lower, adjoint=adjoint).eval()
            np_ans = np.linalg.solve(a_np, b)
            self.assertEqual(np_ans.shape, tf_ans.shape)
            self.assertAllClose(np_ans, tf_ans)
Example #4
  def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None):
    for np_type in [np.float32, np.float64]:
      a = x.astype(np_type)
      b = y.astype(np_type)
      # For numpy.solve we have to explicitly zero out the strictly
      # upper or lower triangle.
      if lower and a.size > 0:
        a_np = np.tril(a)
      elif a.size > 0:
        a_np = np.triu(a)
      else:
        a_np = a
      if adjoint:
        a_np = np.conj(np.transpose(a_np))

      if batch_dims is not None:
        a = np.tile(a, batch_dims + [1, 1])
        a_np = np.tile(a_np, batch_dims + [1, 1])
        b = np.tile(b, batch_dims + [1, 1])
      with self.test_session():
        if a.ndim == 2:
          tf_ans = tf.matrix_triangular_solve(a,
                                              b,
                                              lower=lower,
                                              adjoint=adjoint).eval()
        else:
          tf_ans = tf.batch_matrix_triangular_solve(a,
                                                    b,
                                                    lower=lower,
                                                    adjoint=adjoint).eval()
      np_ans = np.linalg.solve(a_np, b)
      self.assertEqual(np_ans.shape, tf_ans.shape)
      self.assertAllClose(np_ans, tf_ans)
Example #5
  def _verifySolve(self, x, y, lower=True):
    for np_type in [np.float32, np.float64]:
      a = x.astype(np_type)
      b = y.astype(np_type)
      with self.test_session():
        if a.ndim == 2:
          tf_ans = tf.matrix_triangular_solve(a, b, lower=lower)
        else:
          tf_ans = tf.batch_matrix_triangular_solve(a, b, lower=lower)
        out = tf_ans.eval()
      if lower:
        np_ans = np.linalg.solve(np.tril(a), b)
      else:
        np_ans = np.linalg.solve(np.triu(a), b)
      self.assertEqual(np_ans.shape, out.shape)
      self.assertAllClose(np_ans, out)
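All three test variants assert the same property: a triangular solve must agree with a dense numpy.linalg.solve once the unused triangle of the matrix has been explicitly zeroed. Below is a standalone NumPy/SciPy sketch of that check (hypothetical data, no TensorFlow session required).

# Standalone sketch of the property these tests verify.
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.RandomState(2)
a = rng.randn(4, 4) + 4 * np.eye(4)   # well-conditioned test matrix
b = rng.randn(4, 2)

tri_ans = solve_triangular(a, b, lower=True)   # reads only the lower triangle
np_ans = np.linalg.solve(np.tril(a), b)        # dense solve needs the zeros made explicit
assert np.allclose(tri_ans, np_ans)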
Example #6
  def _define_full_covariance_probs(self, shard_id, shard):
    """Defines the full covariance probabilties per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
    diff = shard - self._means
    cholesky = tf.batch_cholesky(self._covs + self._min_var)
    log_det_covs = 2.0 * tf.reduce_sum(tf.log(
        tf.batch_matrix_diag_part(cholesky)), 1)
    x_mu_cov = tf.square(tf.batch_matrix_triangular_solve(
        cholesky, tf.transpose(diff, perm=[0, 2, 1]),
        lower=True))
    diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
    self._probs[shard_id] = -0.5 * (
        diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
        log_det_covs)
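The per-example log-probability computed here is the standard full-covariance Gaussian log-density, with the quadratic form and log-determinant obtained from a Cholesky factor and a triangular solve. Below is a NumPy sketch of the same computation for one example and one class, checked against scipy.stats (illustrative values, not from the TensorFlow source).

# NumPy check of the Cholesky-based Gaussian log-density.
import numpy as np
from scipy.linalg import cholesky, solve_triangular
from scipy.stats import multivariate_normal

rng = np.random.RandomState(3)
dim = 3
x = rng.randn(dim)
mu = rng.randn(dim)
A = rng.randn(dim, dim)
cov = A @ A.T + dim * np.eye(dim)

L = cholesky(cov, lower=True)
log_det_cov = 2.0 * np.sum(np.log(np.diag(L)))   # log |Sigma|
z = solve_triangular(L, x - mu, lower=True)      # L^-1 (x - mu)
log_prob = -0.5 * (np.sum(z ** 2) + dim * np.log(2 * np.pi) + log_det_cov)

assert np.allclose(log_prob, multivariate_normal(mu, cov).logpdf(x))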
Example #7
    def _define_full_covariance_probs(self, shard_id, shard):
        """Defines the full covariance probabilties per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
        diff = shard - self._means
        cholesky = tf.batch_cholesky(self._covs + self._min_var)
        log_det_covs = 2.0 * tf.reduce_sum(
            tf.log(tf.batch_matrix_diag_part(cholesky)), 1)
        x_mu_cov = tf.square(
            tf.batch_matrix_triangular_solve(cholesky,
                                             tf.transpose(diff, perm=[0, 2,
                                                                      1]),
                                             lower=True))
        diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
        self._probs[shard_id] = -0.5 * (
            diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
            log_det_covs)
Example #8
    def build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood,
        which is:

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

        with

            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .

        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # compute the variance for each of the outputs
        I = tf.tile(tf.expand_dims(eye(self.num_data), 0), [self.num_latent, 1, 1])
        A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
            tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
        L = tf.batch_cholesky(A)
        Li = tf.batch_matrix_triangular_solve(L, I)
        tmp = Li / tf.transpose(self.q_lambda)
        f_var = 1./tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.batch_matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent
                    + tf.reduce_sum(K_alpha*self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
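The variance computation relies on the identity diag([K^-1 + diag(lambda^2)]^-1) = 1/lambda^2 - diag(Lambda^-1 A^-1 Lambda^-1) with A = I + Lambda K Lambda, which is what the Cholesky and triangular solve evaluate column by column. Below is a NumPy check of that identity for one latent function (illustrative shapes, not library code).

# NumPy check of the identity behind f_var in build_likelihood.
import numpy as np

rng = np.random.RandomState(4)
n = 5
B = rng.randn(n, n)
K = B @ B.T + n * np.eye(n)
lam = rng.rand(n) + 0.5
Lam = np.diag(lam)

A = np.eye(n) + Lam @ K @ Lam
L = np.linalg.cholesky(A)
Li = np.linalg.solve(L, np.eye(n))   # L^-1, as in the triangular solve against I
tmp = Li / lam                       # divide columns by lambda
f_var = 1.0 / lam ** 2 - np.sum(tmp ** 2, axis=0)

direct = np.diag(np.linalg.inv(np.linalg.inv(K) + Lam @ Lam))
assert np.allclose(f_var, direct)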
Example #9
  def _batch_sqrt_solve(self, rhs):
    return tf.batch_matrix_triangular_solve(self._chol, rhs, lower=True)
Example #10
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.
    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.
    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.
    The method can either return the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).
    We assume R independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).
     - Xnew is a data matrix, size n x D
     - X are data points, size m x D
     - kern is a GPinv kernel
     - f is a data matrix, m x R, representing the function values at X, for R functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size m x R or m x m x R
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.
    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened
    """
    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = tf.transpose(kern.K(X, Xnew), [2, 0, 1])  # [R, m, n]
    Lm = tf.transpose(kern.Cholesky(X), [2, 0, 1])  # [R, m, m]

    # Compute the projection matrix A
    A = tf.batch_matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:  # shape [R,n,n]
        fvar = tf.transpose(kern.K(Xnew), [2, 0, 1]) - tf.batch_matmul(
            A, A, adj_x=True)
    else:  # shape [R,n]
        fvar = tf.transpose(kern.Kdiag(Xnew)) - tf.reduce_sum(tf.square(A), 1)

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.batch_matrix_triangular_solve(tf.transpose(Lm, [0, 2, 1]),
                                             A,
                                             lower=False)

    # change shape of f [m,R] -> [R,m,1]
    f = tf.expand_dims(tf.transpose(f), -1)
    # construct the conditional mean, sized [n, R]
    fmean = tf.transpose(
        tf.squeeze(tf.batch_matmul(tf.transpose(A, [0, 2, 1]), f), [-1]))

    if q_sqrt is not None:
        # diagonal case.
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x m x n
        # full cov case
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                          0)  # R x m x m
            LTA = tf.batch_matmul(L, A, adj_x=True)  # R x m x n
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True)  # R x n x n
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x n
    fvar = tf.transpose(fvar)  # n x R or n x n x R

    return fmean, fvar
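For a single output, with no q_sqrt and whiten=False, the function reduces to the usual GP conditional: mean K_{mn}^T K_{mm}^-1 f and covariance K_{nn} - K_{mn}^T K_{mm}^-1 K_{mn}, computed with two triangular solves against the Cholesky factor of K_{mm}. Below is a NumPy sketch of that special case (the rbf kernel is a stand-in for the kernel object and is not part of GPinv).

# NumPy sketch of the unwhitened, single-output conditional.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(5)
m, n = 6, 4
X = rng.randn(m, 1)
Xnew = rng.randn(n, 1)
f = rng.randn(m, 1)

def rbf(A, B):
    # stand-in for the kernel object's K method
    return np.exp(-0.5 * (A - B.T) ** 2)

Kmm = rbf(X, X) + 1e-6 * np.eye(m)
Kmn = rbf(X, Xnew)
Knn = rbf(Xnew, Xnew)

Lm = cholesky(Kmm, lower=True)
A = solve_triangular(Lm, Kmn, lower=True)        # Lm^-1 Kmn
fvar = Knn - A.T @ A                             # conditional covariance
A = solve_triangular(Lm.T, A, lower=False)       # back-substitution (unwhitened case)
fmean = A.T @ f                                  # conditional mean, n x 1

assert np.allclose(fmean, Kmn.T @ np.linalg.solve(Kmm, f))
assert np.allclose(fvar, Knn - Kmn.T @ np.linalg.solve(Kmm, Kmn))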
Example #11
  def _batch_sqrt_solve(self, rhs):
    return tf.batch_matrix_triangular_solve(self._chol, rhs, lower=True)