Example #1
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], tf.float64)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    Lq = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                   0)  # force lower triangle
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.batch_matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
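For reference, the quantity assembled term by term above is the closed-form KL divergence between Gaussians, summed over the D = num_latent columns of dimension N, with S_d = Lq_d Lq_d^T the covariance of the d-th column of q and K = L L^T the prior covariance:

    KL[q || p] = \sum_{d=1}^{D} \tfrac{1}{2}\Big( \mu_d^\top K^{-1}\mu_d
                 + \operatorname{tr}(K^{-1} S_d) - N + \log\det K - \log\det S_d \Big)

The code evaluates these as the Mahalanobis term ||L^{-1}\mu_d||^2, the trace term ||L^{-1} Lq_d||_F^2, the constant -N D, the prior log-det D \log\det K, and the q log-det \sum_d \log\det S_d, using \log\det K = \sum_i \log L_{ii}^2.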
Example #2
def gauss_kl(q_mu, q_sqrt, K, num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for d in range(num_latent):
        Lq = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
        # Log determinant of q covariance:
        KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
        LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
        KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
Example #3
def gauss_kl_white(q_mu, q_sqrt):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, I)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance.
    """
    KL = 0.5 * tf.reduce_sum(tf.square(q_mu))  # Mahalanobis term
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                  0)  # force lower triangle
    KL -= 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(L))))  # logdet
    KL += 0.5 * tf.reduce_sum(tf.square(L))  # Trace term.
    return KL
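With an identity prior covariance (K = I) the same closed-form KL loses the solves against L, which is exactly the simplification above:

    KL[q || p] = \sum_{d=1}^{D} \tfrac{1}{2}\Big( \|\mu_d\|^2 + \|Lq_d\|_F^2 - N - \log\det S_d \Big)

so the trace term reduces to the squared Frobenius norm of each lower-triangular factor and no Cholesky of the prior is needed.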
Example #4
 def _random_cholesky_array(self, shape):
   mat = self._rng.rand(*shape)
   chol = distributions.batch_matrix_diag_transform(mat,
                                                    transform=tf.nn.softplus)
   # Zero the upper triangle because we're using this as a true Cholesky factor
   # in our tests.
   return tf.batch_matrix_band_part(chol, -1, 0).eval()
Example #5
def gauss_kl_white(q_mu, q_sqrt, num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, I)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).
    """
    KL = 0.5 * tf.reduce_sum(tf.square(q_mu))  # Mahalanobis term
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for d in range(num_latent):
        Lq = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
        # Log determinant of q covariance:
        KL -= 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
        KL += 0.5 * tf.reduce_sum(tf.square(Lq))  # Trace term.
    return KL
Example #6
def gauss_kl(q_mu, q_sqrt, K, num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for d in range(num_latent):
        Lq = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
        # Log determinant of q covariance:
        KL += -0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
        LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
        KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
Example #7
def gauss_kl_white(q_mu, q_sqrt, num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, I)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).
    """
    KL = 0.5 * tf.reduce_sum(tf.square(q_mu))  # Mahalanobis term
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for d in range(num_latent):
        Lq = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
        # Log determinant of q covariance:
        KL -= 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
        KL += 0.5 * tf.reduce_sum(tf.square(Lq))  # Trace term.
    return KL
Example #8
 def _random_chol(self, *shape):
   mat = self._rng.rand(*shape)
   chol = distributions.batch_matrix_diag_transform(
       mat, transform=tf.nn.softplus)
   chol = tf.batch_matrix_band_part(chol, -1, 0)
   sigma = tf.batch_matmul(chol, chol, adj_y=True)
   return chol.eval(), sigma.eval()
Example #9
 def CheckUnitary(self, x):
     # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
     xx = tf.batch_matmul(x, x, adj_x=True)
     identity = tf.batch_matrix_band_part(tf.ones_like(xx), 0, 0)
     if dtype_ == np.float32:
         tol = 1e-5
     else:
         tol = 1e-14
     self.assertAllClose(identity.eval(), xx.eval(), atol=tol)
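The identity matrix in this check is built by keeping the band with zero sub- and zero super-diagonals of an all-ones matrix, which gives the identity for every matrix in the batch. A minimal NumPy sketch of that trick (NumPy is used here purely for illustration; the test itself relies on the TF op):

import numpy as np

ones = np.ones((2, 3, 3))                 # a batch of all-ones 3x3 matrices
eye_batch = np.tril(np.triu(ones, 0), 0)  # band part with lower=0, upper=0 keeps only the diagonal
assert np.allclose(eye_batch, np.eye(3))  # every batch member is the identity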
Example #10
 def CheckUnitary(self, x):
   # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
   xx = tf.batch_matmul(x, x, adj_x=True)
   identity = tf.batch_matrix_band_part(tf.ones_like(xx), 0, 0)
   if is_single:
     tol = 1e-5
   else:
     tol = 1e-14
   self.assertAllClose(identity.eval(), xx.eval(), atol=tol)
Example #11
 def CheckUnitary(self, x):
     # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
     xx = tf.batch_matmul(x, x, adj_x=True)
     identity = tf.batch_matrix_band_part(tf.ones_like(xx), 0, 0)
     if dtype_ in (np.float32, np.complex64):
         tol = 1e-5
     else:
         tol = 1e-14
     self.assertAllClose(np.real(identity.eval()), np.real(xx.eval()), atol=tol)
     self.assertAllClose(np.imag(identity.eval()), np.imag(xx.eval()), atol=tol)
Example #12
 def Test(self):
   shape = batch_shape_ + shape_
   x = tf.constant(np.random.rand(*shape), dtype=dtype_)
   with self.test_session(use_gpu=use_gpu_):
     for lower in -1, 0, 1, shape_[-2] - 1:
       for upper in -1, 0, 1, shape_[-1] - 1:
         y = tf.batch_matrix_band_part(x, lower, upper)
         error = tf.test.compute_gradient_error(x, x.get_shape().as_list(), y,
                                                y.get_shape().as_list())
         self.assertLess(error, 1e-4)
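For orientation, the band-part rule being differentiated here keeps entry (i, j) iff (lower < 0 or i - j <= lower) and (upper < 0 or j - i <= upper), a negative bound meaning "keep that whole triangle". A small NumPy reference sketch of that rule (consistent with the np.triu/np.tril check used in the later examples):

import numpy as np

def band_part_reference(x, lower, upper):
    # Keep entry (i, j) iff it lies within `lower` sub-diagonals and
    # `upper` super-diagonals of the main diagonal; negative means unbounded.
    i = np.arange(x.shape[-2])[:, None]
    j = np.arange(x.shape[-1])[None, :]
    keep = ((lower < 0) | (i - j <= lower)) & ((upper < 0) | (j - i <= upper))
    return np.where(keep, x, 0)

x = np.arange(16.0).reshape(4, 4)
print(band_part_reference(x, 1, 0))  # one sub-diagonal plus the main diagonal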
Example #13
 def CheckUnitary(self, x):
   # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
   xx = tf.batch_matmul(x, x, adj_x=True)
   identity = tf.batch_matrix_band_part(tf.ones_like(xx), 0, 0)
   # Any decent SVD code should produce singular vectors that are
   # orthonormal to (almost) full machine precision.
   if dtype_ == np.float32:
     atol = 5e-6
   else:
     atol = 1e-14
   self.assertAllClose(identity.eval(), xx.eval(), atol=atol)
Example #14
 def Test(self):
     shape = batch_shape_ + shape_
     x = tf.constant(np.random.rand(*shape), dtype=dtype_)
     with self.test_session(use_gpu=use_gpu_):
         for lower in -1, 0, 1, shape_[-2] - 1:
             for upper in -1, 0, 1, shape_[-1] - 1:
                 y = tf.batch_matrix_band_part(x, lower, upper)
                 error = tf.test.compute_gradient_error(
                     x,
                     x.get_shape().as_list(), y,
                     y.get_shape().as_list())
                 self.assertLess(error, 1e-4)
Example #15
 def CheckUnitary(self, x):
     # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
     xx = tf.batch_matmul(x, x, adj_x=True)
     identity = tf.batch_matrix_band_part(tf.ones_like(xx), 0, 0)
     if is_single:
         tol = 1e-5
     else:
         tol = 1e-14
     self.assertAllClose(np.real(identity.eval()),
                         np.real(xx.eval()),
                         atol=tol)
     self.assertAllClose(np.imag(identity.eval()),
                         np.imag(xx.eval()),
                         atol=tol)
 def Test(self):
     mat = np.ones(shape_).astype(dtype_)
     batch_mat = np.tile(mat, batch_shape + (1, 1))
     with self.test_session(use_gpu=use_gpu_):
         for lower in -1, 0, 1, shape_[-2] - 1:
             for upper in -1, 0, 1, shape_[-1] - 1:
                 band_np = mat
                 if lower >= 0:
                     band_np = np.triu(band_np, -lower)
                 if upper >= 0:
                     band_np = np.tril(band_np, upper)
                  if batch_shape != ():
                     band_np = np.tile(band_np, batch_shape + (1, 1))
                 band = tf.batch_matrix_band_part(batch_mat, lower, upper)
                 self.assertAllEqual(band_np, band.eval())
 def Test(self):
   mat = np.ones(shape_).astype(dtype_)
   batch_mat = np.tile(mat, batch_shape + (1, 1))
   with self.test_session(use_gpu=use_gpu_):
     for lower in -1, 0, 1, shape_[-2] - 1:
       for upper in -1, 0, 1, shape_[-1] - 1:
         band_np = mat
         if lower >= 0:
           band_np = np.triu(band_np, -lower)
         if upper >= 0:
           band_np = np.tril(band_np, upper)
          if batch_shape != ():
           band_np = np.tile(band_np, batch_shape + (1, 1))
         band = tf.batch_matrix_band_part(batch_mat, lower, upper)
         self.assertAllEqual(band_np, band.eval())
Example #18
    def _multi_head(self, queries, keys, query_mask, key_mask, num_heads, block_feature=False, scope='multihead', reuse=None):
        with vs.variable_scope(scope, reuse=reuse):
            # batch_size * seq_size_q * num_units
            Q = rnn_cell._linear(tf.reshape(queries,
                                            [-1, self.num_units]),
                                 self.num_units, True, 1.0, scope='Q')
            Q = tf.reshape(Q, tf.shape(queries))
            # batch_size * seq_size_k * num_units
            K = rnn_cell._linear(tf.reshape(keys,
                                            [-1, self.num_units]),
                                 self.num_units, True, 1.0, scope='K')
            K = tf.reshape(K, tf.shape(keys))
            V = rnn_cell._linear(tf.reshape(keys,
                                            [-1, self.num_units]),
                                 self.num_units, True, 1.0, scope='V')
            V = tf.reshape(V, tf.shape(keys))
            Q_ = tf.pack(tf.split(2, num_heads, Q))  # num_heads *  batch_size * seq_size_q *num_units/num_heads
            K_ = tf.pack(tf.split(2, num_heads, K))  # num_heads * batch_size * seq_size_k * num_units/num_heads
            V_ = tf.pack(tf.split(2, num_heads, V))  # num_heads * batch_size * seq_size_k * num_units/num_heads
            len_q = tf.shape(queries)[1]
            len_k = tf.shape(keys)[1]

            # Compute weight
            weights = tf.batch_matmul(Q_, tf.transpose(K_, [0,1,3,2])) \
                      / ((self.num_units/num_heads) ** 0.5)    # num_heads * batch_size * seq_size_q * seq_size_k
            key_mask = tf.tile(tf.reshape(key_mask, [1, -1, 1, len_k]), [num_heads, 1, len_q, 1])
            weights = tf.select(key_mask, weights, tf.ones_like(weights) * (-2**32 + 1))

            if block_feature:
                diag_vals = tf.ones_like(weights[0, 0, :, :]) # seq_size_q * seq_size_k
                mask = tf.cast(tf.batch_matrix_band_part(diag_vals, -1, 0), tf.bool)
                mask = tf.tile(tf.reshape(mask, [1, 1, len_q, len_k]), [num_heads, tf.shape(queries)[0], 1, 1])
                weights = tf.select(mask, weights, tf.ones_like(weights) * (-2 ** 32 + 1))

            weights = tf.reshape(tf.nn.softmax(tf.reshape(weights, [-1, len_k])),
                                 [num_heads, -1, len_q, len_k])
            # num_heads * batch_size * seq_size_q * num_units/num_heads
            ctx = tf.batch_matmul(weights,  V_)

            ctx *= tf.reshape(tf.cast(query_mask, tf.float32), [-1, len_q, 1]) # num_heads * batch_size * seq_size_q * num_units/num_heads
            ctx = tf.concat(2, tf.unpack(ctx))  # batch_size * seq_size_q * num_units
            ctx = rnn_cell._linear(tf.reshape(ctx, [-1, self.num_units]), self.num_units, True, 1.0, scope='context')
            ctx = tf.reshape(ctx, [-1, len_q, self.num_units])
            drop_ctx = tf.nn.dropout(ctx, keep_prob=self.keep_prob)
            # Add and Normalization
            res = layer_normalization(drop_ctx + queries)
        return  res, weights
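When block_feature is set, the band-part call above builds a lower-triangular ("causal") mask, so each query position can only attend to key positions at or before it; disallowed logits are pushed to a large negative value so they vanish under the softmax. A rough NumPy sketch of that masking step (illustrative only; the head and batch tiling of the original is omitted):

import numpy as np

len_q = len_k = 4
logits = np.random.randn(len_q, len_k)
causal = np.tril(np.ones((len_q, len_k), dtype=bool))  # same pattern as band_part(ones, -1, 0)
masked = np.where(causal, logits, -2.0 ** 32 + 1)      # block attention to future positions
weights = np.exp(masked) / np.exp(masked).sum(axis=-1, keepdims=True)
print(np.round(weights, 3))  # entries above the diagonal are ~0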
Example #19
def vec2lower_triangle(vec, dim):
    """
    Convert a vector M of size (n * m) into a lower-triangular matrix of shape
    (n, m) (here n = m = dim) whose diagonal entries are exponentiated:
    [[e^M[0],    0,           0,             ...,    0]
     [M[n-1],    e^M[n],      0,      0,     ...,    0]
     [M[2n-1],   M[2n],       e^M[2n+1], 0,  ...,    0]
     ...
     [M[m(n-1)], M[m(n-1)+1], ...,       M[mn-2], e^M[mn-1]]]
    """
    L = tf.reshape(vec, [-1, dim, dim])
    if int(tf.__version__.split('.')[1]) >= 10:
        L = tf.matrix_band_part(L, -1, 0) - tf.matrix_diag(
            tf.matrix_diag_part(L)) + tf.matrix_diag(
                tf.exp(tf.matrix_diag_part(L)))
    else:
        L = tf.batch_matrix_band_part(L, -1, 0) - tf.batch_matrix_diag(
            tf.batch_matrix_diag_part(L)) + tf.batch_matrix_diag(
                tf.exp(tf.batch_matrix_diag_part(L)))
    return L
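A small NumPy sketch of the same transform for a single dim x dim block (illustrative only): the reshaped vector is restricted to its lower triangle and the diagonal is exponentiated, so the result is a valid Cholesky factor with a strictly positive diagonal.

import numpy as np

dim = 3
vec = np.random.randn(dim * dim)
L = np.tril(vec.reshape(dim, dim))        # keep the lower triangle
np.fill_diagonal(L, np.exp(np.diag(L)))   # exponentiate the diagonal entries
assert np.all(np.diag(L) > 0) and np.allclose(L, np.tril(L))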
Example #20
 def _sample(self, N):
     """
     :param integer N: number of samples
      :returns: samples drawn from the variational posterior.
       The Kullback-Leibler divergence is stored as self._KL.
     """
     n = self.num_data
     R = self.num_latent
     # Match dimension of the posterior variance to the data.
     if self.q_diag:
         sqrt = tf.batch_matrix_diag(tf.transpose(self.q_sqrt)) # [R,n,n]
     else:
         sqrt = tf.batch_matrix_band_part(
                         tf.transpose(self.q_sqrt,[2,0,1]), -1, 0) # [R,n,n]
     # Log determinant of matrix S = q_sqrt * q_sqrt^T
     logdet_S = tf.cast(N, float_type)*tf.reduce_sum(
             tf.log(tf.square(tf.batch_matrix_diag_part(sqrt))))
     sqrt = tf.tile(tf.expand_dims(sqrt, 1), [1,N,1,1]) # [R,N,n,n]
      # normal random samples, [R,N,n,1]
     v_samples = tf.random_normal([R,N,n,1], dtype=float_type)
     # Match dimension of the posterior mean, [R,N,n,1]
     mu = tf.tile(tf.expand_dims(tf.expand_dims(
                             tf.transpose(self.q_mu), 1), -1), [1,N,1,1])
     u_samples = mu + tf.batch_matmul(sqrt, v_samples)
      # Stochastic approximation of the Kullback-Leibler divergence KL[q(f)||p(f)]
     self._KL = - 0.5 * logdet_S\
          - 0.5 * tf.reduce_sum(tf.square(v_samples)) \
          + 0.5 * tf.reduce_sum(tf.square(u_samples))
     # Cholesky factor of kernel [R,N,n,n]
     L = tf.tile(tf.expand_dims(
             tf.transpose(self.kern.Cholesky(self.X), [2,0,1]),1), [1,N,1,1])
     # mean, sized [N,n,R]
     mean = tf.tile(tf.expand_dims(
                 self.mean_function(self.X),
             0), [N,1,1])
     # sample from posterior, [N,n,R]
     f_samples = tf.transpose(
             tf.squeeze(tf.batch_matmul(L, u_samples),[-1]), # [R,N,n]
             [1,2,0]) + mean
      return f_samples
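The quantity accumulated in self._KL is a Monte-Carlo estimate built from the per-sample log-density difference: with v ~ N(0, I), u = \mu + L_q v, S = L_q L_q^\top and the whitened prior p(u) = N(0, I),

    \log q(u) - \log p(u) = -\tfrac{1}{2}\log\det S - \tfrac{1}{2} v^\top v + \tfrac{1}{2} u^\top u,

since the N(0, I) normalising constants cancel. Summing this over the N samples and R latent functions gives the three terms accumulated above (the log-det appears once per sample, hence the factor N on logdet_S).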
Example #21
def conditional(Xnew, X, kern, f, num_columns,
                full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can return either the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

     - Xnew is a data matrix, size N x D
     - X are data points, size M x D
     - kern is a GPflow kernel
     - f is a data matrix, M x K, representing the function values at X.
     - num_columns is an integer number of columns in the f matrix (must match
       q_sqrt's last dimension)
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size M x K or M x M x K
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.

    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
        fvar = tf.tile(tf.expand_dims(fvar, 2), [1, 1, num_columns])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.expand_dims(fvar, 1), [1, num_columns])

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    # add extra projected variance from q(f) if needed
    if q_sqrt is not None:
        projected_var = []
        for d in range(num_columns):
            if q_sqrt.get_shape().ndims == 2:
                LTA = A * q_sqrt[:, d:d + 1]
            elif q_sqrt.get_shape().ndims == 3:
                L = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
                LTA = tf.matmul(tf.transpose(L), A)
            else:  # pragma no cover
                raise ValueError("Bad dimension for q_sqrt: %s" %
                                 str(q_sqrt.get_shape().ndims))
            if full_cov:
                projected_var.append(tf.matmul(tf.transpose(LTA), LTA))
            else:
                projected_var.append(tf.reduce_sum(tf.square(LTA), 0))
        fvar = fvar + tf.transpose(tf.pack(projected_var))

    return fmean, fvar
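For orientation, the quantities assembled here are the standard GP conditional. With A = Lm^{-1} Kmn when whiten=True, or A = Kmm^{-1} Kmn after the second triangular solve otherwise, and S_d the covariance encoded by q_sqrt (diag(q_sqrt_d)^2 or Lq_d Lq_d^\top):

    fmean_d = A^\top f_d, \qquad
    fvar_d  = K_{**} - K_{*m} K_{mm}^{-1} K_{m*} + A^\top S_d A

where the conditioning variance (the first two terms) is computed from the whitened A before the extra solve, and the q_sqrt loop adds the projected variance A^\top S_d A column by column.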
Example #22
def vec2trimat(vec, dim):
    L = tf.reshape(vec, [-1, dim, dim])
    L = tf.batch_matrix_band_part(L, -1, 0) - tf.batch_matrix_diag(tf.batch_matrix_diag_part(L)) + \
        tf.batch_matrix_diag(tf.exp(tf.batch_matrix_diag_part(L)))
    return L
Example #23
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can return either the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

     - Xnew is a data matrix, size N x D
     - X are data points, size M x D
     - kern is a GPflow kernel
     - f is a data matrix, M x K, representing the function values at X, for K functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size M x K or M x M x K
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.

    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * settings.numerics.jitter_level
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
        shape = tf.pack([tf.shape(f)[1], 1, 1])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        shape = tf.pack([tf.shape(f)[1], 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # D x N x N or D x N

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # D x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                          0)  # D x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0),
                              tf.pack([tf.shape(f)[1], 1, 1]))
            LTA = tf.batch_matmul(L, A_tiled, adj_x=True)  # D x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True)  # D x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # D x N
    fvar = tf.transpose(fvar)  # N x D or N x N x D

    return fmean, fvar
Example #24
def conditional(Xnew,
                X,
                kern,
                f,
                num_columns,
                full_cov=False,
                q_sqrt=None,
                whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that 
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can return either the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last axis of q_sqrt).

    Xnew is a data matrix, size N x D
    X are data points, size M x D
    kern is a GPflow kernel
    f is a data matrix, M x K, representing the function values at X.
    num_columns is an integer number of columns in the f matrix (must match q_sqrt's last dimension)
    (optional) q_sqrt is a matrix of standard-deviations or Cholesky matrices, size M x K or M x M x K
    (optional) whiten is a boolean: whether to whiten the representation as described above. 


    These functions are now considered deprecated, subsumed into this one function:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    #Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    #compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
        fvar = tf.tile(tf.expand_dims(fvar, 2), [1, 1, num_columns])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.expand_dims(fvar, 1), [1, num_columns])

    #another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    #construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    #add extra projected variance from q(f) if needed
    if q_sqrt is not None:
        projected_var = []
        for d in range(num_columns):
            if q_sqrt.get_shape().ndims == 2:
                LTA = A * q_sqrt[:, d:d + 1]
            elif q_sqrt.get_shape().ndims == 3:
                L = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
                LTA = tf.matmul(tf.transpose(L), A)
            else:  # pragma no cover
                raise ValueError("Bad dimension for q_sqrt: %s" %
                                 str(q_sqrt.get_shape().ndims))
            if full_cov:
                projected_var.append(tf.matmul(tf.transpose(LTA), LTA))
            else:
                projected_var.append(tf.reduce_sum(tf.square(LTA), 0))
        fvar = fvar + tf.transpose(tf.pack(projected_var))

    return fmean, fvar
Example #25
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.
    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.
    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.
    The method can return either the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).
    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).
     - Xnew is a data matrix, size n x D
     - X are data points, size m x D
     - kern is a GPinv kernel
     - f is a data matrix, m x R, representing the function values at X, for R functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size m x R or m x m x R
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.
    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened
    """
    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = tf.transpose(kern.K(X, Xnew), [2, 0, 1])  # [R,m,n]
    Lm = tf.transpose(kern.Cholesky(X), [2, 0, 1])  # [R,m,m]

    # Compute the projection matrix A
    A = tf.batch_matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:  # shape [R,n,n]
        fvar = tf.transpose(kern.K(Xnew), [2, 0, 1]) - tf.matmul(
            A, A, transpose_a=True)
    else:  # shape [R,n]
        fvar = tf.transpose(kern.Kdiag(Xnew)) - tf.reduce_sum(tf.square(A), 1)

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.batch_matrix_triangular_solve(tf.transpose(Lm, [0, 2, 1]),
                                             A,
                                             lower=False)

    # change shape of f [m,R] -> [R,m,1]
    f = tf.expand_dims(tf.transpose(f), -1)
    # construct the conditional mean, sized [m,R]
    fmean = tf.transpose(
        tf.squeeze(tf.batch_matmul(tf.transpose(A, [0, 2, 1]), f), [-1]))

    if q_sqrt is not None:
        # diagonal case.
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x m x n
        # full cov case
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                          0)  # D x M x M
            LTA = tf.batch_matmul(L, A, adj_x=True)  # R x m x n
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True)  # R x n x n
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x n
    fvar = tf.transpose(fvar)  # n x R or n x n x R

    return fmean, fvar
Example #26
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case 'f' represents the values taken by v.

    The method can return either the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

     - Xnew is a data matrix, size N x D
     - X are data points, size M x D
     - kern is a GPflow kernel
     - f is a data matrix, M x K, representing the function values at X, for K functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size M x K or M x M x K
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.

    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    # compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * settings.numerics.jitter_level
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
        shape = tf.pack([tf.shape(f)[1], 1, 1])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        shape = tf.pack([tf.shape(f)[1], 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # D x N x N or D x N

    # another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # D x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # D x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.pack([tf.shape(f)[1], 1, 1]))
            LTA = tf.batch_matmul(L, A_tiled, adj_x=True)  # D x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True)  # D x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # D x N
    fvar = tf.transpose(fvar)  # N x D or N x N x D

    return fmean, fvar