Example 1
def gauss_kl(q_mu, q_sqrt, K, num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and the last dim of q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for d in range(num_latent):
        Lq = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
        # Log determinant of q covariance:
        KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.diag_part(Lq))))
        LiLq = tf.matrix_triangular_solve(L, Lq, lower=True)
        KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
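For reference (this note is not part of the original source), the terms above assemble the closed-form KL between Gaussians. With S = q_sqrt q_sqrt^T for each latent column and d the total dimension (rows of q_mu times num_latent):

    KL[ N(q_mu, S) || N(0, K) ] = 0.5 * [ q_mu^T K^-1 q_mu + tr(K^-1 S) - d + log|K| - log|S| ]

Here q_mu^T K^-1 q_mu is the Mahalanobis term (||L^-1 q_mu||^2), log|K| is counted once per latent column, log|S| = 2 * sum(log diag(Lq)), and tr(K^-1 S) is the squared Frobenius norm of L^-1 Lq.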
Example 2
File: gpr.py Project: erenis/GPflow
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, the points at which we want to predict.

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.Y.shape[1]])
        return fmean, fvar
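The Cholesky-based algebra above is the standard GP-regression predictive computation. Below is a minimal NumPy sketch of the same equations, purely illustrative (it assumes a zero mean function and a kernel callable `k`, and is not part of GPflow):

import numpy as np
from scipy.linalg import solve_triangular

def gpr_predict_np(X, Y, Xnew, k, noise_var):
    # factorise K = Kff + sigma^2 I once
    K = k(X, X) + noise_var * np.eye(X.shape[0])
    L = np.linalg.cholesky(K)
    Kx = k(X, Xnew)                              # N x N*
    A = solve_triangular(L, Kx, lower=True)      # L^{-1} Kfs
    V = solve_triangular(L, Y, lower=True)       # L^{-1} Y
    fmean = A.T @ V                              # Ksf (Kff + sigma^2 I)^{-1} Y
    fvar = np.diag(k(Xnew, Xnew)) - np.sum(A ** 2, axis=0)
    return fmean, fvar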
Example 3
    def compute_upper_bound(self):
        num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)

        Kdiag = self.kern.Kdiag(self.X)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        Kuf = self.feature.Kuf(self.kern, self.X)

        L = tf.cholesky(Kuu)
        LB = tf.cholesky(Kuu + self.likelihood.variance ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))

        LinvKuf = tf.matrix_triangular_solve(L, Kuf, lower=True)
        # Using the Trace bound, from Titsias' presentation
        c = tf.reduce_sum(Kdiag) - tf.reduce_sum(LinvKuf ** 2.0)
        # Kff = self.kern.K(self.X)
        # Qff = tf.matmul(Kuf, LinvKuf, transpose_a=True)

        # Alternative bound on max eigenval:
        # c = tf.reduce_max(tf.reduce_sum(tf.abs(Kff - Qff), 0))
        corrected_noise = self.likelihood.variance + c

        const = -0.5 * num_data * tf.log(2 * np.pi * self.likelihood.variance)
        logdet = tf.reduce_sum(tf.log(tf.diag_part(L))) - tf.reduce_sum(tf.log(tf.diag_part(LB)))

        LC = tf.cholesky(Kuu + corrected_noise ** -1.0 * tf.matmul(Kuf, Kuf, transpose_b=True))
        v = tf.matrix_triangular_solve(LC, corrected_noise ** -1.0 * tf.matmul(Kuf, self.Y), lower=True)
        quad = -0.5 * corrected_noise ** -1.0 * tf.reduce_sum(self.Y ** 2.0) + 0.5 * tf.reduce_sum(v ** 2.0)

        return const + logdet + quad
Example 4
    def build_predict(self, Xnew, full_cov=False):
        """
        Compute the mean and variance of the latent function at some new points
        Xnew.
        """
        _, _, Luu, L, _, _, gamma = self.build_common_terms()
        Kus = self.kern.K(self.Z, Xnew)  # size  M x Xnew

        w = tf.matrix_triangular_solve(Luu, Kus, lower=True)  # size M x Xnew

        tmp = tf.matrix_triangular_solve(tf.transpose(L), gamma, lower=False)
        mean = tf.matmul(tf.transpose(w), tmp) + self.mean_function(Xnew)
        intermediateA = tf.matrix_triangular_solve(L, w, lower=True)

        if full_cov:
            var = (
                self.kern.K(Xnew)
                - tf.matmul(tf.transpose(w), w)
                + tf.matmul(tf.transpose(intermediateA), intermediateA)
            )
            var = tf.tile(tf.expand_dims(var, 2), tf.pack([1, 1, tf.shape(self.Y)[1]]))
        else:
            var = (
                self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(w), 0) + tf.reduce_sum(tf.square(intermediateA), 0)
            )  # size Xnew,
            var = tf.tile(tf.expand_dims(var, 1), tf.pack([1, tf.shape(self.Y)[1]]))

        return mean, var
Example 5
    def test_whiten(self):
        """
        Make sure that predicting using the whitened representation is the
        same as using the non-whitened one.
        """
        with self.test_context() as sess:
            rng = np.random.RandomState(0)
            Xs, X, F, k, num_data, feed_dict = self.prepare()
            k.compile(session=sess)

            F_sqrt = tf.placeholder(settings.float_type, [num_data, 1])
            F_sqrt_data = rng.rand(num_data, 1)
            feed_dict[F_sqrt] = F_sqrt_data

            K = k.K(X)
            L = tf.cholesky(K)
            V = tf.matrix_triangular_solve(L, F, lower=True)
            V_sqrt = tf.matrix_triangular_solve(L, tf.diag(F_sqrt[:, 0]), lower=True)[None, :, :]

            Fstar_mean, Fstar_var = gpflow.conditionals.conditional(
                Xs, X, k, F, q_sqrt=F_sqrt)
            Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(
                Xs, X, k, V, q_sqrt=V_sqrt, white=True)

            mean_difference = sess.run(Fstar_w_mean - Fstar_mean, feed_dict=feed_dict)
            var_difference = sess.run(Fstar_w_var - Fstar_var, feed_dict=feed_dict)

            assert_allclose(mean_difference, 0, atol=4)
            assert_allclose(var_difference, 0, atol=4)
Example 6
    def build_predict(self, Xnew, full_cov=False):
        err = self.Y
        Kuf = self.RBF(self.Z, self.X)
        Kuu = self.RBF(self.Z,self.Z) + eye(self.num_inducing) * 1e-6
        Kus = self.RBF(self.Z, Xnew)
        sigma = tf.sqrt(self.likelihood_variance)
        L = tf.cholesky(Kuu)
        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
        B = tf.matmul(A, tf.transpose(A)) + eye(self.num_inducing)
        LB = tf.cholesky(B)
        Aerr = tf.matmul(A, err)
        c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tf.transpose(tmp2), c)
        
        if full_cov:

            var = self.RBF(Xnew, Xnew) + tf.matmul(tf.transpose(tmp2), tmp2)\
                - tf.matmul(tf.transpose(tmp1), tmp1)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)

        else:

            var = tf.diag_part(self.RBF(Xnew, Xnew)) + tf.reduce_sum(tf.square(tmp2), 0)\
                - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.pack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)


        return mean, var
Example 7
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook. 
        """

        num_inducing = tf.shape(self.Z)[0]
        num_data = tf.shape(self.Y)[0]
        output_dim = tf.shape(self.Y)[1]

        err =  self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.kern.K(self.Z, self.X)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        L = tf.cholesky(Kuu)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, Kuf, lower=True)*tf.sqrt(1./self.likelihood.variance)
        AAT = tf.matmul(A, tf.transpose(A))
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, err), lower=True) * tf.sqrt(1./self.likelihood.variance)

        #compute log marginal bound
        bound = -0.5*tf.cast(num_data*output_dim, tf.float64)*np.log(2*np.pi)
        bound += -tf.cast(output_dim, tf.float64)*tf.reduce_sum(tf.log(tf.user_ops.get_diag(LB)))
        bound += -0.5*tf.cast(num_data*output_dim, tf.float64)*tf.log(self.likelihood.variance)
        bound += -0.5*tf.reduce_sum(tf.square(err))/self.likelihood.variance
        bound += 0.5*tf.reduce_sum(tf.square(c))
        bound += -0.5*(tf.reduce_sum(Kdiag)/self.likelihood.variance - tf.reduce_sum(tf.user_ops.get_diag(AAT)))

        return bound
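As an illustrative sanity check (not from the original source) of the determinant identity this bound relies on: with A = L^{-1} Kuf / sigma and B = A A^T + I, one has log|Kfu Kuu^{-1} Kuf + sigma^2 I| = N log(sigma^2) + 2 * sum(log diag(chol(B))), by Sylvester's determinant theorem. The kernel and sizes below are arbitrary toy choices:

import numpy as np

rng = np.random.RandomState(0)
N, M, sigma2 = 6, 3, 0.1
X, Z = rng.randn(N, 1), rng.randn(M, 1)
k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2)   # toy RBF kernel
Kuu = k(Z, Z) + 1e-6 * np.eye(M)
Kuf = k(Z, X)
L = np.linalg.cholesky(Kuu)
A = np.linalg.solve(L, Kuf) / np.sqrt(sigma2)
B = A @ A.T + np.eye(M)
LB = np.linalg.cholesky(B)
lhs = np.linalg.slogdet(Kuf.T @ np.linalg.solve(Kuu, Kuf) + sigma2 * np.eye(N))[1]
rhs = N * np.log(sigma2) + 2.0 * np.sum(np.log(np.diag(LB)))
assert np.allclose(lhs, rhs)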
Example 8
def gauss_kl_diag(q_mu, q_sqrt, K,  num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))  # Log-det of q-cov
    L_inv = tf.matrix_triangular_solve(L, eye(tf.shape(L)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(tf.transpose(L), L_inv, lower=False)
    KL += 0.5 * tf.reduce_sum(tf.expand_dims(tf.diag_part(K_inv), 1)
                              * tf.square(q_sqrt))  # Trace term.
    return KL
Example 9
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], float_type)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]), float_type)  # constant term
    Lq = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
    KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
Example 10
 def _build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.
     """
     num_inducing = len(self.feature)
     err = self.Y - self.mean_function(self.X)
     Kuf = self.feature.Kuf(self.kern, self.X)
     Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     Kus = self.feature.Kuf(self.kern, Xnew)
     sigma = tf.sqrt(self.likelihood.variance)
     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
     B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
     LB = tf.cholesky(B)
     Aerr = tf.matmul(A, err)
     c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tmp2, c, transpose_a=True)
     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
               - tf.matmul(tmp1, tmp1, transpose_a=True)
         shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), shape)
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
               - tf.reduce_sum(tf.square(tmp1), 0)
         shape = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), shape)
     return mean + self.mean_function(Xnew), var
Example 11
  def testNotInvertible(self):
   # The input should be invertible.
   with self.test_session():
     with self.assertRaisesOpError("Input matrix is not invertible."):
       # The matrix has a zero on the diagonal.
       matrix = tf.constant([[1., 0., -1.], [-1., 0., 1.], [0., -1., 1.]])
       tf.matrix_triangular_solve(matrix, matrix).eval()
Example 12
  def testNonSquareMatrix(self):
    # When a solve against a non-square matrix is attempted, an error should
    # be raised.
   with self.test_session():
     with self.assertRaises(ValueError):
       matrix = tf.constant([[1., 2., 3.], [3., 4., 5.]])
       tf.matrix_triangular_solve(matrix, matrix)
Example 13
 def build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.
     """
     num_inducing = tf.shape(self.Z)[0]
     err = self.Y - self.mean_function(self.X)
     Kuf = self.kern.K(self.Z, self.X)
     Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
     Kus = self.kern.K(self.Z, Xnew)
     sigma = tf.sqrt(self.likelihood.variance)
     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
     B = tf.matmul(A, tf.transpose(A)) + eye(num_inducing)
     LB = tf.cholesky(B)
     Aerr = tf.matmul(A, err)
     c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tf.transpose(tmp2), c)
     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2)\
             - tf.matmul(tf.transpose(tmp1), tmp1)
         shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), shape)
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0)\
             - tf.reduce_sum(tf.square(tmp1), 0)
         shape = tf.pack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), shape)
     return mean + self.mean_function(Xnew), var
Example 14
  def testWrongDimensions(self):
    # The matrix should have the same number of rows as the right-hand sides.
   with self.test_session():
     matrix = tf.constant([[1., 0.], [0., 1.]])
     rhs = tf.constant([[1., 0.]])
     with self.assertRaises(ValueError):
       tf.matrix_triangular_solve(matrix, rhs)
Example 15
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.}, Kb_{.,.} :: RBF kernels
    Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported
    only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims,
    in which case the joint expectations simplify into a product of expectations.

    :return: NxMxM
    """
    if kern1.on_separate_dims(kern2) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (kern1, feat1))
        eKxz2 = expectation(p, (kern2, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2 or kern1 != kern2:
        raise NotImplementedError("The expectation over two kernels has only an "
                                  "analytical implementation if both kernels are equal.")

    kern = kern1
    feat = feat1

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        squared_lengthscales = kern.lengthscales ** 2. if kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales ** 2.

        sqrt_det_L = tf.reduce_prod(0.5 * squared_lengthscales) ** 0.5
        C = tf.cholesky(0.5 * tf.matrix_diag(squared_lengthscales) + Xcov)  # NxDxD
        dets = sqrt_det_L / tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(C)), axis=1))  # N

        C_inv_mu = tf.matrix_triangular_solve(C, tf.expand_dims(Xmu, 2), lower=True)  # NxDx1
        C_inv_z = tf.matrix_triangular_solve(C,
                                             tf.tile(tf.expand_dims(tf.transpose(Z) / 2., 0), [N, 1, 1]),
                                             lower=True)  # NxDxM
        mu_CC_inv_mu = tf.expand_dims(tf.reduce_sum(tf.square(C_inv_mu), 1), 2)  # Nx1x1
        z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1)  # NxM
        zm_CC_inv_zn = tf.matmul(C_inv_z, C_inv_z, transpose_a=True)  # NxMxM
        two_z_CC_inv_mu = 2 * tf.matmul(C_inv_z, C_inv_mu, transpose_a=True)[:, :, 0]  # NxM

        exponent_mahalanobis = mu_CC_inv_mu + tf.expand_dims(z_CC_inv_z, 1) + \
                               tf.expand_dims(z_CC_inv_z, 2) + 2 * zm_CC_inv_zn - \
                               tf.expand_dims(two_z_CC_inv_mu, 2) - tf.expand_dims(two_z_CC_inv_mu, 1)  # NxMxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxMxM

        # Compute sqrt(self.K(Z)) explicitly to prevent automatic gradient from
        # being NaN sometimes, see pull request #615
        kernel_sqrt = tf.exp(-0.25 * kern.square_dist(Z, None))
        return kern.variance ** 2 * kernel_sqrt * \
               tf.reshape(dets, [N, 1, 1]) * exponent_mahalanobis
Example 16
	def build_predict(self,X_new,full_cov=False):
  
		Kx = self.RBF(self.X_train, X_new)
		#Kuu = self.RBF(self.X_train,self.X_train)
		L = tf.cholesky(self.condition(self.Kuu))
		A = tf.matrix_triangular_solve(L, Kx, lower=True)
		V = tf.matrix_triangular_solve(L, self.Y_train)

		fmean = tf.matmul(A, V, transpose_a=True) + self.age_mean
		
		if full_cov:
			fvar = self.RBF(X_new,X_new) - tf.matmul(A, A, transpose_a=True) + tf.exp(self.variance_output) * self.eye(X_new.shape[0])
		else:
			fvar = tf.diag_part(self.RBF(X_new,X_new) - tf.matmul(A, A, transpose_a=True)) + tf.exp(self.variance_output) 
		
		return fmean,fvar
Example 17
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
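For reference (not part of the original source), the quantity computed here is the standard Gaussian expectation of an RBF kernel (the psi_1 statistic). With L = diag(lengthscales^2):

    <k(x, z_m)>_{N(x | mu_n, Sigma_n)} = variance * |I + L^-1 Sigma_n|^(-1/2)
        * exp( -0.5 * (z_m - mu_n)^T (L + Sigma_n)^-1 (z_m - mu_n) )

and |I + L^-1 Sigma_n|^(-1/2) = prod_d lengthscales_d / prod_d [chol(L + Sigma_n)]_dd, which is exactly the `determinants` factor in the code.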
Example 18
    def _build_predict(self, Xnew, full_cov=False):
        """
        The posterior variance of F is given by
            q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)
        Here we project this to F*, the values of the GP at Xnew which is given
        by
           q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} +
                                           diag(lambda**-2)]^-1 K_{f*} )
        """

        # compute kernel things
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X)

        # predictive mean
        f_mean = tf.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew)

        # predictive var
        A = K + tf.matrix_diag(tf.transpose(1. / tf.square(self.q_lambda)))
        L = tf.cholesky(A)
        Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
        LiKx = tf.matrix_triangular_solve(L, Kx_tiled)
        if full_cov:
            f_var = self.kern.K(Xnew) - tf.matmul(LiKx, LiKx, transpose_a=True)
        else:
            f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
        return f_mean, tf.transpose(f_var)
Example 19
    def _build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R
        This method computes the variational lower bound on the likelihood,
        which is:
            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
        with
            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # compute the variance for each of the outputs
        I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                    [self.num_latent, 1, 1])
        A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
            tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
        L = tf.cholesky(A)
        Li = tf.matrix_triangular_solve(L, I)
        tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
        f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                    tf.reduce_sum(K_alpha * self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
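For reference (not part of the original source), the KL assembled above is, per latent column with Lambda = diag(lambda),

    KL[ q(f) || p(f) ] = 0.5 * ( log|A| + tr(A^-1) - N + alpha^T K alpha ),   A = I + Lambda K Lambda

(the code sums this over the num_latent columns): q has covariance [K^-1 + Lambda^2]^-1, so log|K| - log|Sigma_q| = log|I + Lambda K Lambda| and tr(K^-1 Sigma_q) = tr(A^-1), while the mean shift K alpha contributes (K alpha)^T K^-1 (K alpha) = alpha^T K alpha.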
Example 20
    def eKzxKxz(self, Z, Xmu, Xcov):
        """
        Also known as Phi_2.
        :param Z: MxD
        :param Xmu: X mean (NxD)
        :param Xcov: X covariance matrices (NxDxD)
        :return: NxMxM
        """
        # use only active dimensions
        Xcov = self._slice_cov(Xcov)
        Z, Xmu = self._slice(Z, Xmu)
        M = tf.shape(Z)[0]
        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]
        lengthscales = self.lengthscales if self.ARD else tf.zeros(
            (D, ), dtype=float_type) + self.lengthscales

        Kmms = tf.sqrt(self.K(Z, presliced=True)) / self.variance**0.5
        scalemat = tf.expand_dims(eye(D), 0) + 2 * Xcov * tf.reshape(
            lengthscales**-2.0, [1, 1, -1])  # NxDxD
        det = tf.matrix_determinant(scalemat)

        mat = Xcov + 0.5 * tf.expand_dims(tf.diag(lengthscales**2.0),
                                          0)  # NxDxD
        cm = tf.cholesky(mat)  # NxDxD
        vec = 0.5 * (tf.reshape(Z, [1, M, 1, D]) + tf.reshape(
            Z, [1, 1, M, D])) - tf.reshape(Xmu, [N, 1, 1, D])  # NxMxMxD
        cmr = tf.tile(tf.reshape(cm, [N, 1, 1, D, D]),
                      [1, M, M, 1, 1])  # NxMxMxDxD
        smI_z = tf.matrix_triangular_solve(cmr, tf.expand_dims(vec,
                                                               4))  # NxMxMxDx1
        fs = tf.reduce_sum(tf.square(smI_z), [3, 4])

        return self.variance**2.0 * tf.expand_dims(Kmms, 0) * tf.exp(
            -0.5 * fs) * tf.reshape(det**-0.5, [N, 1, 1])
Example 21
    def build_likelihood_terms(self):
        Kdiag = reduce(
            tf.multiply,
            [k.Kdiag(self.X[:, i:i + 1]) for i, k in enumerate(self.kerns)])
        Kuu = [
            make_Kuu(k, a, b, self.ms)
            for k, a, b, in zip(self.kerns, self.a, self.b)
        ]
        Kuu_solid = kron([Kuu_d.get() for Kuu_d in Kuu])
        Kuu_inv_solid = kron([Kuu_d.inv().get() for Kuu_d in Kuu])
        sigma2 = self.likelihood.variance

        # Compute intermediate matrices
        P = self.KufKfu / sigma2 + Kuu_solid
        L = tf.cholesky(P)
        log_det_P = tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))
        c = tf.matrix_triangular_solve(L, self.KufY) / sigma2

        Kuu_logdets = [K.logdet() for K in Kuu]
        N_others = [float(np.prod(self.Ms)) / M for M in self.Ms]
        Kuu_logdet = reduce(
            tf.add, [N * logdet for N, logdet in zip(N_others, Kuu_logdets)])

        # compute log marginal bound
        ND = tf.cast(tf.size(self.Y), float_type)
        D = tf.cast(tf.shape(self.Y)[1], float_type)
        return (-0.5 * ND * tf.log(2 * np.pi * sigma2), -0.5 * D * log_det_P,
                0.5 * D * Kuu_logdet, -0.5 * self.tr_YTY / sigma2,
                0.5 * tf.reduce_sum(tf.square(c)),
                -0.5 * tf.reduce_sum(Kdiag) / sigma2,
                0.5 * tf.reduce_sum(Kuu_inv_solid * self.KufKfu) / sigma2)
Example 22
def multivariate_normal(x, mu, L):
    """
    Computes the log-density of a multivariate normal.
    :param x  : Dx1 or DxN sample(s) for which we want the density
    :param mu : Dx1 or DxN mean(s) of the normal distribution
    :param L  : DxD Cholesky decomposition of the covariance matrix
    :return p : (1,) or (N,) vector of log densities for each of the N x's and/or mu's

    x and mu are either vectors or matrices. If both are vectors (N,1):
    p[0] = log pdf(x) where x ~ N(mu, LL^T)
    If at least one is a matrix, we assume independence over the *columns*:
    the number of rows must match the size of L. Broadcasting behaviour:
    p[n] = log pdf of:
    x[n] ~ N(mu, LL^T) or x ~ N(mu[n], LL^T) or x[n] ~ N(mu[n], LL^T)
    """
    if x.shape.ndims is None:
        warnings.warn('Shape of x must be 2D at computation.')
    elif x.shape.ndims != 2:
        raise ValueError('Shape of x must be 2D.')
    if mu.shape.ndims is None:
        warnings.warn('Shape of mu may be unknown or not 2D.')
    elif mu.shape.ndims != 2:
        raise ValueError('Shape of mu must be 2D.')

    d = x - mu
    alpha = tf.matrix_triangular_solve(L, d, lower=True)
    num_dims = tf.cast(tf.shape(d)[0], L.dtype)
    p = - 0.5 * tf.reduce_sum(tf.square(alpha), 0)
    p -= 0.5 * num_dims * np.log(2 * np.pi)
    p -= tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    return p
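As an illustrative check (not part of the original source), the same log-density can be compared against SciPy for a single column:

import numpy as np
from scipy.stats import multivariate_normal as mvn

rng = np.random.RandomState(0)
D = 4
x, mu = rng.randn(D, 1), rng.randn(D, 1)
S = rng.randn(D, D)
cov = S @ S.T + np.eye(D)
L = np.linalg.cholesky(cov)
alpha = np.linalg.solve(L, x - mu)
p = (-0.5 * np.sum(alpha ** 2)
     - 0.5 * D * np.log(2 * np.pi)
     - np.sum(np.log(np.diag(L))))
assert np.allclose(p, mvn.logpdf(x[:, 0], mean=mu[:, 0], cov=cov))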
Example 23
    def test_whiten(self):
        """
        Make sure that predicting using the whitened representation is the
        same as using the non-whitened one.
        """

        with self.test_context() as sess:
            Xs, X, F, k, num_data, feed_dict = self.prepare()
            k.compile(session=sess)

            K = k.K(X) + tf.eye(num_data, dtype=settings.float_type) * 1e-6
            L = tf.cholesky(K)
            V = tf.matrix_triangular_solve(L, F, lower=True)
            Fstar_mean, Fstar_var = gpflow.conditionals.conditional(
                Xs, X, k, F)
            Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(
                Xs, X, k, V, white=True)

            mean1, var1 = sess.run([Fstar_w_mean, Fstar_w_var],
                                   feed_dict=feed_dict)
            mean2, var2 = sess.run([Fstar_mean, Fstar_var],
                                   feed_dict=feed_dict)

            # TODO: should tolerance be type dependent?
            assert_allclose(mean1, mean2)
            assert_allclose(var1, var2)
Example 24
    def build_likelihood(self):
        num_data = tf.shape(self.Y)[0]
        output_dim = tf.shape(self.Y)[1]

        total_variance = reduce(tf.add, [k.variance for k in self.kerns])
        Kuu = [
            make_Kuu(k, ai, bi, self.ms)
            for k, ai, bi in zip(self.kerns, self.a, self.b)
        ]
        Kuu = BlockDiagMat_many([mat for k in Kuu for mat in [k.A, k.B]])
        sigma2 = self.likelihood.variance

        # Compute intermediate matrices
        P = self.KufKfu / sigma2 + Kuu.get()
        L = tf.cholesky(P)
        log_det_P = tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))
        c = tf.matrix_triangular_solve(L, self.KufY) / sigma2

        # compute log marginal bound
        ND = tf.cast(num_data * output_dim, float_type)
        D = tf.cast(output_dim, float_type)
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_P
        bound += 0.5 * D * Kuu.logdet()
        bound += -0.5 * self.tr_YTY / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * ND * total_variance / sigma2
        bound += 0.5 * D * Kuu.trace_KiX(self.KufKfu) / sigma2

        return bound
Example 25
        def get_cholesky_solve_terms(Z, C=C):
            C_inv_z = tf.matrix_triangular_solve(
                C, tf.tile(tf.expand_dims(tf.transpose(Z), 0),
                           [N, 1, 1]), lower=True)  # [N, D, M]
            z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1)  # [N, M]

            return C_inv_z, z_CC_inv_z
Example 26
def update_precond_type2(Q1, Q2, q3, dx, dg, step=0.01):
    """
    update type II limited-memory preconditioner P = Q'*Q, where the Cholesky factor is a block matrix,
    
    Q = [Q1, Q2; 0, diag(q3)]
    
    This preconditioner requires limited memory if Q1(Q2) only has a few rows
    """
    r = Q1.shape.as_list()[0]

    #max_diag = tf.maximum(tf.reduce_max(tf.diag_part(Q1)), tf.reduce_max(q3))
    #Q1 = Q1 + tf.diag(tf.clip_by_value(_diag_loading*max_diag - tf.diag_part(Q1), 0.0, max_diag))
    #q3 = q3 + tf.clip_by_value(_diag_loading*max_diag - q3, 0.0, max_diag)

    a1 = tf.matmul(Q1, dg[:r]) + tf.matmul(Q2, dg[r:])
    a2 = tf.multiply(q3, dg[r:])
    b1 = tf.matrix_triangular_solve(tf.transpose(Q1), dx[:r], lower=True)
    b2 = tf.divide(dx[r:] - tf.matmul(Q2, b1, transpose_a=True), q3)
    grad1 = tf.matrix_band_part(
        tf.matmul(a1, a1, transpose_b=True) -
        tf.matmul(b1, b1, transpose_b=True), 0, -1)
    grad2 = tf.matmul(a1, a2, transpose_b=True) - tf.matmul(
        b1, b2, transpose_b=True)
    grad3 = tf.multiply(a2, a2) - tf.multiply(b2, b2)

    max_abs_grad = tf.reduce_max(tf.abs(grad1))
    max_abs_grad = tf.maximum(max_abs_grad, tf.reduce_max(tf.abs(grad2)))
    max_abs_grad = tf.maximum(max_abs_grad, tf.reduce_max(tf.abs(grad3)))
    step0 = step / (max_abs_grad + _tiny)
    return Q1 - tf.matmul(step0*grad1, Q1), \
            Q2 - tf.matmul(step0*grad1, Q2) - tf.multiply(step0*grad2, tf.tile(tf.transpose(q3), [r,1])), \
            q3 - tf.multiply(step0*grad3, q3)
Example 27
    def _build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R
        This method computes the variational lower bound on the likelihood,
        which is:
            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
        with
            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # compute the variance for each of the outputs
        I = tf.tile(
            tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type),
                           0), [self.num_latent, 1, 1])
        A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
            tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
        L = tf.cholesky(A)
        Li = tf.matrix_triangular_solve(L, I)
        tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
        f_var = 1. / tf.square(self.q_lambda) - tf.transpose(
            tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                    tf.reduce_sum(K_alpha * self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
Example 28
    def build_predict(self, Xnew, full_cov=False):
        """
        The posterior variance of F is given by

            q(f) = N(f | K alpha, [K^-1 + diag(lambda**2)]^-1)

        Here we project this to F*, the values of the GP at Xnew which is given by

           q(F*) = N ( F* | K_{*F} alpha , K_{**} - K_{*f}[K_{ff} + diag(lambda**-2)]^-1 K_{f*} )

        """

        #compute kernelly things
        Kx = self.kern.K(Xnew, self.X)
        K = self.kern.K(self.X)


        #predictive mean
        f_mean = tf.matmul(Kx, self.q_alpha) + self.mean_function(Xnew)

        #predictive var
        f_var = []
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            A = K + tf.diag(1./tf.square(b))
            L = tf.cholesky(A)
            LiKx = tf.matrix_triangular_solve(L, tf.transpose(Kx), lower=True)
            if full_cov:
                f_var.append( self.kern.K(Xnew)- tf.matmul(tf.transpose(LiKx),LiKx) )
            else:
                f_var.append( self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx),0) )
        f_var = tf.pack(f_var)
        return f_mean, tf.transpose(f_var)
Example 29
  def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None, use_gpu=False):
    for np_type in [np.float32, np.float64]:
      a = x.astype(np_type)
      b = y.astype(np_type)
      # For numpy.solve we have to explicitly zero out the strictly
      # upper or lower triangle.
      if lower and a.size > 0:
        a_np = np.tril(a)
      elif a.size > 0:
        a_np = np.triu(a)
      else:
        a_np = a
      if adjoint:
        a_np = np.conj(np.transpose(a_np))

      if batch_dims is not None:
        a = np.tile(a, batch_dims + [1, 1])
        a_np = np.tile(a_np, batch_dims + [1, 1])
        b = np.tile(b, batch_dims + [1, 1])

      with self.test_session(use_gpu=use_gpu):
        tf_ans = tf.matrix_triangular_solve(a, b, lower=lower, adjoint=adjoint)
        out = tf_ans.eval()
        np_ans = np.linalg.solve(a_np, b)
        self.assertEqual(np_ans.shape, tf_ans.get_shape())
        self.assertEqual(np_ans.shape, out.shape)
        self.assertAllClose(np_ans, out)
Example 30
    def build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood, which is:

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

        with

            q(f) = N(f | K alpha, [K^-1 + diag(square(lambda))]^-1) .

        """
        K = self.kern.K(self.X)
        f_mean = tf.matmul(K, self.q_alpha) + self.mean_function(self.X)
        #for each of the data-dimensions (columns of Y), find the diagonal of the
        #variance, and also relevant parts of the KL.
        f_var, A_logdet, trAi = [], tf.zeros((1,), tf.float64), tf.zeros((1,), tf.float64)
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            B = tf.expand_dims(b, 1)
            A = eye(self.num_data) + K*B*tf.transpose(B)
            L = tf.cholesky(A)
            Li = tf.matrix_triangular_solve(L, eye(self.num_data), lower=True)
            LiBi = Li / b
            #full_sigma:return tf.diag(b**-2) - LiBi.T.dot(LiBi)
            f_var.append(1./tf.square(b) - tf.reduce_sum(tf.square(LiBi),0))
            A_logdet += 2*tf.reduce_sum(tf.log(tf.user_ops.get_diag(L)))
            trAi += tf.reduce_sum(tf.square(Li))

        f_var = tf.transpose(tf.pack(f_var))

        KL = 0.5*(A_logdet + trAi - self.num_data*self.num_latent + tf.reduce_sum(f_mean*self.q_alpha))

        return tf.reduce_sum(self.likelihood.variational_expectations(f_mean, f_var, self.Y)) - KL
Example 31
    def _build_predict(self, Xnew, full_cov=False):
        """
        The posterior variance of F is given by
            q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)
        Here we project this to F*, the values of the GP at Xnew which is given
        by
           q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} +
                                           diag(lambda**-2)]^-1 K_{f*} )
        """

        # compute kernel things
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X)

        # predictive mean
        f_mean = tf.matmul(Kx, self.q_alpha,
                           transpose_a=True) + self.mean_function(Xnew)

        # predictive var
        A = K + tf.matrix_diag(tf.transpose(1. / tf.square(self.q_lambda)))
        L = tf.cholesky(A)
        Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
        LiKx = tf.matrix_triangular_solve(L, Kx_tiled)
        if full_cov:
            f_var = self.kern.K(Xnew) - tf.matmul(LiKx, LiKx, transpose_a=True)
        else:
            f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
        return f_mean, tf.transpose(f_var)
Example 32
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.
        """
        num_inducing = tf.shape(self.Z)[0]
        psi0 = tf.reduce_sum(self.kern.eKdiag(self.X_mean, self.X_var), 0)
        psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
        psi2 = tf.reduce_sum(
            self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        L = tf.cholesky(Kuu)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, tf.transpose(psi1),
                                       lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp),
                                         lower=True) / sigma2
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y),
                                       lower=True) / sigma

        # KL[q(x) || p(x)]
        dX_var = self.X_var if len(
            self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
        NQ = tf.cast(tf.size(self.X_mean), float_type)
        D = tf.cast(tf.shape(self.Y)[1], float_type)
        KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
             + 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
             - 0.5 * NQ \
             + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)

        # compute log marginal bound
        ND = tf.cast(tf.size(self.Y), float_type)
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                             tf.reduce_sum(tf.diag_part(AAT)))
        bound -= KL
        return bound
Example 33
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.
        """

        num_inducing = tf.shape(self.Z)[0]
        num_data = tf.cast(tf.shape(self.Y)[0], settings.dtypes.float_type)
        output_dim = tf.cast(tf.shape(self.Y)[1], settings.dtypes.float_type)

        err = self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.kern.K(self.Z, self.X)
        Kuu = self.kern.K(self.Z) + tf.eye(
            num_inducing, dtype=float_type) * settings.numerics.jitter_level
        L = tf.cholesky(Kuu)
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
        AAT = tf.matmul(A, A, transpose_b=True)
        B = AAT + tf.eye(num_inducing, dtype=float_type)
        LB = tf.cholesky(B)
        Aerr = tf.matmul(A, err)
        c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma

        # compute log marginal bound
        bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
        bound += -output_dim * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
        bound -= 0.5 * num_data * output_dim * tf.log(self.likelihood.variance)
        bound += -0.5 * tf.reduce_sum(
            tf.square(err)) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * output_dim * tf.reduce_sum(
            Kdiag) / self.likelihood.variance
        bound += 0.5 * output_dim * tf.reduce_sum(tf.matrix_diag_part(AAT))

        if self.reg:
            # add regularization
            beta = 1000.
            regularization = -beta * reduce(
                tf.add, map(tf.abs, self.kern.var_vector))  # L-1 norm
            return bound + regularization

        else:
            return bound
Example 34
File: ssgp.py Project: zcmail/VFF
    def build_predict(self, Xnew, full_cov=False):
        # w = w./repmat(ell',[m,1]);                                              % scaled model angular frequencies
        w = self.omega / self.kern.lengthscales
        m = tf.shape(self.omega)[0]
        m_float = tf.cast(m, tf.float64)

        # phi = x_tr*w';
        phi = tf.matmul(self.X, tf.transpose(w))
        # phi = [cos(phi) sin(phi)];                                              % design matrix
        phi = tf.concat([tf.cos(phi), tf.sin(phi)], axis=1)

        # R = chol((sf2/m)*(phi'*phi) + sn2*eye(2*m));                            % calculate some often-used constants
        A = (self.kern.variance / m_float) * tf.matmul(tf.transpose(phi), phi)\
            + self.likelihood.variance * gpflow.tf_wraps.eye(2*m)
        RT = tf.cholesky(A)
        R = tf.transpose(RT)

        # RtiPhit = PhiRi';
        RtiPhit = tf.matrix_triangular_solve(RT, tf.transpose(phi))
        # Rtiphity=RtiPhit*y_tr;
        Rtiphity = tf.matmul(RtiPhit, self.Y)

        # alfa=sf2/m*(R\Rtiphity);                                                % cosines/sines coefficients
        alpha = self.kern.variance / m_float * tf.matrix_triangular_solve(R, Rtiphity, lower=False)

        # phistar = x_tst*w';
        phistar = tf.matmul(Xnew, tf.transpose(w))
        # phistar = [cos(phistar) sin(phistar)];                              % test design matrix
        phistar = tf.concat([tf.cos(phistar), tf.sin(phistar)], axis=1)
        # out1(beg_chunk:end_chunk) = phistar*alfa;                           % Predictive mean
        mean = tf.matmul(phistar, alpha)

        # % also output predictive variance
        # out2(beg_chunk:end_chunk) = sn2*(1+sf2/m*sum((phistar/R).^2,2));% Predictive variance
        RtiPhistart = tf.matrix_triangular_solve(RT, tf.transpose(phistar))
        PhiRistar = tf.transpose(RtiPhistart)
        # NB: do not add in noise variance to the predictive var: gpflow does that for us.
        if full_cov:
            var = self.likelihood.variance * self.kern.variance / m_float *\
                tf.matmul(PhiRistar, tf.transpose(PhiRistar)) + \
                gpflow.tf_wraps.eye(tf.shape(Xnew)[0]) * 1e-6
            var = tf.expand_dims(var, 2)
        else:
            var = self.likelihood.variance * self.kern.variance / m_float * tf.reduce_sum(tf.square(PhiRistar), 1)
            var = tf.expand_dims(var, 1)

        return mean, var
Example 35
    def __init__(self, prec_mean, prec, d=None):

        prec_mean = tf.convert_to_tensor(prec_mean)
        prec = tf.convert_to_tensor(prec)

        try:
            d1, = util.extract_shape(prec_mean)
            prec_mean = tf.reshape(prec_mean, (d1, 1))
        except:
            d1, k = util.extract_shape(prec_mean)
            assert (k == 1)

        d2, _ = util.extract_shape(prec)
        assert (d1 == d2)
        if d is None:
            d = d1
        else:
            assert (d == d1)

        super(MVGaussianNatural, self).__init__(d=d)

        self._prec_mean = prec_mean
        self._prec = prec

        self._L_prec = tf.cholesky(prec)
        self._entropy = util.dists.multivariate_gaussian_entropy(
            L_prec=self._L_prec)

        # want to solve prec * mean = prec_mean for mean.
        # this is equiv to (LL') * mean = prec_mean.
        # since tf doesn't have a cholSolve shortcut, just
        # do it directly:
        #   solve L y = prec_mean
        # to get y = (L' * mean), then
        #   solve L' mean = y
        y = tf.matrix_triangular_solve(self._L_prec,
                                       self._prec_mean,
                                       lower=True,
                                       adjoint=False)
        self._mean = tf.matrix_triangular_solve(self._L_prec,
                                                y,
                                                lower=True,
                                                adjoint=True)

        L_cov_transpose = util.triangular_inv(self._L_prec)
        self._L_cov = tf.transpose(L_cov_transpose)
        self._cov = tf.matmul(self._L_cov, L_cov_transpose)
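A minimal NumPy sketch (illustrative only, with made-up sizes) of the same two-triangular-solve trick used above to recover the mean from the natural parameters, i.e. solving (L L^T) mean = prec_mean:

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.RandomState(0)
D = 5
S = rng.randn(D, D)
prec = S @ S.T + np.eye(D)          # precision matrix
prec_mean = rng.randn(D, 1)         # natural mean parameter
L = np.linalg.cholesky(prec)
y = solve_triangular(L, prec_mean, lower=True)    # solve L y = prec_mean
mean = solve_triangular(L.T, y, lower=False)      # solve L^T mean = y
assert np.allclose(prec @ mean, prec_mean)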
Example 36
    def _build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.
        """
        ND = tf.cast(tf.size(self.Y), settings.float_type)
        D = tf.cast(tf.shape(self.Y)[1], settings.float_type)

        Kxu = self.kern.K(self.X, self.feature.Z)

        psi0 = self._psi0()
        psi1 = self._psi1(Kxu)
        psi2 = self._psi2(Kxu)

        # Copy this into blocks for each dimension
        Kuu = features.Kuu(self.feature, self.kern, jitter=settings.jitter)
        L = tf.cholesky(Kuu)
        L = block_diagonal([L for _ in range(self.W.shape[1])])
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, tf.transpose(psi1),
                                       lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp),
                                         lower=True) / sigma2
        B = AAT + tf.eye(self.num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y),
                                       lower=True) / sigma

        # KL[q(W) || p(W)]
        KL = tf.reduce_sum(self.Wnorm() *
                           (tf.log(self.Wnorm()) - tf.log(self.W_prior)))

        # compute log marginal bound
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                             tf.reduce_sum(tf.matrix_diag_part(AAT)))
        bound -= KL
        return bound
Example 37
        def get_cholesky_solve_terms(Z, C=C):
            C_inv_z = tf.matrix_triangular_solve(
                C,
                tf.tile(tf.expand_dims(tf.transpose(Z), 0), [N, 1, 1]),
                lower=True)  # [N, D, M]
            z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1)  # [N, M]

            return C_inv_z, z_CC_inv_z
Example 38
 def inv(self):
     di = tf.reciprocal(self.d)
     d_col = tf.expand_dims(self.d, 1)
     DiW = self.W / d_col
     M = tf.eye(tf.shape(self.W)[1], dtype=float_type) + tf.matmul(tf.transpose(DiW), self.W)
     L = tf.cholesky(M)
     v = tf.transpose(tf.matrix_triangular_solve(L, tf.transpose(DiW), lower=True))
     return LowRankMatNeg(di, v)
Example 39
    def predict_components(self, Xnew):
        """
        Here, Xnew should be a Nnew x 1 array of points at which to test each function
        """
        Kuu = [
            make_Kuu(k, ai, bi, self.ms)
            for k, ai, bi in zip(self.kerns, self.a, self.b)
        ]
        Kuu = BlockDiagMat_many([mat for k in Kuu for mat in [k.A, k.B]])
        sigma2 = self.likelihood.variance

        # Compute intermediate matrices
        P = self.KufKfu / sigma2 + Kuu.get()
        L = tf.cholesky(P)
        c = tf.matrix_triangular_solve(L, self.KufY) / sigma2

        Kus_blocks = [
            make_Kuf(k, Xnew, a, b, self.ms)
            for i, (k, a, b) in enumerate(zip(self.kerns, self.a, self.b))
        ]
        Kus = []
        start = tf.constant(0, tf.int32)
        for i, b in enumerate(Kus_blocks):
            zeros_above = tf.zeros(tf.pack([start, tf.shape(b)[1]]),
                                   float_type)
            zeros_below = tf.zeros(
                tf.pack(
                    [tf.shape(L)[0] - start - tf.shape(b)[0],
                     tf.shape(b)[1]]), float_type)
            Kus.append(tf.concat(0, [zeros_above, b, zeros_below]))
            start = start + tf.shape(b)[0]

        tmp = [tf.matrix_triangular_solve(L, Kus_i) for Kus_i in Kus]
        mean = [tf.matmul(tf.transpose(tmp_i), c) for tmp_i in tmp]
        KiKus = [Kuu.solve(Kus_i) for Kus_i in Kus]
        var = [k.Kdiag(Xnew[:, i:i + 1]) for i, k in enumerate(self.kerns)]
        var = [
            v + tf.reduce_sum(tf.square(tmp_i), 0)
            for v, tmp_i in zip(var, tmp)
        ]
        var = [
            v - tf.reduce_sum(KiKus_i * Kus_i, 0)
            for v, KiKus_i, Kus_i in zip(var, KiKus, Kus)
        ]
        var = [tf.expand_dims(v, 1) for v in var]
        return tf.concat(1, mean), tf.concat(1, var)
Example 40
def dmvnorm(y, mean, sigma):
    L = tf.cholesky(sigma)
    kern_sqr = tf.matrix_triangular_solve(L, y - mean, lower=True)
    n = tf.cast(tf.shape(sigma)[1], tf.float32)
    loglike = -0.5 * n * tf.log(2.0 * np.pi)
    loglike += -tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    loglike += -0.5 * tf.reduce_sum(tf.square(kern_sqr))
    return (loglike)
Example 41
    def neglogp(self, x):
        delta = tf.expand_dims(x - self.mean, axis=-1)
        stds = 0*delta + self.std
        half_quadratic = tf.matrix_triangular_solve(stds,
                                                    delta, lower=False)
        quadratic = tf.matmul(half_quadratic, half_quadratic, transpose_a=True)

        return 0.5 * (self.log_det_cov + quadratic + self.size*tf.log(2*tf.constant(np.pi)))
Example 42
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_rbf_{.,.} :: RBF kernel
        - K_lin_{.,.} :: Linear kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales  ## Begin RBF eKxz code:

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N
        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
Example 43
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_rbf_{.,.} :: RBF kernel
        - K_lin_{.,.} :: Linear kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales  ## Begin RBF eKxz code:

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N
        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
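The disjoint-active_dims shortcut at the top of this function is just a per-datapoint outer product; the tiny NumPy snippet below (made-up array sizes, illustrative only) spells out what the broadcasting expression eKxz1[:, :, None] * eKxz2[:, None, :] computes.

import numpy as np

# Illustrative only: eKxz1 is N x M1, eKxz2 is N x M2; when the two kernels act on
# independent (disjoint, diagonal-covariance) dimensions, the joint expectation
# factorises into an outer product for each data point n.
rng = np.random.default_rng(1)
N, M1, M2 = 4, 3, 5
eKxz1 = rng.normal(size=(N, M1))
eKxz2 = rng.normal(size=(N, M2))

result = eKxz1[:, :, None] * eKxz2[:, None, :]            # N x M1 x M2

# Same thing, written as an explicit loop over n.
expected = np.stack([np.outer(eKxz1[n], eKxz2[n]) for n in range(N)])
assert np.allclose(result, expected)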
Ejemplo n.º 44
0
    def _matrix_triangular_solve_tensor(self, other, lower):
        """
        Solve self @ x = other for x when other is a full tensor

        Matrix sizes:
        A    : n x m
        A[i] : n_i x m_i
        X    : m x p
        rhs  : n x p

        Recursive algorithm based on Bilionis et al., "Multi-output separable
        Gaussian process: Towards an efficient, fully Bayesian paradigm for
        uncertainty quantification" (2013)

        :param other: the right-hand side of the system of equations
        :type other: tf.Tensor
        :param lower: whether self is a lower (True) or upper (False) triangular
            matrix
        :type lower: bool
        :return: the solution x, as a tf.Tensor of shape m x p
        """
        assert lower, "upper triangular not implemented"
        if self.k == 1:
            return tf.matrix_triangular_solve(self.x[0], other, lower)
        else:
            n = self.shape[0]
            p = other.shape[1]
            n_0 = int(self.x[0].shape[0])
            n_prime = n // n_0

            a_prime = KroneckerProduct(self.x[1:])
            a_0 = self.x[0]

            x_cols = []
            for i in range(p):
                # See KP times matrix for notes about Fortran-style reshaping...
                x1i = a_prime.matrix_triangular_solve(tf.transpose(
                    tf.reshape(other[:, i], (n_0, n_prime))), lower)
                # Note: The formula has a transpose before vectorizing.
                # However, F-style reshape needs a transpose as well.
                # So, they cancel and no transpose is carried out after trtrs.
                x_cols.append(tf.reshape(
                    tf.matrix_triangular_solve(a_0, tf.transpose(x1i), lower),
                    [-1]))
            return tf.stack(x_cols, 1)
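The recursion above exploits the standard identity (A kron B) vec(X) = vec(B X A^T), with vec taken column-major (Fortran order). Below is a hedged NumPy sketch of the same trick for two lower-triangular factors, checked against an explicit solve on the full Kronecker product; all names are illustrative.

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
A = np.tril(rng.normal(size=(3, 3))) + 3 * np.eye(3)       # lower-triangular factor
B = np.tril(rng.normal(size=(4, 4))) + 3 * np.eye(4)       # lower-triangular factor
r = rng.normal(size=A.shape[0] * B.shape[0])                # right-hand side, length 12

# Reference: solve against the explicitly formed Kronecker product.
y_ref = solve_triangular(np.kron(A, B), r, lower=True)

# Kronecker trick: (A kron B) vec(X) = vec(B X A^T) with column-major vec,
# so solve B X A^T = R instead of forming the 12 x 12 system.
R = r.reshape(B.shape[0], A.shape[0], order="F")            # un-vec (Fortran order)
X = solve_triangular(B, R, lower=True)                      # B^{-1} R
X = solve_triangular(A, X.T, lower=True).T                  # ... then A^{-T} from the right
y = X.reshape(-1, order="F")                                 # re-vec

assert np.allclose(y, y_ref)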
Ejemplo n.º 45
0
    def conditional_ND_not_share_Z(self, X, full_cov=False):
        mean_lst, var_lst, A_tiled_lst = [], [], []
        for nd in range(self.num_nodes):
            pa_nd = self.pa_idx(nd)
            X_tmp = tf.gather(X, pa_nd, axis=1)
            Kuf_nd = self.feature[nd].Kuf(self.kern[nd], X_tmp)

            A_nd = tf.matrix_triangular_solve(self.Lu[nd], Kuf_nd, lower=True)
            A_nd = tf.matrix_triangular_solve(tf.transpose(self.Lu[nd]),
                                              A_nd,
                                              lower=False)

            mean_tmp = tf.matmul(A_nd,
                                 self.q_mu[:, nd * self.dim_per_out:(nd + 1) *
                                           self.dim_per_out],
                                 transpose_a=True)
            if self.nb_init:
                mean_tmp += self.mean_function[nd](X_tmp)
            else:
                mean_tmp += self.mean_function[nd](
                    X[:, nd * self.dim_per_in:(nd + 1) * self.dim_per_in])
            mean_lst.append(mean_tmp)
            A_tiled_lst.append(
                tf.tile(A_nd[None, :, :], [self.dim_per_out, 1, 1]))

            SK_nd = -self.Ku_tiled_lst[nd]
            q_sqrt_nd = self.q_sqrt_lst[nd]
            with params_as_tensors_for(q_sqrt_nd, convert=True):
                SK_nd += tf.matmul(q_sqrt_nd, q_sqrt_nd, transpose_b=True)

            B_nd = tf.matmul(SK_nd, A_tiled_lst[nd])

            # (num_latent, num_X)
            delta_cov_nd = tf.reduce_sum(A_tiled_lst[nd] * B_nd, 1)
            Kff_nd = self.kern[nd].Kdiag(X_tmp)

            # (1, num_X) + (num_latent, num_X)
            var_nd = tf.expand_dims(Kff_nd, 0) + delta_cov_nd
            var_nd = tf.transpose(var_nd)

            var_lst.append(var_nd)

        mean = tf.concat(mean_lst, axis=1)
        var = tf.concat(var_lst, axis=1)
        return mean, var
Ejemplo n.º 46
0
 def _compute_cache(self):
     K = self.kern.K(self.X) + tf.eye(
         tf.shape(self.X)[0],
         dtype=settings.float_type) * self.likelihood.variance
     L = tf.cholesky(K, name='gp_cholesky')
     V = tf.matrix_triangular_solve(L,
                                    self.Y - self.mean_function(self.X),
                                    name='gp_alpha')
     return L, V
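For context, the cached pair (L, V) is all that is needed to form the GP posterior mean at new inputs: with Kxn = k(X, Xnew), the mean is Kxn^T L^{-T} V plus the mean function. A hedged NumPy sketch with a zero mean function and a toy RBF kernel follows; compute_cache, predict_mean and k are illustrative names, not library API.

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def compute_cache(k, X, Y, noise):
    K = k(X, X) + noise * np.eye(len(X))
    L = cholesky(K, lower=True)
    V = solve_triangular(L, Y, lower=True)                  # L^{-1} (Y - m(X)), zero mean here
    return L, V

def predict_mean(k, X, Xnew, L, V):
    Kxn = k(X, Xnew)                                        # N x N*
    A = solve_triangular(L, Kxn, lower=True)                # L^{-1} Kxn
    return A.T @ V                                          # Kxn^T K^{-1} Y

# Toy usage with a unit-lengthscale RBF kernel.
k = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)
X = np.linspace(0, 5, 20)[:, None]
Y = np.sin(np.linspace(0, 5, 20))[:, None]
L, V = compute_cache(k, X, Y, 1e-2)
print(predict_mean(k, X, np.array([[2.5]]), L, V))          # should be close to sin(2.5)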
Ejemplo n.º 47
0
 def _forward(self, x):
   with tf.control_dependencies(self._assertions(x)):
     x_shape = tf.shape(x)
     identity_matrix = tf.eye(
         x_shape[-1], batch_shape=x_shape[:-2], dtype=x.dtype.base_dtype)
     # Note `matrix_triangular_solve` implicitly zeros upper triangular of `x`.
     y = tf.matrix_triangular_solve(x, identity_matrix)
     y = tf.matmul(y, y, adjoint_a=True)
     return tf.cholesky(y)
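Read line by line, this maps a Cholesky factor of M to a Cholesky factor of M^{-1}. A small hedged NumPy check of that claim (illustrative only):

import numpy as np

rng = np.random.default_rng(2)
A = rng.normal(size=(4, 4))
M = A @ A.T + 4 * np.eye(4)

x = np.linalg.cholesky(M)                    # chol(M), lower triangular
y = np.linalg.solve(x, np.eye(4))            # x^{-1}
y = y.T @ y                                   # x^{-T} x^{-1} = M^{-1}
z = np.linalg.cholesky(y)                     # chol(M^{-1})

assert np.allclose(z @ z.T, np.linalg.inv(M))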
Ejemplo n.º 48
0
def multivariate_gaussian_log_density(x,
                                      mu,
                                      Sigma=None,
                                      L=None,
                                      prec=None,
                                      L_prec=None):
    """
    Assume X is a single vector described by a multivariate Gaussian
    distribution with x ~ N(mu, Sigma).

    We accept parameterization in terms of the covariance matrix or
    its cholesky decomposition L (more efficient if available), or the
    precision matrix or its cholesky decomposition L_prec.
    The latter is useful when representing a Gaussian in its natural
    parameterization. Note that we still require the explicit mean mu
    (not the natural parameter prec*mu); accepting prec*mu directly would
    be straightforward but is not implemented here.

    """
    s = extract_shape(x)
    try:
        n, = s
    except:
        n, m = s
        assert (m == 1)

    if L is None and Sigma is not None:
        L = tf.cholesky(Sigma)
    if L_prec is None and prec is not None:
        L_prec = tf.cholesky(prec)

    if L is not None:
        neg_half_logdet = -tf.reduce_sum(tf.log(tf.diag_part(L)))
    else:
        assert (L_prec is not None)
        neg_half_logdet = tf.reduce_sum(tf.log(tf.diag_part(L_prec)))

    d = tf.reshape(x - mu, (n, 1))
    if L is not None:
        alpha = tf.matrix_triangular_solve(L, d, lower=True)
        exponential_part = tf.reduce_sum(tf.square(alpha))
    elif prec is not None:
        d = tf.reshape(d, (n, 1))
        exponential_part = tf.reduce_sum(d * tf.matmul(prec, d))
    else:
        assert (L_prec is not None)
        d = tf.reshape(d, (1, n))
        alpha = tf.matmul(d, L_prec)
        exponential_part = tf.reduce_sum(tf.square(alpha))

    n_log2pi = n * 1.83787706641  # n * log(2 * pi)
    logp = -0.5 * n_log2pi
    logp += neg_half_logdet
    logp += -0.5 * exponential_part

    return logp
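As a hedged sanity check of the covariance-parameterised branch (not part of the function above), the same three terms should reproduce scipy's reference log-density:

import numpy as np
from scipy.stats import multivariate_normal
from scipy.linalg import solve_triangular

# Hedged NumPy transcription of the L-parameterised branch, checked against scipy.
rng = np.random.default_rng(3)
n = 3
x, mu = rng.normal(size=n), rng.normal(size=n)
A = rng.normal(size=(n, n)); Sigma = A @ A.T + np.eye(n)

L = np.linalg.cholesky(Sigma)
alpha = solve_triangular(L, x - mu, lower=True)
logp = (-0.5 * n * np.log(2 * np.pi)
        - np.sum(np.log(np.diag(L)))                        # -1/2 log|Sigma|
        - 0.5 * np.sum(alpha ** 2))                          # -1/2 Mahalanobis

assert np.isclose(logp, multivariate_normal(mean=mu, cov=Sigma).logpdf(x))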
Ejemplo n.º 49
0
    def test_whitening(self):
        with self.test_context() as sess:
            mu = tf.placeholder(FLOAT_TYPE, shape=(self.D, self.M))
            Q_chol = tf.placeholder(FLOAT_TYPE, shape=(self.D, self.M, self.M))
            P_chol = tf.placeholder(FLOAT_TYPE, shape=(self.D, self.M, self.M))

            feed_dict = self.get_feed_dict([mu], [Q_chol], [P_chol])

            KL_black = sess.run(KL(mu, Q_chol, P_chol=P_chol), feed_dict)
            KL_white = sess.run(
                KL(
                    tf.matrix_triangular_solve(P_chol,
                                               mu[:, :, None],
                                               lower=True)[..., 0],
                    tf.matrix_triangular_solve(P_chol, Q_chol, lower=True)),
                feed_dict)

            assert_allclose(KL_black, KL_white)
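The invariance being tested is that KL(N(mu, Q Q^T) || N(0, P P^T)) is unchanged by whitening with P_chol. Below is a hedged NumPy check for a single distribution using the closed-form Gaussian KL; gauss_kl_np and all shapes are illustrative, not the KL function under test.

import numpy as np
from scipy.linalg import solve_triangular

def gauss_kl_np(mu, Q_chol, P_chol):
    """KL( N(mu, Q_chol Q_chol^T) || N(0, P_chol P_chol^T) ), closed form."""
    M = len(mu)
    alpha = solve_triangular(P_chol, mu, lower=True)
    LiLq = solve_triangular(P_chol, Q_chol, lower=True)
    return (0.5 * np.sum(alpha ** 2)                         # Mahalanobis term
            + 0.5 * np.sum(LiLq ** 2)                         # trace term
            - 0.5 * M
            + np.sum(np.log(np.diag(P_chol)))                 # +1/2 log|P P^T|
            - np.sum(np.log(np.diag(Q_chol))))                # -1/2 log|Q Q^T|

rng = np.random.default_rng(4)
M = 3
mu = rng.normal(size=M)
Q_chol = np.tril(rng.normal(size=(M, M)), -1) + np.diag(rng.uniform(0.5, 2.0, size=M))
P_chol = np.tril(rng.normal(size=(M, M)), -1) + np.diag(rng.uniform(0.5, 2.0, size=M))

white_mu = solve_triangular(P_chol, mu, lower=True)
white_Q = solve_triangular(P_chol, Q_chol, lower=True)
kl_black = gauss_kl_np(mu, Q_chol, P_chol)
kl_white = gauss_kl_np(white_mu, white_Q, np.eye(M))
assert np.isclose(kl_black, kl_white)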
Ejemplo n.º 50
0
Archivo: gpr.py Proyecto: mrana6/gptf
 def build_posterior_mean_var(self, X, Y, test_points, full_cov=False):
     noise_var = self.likelihood.variance.tensor
     Kx = self.kernel.K(X, test_points)
     K = self.kernel.K(X)
     K += tfhacks.eye(tf.shape(X)[0], X.dtype) * noise_var
     L = tf.cholesky(K)
     A = tf.matrix_triangular_solve(L, Kx, lower=True)
     V = tf.matrix_triangular_solve(L, Y - self.meanfunction(X))
     fmean = tf.matmul(A, V, transpose_a=True)
     fmean += self.meanfunction(test_points)
     if full_cov:
         fvar = self.kernel.K(test_points) - tf.matmul(A, A, transpose_a=True)
         fvar = tf.tile(tf.expand_dims(fvar, 2), (1, 1, tf.shape(Y)[1]))
     else:
         fvar = self.kernel.Kdiag(test_points)
         fvar -= tf.reduce_sum(tf.square(A), 0)
         fvar = tf.tile(tf.expand_dims(fvar, 1), (1, tf.shape(Y)[1]))
     return fmean, fvar
Ejemplo n.º 51
0
    def _build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.
        """
        pX = DiagonalGaussian(self.X_mean, self.X_var)

        num_inducing = len(self.feature)
        psi0 = tf.reduce_sum(expectation(pX, self.kern))
        psi1 = expectation(pX, (self.feature, self.kern))
        psi2 = tf.reduce_sum(expectation(pX, (self.feature, self.kern), (self.feature, self.kern)), axis=0)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        L = tf.cholesky(Kuu)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

        # KL[q(x) || p(x)]
        dX_var = self.X_var if len(self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
        NQ = tf.cast(tf.size(self.X_mean), settings.float_type)
        D = tf.cast(tf.shape(self.Y)[1], settings.float_type)
        KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
             + 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
             - 0.5 * NQ \
             + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)

        # compute log marginal bound
        ND = tf.cast(tf.size(self.Y), settings.float_type)
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                             tf.reduce_sum(tf.matrix_diag_part(AAT)))
        bound -= KL
        return bound
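The log_det_B term relies on the low-rank determinant identity log|Qff + sigma^2 I_N| = N log sigma^2 + log|B|, with B = I_M + sigma^{-2} (L^{-1} Kuf)(L^{-1} Kuf)^T. Below is a hedged NumPy check of that identity with toy matrices (plain Kuf rather than psi-statistics; all names are illustrative):

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(5)
N, M, sigma2 = 50, 7, 0.1
X = rng.normal(size=(N, 1)); Z = rng.normal(size=(M, 1))
k = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)               # toy RBF kernel

Kuf = k(Z, X)                                                # M x N
Kuu = k(Z, Z) + 1e-8 * np.eye(M)
L = np.linalg.cholesky(Kuu)
A = solve_triangular(L, Kuf, lower=True)                     # L^{-1} Kuf, M x N

Qff = A.T @ A                                                # Kfu Kuu^{-1} Kuf
B = np.eye(M) + (A @ A.T) / sigma2

lhs = np.linalg.slogdet(Qff + sigma2 * np.eye(N))[1]
rhs = N * np.log(sigma2) + np.linalg.slogdet(B)[1]
assert np.isclose(lhs, rhs)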
Ejemplo n.º 52
0
    def __init__(self, D_IN, D_OUT, M=50):
        """
        Initialize GP layer 
        D_IN    : dimension of input
        D_OUT   : dimension of output
        M       : the number of inducing points
        """
        self.sig_offset = 1e-5
        self.process_sig_offset = 1e-5
        self.lambda_offset = 1e-5
        self.D_IN = D_IN
        self.D_OUT = D_OUT
        self.M = M

        # Define mean function
        self._init_mean_function(D_IN, D_OUT)

        # Kernel parameters (SE ARD kernel)
        self.ARD_loglambda = tf.Variable(np.zeros([1, D_IN]),
                                         dtype=tf.float32)  # (1,Din)
        self.ARD_lambda = tf.exp(self.ARD_loglambda) + self.lambda_offset
        self.ARD_logsig0 = tf.Variable(0.0, dtype=tf.float32)
        self.ARD_var0 = (tf.exp(self.ARD_logsig0) + self.sig_offset)**2

        # Inducing Points (Z -> U)
        self.Z = tf.Variable(np.random.uniform(-3, 3, [M, D_IN]),
                             dtype=tf.float32)  # (M,Din)
        self.GPmean = self.mean(self.Z)
        self.U_mean = tf.Variable(
            np.random.uniform(-3, 3, [M, D_OUT]),
            dtype=tf.float32)  # np.random.uniform(-2,2,[M,D_OUT])  # (M,Dout)
        self.U_logL_diag = tf.Variable(np.zeros([D_OUT, M]),
                                       dtype=tf.float32)  # (Dout,M)
        self.U_L_diag = tf.exp(self.U_logL_diag)
        self.U_L_nondiag = tf.Variable(np.zeros([D_OUT,
                                                 int(M * (M - 1) / 2)]),
                                       dtype=tf.float32)  # (Dout,M(M-1)/2)
        self.U_L = tf.matrix_set_diag(vecs_to_tri(self.U_L_nondiag, M),
                                      self.U_L_diag)  # (Dout,M,M)
        self.U_cov = (self.sig_offset**2) * tf.eye(M, batch_shape=[
            D_OUT
        ]) + self.U_L @ tf.transpose(self.U_L, perm=(0, 2, 1))  # (Dout,M,M)

        # Covariance among inducing points
        self.Kzz = SEARD(self.Z, self.Z, self.ARD_lambda, self.ARD_var0, M, M,
                         D_IN) + (self.sig_offset**2) * tf.eye(M)  # (M,M)
        self.Kzz_L = tf.cholesky(self.Kzz)
        self.Kzz_L_inv = tf.matrix_triangular_solve(
            self.Kzz_L, tf.eye(M), lower=True)  # tf.matrix_inverse(self.Kzz_L)
        self.Kzz_inv = tf.transpose(self.Kzz_L_inv) @ self.Kzz_L_inv

        # Processing Noise
        self.logbeta = tf.Variable(np.zeros([1, D_OUT]),
                                   dtype=tf.float32)  # set as 0.0
        self.beta = tf.exp(self.logbeta) + self.process_sig_offset  # (1,Dout)
        self.beta_expand = tf.expand_dims(self.beta, axis=0)  # (1,1,Dout)
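For readers unfamiliar with the packed parameterisation above: the (D_OUT, M(M-1)/2) tensor holds the strictly-lower-triangular entries of each M x M factor, and the diagonal is kept positive via an exponential. A hedged NumPy sketch of the same unpacking follows; vecs_to_tri is an external helper, so the exact index order used here is only illustrative.

import numpy as np

# Illustrative unpacking of per-output lower-triangular factors from packed
# strictly-lower entries plus a log-diagonal (index order is arbitrary here).
M, D_OUT = 4, 2
rng = np.random.default_rng(6)
off_diag = rng.normal(size=(D_OUT, M * (M - 1) // 2))        # strictly-lower entries
log_diag = rng.normal(size=(D_OUT, M))

rows, cols = np.tril_indices(M, k=-1)                         # strictly-lower index pairs
U_L = np.zeros((D_OUT, M, M))
U_L[:, rows, cols] = off_diag
U_L[:, np.arange(M), np.arange(M)] = np.exp(log_diag)         # positive diagonal

U_cov = U_L @ np.transpose(U_L, (0, 2, 1))                    # (D_OUT, M, M), PSD by construction
assert np.all(np.linalg.eigvalsh(U_cov) >= -1e-10)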
Ejemplo n.º 53
0
    def conditional_ND(self, X, full_cov=False):
        self.build_cholesky_if_needed()

        # mmean, vvar = conditional(X, self.feature.Z, self.kern,
        #             self.q_mu, q_sqrt=self.q_sqrt,
        #             full_cov=full_cov, white=self.white)
        Kuf = self.feature.Kuf(self.kern, X)

        A = tf.matrix_triangular_solve(self.Lu, Kuf, lower=True)
        if not self.white:
            A = tf.matrix_triangular_solve(tf.transpose(self.Lu),
                                           A,
                                           lower=False)

        mean = tf.matmul(A, self.q_mu, transpose_a=True)

        A_tiled = tf.tile(A[None, :, :], [self.num_outputs, 1, 1])
        I = tf.eye(self.num_inducing, dtype=settings.float_type)[None, :, :]

        if self.white:
            SK = -I
        else:
            SK = -self.Ku_tiled

        if self.q_sqrt is not None:
            SK += tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)

        B = tf.matmul(SK, A_tiled)

        if full_cov:
            # (num_latent, num_X, num_X)
            delta_cov = tf.matmul(A_tiled, B, transpose_a=True)
            Kff = self.kern.K(X)
        else:
            # (num_latent, num_X)
            delta_cov = tf.reduce_sum(A_tiled * B, 1)
            Kff = self.kern.Kdiag(X)

        # either (1, num_X) + (num_latent, num_X) or (1, num_X, num_X) + (num_latent, num_X, num_X)
        var = tf.expand_dims(Kff, 0) + delta_cov
        var = tf.transpose(var)

        return mean + self.mean_function(X), var
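In the non-white branch above, A equals Kuu^{-1} Kuf after the two triangular solves, so the covariance correction A^T (S - Kuu) A expands to A^T S A - Kfu Kuu^{-1} Kuf. A hedged NumPy check of that algebra (toy matrices, illustrative names):

import numpy as np

rng = np.random.default_rng(10)
M, N = 5, 7
B = rng.normal(size=(M, M)); Kuu = B @ B.T + np.eye(M)
Kuf = rng.normal(size=(M, N))
C = rng.normal(size=(M, M)); S = C @ C.T                      # q(u) covariance, M x M

A = np.linalg.solve(Kuu, Kuf)                                  # Kuu^{-1} Kuf
lhs = A.T @ (S - Kuu) @ A
rhs = A.T @ S @ A - Kuf.T @ np.linalg.solve(Kuu, Kuf)
assert np.allclose(lhs, rhs)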
Ejemplo n.º 54
0
    def _build_likelihood(self):
        if self.fDebug:
            print('assignegp_denseSparse compiling model (build_likelihood)')
        N = tf.cast(tf.shape(self.Y)[0], dtype=settings.float_type)
        M = tf.shape(self.ZExpanded)[0]
        D = tf.cast(tf.shape(self.Y)[1], dtype=settings.float_type)

        Phi = tf.nn.softmax(self.logPhi)
        # try squashing Phi to avoid numerical errors
        Phi = (1 - 2e-6) * Phi + 1e-6

        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(self.likelihood.variance)
        Kuu = self.kern.K(self.ZExpanded) + tf.eye(
            M, dtype=settings.float_type) * settings.numerics.jitter_level
        Kuf = self.kern.K(self.ZExpanded, self.X)

        Kdiag = self.kern.Kdiag(self.X)
        L = tf.cholesky(Kuu)
        A = tf.reduce_sum(Phi, 0)
        LiKuf = tf.matrix_triangular_solve(L, Kuf)
        W = LiKuf * tf.sqrt(A) / sigma
        P = tf.matmul(W, tf.transpose(W)) + tf.eye(M,
                                                   dtype=settings.float_type)
        traceTerm = -0.5 * tf.reduce_sum(
            Kdiag * A) / sigma2 + 0.5 * tf.reduce_sum(tf.square(W))
        R = tf.cholesky(P)
        tmp = tf.matmul(LiKuf, tf.matmul(tf.transpose(Phi), self.Y))
        c = tf.matrix_triangular_solve(R, tmp, lower=True) / sigma2
        if (self.fDebug):
            # trace term should be 0 for Z=X (full data)
            traceTerm = tf.Print(traceTerm, [traceTerm],
                                 message='traceTerm=',
                                 name='traceTerm',
                                 summarize=10)

        self.bound = traceTerm - 0.5*N*D*tf.log(2 * np.pi * sigma2)\
            - 0.5*D*tf.reduce_sum(tf.log(tf.square(tf.diag_part(R))))\
            - 0.5*tf.reduce_sum(tf.square(self.Y)) / sigma2\
            + 0.5*tf.reduce_sum(tf.square(c))\
            - self.build_KL(Phi)

        return self.bound
Ejemplo n.º 56
0
def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    # compute kernel stuff
    num_func = tf.shape(f)[1]  # K
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        shape = tf.stack([num_func, 1, 1])
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        shape = tf.stack([num_func, 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # K x N x N or K x N

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # K x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = tf.matrix_band_part(q_sqrt, -1, 0)  # K x M x M
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # K x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # K x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # K x N
    fvar = tf.transpose(fvar)  # N x K or N x N x K

    return fmean, fvar
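A hedged NumPy check of the q_sqrt=None, white=False path: the mean and marginal variance computed above should match the textbook conditional-Gaussian formulas mean = Kmn^T Kmm^{-1} f and var = Knn - Kmn^T Kmm^{-1} Kmn (toy kernel and shapes, illustrative only):

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(7)
M, N = 5, 8
Zpts = rng.normal(size=(M, 1)); Xpts = rng.normal(size=(N, 1))
k = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)

Kmm = k(Zpts, Zpts) + 1e-8 * np.eye(M)
Kmn = k(Zpts, Xpts)
Knn_diag = np.ones(N)                                          # Kdiag for this kernel
f = rng.normal(size=(M, 1))

Lm = np.linalg.cholesky(Kmm)
A = solve_triangular(Lm, Kmn, lower=True)
fvar = Knn_diag - np.sum(A ** 2, axis=0)
A = solve_triangular(Lm.T, A, lower=False)                     # now A = Kmm^{-1} Kmn
fmean = A.T @ f

assert np.allclose(fmean, Kmn.T @ np.linalg.solve(Kmm, f))
assert np.allclose(fvar, Knn_diag - np.diag(Kmn.T @ np.linalg.solve(Kmm, Kmn)))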
Ejemplo n.º 57
0
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.
        """
        num_inducing = tf.shape(self.Z)[0]

        psi0, psi1, psi2 = ke.build_psi_stats(self.Z, self.kern, self.X_mean, self.X_var)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        L = tf.cholesky(Kuu)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

        # KL[q(x) || p(x)]
        NQ = tf.cast(tf.size(self.X_mean), tf.float64)
        D = tf.cast(tf.shape(self.Y)[1], tf.float64)
        KL = -0.5*tf.reduce_sum(tf.log(self.X_var)) \
            + 0.5*tf.reduce_sum(tf.log(self.X_prior_var))\
            - 0.5 * NQ\
            + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + self.X_var) / self.X_prior_var)

        # compute log marginal bound
        ND = tf.cast(tf.size(self.Y), tf.float64)
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                             tf.reduce_sum(tf.diag_part(AAT)))
        bound -= KL

        return bound
Ejemplo n.º 58
0
    def __init__(self, prec_mean, prec, d=None):

        prec_mean = tf.convert_to_tensor(prec_mean)
        prec = tf.convert_to_tensor(prec)
        
        try:
            d1, = util.extract_shape(prec_mean)
            prec_mean = tf.reshape(prec_mean, (d1,1))
        except:
            d1,k = util.extract_shape(prec_mean)
            assert(k == 1)

            
        d2,_ = util.extract_shape(prec)
        assert(d1==d2)
        if d is None:
            d = d1
        else:
            assert(d==d1)

        super(MVGaussianNatural, self).__init__(d=d)

        self._prec_mean = prec_mean
        self._prec = prec
        
        self._L_prec = tf.cholesky(prec)
        self._entropy = bf.dists.multivariate_gaussian_entropy(L_prec=self._L_prec)

        # want to solve prec * mean = prec_mean for mean.
        # this is equiv to (LL') * mean = prec_mean.
        # since tf doesn't have a cholSolve shortcut, just
        # do it directly:
        #   solve L y = prec_mean
        # to get y = (L' * mean), then
        #   solve L' mean = y
        y = tf.matrix_triangular_solve(self._L_prec, self._prec_mean, lower=True, adjoint=False)
        self._mean = tf.matrix_triangular_solve(self._L_prec, y, lower=True, adjoint=True)

        L_cov_transpose = util.triangular_inv(self._L_prec)
        self._L_cov = tf.transpose(L_cov_transpose)
        self._cov = tf.matmul(self._L_cov, L_cov_transpose)
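A hedged NumPy version of the two-triangular-solve comment above (the TF code uses adjoint=True instead of an explicit transpose), checked against a direct solve; names are illustrative:

import numpy as np
from scipy.linalg import solve_triangular

# Recover the mean from (prec, prec_mean) via two triangular solves.
rng = np.random.default_rng(8)
d = 4
A = rng.normal(size=(d, d)); prec = A @ A.T + np.eye(d)
prec_mean = rng.normal(size=(d, 1))

L_prec = np.linalg.cholesky(prec)
y = solve_triangular(L_prec, prec_mean, lower=True)            # solve L y = prec_mean
mean = solve_triangular(L_prec.T, y, lower=False)              # solve L^T mean = y

assert np.allclose(mean, np.linalg.solve(prec, prec_mean))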
Ejemplo n.º 59
0
    def _build_common_terms(self):
        num_inducing = len(self.feature)
        err = self.Y - self.mean_function(self.X)  # size N x R
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.feature.Kuf(self.kern, self.X)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)

        Luu = tf.cholesky(Kuu)  # => Luu Luu^T = Kuu
        V = tf.matrix_triangular_solve(Luu, Kuf)  # => V^T V = Qff = Kuf^T Kuu^-1 Kuf

        diagQff = tf.reduce_sum(tf.square(V), 0)
        nu = Kdiag - diagQff + self.likelihood.variance

        B = tf.eye(num_inducing, dtype=settings.float_type) + tf.matmul(V / nu, V, transpose_b=True)
        L = tf.cholesky(B)
        beta = err / tf.expand_dims(nu, 1)  # size N x R
        alpha = tf.matmul(V, beta)  # size M x R

        gamma = tf.matrix_triangular_solve(L, alpha, lower=True)  # size M x R

        return err, nu, Luu, L, alpha, beta, gamma
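Finally, a hedged NumPy check of the diagQff shortcut used above: column-wise squared norms of V = Luu^{-1} Kuf already give the diagonal of Qff, so the full N x N matrix never needs to be formed (toy kernel, illustrative names):

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(9)
M, N = 6, 10
Z = rng.normal(size=(M, 1)); X = rng.normal(size=(N, 1))
k = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)

Kuu = k(Z, Z) + 1e-8 * np.eye(M)
Kuf = k(Z, X)
Luu = np.linalg.cholesky(Kuu)
V = solve_triangular(Luu, Kuf, lower=True)

diagQff = np.sum(V ** 2, axis=0)                               # the shortcut
Qff = Kuf.T @ np.linalg.solve(Kuu, Kuf)                         # explicit N x N reference
assert np.allclose(diagQff, np.diag(Qff))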