Example #1
    def build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood, which is

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
        with
            q(f) = N(f | K alpha, [K^-1 + diag(square(lambda))]^-1)
        """
        K = self.kern.K(self.X)
        f_mean = tf.matmul(K, self.q_alpha) + self.mean_function(self.X)
        #for each of the data-dimensions (columns of Y), find the diagonal of the
        #variance, and also relevant parts of the KL.
        f_var, A_logdet, trAi = [], tf.zeros((1,), tf.float64), tf.zeros((1,), tf.float64)
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            B = tf.expand_dims(b, 1)
            A = eye(self.num_data) + K*B*tf.transpose(B)
            L = tf.cholesky(A)
            Li = tf.user_ops.triangular_solve(L, eye(self.num_data), 'lower')
            LiBi = Li / b
            #full_sigma:return tf.diag(b**-2) - LiBi.T.dot(LiBi)
            f_var.append(1./tf.square(b) - tf.reduce_sum(tf.square(LiBi),0))
            A_logdet += 2*tf.reduce_sum(tf.log(tf.user_ops.get_diag(L)))
            trAi += tf.reduce_sum(tf.square(Li))

        f_var = tf.transpose(tf.pack(f_var))

        KL = 0.5*(A_logdet + trAi - self.num_data*self.num_latent + tf.reduce_sum(f_mean*self.q_alpha))

        return tf.reduce_sum(self.likelihood.variational_expectations(f_mean, f_var, self.Y)) - KL
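For reference, here is a sketch of the algebra the per-column loop accumulates (the notation below is introduced here, not taken from the source). For latent dimension d, write lambda = q_lambda[:, d], so the covariance of q is Sigma = (K^-1 + diag(lambda^2))^-1, and let A = I + diag(lambda) K diag(lambda) with Cholesky factor L. Then

    log|K| - log|Sigma| = log|A|   = 2 * sum_i log(L[i, i])        (accumulated in A_logdet)
    tr(K^-1 Sigma)      = tr(A^-1) = sum_{i,j} (L^-1)[i, j]^2      (accumulated in trAi)

and, when the mean function is zero, the remaining Mahalanobis term (K alpha)^T K^-1 (K alpha) = alpha^T K alpha is what tf.reduce_sum(f_mean * self.q_alpha) contributes, so KL assembles the usual Gaussian KL one latent dimension at a time.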
Example #2
    def build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood, which is:

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

        with

            q(f) = N(f | K alpha, [K^-1 + diag(square(lambda))]^-1) .

        """
        K = self.kern.K(self.X)
        f_mean = tf.matmul(K, self.q_alpha) + self.mean_function(self.X)
        #for each of the data-dimensions (columns of Y), find the diagonal of the
        #variance, and also relevant parts of the KL.
        f_var, A_logdet, trAi = [], tf.zeros((1,), tf.float64), tf.zeros((1,), tf.float64)
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            B = tf.expand_dims(b, 1)
            A = eye(self.num_data) + K*B*tf.transpose(B)
            L = tf.cholesky(A)
            Li = tf.matrix_triangular_solve(L, eye(self.num_data), lower=True)
            LiBi = Li / b
            #full_sigma:return tf.diag(b**-2) - LiBi.T.dot(LiBi)
            f_var.append(1./tf.square(b) - tf.reduce_sum(tf.square(LiBi),0))
            A_logdet += 2*tf.reduce_sum(tf.log(tf.user_ops.get_diag(L)))
            trAi += tf.reduce_sum(tf.square(Li))

        f_var = tf.transpose(tf.pack(f_var))

        KL = 0.5*(A_logdet + trAi - self.num_data*self.num_latent + tf.reduce_sum(f_mean*self.q_alpha))

        return tf.reduce_sum(self.likelihood.variational_expectations(f_mean, f_var, self.Y)) - KL
Example #3
 def build_prior_KL(self):
     """
     We return the KL for all latent functions
     """
     KL = 0
     for i in np.arange(self.rank):  # i is the group id.
         for j in np.arange(self.num_latent_list[i]):
             lat_id = np.sum(self.num_latent_list[:i],dtype = np.int64) + j #id of latent function
             if self.whiten_list[lat_id]:
                 if self.q_diag_list[lat_id]:
                     KL +=  kullback_leiblers.gauss_kl_white_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], self.dim)#rotates the coordinate system to make it independent
                 else:
                     KL += kullback_leiblers.gauss_kl_white(self.q_mu_list[lat_id],self.q_sqrt_list[lat_id], self.dim)
             else:
                 K = self.kern_list[i].K(self.Z[lat_id]) + eye(self.num_inducing_list[lat_id]) * 1e-6 ## compute with the ith kernel
                 if self.q_diag_list[lat_id]:
                     KL += kullback_leiblers.gauss_kl_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], K, self.dim)
                 else:
                     KL += kullback_leiblers.gauss_kl(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], K, self.dim)
     if self.tsk:
         for task_id in np.arange(self.num_tasks):
             lat_id = np.sum(self.num_latent_list,dtype = np.int64) + task_id#id of latent function
             if self.whiten_list[lat_id]:
                 if self.q_diag_list[lat_id]:
                     KL +=  kullback_leiblers.gauss_kl_white_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], self.dim)
                         #rotates the coordinate system to make it independent
                 else:
                     KL += kullback_leiblers.gauss_kl_white(self.q_mu_list[lat_id],self.q_sqrt_list[lat_id], self.dim)
             else:
                 K = self.tskern_list[task_id].K(self.Z[lat_id]) + eye(self.num_inducing_list[lat_id]) * 1e-6 ## compute with the ith kernel
                 if self.q_diag_list[lat_id]:
                     KL += kullback_leiblers.gauss_kl_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id],K, self.dim)
                 else:
                     KL += kullback_leiblers.gauss_kl(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id],K, self.dim)
     return KL
Example #4
 def build_prior_KL(self):
     """
     We return the KL for all latent functions
     """
     KL = 0
     for q in np.arange(self.rank):  # q is the group id.
         for i in np.arange(self.num_latent_list[q]):
             lat_id = np.sum(self.num_latent_list[:q],dtype = np.int64) + i #id of latent function
             if self.whiten_list[lat_id]:
                 if self.q_diag_list[lat_id]:
                     KL +=  kullback_leiblers.gauss_kl_white_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], self.dim)
                 else:
                     KL += kullback_leiblers.gauss_kl_white(self.q_mu_list[lat_id],self.q_sqrt_list[lat_id], self.dim)
             else:
                 K = self.kern_list[q].K(self.Z[lat_id]) + eye(self.num_inducing_list[lat_id]) * 1e-6 
                 if self.q_diag_list[lat_id]:
                     KL += kullback_leiblers.gauss_kl_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], K, self.dim)
                 else:
                     KL += kullback_leiblers.gauss_kl(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], K, self.dim)
     if self.tsk:
         for d in np.arange(self.num_tasks):
             lat_id = np.sum(self.num_latent_list,dtype = np.int64) + d#id of latent function
             if self.whiten_list[lat_id]:
                 if self.q_diag_list[lat_id]:
                     KL +=  kullback_leiblers.gauss_kl_white_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id], self.dim)
                 else:
                     KL += kullback_leiblers.gauss_kl_white(self.q_mu_list[lat_id],self.q_sqrt_list[lat_id], self.dim)
             else:
                 K = self.tskern_list[d].K(self.Z[lat_id]) + eye(self.num_inducing_list[lat_id]) * 1e-6 ## compute with the ith kernel
                 if self.q_diag_list[lat_id]:
                     KL += kullback_leiblers.gauss_kl_diag(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id],K, self.dim)
                 else:
                     KL += kullback_leiblers.gauss_kl(self.q_mu_list[lat_id], self.q_sqrt_list[lat_id],K, self.dim)
     return KL
Example #5
    def build_predict(self, Xnew, full_cov=False):
        """
        Compute the mean and variance of the latent function at some new points
        Xnew. Note that this is very similar to the SGPR prediction, for which
        there are notes in the SGPR notebook.
        """
        num_inducing = tf.shape(self.Z)[0]
        psi0, psi1, psi2 = ke.build_psi_stats(self.Z, self.kern, self.X_mean, self.X_var)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        Kus = self.kern.K(self.Z, Xnew)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)
        L = tf.cholesky(Kuu)

        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tf.transpose(tmp2), c)
        if full_cov:
            var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2)\
                - tf.matmul(tf.transpose(tmp1), tmp1)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0)\
                - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.pack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
Example #6
 def build_prior_KL(self):
     KL = None
     
     for d in xrange(self.X.shape[1]):
         q_mu_d = self.__getattribute__('q_mu_%d' % d)
         q_sqrt_d = self.__getattribute__('q_sqrt_%d' % d)
         Z_d = self.__getattribute__('Z_%d' % d)
         
         if self.whiten:
             if self.q_diag:
                 KL_d = kullback_leiblers.gauss_kl_white_diag(q_mu_d, q_sqrt_d, self.num_latent)
             else:
                 KL_d = kullback_leiblers.gauss_kl_white(q_mu_d, q_sqrt_d, self.num_latent)
         else:
             K = self.kern[d].K(Z_d) + eye(self.num_inducing[d]) * 1e-6
             if self.q_diag:
                 KL_d = kullback_leiblers.gauss_kl_diag(q_mu_d, q_sqrt_d, K, self.num_latent)
             else:
                 KL_d = kullback_leiblers.gauss_kl(q_mu_d, q_sqrt_d, K, self.num_latent)
                 
         # add things up, we were too lazy to check the type of KL_d
         if KL is None:
             KL = KL_d
         else:
             KL += KL_d
             
     return KL
Example #7
File: gpr.py Project: blutooth/dgp
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, points at which we want to predict

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(tf.shape(self.X)[0]) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
        return fmean, fvar
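As a cross-check, the two triangular solves above are the standard Cholesky route to the GP regression posterior (a textbook identity, restated here rather than quoted from the project docs). With L = cholesky(Kxx + sigma^2 I), A = L^-1 Kxs and V = L^-1 (Y - m(X)),

    p(F* | Y) = N( m(X*) + A^T V,  K** - A^T A )

and the full_cov=False branch keeps only Kdiag(X*) - sum(A^2, 0), tiled across the columns of Y.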
Example #8
def gauss_kl_diag(q_mu, q_sqrt, K,  num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the columns of q_mu and q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))  # Log-det of q-cov
    L_inv = tf.matrix_triangular_solve(L, eye(tf.shape(L)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(tf.transpose(L), L_inv, lower=False)
    KL += 0.5 * tf.reduce_sum(tf.expand_dims(tf.diag_part(K_inv), 1)
                              * tf.square(q_sqrt))  # Trace term.
    return KL
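The terms above assemble the standard closed form, summed over the num_latent independent columns. Writing mu = q_mu[:, i] and S = diag(q_sqrt[:, i]^2) (notation introduced here),

    KL[ N(mu, S) || N(0, K) ] = 0.5 * [ mu^T K^-1 mu + tr(K^-1 S) - N + log|K| - log|S| ]

The Mahalanobis term uses alpha = L^-1 mu, the prior log-determinant per column is 2 * sum(log(diag(L))), and because S is diagonal the trace term only needs diag(K^-1), which is why K^-1 is built from the two triangular solves at the end.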
Example #9
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, points at which we want to predict

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L,
                                       self.Y - self.mean_function(self.X),
                                       lower=True)
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            fvar = tf.tile(tf.expand_dims(fvar, 2),
                           tf.pack([1, 1, tf.shape(self.Y)[1]]))
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A),
                                                         reduction_indices=0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.Y.shape[1]])
        return fmean, fvar
Example #10
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.
        """
        num_inducing = tf.shape(self.Z)[0]

        psi0, psi1, psi2 = ke.build_psi_stats(self.Z, self.kern, self.X_mean, self.X_var)
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        L = tf.cholesky(Kuu)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + eye(num_inducing)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

        # KL[q(x) || p(x)]
        NQ = tf.cast(tf.size(self.X_mean), tf.float64)
        D = tf.cast(tf.shape(self.Y)[1], tf.float64)
        KL = -0.5*tf.reduce_sum(tf.log(self.X_var)) \
            + 0.5*tf.reduce_sum(tf.log(self.X_prior_var))\
            - 0.5 * NQ\
            + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + self.X_var) / self.X_prior_var)

        # compute log marginal bound
        ND = tf.cast(tf.size(self.Y), tf.float64)
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                             tf.reduce_sum(tf.diag_part(AAT)))
        bound -= KL

        return bound
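The KL[q(x) || p(x)] block is the KL between diagonal Gaussians, applied elementwise to X_mean and X_var; per entry it is the standard identity (restated here for reference)

    KL[ N(mu, s) || N(mu0, s0) ] = 0.5 * [ log(s0) - log(s) - 1 + (s + (mu - mu0)^2) / s0 ]

with (mu0, s0) = (X_prior_mean, X_prior_var), which matches the four KL lines in the code once summed over all N x Q entries.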
Example #11
File: gpr.py Project: blutooth/dgp
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the likelihood.

            \log p(Y, V | theta).

        """
        K = self.kern.K(self.X) + eye(tf.shape(self.X)[0]) * self.likelihood.variance
        L = tf.cholesky(K)
        m = self.mean_function(self.X)

        return multivariate_normal(self.Y, m, L)
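The multivariate_normal helper is expected to evaluate the Gaussian log-density from the Cholesky factor; per column y of Y the quantity being returned should be (the standard density, not a claim about the helper's exact signature)

    log N(y | m, K) = -0.5 (y - m)^T K^-1 (y - m) - sum_i log(L[i, i]) - 0.5 * N * log(2 pi)

where K = Kxx + sigma^2 I and L = cholesky(K): the quadratic term comes from the solve L^-1 (y - m), and -sum_i log(L[i, i]) is -0.5 log|K|.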
Example #12
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the likelihood of a general GP model.

            \log p(Y, V | theta).

        """
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        m = self.mean_function(self.X)

        return multivariate_normal(self.Y, m, L)
Example #13
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the likelihood of a general GP model.

            \log p(Y, V | theta).

        """
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        m = self.mean_function(self.X)

        return multivariate_normal(self.Y, m, L)
Example #14
 def build_prior_KL(self):
     if self.whiten:
         if self.q_diag:
             KL = kullback_leiblers.gauss_kl_white_diag(self.q_mu, self.q_sqrt, self.num_latent)
         else:
             KL = kullback_leiblers.gauss_kl_white(self.q_mu, self.q_sqrt, self.num_latent)
     else:
         K = self.kern.K(self.Z) + eye(self.num_inducing) * 1e-6
         if self.q_diag:
             KL = kullback_leiblers.gauss_kl_diag(self.q_mu, self.q_sqrt, K, self.num_latent)
         else:
             KL = kullback_leiblers.gauss_kl(self.q_mu, self.q_sqrt, K, self.num_latent)
     return KL
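The whitened branches rely on the reparameterisation used elsewhere in this listing (see the whitened predict functions below): the model works with v, where u = L v and L = cholesky(K(Z)), so the prior on v is N(0, I) and the KL needs no kernel matrix. Assuming gauss_kl_white follows the usual convention, the per-column quantity is

    KL[ N(q_mu, S) || N(0, I) ] = 0.5 * [ tr(S) + q_mu^T q_mu - M - log|S| ]

with S = diag(q_sqrt^2) in the *_diag variants, or S = W W^T with W the lower triangle of q_sqrt otherwise.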
Example #15
 def predict_f_samples(self, Xnew, num_samples):
     """
     Produce samples from the posterior latent function(s) at the points
     Xnew.
     """
     mu, var = self.build_predict(Xnew, full_cov=True)
     jitter = tf_hacks.eye(tf.shape(mu)[0]) * 1e-6
     samples = []
     for i in range(self.num_latent):
         L = tf.cholesky(var[:, :, i] + jitter)
         shape = tf.pack([tf.shape(L)[0], num_samples])
         V = tf.random_normal(shape, dtype=tf.float64)
         samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
     return tf.transpose(tf.pack(samples))
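The sampling above is the usual location-scale transform: draw eps ~ N(0, I) and push it through the Cholesky factor of the (jittered) covariance. A minimal NumPy sketch of the same idea, independent of the class above (names are illustrative only):

    import numpy as np

    def sample_mvn(mu, cov, num_samples, jitter=1e-6):
        # mu: (N,), cov: (N, N); returns an array of shape (num_samples, N)
        L = np.linalg.cholesky(cov + jitter * np.eye(cov.shape[0]))
        eps = np.random.randn(cov.shape[0], num_samples)     # standard normal draws
        return (mu[:, None] + np.dot(L, eps)).T              # mu + L eps has covariance L L^T = cov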
Example #16
 def build_prior_KL(self):
     if self.whiten:
         if self.q_diag:
             KL = kullback_leiblers.gauss_kl_white_diag(
                 self.q_mu, self.q_sqrt, self.num_latent)
         else:
             KL = kullback_leiblers.gauss_kl_white(self.q_mu, self.q_sqrt,
                                                   self.num_latent)
     else:
         K = self.kern.K(self.Z) + eye(self.num_inducing) * 1e-6
         if self.q_diag:
             KL = kullback_leiblers.gauss_kl_diag(self.q_mu, self.q_sqrt, K,
                                                  self.num_latent)
         else:
             KL = kullback_leiblers.gauss_kl(self.q_mu, self.q_sqrt, K,
                                             self.num_latent)
     return KL
Example #17
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the likelihood of a general GP model.

            \log p(Y, V | theta).

        """
        with tf.name_scope('kernel'):
            K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
            _ = tf.image_summary('k', tf.expand_dims(tf.expand_dims(tf.cast(K, tf.float32), 2), 0))
        L = tf.cholesky(K)
        with tf.name_scope('mean_function'):
            m = self.mean_function(self.X)

        with tf.name_scope('mvn_density'):
            log_lik = multivariate_normal(self.Y, m, L)

        return log_lik
Example #18
    def build_predict(self, Xnew):
        """
        Xnew is a data matrix, points at which we want to predict

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        """
        Kd = self.kern.Kdiag(Xnew)
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.user_ops.triangular_solve(L, Kx, 'lower')
        V = tf.user_ops.triangular_solve(L, self.Y - self.mean_function(self.X), 'lower')
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        fvar = Kd - tf.reduce_sum(tf.square(A), reduction_indices=0)
        return fmean, tf.tile(tf.reshape(fvar, (-1,1)), [1, self.Y.shape[1]])
Example #19
def gp_predict(Xnew, X, kern, F):
    """
    Given F, representing the GP at the points X, produce the mean and variance
    of the GP at the points Xnew.

    We assume K independent GPs, represented by the columns of F. This function
    computes the Gaussian conditional

        p(F* | F) 

    Xnew is a data matrix, size N x D
    X are inducing points, size M x D
    F are function values, size M x K

    See also:
        gp_predict_whitened -- where F is rotated into V (F = LV)
        gaussian_gp_predict -- similar, but with uncertainty in F

    """

    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kdiag = kern.Kdiag(Xnew)
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    #this is O(N M^2)
    A = tf.user_ops.triangular_solve(Lm, Kmn, 'lower')
    B = tf.user_ops.triangular_solve(tf.transpose(Lm), A,
                                     'upper')  # B is Kmm^{-1} Kmn

    #construct the mean and variance of q(f*)
    fmean = tf.matmul(tf.transpose(B), F)
    fvar = Kdiag - tf.reduce_sum(tf.square(A), 0)
    fvar = tf.expand_dims(fvar, 1)

    return fmean, fvar
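The two triangular solves compute the noise-free conditional of the joint GP prior (the docstring restated in equations; A = Lm^-1 Kmn and B = Kmm^-1 Kmn as in the code):

    p(F* | F) = N( B^T F,  diag(Knn) - sum(A^2, 0) )

since Kmn^T Kmm^-1 Kmn = A^T A, the column sums of A^2 are exactly the variance reduction from conditioning on F.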
Example #20
def gp_predict(Xnew, X, kern, F):
    """
    Given F, representing the GP at the points X, produce the mean and variance
    of the GP at the points Xnew.

    We assume K independent GPs, represented by the columns of F. This function
    computes the Gaussian conditional

        p(F* | F) 

    Xnew is a data matrix, size N x D
    X are inducing points, size M x D
    F are function values, size M x K

    See also:
        gp_predict_whitened -- where F is rotated into V (F = LV)
        gaussian_gp_predict -- similar, but with uncertainty in F

    """
 
    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kdiag = kern.Kdiag(Xnew)
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data)*1e-6
    Lm = tf.cholesky(Kmm)

    #this is O(N M^2)
    A = tf.user_ops.triangular_solve(Lm, Kmn, 'lower')
    B = tf.user_ops.triangular_solve(tf.transpose(Lm), A, 'upper') # B is Kmm^{-1} Kmn

    #construct the mean and variance of q(f*)
    fmean = tf.matmul(tf.transpose(B), F)
    fvar = Kdiag - tf.reduce_sum(tf.square(A), 0)
    fvar = tf.expand_dims(fvar, 1)

    return fmean, fvar
Example #21
 def K(self, X, X2=None):
     if X2 is None:
         return self.variance * eye(tf.shape(X)[0])
     else:
         return tf.zeros(tf.pack([tf.shape(X)[0],
                                  tf.shape(X2)[0]]), tf.float64)
Example #22
def gaussian_gp_predict_whitened(Xnew, X, kern, q_mu, q_sqrt, num_columns):
    """
    Given an (approximate) posterior (via q_mu, q_sqrt) to the GP at the points
    X, produce the mean and variance of the GP at the points Xnew.
    Additionally, the GP has been centered (whitened) so that 
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).

    We assume K independent GPs, represented by the columns of q_mu (and the
    last axis of q_sqrt).  q_mu and q_sqrt are variational posteriors for v, so
        q(v[:,i]) = N( q_mu[:,i], diag(q_sqrt[:,i]**2) )
        q(f[:,i]) = N (L q_mu[:,i],  L diag(q_sqrt**2) L^T)
    or
        q(f[:,i]) = N (L q_mu,  L [W W^T] L^T)
    where W is the lower triangle of q_sqrt[:,:,i]. 

    This function computes the Gaussian integral
        q(f*) = \int p(f*|(f=Lv))q(v) df.

    Xnew is a data matrix, size N x D
    X are data points, size M x D
    q_mu are variational means, size M x K
    q_sqrt are variational standard-deviations or Cholesky matrices, size M x K or M x M x K

    Note (and TODO):
        At the moment, num_columns only gets used for the q_sqrt.ndim==3 case,
        and it tells us the value of q_sqrt.shape()[2]. We need to find a way
        to get this from the tf graph. 


    See also:
        gp_predict_whitened -- where there is no uncertainty in V
        gaussian_gp_predict -- same without the whitening

    """

    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kdiag = kern.Kdiag(Xnew)
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    #this is O(N M^2)
    A = tf.user_ops.triangular_solve(Lm, Kmn, 'lower')

    #construct the mean and variance of q(f)
    fmean = tf.matmul(tf.transpose(A), q_mu)
    if q_sqrt.get_shape().ndims == 2:
        #we have a diagonal form for q(v)
        q_var = tf.square(q_sqrt)
        #fvar = Kdiag[:,None] + tf.reduce_sum((tf.square(tf.transpose(A)))[:,:,None] * (q_var[None, :,:] - 1),1)
        fvar = tf.reshape(Kdiag, (-1, 1)) + tf.reduce_sum(
            tf.expand_dims(tf.square(tf.transpose(A)), 2) *
            (tf.expand_dims(q_var, 0) - 1.0), 1)
        return fmean, fvar
    elif q_sqrt.get_shape().ndims == 3:
        # we have the cholesky form for q(v)
        fvar = Kdiag - tf.reduce_sum(tf.square(A), 0)
        projected_var = []
        for d in range(num_columns):
            L = tf.user_ops.triangle(q_sqrt[:, :, d], 'lower')
            LTA = tf.matmul(tf.transpose(L), A)
            projected_var.append(fvar + tf.reduce_sum(tf.square(LTA), 0))
        fvar = tf.transpose(tf.pack(projected_var))
        return fmean, fvar
    else:
        raise ValueError, "Bad dimension for q_sqrt: %s" % str(
            q_sqrt.get_shape().ndims)
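In the diagonal branch above, the broadcast implements the usual remove-prior, add-posterior identity in whitened coordinates (a reading of the broadcast, not text from the source): for test point n and output column k,

    var(f*[n, k]) = Kdiag[n] + sum_j A[j, n]^2 * (q_var[j, k] - 1)
                  = Kdiag[n] - sum_j A[j, n]^2 + sum_j A[j, n]^2 * q_var[j, k]

i.e. the prior variance, minus the conditioning correction, plus the variance of q(v) projected through A.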
Example #23
def gaussian_gp_predict(Xnew, X, kern, q_mu, q_sqrt, num_columns):
    """
    Given an (approximate) posterior (via q_mu, q_sqrt) to the GP at the points
    X, produce the mean and variance of the GP at the points Xnew.

    We assume K independent GPs, represented by the columns of q_mu (and the
    last axis of q_sqrt).  q_mu and q_sqrt are variational posteriors for f, so
        q(f[:,i]) = N (q_mu[:,i],  diag(q_sqrt[:,i]**2))
    or
        q(f[:,i]) = N (q_mu,  W W^T)
    where W is the lower triangle of q_sqrt[:,:,i]. 

    This function computes the Gaussian integral
        q(f*) = \int p(f*|f)q(f) df.

    Xnew is a data matrix, size N x D
    X are inducing points, size M x D
    q_mu are variational means, size M x K
    q_sqrt are variational standard-deviations or Cholesky matrices, size M x K or M x M x K
    num_columns is the number of columns in q_mu. 

    Note (and TODO):
        At the moment, num_columns only gets used for the q_sqrt.ndim==3 case,
        and it tells us the value of q_sqrt.shape()[2]. We need to find a way
        to get this from the tf graph. 

    See also:
        gp_predict -- where there is no uncertainty in F
        gaussian_gp_predict_whitened -- the same, but with whitening (centering) the F variables

    """

    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kdiag = kern.Kdiag(Xnew)
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    #this is O(N M^2)
    A = tf.user_ops.triangular_solve(Lm, Kmn, 'lower')
    B = tf.user_ops.triangular_solve(tf.transpose(Lm), A,
                                     'upper')  # B is Kmm^{-1} Kmn

    #construct the mean and variance of q(f*)
    fmean = tf.matmul(tf.transpose(B), q_mu)
    fvar = Kdiag - tf.reduce_sum(tf.square(A), 0)
    fvar = tf.expand_dims(fvar, 1)
    if q_sqrt.get_shape().ndims == 2:
        #we have a diagonal form for q(f)
        fvar = fvar + tf.reduce_sum(
            tf.square(
                tf.expand_dims(tf.transpose(B), 2) *
                tf.expand_dims(q_sqrt, 0)), 1)
    elif q_sqrt.get_shape().ndims == 3:
        # we have the cholesky form for q(f)
        projected_var = []
        for d in range(num_columns):
            L = tf.user_ops.triangle(q_sqrt[:, :, d], 'lower')
            LTB = tf.matmul(tf.transpose(L), B)
            projected_var.append(tf.reduce_sum(tf.square(LTB), 0))
        fvar = fvar + tf.transpose(tf.pack(projected_var))
    else:
        raise ValueError, "Bad dimension for q_sqrt: %s" % str(
            q_sqrt.get_shape().ndims)

    return fmean, fvar
Example #24
def conditional(Xnew, X, kern, f, num_columns, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that 
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    in this case 'f' represents the values taken by v. 

    The method can either return the diagonals of the covariance matrix for
    each output or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last axis of q_sqrt).

    Xnew is a data matrix, size N x D
    X are data points, size M x D
    kern is a GPflow kernel
    f is a data matrix, M x K, representing the function values at X.
    num_columns is an integer: the number of columns in the f matrix (must match q_sqrt's last dimension)
    (optional) q_sqrt is a matrix of standard-deviations or Cholesky matrices, size M x K or M x M x K
    (optional) whiten is a boolean: whether to whiten the representation as described above. 


    These functions are now considered deprecated, subsumed into this one function:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data)*1e-6
    Lm = tf.cholesky(Kmm)

    #Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    #compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
        fvar = tf.tile(tf.expand_dims(fvar, 2), [1, 1, num_columns])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.expand_dims(fvar, 1), [1, num_columns])

    #another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    #construct the conditional mean 
    fmean = tf.matmul(tf.transpose(A), f)

    #add extra projected variance from q(f) if needed
    if q_sqrt is not None:
        projected_var = []
        for d in range(num_columns):
            if q_sqrt.get_shape().ndims==2:
                LTA = A*q_sqrt[:,d:d+1]
            elif q_sqrt.get_shape().ndims==3:
                L = tf.user_ops.triangle(q_sqrt[:,:,d], 'lower')
                LTA = tf.matmul(tf.transpose(L), A)
            else: # pragma no cover
                raise ValueError, "Bad dimension for q_sqrt: %s"%str(q_sqrt.get_shape().ndims)
            if full_cov:
                projected_var.append(tf.matmul(tf.transpose(LTA),LTA))
            else:
                projected_var.append(tf.reduce_sum(tf.square(LTA),0))
        fvar = fvar + tf.transpose(tf.pack(projected_var))

    return fmean, fvar
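A compact summary of the cases handled above (notation as in the code, with A = Lm^-1 Kmn before the optional back-substitution):

    whitened:      mean = A^T f                    base var = Knn - A^T A
    unwhitened:    mean = (Kmm^-1 Kmn)^T f         base var = Knn - A^T A
    q_sqrt given:  var += (W^T A~)^T (W^T A~) per column d, where A~ is the (possibly
                   back-substituted) A and W is diag(q_sqrt[:, d]) or the lower
                   triangle of q_sqrt[:, :, d]

so the projected_var loop adds the extra uncertainty carried by q(f), and full_cov simply keeps full matrices instead of diagonals.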
Example #25
def gaussian_gp_predict(Xnew, X, kern, q_mu, q_sqrt, num_columns):
    """
    Given an (approximate) posterior (via q_mu, q_sqrt) to the GP at the points
    X, produce the mean and variance of the GP at the points Xnew.

    We assume K independent GPs, represented by the columns of q_mu (and the
    last axis of q_sqrt).  q_mu and q_sqrt are variational posteriors for f, so
        q(f[:,i]) = N (q_mu[:,i],  diag(q_sqrt[:,i]**2))
    or
        q(f[:,i]) = N (q_mu,  W W^T)
    where W is the lower triangle of q_sqrt[:,:,i]. 

    This function computes the Gaussian integral
        q(f*) = \int p(f*|f)q(f) df.

    Xnew is a data matrix, size N x D
    X are inducing points, size M x D
    q_mu are variational means, size M x K
    q_sqrt are variational standard-deviations or Cholesky matrices, size M x K or M x M x K
    num_columns is the number of columns in q_mu. 

    Note (and TODO):
        At the moment, num_columns only gets used for the q_sqrt.ndim==3 case,
        and it tells us the value of q_sqrt.shape()[2]. We need to find a way
        to get this from the tf graph. 

    See also:
        gp_predict -- where there is no uncertainty in F
        gaussian_gp_predict_whitened -- the same, but with whitening (centering) the F variables

    """
 
    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kdiag = kern.Kdiag(Xnew)
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data)*1e-6
    Lm = tf.cholesky(Kmm)

    #this is O(N M^2)
    A = tf.user_ops.triangular_solve(Lm, Kmn, 'lower')
    B = tf.user_ops.triangular_solve(tf.transpose(Lm), A, 'upper') # B is Kmm^{-1} Kmn

    #construct the mean and variance of q(f*)
    fmean = tf.matmul(tf.transpose(B), q_mu)
    fvar = Kdiag - tf.reduce_sum(tf.square(A), 0)
    fvar = tf.expand_dims(fvar, 1)
    if q_sqrt.get_shape().ndims==2:
        #we have a diagonal form for q(f)
        fvar = fvar + tf.reduce_sum(tf.square(tf.expand_dims(tf.transpose(B), 2) * tf.expand_dims(q_sqrt, 0)),1)
    elif q_sqrt.get_shape().ndims==3:
        # we have the cholesky form for q(f)
        projected_var = []
        for d in range(num_columns):
            L = tf.user_ops.triangle(q_sqrt[:,:,d], 'lower')
            LTB = tf.matmul(tf.transpose(L), B)
            projected_var.append(tf.reduce_sum(tf.square(LTB),0))
        fvar = fvar + tf.transpose(tf.pack(projected_var))
    else:
        raise ValueError, "Bad dimension for q_sqrt: %s"%str(q_sqrt.get_shape().ndims)

    return fmean, fvar
Example #26
def gaussian_gp_predict_whitened(Xnew, X, kern, q_mu, q_sqrt, num_columns):
    """
    Given an (approximate) posterior (via q_mu, q_sqrt) to the GP at the points
    X, produce the mean and variance of the GP at the points Xnew.
    Additionally, the GP has been centered (whitened) so that 
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).

    We assume K independent GPs, represented by the columns of q_mu (and the
    last axis of q_sqrt).  q_mu and q_sqrt are variational posteriors for v, so
        q(v[:,i]) = N( q_mu[:,i], diag(q_sqrt[:,i]**2) )
        q(f[:,i]) = N (L q_mu[:,i],  L diag(q_sqrt**2) L^T)
    or
        q(f[:,i]) = N (L q_mu,  L [W W^T] L^T)
    where W is the lower triangle of q_sqrt[:,:,i]. 

    This function computes the Gaussian integral
        q(f*) = \int p(f*|(f=Lv))q(v) df.

    Xnew is a data matrix, size N x D
    X are data points, size M x D
    q_mu are variational means, size M x K
    q_sqrt are variational standard-deviations or Cholesky matrices, size M x K or M x M x K

    Note (and TODO):
        At the moment, num_columns only gets used for the q_sqrt.ndim==3 case,
        and it tells us the value of q_sqrt.shape()[2]. We need to find a way
        to get this from the tf graph. 


    See also:
        gp_predict_whitened -- where there is no uncertainty in V
        gaussian_gp_predict -- same without the whitening

    """
 
    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kdiag = kern.Kdiag(Xnew)
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data)*1e-6
    Lm = tf.cholesky(Kmm)

    #this is O(N M^2)
    A = tf.user_ops.triangular_solve(Lm, Kmn, 'lower')

    #construct the mean and variance of q(f)
    fmean = tf.matmul(tf.transpose(A), q_mu)
    if q_sqrt.get_shape().ndims==2:
        #we have a diagonal form for q(v)
        q_var = tf.square(q_sqrt)
        #fvar = Kdiag[:,None] + tf.reduce_sum((tf.square(tf.transpose(A)))[:,:,None] * (q_var[None, :,:] - 1),1)
        fvar = tf.reshape(Kdiag, (-1,1)) + tf.reduce_sum(tf.expand_dims(tf.square(tf.transpose(A)), 2) * (tf.expand_dims(q_var, 0) - 1.0),1)
        return fmean, fvar
    elif q_sqrt.get_shape().ndims==3:
        # we have the cholesky form for q(v)
        fvar = Kdiag - tf.reduce_sum(tf.square(A), 0)
        projected_var = []
        for d in range(num_columns):
            L = tf.user_ops.triangle(q_sqrt[:,:,d], 'lower')
            LTA = tf.matmul(tf.transpose(L), A)
            projected_var.append(fvar + tf.reduce_sum(tf.square(LTA),0))
        fvar = tf.transpose(tf.pack(projected_var))
        return fmean, fvar
    else:
        raise ValueError, "Bad dimension for q_sqrt: %s"%str(q_sqrt.get_shape().ndims)
Example #27
def conditional(Xnew,
                X,
                kern,
                f,
                num_columns,
                full_cov=False,
                q_sqrt=None,
                whiten=False):
    """
    Given F, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about F as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that 
        p(v) = N( 0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    in this case 'f' represents the values taken by v. 

    The method can either return the diagonals of the covariance matrix for
    each output or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last axis of q_sqrt).

    Xnew is a data matrix, size N x D
    X are data points, size M x D
    kern is a GPflow kernel
    f is a data matrix, M x K, representing the function values at X.
    num_columns is an integer: the number of columns in the f matrix (must match q_sqrt's last dimension)
    (optional) q_sqrt is a matrix of standard-deviations or Cholesky matrices, size M x K or M x M x K
    (optional) whiten is a boolean: whether to whiten the representation as described above. 


    These functions are now considered deprecated, subsumed into this one function:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    #compute kernel stuff
    num_data = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(num_data) * 1e-6
    Lm = tf.cholesky(Kmm)

    #Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    #compute the covariance due to the conditioning
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
        fvar = tf.tile(tf.expand_dims(fvar, 2), [1, 1, num_columns])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.expand_dims(fvar, 1), [1, num_columns])

    #another backsubstitution in the unwhitened case
    if not whiten:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    #construct the conditional mean
    fmean = tf.matmul(tf.transpose(A), f)

    #add extra projected variance from q(f) if needed
    if q_sqrt is not None:
        projected_var = []
        for d in range(num_columns):
            if q_sqrt.get_shape().ndims == 2:
                LTA = A * q_sqrt[:, d:d + 1]
            elif q_sqrt.get_shape().ndims == 3:
                L = tf.batch_matrix_band_part(q_sqrt[:, :, d], -1, 0)
                LTA = tf.matmul(tf.transpose(L), A)
            else:  # pragma no cover
                raise ValueError, "Bad dimension for q_sqrt: %s" % str(
                    q_sqrt.get_shape().ndims)
            if full_cov:
                projected_var.append(tf.matmul(tf.transpose(LTA), LTA))
            else:
                projected_var.append(tf.reduce_sum(tf.square(LTA), 0))
        fvar = fvar + tf.transpose(tf.pack(projected_var))

    return fmean, fvar
Example #28
 def K(self, X, X2=None):
     if X2 is None:
         return self.variance * eye(tf.shape(X)[0])
     else:
         return tf.zeros(tf.pack([tf.shape(X)[0], tf.shape(X2)[0]]), tf.float64)