Example #1
 def build_prior_KL(self):
     K1 = self.kern1.K(
         self.Z) + eye(self.num_inducing) * settings.numerics.jitter_level
     KL1 = GPflow.kullback_leiblers.gauss_kl(self.q_mu1, self.q_sqrt1, K1)
     K2 = self.kern2.K(
         self.Z) + eye(self.num_inducing) * settings.numerics.jitter_level
     KL2 = GPflow.kullback_leiblers.gauss_kl(self.q_mu2, self.q_sqrt2, K2)
     return KL1 + KL2
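Assuming GPflow's gauss_kl(q_mu, q_sqrt, K) returns the divergence between the Gaussian variational posterior q(u) = N(m, S), with S = q_sqrt q_sqrt^T, and the prior p(u) = N(0, K), each term above is

    KL[q \,\|\, p] = \tfrac{1}{2}\left( \operatorname{tr}(K^{-1} S) + m^\top K^{-1} m - M + \log|K| - \log|S| \right),

where M = num_inducing; the jitter added to the diagonal only keeps the Cholesky inside gauss_kl well conditioned.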
Example #2
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the likelihood.
        \log p(Y | theta).
        """

        # forward mapping
        K_forward = self.kern.K(
            self.X) + eye(tf.shape(self.X)[0]) * self.likelihood.variance
        L_forward = tf.cholesky(K_forward)

        # log likelihood is defined using multivariate_normal function
        diff_forward = self.Y - self.mean_function(self.X)
        alpha_forward = tf.matrix_triangular_solve(L_forward,
                                                   diff_forward,
                                                   lower=True)

        # shapes: number of output dimensions and number of data points
        num_dims_forward = (1 if self.Y.get_shape().ndims == 1
                            else tf.shape(self.Y)[1])
        num_dims_forward = tf.cast(num_dims_forward, float_type)
        num_points_forward = tf.cast(tf.shape(self.Y)[0], float_type)

        # compute log likelihood
        llh_forward = -0.5 * num_dims_forward * num_points_forward * np.log(
            2 * np.pi)
        llh_forward += -num_dims_forward * tf.reduce_sum(
            tf.log(tf.diag_part(L_forward)))
        llh_forward += -0.5 * tf.reduce_sum(tf.square(alpha_forward))

        # backward mapping
        K_backward = self.back_kern.K(
            self.Y) + eye(tf.shape(self.Y)[0]) * self.back_likelihood.variance
        L_backward = tf.cholesky(K_backward)

        # log likelihood is defined using multivariate_normal function
        diff_backward = self.X - self.mean_function(self.Y)
        alpha_backward = tf.matrix_triangular_solve(L_backward,
                                                    diff_backward,
                                                    lower=True)

        # shapes: number of output dimensions and number of data points
        num_dims_backward = (1 if self.X.get_shape().ndims == 1
                             else tf.shape(self.X)[1])
        num_dims_backward = tf.cast(num_dims_backward, float_type)
        num_points_backward = tf.cast(tf.shape(self.X)[0], float_type)

        # compute log likelihood
        llh_backward = -0.5 * num_dims_backward * num_points_backward * np.log(
            2 * np.pi)
        llh_backward += -num_dims_backward * tf.reduce_sum(
            tf.log(tf.diag_part(L_backward)))
        llh_backward += -0.5 * tf.reduce_sum(tf.square(alpha_backward))

        return llh_forward + llh_backward
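Both halves accumulate the log density of a multivariate normal with the output columns treated as independent, sharing K = K_f(X, X) + \sigma^2 I = L L^\top (the forward kernel plus noise):

    \log p(Y \mid X) = -\tfrac{ND}{2}\log 2\pi - D \sum_i \log L_{ii} - \tfrac{1}{2}\,\lVert L^{-1}(Y - m(X)) \rVert_F^2,

with N data points and D output dimensions; the backward term is the same expression with the roles of X and Y swapped and back_kern/back_likelihood in place of the forward ones.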
Example #3
 def inv_diag(self):
     d_col = tf.expand_dims(self.d, 1)
     WTDi = tf.transpose(self.W / d_col)
     M = eye(tf.shape(self.W)[1]) + tf.matmul(WTDi, self.W)
     L = tf.cholesky(M)
     tmp1 = tf.matrix_triangular_solve(L, WTDi, lower=True)
     return 1. / self.d - tf.reduce_sum(tf.square(tmp1), 0)
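This reads as the diagonal of the inverse of a diagonal-plus-low-rank matrix K = diag(d) + W W^\top (an assumption about what self.d and self.W hold), via the Woodbury identity:

    K^{-1} = D^{-1} - D^{-1} W M^{-1} W^\top D^{-1}, \qquad M = I + W^\top D^{-1} W = L L^\top,

so \operatorname{diag}(K^{-1})_i = 1/d_i - \sum_k (L^{-1} W^\top D^{-1})_{ki}^2, which is exactly the column-wise sum of squares returned above.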
Example #4
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, point at which we want to predict.
        This method computes, p(F* | Y ), where F* are points on the GP at Xnew.
        This will be similar to GP Regression.
        """

        # compute kernel for test points
        Kx = self.kern.K(self.X, Xnew)

        # compute kernel matrix and cholesky decomp.
        K = self.kern.K(
            self.X) + eye(tf.shape(self.X)[0]) * self.likelihood.variance
        L = tf.cholesky(K)

        # compute L^-1kx
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        # compute L^-1(y-mu(x))
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        # compute fmean = kx^TK^-1(y-mu(x))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)

        # diag var or full variance
        if full_cov:
            # compute kxx - kxTK^-1kx
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            # compute single value for variance
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
        return fmean, fvar
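With A = L^{-1} K_x and V = L^{-1}(Y - m(X)), these are the usual GP regression posterior equations:

    \bar f_* = K_x^\top (K + \sigma^2 I)^{-1}(Y - m(X)) + m(X_*) = A^\top V + m(X_*),
    \operatorname{Cov}(f_*) = K_{**} - K_x^\top (K + \sigma^2 I)^{-1} K_x = K_{**} - A^\top A;

the diagonal branch keeps only k(x_*, x_*) - \sum_k A_{k*}^2, and both branches tile the variance across the output dimensions of Y.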
Example #5
    def build_predict(self, Xnew, full_cov=False):
        num_inducing = tf.size(self.ms)

        err = self.Y - self.mean_function(self.X)
        Kuf = make_Kuf(self.kern, self.X, self.a, self.b, self.ms)
        Kuu = make_Kuu(self.kern, self.a, self.b, self.ms)
        Kuu = Kuu.get()
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        L = tf.cholesky(Kuu)
        A = tf.matrix_triangular_solve(L, Kuf) / sigma
        AAT = tf.matmul(A, tf.transpose(A))

        B = AAT + eye(num_inducing * 2 - 1)
        LB = tf.cholesky(B)
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, err)) / sigma

        Kus = make_Kuf(self.kern, Xnew, self.a, self.b, self.ms)
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tf.transpose(tmp2), c)
        if full_cov:
            var = self.kern.K(Xnew) + \
                tf.matmul(tf.transpose(tmp2), tmp2) - \
                tf.matmul(tf.transpose(tmp1), tmp1)
            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + \
                tf.reduce_sum(tf.square(tmp2), 0) - \
                tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.stack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
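Writing A = \sigma^{-1} L^{-1} K_{uf}, B = A A^\top + I = L_B L_B^\top and c = \sigma^{-1} L_B^{-1} A (Y - m(X)), this appears to be the standard collapsed sparse-GP predictive distribution:

    \mu_* = K_{u*}^\top L^{-\top} L_B^{-\top} c + m(X_*),
    \Sigma_* = K_{**} - K_{u*}^\top K_{uu}^{-1} K_{u*} + K_{u*}^\top L^{-\top} B^{-1} L^{-1} K_{u*},

which is what tmp1 = L^{-1} K_{u*} and tmp2 = L_B^{-1} tmp1 compute; the 2M - 1 size of the identity matrix is taken on trust from whatever make_Kuu returns for the basis defined by ms.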
Example #6
    def build_likelihood(self):
        num_inducing = tf.size(self.ms)
        num_data = tf.shape(self.Y)[0]
        output_dim = tf.shape(self.Y)[1]

        err = self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = make_Kuf(self.kern, self.X, self.a, self.b, self.ms)
        Kuu = make_Kuu(self.kern, self.a, self.b, self.ms)
        Kuu = Kuu.get()
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        L = tf.cholesky(Kuu)
        A = tf.matrix_triangular_solve(L, Kuf) / sigma
        AAT = tf.matmul(A, tf.transpose(A))

        B = AAT + eye(num_inducing * 2 - 1)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, err)) / sigma

        # compute log marginal bound
        ND = tf.cast(num_data * output_dim, float_type)
        D = tf.cast(output_dim, float_type)
        bound = -0.5 * ND * tf.log(2 * np.pi * self.likelihood.variance)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(
            tf.square(err)) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * tf.reduce_sum(Kdiag) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.diag_part(AAT))

        return bound
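With the same A, B and c as in build_predict above, the accumulated terms are the familiar collapsed evidence lower bound for sparse GP regression:

    \mathcal{L} = -\tfrac{ND}{2}\log(2\pi\sigma^2) - \tfrac{D}{2}\log|B| - \tfrac{1}{2\sigma^2}\lVert Y - m(X)\rVert_F^2 + \tfrac{1}{2}\lVert c\rVert^2 - \tfrac{1}{2\sigma^2}\operatorname{tr}(K_{ff}) + \tfrac{1}{2}\operatorname{tr}(A A^\top),

where the last two terms together are the trace correction -\tfrac{1}{2\sigma^2}\operatorname{tr}(K_{ff} - Q_{ff}) with Q_{ff} = K_{uf}^\top K_{uu}^{-1} K_{uf}.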
Example #7
 def Cholesky(self, X):
     core = self._Kcore(X, X2=None) + \
                 eye(tf.shape(X)[0]) * settings.numerics.jitter_level
     chol = tf.cholesky(core)
     var = tf.tile(
         tf.expand_dims(tf.expand_dims(tf.sqrt(self.variance), 0), 0),
         [tf.shape(core)[0], tf.shape(core)[1], 1])
     return var * tf.tile(tf.expand_dims(chol, -1),
                          [1, 1, tf.shape(var)[2]])
Example #8
 def solve(self, B):
     d_col = tf.expand_dims(self.d, 1)
     DiB = B / d_col
     DiW = self.W / d_col
     WTDiB = tf.matmul(tf.transpose(DiW), B)
     M = eye(tf.shape(self.W)[1]) + tf.matmul(tf.transpose(DiW), self.W)
     L = tf.cholesky(M)
     tmp1 = tf.matrix_triangular_solve(L, WTDiB, lower=True)
     tmp2 = tf.matrix_triangular_solve(tf.transpose(L), tmp1, lower=False)
     return DiB - tf.matmul(DiW, tmp2)
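Again assuming self stands for K = diag(d) + W W^\top, this is a Woodbury solve in which the only dense factorization is of the small matrix M:

    (D + W W^\top)^{-1} B = D^{-1} B - D^{-1} W M^{-1} W^\top D^{-1} B, \qquad M = I + W^\top D^{-1} W,

with M^{-1} applied through the forward and backward triangular solves against its Cholesky factor.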
Example #9
 def Cholesky(self, X):
     """
     Overwrite cholesky for the speed up.
     X should be dim2*dim2
     """
     chol_dim1 = tf.cholesky(
             self._Kcore(self.dim1, X2=None) + \
             eye(tf.shape(self.dim1)[0]) * settings.numerics.jitter_level)
     chol_dim2 = tf.cholesky(
             self._Kcore(self.dim2, X2=None) + \
             eye(tf.shape(self.dim2)[0]) * settings.numerics.jitter_level)
     # core of the cholesky
     chol = kronecker_product(chol_dim1, chol_dim2)
     # expand and tile
     var = tf.tile(
         tf.expand_dims(tf.expand_dims(tf.sqrt(self.variance), 0), 0),
         [tf.shape(chol)[0], tf.shape(chol)[1], 1])
     return var * tf.tile(tf.expand_dims(chol, -1),
                          [1, 1, tf.shape(var)[2]])
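The speed-up rests on a standard Kronecker identity: if K_1 = L_1 L_1^\top and K_2 = L_2 L_2^\top, then

    K_1 \otimes K_2 = (L_1 \otimes L_2)(L_1 \otimes L_2)^\top,

and L_1 \otimes L_2 is itself lower triangular, so the Kronecker product of the two per-dimension Cholesky factors is a valid Cholesky factor of the full kernel without ever factorizing the large matrix.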
Example #10
 def trace_KiX(self, X):
     """
     X is a square matrix of the same size as this one.
     if self is K, compute tr(K^{-1} X)
     """
     d_col = tf.expand_dims(self.d, 1)
     R = self.W / d_col
     RTX = tf.matmul(tf.transpose(R), X)
     RTXR = tf.matmul(RTX, R)
     M = eye(tf.shape(self.W)[1]) + tf.matmul(tf.transpose(R), self.W)
     Mi = tf.matrix_inverse(M)
     return tf.reduce_sum(tf.diag_part(X) * 1. / self.d) - tf.reduce_sum(
         RTXR * Mi)
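With R = D^{-1} W and M = I + W^\top D^{-1} W, the Woodbury identity gives

    \operatorname{tr}(K^{-1} X) = \sum_i \frac{X_{ii}}{d_i} - \operatorname{tr}(M^{-1} R^\top X R),

and because M^{-1} is symmetric the second trace can be evaluated elementwise as reduce_sum(RTXR * Mi), avoiding a further matmul.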
Example #11
    def test_whiten(self):
        """
        make sure that predicting using the whitened representation is the
        sameas the non-whitened one. 
        """
        
        with self.k.tf_mode():
            K = self.k.K(self.X) + eye(self.num_data) * 1e-6
            L = tf.cholesky(K)
            V = tf.matrix_triangular_solve(L, self.F, lower=True)
            Fstar_mean, Fstar_var = GPflow.conditionals.gp_predict(self.Xs, self.X, self.k, self.F)
            Fstar_w_mean, Fstar_w_var = GPflow.conditionals.gp_predict_whitened(self.Xs, self.X, self.k, V)


        mean1, var1 = tf.Session().run([Fstar_w_mean, Fstar_w_var], feed_dict=self.feed_dict)
        mean2, var2 = tf.Session().run([Fstar_mean, Fstar_var], feed_dict=self.feed_dict)

        self.assertTrue(np.allclose(mean1, mean2, 1e-6, 1e-6)) # TODO: should tolerance be type dependent?
        self.assertTrue(np.allclose(var1, var2, 1e-6, 1e-6))
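The whitened representation replaces F by V = L^{-1} F, where

    K = L L^\top, \qquad F = L V, \qquad V \sim \mathcal{N}(0, I),

so predictions conditioned on V through gp_predict_whitened should coincide with predictions conditioned on F directly, which is what the two allclose assertions check.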
Example #12
 def logdet(self):
     part1 = tf.reduce_sum(tf.log(self.d))
     I = eye(tf.shape(self.W)[1])
     M = I + tf.matmul(tf.transpose(self.W) / self.d, self.W)
     part2 = 2 * tf.reduce_sum(tf.log(tf.diag_part(tf.cholesky(M))))
     return part1 + part2
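part1 and part2 are the two halves of the matrix determinant lemma applied to K = diag(d) + W W^\top (the same assumed low-rank structure as in the other helpers):

    \log\lvert \operatorname{diag}(d) + W W^\top \rvert = \sum_i \log d_i + \log\lvert I + W^\top D^{-1} W \rvert,

with the second log-determinant read off the diagonal of the Cholesky factor of M.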