# NB: the snippets below are methods from GPflow-0.x-era model classes and
# assume TensorFlow 1.x plus helpers from the surrounding codebase
# (`eye`, `settings`, `float_type`, `make_Kuf`/`make_Kuu`, `kronecker_product`).

def build_prior_KL(self):
    K1 = self.kern1.K(self.Z) + \
        eye(self.num_inducing) * settings.numerics.jitter_level
    KL1 = GPflow.kullback_leiblers.gauss_kl(self.q_mu1, self.q_sqrt1, K1)
    K2 = self.kern2.K(self.Z) + \
        eye(self.num_inducing) * settings.numerics.jitter_level
    KL2 = GPflow.kullback_leiblers.gauss_kl(self.q_mu2, self.q_sqrt2, K2)
    return KL1 + KL2

def build_likelihood(self):
    """
    Construct a TensorFlow function to compute the likelihood,
        log p(Y | theta).
    """
    # forward mapping
    K_forward = self.kern.K(self.X) + \
        eye(tf.shape(self.X)[0]) * self.likelihood.variance
    L_forward = tf.cholesky(K_forward)

    # log likelihood of the forward mapping is a multivariate normal
    diff_forward = self.Y - self.mean_function(self.X)
    alpha_forward = tf.matrix_triangular_solve(L_forward, diff_forward,
                                               lower=True)

    # data dimensions (use the static rank here: comparing tf.rank(...)
    # to 1 with Python `==` never holds for a Tensor)
    num_dims_forward = 1 if self.Y.get_shape().ndims == 1 \
        else tf.shape(self.Y)[1]
    num_dims_forward = tf.cast(num_dims_forward, float_type)
    num_points_forward = tf.cast(tf.shape(self.Y)[0], float_type)

    # compute log likelihood
    llh_forward = -0.5 * num_dims_forward * num_points_forward * \
        np.log(2 * np.pi)
    llh_forward += -num_dims_forward * tf.reduce_sum(
        tf.log(tf.diag_part(L_forward)))
    llh_forward += -0.5 * tf.reduce_sum(tf.square(alpha_forward))

    # backward mapping
    K_backward = self.back_kern.K(self.Y) + \
        eye(tf.shape(self.Y)[0]) * self.back_likelihood.variance
    L_backward = tf.cholesky(K_backward)

    # log likelihood of the backward mapping is a multivariate normal
    diff_backward = self.X - self.mean_function(self.Y)
    alpha_backward = tf.matrix_triangular_solve(L_backward, diff_backward,
                                                lower=True)

    # data dimensions
    num_dims_backward = 1 if self.X.get_shape().ndims == 1 \
        else tf.shape(self.X)[1]
    num_dims_backward = tf.cast(num_dims_backward, float_type)
    num_points_backward = tf.cast(tf.shape(self.X)[0], float_type)

    # compute log likelihood
    llh_backward = -0.5 * num_dims_backward * num_points_backward * \
        np.log(2 * np.pi)
    llh_backward += -num_dims_backward * tf.reduce_sum(
        tf.log(tf.diag_part(L_backward)))
    llh_backward += -0.5 * tf.reduce_sum(tf.square(alpha_backward))

    return llh_forward + llh_backward

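# A standalone NumPy sketch (illustrative, not part of the model class) of the
# Cholesky-based multivariate normal log-likelihood used above: for Y with D
# independent columns and L L^T = K,
#   log p(Y) = -0.5*N*D*log(2*pi) - D*sum(log(diag(L))) - 0.5*sum(alpha^2),
# where alpha = L^{-1} Y.  The toy matrices here are made up for the check.
import numpy as np

rng = np.random.RandomState(0)
N, D = 5, 2
A = rng.randn(N, N)
K = A @ A.T + N * np.eye(N)           # a well-conditioned PSD "kernel" matrix
Y = rng.randn(N, D)

L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L, Y)
llh = (-0.5 * N * D * np.log(2 * np.pi)
       - D * np.sum(np.log(np.diag(L)))
       - 0.5 * np.sum(alpha ** 2))

# reference computation without the Cholesky shortcut
direct = (-0.5 * N * D * np.log(2 * np.pi)
          - 0.5 * D * np.linalg.slogdet(K)[1]
          - 0.5 * np.trace(Y.T @ np.linalg.solve(K, Y)))
assert np.allclose(llh, direct)
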
def inv_diag(self):
    d_col = tf.expand_dims(self.d, 1)
    WTDi = tf.transpose(self.W / d_col)
    M = eye(tf.shape(self.W)[1]) + tf.matmul(WTDi, self.W)
    L = tf.cholesky(M)
    tmp1 = tf.matrix_triangular_solve(L, WTDi, lower=True)
    return 1. / self.d - tf.reduce_sum(tf.square(tmp1), 0)

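# A standalone NumPy sketch (illustrative, not part of the class) of the
# Woodbury identity behind inv_diag(): with K = D + W W^T and
# M = I + W^T D^{-1} W = L L^T,
#   diag(K^{-1}) = 1/d - colsum(square(L^{-1} W^T D^{-1})).
import numpy as np

rng = np.random.RandomState(0)
d = rng.rand(6) + 1.0                 # positive diagonal of D
W = rng.randn(6, 2)                   # low-rank factor
K = np.diag(d) + W @ W.T

WTDi = (W / d[:, None]).T
M = np.eye(2) + WTDi @ W
L = np.linalg.cholesky(M)
tmp1 = np.linalg.solve(L, WTDi)
inv_diag = 1.0 / d - np.sum(tmp1 ** 2, axis=0)
assert np.allclose(inv_diag, np.diag(np.linalg.inv(K)))
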
def build_predict(self, Xnew, full_cov=False):
    """
    Xnew is a data matrix of points at which we want to predict.

    This method computes p(F* | Y), where F* are points on the GP at
    Xnew. It mirrors standard GP regression.
    """
    # compute kernel for test points
    Kx = self.kern.K(self.X, Xnew)

    # compute kernel matrix and its Cholesky decomposition
    K = self.kern.K(self.X) + \
        eye(tf.shape(self.X)[0]) * self.likelihood.variance
    L = tf.cholesky(K)

    # compute L^-1 kx
    A = tf.matrix_triangular_solve(L, Kx, lower=True)

    # compute L^-1 (y - mu(x))
    V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))

    # compute fmean = kx^T K^-1 (y - mu(x)) + mu(x*)
    fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)

    if full_cov:
        # full covariance: kxx - kx^T K^-1 kx, tiled over output dims
        fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
    else:
        # marginal variances only
        fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
    return fmean, fvar

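# A standalone NumPy sketch (illustrative; the rbf helper and toy data are
# assumptions) checking the Cholesky route above against the textbook GP
# predictive equations with zero mean:
#   fmean = Kx^T K^{-1} y,   fvar = diag(Kxx - Kx^T K^{-1} Kx).
import numpy as np

rng = np.random.RandomState(0)
def rbf(Xa, Xb):
    return np.exp(-0.5 * (Xa[:, None] - Xb[None, :]) ** 2)

X, Xnew = rng.randn(7), rng.randn(4)
y = rng.randn(7, 1)
noise = 0.1
K = rbf(X, X) + noise * np.eye(7)
Kx = rbf(X, Xnew)                       # plays the role of kern.K(X, Xnew)

L = np.linalg.cholesky(K)
A = np.linalg.solve(L, Kx)              # L^{-1} Kx
V = np.linalg.solve(L, y)               # L^{-1} y
fmean = A.T @ V
fvar = np.diag(rbf(Xnew, Xnew)) - np.sum(A ** 2, axis=0)

Ki = np.linalg.inv(K)
assert np.allclose(fmean, Kx.T @ Ki @ y)
assert np.allclose(fvar, np.diag(rbf(Xnew, Xnew) - Kx.T @ Ki @ Kx))
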
def build_predict(self, Xnew, full_cov=False):
    num_inducing = tf.size(self.ms)
    err = self.Y - self.mean_function(self.X)
    Kuf = make_Kuf(self.kern, self.X, self.a, self.b, self.ms)
    Kuu = make_Kuu(self.kern, self.a, self.b, self.ms)
    Kuu = Kuu.get()
    sigma = tf.sqrt(self.likelihood.variance)

    # compute intermediate matrices
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf) / sigma
    AAT = tf.matmul(A, tf.transpose(A))
    B = AAT + eye(num_inducing * 2 - 1)
    LB = tf.cholesky(B)
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, err)) / sigma

    Kus = make_Kuf(self.kern, Xnew, self.a, self.b, self.ms)
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tf.transpose(tmp2), c)

    if full_cov:
        var = self.kern.K(Xnew) + \
            tf.matmul(tf.transpose(tmp2), tmp2) - \
            tf.matmul(tf.transpose(tmp1), tmp1)
        # tf.stack replaces the long-deprecated tf.pack
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + \
            tf.reduce_sum(tf.square(tmp2), 0) - \
            tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var

def build_likelihood(self):
    num_inducing = tf.size(self.ms)
    num_data = tf.shape(self.Y)[0]
    output_dim = tf.shape(self.Y)[1]

    err = self.Y - self.mean_function(self.X)
    Kdiag = self.kern.Kdiag(self.X)
    Kuf = make_Kuf(self.kern, self.X, self.a, self.b, self.ms)
    Kuu = make_Kuu(self.kern, self.a, self.b, self.ms)
    Kuu = Kuu.get()
    sigma = tf.sqrt(self.likelihood.variance)

    # compute intermediate matrices
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf) / sigma
    AAT = tf.matmul(A, tf.transpose(A))
    B = AAT + eye(num_inducing * 2 - 1)
    LB = tf.cholesky(B)
    log_det_B = 2. * tf.reduce_sum(tf.log(tf.diag_part(LB)))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, err)) / sigma

    # compute the log marginal bound
    ND = tf.cast(num_data * output_dim, float_type)
    D = tf.cast(output_dim, float_type)
    bound = -0.5 * ND * tf.log(2 * np.pi * self.likelihood.variance)
    bound += -0.5 * D * log_det_B
    bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * tf.reduce_sum(Kdiag) / self.likelihood.variance
    bound += 0.5 * tf.reduce_sum(tf.diag_part(AAT))
    return bound

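# A standalone NumPy sketch (illustrative; toy rbf kernel, single output) of
# the identities behind the bound above, assuming the standard collapsed
# sparse-GP bound
#   L = log N(y | 0, Qff + sigma^2 I) - 1/(2 sigma^2) tr(Kff - Qff),
# with Qff = Kuf^T Kuu^{-1} Kuf.  A generic M-dimensional Kuu is used here;
# in the code above Kuu has size 2*num_inducing - 1 for the Fourier features.
import numpy as np

rng = np.random.RandomState(1)
def rbf(Xa, Xb):
    return np.exp(-0.5 * (Xa[:, None] - Xb[None, :]) ** 2)

N, M_ind, sigma2 = 6, 3, 0.1
X, Z = rng.randn(N), rng.randn(M_ind)
y = rng.randn(N, 1)
sigma = np.sqrt(sigma2)

Kuu = rbf(Z, Z) + 1e-8 * np.eye(M_ind)
Kuf = rbf(Z, X)
Kff = rbf(X, X)

# dense reference: log N(y | 0, Qff + sigma^2 I) minus the trace correction
Qff = Kuf.T @ np.linalg.solve(Kuu, Kuf)
Sy = Qff + sigma2 * np.eye(N)
dense = (-0.5 * N * np.log(2 * np.pi)
         - 0.5 * np.linalg.slogdet(Sy)[1]
         - 0.5 * (y.T @ np.linalg.solve(Sy, y)).item()
         - 0.5 * np.trace(Kff - Qff) / sigma2)

# Cholesky route mirroring the code above
L = np.linalg.cholesky(Kuu)
A = np.linalg.solve(L, Kuf) / sigma
B = np.eye(M_ind) + A @ A.T
LB = np.linalg.cholesky(B)
c = np.linalg.solve(LB, A @ y) / sigma
bound = (-0.5 * N * np.log(2 * np.pi * sigma2)
         - np.sum(np.log(np.diag(LB)))
         - 0.5 * np.sum(y ** 2) / sigma2
         + 0.5 * np.sum(c ** 2)
         - 0.5 * np.trace(Kff) / sigma2
         + 0.5 * np.trace(A @ A.T))
assert np.allclose(dense, bound)
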
def Cholesky(self, X):
    core = self._Kcore(X, X2=None) + \
        eye(tf.shape(X)[0]) * settings.numerics.jitter_level
    chol = tf.cholesky(core)
    var = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.sqrt(self.variance), 0), 0),
        [tf.shape(core)[0], tf.shape(core)[1], 1])
    return var * tf.tile(tf.expand_dims(chol, -1), [1, 1, tf.shape(var)[2]])

def solve(self, B):
    d_col = tf.expand_dims(self.d, 1)
    DiB = B / d_col
    DiW = self.W / d_col
    WTDiB = tf.matmul(tf.transpose(DiW), B)
    M = eye(tf.shape(self.W)[1]) + tf.matmul(tf.transpose(DiW), self.W)
    L = tf.cholesky(M)
    tmp1 = tf.matrix_triangular_solve(L, WTDiB, lower=True)
    tmp2 = tf.matrix_triangular_solve(tf.transpose(L), tmp1, lower=False)
    return DiB - tf.matmul(DiW, tmp2)

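# A standalone NumPy sketch (illustrative, not part of the class) of the
# Woodbury identity used by solve():
#   (D + W W^T)^{-1} B
#     = D^{-1}B - D^{-1}W (I + W^T D^{-1} W)^{-1} W^T D^{-1} B.
import numpy as np

rng = np.random.RandomState(0)
d = rng.rand(5) + 1.0                 # positive diagonal of D
W = rng.randn(5, 2)                   # low-rank factor
B = rng.randn(5, 3)

K = np.diag(d) + W @ W.T              # the full matrix D + W W^T
direct = np.linalg.solve(K, B)

DiB = B / d[:, None]
DiW = W / d[:, None]
M = np.eye(2) + W.T @ DiW
woodbury = DiB - DiW @ np.linalg.solve(M, W.T @ DiB)
assert np.allclose(direct, woodbury)
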
def Cholesky(self, X):
    """
    Override Cholesky for a speed-up.
    X should be dim2*dim2.
    """
    chol_dim1 = tf.cholesky(
        self._Kcore(self.dim1, X2=None) +
        eye(tf.shape(self.dim1)[0]) * settings.numerics.jitter_level)
    chol_dim2 = tf.cholesky(
        self._Kcore(self.dim2, X2=None) +
        eye(tf.shape(self.dim2)[0]) * settings.numerics.jitter_level)

    # core of the Cholesky factor
    chol = kronecker_product(chol_dim1, chol_dim2)

    # expand and tile the variance
    var = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.sqrt(self.variance), 0), 0),
        [tf.shape(chol)[0], tf.shape(chol)[1], 1])
    return var * tf.tile(tf.expand_dims(chol, -1), [1, 1, tf.shape(var)[2]])

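# A standalone NumPy sketch (illustrative, not part of the kernel class) of
# the Kronecker identity this method exploits:
#   chol(K1 (x) K2) = chol(K1) (x) chol(K2),
# since (L1 (x) L2)(L1 (x) L2)^T = (L1 L1^T) (x) (L2 L2^T) and L1 (x) L2 is
# lower triangular with positive diagonal, hence the unique Cholesky factor.
import numpy as np

rng = np.random.RandomState(0)
def psd(n):
    A = rng.randn(n, n)
    return A @ A.T + n * np.eye(n)

K1, K2 = psd(3), psd(4)
L = np.kron(np.linalg.cholesky(K1), np.linalg.cholesky(K2))
assert np.allclose(L, np.linalg.cholesky(np.kron(K1, K2)))
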
def trace_KiX(self, X):
    """
    X is a square matrix of the same size as this one.
    If self is K, compute tr(K^{-1} X).
    """
    d_col = tf.expand_dims(self.d, 1)
    R = self.W / d_col
    RTX = tf.matmul(tf.transpose(R), X)
    RTXR = tf.matmul(RTX, R)
    M = eye(tf.shape(self.W)[1]) + tf.matmul(tf.transpose(R), self.W)
    Mi = tf.matrix_inverse(M)
    return tf.reduce_sum(tf.diag_part(X) / self.d) - \
        tf.reduce_sum(RTXR * Mi)

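# A standalone NumPy sketch (illustrative, not part of the class) of the trace
# identity used above: with K = D + W W^T, R = D^{-1} W and M = I + W^T R,
# the Woodbury formula and the cyclic property of the trace give
#   tr(K^{-1} X) = sum(diag(X)/d) - sum((R^T X R) * M^{-1}).
import numpy as np

rng = np.random.RandomState(0)
d = rng.rand(5) + 1.0
W = rng.randn(5, 2)
X = rng.randn(5, 5)
K = np.diag(d) + W @ W.T

R = W / d[:, None]
M = np.eye(2) + R.T @ W
val = np.sum(np.diag(X) / d) - np.sum((R.T @ X @ R) * np.linalg.inv(M))
assert np.allclose(val, np.trace(np.linalg.solve(K, X)))
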
def test_whiten(self):
    """
    Make sure that predicting using the whitened representation is the
    same as the non-whitened one.
    """
    with self.k.tf_mode():
        K = self.k.K(self.X) + eye(self.num_data) * 1e-6
        L = tf.cholesky(K)
        V = tf.matrix_triangular_solve(L, self.F, lower=True)
        Fstar_mean, Fstar_var = GPflow.conditionals.gp_predict(
            self.Xs, self.X, self.k, self.F)
        Fstar_w_mean, Fstar_w_var = GPflow.conditionals.gp_predict_whitened(
            self.Xs, self.X, self.k, V)

    mean1, var1 = tf.Session().run(
        [Fstar_w_mean, Fstar_w_var], feed_dict=self.feed_dict)
    mean2, var2 = tf.Session().run(
        [Fstar_mean, Fstar_var], feed_dict=self.feed_dict)

    # TODO: should tolerance be type dependent?
    self.assertTrue(np.allclose(mean1, mean2, 1e-6, 1e-6))
    self.assertTrue(np.allclose(var1, var2, 1e-6, 1e-6))

def logdet(self):
    part1 = tf.reduce_sum(tf.log(self.d))
    I = eye(tf.shape(self.W)[1])
    M = I + tf.matmul(tf.transpose(self.W) / self.d, self.W)
    part2 = 2 * tf.reduce_sum(tf.log(tf.diag_part(tf.cholesky(M))))
    return part1 + part2

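# A standalone NumPy sketch (illustrative, not part of the class) of the
# matrix determinant lemma used above:
#   log|D + W W^T| = sum(log d) + log|I + W^T D^{-1} W|,
# with the second term read off the Cholesky factor of M.
import numpy as np

rng = np.random.RandomState(0)
d = rng.rand(6) + 1.0
W = rng.randn(6, 3)
K = np.diag(d) + W @ W.T

M = np.eye(3) + (W.T / d) @ W
logdet = np.sum(np.log(d)) + 2 * np.sum(np.log(np.diag(np.linalg.cholesky(M))))
assert np.allclose(logdet, np.linalg.slogdet(K)[1])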