def test_eMxKxz_no_uncertainty(kernel, inducing_variable, mean):
    exKxz = expectation(_distrs["dirac_diag"], mean, (kernel, inducing_variable))
    Kxz = kernel(Xmu, Z)
    xKxz = expectation(_distrs["dirac_gauss"], mean)[:, :, None] * Kxz[:, None, :]
    assert_allclose(exKxz, xKxz, rtol=RTOL)
def predict_f(
    self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
) -> MeanAndVariance:
    """
    Compute the mean and variance of the latent function at some new points.
    Note that this is very similar to the SGPR prediction, for which there
    are notes in the SGPR notebook.

    Note: This model does not allow full output covariances.

    :param Xnew: points at which to predict
    """
    if full_output_cov:
        raise NotImplementedError

    pX = DiagonalGaussian(self.X_data_mean, self.X_data_var)

    Y_data = self.data
    num_inducing = self.inducing_variable.num_inducing
    psi1 = expectation(pX, (self.kernel, self.inducing_variable))
    psi2 = tf.reduce_sum(
        expectation(pX, (self.kernel, self.inducing_variable),
                    (self.kernel, self.inducing_variable)),
        axis=0,
    )
    jitter = default_jitter()
    Kus = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)
    L = tf.linalg.cholesky(
        covariances.Kuu(self.inducing_variable, self.kernel, jitter=jitter))

    A = tf.linalg.triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.linalg.triangular_solve(L, psi2, lower=True)
    AAT = tf.linalg.triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=default_float())
    LB = tf.linalg.cholesky(B)
    c = tf.linalg.triangular_solve(LB, tf.linalg.matmul(A, Y_data), lower=True) / sigma
    tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
    tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
    mean = tf.linalg.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = (self.kernel(Xnew)
               + tf.linalg.matmul(tmp2, tmp2, transpose_a=True)
               - tf.linalg.matmul(tmp1, tmp1, transpose_a=True))
        shape = tf.stack([1, 1, tf.shape(Y_data)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = (self.kernel(Xnew, full_cov=False)
               + tf.reduce_sum(tf.square(tmp2), axis=0)
               - tf.reduce_sum(tf.square(tmp1), axis=0))
        shape = tf.stack([1, tf.shape(Y_data)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
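# A minimal usage sketch of the prediction path above, assuming a
# GPflow-2-style BayesianGPLVM; the constructor argument names below are an
# assumption and may differ between versions.
import numpy as np
import gpflow

Y = np.random.randn(20, 5)                      # observed data, [N, D]
latent_dim, num_inducing = 2, 8
X_mean = 0.1 * np.random.randn(20, latent_dim)  # initial latent means
X_var = np.full((20, latent_dim), 0.1)          # initial latent variances

model = gpflow.models.BayesianGPLVM(
    Y,
    X_data_mean=X_mean,
    X_data_var=X_var,
    kernel=gpflow.kernels.SquaredExponential(),
    num_inducing_variables=num_inducing,
)
Xnew = np.random.randn(3, latent_dim)           # new latent points
fmean, fvar = model.predict_f(Xnew)             # each of shape [3, D]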
def test_eKzxKxz_same_vs_different_sum_kernels(distribution, kern1, kern2, inducing_variable):
    # Check that the result is the same if we pass different objects with the same value.
    same = expectation(*(distribution, (kern1, inducing_variable), (kern1, inducing_variable)))
    different = expectation(*(distribution, (kern1, inducing_variable), (kern2, inducing_variable)))
    assert_allclose(same, different, rtol=RTOL)
def test_eKzxKxz_same_vs_different_sum_kernels(session_tf, feature):
    # Check that the result is the same if we pass different objects with the same value.
    kern1 = rbf_lin_sum_kern2()
    kern2 = copy.copy(rbf_lin_sum_kern2())
    same = expectation(*(gauss(), (kern1, feature), (kern1, feature)))
    different = expectation(*(gauss(), (kern1, feature), (kern2, feature)))
    session = tf.get_default_session()
    same, different = session.run([same, different])
    assert_allclose(same, different, rtol=RTOL)
def compute_qu(self, full_cov: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
    """
    Computes the mean and variance of q(u) = N(mu, cov), the variational
    distribution on inducing outputs. SVGP with this q(u) should predict
    identically to SGPR.

    The derivation is as follows, with beta = sigma^{-2}:

        Sig = Kuu + beta * Kuf @ Kfu
        cov = Kuu @ Sig^{-1} @ Kuu
        mu  = beta * Kuu @ Sig^{-1} @ Kuf @ y

    :return: mu, cov
    """
    Y_data = self.data
    X_data_mean, X_data_var = self.encoder(Y_data)
    pX = DiagonalGaussian(X_data_mean, X_data_var)

    # E_qx[Kfu]
    psi1 = expectation(pX, (self.kernel, self.inducing_variable))
    # E_qx[Kuf @ Kfu]
    psi2 = tf.reduce_sum(
        expectation(pX, (self.kernel, self.inducing_variable),
                    (self.kernel, self.inducing_variable)),
        axis=0)

    kuu = covariances.Kuu(self.inducing_variable, self.kernel, jitter=default_jitter())
    kuf = tf.transpose(psi1)

    sig = kuu + psi2 * (self.likelihood.variance ** -1)
    sig_sqrt = tf.linalg.cholesky(sig)
    sig_sqrt_kuu = tf.linalg.triangular_solve(sig_sqrt, kuu)

    cov = tf.linalg.matmul(sig_sqrt_kuu, sig_sqrt_kuu, transpose_a=True)

    err = Y_data - self.mean_function(X_data_mean)
    mu = (tf.linalg.matmul(
        sig_sqrt_kuu,
        tf.linalg.triangular_solve(sig_sqrt, tf.linalg.matmul(kuf, err)),
        transpose_a=True) / self.likelihood.variance)

    if not full_cov:
        return mu, cov
    else:
        return mu, tf.tile(cov[None, :, :], [mu.shape[-1], 1, 1])
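# A plain-NumPy sanity check of the q(u) algebra above (my own illustration;
# in compute_qu, psi2 stands in for Kuf @ Kfu and psi1.T for Kuf).
import numpy as np

rng = np.random.default_rng(0)
M, N, D = 3, 10, 2
sigma2 = 0.1
beta = 1.0 / sigma2

A = rng.standard_normal((M, M))
Kuu = A @ A.T + np.eye(M)             # positive-definite [M, M]
Kuf = rng.standard_normal((M, N))     # [M, N]
y = rng.standard_normal((N, D))       # [N, D]

Sig = Kuu + beta * Kuf @ Kuf.T                  # Kuu + beta * Kuf @ Kfu
S = Kuu @ np.linalg.solve(Sig, Kuu)             # cov of q(u)
m = beta * Kuu @ np.linalg.solve(Sig, Kuf @ y)  # mean of q(u)

# The Cholesky route used in compute_qu gives the same results:
L = np.linalg.cholesky(Sig)
LiKuu = np.linalg.solve(L, Kuu)       # corresponds to sig_sqrt_kuu
np.testing.assert_allclose(S, LiKuu.T @ LiKuu)
np.testing.assert_allclose(m, beta * LiKuu.T @ np.linalg.solve(L, Kuf @ y))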
def test_eKdiag_no_uncertainty(kern):
    kern, _ = _compile_params(kern, Data.ip)
    eKdiag = expectation(Data.dirac, kern)
    Kdiag = kern.Kdiag(Data.Xmu)
    eKdiag, Kdiag = tf.get_default_session().run([eKdiag, Kdiag])
    np.testing.assert_almost_equal(eKdiag, Kdiag)
    _clear_params(kern, _)
def test_eKzxKxz_no_uncertainty(session_tf, kernel, feature):
    kern = kernel()
    eKzxKxz = expectation(dirac_diag(), (kern, feature), (kern, feature))
    Kxz = kern.K(Data.Xmu, Data.Z)
    eKzxKxz, Kxz = session_tf.run([eKzxKxz, Kxz])
    KzxKxz = Kxz[:, :, None] * Kxz[:, None, :]
    assert_allclose(eKzxKxz, KzxKxz, rtol=RTOL)
def test_exKxz_markov_no_uncertainty(session_tf, kernel, feature):
    exKxz = expectation(dirac_markov_gauss(), (kernel(), feature), identity_mean())
    exKxz = session_tf.run(exKxz)
    Kzx = kernel().compute_K(Data.Xmu_markov[:-1, :], Data.Z)  # NxM
    xKxz = Kzx[..., None] * Data.Xmu_markov[1:, None, :]  # NxMxD
    assert_allclose(exKxz, xKxz, rtol=RTOL)
def test_eKxz_no_uncertainty(kern):
    kern, feat = _compile_params(kern, Data.ip)
    eKxz = expectation(Data.dirac, (feat, kern))
    Kxz = kern.K(Data.Xmu, Data.Z)
    eKxz, Kxz = tf.get_default_session().run([eKxz, Kxz])
    np.testing.assert_almost_equal(eKxz, Kxz)
    _clear_params(kern, feat)
def test_exKxz_pairwise_no_uncertainty(kern):
    kern, feat = _compile_params(kern, Data.ip)
    exKxz_pairwise = expectation(Data.dirac_markov_gauss, (feat, kern), Data.iden)
    exKxz_pairwise = tf.get_default_session().run(exKxz_pairwise)
    Kxz = kern.compute_K(Data.Xmu[:-1, :], Data.Z)  # NxM
    xKxz_pairwise = np.einsum('nm,nd->nmd', Kxz, Data.Xmu[1:, :])
    np.testing.assert_almost_equal(exKxz_pairwise, xKxz_pairwise)
    _clear_params(kern, feat)
def _test(params):
    _execute_func_on_params(params[1:], 'compile')

    analytic = expectation(*params)
    quad = quadrature_expectation(*params)
    analytic, quad = tf.get_default_session().run([analytic, quad])
    np.testing.assert_almost_equal(quad, analytic, decimal=2)

    _execute_func_on_params(params[1:], 'clear')
def test_RBF_eKzxKxz_gradient_notNaN():
    """
    Ensure that <K_{Z, x} K_{x, Z}>_p(x) is not NaN and correct, when
    K_{Z, Z} is zero with finite precision. See pull request #595.
    """
    kernel = gpflow.kernels.SquaredExponential(1, lengthscale=0.1)
    kernel.variance.assign(2.0)
    p = gpflow.probability_distributions.Gaussian(
        tf.constant([[10]], dtype=default_float()),
        tf.constant([[[0.1]]], dtype=default_float()))
    z = gpflow.inducing_variables.InducingPoints([[-10.], [10.]])

    with tf.GradientTape() as tape:
        ekz = expectation(p, (kernel, z), (kernel, z))

    grad = tape.gradient(ekz, kernel.lengthscale)
    assert grad is not None and not np.isnan(grad)
def test_RBF_eKzxKxz_gradient_notNaN(session_tf):
    """
    Ensure that <K_{Z, x} K_{x, Z}>_p(x) is not NaN and correct, when
    K_{Z, Z} is zero with finite precision. See pull request #595.
    """
    kern = gpflow.kernels.RBF(1, lengthscales=0.1)
    kern.variance = 2.
    p = gpflow.probability_distributions.Gaussian(
        tf.constant([[10]], dtype=gpflow.settings.tf_float),
        tf.constant([[[0.1]]], dtype=gpflow.settings.tf_float))
    z = gpflow.features.InducingPoints([[-10.], [10.]])

    ekz = expectation(p, (kern, z), (kern, z))

    g, = tf.gradients(ekz, kern.lengthscales._unconstrained_tensor)
    grad = session_tf.run(g)
    assert grad is not None and not np.isnan(grad)
def test_eKxz_no_uncertainty(session_tf, kernel, feature):
    eKxz = expectation(dirac_diag(), (kernel(), feature))
    Kxz = kernel().K(Data.Xmu, Data.Z)
    eKxz, Kxz = session_tf.run([eKxz, Kxz])
    assert_allclose(eKxz, Kxz, rtol=RTOL)
def test_eKzxKxz_no_uncertainty(kernel, inducing_variable):
    eKzxKxz = expectation(_distrs["dirac_diag"], (kernel, inducing_variable),
                          (kernel, inducing_variable))
    Kxz = kernel(Xmu, Z)
    KzxKxz = Kxz[:, :, None] * Kxz[:, None, :]
    assert_allclose(eKzxKxz, KzxKxz, rtol=RTOL)
def test_eMxKxz_no_uncertainty(session_tf, kernel, feature, mean):
    exKxz = expectation(dirac_diag(), mean(), (kernel(), feature))
    Kxz = kernel().K(Data.Xmu, Data.Z)
    xKxz = expectation(dirac_gauss(), mean())[:, :, None] * Kxz[:, None, :]
    exKxz, xKxz = session_tf.run([exKxz, xKxz])
    assert_allclose(exKxz, xKxz, rtol=RTOL)
def test_eKdiag_no_uncertainty(session_tf, kernel):
    eKdiag = expectation(dirac_diag(), kernel())
    Kdiag = kernel().Kdiag(Data.Xmu)
    eKdiag, Kdiag = session_tf.run([eKdiag, Kdiag])
    assert_allclose(eKdiag, Kdiag, rtol=RTOL)
def gplvm_build_likelihood(self, X_mean, X_var, Y, variance):
    if X_var is None:
        # SGPR
        num_inducing = len(self.feature)
        num_data = tf.cast(tf.shape(Y)[0], settings.float_type)
        output_dim = tf.cast(tf.shape(Y)[1], settings.float_type)

        err = Y - self.mean_function(X_mean)
        Kdiag = self.kern.Kdiag(X_mean)
        Kuf = self.feature.Kuf(self.kern, X_mean)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        L = tf.cholesky(Kuu)
        sigma = tf.sqrt(variance)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
        AAT = tf.matmul(A, A, transpose_b=True)
        B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        Aerr = tf.matmul(A, err)
        c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma

        # compute log marginal bound
        bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
        bound += tf.negative(output_dim) * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
        bound -= 0.5 * num_data * output_dim * tf.log(variance)
        bound += -0.5 * tf.reduce_sum(tf.square(err)) / variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * output_dim * tf.reduce_sum(Kdiag) / variance
        bound += 0.5 * output_dim * tf.reduce_sum(tf.matrix_diag_part(AAT))

        return bound
    else:
        X_cov = tf.matrix_diag(X_var)
        pX = DiagonalGaussian(X_mean, X_var)
        num_inducing = len(self.feature)
        if hasattr(self.kern, 'X_input_dim'):
            psi0 = tf.reduce_sum(self.kern.eKdiag(X_mean, X_cov))
            psi1 = self.kern.eKxz(self.feature.Z, X_mean, X_cov)
            psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.feature.Z, X_mean, X_cov), 0)
        else:
            psi0 = tf.reduce_sum(expectation(pX, self.kern))
            psi1 = expectation(pX, (self.kern, self.feature))
            psi2 = tf.reduce_sum(expectation(pX, (self.kern, self.feature),
                                             (self.kern, self.feature)), axis=0)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        L = tf.cholesky(Kuu)
        sigma2 = variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, Y), lower=True) / sigma

        # KL[q(x) || p(x)]
        # dX_var = self.X_var if len(self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
        # NQ = tf.cast(tf.size(self.X_mean), settings.float_type)
        D = tf.cast(tf.shape(Y)[1], settings.float_type)
        # KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
        #      + 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
        #      - 0.5 * NQ \
        #      + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)

        # compute log marginal bound
        ND = tf.cast(tf.size(Y), settings.float_type)
        bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(Y)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 - tf.reduce_sum(tf.matrix_diag_part(AAT)))
        # bound -= KL  # don't need this term

        return bound
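# For reference (my own annotation of the uncertain-inputs branch above, with
# B = I + AAT, AAT = L^{-1} Psi2 L^{-T} / sigma^2 and c = LB^{-1} A Y / sigma):
#
#   bound = -ND/2 * log(2*pi*sigma^2) - D/2 * log|B|
#           - ||Y||^2 / (2*sigma^2) + ||c||^2 / 2
#           - D/2 * (psi0 / sigma^2 - tr(AAT))
#
# i.e. the collapsed Titsias-style bound, with the KL[q(x)||p(x)] term handled
# elsewhere, as the trailing comment notes.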
def test_eKxzzx_no_uncertainty(session_tf, kernel, feature):
    eKxzzx = expectation(dirac(), (feature, kernel()), (feature, kernel()))
    Kxz = kernel().K(Data.Xmu, Data.Z)
    eKxzzx, Kxz = session_tf.run([eKxzzx, Kxz])
    Kxzzx = Kxz[:, :, None] * Kxz[:, None, :]
    assert_almost_equal(eKxzzx, Kxzzx)
def test_eKxz_no_uncertainty(session_tf, kernel, feature):
    eKxz = expectation(dirac(), (feature, kernel()))
    Kxz = kernel().K(Data.Xmu, Data.Z)
    eKxz, Kxz = session_tf.run([eKxz, Kxz])
    assert_almost_equal(eKxz, Kxz)
def test_eKdiag_no_uncertainty(session_tf, kernel):
    eKdiag = expectation(dirac(), kernel())
    Kdiag = kernel().Kdiag(Data.Xmu)
    eKdiag, Kdiag = session_tf.run([eKdiag, Kdiag])
    assert_almost_equal(eKdiag, Kdiag)
def _check(params):
    analytic = expectation(*params)
    quad = quadrature_expectation(*params)
    session = tf.get_default_session()
    analytic, quad = session.run([analytic, quad])
    assert_almost_equal(quad, analytic, decimal=2)
def test_exKxz_markov_no_uncertainty(distribution, kernel, mean, inducing_variable):
    exKxz = expectation(distribution, (kernel, inducing_variable), mean)
    Kzx = kernel(Xmu_markov[:-1, :], Z)  # NxM
    xKxz = Kzx[..., None] * Xmu_markov[1:, None, :]  # NxMxD
    assert_allclose(exKxz, xKxz, rtol=RTOL)
def gplvm_build_predict(self, Xnew, X_mean, X_var, Y, variance, full_cov=False):
    if X_var is None:
        # SGPR
        num_inducing = len(self.feature)
        err = Y - self.mean_function(X_mean)
        Kuf = self.feature.Kuf(self.kern, X_mean)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        Kus = self.feature.Kuf(self.kern, Xnew)
        sigma = tf.sqrt(variance)
        L = tf.cholesky(Kuu)

        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
        B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        Aerr = tf.matmul(A, err)
        c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tmp2, c, transpose_a=True)
        if full_cov:
            var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
                  - tf.matmul(tmp1, tmp1, transpose_a=True)
            shape = tf.stack([1, 1, tf.shape(Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
                  - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.stack([1, tf.shape(Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
    else:
        # gplvm
        pX = DiagonalGaussian(X_mean, X_var)
        num_inducing = len(self.feature)
        X_cov = tf.matrix_diag(X_var)
        if hasattr(self.kern, 'X_input_dim'):
            psi1 = self.kern.eKxz(self.feature.Z, X_mean, X_cov)
            psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.feature.Z, X_mean, X_cov), 0)
        else:
            psi1 = expectation(pX, (self.kern, self.feature))
            psi2 = tf.reduce_sum(expectation(pX, (self.kern, self.feature),
                                             (self.kern, self.feature)), axis=0)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        Kus = self.feature.Kuf(self.kern, Xnew)
        sigma2 = variance
        sigma = tf.sqrt(sigma2)
        L = tf.cholesky(Kuu)

        A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
        tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
        B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        c = tf.matrix_triangular_solve(LB, tf.matmul(A, Y), lower=True) / sigma
        tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
        tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
        mean = tf.matmul(tmp2, c, transpose_a=True)
        if full_cov:
            var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
                  - tf.matmul(tmp1, tmp1, transpose_a=True)
            shape = tf.stack([1, 1, tf.shape(Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
                  - tf.reduce_sum(tf.square(tmp1), 0)
            shape = tf.stack([1, tf.shape(Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
def uncertain_conditional_diag(
    Xnew_mu: tf.Tensor,
    Xnew_var: tf.Tensor,
    inducing_variable: InducingVariables,
    kernel: Kernel,
    q_mu,
    q_sqrt,
    *,
    mean_function=None,
    full_output_cov=False,
    full_cov=False,
    white=False,
):
    """
    Calculates the conditional for uncertain inputs Xnew,
    p(Xnew) = N(Xnew_mu, Xnew_var). See ``conditional`` documentation for
    further reference.

    :param Xnew_mu: mean of the inputs, size [N, Din]
    :param Xnew_var: diagonal (co)variance of the inputs, size [N, Din]
    :param inducing_variable: gpflow.InducingVariable object; only
        InducingPoints is supported
    :param kernel: gpflow kernel object.
    :param q_mu: mean inducing points, size [M, Dout]
    :param q_sqrt: cholesky of the covariance matrix of the inducing points,
        size [Dout, M, M]
    :param full_output_cov: boolean whether to compute covariance between
        output dimensions. Influences the shape of return value ``fvar``.
        Default is False.
    :param white: boolean whether to use whitened representation.
        Default is False.
    :return fmean, fvar: mean and covariance of the conditional. ``fmean`` has
        size [N, Dout]; the size of ``fvar`` depends on ``full_output_cov``:
        if True, ``fvar`` is [N, Dout, Dout]; if False, [N, Dout].
    """
    if not isinstance(inducing_variable, InducingPoints):
        raise NotImplementedError
    if full_cov:
        raise NotImplementedError(
            "uncertain_conditional() currently does not support full_cov=True")

    pXnew = DiagonalGaussian(Xnew_mu, Xnew_var)

    num_data = tf.shape(Xnew_mu)[0]  # number of new inputs (N)
    num_ind, num_func = tf.unstack(
        tf.shape(q_mu), num=2, axis=0)  # number of inducing points (M), output dimension (D)

    q_sqrt_r = tf.linalg.band_part(q_sqrt, -1, 0)  # [D, M, M], lower-triangular part

    eKuf = tf.transpose(expectation(pXnew, (kernel, inducing_variable)))  # [M, N] (psi1)
    Kuu = covariances.Kuu(inducing_variable, kernel, jitter=default_jitter())  # [M, M]
    Luu = tf.linalg.cholesky(Kuu)  # [M, M]

    if not white:
        q_mu = tf.linalg.triangular_solve(Luu, q_mu, lower=True)
        Luu_tiled = tf.tile(Luu[None, :, :], [num_func, 1, 1])  # remove line once issue 216 is fixed
        q_sqrt_r = tf.linalg.triangular_solve(Luu_tiled, q_sqrt_r, lower=True)

    Li_eKuf = tf.linalg.triangular_solve(Luu, eKuf, lower=True)  # [M, N]
    fmean = tf.linalg.matmul(Li_eKuf, q_mu, transpose_a=True)

    eKff = expectation(pXnew, kernel)  # N (psi0)
    eKuffu = expectation(pXnew, (kernel, inducing_variable),
                         (kernel, inducing_variable))  # [N, M, M] (psi2)
    Luu_tiled = tf.tile(Luu[None, :, :], [num_data, 1, 1])  # remove this line, once issue 216 is fixed
    Li_eKuffu = tf.linalg.triangular_solve(Luu_tiled, eKuffu, lower=True)
    Li_eKuffu_Lit = tf.linalg.triangular_solve(Luu_tiled, tf.linalg.adjoint(Li_eKuffu),
                                               lower=True)  # [N, M, M]
    cov = tf.linalg.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)  # [D, M, M]

    if mean_function is None or isinstance(mean_function, mean_functions.Zero):
        e_related_to_mean = tf.zeros((num_data, num_func, num_func), dtype=default_float())
    else:
        # Update mean: \mu(x) + m(x)
        fmean = fmean + expectation(pXnew, mean_function)

        # Calculate: m(x) m(x)^T + m(x) \mu(x)^T + \mu(x) m(x)^T,
        # where m(x) is the mean_function and \mu(x) is fmean
        e_mean_mean = expectation(pXnew, mean_function, mean_function)  # [N, D, D]
        Lit_q_mu = tf.linalg.triangular_solve(Luu, q_mu, adjoint=True)
        e_mean_Kuf = expectation(pXnew, mean_function, (kernel, inducing_variable))  # [N, D, M]
        # einsum isn't able to infer the rank of e_mean_Kuf, hence we
        # explicitly set the rank of the tensor:
        e_mean_Kuf = tf.reshape(e_mean_Kuf, [num_data, num_func, num_ind])
        e_fmean_mean = tf.einsum("nqm,mz->nqz", e_mean_Kuf, Lit_q_mu)  # [N, D, D]
        e_related_to_mean = e_fmean_mean + tf.linalg.adjoint(e_fmean_mean) + e_mean_mean

    if full_output_cov:
        fvar = (
            tf.linalg.diag(
                tf.tile((eKff - tf.linalg.trace(Li_eKuffu_Lit))[:, None], [1, num_func]))
            + tf.linalg.diag(tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov))
            # tf.linalg.diag(tf.linalg.trace(tf.linalg.matmul(Li_eKuffu_Lit, cov))) +
            + tf.einsum("ig,nij,jh->ngh", q_mu, Li_eKuffu_Lit, q_mu)
            # tf.linalg.matmul(q_mu, tf.linalg.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
            - fmean[:, :, None] * fmean[:, None, :]
            + e_related_to_mean)
    else:
        fvar = (
            (eKff - tf.linalg.trace(Li_eKuffu_Lit))[:, None]
            + tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)
            # tf.linalg.diag(tf.linalg.trace(tf.linalg.matmul(Li_eKuffu_Lit, cov))) +
            + tf.einsum("ig,nij,jg->ng", q_mu, Li_eKuffu_Lit, q_mu)
            # tf.linalg.matmul(q_mu, tf.linalg.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
            - fmean ** 2
            + tf.linalg.diag_part(e_related_to_mean))

    return fmean, fvar
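# For reference (my own annotation, not from the original source): writing
# psi0 = <Kff>, P = Luu^{-1} <Kuf Kfu> Luu^{-T} (Li_eKuffu_Lit above) and
# S_d = q_sqrt_r[d] @ q_sqrt_r[d]^T (cov above), the diagonal case computes,
# per output dimension d,
#
#   fvar[:, d] = psi0 - tr(P) + tr(P @ S_d) + q_mu[:, d]^T P q_mu[:, d]
#                - fmean[:, d]^2 + mean-function terms.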
def _check(params):
    analytic = expectation(*params)
    quad = quadrature_expectation(*params)
    assert_allclose(analytic, quad, rtol=RTOL)
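# A standalone sketch of what _check exercises, assuming GPflow 2's
# expectations API; all names below are illustrative.
import numpy as np
import tensorflow as tf
import gpflow
from gpflow.expectations import expectation, quadrature_expectation
from gpflow.probability_distributions import Gaussian

N, D, M = 4, 2, 3
rng = np.random.default_rng(0)
p = Gaussian(
    tf.constant(rng.standard_normal((N, D))),          # means, [N, D]
    tf.constant(np.tile(0.1 * np.eye(D), (N, 1, 1))))  # covariances, [N, D, D]
kernel = gpflow.kernels.SquaredExponential(lengthscales=0.5)
iv = gpflow.inducing_variables.InducingPoints(rng.standard_normal((M, D)))

analytic = expectation(p, (kernel, iv))         # closed-form psi1, [N, M]
quad = quadrature_expectation(p, (kernel, iv))  # Gauss-Hermite estimate
np.testing.assert_allclose(analytic, quad, rtol=1e-2)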
def _check(params):
    analytic = expectation(*params)
    quad = quadrature_expectation(*params)
    session = tf.get_default_session()
    analytic, quad = session.run([analytic, quad])
    assert_allclose(analytic, quad, rtol=RTOL)
def test_eKdiag_no_uncertainty(kernel):
    eKdiag = expectation(_distrs["dirac_diag"], kernel)
    Kdiag = kernel(Xmu, full_cov=False)
    assert_allclose(eKdiag, Kdiag, rtol=RTOL)
def test_eKxz_no_uncertainty(kernel, inducing_variable):
    eKxz = expectation(_distrs["dirac_diag"], (kernel, inducing_variable))
    Kxz = kernel(Xmu, Z)
    assert_allclose(eKxz, Kxz, rtol=RTOL)
def test_exKxz_pairwise_no_uncertainty(session_tf, kernel, feature):
    exKxz_pairwise = expectation(dirac_markov_gauss(), (feature, kernel()), identity())
    exKxz_pairwise = session_tf.run(exKxz_pairwise)
    Kxz = kernel().compute_K(Data.Xmu[:-1, :], Data.Z)  # NxM
    xKxz_pairwise = np.einsum('nm,nd->nmd', Kxz, Data.Xmu[1:, :])
    assert_almost_equal(exKxz_pairwise, xKxz_pairwise)