def testNonSquareMatrix(self):
  with self.assertRaises(ValueError):
    tf.cholesky(np.array([[1., 2., 3.], [3., 4., 5.]]))
  with self.assertRaises(ValueError):
    tf.batch_cholesky(
        np.array([[[1., 2., 3.], [3., 4., 5.]],
                  [[1., 2., 3.], [3., 4., 5.]]]))
def build_predict(self, Xnew, full_cov=False):
    """
    The posterior variance of F is given by

        q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)

    Here we project this to F*, the values of the GP at Xnew, which are
    given by

        q(F*) = N(F* | K_{*f} alpha + mean,
                  K_{**} - K_{*f} [K_{ff} + diag(lambda**-2)]^-1 K_{f*})
    """
    # compute kernel matrices
    Kx = self.kern.K(self.X, Xnew)
    K = self.kern.K(self.X)

    # predictive mean
    f_mean = tf.matmul(tf.transpose(Kx), self.q_alpha) + \
        self.mean_function(Xnew)

    # predictive variance
    A = K + tf.batch_matrix_diag(tf.transpose(1. / tf.square(self.q_lambda)))
    L = tf.batch_cholesky(A)
    Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
    LiKx = tf.batch_matrix_triangular_solve(L, Kx_tiled)
    if full_cov:
        f_var = self.kern.K(Xnew) - tf.batch_matmul(LiKx, LiKx, adj_x=True)
    else:
        f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
    return f_mean, tf.transpose(f_var)
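# A minimal NumPy sketch (separate from the model above; all names here are
# illustrative) of the identity that the triangular solve exploits for the
# predictive variance: with A = L L^T,
#   Kx^T A^{-1} Kx = (L^{-1} Kx)^T (L^{-1} Kx),
# so no explicit matrix inverse is needed.
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.RandomState(0)
n, m = 5, 3
B = rng.randn(n, n)
A = B.dot(B.T) + n * np.eye(n)   # symmetric positive definite
Kx = rng.randn(n, m)

L = np.linalg.cholesky(A)
LiKx = solve_triangular(L, Kx, lower=True)   # L^{-1} Kx

direct = Kx.T.dot(np.linalg.inv(A)).dot(Kx)
via_chol = LiKx.T.dot(LiKx)
assert np.allclose(direct, via_chol)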
def _verifyCholesky(self, x):
  with self.test_session() as sess:
    # Verify that LL^T == x.
    if x.ndim == 2:
      chol = tf.cholesky(x)
      verification = tf.matmul(chol, chol,
                               transpose_a=False, transpose_b=True)
    else:
      chol = tf.batch_cholesky(x)
      verification = tf.batch_matmul(chol, chol, adj_x=False, adj_y=True)
    chol_np, verification_np = sess.run([chol, verification])
    self.assertAllClose(x, verification_np)
    self.assertShapeEqual(x, chol)
    # Check that the cholesky is lower triangular, and has positive diagonal
    # elements.
    if chol_np.shape[-1] > 0:
      chol_reshaped = np.reshape(
          chol_np, (-1, chol_np.shape[-2], chol_np.shape[-1]))
      for chol_matrix in chol_reshaped:
        self.assertAllClose(chol_matrix, np.tril(chol_matrix))
        self.assertTrue((np.diag(chol_matrix) > 0.0).all())
def _verifyCholesky(self, x):
  # Verify that LL^T == x.
  with self.test_session() as sess:
    # Check the batch version, which works for ndim >= 2.
    chol = tf.batch_cholesky(x)
    verification = tf.batch_matmul(chol, chol, adj_x=False, adj_y=True)
    self._verifyCholeskyBase(sess, x, chol, verification)

    if x.ndim == 2:
      # Check the simple form of cholesky.
      chol = tf.cholesky(x)
      verification = tf.matmul(
          chol, chol, transpose_a=False, transpose_b=True)
      self._verifyCholeskyBase(sess, x, chol, verification)
def test_works_with_five_different_random_pos_def_matrices(self):
  with self.test_session():
    for n in range(1, 6):
      for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
        # Create a batch of two random n x n positive-definite matrices.
        array = np.array(
            [_random_pd_matrix(n, self.rng),
             _random_pd_matrix(n, self.rng)]).astype(np_type)
        chol = tf.batch_cholesky(array)
        for k in range(1, 3):
          rhs = self.rng.randn(2, n, k).astype(np_type)
          x = tf.batch_cholesky_solve(chol, rhs)
          self.assertAllClose(
              rhs, tf.batch_matmul(array, x).eval(), atol=atol)
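# A hedged NumPy/SciPy sketch of what the test above checks for one matrix in
# the batch: given chol = cholesky(A), cholesky_solve returns x with A x = rhs.
# Variable names are illustrative only.
import numpy as np
from scipy.linalg import cho_solve

rng = np.random.RandomState(0)
n, k = 4, 2
B = rng.randn(n, n)
A = B.dot(B.T) + n * np.eye(n)   # symmetric positive definite
rhs = rng.randn(n, k)

chol = np.linalg.cholesky(A)
x = cho_solve((chol, True), rhs)  # True => chol is lower triangular
assert np.allclose(A.dot(x), rhs)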
def _define_full_covariance_probs(self, shard_id, shard):
  """Defines the full covariance probabilities per example in a class.

  Updates a matrix with dimension num_examples X num_classes.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  diff = shard - self._means
  cholesky = tf.batch_cholesky(self._covs + self._min_var)
  log_det_covs = 2.0 * tf.reduce_sum(
      tf.log(tf.batch_matrix_diag_part(cholesky)), 1)
  x_mu_cov = tf.square(tf.batch_matrix_triangular_solve(
      cholesky, tf.transpose(diff, perm=[0, 2, 1]), lower=True))
  diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
  self._probs[shard_id] = -0.5 * (
      diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
      log_det_covs)
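# A small NumPy sketch of the log-density trick used above, for a single
# example x and a single class (mean mu, covariance cov; names illustrative):
# log|cov| = 2 * sum(log(diag(L))) and the Mahalanobis term comes from a
# triangular solve, avoiding an explicit inverse.
import numpy as np
from scipy.linalg import solve_triangular
from scipy.stats import multivariate_normal

rng = np.random.RandomState(0)
d = 3
B = rng.randn(d, d)
cov = B.dot(B.T) + d * np.eye(d)
mu = rng.randn(d)
x = rng.randn(d)

L = np.linalg.cholesky(cov)
log_det = 2.0 * np.sum(np.log(np.diag(L)))
z = solve_triangular(L, x - mu, lower=True)   # L^{-1} (x - mu)
log_prob = -0.5 * (z.dot(z) + d * np.log(2 * np.pi) + log_det)

assert np.isclose(log_prob, multivariate_normal.logpdf(x, mu, cov))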
def build_likelihood(self):
    """
    q_alpha, q_lambda are variational parameters, size N x R.

    This method computes the variational lower bound on the likelihood,
    which is:

        E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F) ]

    with

        q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1).
    """
    K = self.kern.K(self.X)
    K_alpha = tf.matmul(K, self.q_alpha)
    f_mean = K_alpha + self.mean_function(self.X)

    # compute the variance for each of the outputs
    I = tf.tile(tf.expand_dims(eye(self.num_data), 0),
                [self.num_latent, 1, 1])
    A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
        tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
    L = tf.batch_cholesky(A)
    Li = tf.batch_matrix_triangular_solve(L, I)
    tmp = Li / tf.transpose(self.q_lambda)
    f_var = 1. / tf.square(self.q_lambda) - \
        tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

    # some statistics about A are used in the KL
    A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.batch_matrix_diag_part(L)))
    trAi = tf.reduce_sum(tf.square(Li))

    KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                tf.reduce_sum(K_alpha * self.q_alpha))

    v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
    return tf.reduce_sum(v_exp) - KL
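# A hedged NumPy check (illustrative names, not part of the model above) of
# the two Cholesky identities the KL term relies on, with A = L L^T:
#   log|A| = 2 * sum(log(diag(L)))
#   tr(A^{-1}) = sum((L^{-1})**2)    since tr(A^{-1}) = ||L^{-1}||_F^2
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.RandomState(0)
n = 4
B = rng.randn(n, n)
A = B.dot(B.T) + n * np.eye(n)

L = np.linalg.cholesky(A)
Li = solve_triangular(L, np.eye(n), lower=True)   # L^{-1}

assert np.isclose(2.0 * np.sum(np.log(np.diag(L))),
                  np.linalg.slogdet(A)[1])
assert np.isclose(np.sum(Li ** 2), np.trace(np.linalg.inv(A)))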
def testWrongDimensions(self):
  tensor3 = tf.constant([1., 2.])
  with self.assertRaises(ValueError):
    tf.cholesky(tensor3)
  with self.assertRaises(ValueError):
    tf.batch_cholesky(tensor3)
# tf.matrix_inverse
x = np.random.rand(10, 10)
z_matrix_inverse = tf.matrix_inverse(x)

# tf.batch_matrix_inverse
batch_x = np.random.rand(10, 5, 5)
z_batch_matrix_inverse = tf.batch_matrix_inverse(batch_x)

# tf.cholesky (the input must be symmetric positive definite)
x = np.random.rand(10, 10)
x = np.dot(x, x.T) + 10 * np.eye(10)
z_cholesky = tf.cholesky(x)

# tf.batch_cholesky
batch_x = np.random.rand(10, 5, 5)
batch_x = np.matmul(batch_x, batch_x.transpose(0, 2, 1)) + 5 * np.eye(5)
z_batch_cholesky = tf.batch_cholesky(batch_x)

# tf.self_adjoint_eig (the input must be square and self-adjoint)
x = np.random.rand(10, 10)
x = (x + x.T) / 2
z_self_adjoint_eig = tf.self_adjoint_eig(x)

# tf.batch_self_adjoint_eig
batch_x = np.random.rand(10, 8, 8)
batch_x = (batch_x + batch_x.transpose(0, 2, 1)) / 2
z_batch_self_adjoint_eig = tf.batch_self_adjoint_eig(batch_x)

with tf.Session() as sess:
    # run a couple of the ops defined above
    print("tf.cholesky")
    print(sess.run(z_cholesky))
    print("tf.batch_cholesky")
    print(sess.run(z_batch_cholesky))
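# As a sanity check (a sketch, separate from the script above): NumPy's
# np.linalg.cholesky broadcasts over leading batch dimensions, so it can be
# used to verify what tf.batch_cholesky computes for a batch of SPD matrices.
import numpy as np

b = np.random.rand(10, 5, 5)
b = np.matmul(b, b.transpose(0, 2, 1)) + 5 * np.eye(5)  # make each matrix SPD
np_chol = np.linalg.cholesky(b)                          # shape (10, 5, 5)
assert np.allclose(np.matmul(np_chol, np_chol.transpose(0, 2, 1)), b)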
def test_BatchCholesky(self):
  t = tf.batch_cholesky(
      np.array(3 * [8, 3, 3, 8]).reshape(3, 2, 2).astype("float32"))
  self.check(t)
def compute_posterior_samples(self, X, Y, test_points, num_samples):
    """Computes samples from the posterior distribution(s).

    Args:
        X (np.ndarray): The training inputs, shape `[n, point_dims]`
        Y (np.ndarray): The training outputs, shape `[n, num_latent]`
        test_points (np.ndarray): The points from the sample space for
            which to predict means and variances of the posterior
            distribution(s), shape `[m, point_dims]`.
        num_samples (int): The number of samples to take.

    Returns:
        (np.ndarray): An array of samples from the posterior
        distributions, with shape `[num_samples, m, num_latent]`.

    Examples:
        For testing purposes, we create an example model whose
        likelihood is always `0` and whose
        `.build_posterior_mean_var()` returns mean `0` and variance
        `1` for every test point, or an identity covariance matrix.

        >>> from overrides import overrides
        >>> from gptf import ParamAttributes, tfhacks
        >>> class Example(GPModel, ParamAttributes):
        ...     def __init__(self, dtype):
        ...         super().__init__()
        ...         self.dtype = dtype
        ...     @property
        ...     def dtype(self):
        ...         return self._dtype
        ...     @dtype.setter
        ...     def dtype(self, value):
        ...         self.clear_cache()
        ...         self._dtype = value
        ...     @tf_method()
        ...     @overrides
        ...     def build_log_likelihood(self):
        ...         NotImplemented
        ...     @tf_method()
        ...     @overrides
        ...     def build_prior_mean_var\\
        ...             (self, test_points, num_latent, full_cov=False):
        ...         NotImplemented
        ...     @tf_method()
        ...     @overrides
        ...     def build_posterior_mean_var\\
        ...             (self, X, Y, test_points, full_cov=False):
        ...         n = tf.shape(test_points)[0]
        ...         num_latent = tf.shape(Y)[1]
        ...         mu = tf.zeros([n, 1], self.dtype)
        ...         mu = tf.tile(mu, (1, num_latent))
        ...         if full_cov:
        ...             var = tf.expand_dims(tfhacks.eye(n, self.dtype), 2)
        ...             var = tf.tile(var, (1, 1, num_latent))
        ...         else:
        ...             var = tf.ones([n, 1], self.dtype)
        ...             var = tf.tile(var, (1, num_latent))
        ...         return mu, var
        >>> m = Example(tf.float64)
        >>> X = np.array([[.5]])
        >>> Y = np.array([[.3]])
        >>> test_points = np.array([[0.], [1.], [2.], [3.]])

        The shape of the returned array is `(a, b, c)`, where `a` is
        the number of samples, `b` is the number of test points and
        `c` is the number of latent functions.

        >>> samples = m.compute_posterior_samples(X, Y, test_points, 2)
        >>> samples.shape
        (2, 4, 1)

        `.compute_posterior_samples()` respects the dtype of the
        tensors returned by `.build_posterior_mean_var()`.

        >>> samples.dtype
        dtype('float64')
        >>> m.dtype = tf.float32
        >>> samples = m.compute_posterior_samples(X, Y, test_points, 2)
        >>> samples.dtype
        dtype('float32')

    """
    mu, var = self.build_posterior_mean_var(X, Y, test_points, True)
    jitter = tfhacks.eye(tf.shape(mu)[0], var.dtype) * 1e-06
    L = tf.batch_cholesky(tf.transpose(var, (2, 0, 1)) + jitter)
    V_shape = [tf.shape(L)[0], tf.shape(L)[1], num_samples]
    V = tf.random_normal(V_shape, dtype=L.dtype)
    samples = tf.expand_dims(tf.transpose(mu), -1) + tf.batch_matmul(L, V)
    return tf.transpose(samples)
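# A minimal NumPy sketch (illustrative names, not the gptf API) of the
# sampling scheme the method above uses: if Sigma = L L^T and v ~ N(0, I),
# then mu + L v ~ N(mu, Sigma).
import numpy as np

rng = np.random.RandomState(0)
m = 3
B = rng.randn(m, m)
Sigma = B.dot(B.T) + m * np.eye(m)
mu = rng.randn(m)

L = np.linalg.cholesky(Sigma + 1e-6 * np.eye(m))  # jitter, as in the method
V = rng.randn(m, 100000)
samples = mu[:, None] + L.dot(V)

# the empirical covariance should approximate Sigma
assert np.allclose(np.cov(samples), Sigma, atol=0.1)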