def testTTMatTimesTTMatBroadcasting(self):
    """Multiply a batch of TT-matrices by another batch with broadcasting.

    Two products are compared against a dense `tf.einsum` reference:
    a TT-batch times a TT-batch built with the default batch size, and a
    single TT-matrix times a TT-batch (computed through transposes).
    """
    left_shape, sum_shape, right_shape = (2, 3), (4, 3), (4, 4)
    tolerance = {'atol': 1e-5, 'rtol': 1e-5}
    with self.test_session() as sess:
        lhs_batch = initializers.random_matrix_batch(
            (left_shape, sum_shape), tt_rank=3, batch_size=3,
            dtype=self.dtype)
        rhs_batch = initializers.random_matrix_batch(
            (sum_shape, right_shape), dtype=self.dtype)

        # TT-batch by one element TT-batch.
        batch_by_batch = ops.full(ops.matmul(lhs_batch, rhs_batch))

        # TT by TT-batch: (B^T A^T)^T equals A B.
        transposed_product = ops.matmul(ops.transpose(rhs_batch[0]),
                                        ops.transpose(lhs_batch))
        single_by_batch = ops.full(ops.transpose(transposed_product))

        # Dense reference.
        reference = tf.einsum('oij,jk->oik', ops.full(lhs_batch),
                              ops.full(rhs_batch[0]))

        batch_val, single_val, reference_val = sess.run(
            [batch_by_batch, single_by_batch, reference])
        self.assertAllClose(batch_val, reference_val, **tolerance)
        self.assertAllClose(single_val, reference_val, **tolerance)
def testHessianVectorProduct(self):
    """Compare autodiff Hessian-by-vector products with closed forms.

    Three functions are exercised:
      * func1: 0.5 * <x, w>^2, checked against w <w, P_x z>;
      * func2: the bilinear form <x, A x>, checked against (A + A.T) P_x z;
      * func3: a function of the raw TT-cores, which is expected to make
        the evaluation raise `tf.errors.InvalidArgumentError`.
    Both expected values are projected with `riemannian.project(., x)`.
    """
    w = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    A = initializers.random_matrix(([5] * 3, [5] * 3), dtype=self.dtype)
    x = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    z = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    projected_vector = riemannian.project(z, x)

    def func1(x):
        return 0.5 * ops.flat_inner(x, w) ** 2
    # Grad: <x, w> w
    # Hessian: w w.T
    # Hessian by vector: w <w, P_x z>
    desired1 = riemannian.project(
        w * ops.flat_inner(projected_vector, w), x)
    desired1 = ops.full(desired1)
    self._TestSingleHessianByVector(func1, x, z, desired1)

    def func2(x):
        return ops.bilinear_form(A, x, x)
    # Hessian of <x, Ax> is A + A.T
    hessian_by_vector = ops.matmul(ops.transpose(A) + A, projected_vector)
    desired2 = ops.full(riemannian.project(hessian_by_vector, x))
    # Check func2 against its own expected value; previously func1 and
    # desired1 were re-checked here and func2 was never exercised.
    self._TestSingleHessianByVector(func2, x, z, desired2)

    def func3(x):
        # A function which is not invariant to different representations of
        # the same tensor, i.e. it does not even have a Riemannian gradient
        # or hessian.
        return tf.add_n([tf.reduce_sum(c) for c in x.tt_cores]) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
        actual3 = ops.full(autodiff.hessian_vector_product(func3, x, z))
        self.evaluate(actual3)
def _get_mu(self, ranks, x, y):
    """Builds the `tt_mu` variable from a batch of data.

    The batch elements of Sigma * (w * y) are summed into one TT-object,
    where w is the interpolation of the projected features and
    Sigma = sigma_l * sigma_l^T. The summed cores are then wrapped in a
    `TensorTrain` with the requested TT-ranks and handed to
    `t3f.get_variable` as the initializer.

    Args:
        ranks: tt-ranks of mu
        x: features of a batch of objects
        y: targets of a batch of objects

    Returns:
        Whatever `t3f.get_variable('tt_mu', ...)` returns.
    """
    # TODO: test if this is needed.
    interp = self.inputs.interpolate_on_batch(self.cov.project(x))
    covariance = ops.tt_tt_matmul(self.sigma_l, ops.transpose(self.sigma_l))
    batch = ops.tt_tt_matmul(covariance, ops.tt_tt_matmul(interp, y))

    def batch_element(idx):
        # Slice element `idx` out of every core and declare all TT-ranks 1.
        cores = [core[idx, :, :, :, :] for core in batch.tt_cores]
        return TensorTrain(cores, tt_ranks=[1] * (batch.ndims() + 1))

    total = batch_element(0)
    for idx in range(1, batch.get_shape()[0]):
        total = ops.add(total, batch_element(idx))

    mu_ranks = [1] + [ranks] * (total.ndims() - 1) + [1]
    initial_value = TensorTrain(total.tt_cores, total.get_raw_shape(),
                                mu_ranks)
    return t3f.get_variable('tt_mu', initializer=initial_value)
def _get_mus(self, mu_ranks):
    """Initialize expectations of var distribution over unary potentials.

    Random inputs and targets (`mu_ranks` of each) are drawn, the batch
    Sigma * (w * y) is summed over its elements, and the resulting cores
    are tiled `self.n_labels` times along a new leading axis to form the
    initializer of the `tt_mus` variable.

    Args:
        mu_ranks: TT-ranks of mus.

    Returns:
        Whatever `t3f.get_variable('tt_mus', ...)` returns.
    """
    # TODO: is this a good initialization?
    n_samples = mu_ranks
    x_init = tf.random_normal([n_samples, self.d], dtype=tf.float64)
    y_init = tf.random_normal([n_samples], dtype=tf.float64)
    interp = self.inputs.interpolate_on_batch(x_init)

    # Targets as a TT-batch: values in the first core, ones elsewhere.
    target_cores = [tf.reshape(y_init, (-1, 1, 1, 1, 1))]
    target_cores += [
        tf.ones((n_samples, 1, 1, 1, 1), dtype=tf.float64)
        for _ in range(1, interp.ndims())
    ]
    targets = t3f.TensorTrainBatch(target_cores)

    sigma_l = self.sigma_ls[0]
    covariance = ops.tt_tt_matmul(sigma_l, ops.transpose(sigma_l))
    products = t3f.tt_tt_matmul(covariance,
                                t3f.tt_tt_matmul(interp, targets))

    total = products[0]
    for idx in range(1, n_samples):
        total = total + products[idx]

    tt_ranks = [1] + [n_samples] * (total.ndims() - 1) + [1]
    tiled_cores = [
        tf.tile(core[None, ...], [self.n_labels, 1, 1, 1, 1])
        for core in total.tt_cores
    ]
    initial_value = TensorTrainBatch(tiled_cores, total.get_raw_shape(),
                                     tt_ranks)
    return t3f.get_variable('tt_mus', initializer=initial_value)
def _unary_complexity_penalty(self):
    """Computes the complexity penalty for unary potentials.

    Per the original author, this is the KL-divergence between the prior
    and the variational distribution over the values of GPs at inducing
    inputs. The code sums four terms, reduces them with `tf.reduce_sum`
    and halves the result.

    Returns:
        A scalar `tf.Tensor` containing the complexity penalty for GPs
        determining unary potentials.
    """
    # TODO: test this
    mus = self.mus
    chol_factors = _kron_tril(self.sigma_ls)
    covariances = ops.tt_tt_matmul(chol_factors, ops.transpose(chol_factors))
    covariances_logdet = _kron_logdet(chol_factors)

    prior = self._K_mms()
    prior_inv = kron.inv(prior)
    prior_logdet = kron.slog_determinant(prior)[1]

    terms = (
        -prior_logdet,
        covariances_logdet,
        -ops.tt_tt_flat_inner(covariances, prior_inv),
        -ops.tt_tt_flat_inner(mus, ops.tt_tt_matmul(prior_inv, mus)),
    )
    penalty = 0
    for term in terms:
        penalty += term
    return tf.reduce_sum(penalty) / 2
def testBilinearFormTwoMat(self):
    """Test bilinear_form_two_mat against the dense x^T A B y product."""
    shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
    rank_list = (1, 2)
    for tensor_shape in shape_list:
        vector_shape = (tensor_shape[0], None)
        for rank in rank_list:
            A = initializers.random_matrix(tensor_shape, tt_rank=rank,
                                           dtype=self.dtype)
            # B is transposed so that A * B is a valid product.
            B = ops.transpose(
                initializers.random_matrix(tensor_shape, tt_rank=rank,
                                           dtype=self.dtype))
            x = initializers.random_matrix(vector_shape, tt_rank=rank,
                                           dtype=self.dtype)
            y = initializers.random_matrix(vector_shape, tt_rank=rank,
                                           dtype=self.dtype)
            res_actual = ops.bilinear_form_two_mat(x, A, B, y)

            fetches = [res_actual, ops.full(x), ops.full(A), ops.full(B),
                       ops.full(y)]
            res_actual_val, x_val, A_val, B_val, y_val = self.evaluate(
                fetches)
            res_desired = x_val.T.dot(A_val).dot(B_val).dot(y_val)
            self.assertAllClose(res_actual_val, np.squeeze(res_desired),
                                atol=1e-5, rtol=1e-5)
def testTranspose(self):
    """Transpose a batch of TT-matrices and compare with the dense result."""
    batch = initializers.random_matrix_batch(
        ((2, 3, 4), (2, 2, 2)), batch_size=2, dtype=self.dtype)
    dense_transposed = ops.full(ops.transpose(batch))
    dense_original = ops.full(batch)
    transposed_val, original_val = self.evaluate(
        [dense_transposed, dense_original])
    # Swap the two matrix axes of every batch element; axis 0 is the batch.
    expected = original_val.transpose((0, 2, 1))
    self.assertAllClose(expected, transposed_val)
def testTranspose(self):
    """Transpose a batch of TT-matrices and compare with the dense result."""
    with self.test_session() as sess:
        batch = initializers.random_matrix_batch(
            ((2, 3, 4), (2, 2, 2)), batch_size=2)
        dense_transposed = ops.full(ops.transpose(batch))
        dense_original = ops.full(batch)
        transposed_val, original_val = sess.run(
            [dense_transposed, dense_original])
        # Swap the two matrix axes of every batch element; axis 0 is the
        # batch.
        expected = original_val.transpose((0, 2, 1))
        self.assertAllClose(expected, transposed_val)
def testTranspose(self):
    """Transpose a TT-matrix for several shapes and TT-ranks."""
    shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
    rank_list = (1, 2)
    with self.test_session() as sess:
        for tensor_shape in shape_list:
            for rank in rank_list:
                matrix = initializers.random_matrix(tensor_shape,
                                                    tt_rank=rank)
                dense_transposed = ops.full(ops.transpose(matrix))
                dense_original = ops.full(matrix)
                transposed_val, original_val = sess.run(
                    [dense_transposed, dense_original])
                self.assertAllClose(original_val.transpose(),
                                    transposed_val)
def testTranspose(self):
    """Transpose a TT-matrix for several shapes and TT-ranks."""
    shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
    rank_list = (1, 2)
    for tensor_shape in shape_list:
        for rank in rank_list:
            matrix = initializers.random_matrix(tensor_shape, tt_rank=rank,
                                                dtype=self.dtype)
            dense_transposed = ops.full(ops.transpose(matrix))
            dense_original = ops.full(matrix)
            transposed_val, original_val = self.evaluate(
                [dense_transposed, dense_original])
            self.assertAllClose(original_val.transpose(), transposed_val)
def _predict_process_values(self, x, with_variance=False, test=False):
    """Predicts process values (and optionally variances) at `x`.

    Args:
        x: inputs; passed to `self.cov.project` together with `test`.
        with_variance: if True, variances are returned as well.
        test: forwarded to `self.cov.project`.

    Returns:
        `mean`, the pairwise flat inner products between the interpolation
        vectors and `self.mus`; or the tuple `(mean, variances)` when
        `with_variance` is True.
    """
    interp = self.inputs.interpolate_on_batch(
        self.cov.project(x, test=test))
    mean = batch_ops.pairwise_flat_inner(interp, self.mus)
    if not with_variance:
        return mean

    prior = self._K_mms()
    chol_factors = _kron_tril(self.sigma_ls)
    covariances = ops.tt_tt_matmul(chol_factors, ops.transpose(chol_factors))
    # w^T Sigma w - w^T K_mm w + cov_0
    variances = pairwise_quadratic_form(covariances, interp, interp)
    variances -= pairwise_quadratic_form(prior, interp, interp)
    variances += self.cov.cov_0()[None, :]
    return mean, variances
def _get_mus(self, ranks, x_init, y_init):
    """Builds the `tt_mus` variable from initial inputs and targets.

    The batch elements of Sigma * (w * y_init) are summed into one
    TT-object, where w is the interpolation of the projected `x_init` and
    Sigma = sigma_ls[0] * sigma_ls[0]^T. Its cores are tiled
    `self.n_class` times along a new leading axis to form the initializer.

    Args:
        ranks: TT-rank used for all inner ranks of the result.
        x_init: inputs passed to `self.cov.project`.
        y_init: targets multiplied with the interpolation vectors.

    Returns:
        Whatever `t3f.get_variable('tt_mus', ...)` returns.
    """
    interp = self.inputs.interpolate_on_batch(self.cov.project(x_init))
    covariance = ops.tt_tt_matmul(self.sigma_ls[0],
                                  ops.transpose(self.sigma_ls[0]))
    batch = ops.tt_tt_matmul(covariance, ops.tt_tt_matmul(interp, y_init))

    def batch_element(idx):
        # Slice element `idx` out of every core and declare all TT-ranks 1.
        cores = [core[idx, :, :, :, :] for core in batch.tt_cores]
        return TensorTrain(cores, tt_ranks=[1] * (batch.ndims() + 1))

    total = batch_element(0)
    for idx in range(1, batch.get_shape()[0]):
        total = ops.add(total, batch_element(idx))

    mu_ranks = [1] + [ranks] * (total.ndims() - 1) + [1]
    tiled_cores = [
        tf.tile(core[None, ...], [self.n_class, 1, 1, 1, 1])
        for core in total.tt_cores
    ]
    initial_value = TensorTrainBatch(tiled_cores, total.get_raw_shape(),
                                     mu_ranks)
    return t3f.get_variable('tt_mus', initializer=initial_value)
def complexity_penalty(self):
    """Returns the complexity penalty term for ELBO.

    The value is half the sum of: -logdet(K_mms), logdet(sigmas),
    -<sigmas, K_mms^{-1}> and -<mus, K_mms^{-1} mus>, where
    sigmas = sigma_ls * sigma_ls^T.
    """
    mus = self.mus
    chol_factors = _kron_tril(self.sigma_ls)
    covariances = ops.tt_tt_matmul(chol_factors, ops.transpose(chol_factors))
    covariances_logdet = _kron_logdet(chol_factors)

    prior = self._K_mms()
    prior_inv = kron.inv(prior)
    prior_logdet = kron.slog_determinant(prior)[1]

    terms = (
        -prior_logdet,
        covariances_logdet,
        -ops.tt_tt_flat_inner(covariances, prior_inv),
        -ops.tt_tt_flat_inner(mus, ops.tt_tt_matmul(prior_inv, mus)),
    )
    penalty = 0
    for term in terms:
        penalty += term
    return penalty / 2
def complexity_penalty(self):
    """Returns the complexity penalty term for ELBO of different GP models.

    The value is half the sum of: -logdet(K_mm), logdet(sigma),
    -<sigma, K_mm^{-1}> and -<mu, K_mm^{-1} mu>, where
    sigma = sigma_l * sigma_l^T.
    """
    mu = self.mu
    chol_factor = _kron_tril(self.sigma_l)
    covariance = ops.tt_tt_matmul(chol_factor, ops.transpose(chol_factor))
    covariance_logdet = _kron_logdet(chol_factor)

    prior = self.K_mm()
    prior_inv = kron.inv(prior)
    prior_logdet = kron.slog_determinant(prior)[1]

    terms = (
        -prior_logdet,
        covariance_logdet,
        -ops.tt_tt_flat_inner(covariance, prior_inv),
        -ops.tt_tt_flat_inner(mu, ops.tt_tt_matmul(prior_inv, mu)),
    )
    total = 0
    for term in terms:
        total += term
    return total / 2
def predict_process_value(self, x, with_variance=False):
    """Predicts the value of the process at point x.

    Args:
        x: data features
        with_variance: if True, returns process variance at x

    Returns:
        The mean `<w, mu>`, where w is the interpolation of the projected
        `x`; or the tuple `(mean, variance)` when `with_variance` is True,
        with variance = cov_0 + |sigma_l^T w|^2 - <w, K_mm w>.
    """
    interp = self.inputs.interpolate_on_batch(self.cov.project(x))
    mean = ops.tt_tt_flat_inner(interp, self.mu)
    if not with_variance:
        return mean

    prior = self.K_mm()
    variance = self.cov.cov_0()
    projected = ops.tt_tt_matmul(ops.transpose(self.sigma_l), interp)
    variance += ops.tt_tt_flat_inner(projected, projected)
    variance -= ops.tt_tt_flat_inner(interp,
                                     ops.tt_tt_matmul(prior, interp))
    return mean, variance
def testGradients(self):
    """Compare autodiff Riemannian gradients with closed-form expressions.

    Covers a squared inner product, a bilinear form, and a function of the
    raw TT-cores for which evaluation is expected to raise
    `tf.errors.InvalidArgumentError`.
    """
    mode_sizes = [5] * 3
    w = initializers.random_matrix((mode_sizes, None), dtype=self.dtype)
    A = initializers.random_matrix((mode_sizes, mode_sizes),
                                   dtype=self.dtype)
    x = initializers.random_matrix((mode_sizes, None), dtype=self.dtype)

    def squared_inner(point):
        return 0.5 * ops.flat_inner(point, w) ** 2

    expected_squared = ops.full(
        riemannian.project(w, x) * ops.flat_inner(x, w))
    self._TestSingleGradient(squared_inner, x, expected_squared)

    def bilinear(point):
        return ops.bilinear_form(A, point, point)

    euclidean_grad = ops.matmul(ops.transpose(A) + A, x)
    expected_bilinear = ops.full(riemannian.project(euclidean_grad, x))
    self._TestSingleGradient(bilinear, x, expected_bilinear)

    def core_dependent(point):
        # A function which is not invariant to different representations of
        # the same tensor, i.e. it does not even have a Riemannian gradient.
        return tf.add_n([tf.reduce_sum(c) for c in point.tt_cores]) ** 2

    with self.assertRaises(tf.errors.InvalidArgumentError):
        actual = ops.full(autodiff.gradients(core_dependent, x))
        self.evaluate(actual)
def elbo(self, w, y):
    """Evidence lower bound.

    Args:
        w: interpolation vector for the current batch.
        y: target values for the current batch.

    Returns:
        The negated first element of the assembled bound (`-elbo[0]`).
    """
    batch_size = tf.cast(tf.shape(y)[0], tf.float64)
    n_total = tf.cast(self.N, dtype=tf.float64)
    targets = tf.reshape(y, [-1])

    mu = self.gp.mu
    chol_factor = _kron_tril(self.gp.sigma_l)
    covariance = ops.tt_tt_matmul(chol_factor, ops.transpose(chol_factor))
    noise = self.gp.cov.noise_variance()
    prior = self.gp.K_mm()

    # batch_size * cov_0 - sum_i <w_i, K_mm w_i>
    tilde_K_ii = batch_size * self.gp.cov.cov_0()
    tilde_K_ii -= tf.reduce_sum(
        ops.tt_tt_flat_inner(w, ops.tt_tt_matmul(prior, w)))

    bound = 0
    bound -= tf.reduce_sum(
        tf.square(targets - ops.tt_tt_flat_inner(w, mu)))
    bound -= tilde_K_ii
    # TODO: revisit; an unsummed variant of this term was left disabled:
    # bound -= ops.tt_tt_flat_inner(w, ops.tt_tt_matmul(covariance, w))
    bound -= tf.reduce_sum(
        ops.tt_tt_flat_inner(w, ops.tt_tt_matmul(covariance, w)))
    bound /= 2 * noise**2 * batch_size
    bound += self.gp.complexity_penalty() / n_total
    # TODO: revisit; this term was left disabled:
    # bound -= tf.log(tf.abs(noise))
    return -bound[0]
def pairwise_flat_inner(tt_1, tt_2, matrix=None):
    """Computes all scalar products between two batches of TT-objects.

    If matrix is None, computes
      res[i, j] = t3f.flat_inner(tt_1[i], tt_2[j]).

    If matrix is present, computes
      res[i, j] = t3f.flat_inner(tt_1[i], t3f.matmul(matrix, tt_2[j]))
    or more shortly
      res[i, j] = tt_1[i]^T * matrix * tt_2[j]
    but is more efficient.

    Args:
      tt_1: TensorTrainBatch.
      tt_2: TensorTrainBatch.
      matrix: None, or TensorTrain matrix.

    Returns:
      tf.tensor with the matrix of pairwise scalar products (flat inners).
      Only the trailing TT-rank axes are squeezed, so the result keeps both
      batch axes even when one of the batches contains a single element.

    Raises:
      ValueError: if `matrix` is given and any of the three arguments is
        not a TT-matrix, if the raw shapes of `tt_1` / `tt_2` are not
        compatible with the raw shape of `matrix`, or if `tt_1` / `tt_2`
        are not vectors after the optional transpose.

    Complexity:
      If the matrix is not present, the complexity is
        O(batch_size^2 d r^3 n)
      where d is the number of TT-cores (tt_1.ndims()), r is the largest
      TT-rank of the arguments and n is the size of the axis dimension,
      e.g.
        for a tensor of size 4 x 4 x 4, n is 4;
        for a 9 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4) n is 12
      A more precise complexity is
        O(batch_size^2 d r1 r2 n max(r1, r2))
      where r1 is the largest TT-rank of tt_1 and r2 is the largest TT-rank
      of tt_2.

      If the matrix is present, the complexity is
        O(batch_size^2 d R r1 r2 (n r1 + n m R + m r2))
      where
        the matrix is of raw-shape (n, n, ..., n) x (m, m, ..., m) and
          TT-rank R;
        tt_1 is of shape (n, n, ..., n) and is of the TT-rank r1;
        tt_2 is of shape (m, m, ..., m) and is of the TT-rank r2;
    """
    ndims = tt_1.ndims()
    if matrix is None:
        mode_string = 'ij' if tt_1.is_tt_matrix() else 'i'
        einsum_str = 'pa{0}b,qc{0}d->pqbd'.format(mode_string)
        res = tf.einsum(einsum_str, tt_1.tt_cores[0], tt_2.tt_cores[0])
        einsum_str = 'pqac,pa{0}b,qc{0}d->pqbd'.format(mode_string)
        for core_idx in range(1, ndims):
            curr_core_1 = tt_1.tt_cores[core_idx]
            curr_core_2 = tt_2.tt_cores[core_idx]
            res = tf.einsum(einsum_str, res, curr_core_1, curr_core_2)
        # `res` is indexed 'pqbd': two batch axes, then two rank axes.
        rank_axes = [2, 3]
    else:
        # res[i, j] = tt_1[i] ^ T * matrix * tt_2[j]
        all_tt_matrices = (tt_1.is_tt_matrix() and tt_2.is_tt_matrix()
                           and matrix.is_tt_matrix())
        if not all_tt_matrices:
            raise ValueError(
                'When passing three arguments to pairwise_flat_inner, '
                'the first 2 of them should be TT-vecors and the last '
                'should be a TT-matrix. Got %s, %s, and %s instead.' %
                (tt_1, tt_2, matrix))
        matrix_shape = matrix.get_raw_shape()
        if not tt_1.get_raw_shape()[0].is_compatible_with(matrix_shape[0]):
            raise ValueError(
                'The shape of the first argument should be compatible '
                'with the shape of the TT-matrix, that is it should be '
                'possible to do the following matmul: '
                'transpose(tt_1) * matrix. Got the first argument '
                '"%s" and matrix "%s"' % (tt_1, matrix))
        if not tt_2.get_raw_shape()[0].is_compatible_with(matrix_shape[1]):
            raise ValueError(
                'The shape of the second argument should be compatible '
                'with the shape of the TT-matrix, that is it should be '
                'possible to do the following matmul: '
                'matrix * tt_2. Got the second argument '
                '"%s" and matrix "%s"' % (tt_2, matrix))

        # Bring both batches to the (batch, 1, N) layout that the einsum
        # strings below assume.
        vectors_1_shape = tt_1.get_shape()
        if vectors_1_shape[2] == 1 and vectors_1_shape[1] != 1:
            # TODO: not very efficient, better to use different order in
            # einsum.
            tt_1 = ops.transpose(tt_1)
            vectors_1_shape = tt_1.get_shape()
        vectors_2_shape = tt_2.get_shape()
        if vectors_2_shape[2] == 1 and vectors_2_shape[1] != 1:
            # TODO: not very efficient, better to use different order in
            # einsum.
            tt_2 = ops.transpose(tt_2)
            vectors_2_shape = tt_2.get_shape()
        if vectors_1_shape[1] != 1:
            # TODO: do something so that in case the shape is undefined on
            # compilation it still works.
            raise ValueError(
                'The tt_vectors_1 argument should be vectors (not '
                'matrices) with shape defined on compilation.')
        if vectors_2_shape[1] != 1:
            # TODO: do something so that in case the shape is undefined on
            # compilation it still works.
            raise ValueError(
                'The tt_vectors_2 argument should be vectors (not '
                'matrices) with shape defined on compilation.')

        curr_core_1 = tt_1.tt_cores[0]
        curr_core_2 = tt_2.tt_cores[0]
        curr_matrix_core = matrix.tt_cores[0]
        # We enumerate the dummy dimension (that takes 1 value) with `k`.
        res = tf.einsum('pakib,cijd,qekjf->pqbdf', curr_core_1,
                        curr_matrix_core, curr_core_2)
        for core_idx in range(1, ndims):
            curr_core_1 = tt_1.tt_cores[core_idx]
            curr_core_2 = tt_2.tt_cores[core_idx]
            curr_matrix_core = matrix.tt_cores[core_idx]
            res = tf.einsum('pqace,pakib,cijd,qekjf->pqbdf', res,
                            curr_core_1, curr_matrix_core, curr_core_2)
        # `res` is indexed 'pqbdf': two batch axes, then three rank axes.
        rank_axes = [2, 3, 4]

    # Squeeze to make the result of size batch_size x batch_size instead of
    # batch_size x batch_size x 1 x ... x 1. The axes are listed explicitly
    # because a bare tf.squeeze would also drop a batch axis of size 1.
    return tf.squeeze(res, axis=rank_axes)