def testHessianVectorProduct(self):
    # Compares autodiff.hessian_vector_product with closed-form reference
    # values for two functions (via self._TestSingleHessianByVector), then
    # checks that a function depending on the particular TT-cores raises
    # tf.errors.InvalidArgumentError.
    w = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    A = initializers.random_matrix(([5] * 3, [5] * 3), dtype=self.dtype)
    x = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    z = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    # Projection of z w.r.t. x (P_x z); shared by both reference values.
    projected_vector = riemannian.project(z, x)

    def func1(x):
        return 0.5 * ops.flat_inner(x, w) ** 2
    # Grad: <x, w> w
    # Hessian: w w.T
    # Hessian by vector: w <w, P_x z>
    desired1 = riemannian.project(w * ops.flat_inner(projected_vector, w), x)
    desired1 = ops.full(desired1)
    self._TestSingleHessianByVector(func1, x, z, desired1)

    def func2(x):
        return ops.bilinear_form(A, x, x)
    # Hessian of <x, Ax> is A + A.T
    hessian_by_vector = ops.matmul(ops.transpose(A) + A, projected_vector)
    desired2 = ops.full(riemannian.project(hessian_by_vector, x))
    # Check the bilinear form against its own reference value. (This call
    # used to repeat the func1/desired1 check, leaving func2 untested.)
    self._TestSingleHessianByVector(func2, x, z, desired2)

    def func3(x):
        # A function which is not invariant to different representations of
        # the same tensor, i.e. it does not even have a Riemannian gradient
        # or hessian.
        return tf.add_n([tf.reduce_sum(c) for c in x.tt_cores]) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
        actual3 = ops.full(autodiff.hessian_vector_product(func3, x, z))
        self.evaluate(actual3)
def testFlatInnerTTTensbySparseTens(self):
    # Inner product between a TT-tensor and a sparse tensor.
    # The reference value is computed in NumPy: the dense TT-tensor is
    # flattened, indexed at the sparse positions and dotted with the values.
    shapes = ((2, 2), (2, 3, 4), (4, 2, 5, 2))
    ranks = (1, 2)
    np.random.seed(1)
    with self.test_session() as sess:
        for shape in shapes:
            for rank in ranks:
                for nnz in [1, 10]:
                    tt_tensor = initializers.random_tensor(shape, tt_rank=rank)
                    # Flat positions are drawn with replacement.
                    flat_idx = np.random.choice(np.prod(shape), nnz)
                    flat_idx = flat_idx.astype(int)
                    multi_idx = np.unravel_index(flat_idx, shape)
                    # One row per element, one column per tensor dimension.
                    multi_idx = np.vstack(multi_idx).T
                    values = np.random.randn(nnz).astype(np.float32)
                    sparse = tf.SparseTensor(indices=multi_idx,
                                             values=values,
                                             dense_shape=shape)
                    inner = ops.flat_inner(tt_tensor, sparse)
                    dense = ops.full(tt_tensor)
                    inner_val, dense_val = sess.run([inner, dense])
                    expected_val = np.dot(dense_val.flatten()[flat_idx],
                                          values)
                    self.assertAllClose(inner_val, expected_val)
def testFlatInnerTTMatbySparseMat(self):
    # Inner product between a TT-matrix and a sparse matrix.
    # The reference value is computed in NumPy from the dense TT-matrix.
    tt_shapes = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
    ranks = (1, 2)
    np.random.seed(1)
    for tt_shape in tt_shapes:
        for rank in ranks:
            for nnz in [1, 9]:
                tt_matrix = initializers.random_matrix(tt_shape,
                                                       tt_rank=rank,
                                                       dtype=self.dtype)
                # Shape of the matrix: products of row and column modes.
                num_rows = np.prod(tt_shape[0])
                num_cols = np.prod(tt_shape[1])
                matrix_shape = (num_rows, num_cols)
                # Flat positions are drawn with replacement.
                flat_idx = np.random.choice(np.prod(matrix_shape), nnz)
                flat_idx = flat_idx.astype(int)
                multi_idx = np.unravel_index(flat_idx, matrix_shape)
                # One row per element: (row, column).
                multi_idx = np.vstack(multi_idx).T
                values = np.random.randn(nnz)
                values = values.astype(self.dtype.as_numpy_dtype)
                sparse = tf.SparseTensor(indices=multi_idx,
                                         values=values,
                                         dense_shape=matrix_shape)
                inner = ops.flat_inner(tt_matrix, sparse)
                dense = ops.full(tt_matrix)
                inner_val, dense_val = self.evaluate([inner, dense])
                expected_val = np.dot(dense_val.flatten()[flat_idx], values)
                self.assertAllClose(inner_val, expected_val)
def testFlatInnerTTTensbyTTTensBroadcasting(self):
    # Inner product between two batch TT-tensors with broadcasting.
    # A batch of size 1 is paired with a batch of size 3 in both argument
    # orders; the reference is an einsum over the dense tensors.
    single = initializers.random_tensor_batch((2, 3, 4), batch_size=1)
    triple = initializers.random_tensor_batch((2, 3, 4), batch_size=3)
    inner_single_first = ops.flat_inner(single, triple)
    inner_triple_first = ops.flat_inner(triple, single)
    expected = tf.einsum('ijk,oijk->o', ops.full(single[0]), ops.full(triple))
    with self.test_session() as sess:
        first_val, second_val, expected_val = sess.run(
            [inner_single_first, inner_triple_first, expected])
        self.assertAllClose(first_val, expected_val)
        self.assertAllClose(second_val, expected_val)

    # The batch_sizes are different (2 vs 3), so flat_inner is expected to
    # raise ValueError.
    pair = initializers.random_tensor_batch((2, 3, 4), batch_size=2)
    with self.assertRaises(ValueError):
        ops.flat_inner(pair, triple)
def testFlatInnerTTTensbyTTTens(self):
    # Inner product between two TT-tensors.
    # The reference is a (1 x N) by (N x 1) matmul of the dense tensors.
    shapes = ((2, 2), (2, 3, 4), (4, 2, 5, 2))
    ranks = (1, 2)
    with self.test_session() as sess:
        for shape in shapes:
            for rank in ranks:
                left = initializers.random_tensor(shape, tt_rank=rank)
                right = initializers.random_tensor(shape, tt_rank=rank)
                inner = ops.flat_inner(left, right)
                left_row = tf.reshape(ops.full(left), (1, -1))
                right_col = tf.reshape(ops.full(right), (-1, 1))
                expected = tf.matmul(left_row, right_col)
                inner_val, expected_val = sess.run([inner, expected])
                # matmul yields a 1 x 1 matrix; squeeze it for comparison.
                expected_val = np.squeeze(expected_val)
                self.assertAllClose(inner_val, expected_val, rtol=1e-5)
def testFlatInnerTTTensbyTTTensSameBatchSize(self):
    # Inner product between two batch TT-tensors of the same batch_size.
    # The reference is a batched (2, 1, N) by (2, N, 1) matmul of the dense
    # tensors.
    shapes = ((2, 2), (2, 3, 4))
    ranks = (1, 2)
    with self.test_session() as sess:
        for shape in shapes:
            for rank in ranks:
                left = initializers.random_tensor_batch(shape,
                                                        tt_rank=rank,
                                                        batch_size=2,
                                                        dtype=self.dtype)
                right = initializers.random_tensor_batch(shape,
                                                         tt_rank=rank,
                                                         batch_size=2,
                                                         dtype=self.dtype)
                inner = ops.flat_inner(left, right)
                left_rows = tf.reshape(ops.full(left), (2, 1, -1))
                right_cols = tf.reshape(ops.full(right), (2, -1, 1))
                expected = tf.matmul(left_rows, right_cols)
                inner_val, expected_val = sess.run([inner, expected])
                # Drop the singleton matmul dimensions before comparing.
                expected_val = np.squeeze(expected_val)
                self.assertAllClose(inner_val, expected_val)
def testFlatInnerTTMatbyTTMat(self):
    # Inner product between two TT-Matrices.
    # The reference is a (1 x N) by (N x 1) matmul of the dense matrices.
    tt_shapes = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
    ranks = (1, 2)
    for tt_shape in tt_shapes:
        for rank in ranks:
            left = initializers.random_matrix(tt_shape, tt_rank=rank,
                                              dtype=self.dtype)
            right = initializers.random_matrix(tt_shape, tt_rank=rank,
                                               dtype=self.dtype)
            inner = ops.flat_inner(left, right)
            left_row = tf.reshape(ops.full(left), (1, -1))
            right_col = tf.reshape(ops.full(right), (-1, 1))
            expected = tf.matmul(left_row, right_col)
            inner_val, expected_val = self.evaluate([inner, expected])
            # matmul yields a 1 x 1 matrix; squeeze it for comparison.
            expected_val = np.squeeze(expected_val)
            self.assertAllClose(inner_val, expected_val,
                                rtol=1e-5, atol=1e-5)
def testGradients(self):
    # Compares autodiff gradients with closed-form reference values for two
    # functions (via self._TestSingleGradient), then checks that a function
    # depending on the particular TT-cores raises
    # tf.errors.InvalidArgumentError.
    w = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    A = initializers.random_matrix(([5] * 3, [5] * 3), dtype=self.dtype)
    x = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)

    def func1(x):
        return 0.5 * ops.flat_inner(x, w) ** 2
    # Reference for func1: the projection of w scaled by <x, w>.
    projected_w = riemannian.project(w, x)
    desired1 = ops.full(projected_w * ops.flat_inner(x, w))
    self._TestSingleGradient(func1, x, desired1)

    def func2(x):
        return ops.bilinear_form(A, x, x)
    # Reference for func2: the projection of (A.T + A) x.
    symmetrized = ops.transpose(A) + A
    grad = ops.matmul(symmetrized, x)
    desired2 = ops.full(riemannian.project(grad, x))
    self._TestSingleGradient(func2, x, desired2)

    def func3(x):
        # A function which is not invariant to different representations of
        # the same tensor, i.e. it does not even have a Riemannian gradient.
        core_sums = [tf.reduce_sum(c) for c in x.tt_cores]
        return tf.add_n(core_sums) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
        actual3 = ops.full(autodiff.gradients(func3, x))
        self.evaluate(actual3)
def func1(x):
    # Half of the squared flat inner product of x with w.
    # NOTE(review): `w` is a free variable not defined in this fragment;
    # presumably it comes from the enclosing test's scope -- confirm.
    inner = ops.flat_inner(x, w)
    return 0.5 * inner ** 2