def testMultiplyInverseAgainstExplicit(self):
  with tf.Graph().as_default(), self.test_session() as sess:
    tf.set_random_seed(200)
    input_dim, output_dim = 3, 2
    inputs = tf.zeros([32, input_dim])
    outputs = tf.zeros([32, output_dim])
    params = tf.zeros([input_dim, output_dim])
    block = fb.FullyConnectedKFACBasicFB(
        lc.LayerCollection(), has_bias=False)
    block.register_additional_tower(inputs, outputs)
    grads = outputs**2
    damping = 0.  # This test is only valid without damping.
    block.instantiate_factors(((grads,),), damping)
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()

    sess.run(tf.assign(block._input_factor._cov, _make_psd(3)))
    sess.run(tf.assign(block._output_factor._cov, _make_psd(2)))

    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    v_flat = np.arange(6, dtype=np.float32)
    vector = utils.column_to_tensors(params, tf.constant(v_flat))
    output = block.multiply_inverse(vector)
    output_flat = sess.run(utils.tensors_to_column(output)).ravel()

    full = sess.run(block.full_fisher_block())
    explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)), v_flat)

    self.assertAllClose(output_flat, explicit)
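# `_make_psd` is referenced above but not defined in this excerpt. A minimal
# sketch of such a helper is given below, assuming it only needs to return a
# well-conditioned positive semi-definite matrix of the requested size; the
# exact helper used by the full test file may differ.
def _make_psd(dim):
  """Returns a dim x dim PSD matrix (all-ones plus the identity)."""
  mat = np.ones((dim, dim), dtype=np.float32)
  mat[np.arange(dim), np.arange(dim)] += 1.
  return mat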
def testMultiplyInverseNotTuple(self):
  with tf.Graph().as_default(), self.test_session() as sess:
    tf.set_random_seed(200)
    inputs = tf.constant([[1., 2.], [3., 4.]])
    outputs = tf.constant([[3., 4.], [5., 6.]])
    block = fb.FullyConnectedKFACBasicFB(
        lc.LayerCollection(), has_bias=False)
    block.register_additional_tower(inputs, outputs)
    grads = outputs**2
    block.instantiate_factors(((grads,),), 0.5)
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()
    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    # Make sure our inverse is something other than the identity.
    sess.run(tf.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    vector = np.arange(2, 6).reshape(2, 2).astype(np.float32)
    output = block.multiply_inverse(tf.constant(vector))

    self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                        sess.run(output))
def testFullyConnectedKFACBasicFBInit(self):
  with tf.Graph().as_default():
    tf.set_random_seed(200)
    inputs = tf.constant([1., 2.])
    outputs = tf.constant([3., 4.])
    block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection())
    block.register_additional_tower(inputs, outputs)

    self.assertAllEqual([outputs], block.tensors_to_compute_grads())
def testInstantiateFactorsNoBias(self):
  with tf.Graph().as_default():
    tf.set_random_seed(200)
    inputs = tf.constant([[1., 2.], [3., 4.]])
    outputs = tf.constant([[3., 4.], [5., 6.]])
    block = fb.FullyConnectedKFACBasicFB(
        lc.LayerCollection(), has_bias=False)
    block.register_additional_tower(inputs, outputs)
    grads = outputs**2
    block.instantiate_factors(((grads,),), 0.5)
def testMultiplyInverse(self):
  with tf.Graph().as_default(), self.test_session() as sess:
    tf.set_random_seed(200)

    # Create a Fisher Block.
    vocab_size = 5
    block = fb.FullyConnectedKFACBasicFB(
        lc.LayerCollection(), diagonal_approx_for_input=True)

    # Add some examples.
    inputs = tf.constant([[0, 1], [1, 2], [2, 3]])
    inputs.one_hot_depth = vocab_size
    outputs = tf.constant([[0.], [1.], [2.]])
    block.register_additional_tower(inputs, outputs)

    # Instantiate the factors' variables. Ensure it doesn't fail.
    grads = outputs**2.
    damping = tf.constant(0.)
    block.instantiate_factors(((grads,),), damping)
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()
    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    # Create a sparse update.
    indices = tf.constant([1, 3, 4])
    values = tf.constant([[1.], [1.], [1.]])
    sparse_vector = tf.IndexedSlices(
        values, indices, dense_shape=[vocab_size, 1])
    dense_vector = tf.reshape([0., 1., 0., 1., 1.], [vocab_size, 1])

    # Compare inverse Fisher-vector product against an explicit solve.
    result = block.multiply_inverse(sparse_vector)
    expected_result = tf.matrix_solve(block.full_fisher_block(), dense_vector)

    sess.run(tf.global_variables_initializer())
    self.assertAlmostEqual(
        sess.run(expected_result[1]), sess.run(result.values[0]))
    self.assertAlmostEqual(
        sess.run(expected_result[3]), sess.run(result.values[1]))
    self.assertAlmostEqual(
        sess.run(expected_result[4]), sess.run(result.values[2]))
def testInstantiateFactors(self):
  with tf.Graph().as_default():
    tf.set_random_seed(200)

    # Create a Fisher Block.
    vocab_size = 5
    block = fb.FullyConnectedKFACBasicFB(
        lc.LayerCollection(), diagonal_approx_for_input=True)

    # Add some examples.
    inputs = tf.constant([[0, 1], [1, 2], [2, 3]])
    inputs.one_hot_depth = vocab_size
    outputs = tf.constant([[0.], [1.], [2.]])
    block.register_additional_tower(inputs, outputs)

    # Instantiate the factors' variables. Ensure it doesn't fail.
    grads = outputs**2.
    damping = tf.constant(0.)
    block.instantiate_factors(((grads,),), damping)