def testMultiplyInverseNotTupleWithBias(self):
    """Check multiply_inverse for a block whose params is a one-element list.

    Builds a ConvKFCBasicFB from a single-element params list, asserts that
    the block reports `_has_bias`, refreshes both factor inverses and then
    compares the first entry of the multiply_inverse result against
    hard-coded reference values.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)
        shape = (2, 2, 2, 2)
        params = [tf.random_normal(shape)]
        inputs = tf.random_normal(shape)
        outputs = tf.random_normal(shape)

        layer_collection = lc.LayerCollection()
        block = fb.ConvKFCBasicFB(
            layer_collection, params=params, padding='SAME')
        block.register_additional_tower(inputs, outputs)
        self.assertTrue(block._has_bias)

        grads = outputs**2
        damping = 0.5
        block.instantiate_factors(((grads,),), damping)

        # Factors are looked up by attribute name on every use so the
        # input factor is always handled before the output factor.
        factor_names = ('_input_factor', '_output_factor')
        for factor_name in factor_names:
            getattr(block, factor_name).instantiate_cov_variables()
        block.register_inverse()
        for factor_name in factor_names:
            getattr(block, factor_name).instantiate_inv_variables()

        # Make sure our inverse is something other than the identity.
        sess.run(tf.global_variables_initializer())
        for factor_name in factor_names:
            sess.run(getattr(block, factor_name).make_inverse_update_ops())

        vector = np.arange(1, 19).reshape(9, 2).astype(np.float32)
        output = block.multiply_inverse(tf.constant(vector))

        expected_first = [0.136455, 0.27291]
        self.assertAllClose(expected_first, sess.run(output)[0])
def testMultiplyInverseAgainstExplicit(self):
    """Compare multiply_inverse with an explicit inverse of the full block.

    The factor covariances are overwritten with matrices from `_make_psd`,
    the factor inverses are refreshed, and the flattened multiply_inverse
    result is checked against `inv(full_fisher_block) . v` computed with
    NumPy.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)
        shape = (2, 2, 2, 2)
        params = tf.zeros(shape)
        inputs = tf.zeros(shape)
        outputs = tf.zeros(shape)

        layer_collection = lc.LayerCollection()
        block = fb.ConvKFCBasicFB(
            layer_collection, params=params, padding='SAME')
        block.register_additional_tower(inputs, outputs)

        grads = outputs**2
        damping = 0.  # This test is only valid without damping.
        block.instantiate_factors(((grads,),), damping)

        # Factors are looked up by attribute name on every use so the
        # input factor is always handled before the output factor.
        factor_names = ('_input_factor', '_output_factor')
        for factor_name in factor_names:
            getattr(block, factor_name).instantiate_cov_variables()
        block.register_inverse()
        for factor_name in factor_names:
            getattr(block, factor_name).instantiate_inv_variables()

        # Overwrite each covariance with a matrix from _make_psd:
        # size 8 for the input factor, size 2 for the output factor.
        for factor_name, cov_dim in (('_input_factor', 8),
                                     ('_output_factor', 2)):
            cov_variable = getattr(block, factor_name)._cov
            sess.run(tf.assign(cov_variable, _make_psd(cov_dim)))

        for factor_name in factor_names:
            sess.run(getattr(block, factor_name).make_inverse_update_ops())

        v_flat = np.arange(16, dtype=np.float32)
        vector = utils.column_to_tensors(params, tf.constant(v_flat))
        output = block.multiply_inverse(vector)
        output_column = utils.tensors_to_column(output)
        output_flat = sess.run(output_column).ravel()

        full = sess.run(block.full_fisher_block())
        damped_full = full + damping * np.eye(16)
        explicit = np.dot(np.linalg.inv(damped_full), v_flat)

        self.assertAllClose(output_flat, explicit)
def _testConvKFCBasicFBInitParams(self, params):
    """Build a ConvKFCBasicFB from `params` and check its gradient tensors.

    Args:
      params: A single value, or a list/tuple of values, each of which is
        wrapped with `tf.constant` before being handed to the block.
    """
    with tf.Graph().as_default():
        tf.set_random_seed(200)

        # A list/tuple becomes a list of constants; anything else becomes
        # a single constant.
        if not isinstance(params, (list, tuple)):
            params = tf.constant(params)
        else:
            params = [tf.constant(value) for value in params]

        io_shape = (2, 2, 2)
        inputs = tf.random_normal(io_shape)
        outputs = tf.random_normal(io_shape)

        layer_collection = lc.LayerCollection()
        block = fb.ConvKFCBasicFB(
            layer_collection, params=params, padding='SAME')
        block.register_additional_tower(inputs, outputs)

        self.assertAllEqual([outputs], block.tensors_to_compute_grads())