Example #1
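All of the snippets below are test methods exercising fb.FullyConnectedKFACBasicFB and assume imports roughly like the following. The module paths are an assumption based on the TensorFlow contrib KFAC layout, not shown in the snippets themselves; adjust them to wherever fisher_blocks, layer_collection, and utils live in your checkout:

import numpy as np
import tensorflow as tf

# NOTE: assumed import paths; the snippets only show the fb/lc/utils aliases.
from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb
from tensorflow.contrib.kfac.python.ops import layer_collection as lc
from tensorflow.contrib.kfac.python.ops import utils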
  def testMultiplyInverseAgainstExplicit(self):
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      input_dim, output_dim = 3, 2
      inputs = tf.zeros([32, input_dim])
      outputs = tf.zeros([32, output_dim])
      params = tf.zeros([input_dim, output_dim])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
      block.register_additional_tower(inputs, outputs)
      grads = outputs**2
      damping = 0.  # This test is only valid without damping.
      block.instantiate_factors(((grads,),), damping)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()

      # Overwrite the covariance factors with fixed PSD matrices so the
      # inverses computed below are nontrivial and reproducible.
      sess.run(tf.assign(block._input_factor._cov, _make_psd(3)))
      sess.run(tf.assign(block._output_factor._cov, _make_psd(2)))

      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      v_flat = np.arange(6, dtype=np.float32)
      vector = utils.column_to_tensors(params, tf.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      # Compare against explicitly inverting the dense Fisher block.
      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)), v_flat)

      self.assertAllClose(output_flat, explicit)
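
This example calls a _make_psd helper that isn't shown on this page. A minimal sketch, assuming all it needs to do is return a well-conditioned positive-definite matrix of the given size (the helper in the original test suite may differ):

def _make_psd(dim):
  # All-ones matrix plus a strictly positive diagonal: symmetric and
  # positive definite, so the covariance factor is invertible.
  # NOTE: sketch only; not necessarily the original helper.
  mat = np.ones((dim, dim), dtype=np.float32)
  mat[np.arange(dim), np.arange(dim)] = 2. + np.arange(dim)
  return tf.constant(mat)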
Example #2
  def testMultiplyInverseNotTuple(self):
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      inputs = tf.constant([[1., 2.], [3., 4.]])
      outputs = tf.constant([[3., 4.], [5., 6.]])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
      block.register_additional_tower(inputs, outputs)
      grads = outputs**2
      block.instantiate_factors(((grads,),), 0.5)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(tf.global_variables_initializer())
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      vector = np.arange(2, 6).reshape(2, 2).astype(np.float32)
      output = block.multiply_inverse(tf.constant(vector))

      self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                          sess.run(output))
Example #3
  def testFullyConnectedKFACBasicFBInit(self):
    with tf.Graph().as_default():
      tf.set_random_seed(200)
      inputs = tf.constant([1., 2.])
      outputs = tf.constant([3., 4.])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection())
      block.register_additional_tower(inputs, outputs)

      self.assertAllEqual([outputs], block.tensors_to_compute_grads())
Example #4
  def testInstantiateFactorsNoBias(self):
    with tf.Graph().as_default():
      tf.set_random_seed(200)
      inputs = tf.constant([[1., 2.], [3., 4.]])
      outputs = tf.constant([[3., 4.], [5., 6.]])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
      block.register_additional_tower(inputs, outputs)

      grads = outputs**2
      block.instantiate_factors(((grads,),), 0.5)
Example #5
  def testMultiplyInverse(self):
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)

      # Create a Fisher Block.
      vocab_size = 5
      block = fb.FullyConnectedKFACBasicFB(
          lc.LayerCollection(),
          diagonal_approx_for_input=True)

      # Add some examples.
      inputs = tf.constant([[0, 1], [1, 2], [2, 3]])
      inputs.one_hot_depth = vocab_size
      outputs = tf.constant([[0.], [1.], [2.]])
      block.register_additional_tower(inputs, outputs)

      # Instantiate the factors' variables. Ensure this doesn't fail.
      grads = outputs**2.
      damping = tf.constant(0.)
      block.instantiate_factors(((grads,),), damping)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Create a sparse update.
      indices = tf.constant([1, 3, 4])
      values = tf.constant([[1.], [1.], [1.]])
      sparse_vector = tf.IndexedSlices(
          values, indices, dense_shape=[vocab_size, 1])
      dense_vector = tf.reshape([0., 1., 0., 1., 1.], [vocab_size, 1])

      # Compare the inverse Fisher-vector product against an explicit solve.
      result = block.multiply_inverse(sparse_vector)
      expected_result = tf.matrix_solve(block.full_fisher_block(), dense_vector)

      sess.run(tf.global_variables_initializer())
      self.assertAlmostEqual(
          sess.run(expected_result[1]), sess.run(result.values[0]))
      self.assertAlmostEqual(
          sess.run(expected_result[3]), sess.run(result.values[1]))
      self.assertAlmostEqual(
          sess.run(expected_result[4]), sess.run(result.values[2]))
Example #6
  def testInstantiateFactors(self):
    with tf.Graph().as_default():
      tf.set_random_seed(200)

      # Create a Fisher Block.
      vocab_size = 5
      block = fb.FullyConnectedKFACBasicFB(
          lc.LayerCollection(),
          diagonal_approx_for_input=True)

      # Add some examples.
      inputs = tf.constant([[0, 1], [1, 2], [2, 3]])
      inputs.one_hot_depth = vocab_size
      outputs = tf.constant([[0.], [1.], [2.]])
      block.register_additional_tower(inputs, outputs)

      # Instantiate the factors' variables. Ensure this doesn't fail.
      grads = outputs**2.
      damping = tf.constant(0.)
      block.instantiate_factors(((grads,),), damping)
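
Outside of tests you normally wouldn't construct the block directly; registering the layer with a LayerCollection creates the Fisher block for you. A minimal sketch, assuming the contrib KFAC register_fully_connected API (the variable names here are hypothetical):

input_dim, output_dim = 3, 2
layers = lc.LayerCollection()
inputs = tf.zeros([32, input_dim])
weights = tf.get_variable('w', shape=[input_dim, output_dim])
outputs = tf.matmul(inputs, weights)
# With the default Kronecker approximation this should register a
# FullyConnectedKFACBasicFB for the layer internally.
layers.register_fully_connected(weights, inputs, outputs)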