예제 #1
0
파일: utils_test.py 프로젝트: leox1v/kfac
  def testColumnToTensors(self):
    """Checks `utils.column_to_tensors` against three kinds of template.

    The templates are: a single 2x2 tensor, the value returned by
    `self._fully_connected_layer_params()`, and a list made from that value
    with one extra 4x1 tensor appended. For each template a column vector
    holding `0, 1, ..., n-1` is converted and the evaluated result is compared
    with the expected pieces.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)

      expected_matrix = np.array([[0., 1.], [2., 3.]])
      expected_vector = np.array([4., 5.])
      expected_column = np.array([[6.], [7.], [8.], [9.]])

      # Case 1: a single tensor template evaluates to a single array.
      template = tf.constant(np.array([[0., 1.], [2., 3.]]))
      column = tf.constant(np.arange(4.).reshape(-1, 1))
      result = sess.run(utils.column_to_tensors(template, column))
      self.assertAllClose(result, expected_matrix)

      # Case 2: the layer-params template must evaluate to a 2-tuple.
      template = self._fully_connected_layer_params()
      column = tf.constant(np.arange(6.).reshape(-1, 1))
      result = sess.run(utils.column_to_tensors(template, column))

      self.assertIsInstance(result, tuple)
      self.assertEqual(len(result), 2)
      matrix_part, vector_part = result
      self.assertAllClose(matrix_part, expected_matrix)
      self.assertAllClose(vector_part, expected_vector)

      # Case 3: a list template is still expected to come back as a tuple,
      # now with the appended 4x1 tensor as its third element.
      template = list(template) + [tf.constant([[6.], [7.], [8.], [9.]])]
      column = tf.constant(np.arange(10.).reshape(-1, 1))
      result = sess.run(utils.column_to_tensors(template, column))
      self.assertIsInstance(result, tuple)
      self.assertEqual(len(result), 3)
      matrix_part, vector_part, column_part = result
      self.assertAllClose(matrix_part, expected_matrix)
      self.assertAllClose(vector_part, expected_vector)
      self.assertAllClose(column_part, expected_column)
예제 #2
0
  def testMultiplyInverseAgainstExplicit(self):
    """Compares `ConvKFCBasicFB.multiply_inverse` with an explicit inverse.

    The block's input and output covariances are overwritten with the
    matrices from `_make_psd`, the inverse update ops are run, and the result
    of `multiply_inverse` on a fixed vector is compared with the product of
    that vector and the NumPy inverse of `full_fisher_block()`.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      tensor_shape = (2, 2, 2, 2)
      params = tf.zeros(tensor_shape)
      inputs = tf.zeros(tensor_shape)
      outputs = tf.zeros(tensor_shape)
      layer_collection = lc.LayerCollection()
      block = fb.ConvKFCBasicFB(
          layer_collection, params=params, padding='SAME')
      block.register_additional_tower(inputs, outputs)
      grads = outputs ** 2
      # The comparison below only holds when no damping is applied.
      damping = 0.
      block.instantiate_factors(((grads,),), damping)
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_cov_variables()
      block.register_inverse()
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_inv_variables()

      # Overwrite both covariances first, then refresh both inverses.
      cov_dims = ((block._input_factor, 8), (block._output_factor, 2))
      for factor, dim in cov_dims:
        sess.run(tf.assign(factor._cov, _make_psd(dim)))
      for factor in (block._input_factor, block._output_factor):
        sess.run(factor.make_inverse_update_ops())

      num_entries = 16
      flat_vector = np.arange(num_entries, dtype=np.float32)
      structured = utils.column_to_tensors(params, tf.constant(flat_vector))
      product = block.multiply_inverse(structured)
      actual = sess.run(utils.tensors_to_column(product)).ravel()

      dense_fisher = sess.run(block.full_fisher_block())
      damped = dense_fisher + damping * np.eye(num_entries)
      expected = np.dot(np.linalg.inv(damped), flat_vector)

      self.assertAllClose(actual, expected)
예제 #3
0
  def testMultiplyInverseAgainstExplicit(self):
    """Compares `FullFB.multiply_inverse` with an explicit damped inverse.

    The factor's covariance is overwritten with `_make_psd(3)`, the inverse
    update ops are run, and `multiply_inverse` on a fixed 3-vector is compared
    with the NumPy inverse of `full_fisher_block() + damping * I` applied to
    the same vector.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      params = (tf.constant([1., 2.]), tf.constant(3.))
      layer_collection = lc.LayerCollection()
      block = fb.FullFB(layer_collection, params)
      block.register_additional_tower(32)
      grads = (tf.constant([2., 3.]), tf.constant(4.))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()
      block.register_inverse()
      block._factor.instantiate_inv_variables()

      # Replace the covariance so the inverse is not simply the identity.
      overwrite_cov = tf.assign(block._factor._cov, _make_psd(3))
      sess.run(overwrite_cov)
      sess.run(block._factor.make_inverse_update_ops())

      flat_vector = np.array([4., 5., 6.], dtype=np.float32)
      structured = utils.column_to_tensors(params, tf.constant(flat_vector))
      product = block.multiply_inverse(structured)
      actual = sess.run(utils.tensors_to_column(product)).ravel()

      dense_fisher = sess.run(block.full_fisher_block())
      damped = dense_fisher + damping * np.eye(3)
      expected = np.dot(np.linalg.inv(damped), flat_vector)

      self.assertAllClose(actual, expected)
예제 #4
0
  def testMultiplyInverseAgainstExplicit(self):
    """Compares `FullyConnectedKFACBasicFB.multiply_inverse` with NumPy.

    After the covariance variables are initialized, `_make_psd` matrices are
    added to both covariance averages. The inverses are then registered,
    instantiated and updated, and `multiply_inverse` on a fixed 6-vector is
    compared with the NumPy inverse of `full_fisher_block()` applied to the
    same vector.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      input_dim = 3
      output_dim = 2
      inputs = tf.zeros([32, input_dim])
      outputs = tf.zeros([32, output_dim])
      params = tf.zeros([input_dim, output_dim])
      layer_collection = lc.LayerCollection()
      block = fb.FullyConnectedKFACBasicFB(layer_collection, has_bias=False)
      block.register_additional_tower(inputs, outputs)
      grads = outputs ** 2
      # The comparison below only holds when no damping is applied.
      damping = 0.
      block.instantiate_factors(((grads,),), damping)
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_cov_variables()

      sess.run(tf.global_variables_initializer())
      cov_dims = ((block._input_factor, 3), (block._output_factor, 2))
      for factor, dim in cov_dims:
        sess.run(factor._cov.add_to_average(_make_psd(dim)))

      # Inverse variables are created only after the covariances are set.
      block.register_inverse()
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_inv_variables()

      for factor in (block._input_factor, block._output_factor):
        sess.run(factor.make_inverse_update_ops())

      num_entries = 6
      flat_vector = np.arange(num_entries, dtype=np.float32)
      structured = utils.column_to_tensors(params, tf.constant(flat_vector))
      product = block.multiply_inverse(structured)
      actual = sess.run(utils.tensors_to_column(product)).ravel()

      dense_fisher = sess.run(block.full_fisher_block())
      damped = dense_fisher + damping * np.eye(num_entries)
      expected = np.dot(np.linalg.inv(damped), flat_vector)

      self.assertAllClose(actual, expected)
예제 #5
0
  def testMultiplyInverseAgainstExplicit(self):
    """Compares `NaiveDiagonalFB.multiply_inverse` with an explicit inverse.

    A 3x1 column `[2, 3, 4]` is added to the factor's covariance average, the
    inverse update ops are run, and `multiply_inverse` on a fixed 3-vector is
    compared with the NumPy inverse of `full_fisher_block() + damping * I`
    applied to the same vector.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      params = (tf.constant([1., 2.]), tf.constant(3.))
      layer_collection = lc.LayerCollection()
      block = fb.NaiveDiagonalFB(layer_collection, params)
      block.register_additional_tower(32)
      first_param, second_param = params
      grads = (first_param ** 2, tf.sqrt(second_param))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()

      # Column-shaped covariance values fed into the running average.
      cov = tf.reshape(tf.constant([2., 3., 4.]), [-1, 1])

      sess.run(tf.global_variables_initializer())
      update_cov = block._factor._cov.add_to_average(cov)
      sess.run(update_cov)

      sess.run(block._factor.make_inverse_update_ops())

      flat_vector = np.array([4., 5., 6.], dtype=np.float32)
      structured = utils.column_to_tensors(params, tf.constant(flat_vector))
      product = block.multiply_inverse(structured)
      actual = sess.run(utils.tensors_to_column(product)).ravel()

      dense_fisher = sess.run(block.full_fisher_block())
      damped = dense_fisher + damping * np.eye(3)
      expected = np.dot(np.linalg.inv(damped), flat_vector)

      self.assertAllClose(actual, expected)