Ejemplo n.º 1
0
  def DISABLED_test_multiple_weights(self):
    """Compare graph-search registration with a manual registration.

    The toy graph applies one affine layer (shared weight `W` and bias `b_0`)
    to two separate placeholders. The blocks registered by graph search are
    compared with a hand-written `register_fully_connected_multi` call.
    """
    with tf.Graph().as_default():
      weight = tf.get_variable('W', [10, 10])
      bias = tf.get_variable('b_0', [10])
      input_a = tf.placeholder(tf.float32, shape=(32, 10))
      input_b = tf.placeholder(tf.float32, shape=(32, 10))

      output_a = tf.matmul(input_a, weight) + bias
      output_b = tf.matmul(input_b, weight) + bias

      # Reference registration, written out by hand.
      expected = lc.LayerCollection()
      expected.register_fully_connected_multi(
          (weight, bias), (input_a, input_b), (output_a, output_b))

      # Registration produced by graph search over all global variables.
      actual = lc.LayerCollection()
      variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
      gs.register_layers(actual, variables)

      assert_fisher_blocks_match(self, actual, expected)
Ejemplo n.º 2
0
  def test_filter_grouped_variable_records(self):
    """Records that conflict with declared parameter groups are dropped."""
    # Case 1: `w` is declared as a linked group on its own.
    with tf.Graph().as_default():
      tensors, records = _build_mock_records()

      collection = lc.LayerCollection()
      collection.define_linked_parameters(params=tensors['w'])
      filtered = gs.filter_grouped_variable_records(collection, records)

      surviving_keys = [tensors['w'], tensors['b_0']]
      expected = {key: records[key] for key in surviving_keys}
      self.assertDictEqual(filtered, expected)

    # Case 2: `w` and `b_0` are declared together as a single linked group.
    with tf.Graph().as_default():
      tensors, records = _build_mock_records()

      collection = lc.LayerCollection()
      collection.define_linked_parameters(
          params=(tensors['w'], tensors['b_0']))
      filtered = gs.filter_grouped_variable_records(collection, records)

      surviving_keys = [(tensors['w'], tensors['b_0'])]
      expected = {key: records[key] for key in surviving_keys}
      self.assertDictEqual(filtered, expected)
Ejemplo n.º 3
0
  def DISABLED_test_subset_weights_manual_registration(self):
    """Compare graph search with manual registration for a partial bias.

    The same weight matrix `W` multiplies two placeholders, but the bias `b_0`
    is added to only one of the two products. `W` is declared as a linked
    parameter group before graph search runs. The result is compared with a
    manual multi-use registration of `W` plus a generic registration of `b_0`.
    """
    with tf.Graph().as_default():
      weight = tf.get_variable('W', [10, 10])
      bias = tf.get_variable('b_0', [10])
      input_a = tf.placeholder(tf.float32, shape=(32, 10))
      input_b = tf.placeholder(tf.float32, shape=(32, 10))

      output_a = tf.matmul(input_a, weight) + bias
      output_b = tf.matmul(input_b, weight)

      # Reference registration, written out by hand.
      expected = lc.LayerCollection()
      expected.register_fully_connected_multi(
          weight, (input_a, input_b), (output_a, output_b))
      expected.register_generic(bias, batch_size=1)

      # Registration produced by graph search with `W` pre-declared as linked.
      actual = lc.LayerCollection()
      actual.define_linked_parameters(weight)
      variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
      gs.register_layers(actual, variables, batch_size=1)

      assert_fisher_blocks_match(self, actual, expected)
Ejemplo n.º 4
0
    def test_tied_weights_untied_bias_registered_weights(self):
        """Graph search should reproduce a hand-built toy-model registration.

        Both collections register squared-error losses on the two outputs.
        The reference registers `w` as a multi-use fully connected layer and
        each bias generically; the other declares `w` as linked and lets
        graph search register the rest.
        """
        with tf.Graph().as_default():
            tensors = _build_model()
            out_0, out_1 = tensors['out_0'], tensors['out_1']
            weight = tensors['w']

            # Hand-written reference registration.
            expected = lc.LayerCollection()
            expected.register_squared_error_loss(out_0)
            expected.register_squared_error_loss(out_1)
            expected.register_fully_connected_multi(
                weight,
                (tensors['x'], tensors['y']),
                (tensors['pre_bias_0'], tensors['pre_bias_1']))
            expected.register_generic(tensors['b_0'], batch_size=1)
            expected.register_generic(tensors['b_1'], batch_size=1)

            # Registration produced by graph search.
            actual = lc.LayerCollection()
            actual.register_squared_error_loss(out_0)
            actual.register_squared_error_loss(out_1)
            actual.define_linked_parameters(weight)
            variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
            gs.register_layers(actual, variables, batch_size=1)

            assert_fisher_blocks_match(self, actual, expected)
Ejemplo n.º 5
0
  def test_tied_weights_untied_bias_registered_affine(self):
    """Linking (w, b_1) as one group should register without error.

    Declaring (w, b_1) as linked is expected to succeed because the competing
    matches with parameters (w) and (w, b_0) get filtered out. The outcome is
    compared with a manual fully connected registration of (w, b_1) plus a
    generic registration of b_0.
    """
    with tf.Graph().as_default():
      tensors = _build_model()
      affine_params = (tensors['w'], tensors['b_1'])

      # Hand-written reference registration.
      expected = lc.LayerCollection()
      expected.register_fully_connected(
          params=affine_params,
          inputs=tensors['y'],
          outputs=tensors['out_1'])
      expected.register_generic(tensors['b_0'], batch_size=32)

      # Registration produced by graph search.
      actual = lc.LayerCollection()
      actual.define_linked_parameters(affine_params)
      variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
      gs.register_layers(actual, variables, batch_size=32)

      assert_fisher_blocks_match(self, actual, expected)
Ejemplo n.º 6
0
  def test_specify_approximation(self):
    """Linked parameters should be registered with the requested approximation.

    Two independent affine layers are built. They are first linked as
    (weight, bias) pairs and then as individual tensors. In both scenarios the
    Fisher block type chosen by graph search is checked against the
    approximation given to `define_linked_parameters`.
    """
    with tf.Graph().as_default():
      weights_a = tf.get_variable('w_0', [10, 10])
      weights_b = tf.get_variable('w_1', [10, 10])

      bias_a = tf.get_variable('b_0', [10])
      bias_b = tf.get_variable('b_1', [10])

      inputs_a = tf.placeholder(tf.float32, shape=(32, 10))
      inputs_b = tf.placeholder(tf.float32, shape=(32, 10))

      pre_bias_a = tf.matmul(inputs_a, weights_a)
      pre_bias_b = tf.matmul(inputs_b, weights_b)

      # The sums are never read back; they only need to exist in the graph.
      out_a = pre_bias_a + bias_a  # pylint: disable=unused-variable
      out_b = pre_bias_b + bias_b  # pylint: disable=unused-variable

      variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)

      # Scenario 1: each (weight, bias) pair is linked as one affine layer.
      affine_collection = lc.LayerCollection()
      affine_collection.define_linked_parameters(
          (weights_a, bias_a), approximation=lc.APPROX_KRONECKER_NAME)
      affine_collection.define_linked_parameters(
          (weights_b, bias_b), approximation=lc.APPROX_DIAGONAL_NAME)
      gs.register_layers(affine_collection, variables, batch_size=32)
      self.assertIsInstance(
          affine_collection.fisher_blocks[(weights_a, bias_a)],
          fb.FullyConnectedKFACBasicFB)
      self.assertIsInstance(
          affine_collection.fisher_blocks[(weights_b, bias_b)],
          fb.FullyConnectedDiagonalFB)

      # Scenario 2: weights are linear layers, biases are generic parameters.
      split_collection = lc.LayerCollection()
      requested = (
          (weights_a, lc.APPROX_DIAGONAL_NAME),
          (bias_a, lc.APPROX_DIAGONAL_NAME),
          (weights_b, lc.APPROX_KRONECKER_NAME),
          (bias_b, lc.APPROX_FULL_NAME),
      )
      for param, approx in requested:
        split_collection.define_linked_parameters(param, approximation=approx)
      gs.register_layers(split_collection, variables, batch_size=32)

      expected_types = (
          (weights_a, fb.FullyConnectedDiagonalFB),
          (bias_a, fb.NaiveDiagonalFB),
          (weights_b, fb.FullyConnectedKFACBasicFB),
          (bias_b, fb.FullFB),
      )
      for param, block_type in expected_types:
        self.assertIsInstance(split_collection.fisher_blocks[param],
                              block_type)
Ejemplo n.º 7
0
    def testRegisterCategoricalPredictiveDistribution(self):
        """Registering the same categorical loss twice doubles the loss."""
        with tf.Graph().as_default(), self.test_session() as sess:
            tf.set_random_seed(200)
            logits = tf.eye(2)

            # Collection holding a single registration.
            once = layer_collection.LayerCollection()
            once.register_categorical_predictive_distribution(logits,
                                                              seed=200)
            loss_once = sess.run(once.total_sampled_loss())

            # Collection holding the identical registration two times.
            twice = layer_collection.LayerCollection()
            for _ in range(2):
                twice.register_categorical_predictive_distribution(
                    logits, seed=200)
            loss_twice = sess.run(twice.total_sampled_loss())

            self.assertAlmostEqual(2 * loss_once, loss_twice)
Ejemplo n.º 8
0
  def testRegisterNormalPredictiveDistribution(self):
    """Registering the same normal loss twice doubles the sampled loss."""
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      predictions = tf.constant([[1., 2.], [3., 4]], dtype=tf.float32)

      # Collection holding a single registration.
      once = layer_collection.LayerCollection()
      once.register_normal_predictive_distribution(predictions, 1., seed=200)
      loss_once = sess.run(once.total_sampled_loss())

      # Collection holding the identical registration two times.
      twice = layer_collection.LayerCollection()
      for _ in range(2):
        twice.register_normal_predictive_distribution(
            predictions, 1., seed=200)
      loss_twice = sess.run(twice.total_sampled_loss())

      self.assertAlmostEqual(2 * loss_once, loss_twice)
Ejemplo n.º 9
0
  def testMultiplyInverseAgainstExplicit(self):
    """Check `NaiveDiagonalFB.multiply_inverse` against a dense NumPy solve.

    The block's output is compared with
    `inv(full_fisher_block + damping * I) . v` computed explicitly.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      # Two parameter tensors holding three scalars in total.
      params = (tf.constant([1., 2.]), tf.constant(3.))
      block = fb.NaiveDiagonalFB(lc.LayerCollection(), params)
      # NOTE(review): 32 is presumably the tower's batch size -- confirm.
      block.register_additional_tower(32)
      grads = (params[0]**2, tf.sqrt(params[1]))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()

      # Covariance values fed into the factor, reshaped to a 3x1 column.
      cov = tf.reshape(tf.constant([2., 3., 4.]), [-1, 1])

      sess.run(tf.global_variables_initializer())
      sess.run(block._factor._cov.add_to_average(cov))

      sess.run(block._factor.make_inverse_update_ops())

      # Flat 3-vector, split to match the structure of `params`.
      v_flat = np.array([4., 5., 6.], dtype=np.float32)
      vector = utils.column_to_tensors(params, tf.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      # Reference value: invert the damped dense Fisher block directly.
      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)

      self.assertAllClose(output_flat, explicit)
Ejemplo n.º 10
0
    def testOptimizerInit(self):
        """Constructing `KfacOptimizer` on a one-layer model should not raise."""
        with tf.Graph().as_default():
            collection = lc.LayerCollection()

            # One fully connected layer: inputs @ weights + bias.
            inputs = tf.ones((2, 1)) * 2
            initial_weights = np.ones((1, 1), dtype=np.float32) * 3.
            weights = tf.get_variable(
                'w', initializer=tf.constant(initial_weights))
            bias = tf.get_variable(
                'b', initializer=tf.zeros_initializer(), shape=(1, 1))
            pre_activation = tf.matmul(inputs, weights) + bias

            collection.register_fully_connected(
                (weights, bias), inputs, pre_activation)

            logits = tf.tanh(pre_activation)
            targets = tf.constant([[0.], [1.]])
            # The loss is built but never read; only construction is tested.
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits, labels=targets))

            collection.register_categorical_predictive_distribution(logits)

            optimizer.KfacOptimizer(
                0.1,
                0.2,
                collection,
                0.3,
                momentum=0.5,
                momentum_type='regular')
Ejemplo n.º 11
0
  def DISABLED_test_specify_approximation_shared_parameters(self):
    """Requested approximations should be honoured for shared parameters.

    The shared weight and both biases of the toy model are each linked with
    their own approximation. After graph search, the Fisher block registered
    for each one must have the matching type.
    """
    with tf.Graph().as_default():
      tensors = _build_model()
      weight = tensors['w']
      bias_0 = tensors['b_0']
      bias_1 = tensors['b_1']

      collection = lc.LayerCollection()
      requested = (
          (weight, lc.APPROX_KRONECKER_INDEP_NAME),
          (bias_0, lc.APPROX_DIAGONAL_NAME),
          (bias_1, lc.APPROX_FULL_NAME),
      )
      for param, approx in requested:
        collection.define_linked_parameters(param, approximation=approx)

      variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
      gs.register_layers(collection, variables, batch_size=1)

      # The weight is looked up bare; each bias is keyed by a 1-tuple.
      self.assertIsInstance(collection.fisher_blocks[weight],
                            fb.FullyConnectedMultiIndepFB)
      self.assertIsInstance(collection.fisher_blocks[(bias_0,)],
                            fb.NaiveDiagonalFB)
      self.assertIsInstance(collection.fisher_blocks[(bias_1,)], fb.FullFB)
Ejemplo n.º 12
0
  def test_graph_search_match_fail(self):
    """Linking two non-adjacent biases should make graph search fail.

    No K-FAC Fisher block matches a group made of both bias tensors, so
    `register_layers` is expected to raise a `ValueError` whose message names
    the unmatched linked group.
    """
    with tf.Graph().as_default():
      tensors = _build_model()
      bias_0, bias_1 = tensors['b_0'], tensors['b_1']

      collection = lc.LayerCollection()
      # TODO(b/69055612): remove this manual registration once layer_collection
      # implements register_fully_connected_multi.
      collection.register_fully_connected(
          tensors['w'], tensors['x'], tensors['pre_bias_0'])
      collection.define_linked_parameters((bias_0, bias_1))

      with self.assertRaises(ValueError) as cm:
        variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        gs.register_layers(collection, variables)

      message = str(cm.exception)
      self.assertIn('in linked group', message)
      self.assertIn('was not matched', message)
      self.assertIn(str(frozenset([bias_0, bias_1])), message)
Ejemplo n.º 13
0
    def test_train(self):
        """Smoke-test training a small MLP with `AsyncInvCovUpdateKfacOpt`.

        A Sonnet MLP is trained on random images and labels for at most
        `max_iterations` steps, stopping early once the loss drops below
        `target_loss`. The test makes no assertion on the final loss; it only
        checks that the training loop runs without raising.
        """
        image = tf.random_uniform(shape=(_BATCH_SIZE, 784), maxval=1.)
        labels = tf.random_uniform(shape=(_BATCH_SIZE, ),
                                   maxval=10,
                                   dtype=tf.int32)
        labels_one_hot = tf.one_hot(labels, 10)

        model = snt.Sequential(
            [snt.BatchFlatten(),
             snt.nets.MLP([128, 128, 10])])
        logits = model(image)
        all_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels_one_hot)
        loss = tf.reduce_mean(all_losses)
        layers = lc.LayerCollection()
        optimizer = ak.AsyncInvCovUpdateKfacOpt(inv_devices=["/cpu:0"],
                                                cov_devices=["/cpu:0"],
                                                learning_rate=1e-4,
                                                cov_ema_decay=0.95,
                                                damping=1e+3,
                                                layer_collection=layers,
                                                momentum=0.9)
        _construct_layer_collection(layers, [logits], tf.trainable_variables())
        train_step = optimizer.minimize(loss)
        target_loss = 0.05
        max_iterations = 500

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            # NOTE(review): presumably starts background cov/inv update
            # workers bound to this session -- confirm against the optimizer.
            optimizer.run_cov_inv_ops(sess)
            try:
                for _ in range(max_iterations):
                    loss_, _ = sess.run([loss, train_step])
                    if loss_ < target_loss:
                        break
            finally:
                # Stop the cov/inv ops even if a training step raises, so a
                # failing run does not leave them active on the session.
                optimizer.stop_cov_inv_ops(sess)
Ejemplo n.º 14
0
  def testRegisterLayers(self):
    """Graph search should discover a single-layer network from its loss."""
    with tf.Graph().as_default():
      collection = lc.LayerCollection()

      # Model: tanh(inputs @ weights + bias + constant). Only `weights` and
      # `bias` are variables; the extra additive term is a plain tensor.
      inputs = tf.ones((2, 1)) * 2
      weights = tf.get_variable(
          'w',
          shape=(1, 1),
          dtype=tf.float32,
          initializer=tf.random_normal_initializer)
      bias = tf.get_variable(
          'b', initializer=tf.zeros_initializer(), shape=(1, 1))
      constant_offset = tf.ones((1, 1))
      pre_activation = tf.matmul(inputs, weights) + bias + constant_offset
      logits = tf.tanh(pre_activation)

      # Only the predictive distribution is registered by hand; the layer
      # feeding it is left for graph search to find.
      collection.register_categorical_predictive_distribution(logits)

      gs.register_layers(collection, tf.trainable_variables())

      # Exactly one block, keyed by (weights, bias), and exactly one loss.
      registered_keys = list(collection.fisher_blocks.keys())
      self.assertEqual([(weights, bias)], registered_keys)
      self.assertEqual(1, len(collection.losses))
Ejemplo n.º 15
0
 def testOptimizerInitInvalidMomentumRegistration(self):
     """An unknown `momentum_type` must make `KfacOptimizer` raise ValueError."""
     with self.assertRaises(ValueError):
         empty_collection = lc.LayerCollection()
         optimizer.KfacOptimizer(
             0.1, 0.2, empty_collection, 0.3, momentum_type='foo')
Ejemplo n.º 16
0
  def testMultiplyInverseAgainstExplicit(self):
    """Check `FullFB.multiply_inverse` against a dense NumPy solve.

    The block's output is compared with
    `inv(full_fisher_block + damping * I) . v` computed explicitly.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      # Two parameter tensors holding three scalars in total.
      params = (tf.constant([1., 2.]), tf.constant(3.))
      block = fb.FullFB(lc.LayerCollection(), params)
      # NOTE(review): 32 is presumably the tower's batch size -- confirm.
      block.register_additional_tower(32)
      grads = (tf.constant([2., 3.]), tf.constant(4.))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()
      block.register_inverse()
      block._factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(tf.assign(block._factor._cov, _make_psd(3)))
      sess.run(block._factor.make_inverse_update_ops())

      # Flat 3-vector, split to match the structure of `params`.
      v_flat = np.array([4., 5., 6.], dtype=np.float32)
      vector = utils.column_to_tensors(params, tf.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      # Reference value: invert the damped dense Fisher block directly.
      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)

      self.assertAllClose(output_flat, explicit)
Ejemplo n.º 17
0
    def testDefaultLayerCollection(self):
        """Exercise getting, setting and clearing the default LayerCollection.

        Covers: reading with no default set, setting a default twice, reading
        back the same object, clearing with `None`, and the `as_default()`
        context manager setting the default and clearing it on exit.
        """
        with tf.Graph().as_default():
            # Can't get default if there isn't one set.
            with self.assertRaises(ValueError):
                layer_collection.get_default_layer_collection()

            # Can't set default twice.
            lc = layer_collection.LayerCollection()
            layer_collection.set_default_layer_collection(lc)
            with self.assertRaises(ValueError):
                layer_collection.set_default_layer_collection(lc)

            # Same as one set. assertIs reports both objects on failure,
            # unlike assertTrue(a is b), which only says "False is not true".
            self.assertIs(lc, layer_collection.get_default_layer_collection())

            # Can set to None.
            layer_collection.set_default_layer_collection(None)
            with self.assertRaises(ValueError):
                layer_collection.get_default_layer_collection()

            # as_default() is the same as setting/clearing.
            with lc.as_default():
                self.assertIs(
                    lc, layer_collection.get_default_layer_collection())
            with self.assertRaises(ValueError):
                layer_collection.get_default_layer_collection()
Ejemplo n.º 18
0
  def testMultiplyInverseAgainstExplicit(self):
    """Check `FullyConnectedKFACBasicFB.multiply_inverse` against NumPy.

    The block is built without a bias and with zero damping. Its output is
    compared with an explicit inverse of the dense Fisher block applied to
    the same vector.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      input_dim, output_dim = 3, 2
      inputs = tf.zeros([32, input_dim])
      outputs = tf.zeros([32, output_dim])
      params = tf.zeros([input_dim, output_dim])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
      block.register_additional_tower(inputs, outputs)
      grads = outputs**2
      damping = 0.  # This test is only valid without damping.
      block.instantiate_factors(((grads,),), damping)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()

      # Feed PSD matrices into both factor covariances (3x3 for the input
      # side, 2x2 for the output side) after initializing the variables.
      sess.run(tf.global_variables_initializer())
      sess.run(block._input_factor._cov.add_to_average(_make_psd(3)))
      sess.run(block._output_factor._cov.add_to_average(_make_psd(2)))

      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      # Flat 6-vector (input_dim * output_dim), shaped to match `params`.
      v_flat = np.arange(6, dtype=np.float32)
      vector = utils.column_to_tensors(params, tf.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      # Reference value: invert the dense Fisher block directly.
      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)), v_flat)

      self.assertAllClose(output_flat, explicit)
  def test_train(self):
    """Train an MLP with `PeriodicInvCovUpdateKfacOpt` and check its counter.

    A Sonnet MLP is trained on random images and labels for `max_iterations`
    steps. After every step the optimizer's `counter` must equal the number of
    steps taken so far.
    """
    image = tf.random_uniform(shape=(_BATCH_SIZE, 784), maxval=1.)
    labels = tf.random_uniform(shape=(_BATCH_SIZE,), maxval=10, dtype=tf.int32)
    labels_one_hot = tf.one_hot(labels, 10)

    model = snt.Sequential([snt.BatchFlatten(), snt.nets.MLP([128, 128, 10])])
    logits = model(image)
    all_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=labels_one_hot)
    loss = tf.reduce_mean(all_losses)
    layers = layer_collection.LayerCollection()
    optimizer = periodic_inv_cov_update_kfac_opt.PeriodicInvCovUpdateKfacOpt(
        invert_every=10,
        cov_update_every=1,
        learning_rate=0.03,
        cov_ema_decay=0.95,
        damping=100.,
        layer_collection=layers,
        momentum=0.9,
        num_burnin_steps=0,
        placement_strategy="round_robin")
    _construct_layer_collection(layers, [logits], tf.trainable_variables())

    train_step = optimizer.minimize(loss)
    counter = optimizer.counter
    max_iterations = 50

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
      try:
        for iteration in range(max_iterations):
          sess.run([loss, train_step])
          counter_ = sess.run(counter)
          self.assertEqual(counter_, iteration + 1.0)
      finally:
        # Stop and join any queue-runner threads so they do not outlive the
        # session, whether the loop finished or an assertion failed.
        coord.request_stop()
        coord.join(threads)
Ejemplo n.º 20
0
    def testRegisterCategoricalPredictiveDistributionBatchSize1(self):
        """Registering a categorical loss on (1, 2) logits should not raise."""
        with tf.Graph().as_default():
            tf.set_random_seed(200)
            single_example_logits = tf.random_normal((1, 2))
            collection = layer_collection.LayerCollection()

            collection.register_categorical_predictive_distribution(
                single_example_logits, seed=200)
Ejemplo n.º 21
0
 def testRegisterSingleParamNotRegistered(self):
     """Registering a block for an unseen variable should not raise."""
     new_param = tf.get_variable('x', initializer=tf.constant(1))
     collection = layer_collection.LayerCollection()
     # Pre-populate the collection with a block for a different variable.
     existing_param = tf.get_variable('y', initializer=tf.constant(1))
     collection.fisher_blocks = {existing_param: '1'}
     collection.register_block(new_param, 'foo')
Ejemplo n.º 22
0
 def testShouldRegisterSingleParamRegistered(self):
     """Registering a block for an already-registered variable must fail."""
     param = tf.get_variable('x', initializer=tf.constant(1))
     collection = layer_collection.LayerCollection()
     # Pretend `param` already owns a block.
     collection.fisher_blocks = {param: '1'}
     with self.assertRaises(ValueError) as cm:
         collection.register_block(param, 'foo')
     message = str(cm.exception)
     self.assertIn('already in LayerCollection', message)
Ejemplo n.º 23
0
  def testMultiplyInverseNotTupleWithBias(self):
    """Check `ConvKFCBasicFB.multiply_inverse` when params is a 1-item list.

    The block is expected to report `_has_bias` as true for this `params`
    value. The vector passed to `multiply_inverse` is a single tensor rather
    than a tuple, and only the first row of the result is checked against
    hard-coded values.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      params = [tf.random_normal((2, 2, 2, 2))]
      inputs = tf.random_normal((2, 2, 2, 2))
      outputs = tf.random_normal((2, 2, 2, 2))
      block = fb.ConvKFCBasicFB(
          lc.LayerCollection(), params=params, padding='SAME')
      block.register_additional_tower(inputs, outputs)
      self.assertTrue(block._has_bias)
      grads = outputs**2
      block.instantiate_factors(((grads,),), 0.5)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(tf.global_variables_initializer())
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      # 9x2 input holding the values 1..18.
      vector = np.arange(1, 19).reshape(9, 2).astype(np.float32)
      output = block.multiply_inverse(tf.constant(vector))

      # NOTE(review): the expected numbers presumably depend on the state the
      # factors are initialized to -- confirm before changing the setup above.
      self.assertAllClose([0.136455, 0.27291], sess.run(output)[0])
Ejemplo n.º 24
0
  def testMultiplyInverseAgainstExplicit(self):
    """Check `ConvKFCBasicFB.multiply_inverse` against a dense NumPy solve.

    The block is built with zero damping. Its output is compared with an
    explicit inverse of the dense Fisher block applied to the same vector.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      params = tf.zeros((2, 2, 2, 2))
      inputs = tf.zeros((2, 2, 2, 2))
      outputs = tf.zeros((2, 2, 2, 2))
      block = fb.ConvKFCBasicFB(
          lc.LayerCollection(), params=params, padding='SAME')
      block.register_additional_tower(inputs, outputs)
      grads = outputs**2
      damping = 0.  # This test is only valid without damping.
      block.instantiate_factors(((grads,),), damping)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Overwrite both factor covariances with PSD matrices (8x8 for the
      # input side, 2x2 for the output side) before computing the inverses.
      sess.run(tf.assign(block._input_factor._cov, _make_psd(8)))
      sess.run(tf.assign(block._output_factor._cov, _make_psd(2)))
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      # Flat 16-vector (one entry per element of `params`).
      v_flat = np.arange(16, dtype=np.float32)
      vector = utils.column_to_tensors(params, tf.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      # Reference value: invert the dense Fisher block directly.
      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)), v_flat)

      self.assertAllClose(output_flat, explicit)
Ejemplo n.º 25
0
  def testMultiplyInverse(self):
    """Smoke-test `DepthwiseConvKFCBasicFB` inverse updates and multiply.

    Checks that the inverse update ops and `multiply_inverse` run, and that
    the result has the same shape as the parameters.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      kernel = tf.random_normal((3, 3, 8, 2))
      layer_inputs = tf.random_normal((32, 5, 5, 8))
      layer_outputs = tf.random_normal((32, 5, 5, 16))
      collection = lc.LayerCollection()
      block = fb.DepthwiseConvKFCBasicFB(
          collection, params=kernel, strides=[1, 1, 1, 1], padding='SAME')
      block.register_additional_tower(layer_inputs, layer_outputs)
      output_grads = layer_outputs**2
      block.instantiate_factors(([output_grads],), 0.5)
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_cov_variables()
      block.register_inverse()
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_inv_variables()

      # The inverse update ops of every registered factor must run cleanly.
      sess.run(tf.global_variables_initializer())
      inverse_updates = [
          factor.make_inverse_update_ops()
          for factor in collection.get_factors()
      ]
      sess.run(inverse_updates)

      # The inverse-vector product must run cleanly as well.
      product = block.multiply_inverse(kernel)
      sess.run(product)

      # The product keeps the shape of the parameters.
      self.assertAllEqual(product.shape, kernel.shape)
Ejemplo n.º 26
0
  def testMultiplyInverseNotTuple(self):
    """Check bias-free `FullyConnectedKFACBasicFB.multiply_inverse` values.

    The vector is passed as a single tensor rather than a tuple, and the
    result is compared with hard-coded expected numbers.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)
      layer_inputs = tf.constant([[1., 2.], [3., 4.]])
      layer_outputs = tf.constant([[3., 4.], [5., 6.]])
      block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
      block.register_additional_tower(layer_inputs, layer_outputs)
      output_grads = layer_outputs**2
      block.instantiate_factors(((output_grads,),), 0.5)
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_cov_variables()
      block.register_inverse()
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(tf.global_variables_initializer())
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      # 2x2 input holding the values 2..5.
      raw_vector = np.arange(2, 6).reshape(2, 2).astype(np.float32)
      product = block.multiply_inverse(tf.constant(raw_vector))

      expected = [[0.686291, 1.029437], [1.372583, 1.715729]]
      self.assertAllClose(expected, sess.run(product))
Ejemplo n.º 27
0
 def testEmptyGraph(self):
   """Graph search on an empty graph should register no blocks or losses."""
   with tf.Graph().as_default():
     collection = lc.LayerCollection()
     no_variables = tf.trainable_variables()
     gs.register_layers(collection, no_variables)
     self.assertEqual(0, len(collection.fisher_blocks))
     self.assertEqual(0, len(collection.losses))
Ejemplo n.º 28
0
  def testMultiplyInverseDense(self):
    """Check `EmbeddingKFACMultiIndepFB.multiply_inverse` on a dense vector.

    The block's result is compared, entry by entry, with `tf.matrix_solve`
    applied to the block's dense Fisher matrix and the same vector.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
      tf.set_random_seed(200)

      block = fb.EmbeddingKFACMultiIndepFB(lc.LayerCollection())

      tower_inputs = [tf.constant([[0., 1], [1, 2], [2, 3]]),
                      tf.constant([[0.1], [0.], [0.]])]
      tower_outputs = [tf.constant([[0.], [1.], [2.]]),
                       tf.constant([[0., 0], [0, 0], [0, 4]])]
      block.register_additional_tower(
          tower_inputs, tower_outputs, transpose=[False, True])

      output_grads = [tensor**2 for tensor in tower_outputs]
      zero_damping = tf.constant(0.)
      block.instantiate_factors(((output_grads,),), zero_damping)

      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_cov_variables()
      block.register_inverse()
      for factor in (block._input_factor, block._output_factor):
        factor.instantiate_inv_variables()

      # Dense 2x1 update vector.
      update = tf.constant([[0.5], [0.5]])

      # Block result versus an explicit solve against the dense Fisher block.
      actual = block.multiply_inverse(update)
      reference = tf.matrix_solve(block.full_fisher_block(), update)

      sess.run(tf.global_variables_initializer())
      for row in (0, 1):
        self.assertAlmostEqual(
            sess.run(reference[row]), sess.run(actual[row]))
Ejemplo n.º 29
0
    def testRegisterBlocks(self):
        """Each `register_*` entry point should add its blocks.

        One call is made per layer type and approximation, then the total
        number of blocks in the collection is checked.
        """
        with tf.Graph().as_default():
            tf.set_random_seed(200)
            collection = layer_collection.LayerCollection()

            # Fully connected: default and diagonal approximations.
            collection.register_fully_connected(
                tf.constant(1), tf.constant(2), tf.constant(3))
            collection.register_fully_connected(
                tf.constant(1),
                tf.constant(2),
                tf.constant(3),
                approx=layer_collection.APPROX_DIAGONAL_NAME)

            # 2-D convolution: default and diagonal approximations.
            collection.register_conv2d(
                params=tf.ones((2, 3, 4, 5)),
                strides=[1, 1, 1, 1],
                padding='SAME',
                inputs=tf.ones((1, 2, 3, 4)),
                outputs=tf.ones((1, 1, 1, 5)))
            collection.register_conv2d(
                params=tf.ones((2, 3, 4, 5)),
                strides=[1, 1, 1, 1],
                padding='SAME',
                inputs=tf.ones((1, 2, 3, 4)),
                outputs=tf.ones((1, 1, 1, 5)),
                approx=layer_collection.APPROX_DIAGONAL_NAME)

            # Separable and generic convolutions.
            collection.register_separable_conv2d(
                depthwise_params=tf.ones((3, 3, 1, 2)),
                pointwise_params=tf.ones((1, 1, 2, 4)),
                inputs=tf.ones((32, 5, 5, 1)),
                depthwise_outputs=tf.ones((32, 5, 5, 2)),
                pointwise_outputs=tf.ones((32, 5, 5, 4)),
                strides=[1, 1, 1, 1],
                padding='SAME')
            collection.register_convolution(
                params=tf.ones((3, 3, 1, 8)),
                inputs=tf.ones((32, 5, 5, 1)),
                outputs=tf.ones((32, 5, 5, 8)),
                padding='SAME')

            # Generic parameters: full and diagonal approximations.
            collection.register_generic(
                tf.constant(5), 16, approx=layer_collection.APPROX_FULL_NAME)
            collection.register_generic(
                tf.constant(6),
                16,
                approx=layer_collection.APPROX_DIAGONAL_NAME)

            # Multi-use variants.
            collection.register_fully_connected_multi(
                tf.constant(1),
                (tf.constant(2), tf.constant(3)),
                (tf.constant(4), tf.constant(5)))
            collection.register_conv2d_multi(
                params=tf.ones((2, 3, 4, 5)),
                strides=[1, 1, 1, 1],
                padding='SAME',
                inputs=(tf.ones((1, 2, 3, 4)), tf.ones((5, 6, 7, 8))),
                outputs=(tf.ones((1, 1, 1, 5)), tf.ones((2, 2, 2, 10))))
            collection.register_embedding_multi(
                tf.constant((1, )),
                (tf.constant(2), tf.constant(3)),
                (tf.constant(4), tf.constant(5)))

            registered_blocks = collection.get_blocks()
            self.assertEqual(12, len(registered_blocks))
Ejemplo n.º 30
0
    def testIdentifySubsetPreviouslyRegisteredTensor(self):
        """Linking `x` alone must fail once `(x, y)` is a linked group."""
        first = tf.get_variable('x', shape=())
        second = tf.get_variable('y', shape=())
        collection = layer_collection.LayerCollection()
        collection.define_linked_parameters((first, second))

        # `first` already belongs to the (first, second) group.
        with self.assertRaises(ValueError):
            collection.define_linked_parameters(first)