# NOTE: These snippets appear to come from the TF 1.x `tf.contrib.kfac` test
# suite (fisher_blocks_test.py, layer_collection_test.py, optimizer_test.py)
# and assume imports along the following lines; exact module paths varied
# between versions:
#
#   import numpy as np
#
#   from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb
#   from tensorflow.contrib.kfac.python.ops import layer_collection
#   from tensorflow.contrib.kfac.python.ops import layer_collection as lc
#   from tensorflow.contrib.kfac.python.ops import optimizer
#   from tensorflow.contrib.kfac.python.ops import utils
#   from tensorflow.python.framework import dtypes, ops, random_seed
#   from tensorflow.python.ops import (array_ops, init_ops, linalg_ops,
#                                      math_ops, nn, random_ops, state_ops,
#                                      variable_scope)
#   from tensorflow.python.ops import variables as tf_variables

def testMultiplyInverseNotTupleWithBias(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = [random_ops.random_normal((2, 2, 2, 2))]
      inputs = random_ops.random_normal((2, 2, 2, 2))
      outputs = random_ops.random_normal((2, 2, 2, 2))
      block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                                'SAME')
      block.register_additional_minibatch(inputs, outputs)
      self.assertTrue(block._has_bias)
      grads = outputs**2
      block.instantiate_factors(((grads,),), 0.5)
      block._input_factor.instantiate_cov_variables()
      block._output_factor.instantiate_cov_variables()
      block.register_inverse()
      block._input_factor.instantiate_inv_variables()
      block._output_factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(tf_variables.global_variables_initializer())
      sess.run(block._input_factor.make_inverse_update_ops())
      sess.run(block._output_factor.make_inverse_update_ops())

      vector = np.arange(1, 19).reshape(9, 2).astype(np.float32)
      output = block.multiply_inverse(array_ops.constant(vector))

      self.assertAllClose([0.136455, 0.27291], sess.run(output)[0])
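
# Several tests below overwrite a factor's covariance with `_make_psd(n)` so
# that the computed inverse is something other than the identity. The helper
# isn't shown in these snippets; here is a minimal sketch consistent with how
# it is used (any small symmetric positive-definite matrix of the given size
# works, and the original suite's exact values may differ):
def _make_psd(dim):
  # All-ones matrix plus a strictly dominant diagonal => positive definite.
  mat = np.ones((dim, dim), dtype=np.float32)
  mat[np.arange(dim), np.arange(dim)] = 2. + np.arange(dim)
  return array_ops.constant(mat)
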
  def testMultiplyInverseAgainstExplicit(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
      block = fb.FullFB(lc.LayerCollection(), params)
      block.register_additional_minibatch(32)
      grads = (array_ops.constant([2., 3.]), array_ops.constant(4.))
      damping = 0.5
      block.instantiate_factors((grads,), damping)
      block._factor.instantiate_cov_variables()
      block.register_inverse()
      block._factor.instantiate_inv_variables()

      # Make sure our inverse is something other than the identity.
      sess.run(state_ops.assign(block._factor._cov, _make_psd(3)))
      sess.run(block._factor.make_inverse_update_ops())

      v_flat = np.array([4., 5., 6.], dtype=np.float32)
      vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
      output = block.multiply_inverse(vector)
      output_flat = sess.run(utils.tensors_to_column(output)).ravel()

      full = sess.run(block.full_fisher_block())
      explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)

      self.assertAllClose(output_flat, explicit)
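
# All of the `testMultiplyInverseAgainstExplicit` variants in this collection
# check the same identity: `block.multiply_inverse(v)` must agree with the
# dense NumPy computation inv(F + damping * I) @ v, where F is the block's
# explicit Fisher approximation from `block.full_fisher_block()`.
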
    def testRegisterBlocks(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            lc = layer_collection.LayerCollection()
            lc.register_fully_connected(array_ops.constant(1),
                                        array_ops.constant(2),
                                        array_ops.constant(3))
            lc.register_fully_connected(
                array_ops.constant(1),
                array_ops.constant(2),
                array_ops.constant(3),
                approx=layer_collection.APPROX_DIAGONAL_NAME)
            lc.register_conv2d(array_ops.constant(4), [1, 1, 1, 1], 'SAME',
                               array_ops.ones((1, 1, 1, 1)),
                               array_ops.constant(3))
            lc.register_conv2d(array_ops.constant(4), [1, 1, 1, 1],
                               'SAME',
                               array_ops.ones((1, 1, 1, 1)),
                               array_ops.constant(3),
                               approx=layer_collection.APPROX_DIAGONAL_NAME)
            lc.register_generic(array_ops.constant(5),
                                16,
                                approx=layer_collection.APPROX_FULL_NAME)
            lc.register_generic(array_ops.constant(6),
                                16,
                                approx=layer_collection.APPROX_DIAGONAL_NAME)

            self.assertEqual(6, len(lc.get_blocks()))
 def testShouldRegisterSingleParamRegistered(self):
     x = variable_scope.get_variable('x',
                                     initializer=array_ops.constant(1, ))
     lc = layer_collection.LayerCollection()
     lc.fisher_blocks = {x: '1'}
     with self.assertRaises(ValueError):
         lc.register_block(x, 'foo')
    def testDefaultLayerCollection(self):
        with ops.Graph().as_default():
            # Can't get default if there isn't one set.
            with self.assertRaises(ValueError):
                layer_collection.get_default_layer_collection()

            # Can't set default twice.
            lc = layer_collection.LayerCollection()
            layer_collection.set_default_layer_collection(lc)
            with self.assertRaises(ValueError):
                layer_collection.set_default_layer_collection(lc)

            # Same as one set.
            self.assertTrue(
                lc is layer_collection.get_default_layer_collection())

            # Can set to None.
            layer_collection.set_default_layer_collection(None)
            with self.assertRaises(ValueError):
                layer_collection.get_default_layer_collection()

            # as_default() is the same as setting/clearing.
            with lc.as_default():
                self.assertTrue(
                    lc is layer_collection.get_default_layer_collection())
            with self.assertRaises(ValueError):
                layer_collection.get_default_layer_collection()
  def testRegisterCategoricalPredictiveDistributionBatchSize1(self):
    with ops.Graph().as_default():
      random_seed.set_random_seed(200)
      logits = random_ops.random_normal((1, 2))
      lc = layer_collection.LayerCollection()

      lc.register_categorical_predictive_distribution(logits, seed=200)
    def testMultiplyInverseTuple(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            inputs = array_ops.constant([[1., 2., 3.], [3., 4., 5.],
                                         [5., 6., 7.]])
            outputs = array_ops.constant([[3., 4.], [5., 6.]])
            block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(),
                                                 has_bias=False)
            block.register_additional_minibatch(inputs, outputs)
            grads = outputs**2
            block.instantiate_factors(([grads], ), 0.5)

            # Make sure our inverse is something other than the identity.
            sess.run(tf_variables.global_variables_initializer())
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            vector = (
                np.arange(2, 6).reshape(2, 2).astype(np.float32),  #
                np.arange(1, 3).reshape(2, 1).astype(np.float32))
            output = block.multiply_inverse(
                (array_ops.constant(vector[0]), array_ops.constant(vector[1])))

            output = sess.run(output)
            self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                                output[0])
            self.assertAllClose([0.343146, 0.686291], output[1])
    def testMultiplyInverseAgainstExplicit(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
            block = fb.NaiveDiagonalFB(lc.LayerCollection(), params)
            block.register_additional_minibatch(32)
            grads = (params[0]**2, math_ops.sqrt(params[1]))
            damping = 0.5
            block.instantiate_factors((grads, ), damping)

            cov = array_ops.reshape(array_ops.constant([2., 3., 4.]), [-1, 1])
            sess.run(state_ops.assign(block._factor._cov, cov))
            sess.run(block._factor.make_inverse_update_ops())

            v_flat = np.array([4., 5., 6.], dtype=np.float32)
            vector = utils.column_to_tensors(params,
                                             array_ops.constant(v_flat))
            output = block.multiply_inverse(vector)
            output_flat = sess.run(utils.tensors_to_column(output)).ravel()

            full = sess.run(block.full_fisher_block())
            explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)),
                              v_flat)

            self.assertAllClose(output_flat, explicit)
    def testMultiplyInverseNotTuple(self):
        with ops.Graph().as_default(), self.cached_session() as sess:
            random_seed.set_random_seed(200)
            inputs = array_ops.constant([[1., 2.], [3., 4.]])
            outputs = array_ops.constant([[3., 4.], [5., 6.]])
            block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(),
                                                 has_bias=False)
            block.register_additional_tower(inputs, outputs)
            grads = outputs**2
            block.instantiate_factors(((grads, ), ), 0.5)
            block._input_factor.instantiate_cov_variables()
            block._output_factor.instantiate_cov_variables()
            block.register_inverse()
            block._input_factor.instantiate_inv_variables()
            block._output_factor.instantiate_inv_variables()

            # Make sure our inverse is something other than the identity.
            sess.run(tf_variables.global_variables_initializer())
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            vector = np.arange(2, 6).reshape(2, 2).astype(np.float32)
            output = block.multiply_inverse(array_ops.constant(vector))

            self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                                sess.run(output))
    def testFullFBInitTensorTuple(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
            block = fb.FullFB(lc.LayerCollection(), params, 32)

            self.assertAllEqual(params, block.tensors_to_compute_grads())
    def testMultiplyInverseAgainstExplicit(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            params = array_ops.zeros((2, 2, 2, 2))
            inputs = array_ops.zeros((2, 2, 2, 2))
            outputs = array_ops.zeros((2, 2, 2, 2))
            block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, inputs,
                                      outputs, (1, 1, 1, 1), 'SAME')
            grads = outputs**2
            damping = 0.  # This test is only valid without damping.
            block.instantiate_factors((grads, ), damping)

            sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8)))
            sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            v_flat = np.arange(16, dtype=np.float32)
            vector = utils.column_to_tensors(params,
                                             array_ops.constant(v_flat))
            output = block.multiply_inverse(vector)
            output_flat = sess.run(utils.tensors_to_column(output)).ravel()

            full = sess.run(block.full_fisher_block())
            explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)),
                              v_flat)

            self.assertAllClose(output_flat, explicit)
    def setUp(self):
        self._graph = ops.Graph()
        with self._graph.as_default():
            self.layer_collection = lc.LayerCollection()

            self.inputs = random_ops.random_normal((2, 2),
                                                   dtype=dtypes.float32)
            self.weights = variable_scope.get_variable("w",
                                                       shape=(2, 2),
                                                       dtype=dtypes.float32)
            self.bias = variable_scope.get_variable(
                "b", initializer=init_ops.zeros_initializer(), shape=(2, 1))
            self.output = math_ops.matmul(self.inputs,
                                          self.weights) + self.bias

            # Only register the weights.
            self.layer_collection.register_fully_connected(
                params=(self.weights, ),
                inputs=self.inputs,
                outputs=self.output)

            self.outputs = math_ops.tanh(self.output)
            self.targets = array_ops.zeros_like(self.outputs)
            self.layer_collection.register_categorical_predictive_distribution(
                logits=self.outputs, targets=self.targets)
    def testMultiplyInverse(self):
        with ops.Graph().as_default(), self.cached_session() as sess:
            random_seed.set_random_seed(200)
            params = random_ops.random_normal((3, 3, 8, 2))
            inputs = random_ops.random_normal((32, 5, 5, 8))
            outputs = random_ops.random_normal((32, 5, 5, 16))
            layer_collection = lc.LayerCollection()
            block = fb.DepthwiseConvKFCBasicFB(layer_collection,
                                               params=params,
                                               strides=[1, 1, 1, 1],
                                               padding='SAME')
            block.register_additional_tower(inputs, outputs)
            grads = outputs**2
            block.instantiate_factors(([grads], ), 0.5)
            block._input_factor.instantiate_cov_variables()
            block._output_factor.instantiate_cov_variables()
            block.register_inverse()
            block._input_factor.instantiate_inv_variables()
            block._output_factor.instantiate_inv_variables()

            # Ensure inverse update op doesn't crash.
            sess.run(tf_variables.global_variables_initializer())
            sess.run([
                factor.make_inverse_update_ops()
                for factor in layer_collection.get_factors()
            ])

            # Ensure inverse-vector multiply doesn't crash.
            output = block.multiply_inverse(params)
            sess.run(output)

            # Ensure same shape.
            self.assertAllEqual(output.shape, params.shape)
 def testOptimizerInitInvalidMomentumRegistration(self):
     with self.assertRaises(ValueError):
         optimizer.KfacOptimizer(0.1,
                                 0.2,
                                 0.3,
                                 lc.LayerCollection(),
                                 momentum_type='foo')
    def testOptimizerInit(self):
        with ops.Graph().as_default():
            layer_collection = lc.LayerCollection()

            inputs = array_ops.ones((2, 1)) * 2
            weights_val = np.ones((1, 1), dtype=np.float32) * 3.
            weights = variable_scope.get_variable(
                'w', initializer=array_ops.constant(weights_val))
            bias = variable_scope.get_variable(
                'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
            output = math_ops.matmul(inputs, weights) + bias

            layer_collection.register_fully_connected((weights, bias), inputs,
                                                      output)

            logits = math_ops.tanh(output)
            targets = array_ops.constant([[0.], [1.]])
            output = math_ops.reduce_mean(
                nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=targets))

            layer_collection.register_categorical_predictive_distribution(
                logits)

            optimizer.KfacOptimizer(0.1,
                                    0.2,
                                    0.3,
                                    layer_collection,
                                    momentum=0.5,
                                    momentum_type='regular')
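
# The three positional arguments to KfacOptimizer above are learning_rate,
# cov_ema_decay, and damping. A hypothetical usage sketch (exact API varied
# across kfac versions); KfacOptimizer implements the standard
# tf.train.Optimizer interface, so training proceeds via minimize():
#
#   opt = optimizer.KfacOptimizer(
#       learning_rate=0.1, cov_ema_decay=0.2, damping=0.3,
#       layer_collection=layer_collection, momentum=0.5,
#       momentum_type='regular')
#   train_op = opt.minimize(loss)
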
  def testRegisterNormalPredictiveDistribution(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      predictions = array_ops.constant(
          [[1., 2.], [3., 4.]], dtype=dtypes.float32)

      lc = layer_collection.LayerCollection()
      lc.register_normal_predictive_distribution(predictions, 1., seed=200)
      single_loss = sess.run(lc.total_sampled_loss())

      lc2 = layer_collection.LayerCollection()
      lc2.register_normal_predictive_distribution(predictions, 1., seed=200)
      lc2.register_normal_predictive_distribution(predictions, 1., seed=200)
      double_loss = sess.run(lc2.total_sampled_loss())

      self.assertAlmostEqual(2 * single_loss, double_loss)
    def testMultiplyInverseAgainstExplicit(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            input_dim, output_dim = 3, 2
            inputs = array_ops.zeros([32, input_dim])
            outputs = array_ops.zeros([32, output_dim])
            params = array_ops.zeros([input_dim, output_dim])
            block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(),
                                                 has_bias=False)
            block.register_additional_minibatch(inputs, outputs)
            grads = outputs**2
            damping = 0.  # This test is only valid without damping.
            block.instantiate_factors(([grads], ), damping)

            sess.run(state_ops.assign(block._input_factor._cov, _make_psd(3)))
            sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            v_flat = np.arange(6, dtype=np.float32)
            vector = utils.column_to_tensors(params,
                                             array_ops.constant(v_flat))
            output = block.multiply_inverse(vector)
            output_flat = sess.run(utils.tensors_to_column(output)).ravel()

            full = sess.run(block.full_fisher_block())
            explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)),
                              v_flat)

            self.assertAllClose(output_flat, explicit)
    def testMultiplyInverseTuple(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            params = random_ops.random_normal((2, 2, 2, 2))
            inputs = random_ops.random_normal((2, 2, 2, 2))
            outputs = random_ops.random_normal((2, 2, 2, 2))
            block = fb.ConvKFCBasicFB(lc.LayerCollection(), params,
                                      (1, 1, 1, 1), 'SAME')
            block.register_additional_minibatch(inputs, outputs)
            grads = outputs**2
            block.instantiate_factors(([grads], ), 0.5)

            # Make sure our inverse is something other than the identity.
            sess.run(tf_variables.global_variables_initializer())
            sess.run(block._input_factor.make_inverse_update_ops())
            sess.run(block._output_factor.make_inverse_update_ops())

            vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32),
                      np.arange(2, 4).reshape(2, 1).astype(np.float32))
            output = block.multiply_inverse(
                (array_ops.constant(vector[0]), array_ops.constant(vector[1])))

            output = sess.run(output)
            self.assertAllClose([0.136455, 0.27291], output[0][0])
            self.assertAllClose([0.27291, 0.409365], output[1])
 def testRegisterSingleParamRegisteredInTuple(self):
   x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
   y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
   lc = layer_collection.LayerCollection()
   lc.fisher_blocks = {(x, y): '1'}
   lc.register_block(x, 'foo')
   self.assertEqual(set(['1']), set(lc.get_blocks()))
 def testRegisterSingleParamNotRegistered(self):
   x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
   lc = layer_collection.LayerCollection()
   lc.fisher_blocks = {
       variable_scope.get_variable('y', initializer=array_ops.constant(1,)):
           '1'
   }
   lc.register_block(x, 'foo')
   self.assertEqual(set(['1', 'foo']), set(lc.get_blocks()))
    def testIdentifySubsetPreviouslyRegisteredTensor(self):
        x = variable_scope.get_variable('x', shape=())
        y = variable_scope.get_variable('y', shape=())
        lc = layer_collection.LayerCollection()
        lc.define_linked_parameters((x, y))

        with self.assertRaises(ValueError):
            lc.define_linked_parameters(x)
    def testFullFBInitSingleTensor(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            params = array_ops.constant([1., 2.])
            block = fb.FullFB(lc.LayerCollection(), params)
            block.register_additional_minibatch(32)

            self.assertAllEqual(params, block.tensors_to_compute_grads())
 def testFullyConnectedSeriesFBInit(self):
     with ops.Graph().as_default():
         random_seed.set_random_seed(200)
         inputs = array_ops.constant([1., 2.])
         outputs = array_ops.constant([3., 4.])
         block = fb.FullyConnectedSeriesFB(lc.LayerCollection())
         block.register_additional_tower([inputs], [outputs])
         self.assertAllEqual([[outputs]], block.tensors_to_compute_grads())
    def testInstantiateFactors(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
            block = fb.FullFB(lc.LayerCollection(), params, 32)

            grads = (params[0]**2, math_ops.sqrt(params[1]))
            block.instantiate_factors(grads, 0.5)
    def testFullyConnectedKFACBasicFBInit(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            inputs = array_ops.constant([1., 2.])
            outputs = array_ops.constant([3., 4.])
            block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), inputs,
                                                 outputs)

            self.assertAllEqual(outputs, block.tensors_to_compute_grads())
    def testIdentifyLinkedParametersSomeRegisteredInOtherTuples(self):
        x = variable_scope.get_variable('x', shape=())
        y = variable_scope.get_variable('y', shape=())
        z = variable_scope.get_variable('z', shape=())
        lc = layer_collection.LayerCollection()
        lc.define_linked_parameters((x, y))

        with self.assertRaises(ValueError):
            lc.define_linked_parameters((x, z))
    def testInstantiateFactors(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(200)
            params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
            block = fb.NaiveDiagonalFB(lc.LayerCollection(), params)
            block.register_additional_minibatch(32)

            grads = (params[0]**2, math_ops.sqrt(params[1]))
            block.instantiate_factors(grads, 0.5)
 def testRegisterBlocksMultipleRegistrations(self):
     with ops.Graph().as_default():
         random_seed.set_random_seed(200)
         lc = layer_collection.LayerCollection()
         key = array_ops.constant(1)
         lc.register_fully_connected(key, array_ops.constant(2),
                                     array_ops.constant(3))
         with self.assertRaises(ValueError):
             lc.register_generic(key, 16)
  def testRegisterTupleParamSomeRegistered(self):
    x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
    y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
    z = variable_scope.get_variable('z', initializer=array_ops.constant(1,))
    lc = layer_collection.LayerCollection()
    lc.fisher_blocks = {x: '1', z: '2'}

    lc.register_block((x, y), 'foo')
    self.assertEqual(set(['2', 'foo']), set(lc.get_blocks()))
    def testLossFunctionWithoutName(self):
        """Ensure loss functions get unique names if 'name' not specified."""
        with ops.Graph().as_default():
            logits = linalg_ops.eye(2)
            lc = layer_collection.LayerCollection()

            # Create a new loss function with default names.
            lc.register_categorical_predictive_distribution(logits)
            lc.register_categorical_predictive_distribution(logits)
            self.assertEqual(2, len(lc.losses))