Example 1
 def testNewOptSameVarScope(self):
   model = _simple_mlp()
   opt = optimizers.Kfac(
       learning_rate=0.01, damping=0.01, model=model, loss='mse')
   opt._create_optimizer()
   opt2 = optimizers.Kfac(
       learning_rate=0.02, damping=0.03, model=model, loss='mse')
   opt2._create_optimizer()
Example 2
 def testConfig(self):
   fisher_approx = {layers.Dense: 'kron_in_diag', 'dense_1': 'kron_both_diag'}
   kwargs = {
       'loss': 'mse',
       'momentum': 7,
       'num_burnin_steps': 11,
       'min_damping': 9,
       'invert_every': 13,
       'fisher_approx': fisher_approx,
       'seed': 12,
   }
   opt = optimizers.Kfac(
       learning_rate=3, damping=5, model=_simple_mlp(), **kwargs)
   opt.learning_rate = 23
   opt.damping = 27
   config = opt.get_config()
   self.assertEqual(config['learning_rate'], 23)
   self.assertEqual(config['damping'], 27)
   dense_approx = fisher_approx.pop(layers.Dense)
   fisher_approx[utils._CLASS_NAME_PREFIX + 'Dense'] = dense_approx
   for key, val in kwargs.items():
     self.assertEqual(config[key], val)
   # Below is how Keras's model.save saves the configs. If the config is not
   # serializable, it will throw a TypeError or OverflowError.
   json.dumps(config, default=serialization.get_json_type).encode('utf8')
Example 3
    def testCustomLossFn(self):
        rands = lambda: np.random.random((100, 1)).astype(np.float32)
        dataset = tf.data.Dataset.from_tensor_slices((rands(), rands()))
        dataset = dataset.repeat().batch(10, drop_remainder=True)
        train_batch = dataset.make_one_shot_iterator().get_next()
        model = tf.keras.Sequential(
            [tf.keras.layers.Dense(1, input_shape=(1, ))])

        def loss_fn(inputs):
            mse = tf.keras.losses.mean_squared_error(model(inputs[0]),
                                                     inputs[1])
            return tf.reduce_mean(mse)

        loss = 'mse'
        train_batch = dataset.make_one_shot_iterator().get_next()
        optimizer = optimizers.Kfac(damping=10.,
                                    train_batch=train_batch,
                                    adaptive=True,
                                    model=model,
                                    loss=loss,
                                    loss_fn=loss_fn,
                                    qmodel_update_rescale=0.01)
        model.compile(optimizer, loss)
        model.fit(train_batch, steps_per_epoch=10, epochs=1)
        self.assertEqual(loss_fn, optimizer.optimizer._loss_fn)
Example 4
 def testRegisterLayersWithLayerCollection(self):
   model, loss = _mnist_model(), 'categorical_crossentropy'
   lc = utils.get_layer_collection(model, loss)
   opt = optimizers.Kfac(learning_rate=0.01, damping=0.001)
   opt.register_layers(layer_collection=lc)
   model.compile(optimizer=opt, loss=loss)
   opt.get_updates(model.total_loss, model.trainable_weights)
Example 5
 def testCustomTrainingLoopMakeOptimizerBeforeModelCall(self):
     # We defer the creation of the layer_collection to the minimize call for
     # this situation, because if we make the layer_collection immediately it
     # will capture the wrong inbound node.
     model = tf.keras.Sequential([
         layers.Conv2D(13, 5),
         layers.BatchNormalization(fused=False),
         layers.Conv2D(23, 3),
         layers.LayerNormalization(),
         layers.GlobalMaxPool2D(),
         layers.Dense(10, activation='softmax', name='output_test')
     ])
     optimizer = optimizers.Kfac(learning_rate=0.01,
                                 damping=0.01,
                                 model=model,
                                 loss='binary_crossentropy')
     x, y = _get_synthetic_mnist_train_tensors(batch_size=10)
     model_input = tf.keras.Input(tensor=x)
     output = model(model_input)
     loss = tf.keras.losses.binary_crossentropy(output, y)
     train_op = optimizer.minimize(loss, var_list=model.trainable_weights)
     with self.cached_session() as sess:
         sess.run(tf.global_variables_initializer())
         for _ in range(3):
             sess.run([train_op])
Example 6
    def test_functional_model_saving(self):
        if h5py is None:
            self.skipTest('h5py required to run this test')

        with self.cached_session():
            inputs = keras.layers.Input(shape=(3, ))
            x = keras.layers.Dense(2)(inputs)
            output = keras.layers.Dense(3)(x)

            model = keras.models.Model(inputs, output)
            model.compile(loss=keras.losses.MSE,
                          optimizer=optimizers.Kfac(model=model,
                                                    **_KFAC_KWARGS),
                          metrics=[
                              keras.metrics.categorical_accuracy,
                              keras.metrics.CategoricalAccuracy()
                          ],
                          weighted_metrics=[
                              keras.metrics.categorical_accuracy,
                              keras.metrics.CategoricalAccuracy()
                          ])
            x = np.random.random((1, 3))
            y = np.random.random((1, 3))
            model.train_on_batch(x, y)

            out = model.predict(x)
            fd, fname = tempfile.mkstemp('.h5')
            keras.models.save_model(model, fname)

            model = saving_utils.load_model(fname, optimizer_name='new')
            os.close(fd)
            os.remove(fname)

            out2 = model.predict(x)
            self.assertAllClose(out, out2, atol=1e-05)
Example 7
 def testRegisterLayersCompiledModel(self, loss):
   opt = optimizers.Kfac(learning_rate=0.01, damping=0.001)
   model = _mnist_model()
   model.compile(optimizer=opt, loss=loss)
   opt.register_layers(model=model)
   model.compile(optimizer=opt, loss=loss)
   opt.get_updates(model.total_loss, model.trainable_weights)
Example 8
  def testCustomTrainingLoopFunctionalInpShape(self):
    # We need to ensure correct inbound node is used for layer collection.
    x, y = _get_synthetic_mnist_train_tensors(batch_size=10)
    model_input = tf.keras.Input(tensor=x)

    # Build Model
    inp = tf.keras.Input(shape=(28, 28, 1))
    x = layers.Conv2D(13, 5)(inp)
    x = layers.BatchNormalization(fused=True)(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(23, 3)(x)
    x = layers.LayerNormalization()(x)
    x = layers.GlobalMaxPool2D()(x)
    out = layers.Dense(10, activation='softmax', name='output_test')(x)
    model = tf.keras.Model(inputs=inp, outputs=out)

    output = model(model_input)
    loss = tf.keras.losses.binary_crossentropy(output, y)
    optimizer = optimizers.Kfac(damping=0.01, learning_rate=0.01,
                                model=model, loss='binary_crossentropy')
    train_op = optimizer.minimize(loss, var_list=model.trainable_weights)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for _ in range(3):
        sess.run([train_op])
Example 9
 def testTrainWithoutCreatingOptimizerFails(self):
   with self.assertRaisesRegex(ValueError, '.*provide a model with a loss.*'):
     opt = optimizers.Kfac(learning_rate=0.01, damping=0.001)
     model = _mnist_model()
     model.compile(optimizer=opt, loss='categorical_crossentropy')
     grads_vars = opt.get_gradients(model.total_loss, model.trainable_weights)
     opt.apply_gradients(grads_vars)
Example 10
 def testInstantiationWithLayerCollection(self):
   model = _simple_mlp()
   lc = utils.get_layer_collection(model, 'mse')
   opt = optimizers.Kfac(
       learning_rate=0.1, damping=0.2, layer_collection=lc)
   model.compile(optimizer=opt, loss='mse')
   opt.get_updates(model.total_loss, model.trainable_weights)
Example 11
 def testFunctionalInstantiation(self):
   inputs = layers.Input(shape=(3,))
   x = layers.Dense(4, activation=tf.nn.relu)(inputs)
   outputs = layers.Dense(5, activation=tf.nn.softmax)(x)
   model = tf.keras.Model(inputs=inputs, outputs=outputs)
   optimizers.Kfac(learning_rate=0.002, damping=0.04,
                   model=model, loss='binary_crossentropy')
Example 12
def _train_model(data,
                 model,
                 loss,
                 lr=0.001,
                 damping=0.001,
                 batch_size=32,
                 epochs=1,
                 loss_weights=None):
  """Compiles and fits model to data and returns trainging results.

  Args:
    data: Tuple of numpy arrays shaped ((x_train, y_train), (x_test, y_test)).
    model: Uncompiled Keras model with inputs/output shapes matching the data.
    loss: tf.keras.losses loss function or serialized (string) loss function.
    lr: Learning rate for optimizer.
    damping: Damping parameter for KFAC.
    batch_size: Batch size used for training.
    epochs: Number of training epochs.
    loss_weights: List of weights or dict mapping layer names to loss function
      weights.

  Returns:
    A History object; its `history` attribute is a dictionary of training and
    validation results.
  """
  (x_train, y_train), valid_data = data
  opt = optimizers.Kfac(learning_rate=lr, damping=damping, model=model,
                        loss=loss, loss_weights=loss_weights)
  model.compile(opt, loss, loss_weights=loss_weights)

  return model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                   validation_data=valid_data, verbose=0)
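
A minimal usage sketch (not part of the test suite) showing how the _train_model helper above might be called; it assumes the _mnist_model() and _get_synthetic_mnist_dataset() helpers referenced by the other examples, and the hyperparameter values are illustrative only.

# Hypothetical call to the _train_model helper above. The data/model helpers
# and the hyperparameter values are assumptions for illustration, not values
# taken from the test suite.
data = _get_synthetic_mnist_dataset()  # ((x_train, y_train), (x_test, y_test))
history = _train_model(
    data,
    model=_mnist_model(),
    loss='categorical_crossentropy',
    lr=0.01,
    damping=0.001,
    batch_size=32,
    epochs=1)
# history.history holds per-epoch training and validation metrics.
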
Example 13
    def testExponentialDampingValuesWithDecayRate(self):
        init_value = 0.01
        decay_rate = 0.3
        num_decay_steps = 4
        num_delay_steps = 3
        opt = optimizers.Kfac(learning_rate=0.01,
                              damping=init_value,
                              model=self.model,
                              loss='mse')
        self.model.compile(opt, 'mse')
        damping_list = []
        cbs = [
            callbacks.ExponentialDecay(hyperparameter='damping',
                                       init_value=init_value,
                                       decay_rate=decay_rate,
                                       num_decay_steps=num_decay_steps,
                                       num_delay_steps=num_delay_steps,
                                       verbose=1),
            HyperParamTracker('damping', damping_list, HyperParamTracker.BATCH)
        ]
        self.model.fit(self.data,
                       self.labels,
                       batch_size=self.batch_size,
                       callbacks=cbs)

        expected_list = [init_value] * num_delay_steps + [
            init_value * decay_rate**min(i, num_decay_steps)
            for i in range(self.num_steps - num_delay_steps)
        ]
        self.assertAllClose(damping_list, expected_list)
Example 14
 def testClipValueFails(self):
     with self.assertRaises(ValueError):
         optimizers.Kfac(learning_rate=0.01,
                         damping=0.01,
                         model=_simple_mlp(),
                         loss='mse',
                         clipvalue=0.1)
Example 15
 def testFromConfig(self, kwargs_updates):
   kwargs = {
       'learning_rate': 3,
       'damping': 5,
       'momentum': 7,
       'min_damping': 9,
       'num_burnin_steps': 11,
       'invert_every': 13,
       'fisher_approx': {
           layers.Dense: 'kron_in_diag',
           'dense_1': 'kron_both_diag'
       },
   }
   kwargs.update(kwargs_updates)
   opt = optimizers.Kfac(model=_simple_mlp(), **kwargs)
   config = opt.get_config()
   config['name'] = 'diff_scope_name'
   opt2 = optimizers.Kfac.from_config(config)
   config2 = opt2.get_config()
   config2.pop('name')
   config.pop('name')
   self.assertEqual(config, config2)
   # Below is how Keras's model.save saves the configs. If the config is not
   # serializable, it will throw a TypeError or OverflowError.
   json.dumps(config, default=serialization.get_json_type).encode('utf8')
   json.dumps(config2, default=serialization.get_json_type).encode('utf8')
Example 16
 def testInferredBatchSizeFail(self, kfac_kwargs):
   dataset = tf.data.Dataset.from_tensors(([1.], [1.]))
   dataset = dataset.repeat().batch(11, drop_remainder=False)
   train_batch = dataset.make_one_shot_iterator().get_next()
   with self.assertRaisesRegex(ValueError, 'Could not infer batch_size.*'):
     optimizer = optimizers.Kfac(damping=10.,
                                 train_batch=train_batch,
                                 **kfac_kwargs)
Example 17
 def testLossTensor(self):
     loss_tensor = tf.convert_to_tensor(2.0)
     opt = optimizers.Kfac(learning_rate=0.01,
                           damping=0.01,
                           model=_simple_mlp(),
                           loss='mse',
                           loss_tensor=loss_tensor)
     self.assertEqual(opt.optimizer._loss_tensor, loss_tensor)
Example 18
 def testSeed(self):
     opt = optimizers.Kfac(learning_rate=0.01,
                           damping=0.01,
                           model=_simple_mlp(),
                           loss='mse',
                           seed=4321)
     lc = opt.optimizer.layers
     self.assertEqual(lc._loss_dict['squared_error_loss'][0]._default_seed,
                      4321)
Example 19
 def testGettingVariableHyperFails(self):
   self.skipTest('This is not fixed in TF 1.14 yet.')
   opt = optimizers.Kfac(model=_simple_mlp(),
                         loss='mse',
                         learning_rate=tf.Variable(0.1),
                         damping=tf.Variable(0.1))
   with self.assertRaisesRegex(tf.errors.FailedPreconditionError,
                               '.*uninitialized.*'):
     backend.get_value(opt.learning_rate)
Example 20
 def testAdaptiveWithLR(self, kfac_kwargs):
   dataset = tf.data.Dataset.from_tensors(([1.], [1.]))
   dataset = dataset.repeat().batch(11, drop_remainder=True)
   train_batch = dataset.make_one_shot_iterator().get_next()
   with self.assertRaisesRegex(ValueError, 'learning_rate must be None.*'):
     optimizer = optimizers.Kfac(damping=10.,
                                 train_batch=train_batch,
                                 learning_rate=0.1,
                                 **kfac_kwargs)
Example 21
 def testSequentialInstantiation(self):
   model = tf.keras.Sequential([
       layers.Conv2D(7, (3, 3), input_shape=(28, 28, 3)),
       layers.Activation('relu'),
       layers.Conv2D(13, (3, 3), activation='relu'),
       layers.GlobalMaxPool2D(),
       layers.Activation('softmax')
   ])
   optimizers.Kfac(learning_rate=0.03, damping=0.00007,
                   model=model, loss='binary_crossentropy')
Example 22
 def testRNNFails(self):
   model = tf.keras.Sequential()
   model.add(layers.Embedding(43, 128))
   model.add(layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2))
   model.add(layers.Dense(1, activation='sigmoid'))
   opt = optimizers.Kfac(learning_rate=0.003, damping=0.003,
                         model=model, loss='binary_crossentropy')
   with self.assertRaisesRegex(ValueError,
                               '.*lstm.* has more than one parent tensor.$'):
     opt._create_optimizer()
Example 23
 def testModifyingTensorHypersFails(self, name, val):
   kwargs = {'learning_rate': 3, 'damping': 5, 'momentum': 7}
   kwargs[name] = tf.convert_to_tensor(val)
   opt = optimizers.Kfac(model=_simple_mlp(), loss='mse', **kwargs)
   with self.subTest(name='AssignedCorrectly'):
     self.assertEqual(backend.get_value(getattr(opt, name)), val)
   with self.subTest(name='RaisesError'):
     with self.assertRaisesRegex(AttributeError,
                                 "Can't set attribute: {}".format(name)):
       setattr(opt, name, 17)
Example 24
    def test_saving_model_with_long_weights_names(self):
        self.skipTest('KFAC does not support nested models yet.')
        if h5py is None:
            self.skipTest('h5py required to run this test')

        with self.cached_session():
            x = keras.Input(shape=(2, ), name='nested_model_input')
            f = x
            for i in range(4):
                f = keras.layers.Dense(2,
                                       name='nested_model_dense_%d' % (i, ))(f)
            # This layer name will make the `weights_name`
            # HDF5 attribute blow out of proportion.
            f = keras.layers.Dense(2,
                                   name='nested_model_output' + ('x' *
                                                                 (2**14)))(f)
            nested_model = keras.Model(inputs=[x],
                                       outputs=[f],
                                       name='nested_model')

            x = keras.Input(shape=(2, ), name='outer_model_input')
            f = nested_model(x)
            f = keras.layers.Dense(2, name='outer_model_output')(f)

            model = keras.Model(inputs=[x], outputs=[f])
            model.compile(loss='mse',
                          optimizer=optimizers.Kfac(model=model,
                                                    **_KFAC_KWARGS),
                          metrics=['acc'])

            x = np.random.random((1, 2))
            y = np.random.random((1, 2))
            model.train_on_batch(x, y)
            out = model.predict(x)

            fd, fname = tempfile.mkstemp('.h5')
            keras.models.save_model(model, fname)
            model = saving_utils.load_model(fname, optimizer_name='new')

            # Check that the HDF5 files contains chunked array
            # of weight names.
            with h5py.File(fname, 'r') as h5file:
                num_weight_arrays = len([
                    attr
                    for attr in h5file['model_weights']['nested_model'].attrs
                    if attr.startswith('weight_names')
                ])
            # The chunking of layer names array should have happened.
            self.assertGreater(num_weight_arrays, 0)
            out2 = model.predict(x)
            self.assertAllClose(out, out2, atol=1e-05)

            # Cleanup
            os.close(fd)
            os.remove(fname)
Example 25
    def test_sequential_model_saving(self):
        if h5py is None:
            self.skipTest('h5py required to run this test')

        with self.cached_session():
            model = keras.models.Sequential()
            model.add(keras.layers.Dense(2, input_shape=(2, )))
            model.add(keras.layers.RepeatVector(3))
            model.add(keras.layers.Flatten())
            model.add(keras.layers.Dense(3))
            model.compile(loss=keras.losses.MSE,
                          optimizer=optimizers.Kfac(model=model,
                                                    **_KFAC_KWARGS),
                          metrics=[
                              keras.metrics.categorical_accuracy,
                              keras.metrics.CategoricalAccuracy()
                          ])

            x = np.random.random((1, 2))
            y = np.random.random((1, 3))

            # TODO(b/136561651): Since we use TFP distributions to sample from the
            # output distribution, the optimizers won't match exactly unless they are run
            # for the same number of steps. Even with a random seed, the internal
            # state of TFP changes with each call. We must switch to a stateless
            # sampler. Uncomment the train line below once this is implemented.
            # model.train_on_batch(x, y)

            out = model.predict(x)
            fd, fname = tempfile.mkstemp('.h5')
            keras.models.save_model(model, fname)

            new_model = saving_utils.load_model(fname, optimizer_name='new')
            os.close(fd)
            os.remove(fname)

            out2 = new_model.predict(x)
            self.assertAllClose(out, out2, atol=1e-05)

            # test that new updates are the same with both models
            x = np.random.random((1, 2))
            y = np.random.random((1, 3))
            model.train_on_batch(x, y)
            new_model.train_on_batch(x, y)

            x = np.random.random((1, 2))
            y = np.random.random((1, 3))
            eval_out = model.evaluate(x, y)
            eval_out2 = new_model.evaluate(x, y)
            self.assertArrayNear(eval_out, eval_out2, 1e-03)

            out = model.predict(x)
            out2 = new_model.predict(x)

            self.assertAllClose(out, out2, atol=1e-05)
Example 26
 def testGettingHyper(self, hyper_ctor):
   kwarg_values = {'learning_rate': 3, 'damping': 20, 'momentum': 13}
   kwargs = {k: hyper_ctor(v) for k, v in kwarg_values.items()}
   opt = optimizers.Kfac(model=_simple_mlp(), loss='mse', **kwargs)
   get_value = backend.get_value
   tf_opt = opt.optimizer
   with self.subTest(name='MatchesFloat'):
     for name, val in kwarg_values.items():
       self.assertEqual(get_value(getattr(opt, name)), val)
   with self.subTest(name='MatchesTfOpt'):
     self.assertEqual(get_value(opt.lr), get_value(tf_opt.learning_rate))
     self.assertEqual(get_value(opt.damping), get_value(tf_opt.damping))
     self.assertEqual(get_value(opt.momentum), get_value(tf_opt.momentum))
Example 27
 def testTrainModelWithFusedBN(self, has_shift):
   model = tf.keras.Sequential([
       layers.Conv2D(13, 5, input_shape=(28, 28, 1)),
       layers.BatchNormalization(center=has_shift, fused=True),
       layers.GlobalMaxPool2D(),
       layers.Dense(10, activation='softmax')
   ])
   (x_train, y_train), _ = _get_synthetic_mnist_dataset()
   loss = 'categorical_crossentropy'
   opt = optimizers.Kfac(
       learning_rate=0.01, damping=0.01, model=model, loss=loss)
   model.compile(opt, loss)
   return model.fit(x_train, y_train, batch_size=32, epochs=1, verbose=0)
Example 28
 def testSettingName(self):
   model = _simple_mlp()
   optimizer = optimizers.Kfac(damping=0.01, learning_rate=0.01,
                               model=model, loss='mse')
   optimizer.name = 'new_name'
   self.assertEqual(optimizer._name, 'new_name')
   self.assertEqual(optimizer.get_config()['name'], 'new_name')
   self.assertEqual(optimizer._kfac_kwargs['name'], 'new_name')
   model.compile(optimizer, 'mse')
   model._make_train_function()
   with self.assertRaisesRegex(ValueError,
                               '.*after the variables are created.*'):
     optimizer.name = 'another_name'
Example 29
 def testLRBackwardsCompatibility(self):
   """This tests learning rate getting/setting used by old Keras callbacks."""
   opt = optimizers.Kfac(
       learning_rate=3, damping=5, model=_simple_mlp(), loss='mse')
   self.assertEqual(backend.get_value(opt.lr), 3)
   self.assertEqual(backend.get_value(opt.learning_rate), 3)
   opt.lr = 7
   self.assertEqual(backend.get_value(opt.lr), 7)
   self.assertEqual(backend.get_value(opt.learning_rate), 7)
   backend.set_value(opt.lr, 9)
   self.assertEqual(backend.get_value(opt.lr), 9)
   self.assertEqual(backend.get_value(opt.learning_rate), 9)
   backend.set_value(opt.learning_rate, 11)
   self.assertEqual(backend.get_value(opt.lr), 11)
   self.assertEqual(backend.get_value(opt.learning_rate), 11)
Example 30
  def testAdaptiveModelFit(self, adaptive_kwargs):
    rands = lambda: np.random.random((100, 1)).astype(np.float32)
    dataset = tf.data.Dataset.from_tensor_slices((rands(), rands()))
    dataset = dataset.repeat().batch(10, drop_remainder=True)
    train_batch = dataset.make_one_shot_iterator().get_next()

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
    loss = 'mse'
    train_batch = dataset.make_one_shot_iterator().get_next()
    optimizer = optimizers.Kfac(damping=10.,
                                train_batch=train_batch,
                                model=model,
                                loss=loss,
                                **adaptive_kwargs)
    model.compile(optimizer, loss)
    model.fit(train_batch, steps_per_epoch=10, epochs=1)