def opt_combinations_only():
  """Returns two combinations for running with the two base optimizers."""
  experimental_opt_combinations = test_combinations.combine(
      mode='eager', opt_cls=optimizer_experimental.Optimizer)
  orig_opt_combination = test_combinations.combine(
      opt_cls=optimizer_v2.OptimizerV2)
  return experimental_opt_combinations + orig_opt_combination
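A minimal usage sketch (a hypothetical test, assuming the module-level imports
used throughout these examples): the combinations returned above are passed to
test_combinations.generate, which runs the test once per combination and
injects each entry (here opt_cls) as a keyword argument.

class OptCombinationsUsageTest(test_combinations.TestCase):

  @test_combinations.generate(opt_combinations_only())
  def test_receives_optimizer_class(self, opt_cls):
    # opt_cls is optimizer_v2.OptimizerV2 or the experimental Optimizer base
    # class; real tests typically instantiate a concrete subclass keyed off it.
    self.assertTrue(issubclass(
        opt_cls, (optimizer_v2.OptimizerV2, optimizer_experimental.Optimizer)))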
Example #2
class InterfaceTests(test_combinations.TestCase):
    def testNoDependency(self):
        root = tf.Module()
        hasdep = tf.Module()
        root.hasdep = hasdep
        nodep = tf.Module()
        root.nodep = data_structures.NoDependency(nodep)
        self.assertLen(root._trackable_children(), 1)
        self.assertIs(root._trackable_children()["hasdep"], root.hasdep)
        self.assertIs(root.hasdep, hasdep)
        self.assertIs(root.nodep, nodep)

        class NoDependencyModel(training.Model):
            @tf.__internal__.tracking.no_automatic_dependency_tracking
            def __init__(self):
                super(NoDependencyModel, self).__init__()
                self.a = []
                self.b = tf.Module()

        nodeps = NoDependencyModel()
        self.assertEqual([nodeps], util.list_objects(nodeps))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testDictionariesBasic(self):
        a = training.Model()
        b = training.Model()
        a.attribute = {"b": b}
        c = training.Model()
        a.attribute["c"] = []
        a.attribute["c"].append(c)
        a_deps = util.list_objects(a)
        self.assertIn(b, a_deps)
        self.assertIn(c, a_deps)
        self.assertIs(b, a.attribute["b"])
        self.assertEqual({"b", "c"}, a.attribute._trackable_children().keys())
        self.assertEqual([b, c], a.layers)
        self.assertEqual([b, c], a.attribute.layers)
        self.assertEqual([c], a.attribute["c"].layers)
        checkpoint = tf.train.Checkpoint(a=a)
        save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
        with self.cached_session():
            checkpoint.restore(
                save_path).assert_consumed().initialize_or_restore()

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testNoDepList(self):
        a = training.Model()
        a.l1 = data_structures.NoDependency([])
        a.l1.insert(1, 0)
        self.assertIsInstance(a.l1, list)
        checkpoint = tf.train.Checkpoint(a=a)
        checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
        a.l2 = []
        a.l2.insert(1, tf.Module())
        with self.assertRaisesRegex(ValueError, "A list element was replaced"):
            checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
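Distilled from the assertions above, a minimal sketch of the NoDependency
behavior outside the test harness (attribute names are illustrative):

root = tf.Module()
root.tracked = tf.Module()
root.untracked = data_structures.NoDependency(tf.Module())
# Only the tracked attribute appears in the checkpoint dependency graph.
assert "tracked" in root._trackable_children()
assert "untracked" not in root._trackable_children()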
Example #3
class MixedPrecisionTest(test_combinations.TestCase):

  IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE'

  def setUp(self):
    super(MixedPrecisionTest, self).setUp()
    # Enable the tests to be run on pre-Volta GPUs by telling the grappler pass
    # to ignore performance and always transform the graph.
    self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR)
    os.environ[self.IGNORE_PERF_VAR] = '1'

  def tearDown(self):
    # Set the IGNORE_PERF_VAR variable back to its original value.
    if self._original_ignore_perf_value is not None:
      os.environ[self.IGNORE_PERF_VAR] = self._original_ignore_perf_value
    else:
      del os.environ[self.IGNORE_PERF_VAR]

    tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
    super(MixedPrecisionTest, self).tearDown()

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_wrap_optimizer(self):
    opt = gradient_descent_v2.SGD(1.0)
    opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(opt, 123.)
    self.assertIsInstance(
        opt, loss_scale_optimizer_v2.LossScaleOptimizerV1)
    self.assertEqual(self.evaluate(opt.loss_scale), 123.)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_optimizer_errors(self):
    opt = gradient_descent_v2.SGD(1.0)
    opt = loss_scale_optimizer_v2.LossScaleOptimizerV1(opt, 'dynamic')
    with self.assertRaisesRegex(
        ValueError, '"opt" must not already be an instance of a '
        'LossScaleOptimizer.'):
      tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(tf.config.optimizer.get_experimental_options()
                     .get('auto_mixed_precision', False))

  @test_utils.enable_v2_dtype_behavior
  def test_error_if_policy_is_set(self):
    with policy.policy_scope('mixed_float16'):
      with self.assertRaisesRegex(ValueError,
                                  'the global Keras dtype Policy has been set'):
        tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            gradient_descent_v2.SGD(1.0))
    # Test no error is thrown when the policy is currently the default.
    tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
        gradient_descent_v2.SGD(1.0))
    # Test no error is thrown when the policy is a non-mixed policy.
    with policy.policy_scope('float64'):
      tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
          gradient_descent_v2.SGD(1.0))
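Outside the test harness, the rewrite API exercised above is used as a pair; a
hedged sketch (loss_scale='dynamic' mirrors the documented default):

opt = gradient_descent_v2.SGD(1.0)
opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
    opt, loss_scale='dynamic')  # wraps opt in a LossScaleOptimizer
# (build and train the model with the wrapped optimizer here)
tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()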
Example #4
class GRULayerGradientTapeTest(test_combinations.TestCase):
    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def test_in_tape(self):
        with self.test_session(config=_config):
            time_steps = 10
            embedding_size = 11
            gru_unit_size = 12

            gru_layer = keras.layers.GRU(
                gru_unit_size,
                return_sequences=True,
                return_state=True,
                recurrent_activation="sigmoid",
                recurrent_initializer="glorot_uniform",
            )

            x = tf.random.uniform([1, time_steps, embedding_size])
            y = tf.random.uniform([1, gru_unit_size])

            with tf.GradientTape() as tape:
                hidden_state = tf.zeros([1, gru_unit_size], dtype=tf.float32)
                _, state = gru_layer(x, initial_state=hidden_state)

                loss = tf.reduce_mean(tf.square(state - y))

            tape.gradient(loss, gru_layer.variables)
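In practice the final tape.gradient call would capture its result and apply it;
a short continuation sketch using the names from the test above (the choice of
SGD here is an assumption):

grads = tape.gradient(loss, gru_layer.variables)  # replaces the discarded call
keras.optimizers.SGD(0.1).apply_gradients(zip(grads, gru_layer.variables))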
Example #5

def opt_and_strategy_and_mode_combinations():
  """Returns combinations for running with multiple optimizers and strategies.

  Returns:
    Combinations that run with both OptimizerV2 and the experimental optimizer;
    and with the default strategy and mirrored strategy; and in both graph and
    eager mode.
  """
  # For the experimental optimizer, don't use graph mode directly since it's
  # unsupported. Instead, run both without and with a tf.function, in order to
  # test both graph and eager mode.
  experimental_opt_combinations = test_combinations.combine(
      opt_cls=optimizer_experimental.Optimizer,
      strategy_fn=STRATEGY_FNS,
      mode='eager',
      use_tf_function=[False, True])
  orig_opt_combinations = test_combinations.combine(
      opt_cls=optimizer_v2.OptimizerV2,
      strategy_fn=STRATEGY_FNS,
      mode=['graph', 'eager'],
      use_tf_function=False)
  return experimental_opt_combinations + orig_opt_combinations
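A hypothetical consumer of these combinations (the keyword names follow the
docstring above; the train step itself is a placeholder):

class TrainStepTest(test_combinations.TestCase):

  @test_combinations.generate(opt_and_strategy_and_mode_combinations())
  def test_train_step(self, opt_cls, strategy_fn, use_tf_function):
    strategy = strategy_fn()
    with strategy.scope():
      step = lambda: None  # placeholder step built around opt_cls
      if use_tf_function:
        step = tf.function(step)
      step()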
Example #6

class BatchNormalizationV1Test(test_combinations.TestCase):
    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_v1_fused_attribute(self):
        norm = batch_normalization_v1.BatchNormalization()
        inp = keras.layers.Input((4, 4, 4))
        norm(inp)
        self.assertEqual(norm.fused, True)

        norm = batch_normalization_v1.BatchNormalization(fused=False)
        self.assertEqual(norm.fused, False)
        inp = keras.layers.Input(shape=(4, 4, 4))
        norm(inp)
        self.assertEqual(norm.fused, False)

        norm = batch_normalization_v1.BatchNormalization(virtual_batch_size=2)
        self.assertEqual(norm.fused, True)
        inp = keras.layers.Input(shape=(2, 2, 2))
        norm(inp)
        self.assertEqual(norm.fused, False)
Example #7
class SequenceFeaturesSavingTest(tf.test.TestCase, parameterized.TestCase):
    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def test_saving_with_sequence_features(self):
        cols = [
            tf.feature_column.sequence_numeric_column("a"),
            tf.feature_column.indicator_column(
                tf.feature_column.
                sequence_categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a":
            keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
            "b":
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name="b",
                               dtype="string"),
        }

        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(
            loss=keras.losses.MSE,
            optimizer="rmsprop",
            metrics=[keras.metrics.categorical_accuracy],
        )

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1

        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = tf.SparseTensor(indices_a, values_a,
                                   (batch_size, timesteps, 1))

        values_b = np.zeros(10, dtype=str)  # np.str was removed in NumPy 1.24
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = tf.SparseTensor(indices_b, values_b,
                                   (batch_size, timesteps, 1))

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    "a": inputs_a,
                    "b": inputs_b
                }, steps=1),
                batch_size,
            )
Example #8
class TestSaveModel(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    super(TestSaveModel, self).setUp()
    self.model = test_utils.get_small_sequential_mlp(1, 2, 3)
    self.subclassed_model = test_utils.get_small_subclass_mlp(1, 2)

  def assert_h5_format(self, path):
    if h5py is not None:
      self.assertTrue(h5py.is_hdf5(path),
                      'Model saved at path {} is not a valid hdf5 file.'
                      .format(path))

  def assert_saved_model(self, path):
    tf.__internal__.saved_model.parse_saved_model(path)

  @test_utils.run_v2_only
  def test_load_file_not_found(self):
    path = pathlib.Path(self.get_temp_dir()) / 'does_not_exist'
    with self.assertRaisesRegex(IOError, 'No file or directory found at'):
      save.load_model(path)

  @test_utils.run_v2_only
  def test_save_format_defaults(self):
    path = os.path.join(self.get_temp_dir(), 'model_path')
    save.save_model(self.model, path)
    self.assert_saved_model(path)

  @test_utils.run_v2_only
  def test_save_format_defaults_pathlib(self):
    path = pathlib.Path(self.get_temp_dir()) / 'model_path'
    save.save_model(self.model, path)
    self.assert_saved_model(path)

  @test_utils.run_v2_only
  def test_save_hdf5(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='h5')
    self.assert_h5_format(path)
    with self.assertRaisesRegex(
        NotImplementedError,
        'requires the model to be a Functional model or a Sequential model.'):
      save.save_model(self.subclassed_model, path, save_format='h5')

  @test_utils.run_v2_only
  def test_save_load_hdf5_pathlib(self):
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    save.save_model(self.model, path, save_format='h5')
    save.load_model(path)

  @test_utils.run_v2_only
  def test_save_tf(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='tf')
    self.assert_saved_model(path)
    with self.assertRaisesRegex(
        ValueError, r'Model.*cannot be saved.*as opposed to `model.call\(\).*'):
      save.save_model(self.subclassed_model, path, save_format='tf')
    self.subclassed_model.predict(np.random.random((3, 5)))
    save.save_model(self.subclassed_model, path, save_format='tf')
    self.assert_saved_model(path)

  @test_utils.run_v2_only
  def test_save_load_tf_string(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='tf')
    save.load_model(path)

  @test_utils.run_v2_only
  def test_save_load_tf_pathlib(self):
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    save.save_model(self.model, path, save_format='tf')
    save.load_model(path)

  @test_utils.run_v2_only
  def test_save_load_weights_tf_pathlib(self):
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    self.model.save_weights(path, save_format='tf')
    self.model.load_weights(path)

  @test_utils.run_v2_only
  def test_save_load_weights_hdf5_pathlib(self):
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    self.model.save_weights(path, save_format='h5')
    self.model.load_weights(path)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_saving_h5_for_rnn_layers(self):
    # See https://github.com/tensorflow/tensorflow/issues/35731 for details.
    inputs = keras.Input([10, 91], name='train_input')
    rnn_layers = [
        keras.layers.LSTMCell(size, recurrent_dropout=0, name='rnn_cell%d' % i)
        for i, size in enumerate([512, 512])
    ]
    rnn_output = keras.layers.RNN(
        rnn_layers, return_sequences=True, name='rnn_layer')(inputs)
    pred_feat = keras.layers.Dense(91, name='prediction_features')(rnn_output)
    pred = keras.layers.Softmax()(pred_feat)
    model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat])
    path = os.path.join(self.get_temp_dir(), 'model_path.h5')
    model.save(path)

    # Make sure the variable names are unique.
    self.assertNotEqual(rnn_layers[0].kernel.name,
                        rnn_layers[1].kernel.name)
    self.assertIn('rnn_cell1', rnn_layers[1].kernel.name)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_saving_optimizer_weights(self):

    class MyModel(keras.Model):

      def __init__(self):
        super(MyModel, self).__init__()
        self.layer = keras.layers.Dense(1)

      def call(self, x):
        return self.layer(x)

    path = os.path.join(self.get_temp_dir(), 'weights_path')
    x, y = np.ones((10, 10)), np.ones((10, 1))

    model = MyModel()
    model.compile('rmsprop', loss='bce')
    model.train_on_batch(x, y)
    model.reset_metrics()
    model.save_weights(path, save_format='tf')

    batch_loss = model.train_on_batch(x, y)

    new_model = MyModel()
    new_model.compile('rmsprop', loss='bce')
    new_model.train_on_batch(x, y)
    new_model.reset_metrics()

    new_model.load_weights(path)
    new_batch_loss = new_model.train_on_batch(x, y)

    self.assertAllClose(batch_loss, new_batch_loss)

  @test_combinations.generate(
      test_combinations.combine(mode=['eager', 'graph']))
  def test_save_include_optimizer_false(self):

    def get_variables(file_name):
      reader = tf.train.load_checkpoint(
          os.path.join(file_name, 'variables/variables'))
      shape_from_key = reader.get_variable_to_shape_map()
      return sorted(shape_from_key.keys())

    path = os.path.join(self.get_temp_dir(), 'no_optimizer')
    x, y = np.ones((10, 10)), np.ones((10, 1))

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1))
    model.compile('adam', loss='mse')
    model.train_on_batch(x, y)
    model.save(path, save_format='tf', include_optimizer=False)
    variables = get_variables(path)

    for v in variables:
      self.assertNotIn('optimizer', v)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_saving_model_with_custom_object(self):
    with generic_utils.custom_object_scope(), self.cached_session():

      @generic_utils.register_keras_serializable()
      class CustomLoss(losses.MeanSquaredError):
        pass

      model = sequential.Sequential(
          [core.Dense(units=1, input_shape=(1,))])
      model.compile(optimizer='sgd', loss=CustomLoss())
      model.fit(np.zeros([10, 1]), np.zeros([10, 1]))

      temp_dir = self.get_temp_dir()
      filepath = os.path.join(temp_dir, 'saving')
      model.save(filepath)

      # Make sure the model can be correctly loaded back.
      _ = save.load_model(filepath, compile=True)

  def test_saving_model_with_name_conflict(self):

    class Sequential(keras.Model):

      def __init__(self):
        super(Sequential, self).__init__()
        self.layer = keras.layers.Dense(1)

      def call(self, x):
        return self.layer(x)

    model = Sequential()
    model(tf.ones((10, 10)))
    temp_dir = self.get_temp_dir()
    filepath = os.path.join(temp_dir, 'Sequential')

    with self.assertLogs() as logs:
      model.save(filepath, save_format='tf')

    expected_substring = 'has the same name \'Sequential\' as a built-in Keras'
    matched = [log for log in logs.output if expected_substring in log]
    self.assertNotEmpty(matched)

  def test_saving_built_in_model(self):
    model = LinearModel()
    model(tf.constant([[5.]]))
    temp_dir = self.get_temp_dir()
    filepath = os.path.join(temp_dir, 'LinearModel')
    with self.assertLogs() as logs:
      model.save(filepath, save_format='tf')

    expected_substring = 'has the same name \'LinearModel\' as a built-in Keras'
    matched = [log for log in logs.output if expected_substring in log]
    # Check that a warning is *not* logged for a premade model.
    self.assertEmpty(matched)
Example #9
class TestJson(test_combinations.TestCase):
  """Tests to_json()/from_json()."""

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_dense_features(self):
    cols = [
        tf.feature_column.numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(1,), name='a'),
        'b': keras.layers.Input(shape=(1,), name='b', dtype='string')
    }

    fc_layer = dense_features.DenseFeatures(cols)(input_layers)
    output = keras.layers.Dense(10)(fc_layer)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    inputs_a = np.arange(10).reshape(10, 1)
    inputs_b = np.arange(10).reshape(10, 1).astype('str')

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_sequence_features(self):
    cols = [
        tf.feature_column.sequence_numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b':
            keras.layers.Input(
                shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = tf.SparseTensor(indices_a, values_a,
                               (batch_size, timesteps, 1))

    values_b = np.zeros(10, dtype=str)  # np.str was removed in NumPy 1.24
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = tf.SparseTensor(indices_b, values_b,
                               (batch_size, timesteps, 1))

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(
          loaded_model.predict({
              'a': inputs_a,
              'b': inputs_b
          }, steps=1), batch_size)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_nested_layers(self):

    class MyLayer(keras.layers.Layer):

      def __init__(self, sublayers, **kwargs):
        super(MyLayer, self).__init__(**kwargs)
        self.sublayers = sublayers

      def get_config(self):
        config = super(MyLayer, self).get_config()
        config['sublayers'] = self.sublayers
        return config

    layer = MyLayer([keras.layers.Dense(2, name='MyDense'),
                     RegisteredSubLayer(name='MySubLayer')])
    model = keras.Sequential([keras.Input([None]), layer])
    model_json = model.to_json()

    self.assertIn('Foo>RegisteredSubLayer', model_json)

    loaded_model = model_config.model_from_json(
        model_json, custom_objects={'MyLayer': MyLayer})
    loaded_layer = loaded_model.layers[0]
    self.assertIsInstance(loaded_layer.sublayers[0], keras.layers.Dense)
    self.assertEqual(loaded_layer.sublayers[0].name, 'MyDense')
    self.assertIsInstance(loaded_layer.sublayers[1], RegisteredSubLayer)
    self.assertEqual(loaded_layer.sublayers[1].name, 'MySubLayer')
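RegisteredSubLayer is defined elsewhere in the test module; a plausible sketch
of its registration, inferred from the 'Foo>RegisteredSubLayer' identifier
asserted above:

@generic_utils.register_keras_serializable(package='Foo')
class RegisteredSubLayer(keras.layers.Layer):
  pass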
Example #10
class MultiHeadAttentionTest(test_combinations.TestCase):
    @parameterized.named_parameters(
        ("key_value_same_proj", None, None, [40, 80]),
        ("key_value_different_proj", 32, 60, [40, 60]),
    )
    def test_non_masked_attention(self, value_dim, output_shape, output_dims):
        """Test that the attention layer can be created without a mask tensor."""
        test_layer = keras.layers.MultiHeadAttention(
            num_heads=12,
            key_dim=64,
            value_dim=value_dim,
            output_shape=output_shape,
        )
        # Create a 3-dimensional input (the first dimension is implicit).
        query = keras.Input(shape=(40, 80))
        value = keras.Input(shape=(20, 80))
        output = test_layer(query=query, value=value)
        self.assertEqual(output.shape.as_list(), [None] + output_dims)

    def test_non_masked_self_attention(self):
        """Test with one input (self-attenntion) and no mask tensor."""
        test_layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
        # Create a 3-dimensional input (the first dimension is implicit).
        query = keras.Input(shape=(40, 80))
        output = test_layer(query, query)
        self.assertEqual(output.shape.as_list(), [None, 40, 80])

    def test_attention_scores(self):
        """Test attention outputs with coefficients."""
        test_layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
        # Create a 3-dimensional input (the first dimension is implicit).
        query = keras.Input(shape=(40, 80))
        output, coef = test_layer(query, query, return_attention_scores=True)
        self.assertEqual(output.shape.as_list(), [None, 40, 80])
        self.assertEqual(coef.shape.as_list(), [None, 12, 40, 40])

    def test_attention_scores_with_values(self):
        """Test attention outputs with coefficients."""
        test_layer = keras.layers.MultiHeadAttention(num_heads=12, key_dim=64)
        # Create a 3-dimensional input (the first dimension is implicit).
        query = keras.Input(shape=(40, 80))
        value = keras.Input(shape=(60, 80))
        output, coef = test_layer(query, value, return_attention_scores=True)
        self.assertEqual(output.shape.as_list(), [None, 40, 80])
        self.assertEqual(coef.shape.as_list(), [None, 12, 40, 60])

    @parameterized.named_parameters(("with_bias", True), ("no_bias", False))
    def test_masked_attention(self, use_bias):
        """Test with a mask tensor."""
        test_layer = keras.layers.MultiHeadAttention(num_heads=2,
                                                     key_dim=2,
                                                     use_bias=use_bias)
        # Create a 3-dimensional input (the first dimension is implicit).
        batch_size = 3
        query = keras.Input(shape=(4, 8))
        value = keras.Input(shape=(2, 8))
        mask_tensor = keras.Input(shape=(4, 2))
        output = test_layer(query=query,
                            value=value,
                            attention_mask=mask_tensor)

        # Create a model containing the test layer.
        model = keras.Model([query, value, mask_tensor], output)

        # Generate data for the input (non-mask) tensors.
        from_data = 10 * np.random.random_sample((batch_size, 4, 8))
        to_data = 10 * np.random.random_sample((batch_size, 2, 8))

        # Invoke the model with random mask data; this should mask at least
        # one element.
        mask_data = np.random.randint(2, size=(batch_size, 4, 2))
        masked_output_data = model.predict([from_data, to_data, mask_data])

        # Invoke the model on the same data with a null mask (nothing masked).
        null_mask_data = np.ones((batch_size, 4, 2))
        unmasked_output_data = model.predict(
            [from_data, to_data, null_mask_data])

        # Because one run is masked and the other is not, the outputs should
        # not be the same.
        self.assertNotAllClose(masked_output_data, unmasked_output_data)

        # Tests the layer with three inputs: Q, K, V.
        key = keras.Input(shape=(2, 8))
        output = test_layer(query,
                            value=value,
                            key=key,
                            attention_mask=mask_tensor)
        model = keras.Model([query, value, key, mask_tensor], output)

        masked_output_data = model.predict(
            [from_data, to_data, to_data, mask_data])
        unmasked_output_data = model.predict(
            [from_data, to_data, to_data, null_mask_data])
        # Because one run is masked and the other is not, the outputs should
        # not be the same.
        self.assertNotAllClose(masked_output_data, unmasked_output_data)

        if use_bias:
            self.assertLen(test_layer._query_dense.trainable_variables, 2)
            self.assertLen(test_layer._output_dense.trainable_variables, 2)
        else:
            self.assertLen(test_layer._query_dense.trainable_variables, 1)
            self.assertLen(test_layer._output_dense.trainable_variables, 1)

    def test_initializer(self):
        """Test with a specified initializer."""
        test_layer = keras.layers.MultiHeadAttention(
            num_heads=12,
            key_dim=64,
            kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02),
        )
        # Create a 3-dimensional input (the first dimension is implicit).
        query = keras.Input(shape=(40, 80))
        output = test_layer(query, query)
        self.assertEqual(output.shape.as_list(), [None, 40, 80])

        # Make sure the sublayers end up with different kernel values, i.e.
        # the initializer instance is not reused across them.
        self.assertNotAllClose(
            keras.backend.eval(test_layer._query_dense.kernel),
            keras.backend.eval(test_layer._key_dense.kernel),
        )
        self.assertNotAllClose(
            keras.backend.eval(test_layer._query_dense.kernel),
            keras.backend.eval(test_layer._value_dense.kernel),
        )
        self.assertNotAllClose(
            keras.backend.eval(test_layer._query_dense.kernel),
            keras.backend.eval(test_layer._output_dense.kernel),
        )

    def test_masked_attention_with_scores(self):
        """Test with a mask tensor."""
        test_layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=2)
        # Create a 3-dimensional input (the first dimension is implicit).
        batch_size = 3
        query = keras.Input(shape=(4, 8))
        value = keras.Input(shape=(2, 8))
        mask_tensor = keras.Input(shape=(4, 2))
        output = test_layer(query=query,
                            value=value,
                            attention_mask=mask_tensor)

        # Create a model containing the test layer.
        model = keras.Model([query, value, mask_tensor], output)

        # Generate data for the input (non-mask) tensors.
        from_data = 10 * np.random.random_sample((batch_size, 4, 8))
        to_data = 10 * np.random.random_sample((batch_size, 2, 8))

        # Invoke the model with random mask data; this should mask at least
        # one element.
        mask_data = np.random.randint(2, size=(batch_size, 4, 2))
        masked_output_data = model.predict([from_data, to_data, mask_data])

        # Invoke the model on the same data with a null mask (nothing masked).
        null_mask_data = np.ones((batch_size, 4, 2))
        unmasked_output_data = model.predict(
            [from_data, to_data, null_mask_data])

        # Because one run is masked and the other is not, the outputs should
        # not be the same.
        self.assertNotAllClose(masked_output_data, unmasked_output_data)

        # Create a model containing attention scores.
        output, scores = test_layer(
            query=query,
            value=value,
            attention_mask=mask_tensor,
            return_attention_scores=True,
        )
        model = keras.Model([query, value, mask_tensor], [output, scores])
        masked_output_data_score, masked_score = model.predict(
            [from_data, to_data, mask_data])
        unmasked_output_data_score, unmasked_score = model.predict(
            [from_data, to_data, null_mask_data])
        self.assertNotAllClose(masked_output_data_score,
                               unmasked_output_data_score)
        self.assertAllClose(masked_output_data, masked_output_data_score)
        self.assertAllClose(unmasked_output_data, unmasked_output_data_score)
        self.assertNotAllClose(masked_score, unmasked_score)

    @parameterized.named_parameters(
        ("4d_inputs_1freebatch_mask2", [3, 4], [3, 2], [4, 2], (2, )),
        ("4d_inputs_1freebatch_mask3", [3, 4], [3, 2], [3, 4, 2], (2, )),
        ("4d_inputs_1freebatch_mask4", [3, 4], [3, 2], [3, 2, 4, 2], (2, )),
        ("4D_inputs_2D_attention", [3, 4], [3, 2], [3, 4, 3, 2], (1, 2)),
        ("5D_inputs_2D_attention", [5, 3, 4], [5, 3, 2], [3, 4, 3, 2], (2, 3)),
        (
            "5D_inputs_2D_attention_fullmask",
            [5, 3, 4],
            [5, 3, 2],
            [5, 3, 4, 3, 2],
            (2, 3),
        ),
    )
    def test_high_dim_attention(self, q_dims, v_dims, mask_dims,
                                attention_axes):
        """Test with a mask tensor."""
        test_layer = keras.layers.MultiHeadAttention(
            num_heads=2, key_dim=2, attention_axes=attention_axes)
        batch_size, hidden_size = 3, 8
        # Generate data for the input (non-mask) tensors.
        query_shape = [batch_size] + q_dims + [hidden_size]
        value_shape = [batch_size] + v_dims + [hidden_size]
        mask_shape = [batch_size] + mask_dims
        query = 10 * np.random.random_sample(query_shape)
        value = 10 * np.random.random_sample(value_shape)

        # Random mask data; this should mask at least one element.
        mask_data = np.random.randint(2, size=mask_shape).astype("bool")
        # A null mask, where no elements are masked.
        null_mask_data = np.ones(mask_shape)
        # Because one run is masked and the other is not, the outputs should
        # not be the same (asserted below).
        query_tensor = keras.Input(query_shape[1:], name="query")
        value_tensor = keras.Input(value_shape[1:], name="value")
        mask_tensor = keras.Input(mask_shape[1:], name="mask")
        output = test_layer(query=query_tensor,
                            value=value_tensor,
                            attention_mask=mask_tensor)
        model = keras.Model([query_tensor, value_tensor, mask_tensor], output)

        self.assertNotAllClose(
            model.predict([query, value, mask_data]),
            model.predict([query, value, null_mask_data]),
        )

    def test_dropout(self):
        test_layer = keras.layers.MultiHeadAttention(num_heads=2,
                                                     key_dim=2,
                                                     dropout=0.5)

        # Generate data for the input (non-mask) tensors.
        from_data = keras.backend.ones(shape=(32, 4, 8))
        to_data = keras.backend.ones(shape=(32, 2, 8))
        train_out = test_layer(from_data, to_data, None, None, None, True)
        test_out = test_layer(from_data, to_data, None, None, None, False)

        # With dropout enabled, the training-mode output should not match the
        # inference-mode output.
        self.assertNotAllClose(keras.backend.eval(train_out),
                               keras.backend.eval(test_out))

    @test_combinations.generate(
        test_combinations.combine(
            ragged_query=[True, False],
            ragged_value=[True, False],
            ragged_key=[True, False],
        ))
    def test_ragged_tensor(self, ragged_query, ragged_value, ragged_key):
        if ragged_query:
            query = tf.ragged.constant(
                [
                    [[3.0, 1.0], [4.0, 1.0]],
                    [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0]],
                    [[1.0, 2.0]],
                ],
                inner_shape=(2, ),
            )
        else:
            query = keras.backend.ones(shape=(3, 2, 2))

        if ragged_value:
            value = tf.ragged.constant(
                [[[3.0, 1.0], [4.0, 1.0]], [[5.0, 9.0]], [[1.0, 2.0]]],
                inner_shape=(2, ),
            )
        else:
            value = keras.backend.ones(shape=(3, 4, 2))

        if ragged_key:
            key = tf.ragged.constant(
                [
                    [[3.0, 1.0], [4.0, 1.0]],
                    [[5.0, 9.0], [2.0, 6.0], [3.0, 1.0], [1.0, 5.0]],
                    [[1.0, 2.0]],
                ],
                inner_shape=(2, ),
            )
        else:
            key = keras.backend.ones(shape=(3, 4, 2))

        test_layer = keras.layers.MultiHeadAttention(num_heads=5, key_dim=2)
        results = test_layer(query, value, key)
        self.assertAllEqual(results.shape.as_list(), query.shape.as_list())
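A compact standalone sketch of the layer exercised above, on concrete dense
tensors (shapes are arbitrary):

layer = keras.layers.MultiHeadAttention(num_heads=2, key_dim=4)
q = tf.random.uniform((3, 5, 8))   # (batch, target_len, features)
v = tf.random.uniform((3, 7, 8))   # (batch, source_len, features)
out, scores = layer(q, v, return_attention_scores=True)
print(out.shape)     # (3, 5, 8): output keeps the query's feature size
print(scores.shape)  # (3, 2, 5, 7): (batch, heads, target_len, source_len)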
Example #11
class DenseTest(tf.test.TestCase, parameterized.TestCase):

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testDenseProperties(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    self.assertEqual(dense.units, 2)
    self.assertEqual(dense.activation, tf.nn.relu)
    self.assertEqual(dense.kernel_regularizer, None)
    self.assertEqual(dense.bias_regularizer, None)
    self.assertEqual(dense.activity_regularizer, None)
    self.assertEqual(dense.use_bias, True)

    # Test auto-naming
    dense = core_layers.Dense(2, activation=tf.nn.relu)
    dense(tf.random.uniform((5, 2)))
    self.assertEqual(dense.name, 'dense_1')
    dense = core_layers.Dense(2, activation=tf.nn.relu)
    dense(tf.random.uniform((5, 2)))
    self.assertEqual(dense.name, 'dense_2')

  @tf_test_utils.run_deprecated_v1
  def testVariableInput(self):
    with self.cached_session():
      v = tf.compat.v1.get_variable(
          'X', initializer=tf.compat.v1.zeros_initializer(), shape=(1, 1))
      x = core_layers.Dense(1)(v)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertAllEqual(x, [[0.0]])

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testCall(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 4), seed=1)
    outputs = dense(inputs)
    self.assertListEqual([5, 2], outputs.get_shape().as_list())
    self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
    self.assertListEqual(dense.trainable_variables,
                         [dense.kernel, dense.bias])
    self.assertListEqual(dense.non_trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2)
    self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
    self.assertEqual(dense.bias.name, 'my_dense/bias:0')

  @tf_test_utils.assert_no_new_pyobjects_executing_eagerly
  def testNoEagerLeak(self):
    # Tests that repeatedly constructing and building a Layer does not leak
    # Python objects.
    inputs = tf.random.uniform((5, 4), seed=1)
    core_layers.Dense(5)(inputs)
    core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')(inputs)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testCallTensorDot(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 4, 3), seed=1)
    outputs = dense(inputs)
    self.assertListEqual([5, 4, 2], outputs.get_shape().as_list())

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testNoBias(self):
    dense = core_layers.Dense(2, use_bias=False, name='my_dense')
    inputs = tf.random.uniform((5, 2), seed=1)
    _ = dense(inputs)
    self.assertListEqual(dense.variables, [dense.kernel])
    self.assertListEqual(dense.trainable_variables, [dense.kernel])
    self.assertListEqual(dense.non_trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 1)
    self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
    self.assertEqual(dense.bias, None)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testNonTrainable(self):
    dense = core_layers.Dense(2, trainable=False, name='my_dense')
    inputs = tf.random.uniform((5, 2), seed=1)
    _ = dense(inputs)
    self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
    self.assertListEqual(dense.non_trainable_variables,
                         [dense.kernel, dense.bias])
    self.assertListEqual(dense.trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 0)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testOutputShape(self):
    dense = core_layers.Dense(7, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [5, 7])

    inputs = tf.random.uniform((5, 2, 3), seed=1)
    outputs = dense(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [5, 2, 7])

    inputs = tf.random.uniform((1, 2, 4, 3), seed=1)
    outputs = dense(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [1, 2, 4, 7])

  @tf_test_utils.run_deprecated_v1
  def testCallOnPlaceHolder(self):
    inputs = tf.compat.v1.placeholder(dtype=tf.float32)
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None])
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[None, None, None])
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3])
    dense = core_layers.Dense(4, name='my_dense')
    dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None, 3])
    dense = core_layers.Dense(4, name='my_dense')
    dense(inputs)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testActivation(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    if not tf.executing_eagerly():
      self.assertEqual(outputs.op.name, 'dense1/Relu')

    dense = core_layers.Dense(2, name='dense2')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    if not tf.executing_eagerly():
      self.assertEqual(outputs.op.name, 'dense2/BiasAdd')

  @tf_test_utils.run_deprecated_v1
  def testActivityRegularizer(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(
        2, name='my_dense', activity_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.assertListEqual(dense.losses, loss_keys)

  @tf_test_utils.run_deprecated_v1
  def testKernelRegularizer(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(
        2, name='my_dense', kernel_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.evaluate([v.initializer for v in dense.variables])
    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))

  @tf_test_utils.run_deprecated_v1
  def testKernelRegularizerWithReuse(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = core_layers.dense(
        inputs, 2, name='my_dense', kernel_regularizer=regularizer)
    self.assertEqual(
        len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1)
    _ = core_layers.dense(
        inputs, 2, name='my_dense', kernel_regularizer=regularizer, reuse=True)
    self.assertEqual(
        len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1)

  @tf_test_utils.run_deprecated_v1
  def testBiasRegularizer(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(2, name='my_dense', bias_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.evaluate([v.initializer for v in dense.variables])
    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))

  @tf_test_utils.run_deprecated_v1
  def testFunctionalDense(self):
    with self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      outputs = core_layers.dense(
          inputs, 2, activation=tf.nn.relu, name='my_dense')
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2)
      self.assertEqual(outputs.op.name, 'my_dense/Relu')

  @tf_test_utils.run_deprecated_v1
  def testFunctionalDenseTwice(self):
    inputs = tf.random.uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    vars1 = _get_variable_dict_from_varstore().values()
    core_layers.dense(inputs, 2)
    vars2 = _get_variable_dict_from_varstore().values()
    self.assertEqual(len(vars1), 2)
    self.assertEqual(len(vars2), 4)

  # TODO(alive): get this to work in eager mode.
  def testFunctionalDenseTwiceReuse(self):
    with self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name='my_dense')
      vars1 = tf.compat.v1.trainable_variables()
      core_layers.dense(inputs, 2, name='my_dense', reuse=True)
      vars2 = tf.compat.v1.trainable_variables()
      self.assertEqual(vars1, vars2)

  # TODO(alive): get this to work in eager mode.
  def testFunctionalDenseTwiceReuseFromScope(self):
    with self.cached_session():
      with tf.compat.v1.variable_scope('scope'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name='my_dense')
        vars1 = tf.compat.v1.trainable_variables()
      with tf.compat.v1.variable_scope('scope', reuse=True):
        core_layers.dense(inputs, 2, name='my_dense')
        vars2 = tf.compat.v1.trainable_variables()
      self.assertEqual(vars1, vars2)

  @tf_test_utils.run_deprecated_v1
  def testFunctionalDenseInitializerFromScope(self):
    with tf.compat.v1.variable_scope(
        'scope',
        initializer=tf.compat.v1.ones_initializer()), self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = _get_variable_dict_from_varstore()
      self.assertEqual(len(weights), 2)
      # Check that the matrix weights got initialized to ones (from scope).
      self.assertAllClose(weights['scope/dense/kernel'].read_value(),
                          np.ones((3, 2)))
      # Check that the bias still got initialized to zeros.
      self.assertAllClose(weights['scope/dense/bias'].read_value(), np.zeros(
          (2)))

  def testFunctionalDenseWithCustomGetter(self):
    called = [0]

    def custom_getter(getter, *args, **kwargs):
      called[0] += 1
      return getter(*args, **kwargs)

    with tf.compat.v1.variable_scope('test', custom_getter=custom_getter):
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
    self.assertEqual(called[0], 2)

  @tf_test_utils.run_deprecated_v1
  def testFunctionalDenseInScope(self):
    with self.cached_session():
      with tf.compat.v1.variable_scope('test'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name='my_dense')
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test/my_dense/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
      with tf.compat.v1.variable_scope('test1') as scope:
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name=scope)
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test1/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
      with tf.compat.v1.variable_scope('test2'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2)
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test2/dense/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testComputeOutputShape(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    ts = tf.TensorShape
    # pylint: disable=protected-access
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts(None))
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts([]))
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts([1]))
    self.assertEqual(
        [None, 2],
        dense.compute_output_shape((None, 3)).as_list())
    self.assertEqual(
        [None, 2],
        dense.compute_output_shape(ts([None, 3])).as_list())
    self.assertEqual(
        [None, 4, 2],
        dense.compute_output_shape(ts([None, 4, 3])).as_list())
    # pylint: enable=protected-access

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testConstraints(self):
    k_constraint = lambda x: x / tf.reduce_sum(x)
    b_constraint = lambda x: x / tf.reduce_max(x)
    dense = core_layers.Dense(2,
                              kernel_constraint=k_constraint,
                              bias_constraint=b_constraint)
    inputs = tf.random.uniform((5, 3), seed=1)
    dense(inputs)
    self.assertEqual(dense.kernel_constraint, k_constraint)
    self.assertEqual(dense.bias_constraint, b_constraint)
Example #12
class CheckpointingTests(test_combinations.TestCase):
    @tf_test_utils.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
    def testNamingWithOptimizer(self):
        input_value = tf.constant([[3.]])
        model = MyModel()
        # A nuisance Model using the same optimizer. Its slot variables should not
        # go in the checkpoint, since it is never depended on.
        other_model = MyModel()
        optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
        optimizer_step = tf.compat.v1.train.get_or_create_global_step()
        root_trackable = tf.train.Checkpoint(optimizer=optimizer,
                                             model=model,
                                             optimizer_step=optimizer_step)
        if tf.executing_eagerly():
            optimizer.minimize(lambda: model(input_value),
                               global_step=optimizer_step)
            optimizer.minimize(lambda: other_model(input_value),
                               global_step=optimizer_step)
        else:
            train_op = optimizer.minimize(model(input_value),
                                          global_step=optimizer_step)
            optimizer.minimize(other_model(input_value),
                               global_step=optimizer_step)
            self.evaluate(trackable_utils.gather_initializers(root_trackable))
            self.evaluate(train_op)
        named_variables, serialized_graph, _ = tf.__internal__.tracking.ObjectGraphView(
            root_trackable).serialize_object_graph()
        expected_checkpoint_names = (
            # Created in the root node, so no prefix.
            "optimizer_step",
            "model/_second/kernel",
            "model/_named_dense/kernel",
            "model/_named_dense/bias",
            # non-Layer dependency of the model
            "model/_non_layer/a_variable",
            # The optimizer creates two non-slot variables
            "optimizer/beta1_power",
            "optimizer/beta2_power",
            # Slot variables
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
        )
        suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
        expected_checkpoint_names = [
            name + suffix for name in expected_checkpoint_names
        ]
        named_variables = {v.name: v for v in named_variables}
        self.assertEqual(len(expected_checkpoint_names),
                         len(named_variables.keys()))
        # Check that we've created the right full_names of objects (not exhaustive)
        expected_names = {
            "optimizer_step" + suffix: "global_step",
            "model/_second/kernel" + suffix: "my_model/dense_1/kernel",
            "model/_named_dense/kernel" + suffix: "my_model/dense/kernel",
            "optimizer/beta1_power" + suffix: "beta1_power",
            "optimizer/beta2_power" + suffix: "beta2_power",
        }
        for nodes in serialized_graph.nodes:
            for attribute in nodes.attributes:
                expected_name = expected_names.pop(attribute.checkpoint_key,
                                                   None)
                if expected_name is not None:
                    self.assertEqual(expected_name, attribute.full_name)
        self.assertEmpty(expected_names)

        # Spot check the generated protocol buffers.
        self.assertEqual("optimizer",
                         serialized_graph.nodes[0].children[1].local_name)
        optimizer_node = serialized_graph.nodes[
            serialized_graph.nodes[0].children[1].node_id]
        self.assertEqual("beta1_power", optimizer_node.children[0].local_name)
        self.assertEqual(
            "beta1_power", serialized_graph.nodes[
                optimizer_node.children[0].node_id].attributes[0].full_name)
        self.assertEqual(
            "my_model/dense/kernel",
            serialized_graph.nodes[optimizer_node.slot_variables[
                0].original_variable_node_id].attributes[0].full_name)

        # We strip off the :0 suffix, as variable.name-based saving does.
        self.assertEqual(
            "my_model/dense/kernel/Adam",
            serialized_graph.nodes[optimizer_node.slot_variables[
                0].slot_variable_node_id].attributes[0].full_name)
        self.assertEqual(
            "my_model/dense/kernel/Adam:0",
            optimizer.get_slot(var=model._named_dense.kernel, name="m").name)
        self.assertEqual(
            "model/_named_dense/kernel" + suffix,
            serialized_graph.nodes[optimizer_node.slot_variables[
                0].original_variable_node_id].attributes[0].checkpoint_key)
        self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
        self.assertEqual(
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
            serialized_graph.nodes[optimizer_node.slot_variables[
                0].slot_variable_node_id].attributes[0].checkpoint_key)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testSaveRestore(self):
        with self.test_session():
            model = MyModel()
            optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
            root_trackable = tf.train.Checkpoint(optimizer=optimizer,
                                                 model=model)
            input_value = tf.constant([[3.]])
            if tf.executing_eagerly():
                optimizer.minimize(lambda: model(input_value))
            else:
                train_op = optimizer.minimize(model(input_value))
                # TODO(allenl): Make initialization more pleasant when graph building.
                root_trackable.save_counter  # pylint: disable=pointless-statement
                self.evaluate(
                    trackable_utils.gather_initializers(root_trackable))
                self.evaluate(train_op)
            prefix = os.path.join(self.get_temp_dir(), "ckpt")
            self.evaluate(
                tf.compat.v1.assign(model._named_dense.variables[1], [42.]))
            m_bias_slot = optimizer.get_slot(model._named_dense.variables[1],
                                             "m")
            self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5]))
            save_path = root_trackable.save(file_prefix=prefix)
            self.evaluate(
                tf.compat.v1.assign(model._named_dense.variables[1], [43.]))
            self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3))
            optimizer_variables = self.evaluate(optimizer.variables())
            self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.]))
            # Immediate restoration
            status = root_trackable.restore(
                save_path=save_path).assert_consumed()
            status.run_restore_ops()
            self.assertAllEqual([42.],
                                self.evaluate(model._named_dense.variables[1]))
            self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
            self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
            if not tf.executing_eagerly():
                return  # Restore-on-create is only supported when executing eagerly
            on_create_model = MyModel()
            on_create_optimizer = tf.compat.v1.train.AdamOptimizer(
                0.001,
                # Preserve beta1_power and beta2_power when applying gradients
                # so we can test that they've been restored correctly.
                beta1=1.0,
                beta2=1.0)
            on_create_root = tf.train.Checkpoint(optimizer=on_create_optimizer,
                                                 model=on_create_model)
            # Deferred restoration
            status = on_create_root.restore(save_path=save_path)
            status.assert_nontrivial_match()
            status.assert_existing_objects_matched()
            with self.assertRaises(AssertionError):
                status.assert_consumed()
            on_create_model(tf.constant([[3.]]))  # create variables
            self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
            self.assertAllEqual([42.],
                                self.evaluate(
                                    on_create_model._named_dense.variables[1]))
            on_create_m_bias_slot = on_create_optimizer.get_slot(
                on_create_model._named_dense.variables[1], "m")
            status.assert_existing_objects_matched()
            with self.assertRaises(AssertionError):
                status.assert_consumed()
            # Optimizer slot variables are created when the original variable is
            # restored.
            self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
            self.assertAllEqual(optimizer_variables[2:],
                                self.evaluate(on_create_optimizer.variables()))
            dummy_var = tf.Variable([1.])
            on_create_optimizer.minimize(loss=dummy_var.read_value)
            status.assert_existing_objects_matched()
            status.assert_consumed()
            beta1_power, beta2_power = (
                on_create_optimizer._get_beta_accumulators())
            self.assertAllEqual(optimizer_variables[0],
                                self.evaluate(beta1_power))
            self.assertAllEqual(optimizer_variables[1],
                                self.evaluate(beta2_power))
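
            # To summarize the deferred path above: restore() merely records
            # where each checkpointed value lives; a variable is filled in the
            # moment its owning object creates it (dense kernels on the first
            # model call, slot variables when their originals are restored,
            # beta accumulators on the first apply_gradients). Only then does
            # assert_consumed() stop raising.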

    # TODO(allenl): Debug garbage created by this test in python3.
    def testDeferredRestorationUsageEager(self):
        """An idiomatic eager execution example."""
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        for training_continuation in range(3):
            model = MyModel()
            optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
            root = tf.train.Checkpoint(
                optimizer=optimizer,
                model=model,
                optimizer_step=tf.compat.v1.train.get_or_create_global_step())
            root.restore(tf.train.latest_checkpoint(checkpoint_directory))
            for _ in range(num_training_steps):
                # TODO(allenl): Use a Dataset and serialize/checkpoint it.
                input_value = tf.constant([[3.]])
                optimizer.minimize(
                    lambda: model(input_value),  # pylint: disable=cell-var-from-loop
                    global_step=root.optimizer_step)
            root.save(file_prefix=checkpoint_prefix)
            self.assertEqual((training_continuation + 1) * num_training_steps,
                             root.optimizer_step.numpy())

    def testEagerDistributionStrategy(self):
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

        def _train_fn(optimizer, model, root):
            input_value = tf.constant([[3.]])
            optimizer.minimize(functools.partial(model, input_value),
                               global_step=root.optimizer_step)

        strategy = tf.distribute.MirroredStrategy()
        with strategy.scope():
            for training_continuation in range(3):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                root = tf.train.Checkpoint(
                    optimizer=optimizer,
                    model=model,
                    optimizer_step=(
                        tf.compat.v1.train.get_or_create_global_step()))
                root.restore(tf.train.latest_checkpoint(checkpoint_directory))

                for _ in range(num_training_steps):
                    strategy.extended.call_for_each_replica(
                        functools.partial(_train_fn, optimizer, model, root))
                root.save(file_prefix=checkpoint_prefix)
                self.assertEqual(
                    (training_continuation + 1) * num_training_steps,
                    root.optimizer_step.numpy())

    def testGraphDistributionStrategy(self):
        self.skipTest("b/121381184")
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")

        def _train_fn(optimizer, model, root):
            input_value = tf.constant([[3.]])
            return optimizer.minimize(functools.partial(model, input_value),
                                      global_step=root.optimizer_step)

        for training_continuation in range(3):
            with tf.Graph().as_default():
                strategy = tf.distribute.MirroredStrategy()
                with strategy.scope():
                    model = MyModel()
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                    root = tf.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        optimizer_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    status = root.restore(
                        tf.train.latest_checkpoint(checkpoint_directory))
                    train_op = strategy.extended.call_for_each_replica(
                        functools.partial(_train_fn, optimizer, model, root))
                    with self.session() as session:
                        if training_continuation > 0:
                            status.assert_consumed()
                        status.initialize_or_restore()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(file_prefix=checkpoint_prefix)
                self.assertEqual(
                    (training_continuation + 1) * num_training_steps,
                    root.optimizer_step.numpy())

    def testUsageGraph(self):
        """Expected usage when graph building."""
        with context.graph_mode():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with tf.Graph().as_default():
                    model = MyModel()
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                    root = tf.compat.v1.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        global_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    input_value = tf.constant([[3.]])
                    train_op = optimizer.minimize(model(input_value),
                                                  global_step=root.global_step)
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    with self.session(
                            graph=tf.compat.v1.get_default_graph()) as session:
                        status = root.restore(save_path=checkpoint_path)
                        status.initialize_or_restore(session=session)
                        if checkpoint_path is None:
                            self.assertEqual(0, training_continuation)
                            with self.assertRaises(AssertionError):
                                status.assert_consumed()
                            with self.assertRaises(AssertionError):
                                status.assert_existing_objects_matched()
                        else:
                            status.assert_consumed()
                            status.assert_existing_objects_matched()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(file_prefix=checkpoint_prefix,
                                  session=session)
                        self.assertEqual(
                            (training_continuation + 1) * num_training_steps,
                            session.run(root.global_step))
                        self.assertEqual(training_continuation + 1,
                                         session.run(root.save_counter))
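
    # `initialize_or_restore` above is effectively a two-way branch; a rough
    # sketch of its graph-mode behavior under the assumptions of this test:
    #
    #   if checkpoint_path is None:      # first continuation: nothing saved
    #       run the variable initializers
    #   else:                            # later continuations
    #       run the restore ops recorded by `root.restore(...)`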

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testAgnosticUsage(self):
        """Graph/eager agnostic usage."""
        # Does create garbage when executing eagerly due to ops.Graph() creation.
        with self.test_session():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            for training_continuation in range(3):
                with test_utils.device(should_use_gpu=True):
                    model = MyModel()
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                    root = tf.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        global_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    manager = tf.train.CheckpointManager(root,
                                                         checkpoint_directory,
                                                         max_to_keep=1)
                    status = root.restore(save_path=manager.latest_checkpoint)
                    input_value = tf.constant([[3.]])
                    train_fn = functools.partial(
                        optimizer.minimize,
                        functools.partial(model, input_value),
                        global_step=root.global_step)
                    if not tf.executing_eagerly():
                        train_fn = functools.partial(self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    manager.save()
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(root.global_step))
                    self.assertEqual(training_continuation + 1,
                                     self.evaluate(root.save_counter))
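
    # `tf.train.CheckpointManager` replaces the manual prefix bookkeeping in
    # the earlier tests; a minimal sketch of the same loop outside a test
    # harness (directory name illustrative):
    #
    #   manager = tf.train.CheckpointManager(root, "/tmp/ckpts", max_to_keep=1)
    #   root.restore(manager.latest_checkpoint)  # None on the first run
    #   for _ in range(num_training_steps):
    #       train_fn()
    #   manager.save()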

    # pylint: disable=cell-var-from-loop
    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testWithDefun(self):
        with self.test_session():
            num_training_steps = 2
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with test_utils.device(should_use_gpu=True):
                    model = MyModel()
                    # Don't actually train so we can test variable values
                    optimizer = tf.compat.v1.train.AdamOptimizer(0.)
                    root = tf.train.Checkpoint(
                        optimizer=optimizer,
                        model=model,
                        global_step=(
                            tf.compat.v1.train.get_or_create_global_step()))
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    status = root.restore(save_path=checkpoint_path)

                    def train_fn():
                        @tf.function
                        def _call_model(x):
                            return model(x)

                        with tf.GradientTape() as tape:
                            loss = _call_model(tf.constant([[3.]]))
                        gradients = tape.gradient(loss, model.variables)
                        return optimizer.apply_gradients(
                            zip(gradients, model.variables),
                            global_step=root.global_step)

                    if not tf.executing_eagerly():
                        train_fn = functools.partial(self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    if training_continuation > 0:
                        status.assert_consumed()
                        self.assertAllClose([[42.]],
                                            self.evaluate(model.variables[0]))
                    else:
                        self.evaluate(model.variables[0].assign([[42.]]))
                    root.save(file_prefix=checkpoint_prefix)
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(root.global_step))
                    self.assertEqual(training_continuation + 1,
                                     self.evaluate(root.save_counter))

    # pylint: enable=cell-var-from-loop

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testAnonymousVarsInInit(self):
        class Model(training.Model):
            def __init__(self):
                super().__init__()
                self.w = tf.Variable(0.0)
                self.b = tf.Variable(0.0)
                self.vars = [self.w, self.b]

            def call(self, x):
                return x * self.w + self.b

        model = Model()
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.05)
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
        for _ in range(2):
            checkpoint.save(checkpoint_prefix)
            with tf.GradientTape() as tape:
                loss = (tf.constant(1.) - model(tf.constant(1.)))**2
            grad = tape.gradient(loss, model.vars)
            optimizer.apply_gradients([(g, v)
                                       for g, v in zip(grad, model.vars)])

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def test_initialize_if_not_restoring(self):
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
            with test_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                # Do not save the optimizer with the checkpoint.
                root = tf.train.Checkpoint(
                    model=model,
                    global_step=tf.compat.v1.train.get_or_create_global_step())
                optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer)

                checkpoint_path = tf.train.latest_checkpoint(
                    checkpoint_directory)
                status = root.restore(save_path=checkpoint_path)
                input_value = tf.constant([[3.]])
                train_fn = functools.partial(
                    optimizer.minimize,
                    functools.partial(model, input_value),
                    global_step=root.global_step)
                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                status.initialize_or_restore()
                self.evaluate([v.initializer for v in optimizer.variables()])
                train_fn()
                model_save_path = root.save(file_prefix=checkpoint_prefix)
                self.evaluate(optimizer.variables()[0].assign(42.))
                optimizer_save_path = optimizer_checkpoint.save(
                    optimizer_only_prefix)

            # Restore into a graph with the optimizer
            with test_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
                root = tf.train.Checkpoint(
                    optimizer=optimizer,
                    model=model,
                    global_step=tf.compat.v1.train.get_or_create_global_step())
                status = root.restore(save_path=model_save_path)
                input_value = tf.constant([[3.]])
                train_fn = functools.partial(
                    optimizer.minimize,
                    functools.partial(model, input_value),
                    global_step=root.global_step)
                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                status.initialize_or_restore()
                train_fn()
                with self.assertRaises(AssertionError):
                    status.assert_existing_objects_matched()
                with self.assertRaises(AssertionError):
                    status.assert_consumed()

            # Make sure initialization doesn't clobber later restores
            with test_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = tf.compat.v1.train.AdamOptimizer(0.001, beta1=1.0)
                root = tf.train.Checkpoint(
                    optimizer=optimizer,
                    model=model,
                    global_step=tf.compat.v1.train.get_or_create_global_step())
                opt_root = tf.train.Checkpoint(optimizer=optimizer)
                status = root.restore(save_path=model_save_path)
                init_only_optimizer_status = opt_root.restore(save_path=None)
                optimizer_status = opt_root.restore(
                    save_path=optimizer_save_path)
                input_value = tf.constant([[3.]])
                train_fn = functools.partial(
                    optimizer.minimize,
                    functools.partial(model, input_value),
                    global_step=root.global_step)
                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                optimizer_status.run_restore_ops()
                status.initialize_or_restore()
                init_only_optimizer_status.initialize_or_restore()
                train_fn()
                self.assertEqual(42., self.evaluate(optimizer.variables()[0]))
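
    # The pattern exercised above in brief: the model and the optimizer can be
    # owned by separate Checkpoint objects and saved/restored independently; a
    # restore from the model-only checkpoint leaves the optimizer to plain
    # initialization, and a later optimizer-only restore overwrites that
    # initialization without clobbering the model. Sketch (paths illustrative):
    #
    #   tf.train.Checkpoint(model=model).restore(model_path)
    #   tf.train.Checkpoint(optimizer=optimizer).restore(opt_path)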
Example #13
class TimeDistributedTest(test_combinations.TestCase):
    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_timedistributed_dense(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.TimeDistributed(
                keras.layers.Dense(2), input_shape=(3, 4)
            )
        )
        model.compile(optimizer="rmsprop", loss="mse")
        model.fit(
            np.random.random((10, 3, 4)),
            np.random.random((10, 3, 2)),
            epochs=1,
            batch_size=10,
        )

        # test config
        model.get_config()

        # check whether the model variables are present in the
        # trackable list of objects
        checkpointed_object_ids = {
            id(o) for o in trackable_util.list_objects(model)
        }
        for v in model.variables:
            self.assertIn(id(v), checkpointed_object_ids)
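
        # Conceptually, TimeDistributed(Dense(2)) over a (batch, 3, 4) input
        # applies one shared Dense layer to each of the 3 temporal slices,
        # yielding (batch, 3, 2); a rough per-step equivalent, assuming a
        # `dense` layer and an `inputs` tensor:
        #
        #   outputs = tf.stack(
        #       [dense(inputs[:, t]) for t in range(inputs.shape[1])], axis=1)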

    def test_timedistributed_static_batch_size(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.TimeDistributed(
                keras.layers.Dense(2), input_shape=(3, 4), batch_size=10
            )
        )
        model.compile(optimizer="rmsprop", loss="mse")
        model.fit(
            np.random.random((10, 3, 4)),
            np.random.random((10, 3, 2)),
            epochs=1,
            batch_size=10,
        )

    def test_timedistributed_invalid_init(self):
        x = tf.constant(np.zeros((1, 1)).astype("float32"))
        with self.assertRaisesRegex(
            ValueError,
            "Please initialize `TimeDistributed` layer with a "
            "`tf.keras.layers.Layer` instance.",
        ):
            keras.layers.TimeDistributed(x)

    def test_timedistributed_conv2d(self):
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Conv2D(5, (2, 2), padding="same"),
                    input_shape=(2, 4, 4, 3),
                )
            )
            model.add(keras.layers.Activation("relu"))
            model.compile(optimizer="rmsprop", loss="mse")
            model.train_on_batch(
                np.random.random((1, 2, 4, 4, 3)),
                np.random.random((1, 2, 4, 4, 5)),
            )

            model = keras.models.model_from_json(model.to_json())
            model.summary()

    def test_timedistributed_stacked(self):
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Dense(2), input_shape=(3, 4)
                )
            )
            model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
            model.add(keras.layers.Activation("relu"))
            model.compile(optimizer="rmsprop", loss="mse")

            model.fit(
                np.random.random((10, 3, 4)),
                np.random.random((10, 3, 3)),
                epochs=1,
                batch_size=10,
            )

    def test_regularizers(self):
        with self.cached_session():
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Dense(
                        2, kernel_regularizer="l1", activity_regularizer="l1"
                    ),
                    input_shape=(3, 4),
                )
            )
            model.add(keras.layers.Activation("relu"))
            model.compile(optimizer="rmsprop", loss="mse")
            self.assertEqual(len(model.losses), 2)

    def test_TimeDistributed_learning_phase(self):
        with self.cached_session():
            keras.utils.set_random_seed(0)
            x = keras.layers.Input(shape=(3, 2))
            y = keras.layers.TimeDistributed(keras.layers.Dropout(0.999))(
                x, training=True
            )
            model = keras.models.Model(x, y)
            y = model.predict(np.random.random((10, 3, 2)))
            self.assertAllClose(np.mean(y), 0.0, atol=1e-1, rtol=1e-1)

    def test_TimeDistributed_batchnorm(self):
        with self.cached_session():
            # test that wrapped BN updates still work.
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.BatchNormalization(center=True, scale=True),
                    name="bn",
                    input_shape=(10, 2),
                )
            )
            model.compile(optimizer="rmsprop", loss="mse")
            # Assert that mean and variance are 0 and 1.
            td = model.layers[0]
            self.assertAllClose(td.get_weights()[2], np.array([0, 0]))
            assert np.array_equal(td.get_weights()[3], np.array([1, 1]))
            # Train
            model.train_on_batch(
                np.random.normal(loc=2, scale=2, size=(1, 10, 2)),
                np.broadcast_to(np.array([0, 1]), (1, 10, 2)),
            )
            # Assert that mean and variance changed.
            assert not np.array_equal(td.get_weights()[2], np.array([0, 0]))
            assert not np.array_equal(td.get_weights()[3], np.array([1, 1]))
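
            # The moving statistics checked above are exponential moving
            # averages updated during training, roughly:
            #
            #   moving_mean = momentum * moving_mean + (1 - momentum) * batch_mean
            #   moving_var  = momentum * moving_var  + (1 - momentum) * batch_var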

    def test_TimeDistributed_trainable(self):
        # test layers that need learning_phase to be set
        x = keras.layers.Input(shape=(3, 2))
        layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization())
        _ = layer(x)
        self.assertEqual(len(layer.trainable_weights), 2)
        layer.trainable = False
        assert not layer.trainable_weights
        layer.trainable = True
        assert len(layer.trainable_weights) == 2

    def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self):
        with self.cached_session():
            # test with unspecified shape and Embeddings with mask_zero
            model = keras.models.Sequential()
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.Embedding(5, 6, mask_zero=True),
                    input_shape=(None, None),
                )
            )  # N by t_1 by t_2 by 6
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.SimpleRNN(7, return_sequences=True)
                )
            )
            model.add(
                keras.layers.TimeDistributed(
                    keras.layers.SimpleRNN(8, return_sequences=False)
                )
            )
            model.add(keras.layers.SimpleRNN(1, return_sequences=False))
            model.compile(optimizer="rmsprop", loss="mse")
            model_input = np.random.randint(
                low=1, high=5, size=(10, 3, 4), dtype="int32"
            )
            for i in range(4):
                model_input[i, i:, i:] = 0
            model.fit(
                model_input, np.random.random((10, 1)), epochs=1, batch_size=10
            )
            mask_outputs = [model.layers[0].compute_mask(model.input)]
            for layer in model.layers[1:]:
                mask_outputs.append(
                    layer.compute_mask(layer.input, mask_outputs[-1])
                )
            func = keras.backend.function([model.input], mask_outputs[:-1])
            mask_outputs_val = func([model_input])
            ref_mask_val_0 = model_input > 0  # embedding layer
            ref_mask_val_1 = ref_mask_val_0  # first RNN layer
            ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1)  # second RNN layer
            ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2]
            for i in range(3):
                self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i])
            self.assertIs(mask_outputs[-1], None)  # final layer

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_TimeDistributed_with_masking_layer(self):
        # test with Masking layer
        model = keras.models.Sequential()
        model.add(
            keras.layers.TimeDistributed(
                keras.layers.Masking(
                    mask_value=0.0,
                ),
                input_shape=(None, 4),
            )
        )
        model.add(keras.layers.TimeDistributed(keras.layers.Dense(5)))
        model.compile(optimizer="rmsprop", loss="mse")
        model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
        for i in range(4):
            model_input[i, i:, :] = 0.0
        model.compile(optimizer="rmsprop", loss="mse")
        model.fit(
            model_input, np.random.random((10, 3, 5)), epochs=1, batch_size=6
        )
        mask_outputs = [model.layers[0].compute_mask(model.input)]
        mask_outputs += [
            model.layers[1].compute_mask(
                model.layers[1].input, mask_outputs[-1]
            )
        ]
        func = keras.backend.function([model.input], mask_outputs)
        mask_outputs_val = func([model_input])
        self.assertEqual((mask_outputs_val[0]).all(), model_input.all())
        self.assertEqual((mask_outputs_val[1]).all(), model_input.all())

    def test_TimeDistributed_with_different_time_shapes(self):
        time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
        ph_1 = keras.backend.placeholder(shape=(None, 10, 13))
        out_1 = time_dist(ph_1)
        self.assertEqual(out_1.shape.as_list(), [None, 10, 5])

        ph_2 = keras.backend.placeholder(shape=(None, 1, 13))
        out_2 = time_dist(ph_2)
        self.assertEqual(out_2.shape.as_list(), [None, 1, 5])

        ph_3 = keras.backend.placeholder(shape=(None, 1, 18))
        with self.assertRaisesRegex(ValueError, "is incompatible with"):
            time_dist(ph_3)

    def test_TimeDistributed_with_invalid_dimensions(self):
        time_dist = keras.layers.TimeDistributed(keras.layers.Dense(5))
        ph = keras.backend.placeholder(shape=(None, 10))
        with self.assertRaisesRegex(
            ValueError,
            "`TimeDistributed` Layer should be passed an `input_shape `",
        ):
            time_dist(ph)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_TimeDistributed_reshape(self):
        class NoReshapeLayer(keras.layers.Layer):
            def call(self, inputs):
                return inputs

        # Built-in layers that aren't stateful use the reshape implementation.
        td1 = keras.layers.TimeDistributed(keras.layers.Dense(5))
        self.assertTrue(td1._always_use_reshape)

        # Built-in layers that are stateful don't use the reshape
        # implementation.
        td2 = keras.layers.TimeDistributed(
            keras.layers.RNN(keras.layers.SimpleRNNCell(10), stateful=True)
        )
        self.assertFalse(td2._always_use_reshape)

        # Custom layers are not allowlisted for the fast reshape implementation.
        td3 = keras.layers.TimeDistributed(NoReshapeLayer())
        self.assertFalse(td3._always_use_reshape)
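
        # The reshape fast path folds batch and time into one axis, applies
        # the wrapped layer once, and unfolds afterwards; a rough sketch:
        #
        #   x = tf.reshape(inputs, (-1,) + inner_shape)     # (B*T, ...)
        #   y = wrapped_layer(x)
        #   y = tf.reshape(y, (batch, time) + outer_shape)
        #
        # Stateful wrapped layers (e.g. a stateful RNN) must see the true
        # batch dimension, so they take the per-timestep path instead.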

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_TimeDistributed_output_shape_return_types(self):
        class TestLayer(keras.layers.Layer):
            def call(self, inputs):
                return tf.concat([inputs, inputs], axis=-1)

            def compute_output_shape(self, input_shape):
                output_shape = tf.TensorShape(input_shape).as_list()
                output_shape[-1] = output_shape[-1] * 2
                output_shape = tf.TensorShape(output_shape)
                return output_shape

        class TestListLayer(TestLayer):
            def compute_output_shape(self, input_shape):
                shape = super().compute_output_shape(input_shape)
                return shape.as_list()

        class TestTupleLayer(TestLayer):
            def compute_output_shape(self, input_shape):
                shape = super().compute_output_shape(input_shape)
                return tuple(shape.as_list())

        # Layers can specify output shape as list/tuple/TensorShape
        test_layers = [TestLayer, TestListLayer, TestTupleLayer]
        for layer in test_layers:
            input_layer = keras.layers.TimeDistributed(layer())
            inputs = keras.backend.placeholder(shape=(None, 2, 4))
            output = input_layer(inputs)
            self.assertEqual(output.shape.as_list(), [None, 2, 8])
            self.assertEqual(
                input_layer.compute_output_shape([None, 2, 4]).as_list(),
                [None, 2, 8],
            )

    @test_combinations.run_all_keras_modes(always_skip_v1=True)
    # TODO(scottzhu): check why v1 session failed.
    def test_TimeDistributed_with_mask_first_implementation(self):
        np.random.seed(100)
        rnn_layer = keras.layers.LSTM(4, return_sequences=True, stateful=True)

        data = np.array(
            [
                [[[1.0], [1.0]], [[0.0], [1.0]]],
                [[[1.0], [0.0]], [[1.0], [1.0]]],
                [[[1.0], [0.0]], [[1.0], [1.0]]],
            ]
        )
        x = keras.layers.Input(shape=(2, 2, 1), batch_size=3)
        x_masking = keras.layers.Masking()(x)
        y = keras.layers.TimeDistributed(rnn_layer)(x_masking)
        model_1 = keras.models.Model(x, y)
        model_1.compile(
            "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly()
        )
        output_with_mask = model_1.predict(data, steps=1)

        y = keras.layers.TimeDistributed(rnn_layer)(x)
        model_2 = keras.models.Model(x, y)
        model_2.compile(
            "rmsprop", "mse", run_eagerly=test_utils.should_run_eagerly()
        )
        output = model_2.predict(data, steps=1)

        self.assertNotAllClose(output_with_mask, output, atol=1e-7)

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        *test_utils.generate_combinations_with_testcase_name(
            layer=[keras.layers.LSTM, keras.layers.Dense]
        )
    )
    def test_TimeDistributed_with_ragged_input(self, layer):
        if tf.executing_eagerly():
            self.skipTest("b/143103634")
        np.random.seed(100)
        layer = layer(4)
        ragged_data = tf.ragged.constant(
            [
                [[[1.0], [1.0]], [[2.0], [2.0]]],
                [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
                [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]],
            ],
            ragged_rank=1,
        )

        x_ragged = keras.Input(shape=(None, 2, 1), dtype="float32", ragged=True)
        y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
        model_1 = keras.models.Model(x_ragged, y_ragged)
        model_1._run_eagerly = test_utils.should_run_eagerly()
        output_ragged = model_1.predict(ragged_data, steps=1)

        x_dense = keras.Input(shape=(None, 2, 1), dtype="float32")
        masking = keras.layers.Masking()(x_dense)
        y_dense = keras.layers.TimeDistributed(layer)(masking)
        model_2 = keras.models.Model(x_dense, y_dense)
        dense_data = ragged_data.to_tensor()
        model_2._run_eagerly = test_utils.should_run_eagerly()
        output_dense = model_2.predict(dense_data, steps=1)

        output_ragged = convert_ragged_tensor_value(output_ragged)
        self.assertAllEqual(output_ragged.to_tensor(), output_dense)

    @test_combinations.run_all_keras_modes
    def test_TimeDistributed_with_ragged_input_with_batch_size(self):
        np.random.seed(100)
        layer = keras.layers.Dense(16)

        ragged_data = tf.ragged.constant(
            [
                [[[1.0], [1.0]], [[2.0], [2.0]]],
                [[[4.0], [4.0]], [[5.0], [5.0]], [[6.0], [6.0]]],
                [[[7.0], [7.0]], [[8.0], [8.0]], [[9.0], [9.0]]],
            ],
            ragged_rank=1,
        )

        # Use the first implementation by specifying batch_size
        x_ragged = keras.Input(
            shape=(None, 2, 1), batch_size=3, dtype="float32", ragged=True
        )
        y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
        model_1 = keras.models.Model(x_ragged, y_ragged)
        output_ragged = model_1.predict(ragged_data, steps=1)

        x_dense = keras.Input(shape=(None, 2, 1), batch_size=3, dtype="float32")
        masking = keras.layers.Masking()(x_dense)
        y_dense = keras.layers.TimeDistributed(layer)(masking)
        model_2 = keras.models.Model(x_dense, y_dense)
        dense_data = ragged_data.to_tensor()
        output_dense = model_2.predict(dense_data, steps=1)

        output_ragged = convert_ragged_tensor_value(output_ragged)
        self.assertAllEqual(output_ragged.to_tensor(), output_dense)

    def test_TimeDistributed_set_static_shape(self):
        layer = keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3)))
        inputs = keras.Input(batch_shape=(1, None, 32, 32, 1))
        outputs = layer(inputs)
        # Make sure the batch dim is not lost after array_ops.reshape.
        self.assertListEqual(outputs.shape.as_list(), [1, None, 30, 30, 16])

    @test_combinations.run_all_keras_modes
    def test_TimeDistributed_with_mimo(self):
        dense_1 = keras.layers.Dense(8)
        dense_2 = keras.layers.Dense(16)

        class TestLayer(keras.layers.Layer):
            def __init__(self):
                super().__init__()
                self.dense_1 = dense_1
                self.dense_2 = dense_2

            def call(self, inputs):
                return self.dense_1(inputs[0]), self.dense_2(inputs[1])

            def compute_output_shape(self, input_shape):
                output_shape_1 = self.dense_1.compute_output_shape(
                    input_shape[0]
                )
                output_shape_2 = self.dense_2.compute_output_shape(
                    input_shape[1]
                )
                return output_shape_1, output_shape_2

        np.random.seed(100)
        layer = TestLayer()

        data_1 = tf.constant(
            [
                [[[1.0], [1.0]], [[2.0], [2.0]]],
                [[[4.0], [4.0]], [[5.0], [5.0]]],
                [[[7.0], [7.0]], [[8.0], [8.0]]],
            ]
        )

        data_2 = tf.constant(
            [
                [[[1.0], [1.0]], [[2.0], [2.0]]],
                [[[4.0], [4.0]], [[5.0], [5.0]]],
                [[[7.0], [7.0]], [[8.0], [8.0]]],
            ]
        )

        x1 = keras.Input(shape=(None, 2, 1), dtype="float32")
        x2 = keras.Input(shape=(None, 2, 1), dtype="float32")
        y1, y2 = keras.layers.TimeDistributed(layer)([x1, x2])
        model_1 = keras.models.Model([x1, x2], [y1, y2])
        model_1.compile(
            optimizer="rmsprop",
            loss="mse",
            run_eagerly=test_utils.should_run_eagerly(),
        )
        output_1 = model_1.predict((data_1, data_2), steps=1)

        y1 = dense_1(x1)
        y2 = dense_2(x2)
        model_2 = keras.models.Model([x1, x2], [y1, y2])
        output_2 = model_2.predict((data_1, data_2), steps=1)

        self.assertAllClose(output_1, output_2)

        model_1.fit(
            x=[
                np.random.random((10, 2, 2, 1)),
                np.random.random((10, 2, 2, 1)),
            ],
            y=[
                np.random.random((10, 2, 2, 8)),
                np.random.random((10, 2, 2, 16)),
            ],
            epochs=1,
            batch_size=3,
        )

    def test_TimeDistributed_Attention(self):
        query_input = keras.layers.Input(shape=(None, 1, 10), dtype="float32")
        value_input = keras.layers.Input(shape=(None, 4, 10), dtype="float32")

        # Query-value attention of shape [batch_size, Tq, filters].
        query_value_attention_seq = keras.layers.TimeDistributed(
            keras.layers.Attention()
        )([query_input, value_input])
        model = keras.models.Model(
            [query_input, value_input], query_value_attention_seq
        )
        model.compile(optimizer="rmsprop", loss="mse")
        model.fit(
            [
                np.random.random((10, 8, 1, 10)),
                np.random.random((10, 8, 4, 10)),
            ],
            np.random.random((10, 8, 1, 10)),
            epochs=1,
            batch_size=10,
        )

        # test config and serialization/deserialization
        model.get_config()
        model = keras.models.model_from_json(model.to_json())
        model.summary()
Example #14
class ListTests(test_combinations.TestCase):
    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTracking(self):
        with self.test_session():
            model = HasList()
            output = model(tf.ones([32, 2]))
            self.assertAllEqual([32, 12], output.shape)
            self.assertEqual(11, len(model.layers))
            self.assertEqual(10, len(model.layer_list.layers))
            self.assertEqual(
                len(model.layers),
                len(model.layer_list.layers + model.layers_with_updates))
            for index in range(10):
                self.assertEqual(3 + index,
                                 model.layer_list.layers[index].units)
            children = model._trackable_children()
            self.assertLen(children, 2)
            self.assertIs(model.layer_list, children["layer_list"])
            self.assertIs(model.layers_with_updates,
                          children["layers_with_updates"])
            self.assertLen(children["layer_list"]._trackable_children(), 10)
            self.evaluate([v.initializer for v in model.variables])
            self.evaluate(model.variables[0].assign([[1., 2., 3.],
                                                     [4., 5., 6.]]))
            save_path = os.path.join(self.get_temp_dir(), "ckpt")
            model.save_weights(save_path)
            self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
            model.load_weights(save_path)
            self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                                self.evaluate(model.variables[0]))
            v = tf.Variable(1.)
            model.var_list = [v]
        self.assertTrue(any(v is t for t in model.variables))
        self.assertTrue(any(v is t for t in model.trainable_variables))
        self.assertFalse(any(v is t for t in model.non_trainable_variables))
        self.assertTrue(
            any(model.layer_list[0].trainable_weights[0] is t
                for t in model.trainable_weights))

    def testSubModelTracking(self):
        model = training.Model()
        model.v = tf.Variable(1.)
        self.assertIn(model.v, model.trainable_weights)
        model2 = training.Model()
        model2.m = [model]
        self.assertIn(model.v, model2.trainable_weights)

    def testSubSequentialTracking(self):
        class _Subclassed(training.Model):
            def __init__(self, wrapped):
                super(_Subclassed, self).__init__()
                self._wrapped = wrapped

            def call(self, x):
                return self._wrapped(x)

        model = sequential.Sequential()
        layer = core.Dense(1)
        model.add(layer)
        model2 = _Subclassed(model)
        model2(tf.ones([1, 2]))
        model2.m = [model]
        self.assertIn(layer.kernel, model2.trainable_weights)

    def testLayerTrackedThroughSequential(self):
        class AttrDict(dict):
            def __init__(self, *args, **kwargs):
                super(AttrDict, self).__init__(*args, **kwargs)
                self.__dict__ = self

        def ffnet(layer_sizes, name):
            ff = sequential.Sequential(name=name)
            for i, width in enumerate(layer_sizes):
                ff.add(
                    core.Dense(width,
                               activation=("relu" if i < len(layer_sizes) - 1
                                           else None)))
            return ff

        class MyModel2(training.Model):
            def __init__(self, config, name="my_model_2"):
                super(MyModel2, self).__init__(name=name)
                self._num_tokens = config.num_tokens

                # list of sub-models
                self._ffnet = [
                    ffnet(config.module_layers + (self._num_tokens, ), "ff")
                ]

            def null_input(self):
                return tf.zeros([1, self._num_tokens], dtype=tf.float32)

            def call(self, input_, module_index=None):
                return self._ffnet[0](input_)

        m2 = MyModel2(AttrDict(num_tokens=5, module_layers=(50, 30)))

        # Construct
        m2(m2.null_input())
        self.assertLen(m2.trainable_variables, 6)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testUpdatesForwarded(self):
        model = HasList()
        model_input = tf.ones([32, 2])
        model(model_input)
        if tf.executing_eagerly():
            self.assertEqual(0, len(model.updates))
        else:
            self.assertGreater(len(model.layers_with_updates[0].updates), 0)
            self.assertEqual(set(model.layers_with_updates[0].updates),
                             set(model.updates))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testLossesForwarded(self):
        model = HasList()
        model_input = tf.ones([32, 2])
        model(model_input)
        self.assertEqual(2, len(model.losses))

    def testModelContainersCompareEqual(self):
        class HasEqualContainers(training.Model):
            def __init__(self):
                super(HasEqualContainers, self).__init__()
                self.l1 = []
                self.l2 = []

        model = HasEqualContainers()
        first_layer = HasEqualContainers()
        model.l1.append(first_layer)
        second_layer = HasEqualContainers()
        model.l2.append(second_layer)
        self.assertEqual([first_layer, second_layer], model.layers)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTensorConversion(self):
        class ListToTensor(training.Model):
            def __init__(self):
                super(ListToTensor, self).__init__()
                self.l = [1., 2., 3.]

        self.assertAllEqual([1., 2., 3.],
                            self.evaluate(tf.constant(ListToTensor().l)))

        self.assertAllEqual([1., 2., 3.],
                            self.evaluate(
                                tf.raw_ops.Pack(values=ListToTensor().l)))
Example #15
class TupleTests(test_combinations.TestCase):
    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTracking(self):
        with self.test_session():
            model = HasTuple()
            output = model(tf.ones([32, 2]))
            self.assertAllEqual([32, 5], output.shape.as_list())
            self.assertLen(model.layers, 4)
            self.assertLen(model.layer_list.layers, 3)
            self.assertEqual(
                len(model.layers),
                len(
                    tuple(model.layer_list.layers) +
                    model.layers_with_updates))
            self.assertEqual(3, model.layer_list.layers[0].units)
            self.assertEqual(4, model.layer_list.layers[1].units)
            self.assertEqual(5, model.layer_list.layers[2].units)
            self.assertLen(model._trackable_children(), 2)
            self.assertIs(model.layer_list,
                          model._trackable_children()["layer_list"])
            self.assertIs(model.layers_with_updates,
                          model._trackable_children()["layers_with_updates"])
            self.assertLen(model.layer_list._trackable_children(), 3)
            self.evaluate([v.initializer for v in model.variables])
            self.evaluate(model.variables[0].assign([[1., 2., 3.],
                                                     [4., 5., 6.]]))
            save_path = os.path.join(self.get_temp_dir(), "ckpt")
            model.save_weights(save_path)
            self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
            model.load_weights(save_path)
            self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                                self.evaluate(model.variables[0]))
            v = tf.Variable(1.)
            model.var_list = (v, )
            self.assertIn(id(v), [id(obj) for obj in model.variables])
            self.assertIn(id(v),
                          [id(obj) for obj in model.trainable_variables])
            self.assertNotIn(
                id(v), [id(obj) for obj in model.non_trainable_variables])
            self.assertIn(id(model.layer_list[0].trainable_weights[0]),
                          [id(obj) for obj in model.trainable_weights])

    @parameterized.named_parameters(
        ("Module", tf.Module),
        ("Model", training.Model),
    )
    def testSubModelTracking(self, module_subclass):
        model = module_subclass()
        model.v = tf.Variable(1.)
        self.assertIn(model.v, model.trainable_variables)
        model2 = module_subclass()
        model2.m = (model, )
        self.assertIn(model.v, model2.trainable_variables)

    def testSubSequentialTracking(self):
        class _Subclassed(training.Model):
            def __init__(self, wrapped):
                super(_Subclassed, self).__init__()
                self._wrapped = wrapped

            def call(self, x):
                return self._wrapped(x)

        model = sequential.Sequential()
        layer = core.Dense(1)
        model.add(layer)
        model2 = _Subclassed(model)
        model2(tf.ones([1, 2]))
        model2.m = (model, )
        self.assertIn(layer.kernel, model2.trainable_weights)

    def testUpdatesForwarded(self):
        with tf.Graph().as_default():
            model = HasTuple()
            model_input = tf.ones([32, 2])
            model(model_input)
            self.assertNotEmpty(model.layers_with_updates[0].updates)
            self.assertEqual(set(model.layers_with_updates[0].updates),
                             set(model.updates))

        model = HasTuple()
        model_input = tf.ones([32, 2])
        model(model_input)
        self.assertEmpty(model.updates)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testLossesForwarded(self):
        model = HasTuple()
        model_input = tf.ones([32, 2])
        model(model_input)
        self.assertLen(model.losses, 1)

    def testModelContainersCompareEqual(self):
        class HasEqualContainers(training.Model):
            def __init__(self):
                super(HasEqualContainers, self).__init__()
                self.l1 = ()
                self.l2 = ()

        model = HasEqualContainers()
        first_layer = HasEqualContainers()
        model.l1 = (first_layer, )
        second_layer = HasEqualContainers()
        model.l2 = (second_layer, )
        self.assertEqual((first_layer, ), model.l1)
        d = {model.l1: 1, model.l2: 2}
        self.assertEqual(1, d[model.l1])
        self.assertEqual(1, d[(first_layer, )])
        self.assertEqual(2, d[model.l2])
        self.assertEqual(2, d[(second_layer, )])
        self.assertEqual([first_layer, second_layer], model.layers)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testTensorConversion(self):
        class TupleToTensor(training.Model):
            def __init__(self):
                super(TupleToTensor, self).__init__()
                self.l = (1., 2., 3.)

        self.assertAllEqual((1., 2., 3.),
                            self.evaluate(tf.constant(TupleToTensor().l)))

        self.assertAllEqual(
            (1., 2., 3.),
            self.evaluate(tf.raw_ops.Pack(values=TupleToTensor().l)))
Example #16
class RMSpropOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    def _rmsprop_update_numpy(
        self, var, g, mg, rms, mom, lr, rho, momentum, epsilon, centered
    ):
        rms_t = rms * rho + (1 - rho) * g * g
        if centered:
            mg_t = mg * rho + (1 - rho) * g
            denom_t = rms_t - mg_t * mg_t
        else:
            mg_t = mg
            denom_t = rms_t
        if momentum > 0.0:
            mom_t = momentum * mom + lr * g / (np.sqrt(denom_t + epsilon))
            var_t = var - mom_t
        else:
            mom_t = mom
            var_t = var - lr * g / (np.sqrt(denom_t) + epsilon)
        return var_t, mg_t, rms_t, mom_t
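
    # The reference update above follows the standard (optionally centered)
    # RMSprop recurrences; note that the epsilon placement differs between the
    # momentum and plain branches, exactly as written in the numpy code:
    #
    #   rms_t = rho * rms + (1 - rho) * g**2
    #   mg_t  = rho * mg + (1 - rho) * g                       # centered only
    #   denom = rms_t - mg_t**2 if centered else rms_t
    #   mom_t = momentum * mom + lr * g / sqrt(denom + eps)    # momentum > 0
    #   var_t = var - mom_t
    #   var_t = var - lr * g / (sqrt(denom) + eps)             # momentum == 0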

    def _sparse_rmsprop_update_numpy(
        self,
        var,
        gindexs,
        gvalues,
        mg,
        rms,
        mom,
        lr,
        rho,
        momentum,
        epsilon,
        centered,
    ):
        mg_t = copy.deepcopy(mg)
        rms_t = copy.deepcopy(rms)
        mom_t = copy.deepcopy(mom)
        var_t = copy.deepcopy(var)
        for i in range(len(gindexs)):
            gindex = gindexs[i]
            gvalue = gvalues[i]
            rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue
            if centered:
                mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue
                denom_t = rms_t[gindex] - mg_t[gindex] * mg_t[gindex]
            else:
                denom_t = rms_t[gindex]
            if momentum > 0.0:
                mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(
                    denom_t + epsilon
                )
                var_t[gindex] = var[gindex] - mom_t[gindex]
            else:
                mom_t[gindex] = mom[gindex]
                var_t[gindex] = var[gindex] - lr * gvalue / (
                    np.sqrt(denom_t) + epsilon
                )
        return var_t, mg_t, rms_t, mom_t

    def testDense(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (
            dtype,
            learning_rate,
            rho,
            momentum,
            epsilon,
            centered,
        ) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu():  # noqa: E501
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, dtype=dtype)
                var1 = tf.Variable(var1_np, dtype=dtype)
                grads0 = tf.constant(grads0_np, dtype=dtype)
                grads1 = tf.constant(grads1_np, dtype=dtype)
                opt = rmsprop.RMSprop(
                    learning_rate=learning_rate,
                    rho=rho,
                    momentum=momentum,
                    epsilon=epsilon,
                    centered=centered,
                )

                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1])
                )
                self.evaluate(tf.compat.v1.global_variables_initializer())

                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    mg1 = opt.get_slot(var1, "mg")
                else:
                    mg0 = None
                    mg1 = None

                if momentum > 0.0:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)

                mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    (
                        var0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                    ) = self._rmsprop_update_numpy(
                        var0_np,
                        grads0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )
                    (
                        var1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                    ) = self._rmsprop_update_numpy(
                        var1_np,
                        grads1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0)
                        )
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1)
                        )
                    if momentum > 0.0:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0)
                        )
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1)
                        )
                    self.assertAllCloseAccordingToType(
                        rms0_np, self.evaluate(rms0)
                    )
                    self.assertAllCloseAccordingToType(
                        rms1_np, self.evaluate(rms1)
                    )
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0)
                    )
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1)
                    )

    def testDenseWithLearningRateDecay(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            var0_np = np.array([1.0, 2.0])
            grads0_np = np.array([0.1, 0.2])
            var1_np = np.array([3.0, 4.0])
            grads1_np = np.array([0.01, 0.2])

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)
            learning_rate = 0.01
            rho = 0.9
            momentum = 0.0
            epsilon = 1e-7
            centered = False
            decay = 0.5
            opt = rmsprop.RMSprop(
                learning_rate=learning_rate,
                rho=rho,
                momentum=momentum,
                epsilon=epsilon,
                centered=centered,
                decay=decay,
            )

            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            if momentum > 0.0:
                mom0 = opt.get_slot(var0, "momentum")
                mom1 = opt.get_slot(var1, "momentum")
            else:
                mom0 = None
                mom1 = None

            mg0_np = np.array([0.0, 0.0])
            mg1_np = np.array([0.0, 0.0])
            rms0_np = np.array([0.0, 0.0])
            rms1_np = np.array([0.0, 0.0])
            mom0_np = np.array([0.0, 0.0])
            mom1_np = np.array([0.0, 0.0])

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 2 steps of RMSprop
            for t in range(2):
                self.evaluate(update)

                lr = learning_rate / (1 + decay * t)
                var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                    var0_np,
                    grads0_np,
                    mg0_np,
                    rms0_np,
                    mom0_np,
                    lr,
                    rho,
                    momentum,
                    epsilon,
                    centered,
                )
                var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                    var1_np,
                    grads1_np,
                    mg1_np,
                    rms1_np,
                    mom1_np,
                    lr,
                    rho,
                    momentum,
                    epsilon,
                    centered,
                )

                # Validate updated params
                self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
                self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
                if momentum > 0.0:
                    self.assertAllCloseAccordingToType(
                        mom0_np, self.evaluate(mom0)
                    )
                    self.assertAllCloseAccordingToType(
                        mom1_np, self.evaluate(mom1)
                    )
                self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

    def testDenseWithLearningRateInverseTimeDecay(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            var0_np = np.array([1.0, 2.0])
            grads0_np = np.array([0.1, 0.2])
            var1_np = np.array([3.0, 4.0])
            grads1_np = np.array([0.01, 0.2])

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)
            learning_rate = 0.01
            rho = 0.9
            momentum = 0.0
            epsilon = 1e-7
            centered = False
            decay = 0.5
            lr_schedule = learning_rate_schedule.InverseTimeDecay(
                learning_rate, decay_steps=1.0, decay_rate=decay
            )
            opt = rmsprop.RMSprop(
                learning_rate=lr_schedule,
                rho=rho,
                momentum=momentum,
                epsilon=epsilon,
                centered=centered,
            )

            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            if momentum > 0.0:
                mom0 = opt.get_slot(var0, "momentum")
                mom1 = opt.get_slot(var1, "momentum")
            else:
                mom0 = None
                mom1 = None

            mg0_np = np.array([0.0, 0.0])
            mg1_np = np.array([0.0, 0.0])
            rms0_np = np.array([0.0, 0.0])
            rms1_np = np.array([0.0, 0.0])
            mom0_np = np.array([0.0, 0.0])
            mom1_np = np.array([0.0, 0.0])

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 2 steps of RMSprop
            for t in range(2):
                self.evaluate(update)

                lr = learning_rate / (1 + decay * t)
                var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                    var0_np,
                    grads0_np,
                    mg0_np,
                    rms0_np,
                    mom0_np,
                    lr,
                    rho,
                    momentum,
                    epsilon,
                    centered,
                )
                var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                    var1_np,
                    grads1_np,
                    mg1_np,
                    rms1_np,
                    mom1_np,
                    lr,
                    rho,
                    momentum,
                    epsilon,
                    centered,
                )

                # Validate updated params
                self.assertAllCloseAccordingToType(rms0_np, self.evaluate(rms0))
                self.assertAllCloseAccordingToType(rms1_np, self.evaluate(rms1))
                if momentum > 0.0:
                    self.assertAllCloseAccordingToType(
                        mom0_np, self.evaluate(mom0)
                    )
                    self.assertAllCloseAccordingToType(
                        mom1_np, self.evaluate(mom1)
                    )
                self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x
                    )
                    return pred * pred

                sgd_op = rmsprop.RMSprop(
                    learning_rate=1.0,
                    rho=0.0,
                    momentum=0.0,
                    epsilon=0.0,
                    centered=False,
                ).minimize(loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType(
                    [[1.0, 2.0]], self.evaluate(var0)
                )
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType(
                    [[0.0, 1.0]], self.evaluate(var0), atol=0.01
                )
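
    # Sanity check of the expectation above: with rho = 0 and epsilon = 0 the
    # step is lr * g / sqrt(g**2) = sign(g). Here pred = 1*4 + 2*5 = 14 and
    # g = 2 * pred * [4, 5] = [112, 140], both positive, so one step moves
    # [1.0, 2.0] to [0.0, 1.0].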

    def testMinimizeSparseResourceVariableCentered(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x
                    )
                    return pred * pred

                sgd_op = rmsprop.RMSprop(
                    learning_rate=1.0,
                    rho=0.0,
                    momentum=0.0,
                    epsilon=1.0,
                    centered=True,
                ).minimize(loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType(
                    [[1.0, 2.0]], self.evaluate(var0)
                )
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType(
                    [[-111, -138]], self.evaluate(var0), atol=0.01
                )
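
    # Worked check of the centered case: with rho = 0, rms_t = g**2 and
    # mg_t = g, so denom = rms_t - mg_t**2 = 0 and one step subtracts
    # g / (sqrt(0) + epsilon) = g. With pred = 1*4 + 2*5 = 14 and
    # g = 2 * 14 * [4, 5] = [112, 140], the result is [1 - 112, 2 - 140]
    # = [-111, -138].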

    def testSparse(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (
            dtype,
            learning_rate,
            rho,
            momentum,
            epsilon,
            centered,
        ) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(), test_utils.use_gpu():  # noqa: E501
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np),
                    tf.constant(grads0_np_indices),
                    tf.constant([1]),
                )
                grads1_np_indices = np.array([1], dtype=np.int32)
                grads1 = tf.IndexedSlices(
                    tf.constant(grads1_np),
                    tf.constant(grads1_np_indices),
                    tf.constant([1]),
                )
                opt = rmsprop.RMSprop(
                    learning_rate=learning_rate,
                    rho=rho,
                    momentum=momentum,
                    epsilon=epsilon,
                    centered=centered,
                )
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1])
                )
                self.evaluate(tf.compat.v1.global_variables_initializer())

                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    self.assertIsNotNone(mg0)
                    mg1 = opt.get_slot(var1, "mg")
                    self.assertIsNotNone(mg1)
                else:
                    mg0 = None
                    mg1 = None
                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)
                if momentum > 0.0:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    (
                        var0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                    ) = self._sparse_rmsprop_update_numpy(
                        var0_np,
                        grads0_np_indices,
                        grads0_np,
                        mg0_np,
                        rms0_np,
                        mom0_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )
                    (
                        var1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                    ) = self._sparse_rmsprop_update_numpy(
                        var1_np,
                        grads1_np_indices,
                        grads1_np,
                        mg1_np,
                        rms1_np,
                        mom1_np,
                        learning_rate,
                        rho,
                        momentum,
                        epsilon,
                        centered,
                    )

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0)
                        )
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1)
                        )
                    self.assertAllCloseAccordingToType(
                        rms0_np, self.evaluate(rms0)
                    )
                    self.assertAllCloseAccordingToType(
                        rms1_np, self.evaluate(rms1)
                    )
                    if momentum > 0.0:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0)
                        )
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1)
                        )
                    self.assertAllCloseAccordingToType(
                        var0_np, self.evaluate(var0)
                    )
                    self.assertAllCloseAccordingToType(
                        var1_np, self.evaluate(var1)
                    )

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testCallableParams(self):
        for dtype in _DATA_TYPES:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)

            learning_rate = lambda: 2.0
            rho = lambda: 0.9
            momentum = lambda: 0.0
            epsilon = 1.0
            opt = rmsprop.RMSprop(learning_rate, rho, momentum, epsilon)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))
            # Step 1: rms starts at zero, so after one step
            # rms = (1 - rho) * g**2 = 0.001 and we should see the update
            # v -= lr * g / sqrt(rms + epsilon), with epsilon = 1 here.
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)),
                        2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)),
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)),
                        4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)),
                    ]
                ),
                self.evaluate(var1),
            )
            # Step 2: the root mean square accumulators contain the previous
            # update.
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        1.0
                        - (0.1 * 2.0 / math.sqrt(0.001 + 1.0))
                        - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)),
                        2.0
                        - (0.1 * 2.0 / math.sqrt(0.001 + 1.0))
                        - (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)),
                    ]
                ),
                self.evaluate(var0),
            )
            self.assertAllCloseAccordingToType(
                np.array(
                    [
                        3.0
                        - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0))
                        - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)),
                        4.0
                        - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0))
                        - (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)),
                    ]
                ),
                self.evaluate(var1),
            )
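
    # Note: the hyperparameters here are zero-argument callables; the
    # optimizer evaluates them when applying gradients, so their values can
    # change between steps. The 0.001 in the assertions is the rms term after
    # one step: (1 - rho) * 0.1**2.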

    def testConstructRMSpropWithLR(self):
        opt = rmsprop.RMSprop(lr=1.0)
        opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0)
        opt_3 = rmsprop.RMSprop(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
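
    # As the assertions above show, the deprecated `lr` alias takes precedence
    # over `learning_rate` when both are passed to the constructor.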

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testSlotsUniqueEager(self):
        v1 = tf.Variable(1.0)
        v2 = tf.Variable(1.0)

        opt = rmsprop.RMSprop(1.0, momentum=0.0, centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iterations variable plus one unique "rms" slot
        # per variable: 3 variables in total.
        self.assertLen({id(v) for v in opt.variables()}, 3)
        self.assertEqual(
            self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)
        )

        opt = rmsprop.RMSprop(learning_rate=1.0, momentum=0.2, centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iterations variable plus "rms" and "momentum"
        # slots per variable: 5 variables in total.
        self.assertLen({id(v) for v in opt.variables()}, 5)
        self.assertEqual(
            self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)
        )

        opt = rmsprop.RMSprop(learning_rate=1.0, momentum=0.2, centered=True)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iterations variable plus "rms", "momentum", and
        # "mg" slots per variable: 7 variables in total.
        self.assertLen({id(v) for v in opt.variables()}, 7)
        self.assertEqual(
            self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations)
        )
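
    # Variable-count breakdown implied by the assertions above:
    #   iterations counter                  -> 1
    #   "rms" slot per trainable variable   -> +2
    #   "momentum" slot when momentum > 0   -> +2
    #   "mg" slot when centered=True        -> +2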

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testMomentumProperValue(self):
        with self.assertRaisesRegex(
            ValueError,
            r"`momentum` must be between \[0, 1\]. "
            r"Received: momentum=2.5 \(of type <class "
            r"\'float\'>\).",
        ):
            rmsprop.RMSprop(1.0, momentum=2.5, centered=False)

class MixedPrecisionTest(test_combinations.TestCase):

    IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE'

    def setUp(self):
        super().setUp()
        # Enable the tests to be run on pre-Volta GPUs by telling the grappler pass
        # to ignore performance and always transform the graph.
        self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR)
        os.environ[self.IGNORE_PERF_VAR] = '1'

    def tearDown(self):
        # Set the IGNORE_PERF_VAR variable back to its original value.
        if self._original_ignore_perf_value is not None:
            os.environ[self.IGNORE_PERF_VAR] = self._original_ignore_perf_value
        else:
            del os.environ[self.IGNORE_PERF_VAR]

        tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
        super().tearDown()

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer_fixed_loss_scale(self):
        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, 123)
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 123.)
        self.assertFalse(opt.dynamic)
        self.assertEqual(opt.initial_scale, 123.)

        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, tf.compat.v1.mixed_precision.FixedLossScale(123))
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 123.)
        self.assertFalse(opt.dynamic)
        self.assertEqual(opt.initial_scale, 123.)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer_dynamic_loss_scale(self):
        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt, 'dynamic')
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 2.**15)
        self.assertTrue(opt.dynamic)
        self.assertEqual(opt.initial_scale, 2.**15)
        self.assertEqual(opt.dynamic_growth_steps, 2000)

        opt = gradient_descent_v2.SGD(1.0)
        opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            opt,
            tf.compat.v1.mixed_precision.DynamicLossScale(
                initial_loss_scale=4, increment_period=1000))
        self.assertIsInstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual(self.evaluate(opt.loss_scale), 4.)
        self.assertTrue(opt.dynamic)
        self.assertEqual(opt.initial_scale, 4.)
        self.assertEqual(opt.dynamic_growth_steps, 1000)

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_wrap_optimizer_dynamic_loss_scale_errors(self):

        opt = gradient_descent_v2.SGD(1.0)
        with self.assertRaisesRegex(
                ValueError, 'When passing a DynamicLossScale to "loss_scale", '
                'DynamicLossScale.multiplier must be 2. Got: '
                'DynamicLossScale'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt,
                tf.compat.v1.mixed_precision.DynamicLossScale(multiplier=4.))

        class MyLossScale(tf.compat.v1.mixed_precision.LossScale):
            def __call__(self):
                return 1.

            def update(self, grads):
                return None, True

            def get_config(self):
                return {}

        with self.assertRaisesRegex(
                TypeError,
                'Passing a LossScale that is not a FixedLossScale or a '
                'DynamicLossScale is not supported. Got:'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt, MyLossScale())

    @test_combinations.generate(
        test_combinations.combine(mode=['graph', 'eager']))
    def test_optimizer_errors(self):
        opt = gradient_descent_v2.SGD(1.0)
        opt = loss_scale_optimizer_v2.LossScaleOptimizer(opt)
        with self.assertRaisesRegex(
                ValueError, '"opt" must not already be an instance of a '
                'LossScaleOptimizer.'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                opt)
        self.assertFalse(tf.config.optimizer.get_experimental_options().get(
            'auto_mixed_precision', False))

    @test_utils.enable_v2_dtype_behavior
    def test_error_if_policy_is_set(self):
        with policy.policy_scope('mixed_float16'):
            with self.assertRaisesRegex(
                    ValueError, 'the global Keras dtype Policy has been set'):
                tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                    gradient_descent_v2.SGD(1.0))
        # Test no error is thrown when the policy is currently the default.
        tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            gradient_descent_v2.SGD(1.0))
        # Test no error is thrown when the policy is a non-mixed policy.
        with policy.policy_scope('float64'):
            tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
                gradient_descent_v2.SGD(1.0))
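
A minimal usage sketch of the graph-rewrite API exercised above; passing
'dynamic' (the default) wraps the optimizer in a LossScaleOptimizer with
dynamic loss scaling, as the assertions check:

opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
    gradient_descent_v2.SGD(1.0), loss_scale='dynamic')
assert isinstance(opt, loss_scale_optimizer_v2.LossScaleOptimizer)
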
Exemple #18
0
class CheckpointCompatibilityTests(test_combinations.TestCase):
    def _initialized_model(self):
        input_value = tf.constant([[3.]])
        model = MyModel()
        optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
        optimizer_step = tf.compat.v1.train.get_or_create_global_step()
        root_trackable = tf.train.Checkpoint(optimizer=optimizer,
                                             model=model,
                                             optimizer_step=optimizer_step)
        train_op = optimizer.minimize(functools.partial(model, input_value),
                                      global_step=optimizer_step)
        self.evaluate(trackable_utils.gather_initializers(root_trackable))
        self.evaluate(train_op)
        # A regular variable, a slot variable, and a non-slot Optimizer variable
        # with known values to check when loading.
        self.evaluate(model._named_dense.bias.assign([1.]))
        self.evaluate(
            optimizer.get_slot(var=model._named_dense.bias,
                               name="m").assign([2.]))
        beta1_power, _ = optimizer._get_beta_accumulators()
        self.evaluate(beta1_power.assign(3.))
        return root_trackable

    def _set_sentinels(self, root_trackable):
        self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
        self.evaluate(
            root_trackable.optimizer.get_slot(
                var=root_trackable.model._named_dense.bias,
                name="m").assign([102.]))
        beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
        self.evaluate(beta1_power.assign(103.))

    def _check_sentinels(self, root_trackable):
        self.assertAllEqual([1.],
                            self.evaluate(
                                root_trackable.model._named_dense.bias))
        self.assertAllEqual([2.],
                            self.evaluate(
                                root_trackable.optimizer.get_slot(
                                    var=root_trackable.model._named_dense.bias,
                                    name="m")))
        beta1_power, _ = root_trackable.optimizer._get_beta_accumulators()
        self.assertAllEqual(3., self.evaluate(beta1_power))

    def _write_name_based_checkpoint(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(
                    graph=save_graph) as session:
                root = self._initialized_model()
                name_saver = tf.compat.v1.train.Saver()
                return name_saver.save(sess=session,
                                       save_path=checkpoint_prefix,
                                       global_step=root.optimizer_step)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testLoadFromNameBasedSaver(self):
        """Save a name-based checkpoint, load it using the object-based API."""
        with test_utils.device(should_use_gpu=True):
            with self.test_session():
                save_path = self._write_name_based_checkpoint()
                root = self._initialized_model()
                self._set_sentinels(root)
                with self.assertRaises(AssertionError):
                    self._check_sentinels(root)
                object_saver = tf.train.Checkpoint(root=root)
                self._set_sentinels(root)
                status = object_saver.read(save_path)
                if tf.executing_eagerly():
                    self._check_sentinels(root)
                if tf.executing_eagerly():
                    status.assert_consumed()
                    status.assert_existing_objects_matched()
                    status.assert_nontrivial_match()
                else:
                    # When graph building, we haven't read any keys, so we don't know
                    # whether the restore will be complete.
                    with self.assertRaisesRegex(AssertionError,
                                                "not restored"):
                        status.assert_consumed()
                    with self.assertRaisesRegex(AssertionError,
                                                "not restored"):
                        status.assert_existing_objects_matched()
                    with self.assertRaisesRegex(AssertionError,
                                                "not restored"):
                        status.assert_nontrivial_match()
                status.run_restore_ops()
                self._check_sentinels(root)
                self._set_sentinels(root)
                status = object_saver.read(save_path)
                status.initialize_or_restore()
                self._check_sentinels(root)
                # Check that there is no error when keys are missing from the name-based
                # checkpoint.
                root.not_in_name_checkpoint = tf.Variable([1.])
                status = object_saver.read(save_path)
                with self.assertRaises(AssertionError):
                    status.assert_existing_objects_matched()
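
    # Note: `status.run_restore_ops()` is what actually executes the restore
    # when building a graph; in eager mode the read happens immediately, which
    # is why the assert_* checks above only pass up front when executing
    # eagerly.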

    def testSaveGraphLoadEager(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(graph=save_graph):
                root = self._initialized_model()
                save_path = root.save(file_prefix=checkpoint_prefix)
        with tf.__internal__.eager_context.eager_mode():
            root = self._initialized_model()
            self._set_sentinels(root)
            root.restore(save_path).assert_consumed()
            self._check_sentinels(root)

    def testSaveEagerLoadGraph(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with tf.__internal__.eager_context.eager_mode():
            root = self._initialized_model()
            save_path = root.save(file_prefix=checkpoint_prefix)
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(graph=save_graph):
                root = self._initialized_model()
                self._set_sentinels(root)
                root.restore(save_path).assert_consumed().run_restore_ops()
                self._check_sentinels(root)
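
A condensed sketch of the cross-mode round trip checked above; object-based
checkpoints are mode-agnostic (`model` and `optimizer` here are illustrative
stand-ins for the objects built in _initialized_model):

ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
save_path = ckpt.save(checkpoint_prefix)    # save in graph or eager mode
ckpt.restore(save_path).assert_consumed()   # restore in the other mode
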
Exemple #19
0
class DropoutTest(tf.test.TestCase, parameterized.TestCase):

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testDropoutProperties(self):
    dp = core_layers.Dropout(0.5, name='dropout')
    self.assertEqual(dp.rate, 0.5)
    self.assertEqual(dp.noise_shape, None)
    dp(tf.ones(()))
    self.assertEqual(dp.name, 'dropout')

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testBooleanLearningPhase(self):
    dp = core_layers.Dropout(0.5)
    inputs = tf.ones((5, 3))
    dropped = dp(inputs, training=True)
    if not tf.executing_eagerly():
      self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    dropped = dp(inputs, training=False)
    np_output = self.evaluate(dropped)
    self.assertAllClose(np.ones((5, 3)), np_output)

  @tf_test_utils.run_deprecated_v1
  def testDynamicLearningPhase(self):
    with self.cached_session() as sess:
      dp = core_layers.Dropout(0.5, seed=1)
      inputs = tf.ones((5, 5))
      training = tf.compat.v1.placeholder(dtype='bool')
      dropped = dp(inputs, training=training)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = sess.run(dropped, feed_dict={training: True})
      self.assertAlmostEqual(0., np_output.min())
      np_output = sess.run(dropped, feed_dict={training: False})
      self.assertAllClose(np.ones((5, 5)), np_output)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testDynamicNoiseShape(self):
    inputs = tf.ones((5, 3, 2))
    noise_shape = [None, 1, None]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp(inputs, training=True)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])

  def testCustomNoiseShape(self):
    inputs = tf.ones((5, 3, 2))
    noise_shape = [5, 1, 2]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp(inputs, training=True)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])

  @tf_test_utils.run_deprecated_v1
  def testFunctionalDropout(self):
    with self.cached_session():
      inputs = tf.ones((5, 5))
      dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = self.evaluate(dropped)
      self.assertAlmostEqual(0., np_output.min())
      dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
      np_output = self.evaluate(dropped)
      self.assertAllClose(np.ones((5, 5)), np_output)

  @tf_test_utils.run_deprecated_v1
  def testDynamicRate(self):
    with self.cached_session() as sess:
      rate = tf.compat.v1.placeholder(dtype='float32', name='rate')
      dp = core_layers.Dropout(rate, name='dropout')
      inputs = tf.ones((5, 5))
      dropped = dp(inputs, training=True)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = sess.run(dropped, feed_dict={rate: 0.5})
      self.assertAlmostEqual(0., np_output.min())
      np_output = sess.run(dropped, feed_dict={rate: 0.0})
      self.assertAllClose(np.ones((5, 5)), np_output)
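
A short sketch of what `noise_shape` does in the tests above: axes set to 1
share a single dropout mask (the layer and shapes mirror
testDynamicNoiseShape):

dp = core_layers.Dropout(0.5, noise_shape=[None, 1, None], seed=1)
out = dp(tf.ones((5, 3, 2)), training=True)
# One mask is broadcast over axis 1, so out[:, 0, :] equals out[:, 1, :].
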
from keras.optimizers.optimizer_v2 import gradient_descent
from keras.optimizers.schedules import learning_rate_schedule
from keras.testing_infra import test_combinations


def _maybe_serialized(lr_decay, serialize_and_deserialize):
    if serialize_and_deserialize:
        serialized = learning_rate_schedule.serialize(lr_decay)
        return learning_rate_schedule.deserialize(serialized)
    else:
        return lr_decay


@test_combinations.generate(
    test_combinations.combine(serialize=[False, True], mode=["graph",
                                                             "eager"]))
class LRDecayTestV2(tf.test.TestCase, parameterized.TestCase):
    def testContinuous(self, serialize):
        self.evaluate(tf.compat.v1.global_variables_initializer())
        step = 5
        decayed_lr = learning_rate_schedule.ExponentialDecay(0.05, 10, 0.96)
        decayed_lr = _maybe_serialized(decayed_lr, serialize)
        expected = 0.05 * 0.96**(5.0 / 10.0)
        self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)
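
    # ExponentialDecay evaluates
    #   lr(step) = initial_lr * decay_rate ** (step / decay_steps),
    # which is exactly what `expected` spells out; with staircase=True the
    # exponent is floored to an integer, as testStaircase exercises.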

    def testStaircase(self, serialize):
        if tf.executing_eagerly():
            step = tf.Variable(0)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            decayed_lr = learning_rate_schedule.ExponentialDecay(
                0.1, 3, 0.96, staircase=True)


class TraceModelCallTest(test_combinations.TestCase):
    def _assert_all_close(self, expected, actual):
        if not tf.executing_eagerly():
            with self.cached_session() as sess:
                backend._initialize_variables(sess)
                self.assertAllClose(expected, actual)
        else:
            self.assertAllClose(expected, actual)

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    def test_trace_model_outputs(self):
        input_dim = 5 if test_utils.get_model_type() == "functional" else None
        model = test_utils.get_small_mlp(10, 3, input_dim)
        inputs = tf.ones((8, 5))

        if input_dim is None:
            with self.assertRaisesRegex(
                ValueError, ".*input shape is not availabl*"
            ):
                saving_utils.trace_model_call(model)
            model._set_inputs(inputs)

        fn = saving_utils.trace_model_call(model)
        signature_outputs = fn(inputs)
        if model.output_names:
            expected_outputs = {model.output_names[0]: model(inputs)}
        else:
            expected_outputs = {"output_1": model(inputs)}

        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    def test_trace_model_outputs_after_fitting(self):
        input_dim = 5 if test_utils.get_model_type() == "functional" else None
        model = test_utils.get_small_mlp(10, 3, input_dim)
        model.compile(
            optimizer="sgd",
            loss="mse",
            run_eagerly=test_utils.should_run_eagerly(),
        )
        model.fit(
            x=np.random.random((8, 5)).astype(np.float32),
            y=np.random.random((8, 3)).astype(np.float32),
            epochs=2,
        )

        inputs = tf.ones((8, 5))

        fn = saving_utils.trace_model_call(model)
        signature_outputs = fn(inputs)
        if model.output_names:
            expected_outputs = {model.output_names[0]: model(inputs)}
        else:
            expected_outputs = {"output_1": model(inputs)}

        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.run_with_all_model_types(exclude_models="sequential")
    @test_combinations.run_all_keras_modes
    def test_trace_multi_io_model_outputs(self):
        input_dim = 5
        num_classes = 3
        num_classes_b = 4
        input_a = keras.layers.Input(shape=(input_dim,), name="input_a")
        input_b = keras.layers.Input(shape=(input_dim,), name="input_b")

        dense = keras.layers.Dense(num_classes, name="dense")
        dense2 = keras.layers.Dense(num_classes_b, name="dense2")
        dropout = keras.layers.Dropout(0.5, name="dropout")
        branch_a = [input_a, dense]
        branch_b = [input_b, dense, dense2, dropout]

        model = test_utils.get_multi_io_model(branch_a, branch_b)

        input_a_ts = tf.constant(
            np.random.random((10, input_dim)).astype(np.float32)
        )
        input_b_ts = tf.constant(
            np.random.random((10, input_dim)).astype(np.float32)
        )

        if test_utils.get_model_type() == "subclass":
            with self.assertRaisesRegex(
                ValueError, ".*input shape is not availabl*"
            ):
                saving_utils.trace_model_call(model)

        model.compile(
            optimizer="sgd",
            loss="mse",
            run_eagerly=test_utils.should_run_eagerly(),
        )
        model.fit(
            x=[
                np.random.random((8, input_dim)).astype(np.float32),
                np.random.random((8, input_dim)).astype(np.float32),
            ],
            y=[
                np.random.random((8, num_classes)).astype(np.float32),
                np.random.random((8, num_classes_b)).astype(np.float32),
            ],
            epochs=2,
        )

        fn = saving_utils.trace_model_call(model)
        # tf.function requires that the input structures match when calling a
        # ConcreteFunction. For some reason V1 models define the inputs as a
        # list, while V2 models set the inputs as a tuple.
        if (
            not tf.executing_eagerly()
            and test_utils.get_model_type() != "functional"
        ):
            signature_outputs = fn([input_a_ts, input_b_ts])
        else:
            signature_outputs = fn((input_a_ts, input_b_ts))
        outputs = model([input_a_ts, input_b_ts])
        if model.output_names:
            expected_outputs = {
                model.output_names[0]: outputs[0],
                model.output_names[1]: outputs[1],
            }
        else:
            expected_outputs = {"output_1": outputs[0], "output_2": outputs[1]}
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_trace_features_layer(self):
        columns = [tf.feature_column.numeric_column("x")]
        model = sequential.Sequential([dense_features.DenseFeatures(columns)])
        model_input = {"x": tf.constant([[1.0]])}
        model.predict(model_input, steps=1)
        fn = saving_utils.trace_model_call(model)
        self.assertAllClose({"output_1": [[1.0]]}, fn(model_input))

        columns = [
            tf.feature_column.numeric_column("x"),
            tf.feature_column.numeric_column("y"),
        ]
        model = sequential.Sequential([dense_features.DenseFeatures(columns)])
        model_input = {"x": tf.constant([[1.0]]), "y": tf.constant([[2.0]])}
        model.predict(model_input, steps=1)
        fn = saving_utils.trace_model_call(model)
        self.assertAllClose({"output_1": [[1.0, 2.0]]}, fn(model_input))

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_specify_input_signature(self):
        model = test_utils.get_small_sequential_mlp(10, 3, None)
        inputs = tf.ones((8, 5))

        with self.assertRaisesRegex(
            ValueError, ".*input shape is not availabl*"
        ):
            saving_utils.trace_model_call(model)

        fn = saving_utils.trace_model_call(
            model, [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)]
        )
        signature_outputs = fn(inputs)
        if model.output_names:
            expected_outputs = {model.output_names[0]: model(inputs)}
        else:
            expected_outputs = {"output_1": model(inputs)}
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_subclassed_model_with_input_signature(self):
        class Model(keras.Model):
            def __init__(self):
                super().__init__()
                self.dense = keras.layers.Dense(3, name="dense")

            @tf.function(
                input_signature=[
                    [
                        tf.TensorSpec([None, 5], tf.float32),
                        tf.TensorSpec([None], tf.float32),
                    ]
                ],
            )
            def call(self, inputs, *args):
                x, y = inputs
                return self.dense(x) + y

        model = Model()
        fn = saving_utils.trace_model_call(model)
        x = tf.ones((8, 5), dtype=tf.float32)
        y = tf.ones((3,), dtype=tf.float32)
        expected_outputs = {"output_1": model([x, y])}
        signature_outputs = fn([x, y])
        self._assert_all_close(expected_outputs, signature_outputs)

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    def test_model_with_fixed_input_dim(self):
        """Ensure that the batch_dim is removed when saving.

        When serving or retraining, it is important to reset the batch dim.
        This can be an issue inside tf.function. See b/132783590 for context.
        """
        model = test_utils.get_small_mlp(10, 3, 5)

        loss_object = keras.losses.MeanSquaredError()
        optimizer = gradient_descent.SGD()

        @tf.function
        def train_step(data, labels):
            with tf.GradientTape() as tape:
                predictions = model(data)
                loss = loss_object(labels, predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        x = np.random.random((8, 5))
        y = np.random.random((8, 3))

        train_step(x, y)

        fn = saving_utils.trace_model_call(model)
        self.assertEqual(
            fn.structured_input_signature[0][0].shape.as_list(),
            tf.TensorShape([None, 5]).as_list(),
        )
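
A minimal usage sketch of `trace_model_call` as exercised above; passing an
explicit input signature sidesteps the "input shape is not available" error
for models that have not been built yet:

fn = saving_utils.trace_model_call(
    model, [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)])
outputs = fn(tf.ones((8, 5)))  # a dict keyed by the model's output names
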
Exemple #22
0
class EmbeddingTest(test_combinations.TestCase):
    @test_combinations.run_all_keras_modes
    def test_embedding(self):
        if tf.test.is_gpu_available():
            self.skipTest("Only test embedding on CPU.")

        test_utils.layer_test(
            keras.layers.Embedding,
            kwargs={"output_dim": 4, "input_dim": 10, "input_length": 2},
            input_shape=(3, 2),
            input_dtype="int32",
            expected_output_dtype="float32",
        )

        test_utils.layer_test(
            keras.layers.Embedding,
            kwargs={"output_dim": 4, "input_dim": 10, "mask_zero": True},
            input_shape=(3, 2),
            input_dtype="int32",
            expected_output_dtype="float32",
        )

        test_utils.layer_test(
            keras.layers.Embedding,
            kwargs={"output_dim": 4, "input_dim": 10, "mask_zero": True},
            input_shape=(3, 4, 2),
            input_dtype="int32",
            expected_output_dtype="float32",
        )

        test_utils.layer_test(
            keras.layers.Embedding,
            kwargs={
                "output_dim": 4,
                "input_dim": 10,
                "mask_zero": True,
                "input_length": (None, 2),
            },
            input_shape=(3, 4, 2),
            input_dtype="int32",
            expected_output_dtype="float32",
        )

    @test_combinations.run_all_keras_modes
    def test_embedding_correctness(self):
        layer = keras.layers.Embedding(output_dim=2, input_dim=2)
        model = keras.models.Sequential([layer])

        layer.set_weights([np.array([[1, 1], [2, 2]])])
        model.run_eagerly = test_utils.should_run_eagerly()
        outputs = model.predict(np.array([[0, 1, 0]], dtype="int32"))
        self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])

    def test_embedding_incorrect_dimension(self):
        with self.assertRaises(ValueError):
            keras.layers.Embedding(input_dim=0, output_dim=1)

        with self.assertRaises(ValueError):
            keras.layers.Embedding(input_dim=1, output_dim=0)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_eager_gpu_cpu(self):
        l = keras.layers.Embedding(output_dim=2, input_dim=2)
        l.build((None, 2))
        inputs = keras.backend.constant([[0, 1, 0]], dtype="int32")
        with tf.GradientTape() as tape:
            output = l(inputs)
        gs = tape.gradient(output, l.weights)
        opt = tf.compat.v1.train.AdagradOptimizer(0.1)
        opt.apply_gradients(zip(gs, l.weights))
        self.assertAllEqual(len(gs), 1)

    @test_combinations.run_all_keras_modes
    def test_embedding_with_ragged_input(self):
        layer = keras.layers.Embedding(
            input_dim=3,
            output_dim=2,
            weights=[np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])],
        )
        inputs = keras.layers.Input(
            shape=(None,), dtype=tf.float32, ragged=True
        )

        outputs = keras.layers.Lambda(
            lambda args: keras.backend.identity(args)
        )(inputs)

        outputs = layer(outputs)

        model = keras.Model(inputs, outputs)
        model.run_eagerly = test_utils.should_run_eagerly()
        outputs = model.predict(
            tf.ragged.constant(
                [[1.0, 2.0, 2.0], [0.0], [1.0, 2.0]], ragged_rank=1
            )
        )
        self.assertAllClose(
            outputs,
            tf.ragged.constant(
                [
                    [[1.0, 1.0], [2.0, 2.0], [2.0, 2.0]],
                    [[0.0, 0.0]],
                    [[1.0, 1.0], [2.0, 2.0]],
                ],
                ragged_rank=1,
            ),
        )

    @test_utils.enable_v2_dtype_behavior
    def test_mixed_precision_embedding(self):
        try:
            policy.set_global_policy("mixed_float16")
            layer = keras.layers.Embedding(input_dim=5, output_dim=2)
            self.assertEqual(layer._dtype_policy.name, "mixed_float16")
            outputs = layer(np.array([0, 1, 2]))
            self.assertEqual(outputs.dtype, "float16")
        finally:
            policy.set_global_policy("float32")
Exemple #23
0
class TestWholeModelSaving(test_combinations.TestCase):

  def _save_model_dir(self, dirname='saved_model'):
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
    return os.path.join(temp_dir, dirname)

  def _assert_same_weights_and_metrics(self, model, loaded_model):
    """Checks that the loaded weights and metrics are the same as the original.

    Args:
      model: original model
      loaded_model: loaded model
    """
    self.assertAllClose(model.weights, loaded_model.weights)

    if loaded_model.optimizer:
      if test_utils.get_save_format() == 'tf':
        # TODO(b/153110928): Keras TF format doesn't restore optimizer weights
        # currently.
        return
      self.assertAllClose(model.optimizer.weights,
                          loaded_model.optimizer.weights)

    # In V1/Graph mode, the model isn't built, so the metrics are not loaded
    # immediately (requires model to be called on some data before building
    # metrics).
    check_metrics = tf.__internal__.tf2.enabled() and tf.executing_eagerly()

    if check_metrics:
      self.assertAllEqual([m.name for m in model.metrics],
                          [m.name for m in loaded_model.metrics])

  @test_combinations.run_with_all_model_types
  @test_combinations.run_all_keras_modes
  def test_save_and_load(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    save_kwargs = test_utils.get_save_kwargs()

    if ((save_format == 'h5' or not save_kwargs.get('save_traces', True)) and
        test_utils.get_model_type() == 'subclass'):
      # HDF5 format currently does not allow saving subclassed models.
      # When saving with `save_traces=False`, the subclassed model must have a
      # get_config/from_config, which the autogenerated model does not have.
      return

    with self.cached_session():
      model = test_utils.get_model_from_layers(
          [keras.layers.Dense(2),
           keras.layers.RepeatVector(3),
           keras.layers.TimeDistributed(keras.layers.Dense(3))],
          input_shape=(3,))
      model.compile(
          loss=keras.losses.MSE,
          optimizer=keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.0001),
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalCrossentropy(
                  name='cce', label_smoothing=tf.constant(0.2)),
          ],
          weighted_metrics=[
              keras.metrics.categorical_crossentropy,
              keras.metrics.CategoricalCrossentropy(
                  name='cce', label_smoothing=tf.constant(0.2)),
          ],
          sample_weight_mode='temporal')

      x = np.random.random((1, 3))
      y = np.random.random((1, 3, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      keras.models.save_model(
          model, saved_model_dir, save_format=save_format,
          **save_kwargs)

      loaded_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, loaded_model)

      out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      eval_out = model.evaluate(x, y)
      eval_out2 = loaded_model.evaluate(x, y)
      self.assertArrayNear(eval_out, eval_out2, 0.001)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_sequential_model_saving_without_input_shape(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
      model.compile(
          loss=keras.losses.MSE,
          optimizer='rmsprop',
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy(name='cat_acc')
          ],
          weighted_metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy(name='cat_acc2')
          ],
          sample_weight_mode='temporal')
      x = np.random.random((1, 3))
      y = np.random.random((1, 3, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      model.save(saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)

      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_sequential_model_saving_without_compile(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      x = np.random.random((1, 3))
      out = model.predict(x)

      # Save the model without any compilation or training.
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_sequential_model_saving_2(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()

    with tf.Graph().as_default(), self.cached_session():
      # Test with a custom optimizer and a custom loss.

      class CustomOp(optimizer_v1.RMSprop):
        pass

      def custom_loss(y_true, y_pred):
        return keras.losses.mse(y_true, y_pred)

      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.Dense(3))
      model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc'])

      x = np.random.random((1, 3))
      y = np.random.random((1, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(
          saved_model_dir,
          custom_objects={'CustomOp': CustomOp,
                          'custom_loss': custom_loss})
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_saving_without_compilation(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_with_tf_optimizer(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse',
                  optimizer=tf.compat.v1.train.AdadeltaOptimizer(0.1),
                  metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_right_after_compilation(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.Dense(3))
      model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
      if not tf.compat.v1.executing_eagerly_outside_functions():
        model._make_train_function()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

  def test_saving_lambda_numpy_array_arguments(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()

    if h5py is None:
      self.skipTest('h5py required to run this test')

    mean = np.random.random((4, 2, 3))
    std = np.abs(np.random.random((4, 2, 3))) + 1e-5
    inputs = keras.layers.Input(shape=(4, 2, 3))
    output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std,
                                 arguments={'mu': mean, 'std': std})(inputs)
    model = keras.models.Model(inputs, output)
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)

    model = keras.models.load_model(saved_model_dir)

    self.assertAllClose(mean, model.layers[1].arguments['mu'])
    self.assertAllClose(std, model.layers[1].arguments['std'])

  def test_saving_model_with_long_layer_names(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    with self.cached_session():
      # This layer name will make the `layer_names` HDF5 attribute blow
      # out of proportion. Note that it fits into the internal HDF5
      # attribute memory limit on its own, but because h5py converts
      # the list of layer names into a numpy array, which uses the same
      # amount of memory for every item, it increases the memory
      # requirements substantially.
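      # If I read the Keras HDF5 saving code correctly, attributes larger than
      # the HDF5 object-header limit (roughly 64KB) are split into numbered
      # chunks ('layer_names0', 'layer_names1', ...), which is what the
      # assertion further below looks for.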
      x = keras.Input(shape=(2,), name='input_' + ('x' * (2**15)))
      f = x
      for i in range(4):
        f = keras.layers.Dense(2, name='dense_%d' % (i,))(f)
      model = keras.Model(inputs=[x], outputs=[f])
      model.compile(
          'adam', loss=keras.losses.MeanSquaredError(), metrics=['acc'])

      x = np.random.random((1, 2))
      y = np.random.random((1, 2))
      model.train_on_batch(x, y)
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

      if save_format in ['tf', 'tensorflow']:
        return
      # Check that the HDF5 file contains a chunked array
      # of layer names.
      with h5py.File(saved_model_dir, 'r') as h5file:
        num_names_arrays = len([attr for attr in h5file['model_weights'].attrs
                                if attr.startswith('layer_names')])
      # The chunking of the layer-names array should have happened.
      self.assertGreater(num_names_arrays, 0)
      out2 = model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_saving_model_with_long_weights_names(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()

    with self.cached_session():
      x = keras.Input(shape=(2,), name='nested_model_input')
      f = x
      for i in range(4):
        f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f)
      # This layer name will make the `weight_names`
      # HDF5 attribute blow out of proportion.
      f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**14)))(f)
      nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model')

      x = keras.Input(shape=(2,), name='outer_model_input')
      f = nested_model(x)
      f = keras.layers.Dense(2, name='outer_model_output')(f)

      model = keras.Model(inputs=[x], outputs=[f])
      model.compile(loss='mse', optimizer='adam', metrics=['acc'])

      x = np.random.random((1, 2))
      y = np.random.random((1, 2))
      model.train_on_batch(x, y)
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

      if save_format in ['h5', 'hdf5', 'keras']:
        # Check that the HDF5 file contains a chunked array
        # of weight names.
        with h5py.File(saved_model_dir, 'r') as h5file:
          num_weight_arrays = len(
              [attr for attr in h5file['model_weights']['nested_model'].attrs
               if attr.startswith('weight_names')])
        # The chunking of the weight-names array should have happened.
        self.assertGreater(num_weight_arrays, 0)
      out2 = model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_model_saving_to_pre_created_h5py_file(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    with tf.Graph().as_default(), self.cached_session():
      inputs = keras.Input(shape=(3,))
      x = keras.layers.Dense(2)(inputs)
      outputs = keras.layers.Dense(3)(x)

      model = keras.Model(inputs, outputs)
      model.compile(
          loss=keras.losses.MSE,
          optimizer=optimizer_v1.Adam(),
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy()
          ])
      x = np.random.random((1, 3))
      y = np.random.random((1, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      loaded_model = keras.models.load_model(saved_model_dir)
      out1 = loaded_model.predict(x)
      self.assertAllClose(out, out1, atol=1e-05)
      if save_format in ['tf', 'tensorflow']:
        return

      # Test h5 format specifically
      fd, fname = tempfile.mkstemp('.h5')
      with h5py.File(fname, mode='r+') as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Test non-default options in h5
      with h5py.File(
          '_', driver='core', mode='w', backing_store=False) as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Cleanup
      os.close(fd)
      os.remove(fname)

  def test_model_saving_to_new_dir_path(self):
    saved_model_dir = os.path.join(self._save_model_dir(), 'newdir',
                                   'saved_model')
    save_format = test_utils.get_save_format()

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      x = np.random.random((1, 3))
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_model_raise_exception_with_failed_saving(self):
    if h5py is None:
      self.skipTest('h5py required to run this test')

    saved_model_dir = self._save_model_dir()
    saved_model_path = os.path.join(saved_model_dir, 'saved_model.h5')

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      with self.assertRaisesRegex(OSError, 'Unable to create file'):
        with h5py.File(saved_model_path, 'w'):
          keras.models.save_model(model, saved_model_path)

  def test_saving_constant_initializer_with_numpy(self):
    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()

    model = keras.models.Sequential()
    model.add(
        keras.layers.Dense(
            2,
            input_shape=(3,),
            kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_group_naming_h5py(self):
    # Test saving a model with a layer whose name is a prefix of a previous
    # layer's name.

    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)
    h5_path = os.path.join(temp_dir, 'test.h5')

    input_layer = keras.layers.Input((None, None, 3), name='test_input')
    x = keras.layers.Conv2D(1, 1, name='conv1/conv')(input_layer)
    x = keras.layers.Activation('relu', name='conv1')(x)
    model = keras.models.Model(inputs=input_layer, outputs=x)

    model.save_weights(h5_path)
    model.load_weights(h5_path)

  def test_primitive_attrs_contain_no_extraneous_strings(self):
    if h5py is None:
      self.skipTest('h5py required to run this test')

    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, input_shape=[2]))
    model.save(saved_model_dir, save_format=save_format)
    if save_format in ['tf', 'tensorflow']:
      return

    h5file = h5py.File(saved_model_dir, 'r')
    self.assertRegex(h5file.attrs['keras_version'], r'^[\d]+\.[\d]+\.[\S]+$')

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_functional_model_with_custom_loss_and_metric(self):
    def _make_model():
      inputs = keras.Input(shape=(4,))
      x = keras.layers.Dense(8, activation='relu')(inputs)
      outputs = keras.layers.Dense(3, activation='softmax')(x)
      model = keras.Model(inputs=inputs, outputs=outputs)
      custom_loss = keras.layers.Lambda(lambda x: keras.backend.sum(x * x))(x)
      model.add_loss(custom_loss)
      model.add_metric(custom_loss, aggregation='mean', name='custom_loss')
      return model

    saved_model_dir = self._save_model_dir()
    save_format = test_utils.get_save_format()

    with self.cached_session():
      model = _make_model()
      model.compile(
          loss=keras.losses.SparseCategoricalCrossentropy(),
          optimizer=optimizers.gradient_descent_v2.SGD(),
          metrics=[keras.metrics.SparseCategoricalCrossentropy()])
      x = np.random.normal(size=(32, 4))
      y = np.random.randint(0, 3, size=32)
      model.train_on_batch(x, y)
      evaluation_results = model.evaluate(x, y)
      # Save and reload model.
      model.save(saved_model_dir, save_format=save_format)
      del model  # Prevent misuse.
      loaded_model = keras.models.load_model(saved_model_dir)
      loaded_model_eval_results = loaded_model.evaluate(x, y)
      # Assert all evaluation results are the same.
      self.assertAllClose(evaluation_results, loaded_model_eval_results, 1e-9)
      # Check correctness of the loss calculation.
      self.assertAllGreater(evaluation_results, 0.)
      evaluation_results = dict(
          zip(loaded_model.metrics_names, evaluation_results))
      self.assertNear(
          evaluation_results['sparse_categorical_crossentropy'] +
          evaluation_results['custom_loss'], evaluation_results['loss'], 1e-6)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_save_uncompiled_model_with_optimizer(self):
    with self.cached_session() as session:
      saved_model_dir = self._save_model_dir()
      save_format = test_utils.get_save_format()
      model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(3,))])
      # Set the model's optimizer but don't compile. This can happen if the
      # model is trained with a custom training loop.
      model.optimizer = keras.optimizers.optimizer_v2.rmsprop.RMSprop(lr=0.0001)
      if not tf.executing_eagerly():
        session.run([v.initializer for v in model.variables])
      model.save(saved_model_dir, save_format=save_format)

      if save_format in ['tf', 'tensorflow']:
        loaded = keras.models.load_model(saved_model_dir)
        self.assertIsInstance(
            loaded.optimizer,
            keras.optimizers.optimizer_v2.optimizer_v2.OptimizerV2)

  @test_combinations.generate(test_combinations.combine(mode=['eager']))
  def test_functional_model_with_getitem_op_layer(self):
    inp = keras.Input(shape=(8,))

    out = inp[:]
    model = keras.Model(
        inputs=[inp],
        outputs=out)
    batch_size = 7
    x = tf.stack([
        tf.range(8) for _ in range(batch_size)])
    args = [x]
    expected = x[:]

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    # Make sure it can be successfully saved and loaded.
    save_format = test_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)

    loaded_model = keras.models.load_model(saved_model_dir)

    self.assertAllEqual(loaded_model(args), expected)
    self.assertAllEqual(loaded_model.predict(args, batch_size=batch_size),
                        expected)

  @test_combinations.generate(test_combinations.combine(
      mode=['eager', 'graph']))
  def test_custom_functional_registered(self):

    def _get_cls_definition():
      class CustomModel(keras.Model):

        def c(self):
          return 'c'

      return CustomModel

    cls = _get_cls_definition()
    self.assertEqual(cls.__bases__[0], keras.Model)

    with self.cached_session() as sess:
      input_ = keras.layers.Input(shape=(1,))
      output = keras.layers.Dense(1)(input_)
      model = cls(input_, output)
      # `cls` now inherits from `Functional` class.
      self.assertEqual(cls.__bases__[0], functional.Functional)

      if not tf.executing_eagerly():
        sess.run([v.initializer for v in model.variables])

      save_format = test_utils.get_save_format()
      saved_model_dir = self._save_model_dir()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

    loaded_model = keras.models.load_model(
        saved_model_dir, custom_objects={'CustomModel': cls})
    self.assertIsInstance(loaded_model, cls)

    # Check with "new" `CustomModel` class definition.
    new_cls = _get_cls_definition()
    # The new `CustomModel` class is *not* derived from `Functional`.
    self.assertEqual(new_cls.__bases__[0], keras.Model)
    reloaded_model = keras.models.load_model(
        saved_model_dir, custom_objects={'CustomModel': new_cls})
    self.assertIsInstance(reloaded_model, new_cls)

  @test_combinations.generate(test_combinations.combine(mode=['eager']))
  def test_shared_objects(self):
    class OuterLayer(keras.layers.Layer):

      def __init__(self, inner_layer):
        super(OuterLayer, self).__init__()
        self.inner_layer = inner_layer

      def call(self, inputs):
        return self.inner_layer(inputs)

      def get_config(self):
        return {
            'inner_layer': generic_utils.serialize_keras_object(
                self.inner_layer)
        }

      @classmethod
      def from_config(cls, config):
        return cls(generic_utils.deserialize_keras_object(
            config['inner_layer']))

    class InnerLayer(keras.layers.Layer):

      def __init__(self):
        super(InnerLayer, self).__init__()
        self.v = self.add_weight(name='v', shape=[], dtype=tf.float32)

      def call(self, inputs):
        return self.v + inputs

      @classmethod
      def from_config(cls, config):
        return cls()

    # Create a model with 2 output layers that share the same inner layer.
    inner_layer = InnerLayer()
    outer_layer_1 = OuterLayer(inner_layer)
    outer_layer_2 = OuterLayer(inner_layer)
    input_ = keras.Input(shape=(1,))
    model = keras.Model(
        inputs=input_, outputs=[outer_layer_1(input_), outer_layer_2(input_)])

    # Changes to the shared layer should affect both outputs.
    model.layers[1].inner_layer.v.assign(5)
    self.assertAllEqual(model(1), [6.0, 6.0])
    model.layers[1].inner_layer.v.assign(3)
    self.assertAllEqual(model(1), [4.0, 4.0])

    # After loading, changes to the shared layer should still affect both
    # outputs.
    def _do_assertions(loaded):
      loaded.layers[1].inner_layer.v.assign(5)
      self.assertAllEqual(loaded(1), [6.0, 6.0])
      loaded.layers[1].inner_layer.v.assign(3)
      self.assertAllEqual(loaded(1), [4.0, 4.0])
      loaded.layers[2].inner_layer.v.assign(5)
      self.assertAllEqual(loaded(1), [6.0, 6.0])
      loaded.layers[2].inner_layer.v.assign(3)
      self.assertAllEqual(loaded(1), [4.0, 4.0])

    # We'd like to make sure we only attach shared object IDs when strictly
    # necessary, so we'll recursively traverse the generated config to count
    # whether we have the exact number we expect.
    def _get_all_keys_recursive(dict_or_iterable):
      if isinstance(dict_or_iterable, dict):
        for key in dict_or_iterable.keys():
          yield key
        for key in _get_all_keys_recursive(dict_or_iterable.values()):
          yield key
      elif isinstance(dict_or_iterable, str):
        return
      else:
        try:
          for item in dict_or_iterable:
            for key in _get_all_keys_recursive(item):
              yield key
        # Not an iterable or dictionary
        except TypeError:
          return

    with generic_utils.CustomObjectScope({
        'OuterLayer': OuterLayer, 'InnerLayer': InnerLayer}):

      # Test saving and loading to disk
      save_format = test_utils.get_save_format()
      saved_model_dir = self._save_model_dir()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      loaded = keras.models.load_model(saved_model_dir)
      _do_assertions(loaded)

      # Test recreating directly from config
      config = model.get_config()
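      # In my understanding, `SHARED_OBJECT_KEY` tags serialized configs that
      # are referenced more than once; the shared InnerLayer appears inside
      # both OuterLayer configs, so we expect a count of exactly 2.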
      key_count = collections.Counter(_get_all_keys_recursive(config))
      self.assertEqual(key_count[generic_utils.SHARED_OBJECT_KEY], 2)
      loaded = keras.Model.from_config(config)
      _do_assertions(loaded)

  @test_combinations.generate(test_combinations.combine(mode=['eager']))
  def test_shared_objects_wrapper(self):
    """Tests that shared layers wrapped with `Wrapper` restore correctly."""
    input_ = keras.Input(shape=(1,))
    unwrapped = keras.layers.Layer(name='unwrapped')
    wrapped = keras.layers.Wrapper(unwrapped, name='wrapped')
    model = keras.Model(inputs=input_,
                        outputs=[unwrapped(input_), wrapped(input_)])

    # Test recreating directly from config
    config = model.get_config()
    loaded = keras.Model.from_config(config)
    self.assertIs(loaded.layers[1], loaded.layers[2].layer)

    # Test saving and loading to disk
    save_format = test_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)
    self.assertIs(loaded.layers[1], loaded.layers[2].layer)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager'], fit=[True, False]))
  def test_multi_output_metrics_name_stay_same(self, fit):
    """Tests that metric names don't change with each save/load cycle.

    e.g. "head_0_accuracy" should not become "head_0_head_0_accuracy" after
    saving and loading a model.

    Args:
      fit: Whether the model should be fit before saving.
    """
    # This combination (graph mode without fitting) doesn't work at all, so we
    # can't check whether metric names are correct.
    if not tf.executing_eagerly() and not fit:
      self.skipTest('b/181767784')

    input_ = keras.Input((4,))
    model = keras.Model(
        input_,
        [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)),
         keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))])
    metric = keras.metrics.BinaryAccuracy()
    model.compile(optimizer='rmsprop',
                  loss='mse',
                  metrics={'head_0': [metric, 'accuracy']})

    x = np.random.rand(2, 4)
    y = {'head_0': np.random.randint(2, size=(2, 3)),
         'head_1': np.random.randint(2, size=(2, 5))}

    # Make sure metric prefixing works the same regardless of whether the user
    # has fit the model before saving.
    if fit:
      model.fit(x, y, verbose=0)

    # Save and reload.
    save_format = test_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)

    # Make sure the metrics names from the model before saving match the loaded
    # model.
    self.assertSequenceEqual(model.metrics_names, loaded.metrics_names)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_warning_when_saving_invalid_custom_mask_layer(self):

    class MyMasking(keras.layers.Layer):

      def call(self, inputs):
        return inputs

      def compute_mask(self, inputs, mask=None):
        mask = tf.not_equal(inputs, 0)
        return mask

    class MyLayer(keras.layers.Layer):

      def call(self, inputs, mask=None):
        return tf.identity(inputs)

    samples = np.random.random((2, 2))
    model = keras.Sequential([MyMasking(), MyLayer()])
    model.predict(samples)
    with warnings.catch_warnings(record=True) as w:
      model.save(self._save_model_dir(), test_utils.get_save_format())
    self.assertIn(generic_utils.CustomMaskWarning,
                  {warning.category for warning in w})

    # Test that setting up a custom mask correctly does not issue a warning.
    class MyCorrectMasking(keras.layers.Layer):

      def call(self, inputs):
        return inputs

      def compute_mask(self, inputs, mask=None):
        mask = tf.not_equal(inputs, 0)
        return mask

      # This get_config doesn't actually do anything because our mask is
      # static and doesn't need any external information to work. We do need a
      # dummy get_config method to prevent the warning from appearing, however.
      def get_config(self, *args, **kwargs):
        return {}

    model = keras.Sequential([MyCorrectMasking(), MyLayer()])
    model.predict(samples)
    with warnings.catch_warnings(record=True) as w:
      model.save(self._save_model_dir(), test_utils.get_save_format())
    self.assertNotIn(generic_utils.CustomMaskWarning,
                     {warning.category for warning in w})

  # Test only in eager mode because ragged tensor inputs
  # cannot be used in graph mode.
  @test_combinations.generate(
      test_combinations.combine(mode=['eager']))
  @test_utils.run_v2_only
  def test_save_functional_with_ragged_constant_input(self):
    input1 = keras.Input(shape=[])
    input2 = tf.ragged.constant([[1., 2.], [3.]])
    outputs = keras.layers.Add()([input1, input2])
    model = keras.Model(input1, outputs)
    saved_model_dir = self._save_model_dir()
    model.save(saved_model_dir)
    keras.models.load_model(saved_model_dir)

  @test_combinations.generate(
      test_combinations.combine(mode=['eager']))
  @test_utils.run_v2_only
  def test_save_functional_with_constant_input(self):
    input1 = keras.Input(shape=[2])
    input2 = tf.constant([[1., 2.]])
    outputs = keras.layers.Add()([input1, input2])
    model = keras.Model(input1, outputs)
    saved_model_dir = self._save_model_dir()
    model.save(saved_model_dir)
    keras.models.load_model(saved_model_dir)
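

# --- Illustrative sketch, not part of the original tests ---
# A minimal, self-contained version of the save/load round trip the tests
# above exercise: build and compile a model, train one batch, save, reload,
# and check that predictions match. This assumes a standard `tensorflow.keras`
# install; the `_demo_save_load_round_trip` helper and its names are
# hypothetical.
import os
import tempfile

import numpy as np
from tensorflow import keras


def _demo_save_load_round_trip():
  demo_model = keras.Sequential([
      keras.layers.Dense(2, input_shape=(3,)),
      keras.layers.Dense(3),
  ])
  demo_model.compile(loss='mse', optimizer='sgd')
  x = np.random.random((1, 3))
  y = np.random.random((1, 3))
  demo_model.train_on_batch(x, y)

  out = demo_model.predict(x)
  save_dir = os.path.join(tempfile.mkdtemp(), 'demo_model')
  # With no file extension, `save_model` writes the TF SavedModel format.
  keras.models.save_model(demo_model, save_dir)

  loaded = keras.models.load_model(save_dir)
  np.testing.assert_allclose(out, loaded.predict(x), atol=1e-5)
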
Exemple #24
class KerasModelTest(test_combinations.TestCase):
    """Test mixed precision with Keras models."""

    def _skip_if_strategy_unsupported(self, strategy_fn):
        if (
            strategy_fn != default_strategy_fn
            and test_utils.get_model_type() == "subclass"
        ):
            self.skipTest(
                "Non-default strategies are unsupported with subclassed "
                "models"
            )

    def _skip_if_save_format_unsupported(self, save_format):
        model_type = test_utils.get_model_type()
        if save_format == "h5" and model_type == "subclass":
            self.skipTest(
                "Saving subclassed models with the HDF5 format is "
                "unsupported"
            )
        if (
            save_format == "tf"
            and model_type == "subclass"
            and not tf.executing_eagerly()
        ):
            self.skipTest(
                "b/148820505: This combination of features is currently "
                "broken."
            )

    @test_combinations.run_with_all_model_types
    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "operator",
            "strategy_fn": create_mirrored_strategy,
            "use_operator": True,
        },
        {
            "testcase_name": "regularizer",
            "strategy_fn": create_mirrored_strategy,
            "use_regularizer": True,
        },
        {
            "testcase_name": "get_config",
            "strategy_fn": create_mirrored_strategy,
            "get_config": True,
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model",
            "strategy_fn": default_strategy_fn,
            "save_format": "tf",
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model_input_spec",
            "strategy_fn": default_strategy_fn,
            "save_format": "tf",
            "use_regularizer": True,
            "use_input_spec": True,
        },
        {
            "testcase_name": "h5",
            "strategy_fn": default_strategy_fn,
            "save_format": "h5",
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model_distribute",
            "strategy_fn": create_mirrored_strategy,
            "save_format": "tf",
            "use_regularizer": True,
        },
        {
            "testcase_name": "saved_model_input_spec_distribute",
            "strategy_fn": create_mirrored_strategy,
            "save_format": "tf",
            "use_regularizer": True,
            "use_input_spec": True,
        },
        {
            "testcase_name": "h5_distribute",
            "strategy_fn": create_mirrored_strategy,
            "save_format": "h5",
            "use_regularizer": True,
        },
    )
    def test_model(
        self,
        strategy_fn,
        use_operator=False,
        use_regularizer=False,
        policy_name="mixed_float16",
        get_config=False,
        save_format=None,
        use_input_spec=False,
    ):
        self._skip_if_strategy_unsupported(strategy_fn)
        self._skip_if_save_format_unsupported(save_format)
        if use_regularizer:
            weight_regularizer = mp_test_util.IdentityRegularizer()
            activity_regularizer = mp_test_util.ReduceSumRegularizer()
        else:
            weight_regularizer = activity_regularizer = None
        with strategy_fn().scope():
            with policy.policy_scope(policy_name):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    use_operator=use_operator,
                    regularizer=weight_regularizer,
                    activity_regularizer=activity_regularizer,
                    input_shape=(1,),
                )
                if use_input_spec:
                    layer.input_spec = input_spec.InputSpec(shape=(None, 1))
                model = test_utils.get_model_from_layers(
                    [layer], input_shape=(1,), input_dtype=tf.float16
                )
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                # The learning rate is small enough that, if it were applied
                # to a float16 variable, the variable would not change. So
                # this tests that the learning rate is applied to the float32
                # variable rather than to a float16 copy.
                opt = gradient_descent.SGD(2**-14)
                # Use a fixed loss scale, as this test will fail if gradients are
                # skipped for a step due to dynamic loss scaling.
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=8
                )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 2 ** -14 subtracted
        # from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # Weight and activity regularizer each add another 2 ** -14 to the
            # gradient.
            expected -= 2 * 2**-14
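        # Rough derivation (my reading of the test): loss = mean(v * x) with
        # x = 1, so dloss/dv = 1 and SGD subtracts lr * 1 = 2**-14. Each
        # regularizer contributes an extra gradient of 1, hence another
        # 2**-14 per regularizer.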
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            with generic_utils.CustomObjectScope(
                {
                    "MultiplyLayer": mp_test_util.MultiplyLayer,
                    "loss_fn": loss_fn,
                }
            ):
                self._test_saving(model, dataset, save_format, use_regularizer)

    def _test_saving(self, model, dataset, save_format, use_regularizer):
        # Save and load model, asserting variable does not change
        save_path = os.path.join(self.get_temp_dir(), "model")
        model.save(save_path, save_format=save_format)
        model = save.load_model(save_path)
        (layer,) = (
            layer
            for layer in model.layers
            if "MultiplyLayer" in layer.__class__.__name__
        )
        expected = 1 - 2**-14
        if use_regularizer:
            expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        # Continue training, and assert variable is correct value
        model.fit(dataset)
        new_expected = expected - 2**-14
        if use_regularizer:
            new_expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), new_expected)

        # Load saved model again, and assert variable is previous value
        model = save.load_model(save_path)
        (layer,) = (
            layer
            for layer in model.layers
            if "MultiplyLayer" in layer.__class__.__name__
        )
        self.assertEqual(backend.eval(layer.v), expected)

        # Ensure various dtype-related aspects of the layer are correct
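        # (Under mixed_float16, variables are kept in float32 while
        # computations run in float16, so the variable dtype below is float32
        # and the layer output dtype is float16.)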
        self.assertEqual(layer.dtype, "float32")
        self.assertEqual(layer.dtype_policy.name, "mixed_float16")
        self.assertEqual(layer.v.dtype, "float32")
        self.assertEqual(layer(np.ones((2, 1))).dtype, "float16")

        self.assertEqual(type(model.dtype_policy), policy.Policy)
        self.assertEqual(
            layer.get_config()["dtype"],
            {"class_name": "Policy", "config": {"name": "mixed_float16"}},
        )

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
    )
    def test_fixed_loss_scaling(self, strategy_fn):
        # Note: We do not test mixed precision in this method, only loss scaling.
        loss_scale = 8.0
        batch_size = 4
        with strategy_fn().scope():
            x = layers.Input(shape=(1,), batch_size=batch_size)
            layer = mp_test_util.MultiplyLayer()
            y = layer(x)

            # The gradient of 'y' at this point is 1. With loss scaling, the gradient
            # is 'loss_scale'. We divide by the batch size since the loss is averaged
            # across batch elements.
            expected_gradient = loss_scale / batch_size
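            # With the values above this is 8.0 / 4 = 2.0: the loss is scaled
            # by 8 before backprop, and averaging over the 4 batch elements
            # divides the per-example gradient by 4.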
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    [expected_gradient]
                )
            )
            y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return tf.reduce_mean(y_pred)

            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, dynamic=False, initial_scale=loss_scale
            )
            model.compile(
                opt, loss=loss_fn, run_eagerly=test_utils.should_run_eagerly()
            )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 1 subtracted from it.
        expected = 0
        self.assertEqual(backend.eval(layer.v), expected)

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "loss_scaling",
            "strategy_fn": create_mirrored_strategy,
            "use_loss_scaling": True,
        },
    )
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        # The advanced model tests mixed-precision-related features that would
        # occur in a ResNet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some
        #    of which do not
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.0
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy("mixed_float16")):
                x = layers.Input(shape=(1,), batch_size=2)
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True,
                )
                layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16, use_operator=True
                )
                layer3 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16, use_operator=False
                )
                layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False,
                )
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=tf.float16,
                            expected_gradient=[expected_gradient],
                        )
                    )
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale
                    )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)

    @test_combinations.run_all_keras_modes(always_skip_v1=True)
    @parameterized.named_parameters(
        {"testcase_name": "base", "strategy_fn": default_strategy_fn},
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "get_config",
            "strategy_fn": create_mirrored_strategy,
            "get_config": True,
        },
    )
    def test_dynamic_loss_scaling(self, strategy_fn, get_config=False):
        strategy = strategy_fn()
        initial_loss_scale = 2.0
        batch_size = 4
        expected_gradient = backend.variable(
            [initial_loss_scale / batch_size], dtype=tf.float16
        )
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=tf.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2
            )
            with policy.policy_scope("mixed_float16"):
                x = layers.Input(
                    shape=(1,), batch_size=batch_size, dtype=tf.float16
                )
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients
                    )
                )
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=expected_gradient,
                    )
                )
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
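        # (dynamic_growth_steps=2, so after two consecutive finite-gradient
        # steps the scale grows from 2 to 4, doubling the gradient seen by the
        # grad-check Lambda.)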
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient * 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient / 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_compile_wraps_with_loss_scale_optimizer(self):
        x = layers.Input(shape=(1,))
        y = mp_test_util.MultiplyLayer()(x)

        with policy.policy_scope("mixed_float16"):
            # Test optimizer is automatically wrapped with LSO
            model = models.Model(x, y)
            model.compile(gradient_descent.SGD(1.0), "mse")
            self.assertIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )
            self.assertEqual(
                backend.get_value(model.optimizer.learning_rate), 1.0
            )

            # Test optimizer specified as string is automatically wrapped in LSO
            model = models.Model(x, y)
            model.compile("sgd", "mse")
            self.assertIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )

            # Test if an LSO is passed, optimizer is not automatically wrapped with
            # another LSO
            model = models.Model(x, y)
            optimizer = loss_scale_optimizer.LossScaleOptimizer(
                gradient_descent.SGD(1.0), dynamic_growth_steps=2
            )
            model.compile(optimizer, "mse")
            self.assertIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )
            self.assertEqual(model.optimizer.dynamic_growth_steps, 2)

        with policy.policy_scope("mixed_bfloat16"):
            # Test mixed_bfloat16 models are not automatically wrapped with LSO
            model = models.Model(x, y)
            model.compile(gradient_descent.SGD(1.0), "mse")
            self.assertNotIsInstance(
                model.optimizer, loss_scale_optimizer.LossScaleOptimizer
            )
            self.assertIsInstance(model.optimizer, gradient_descent.SGD)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_pass_invalid_optimizer_with_loss_scaling(self):
        with policy.policy_scope(policy.Policy("mixed_float16")):
            x = layers.Input(shape=(1,))
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(x, y)
            if tf.executing_eagerly():
                error_msg = "Use a `tf.keras` Optimizer instead"
            else:
                error_msg = 'optimizer" must be an instance of '
            with self.assertRaisesRegex(ValueError, error_msg):
                model.compile(optimizer_v1.SGD(1.0), "mse")

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"])
    )
    def test_functional_model_loss_dtype(self):
        with policy.policy_scope("float16"):
            x = layers.Input(shape=(1,))
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(x, y)
            model.add_loss(tf.cast(y, "float32"))
            # The loss should not be cast to the policy's dtype.
            self.assertEqual(model.losses[0].dtype, "float32")

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {
            "testcase_name": "base",
            "strategy_fn": default_strategy_fn,
        },
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "base_h5",
            "strategy_fn": default_strategy_fn,
            "h5": True,
        },
        {
            "testcase_name": "distribute_h5",
            "strategy_fn": create_mirrored_strategy,
            "h5": True,
        },
    )
    def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False):
        with strategy_fn().scope():
            with policy.policy_scope("mixed_float16"):
                x = layers.Input(shape=(1,), batch_size=2)
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                y = layer(x)
                model = models.Model(inputs=x, outputs=y)

        model.set_weights([np.array(100.0)])
        x = np.ones((2, 1))
        self.assertAllClose(backend.get_value(model(x)), x * 100.0)
        suffix = ".h5" if h5 else ""
        weights_file = os.path.join(self.get_temp_dir(), "weights" + suffix)
        model.save_weights(weights_file)

        model.set_weights([np.array(200.0)])
        self.assertAllClose(backend.get_value(model(x)), x * 200.0)
        model.load_weights(weights_file)
        self.assertAllClose(backend.get_value(model(x)), x * 100.0)
        self.assertEqual(model.get_weights(), [np.array(100.0)])

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {
            "testcase_name": "base",
            "strategy_fn": default_strategy_fn,
        },
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "different_var_name",
            "strategy_fn": default_strategy_fn,
            "var_name": "w",
        },
        {
            "testcase_name": "different_var_name_distribute",
            "strategy_fn": create_mirrored_strategy,
            "var_name": "w",
        },
    )
    def test_save_slot_variables_with_autocast_vars(
        self, strategy_fn, var_name="v"
    ):
        p = policy.Policy("mixed_float16")
        with strategy_fn().scope(), policy.policy_scope(p):
            x = layers.Input(shape=(2,), batch_size=2)
            # Having a var_name other than 'v' tests that a fixed bug
            # (b/134713714) does not reoccur. The bug was a crash when saving
            # a checkpoint in which an AutoCastVariable with a slot variable
            # had a different name than the layer attribute's name (layer.v in
            # this case).
            layer = mp_test_util.MultiplyLayer(
                assert_type=tf.float16, var_name=var_name
            )
            y = layer(x)
            model = models.Model(inputs=x, outputs=y)
            opt = gradient_descent.SGD(1.0, 1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, dynamic=False, initial_scale=1
            )
            model.compile(
                optimizer=opt,
                loss="mse",
                run_eagerly=test_utils.should_run_eagerly(),
            )

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        weights_file = os.path.join(self.get_temp_dir(), "weights")
        model.save_weights(weights_file)
        saved_slot = backend.get_value(opt.get_slot(layer.v, "momentum"))

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_slot = backend.get_value(opt.get_slot(layer.v, "momentum"))
        self.assertNotEqual(new_slot, saved_slot)

        model.load_weights(weights_file)
        restored_slot = backend.get_value(opt.get_slot(layer.v, "momentum"))
        self.assertEqual(restored_slot, saved_slot)

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(*TESTCASES)
    def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn):
        strategy = strategy_fn()
        if (
            isinstance(strategy, tf.distribute.MirroredStrategy)
            and not tf.executing_eagerly()
        ):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32)
            y = mp_test_util.MultiplyLayer(assert_type=tf.float32)(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=1.0, dynamic_growth_steps=2.0
            )
            model.compile(
                optimizer=opt,
                loss="mse",
                run_eagerly=test_utils.should_run_eagerly(),
            )
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2)
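        # My reading: the scale starts at 1 and doubles after every 2
        # successful steps, so steps 1-2 grow it to 2 and reset the counter;
        # step 3 leaves dynamic_counter at 1.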
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)

        # Save model weights.
        save_prefix = os.path.join(self.get_temp_dir(), "ckpt")
        model.save_weights(save_prefix)

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model.load_weights(save_prefix)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)

    @test_combinations.run_all_keras_modes
    def test_restore_old_loss_scale_checkpoint(self):
        # Ensure a checkpoint from TF 2.2 can be loaded. The checkpoint format
        # of LossScaleOptimizer changed, but old checkpoints can still be loaded
        opt = gradient_descent.SGD(0.1, momentum=0.1)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt)
        model = sequential.Sequential([core.Dense(2)])

        # The checkpoint and expected values were obtained from the program in
        # testdata/BUILD.
        ckpt_dir = os.path.join(
            flags.FLAGS["test_srcdir"].value,
            "org_keras/keras",
            "mixed_precision/testdata/lso_ckpt_tf2.2",
        )
        # ckpt_dir = test.test_src_dir_path(
        #     'python/keras/mixed_precision/testdata/lso_ckpt_tf2.2')
        model.load_weights(os.path.join(ckpt_dir, "ckpt"))
        model.compile(opt, "mse", run_eagerly=test_utils.should_run_eagerly())
        model(np.zeros((2, 2)))  # Create model weights
        opt._create_all_weights(model.weights)
        expected_kernel = np.array(
            [[9.229685, 10.901115], [10.370763, 9.757362]]
        )
        expected_slot = np.array([[10.049943, 9.917691], [10.049943, 9.917691]])
        self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel)
        self.assertAllClose(
            self.evaluate(opt.get_slot(model.weights[0], "momentum")),
            expected_slot,
        )
        self.assertEqual(self.evaluate(opt.loss_scale), 32768)
        self.assertEqual(self.evaluate(opt.dynamic_counter), 1)

        # Check restoring works even after the model is compiled and the weights
        # have been created.
        model.fit(np.random.normal(size=(2, 2)), np.random.normal(size=(2, 2)))
        self.assertNotAllClose(self.evaluate(model.weights[0]), expected_kernel)
        self.assertNotAllClose(
            self.evaluate(opt.get_slot(model.weights[0], "momentum")),
            expected_slot,
        )
        model.load_weights(os.path.join(ckpt_dir, "ckpt"))
        self.assertAllClose(self.evaluate(model.weights[0]), expected_kernel)
        self.assertAllClose(
            self.evaluate(opt.get_slot(model.weights[0], "momentum")),
            expected_slot,
        )
        self.assertEqual(self.evaluate(opt.loss_scale), 32768)
        self.assertEqual(self.evaluate(opt.dynamic_counter), 1)

    def test_restore_old_saved_model(self):
        saved_model_dir = os.path.join(
            flags.FLAGS["test_srcdir"].value,
            "org_keras/keras",
            "mixed_precision/testdata/lso_savedmodel_tf2.2",
        )
        # saved_model_dir = test.test_src_dir_path(
        #     'python/keras/mixed_precision/testdata/'
        #     'lso_savedmodel_tf2.2')
        model = save.load_model(saved_model_dir)
        expected_kernel = np.array(
            [[9.229685, 10.901115], [10.370763, 9.757362]]
        )
        self.assertAllClose(backend.eval(model.weights[0]), expected_kernel)
        self.assertEqual(
            type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer
        )

    @test_combinations.run_all_keras_modes
    @parameterized.named_parameters(
        {
            "testcase_name": "base",
            "strategy_fn": default_strategy_fn,
        },
        {
            "testcase_name": "distribute",
            "strategy_fn": create_mirrored_strategy,
        },
        {
            "testcase_name": "base_h5",
            "strategy_fn": default_strategy_fn,
            "h5": True,
        },
        {
            "testcase_name": "distribute_h5",
            "strategy_fn": create_mirrored_strategy,
            "h5": True,
        },
    )
    def test_save_model_with_dynamic_loss_scaling(self, strategy_fn, h5=False):
        # TODO(reedwm): Support and test saving a model with a
        # mixed_[b]float16 policy as well.
        strategy = strategy_fn()
        if (
            isinstance(strategy, tf.distribute.MirroredStrategy)
            and not tf.executing_eagerly()
        ):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32)
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=1.0, dynamic_growth_steps=2.0
            )
            model.compile(
                optimizer=opt,
                loss="mse",
                run_eagerly=test_utils.should_run_eagerly(),
            )
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
        (weight,) = model.trainable_weights
        orig_weight = backend.get_value(weight)

        # Save model weights.
        save_path = os.path.join(self.get_temp_dir(), "model")
        model.save(save_path, save_format="h5" if h5 else "tf")

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_weight = backend.get_value(weight)
        self.assertNotEqual(new_weight, orig_weight)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model = save.load_model(
            save_path,
            custom_objects={"MultiplyLayer": mp_test_util.MultiplyLayer},
        )
        (weight,) = model.trainable_weights
        loaded_weight = backend.get_value(weight)
        self.assertEqual(loaded_weight, orig_weight)
        # Currently the loss scale isn't always saved when the model is saved
        # with Model.save(), so we assert that the loss scale has either the
        # value it had when the model was saved or the value it was
        # initialized with.
        # TODO(reedwm): Always save/restore the loss scale with Model.save().
        self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2))
        self.assertIn(
            backend.get_value(model.optimizer.dynamic_counter), (0, 1)
        )

        # Test optimizer attributes and type
        self.assertEqual(model.optimizer.initial_scale, 1.0)
        self.assertEqual(model.optimizer.dynamic_growth_steps, 2.0)
        self.assertEqual(
            type(model.optimizer), loss_scale_optimizer.LossScaleOptimizer
        )
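
# (Illustrative sketch, not part of the scraped test above.) Model.save() does
# not reliably persist the dynamic loss scale, but tf.train.Checkpoint tracks
# the optimizer's loss-scale state directly, which is what the TF 2.2
# checkpoint test above relies on. The helper below is hypothetical and
# assumes `model` was compiled with a LossScaleOptimizer:
import os
import tensorflow.compat.v2 as tf

def checkpoint_optimizer_state(model, directory):
    # Checkpoint both the model and the optimizer; the optimizer's loss_scale
    # and dynamic_counter variables are trackable, so they are saved and
    # restored along with the weights.
    ckpt = tf.train.Checkpoint(model=model, optimizer=model.optimizer)
    path = ckpt.save(os.path.join(directory, "lso"))
    return ckpt, path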

import numpy as np

import tensorflow.compat.v2 as tf

import keras
from keras import backend
from keras.testing_infra import test_combinations
from keras.engine import base_layer_utils


@test_combinations.generate(test_combinations.combine(mode=['graph', 'eager']))
class TrackableWeightHandlerTest(test_combinations.TestCase):
    def get_table_handler(self):
        # Note: there is some repetition in these tests' setup. However,
        # TensorFlow does not play nicely with a separate setUp() call (it
        # causes graph-building errors), so we use a helper method called from
        # each test instead of overriding setUp().
        table = tf.lookup.experimental.MutableHashTable(key_dtype=tf.string,
                                                        value_dtype=tf.int32,
                                                        default_value=0)
        return base_layer_utils.TrackableWeightHandler(table)

    def test_get_num_tensors(self):
        table_handler = self.get_table_handler()
        self.assertEqual(2, table_handler.num_tensors)
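
# (Illustrative sketch, eager mode.) The two tensors counted above are the
# table's keys and values, which is what TrackableWeightHandler serializes;
# MutableHashTable exposes them directly via export():
demo_table = tf.lookup.experimental.MutableHashTable(
    key_dtype=tf.string, value_dtype=tf.int32, default_value=0)
demo_table.insert(tf.constant(["a", "b"]), tf.constant([1, 2]))
keys, values = demo_table.export()  # two tensors, matching num_tensors == 2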
Example #26
0
class AdamaxOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    def testResourceSparse(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype)  # pylint: disable=cell-var-from-loop
                m0, v0, m1, v1 = (zero_slots(), zero_slots(), zero_slots(),
                                  zero_slots())
                var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)

                grads0_np_indices = np.array([0, 1], dtype=np.int32)
                grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                          tf.constant(grads0_np_indices),
                                          tf.constant([3]))
                grads1_np_indices = np.array([2, 1], dtype=np.int32)
                grads1 = tf.IndexedSlices(tf.constant(grads1_np),
                                          tf.constant(grads1_np_indices),
                                          tf.constant([3]))
                opt = adamax.Adamax()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0, 3.0], var0)
                self.assertAllClose([4.0, 5.0, 6.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_sparse_update_numpy(
                        var0_np, grads0_np_indices, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_sparse_update_numpy(
                        var1_np, grads1_np_indices, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    def testSparseDevicePlacement(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for index_dtype in [tf.int32, tf.int64]:
            with tf.Graph().as_default(), self.cached_session(
                    force_gpu=tf.test.is_gpu_available()):
                # If a GPU is available, tests that all optimizer ops can be placed on
                # it (i.e. they have GPU kernels).
                var = tf.Variable([[1.0], [2.0]])
                indices = tf.constant([0, 1], dtype=index_dtype)
                g_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
                optimizer = adamax.Adamax(3.0)
                minimize_op = optimizer.minimize(g_sum, var_list=[var])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                minimize_op.run()

    def testSparseRepeatedIndices(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                repeated_index_update_var = tf.Variable([[1.0], [2.0]],
                                                        dtype=dtype)
                aggregated_update_var = tf.Variable([[1.0], [2.0]],
                                                    dtype=dtype)
                grad_repeated_index = tf.IndexedSlices(
                    tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                    tf.constant([1, 1]), tf.constant([2, 1]))
                grad_aggregated = tf.IndexedSlices(
                    tf.constant([0.2], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                repeated_update = adamax.Adamax().apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = adamax.Adamax().apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllClose(aggregated_update_var,
                                    repeated_index_update_var.eval())
                for _ in range(3):
                    repeated_update.run()
                    aggregated_update.run()
                    self.assertAllClose(aggregated_update_var,
                                        repeated_index_update_var.eval())

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testBasic(self):
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            with self.session(graph=tf.Graph(), use_gpu=True):
                # Initialize variables for numpy implementation.
                m0 = np.array([0.0, 0.0])
                v0 = np.array([0.0, 0.0])
                m1 = np.array([0.0, 0.0])
                v1 = np.array([0.0, 0.0])
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, name="var0_%d" % i)
                var1 = tf.Variable(var1_np, name="var1_%d" % i)

                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                opt = adamax.Adamax()
                if not tf.executing_eagerly():
                    update = opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of Adamax
                for t in range(3):
                    beta_1_power = get_beta_accumulators(opt, dtype)
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta_1_power))
                    if not tf.executing_eagerly():
                        self.evaluate(update)
                    else:
                        opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0),
                                                       rtol=1e-2)
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1),
                                                       rtol=1e-2)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def testBasicWithLearningRateDecay(self):
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            with self.session(graph=tf.Graph(), use_gpu=True):
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, name="var0_%d" % i)
                var1 = tf.Variable(var1_np, name="var1_%d" % i)

                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = 0.001
                decay = 0.002
                opt = adamax.Adamax(learning_rate=learning_rate, decay=decay)
                if not tf.executing_eagerly():
                    update = opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of Adamax
                for t in range(3):
                    beta_1_power = get_beta_accumulators(opt, dtype)
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta_1_power))
                    if not tf.executing_eagerly():
                        self.evaluate(update)
                    else:
                        opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))

                    lr = learning_rate / (1 + decay * t)

                    var0_np, m0, v0 = adamax_update_numpy(var0_np,
                                                          grads0_np,
                                                          t,
                                                          m0,
                                                          v0,
                                                          alpha=lr)
                    var1_np, m1, v1 = adamax_update_numpy(var1_np,
                                                          grads1_np,
                                                          t,
                                                          m1,
                                                          v1,
                                                          alpha=lr)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0),
                                                       rtol=1e-2)
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1),
                                                       rtol=1e-2)

    def testTensorLearningRate(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)
                opt = adamax.Adamax(tf.constant(0.001))
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0)
                self.assertAllClose([3.0, 4.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)
                opt = adamax.Adamax()
                update1 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                update2 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                beta1_power = get_beta_accumulators(opt, dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0)
                self.assertAllClose([3.0, 4.0], var1)

                # Run 3 steps of intertwined Adamax1 and Adamax2.
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    if t % 2 == 0:
                        update1.run()
                    else:
                        update2.run()

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    @test_combinations.generate(test_combinations.combine(mode=["eager"]))
    def testSlotsUniqueEager(self):
        v1 = tf.Variable(1.)
        v2 = tf.Variable(1.)
        opt = adamax.Adamax(1.)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iterations variable plus two slot variables (m
        # and v) for each of v1 and v2, giving five unique variables in total.
        self.assertLen({id(v) for v in opt.variables()}, 5)

    def testConstructAdamaxWithLR(self):
        opt = adamax.Adamax(lr=1.0)
        opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0)
        opt_3 = adamax.Adamax(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
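
# (Reconstruction.) The NumPy reference helpers used above are not included in
# this excerpt; the following sketch is consistent with the Adamax update rule
# (Kingma & Ba, Algorithm 2) and with the call sites above, where `t` is
# zero-based:
import numpy as np

def adamax_update_numpy(param, g_t, t, m, v,
                        alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    m_t = beta1 * m + (1 - beta1) * g_t       # first moment
    v_t = np.maximum(beta2 * v, np.abs(g_t))  # infinity-norm moment
    # Only the first moment is bias-corrected, as in Adamax.
    param_t = param - (alpha / (1 - beta1**(t + 1))) * (m_t / (v_t + epsilon))
    return param_t, m_t, v_t

def adamax_sparse_update_numpy(param, indices, g_t, t, m, v,
                               alpha=0.001, beta1=0.9, beta2=0.999,
                               epsilon=1e-8):
    # Same update, applied only to the rows named by `indices`.
    m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param)
    m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t
    v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t))
    param_t_slice = param[indices] - (
        (alpha / (1 - beta1**(t + 1))) * (m_t_slice / (v_t_slice + epsilon)))
    m_t[indices], v_t[indices] = m_t_slice, v_t_slice
    param_t[indices] = param_t_slice
    return param_t, m_t, v_t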
Example #27
0
import numpy as np

import tensorflow.compat.v2 as tf

from absl.testing import parameterized

import keras
from keras.testing_infra import test_combinations
from keras.feature_column import sequence_feature_column as ksfc
from keras.saving import model_config


def _initialized_session(config=None):
    sess = tf.compat.v1.Session(config=config)
    sess.run(tf.compat.v1.global_variables_initializer())
    sess.run(tf.compat.v1.tables_initializer())
    return sess
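
# (Note.) Feature-column tests create lookup tables in addition to variables,
# so a graph-mode session must run tables_initializer() as well as
# global_variables_initializer(); the helper above bundles both.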


@test_combinations.generate(test_combinations.combine(mode=["graph", "eager"]))
class SequenceFeaturesTest(tf.test.TestCase, parameterized.TestCase):
    @parameterized.named_parameters(
        {
            "testcase_name":
            "2D",
            "sparse_input_args_a": {
                # example 0, ids [2]
                # example 1, ids [0, 1]
                "indices": ((0, 0), (1, 0), (1, 1)),
                "values": (2, 0, 1),
                "dense_shape": (2, 2),
            },
            "sparse_input_args_b": {
                # example 0, ids [1]
                # example 1, ids [2, 0]
class LayerNormalizationNumericsTest(test_combinations.TestCase):
  """Tests LayerNormalization has correct and numerically stable outputs."""

  def _expected_layer_norm(self, x, beta, gamma, batch_input_shape, axis,
                           epsilon):
    """Returns the layer norm, which is computed using NumPy."""
    broadcast_shape = [batch_input_shape[i] if i in axis else 1
                       for i in range(len(batch_input_shape))]
    mean = np.mean(x, axis=axis, keepdims=True)
    var = np.var(x, axis=axis, keepdims=True)
    expected = (x - mean) / np.sqrt(var + epsilon)
    expected *= np.reshape(gamma, broadcast_shape)
    expected += np.reshape(beta, broadcast_shape)
    return expected
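
  # (Note.) In formula form, over the normalized axes A the reference above
  # computes
  #     y = gamma * (x - mean_A(x)) / sqrt(var_A(x) + epsilon) + beta,
  # with gamma and beta broadcast against the non-normalized dimensions.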

  def _test_forward_pass(self, batch_input_shape, axis, fp64_tol=1e-14,
                         fp32_tol=1e-6, fp16_tol=1e-2):
    """Tests the forward pass of layer layer_normalization.

    Args:
      batch_input_shape: The input shape that will be used to test, including
        the batch dimension.
      axis: A list of axes to normalize. Will be passed to the `axis` argument
        of LayerNormalization.
      fp64_tol: The relative and absolute tolerance for float64.
      fp32_tol: The relative and absolute tolerance for float32.
      fp16_tol: The relative and absolute tolerance for float16.
    """
    param_shape = [batch_input_shape[i] for i in axis]
    param_elems = 1
    for dim in param_shape:
      param_elems *= dim
    beta = np.arange(param_elems, dtype='float64').reshape(param_shape)
    gamma = np.arange(1, param_elems + 1, dtype='float64').reshape(param_shape)
    x = np.random.normal(size=batch_input_shape)

    for epsilon in 1e-12, 1e-3:
      expected = self._expected_layer_norm(x, beta, gamma, batch_input_shape,
                                           axis, epsilon)
      for dtype in 'float64', 'float32', 'float16':
        norm = layer_normalization.LayerNormalization(
            axis=axis, dtype=dtype, batch_input_shape=batch_input_shape,
            epsilon=epsilon, beta_initializer=keras.initializers.constant(beta),
            gamma_initializer=keras.initializers.constant(gamma))
        y = norm(keras.backend.cast(x, dtype))
        actual = keras.backend.eval(y)

        if dtype == 'float64':
          tol = fp64_tol
        elif dtype == 'float32':
          tol = fp32_tol
        else:
          assert dtype == 'float16'
          tol = fp16_tol

        # We use absolute tolerances in addition to relative tolerances, because
        # some of the values are very close to zero.
        self.assertAllClose(expected, actual, rtol=tol, atol=tol)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_forward(self):
    # For numeric stability, we ensure the axis's dimension(s) have at least 4
    # elements.
    self._test_forward_pass((4, 3), (0,))
    self._test_forward_pass((3, 4), (1,))
    self._test_forward_pass((4, 3, 2), (0,))
    self._test_forward_pass((2, 4, 2), (1,))
    self._test_forward_pass((2, 3, 4), (2,), fp16_tol=5e-2)
    self._test_forward_pass((2, 3, 2), (0, 2))
    self._test_forward_pass((2, 2, 2, 2), (1, 3))
    self._test_forward_pass((2, 2, 2, 2), (2, 3))
    self._test_forward_pass((2, 3, 4, 5), (3,))

  def _test_backward_pass(self, batch_input_shape, axis, fp64_tol=1e-5,
                          fp32_tol=1e-5, fp16_tol=2e-2):
    """Tests the backwards pass of layer layer_normalization.

    Args:
      batch_input_shape: The input shape that will be used to test, including
        the batch dimension.
      axis: A list of axes to normalize. Will be passed to the `axis` argument
        of LayerNormalization.
      fp64_tol: The relative and absolute tolerance for float64.
      fp32_tol: The relative and absolute tolerance for float32.
      fp16_tol: The relative and absolute tolerance for float16.
    """
    param_shape = [batch_input_shape[i] for i in axis]
    param_elems = 1
    for dim in param_shape:
      param_elems *= dim
    beta = np.arange(param_elems, dtype='float64').reshape(param_shape)
    gamma = np.arange(1, param_elems + 1, dtype='float64').reshape(param_shape)
    x = np.random.normal(size=batch_input_shape)

    for epsilon in 1e-12, 1e-3:
      # Float64 must come first in this list, as we use the float64 numerical
      # gradients to compare to the float32 and float16 symbolic gradients as
      # well. Computing float32/float16 numerical gradients is too numerically
      # unstable.
      for dtype in 'float64', 'float32', 'float16':
        norm = layer_normalization.LayerNormalization(
            axis=axis, dtype=dtype, batch_input_shape=batch_input_shape,
            epsilon=epsilon, beta_initializer=keras.initializers.constant(beta),
            gamma_initializer=keras.initializers.constant(gamma))
        norm.build(x.shape)

        # pylint: disable=cell-var-from-loop
        def forward_fn(x, beta, gamma):
          # We must monkey-patch the attributes of `norm` with the function
          # arguments, so that the gradient checker will properly compute their
          # gradients. The gradient checker computes gradients with respect to
          # the input arguments of `f`.
          with tf.compat.v1.test.mock.patch.object(norm, 'beta', beta):
            with tf.compat.v1.test.mock.patch.object(norm, 'gamma', gamma):
              return norm(x)
        # pylint: enable=cell-var-from-loop
        results = tf.test.compute_gradient(
            forward_fn, [keras.backend.cast(x, dtype), norm.beta, norm.gamma])
        ([x_grad_t, beta_grad_t, gamma_grad_t],
         [x_grad_n, beta_grad_n, gamma_grad_n]) = results

        if dtype == 'float64':
          # We use the float64 numeric gradients as the reference, to compare
          # against the symbolic gradients for all dtypes.
          x_grad_ref = x_grad_n
          beta_grad_ref = beta_grad_n
          gamma_grad_ref = gamma_grad_n
          tol = fp64_tol
        elif dtype == 'float32':
          tol = fp32_tol
        else:
          assert dtype == 'float16'
          tol = fp16_tol

        # We use absolute tolerances in addition to relative tolerances, because
        # some of the values are very close to zero.
        self.assertAllClose(x_grad_t, x_grad_ref, rtol=tol, atol=tol)
        self.assertAllClose(beta_grad_t, beta_grad_ref, rtol=tol, atol=tol)
        self.assertAllClose(gamma_grad_t, gamma_grad_ref, rtol=tol, atol=tol)

  # The gradient_checker_v2 does not work properly with LayerNorm in graph mode.
  @test_utils.run_v2_only
  def test_backward(self):
    # For numeric stability, we ensure the axis's dimension(s) have at least 4
    # elements.
    self._test_backward_pass((4, 3), (0,))
    self._test_backward_pass((2, 4, 2), (1,))
    self._test_backward_pass((2, 3, 4), (2,))
    self._test_backward_pass((2, 3, 2), (0, 2), fp64_tol=5e-4, fp32_tol=5e-4)
    self._test_backward_pass((2, 2, 2, 2), (1, 3))
    self._test_backward_pass((2, 2, 2, 2), (2, 3))
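
# (Minimal sketch of the gradient-check pattern used in _test_backward_pass
# above, applied to a toy function.) tf.test.compute_gradient returns the
# theoretical (symbolic) and numeric Jacobians, one pair per input argument:

def _toy_fn(x):
  return tf.reduce_sum(x * x)

theoretical, numeric = tf.test.compute_gradient(
    _toy_fn, [tf.constant([1.0, 2.0])])
# For f(x) = sum(x ** 2) the Jacobian is diag(2 * x); the symbolic and numeric
# estimates should agree to within the tolerances chosen above.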
class TestTensorBoardV1(tf.test.TestCase, parameterized.TestCase):
    def test_TensorBoard(self):
        np.random.seed(1337)

        temp_dir = self.get_temp_dir()
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

        (x_train, y_train), (x_test, y_test) = test_utils.get_test_data(
            train_samples=TRAIN_SAMPLES,
            test_samples=TEST_SAMPLES,
            input_shape=(INPUT_DIM, ),
            num_classes=NUM_CLASSES,
        )
        y_test = np_utils.to_categorical(y_test)
        y_train = np_utils.to_categorical(y_train)

        def data_generator(train):
            if train:
                max_batch_index = len(x_train) // BATCH_SIZE
            else:
                max_batch_index = len(x_test) // BATCH_SIZE
            i = 0
            while True:
                if train:
                    yield (
                        x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
                        y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
                    )
                else:
                    yield (
                        x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
                        y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE],
                    )
                i += 1
                i %= max_batch_index

        # case: Sequential
        with tf.Graph().as_default(), self.cached_session():
            model = sequential.Sequential()
            model.add(
                layers.Dense(NUM_HIDDEN,
                             input_dim=INPUT_DIM,
                             activation="relu"))
            # non_trainable_weights: moving_variance, moving_mean
            model.add(layers.BatchNormalization())
            model.add(layers.Dense(NUM_CLASSES, activation="softmax"))
            model.compile(
                loss="categorical_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"],
            )
            tsb = callbacks_v1.TensorBoard(
                log_dir=temp_dir,
                histogram_freq=1,
                write_images=True,
                write_grads=True,
                batch_size=5,
            )
            cbks = [tsb]

            # fit with validation data
            model.fit(
                x_train,
                y_train,
                batch_size=BATCH_SIZE,
                validation_data=(x_test, y_test),
                callbacks=cbks,
                epochs=3,
                verbose=0,
            )

            # fit with validation data and accuracy
            model.fit(
                x_train,
                y_train,
                batch_size=BATCH_SIZE,
                validation_data=(x_test, y_test),
                callbacks=cbks,
                epochs=2,
                verbose=0,
            )

            # fit generator with validation data
            model.fit_generator(
                data_generator(True),
                len(x_train),
                epochs=2,
                validation_data=(x_test, y_test),
                callbacks=cbks,
                verbose=0,
            )

            # fit generator without validation data
            # histogram_freq must be zero
            tsb.histogram_freq = 0
            model.fit_generator(
                data_generator(True),
                len(x_train),
                epochs=2,
                callbacks=cbks,
                verbose=0,
            )

            # fit generator with validation data and accuracy
            tsb.histogram_freq = 1
            model.fit_generator(
                data_generator(True),
                len(x_train),
                epochs=2,
                validation_data=(x_test, y_test),
                callbacks=cbks,
                verbose=0,
            )

            # fit generator without validation data and accuracy
            tsb.histogram_freq = 0
            model.fit_generator(data_generator(True),
                                len(x_train),
                                epochs=2,
                                callbacks=cbks)
            assert os.path.exists(temp_dir)

    def test_TensorBoard_multi_input_output(self):
        np.random.seed(1337)
        tmpdir = self.get_temp_dir()
        self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)

        with tf.Graph().as_default(), self.cached_session():
            filepath = os.path.join(tmpdir, "logs")

            (x_train, y_train), (x_test, y_test) = test_utils.get_test_data(
                train_samples=TRAIN_SAMPLES,
                test_samples=TEST_SAMPLES,
                input_shape=(INPUT_DIM, ),
                num_classes=NUM_CLASSES,
            )
            y_test = np_utils.to_categorical(y_test)
            y_train = np_utils.to_categorical(y_train)

            def data_generator(train):
                if train:
                    max_batch_index = len(x_train) // BATCH_SIZE
                else:
                    max_batch_index = len(x_test) // BATCH_SIZE
                i = 0
                while True:
                    if train:
                        # simulate multi-input/output models
                        yield (
                            [x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2,
                            [y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2,
                        )
                    else:
                        yield (
                            [x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2,
                            [y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2,
                        )
                    i += 1
                    i %= max_batch_index

            inp1 = input_layer.Input((INPUT_DIM, ))
            inp2 = input_layer.Input((INPUT_DIM, ))
            inp = layers.add([inp1, inp2])
            hidden = layers.Dense(2, activation="relu")(inp)
            hidden = layers.Dropout(0.1)(hidden)
            output1 = layers.Dense(NUM_CLASSES, activation="softmax")(hidden)
            output2 = layers.Dense(NUM_CLASSES, activation="softmax")(hidden)
            model = training.Model([inp1, inp2], [output1, output2])
            model.compile(
                loss="categorical_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"],
            )

            # we must generate new callbacks for each test, as they aren't stateless
            def callbacks_factory(histogram_freq):
                return [
                    callbacks_v1.TensorBoard(
                        log_dir=filepath,
                        histogram_freq=histogram_freq,
                        write_images=True,
                        write_grads=True,
                        batch_size=5,
                    )
                ]

            # fit without validation data
            model.fit(
                [x_train] * 2,
                [y_train] * 2,
                batch_size=BATCH_SIZE,
                callbacks=callbacks_factory(histogram_freq=0),
                epochs=3,
            )

            # fit with validation data and accuracy
            model.fit(
                [x_train] * 2,
                [y_train] * 2,
                batch_size=BATCH_SIZE,
                validation_data=([x_test] * 2, [y_test] * 2),
                callbacks=callbacks_factory(histogram_freq=1),
                epochs=2,
            )

            # fit generator without validation data
            model.fit_generator(
                data_generator(True),
                len(x_train),
                epochs=2,
                callbacks=callbacks_factory(histogram_freq=0),
            )

            # fit generator with validation data and accuracy
            model.fit_generator(
                data_generator(True),
                len(x_train),
                epochs=2,
                validation_data=([x_test] * 2, [y_test] * 2),
                callbacks=callbacks_factory(histogram_freq=1),
            )
            assert os.path.isdir(filepath)

    def test_Tensorboard_histogram_summaries_in_test_function(self):
        class FileWriterStub:
            def __init__(self, logdir, graph=None):
                self.logdir = logdir
                self.graph = graph
                self.steps_seen = []

            def add_summary(self, summary, global_step):
                summary_obj = tf.compat.v1.Summary()

                # ensure a valid Summary proto is being sent
                if isinstance(summary, bytes):
                    summary_obj.ParseFromString(summary)
                else:
                    assert isinstance(summary, tf.compat.v1.Summary)
                    summary_obj = summary

                # keep track of steps seen for the merged_summary op,
                # which contains the histogram summaries
                if len(summary_obj.value) > 1:
                    self.steps_seen.append(global_step)

            def flush(self):
                pass

            def close(self):
                pass

        def _init_writer(obj, _):
            obj.writer = FileWriterStub(obj.log_dir)

        np.random.seed(1337)
        tmpdir = self.get_temp_dir()
        self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
        (x_train, y_train), (x_test, y_test) = test_utils.get_test_data(
            train_samples=TRAIN_SAMPLES,
            test_samples=TEST_SAMPLES,
            input_shape=(INPUT_DIM, ),
            num_classes=NUM_CLASSES,
        )
        y_test = np_utils.to_categorical(y_test)
        y_train = np_utils.to_categorical(y_train)

        with tf.Graph().as_default(), self.cached_session():
            model = sequential.Sequential()
            model.add(
                layers.Dense(NUM_HIDDEN,
                             input_dim=INPUT_DIM,
                             activation="relu"))
            # non_trainable_weights: moving_variance, moving_mean
            model.add(layers.BatchNormalization())
            model.add(layers.Dense(NUM_CLASSES, activation="softmax"))
            model.compile(
                loss="categorical_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"],
            )
            callbacks_v1.TensorBoard._init_writer = _init_writer
            tsb = callbacks_v1.TensorBoard(
                log_dir=tmpdir,
                histogram_freq=1,
                write_images=True,
                write_grads=True,
                batch_size=5,
            )
            cbks = [tsb]

            # fit with validation data
            model.fit(
                x_train,
                y_train,
                batch_size=BATCH_SIZE,
                validation_data=(x_test, y_test),
                callbacks=cbks,
                epochs=3,
                verbose=0,
            )

            self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5])

    def test_Tensorboard_histogram_summaries_with_generator(self):
        np.random.seed(1337)
        tmpdir = self.get_temp_dir()
        self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)

        def generator():
            x = np.random.randn(10, 100).astype(np.float32)
            y = np.random.randn(10, 10).astype(np.float32)
            while True:
                yield x, y

        with tf.Graph().as_default(), self.cached_session():
            model = test_utils.get_small_sequential_mlp(num_hidden=10,
                                                        num_classes=10,
                                                        input_dim=100)
            model.compile(
                loss="categorical_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"],
            )
            tsb = callbacks_v1.TensorBoard(
                log_dir=tmpdir,
                histogram_freq=1,
                write_images=True,
                write_grads=True,
                batch_size=5,
            )
            cbks = [tsb]

            # fit with validation generator
            model.fit_generator(
                generator(),
                steps_per_epoch=2,
                epochs=2,
                validation_data=generator(),
                validation_steps=2,
                callbacks=cbks,
                verbose=0,
            )

            with self.assertRaises(ValueError):
                # fit with validation generator but no
                # validation_steps
                model.fit_generator(
                    generator(),
                    steps_per_epoch=2,
                    epochs=2,
                    validation_data=generator(),
                    callbacks=cbks,
                    verbose=0,
                )

            self.assertTrue(os.path.exists(tmpdir))

    def test_TensorBoard_with_ReduceLROnPlateau(self):
        with self.cached_session():
            temp_dir = self.get_temp_dir()
            self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

            (x_train, y_train), (x_test, y_test) = test_utils.get_test_data(
                train_samples=TRAIN_SAMPLES,
                test_samples=TEST_SAMPLES,
                input_shape=(INPUT_DIM, ),
                num_classes=NUM_CLASSES,
            )
            y_test = np_utils.to_categorical(y_test)
            y_train = np_utils.to_categorical(y_train)

            model = test_utils.get_small_sequential_mlp(
                num_hidden=NUM_HIDDEN,
                num_classes=NUM_CLASSES,
                input_dim=INPUT_DIM,
            )
            model.compile(
                loss="binary_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"],
            )

            cbks = [
                callbacks.ReduceLROnPlateau(monitor="val_loss",
                                            factor=0.5,
                                            patience=4,
                                            verbose=1),
                callbacks_v1.TensorBoard(log_dir=temp_dir),
            ]

            model.fit(
                x_train,
                y_train,
                batch_size=BATCH_SIZE,
                validation_data=(x_test, y_test),
                callbacks=cbks,
                epochs=2,
                verbose=0,
            )

            assert os.path.exists(temp_dir)

    def test_Tensorboard_batch_logging(self):
        class FileWriterStub:
            def __init__(self, logdir, graph=None):
                self.logdir = logdir
                self.graph = graph
                self.batches_logged = []
                self.summary_values = []
                self.summary_tags = []

            def add_summary(self, summary, step):
                self.summary_values.append(summary.value[0].simple_value)
                self.summary_tags.append(summary.value[0].tag)
                self.batches_logged.append(step)

            def flush(self):
                pass

            def close(self):
                pass

        with tf.Graph().as_default():
            temp_dir = self.get_temp_dir()
            self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

            tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch")
            tb_cbk.writer = FileWriterStub(temp_dir)

            for batch in range(5):
                tb_cbk.on_batch_end(batch, {"acc": batch})
            self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4])
            self.assertEqual(tb_cbk.writer.summary_values,
                             [0.0, 1.0, 2.0, 3.0, 4.0])
            self.assertEqual(tb_cbk.writer.summary_tags, ["batch_acc"] * 5)

    def test_Tensorboard_epoch_and_batch_logging(self):
        class FileWriterStub:
            def __init__(self, logdir, graph=None):
                self.logdir = logdir
                self.graph = graph

            def add_summary(self, summary, step):
                if "batch_" in summary.value[0].tag:
                    self.batch_summary = (step, summary)
                elif "epoch_" in summary.value[0].tag:
                    self.epoch_summary = (step, summary)

            def flush(self):
                pass

            def close(self):
                pass

        with tf.Graph().as_default():
            temp_dir = self.get_temp_dir()
            self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

            tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch")
            tb_cbk.writer = FileWriterStub(temp_dir)

            tb_cbk.on_batch_end(0, {"acc": 5.0})
            tb_cbk.on_train_end()
            batch_step, batch_summary = tb_cbk.writer.batch_summary
            self.assertEqual(batch_step, 0)
            self.assertEqual(batch_summary.value[0].simple_value, 5.0)

            tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="epoch")
            tb_cbk.writer = FileWriterStub(temp_dir)
            tb_cbk.on_epoch_end(0, {"acc": 10.0})
            tb_cbk.on_train_end()
            epoch_step, epoch_summary = tb_cbk.writer.epoch_summary
            self.assertEqual(epoch_step, 0)
            self.assertEqual(epoch_summary.value[0].simple_value, 10.0)

    @test_combinations.generate(
        test_combinations.combine(mode=["graph", "eager"]))
    def test_Tensorboard_eager(self):
        temp_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

        (x_train, y_train), (x_test, y_test) = test_utils.get_test_data(
            train_samples=TRAIN_SAMPLES,
            test_samples=TEST_SAMPLES,
            input_shape=(INPUT_DIM, ),
            num_classes=NUM_CLASSES,
        )
        y_test = np_utils.to_categorical(y_test)
        y_train = np_utils.to_categorical(y_train)

        model = test_utils.get_small_sequential_mlp(num_hidden=NUM_HIDDEN,
                                                    num_classes=NUM_CLASSES,
                                                    input_dim=INPUT_DIM)
        model.compile(
            loss="binary_crossentropy",
            optimizer=tf.compat.v1.train.AdamOptimizer(0.01),
            metrics=["accuracy"],
        )

        cbks = [callbacks_v1.TensorBoard(log_dir=temp_dir)]

        model.fit(
            x_train,
            y_train,
            batch_size=BATCH_SIZE,
            validation_data=(x_test, y_test),
            callbacks=cbks,
            epochs=2,
            verbose=0,
        )

        self.assertTrue(os.path.exists(temp_dir))

    def test_TensorBoard_update_freq(self):
        class FileWriterStub:
            def __init__(self, logdir, graph=None):
                self.logdir = logdir
                self.graph = graph
                self.batch_summaries = []
                self.epoch_summaries = []

            def add_summary(self, summary, step):
                if "batch_" in summary.value[0].tag:
                    self.batch_summaries.append((step, summary))
                elif "epoch_" in summary.value[0].tag:
                    self.epoch_summaries.append((step, summary))

            def flush(self):
                pass

            def close(self):
                pass

        with tf.Graph().as_default():
            temp_dir = self.get_temp_dir()
            self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

            # Epoch mode
            tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="epoch")
            tb_cbk.writer = FileWriterStub(temp_dir)

            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1})
            self.assertEqual(tb_cbk.writer.batch_summaries, [])
            tb_cbk.on_epoch_end(0, {"acc": 10.0, "size": 1})
            self.assertLen(tb_cbk.writer.epoch_summaries, 1)
            tb_cbk.on_train_end()

            # Batch mode
            tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq="batch")
            tb_cbk.writer = FileWriterStub(temp_dir)

            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1})
            self.assertLen(tb_cbk.writer.batch_summaries, 1)
            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 1})
            self.assertLen(tb_cbk.writer.batch_summaries, 2)
            self.assertFalse(tb_cbk.writer.epoch_summaries)
            tb_cbk.on_train_end()

            # Integer mode
            tb_cbk = callbacks_v1.TensorBoard(temp_dir, update_freq=20)
            tb_cbk.writer = FileWriterStub(temp_dir)

            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10})
            self.assertFalse(tb_cbk.writer.batch_summaries)
            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10})
            self.assertLen(tb_cbk.writer.batch_summaries, 1)
            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10})
            self.assertLen(tb_cbk.writer.batch_summaries, 1)
            tb_cbk.on_batch_end(0, {"acc": 5.0, "size": 10})
            self.assertLen(tb_cbk.writer.batch_summaries, 2)
            tb_cbk.on_batch_end(0, {"acc": 10.0, "size": 10})
            self.assertLen(tb_cbk.writer.batch_summaries, 2)
            self.assertFalse(tb_cbk.writer.epoch_summaries)
            tb_cbk.on_train_end()
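            # (Note on the integer mode asserted above.) With update_freq=N,
            # the V1 TensorBoard callback accumulates the per-batch "size"
            # values and writes a summary once at least N samples have arrived
            # since the last write; with update_freq=20 and batches of size
            # 10, that is every second batch.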
class LayerNormalizationTest(test_combinations.TestCase):

  @test_combinations.run_all_keras_modes
  def test_basic_layernorm(self):
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={
            'gamma_regularizer': keras.regularizers.l2(0.01),
            'beta_regularizer': keras.regularizers.l2(0.01)
        },
        input_shape=(3, 4, 2))
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={
            'gamma_initializer': 'ones',
            'beta_initializer': 'ones',
        },
        input_shape=(3, 4, 2))
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={'scale': False,
                'center': False},
        input_shape=(3, 3))
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={'axis': (-3, -2, -1)},
        input_shape=(2, 8, 8, 3))
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        input_shape=(1, 0, 10))

  @test_combinations.run_all_keras_modes
  def test_non_fused_layernorm(self):
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={'axis': -2},
        input_shape=(3, 4, 2))
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={'axis': (-3, -2)},
        input_shape=(2, 8, 8, 3))
    test_utils.layer_test(
        keras.layers.LayerNormalization,
        kwargs={'axis': (-3, -1)},
        input_shape=(2, 8, 8, 3))

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_layernorm_weights(self):
    layer = keras.layers.LayerNormalization(scale=False, center=False)
    layer.build((None, 3, 4))
    self.assertEqual(len(layer.trainable_weights), 0)
    self.assertEqual(len(layer.weights), 0)

    layer = keras.layers.LayerNormalization()
    layer.build((None, 3, 4))
    self.assertEqual(len(layer.trainable_weights), 2)
    self.assertEqual(len(layer.weights), 2)

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def test_layernorm_regularization(self):
    layer = keras.layers.LayerNormalization(
        gamma_regularizer='l1', beta_regularizer='l1')
    layer.build((None, 3, 4))
    self.assertEqual(len(layer.losses), 2)
    max_norm = keras.constraints.max_norm
    layer = keras.layers.LayerNormalization(
        gamma_constraint=max_norm, beta_constraint=max_norm)
    layer.build((None, 3, 4))
    self.assertEqual(layer.gamma.constraint, max_norm)
    self.assertEqual(layer.beta.constraint, max_norm)

  @test_combinations.run_all_keras_modes
  def test_layernorm_convnet_channel_last(self):
    model = keras.models.Sequential()
    norm = keras.layers.LayerNormalization(input_shape=(4, 4, 3))
    model.add(norm)
    model.compile(
        loss='mse',
        optimizer=tf.compat.v1.train.GradientDescentOptimizer(0.01),
        run_eagerly=test_utils.should_run_eagerly())

    # centered on 5.0, standard deviation 10.0
    x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3))
    model.fit(x, x, epochs=4, verbose=0)
    out = model.predict(x)
    out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3))
    out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3))

    np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1)
    np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1)

  @test_combinations.run_all_keras_modes
  def test_layernorm_ragged_tensor(self):
    x = tf.ragged.constant(
        [[[3., 1., 1.], [4., 1., 1.]],
         [[5., 9., 1.]],
         [[1., 2., 1.]]],
        inner_shape=(3,))
    layer = keras.layers.LayerNormalization()
    self.assertEqual(layer(x).shape, (3, None, 3))

  @test_combinations.run_all_keras_modes
  def test_layernorm_correctness(self):
    _run_layernorm_correctness_test(
        layer_normalization.LayerNormalization, dtype='float32')

  @test_combinations.run_all_keras_modes
  def test_layernorm_mixed_precision(self):
    _run_layernorm_correctness_test(
        layer_normalization.LayerNormalization, dtype='float16')

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testIncorrectAxisType(self):
    with self.assertRaisesRegex(TypeError,
                                r'Expected an int or a list/tuple of ints'):
      _ = layer_normalization.LayerNormalization(axis={'axis': -1})

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testInvalidAxis(self):
    with self.assertRaisesRegex(
        ValueError,
        r'Invalid value for `axis` argument. Expected 0 <= axis < inputs.rank'):
      layer_norm = layer_normalization.LayerNormalization(axis=3)
      layer_norm.build(input_shape=(2, 2, 2))

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testDuplicateAxis(self):
    with self.assertRaisesRegex(ValueError, r'Duplicate axis:'):
      layer_norm = layer_normalization.LayerNormalization(axis=[-1, -1])
      layer_norm.build(input_shape=(2, 2, 2))

  @test_combinations.generate(
      test_combinations.combine(mode=['graph', 'eager']))
  def testFusedAttr(self):
    layer_norm = layer_normalization.LayerNormalization(axis=[-2, -1])
    layer_norm.build(input_shape=(2, 2, 2))
    self.assertEqual(layer_norm._fused, True)