Ejemplo n.º 1
0
  def _test_checkpointing_layer_weights(self, strategy_fn,
                                        mixed_prec_when_saving,
                                        mixed_prec_when_loading):
    """Round-trips a layer weight through a checkpoint across dtype policies.

    The policy used while saving and the policy used while loading are chosen
    independently, so the checkpoint may be written with mixed precision
    enabled and read back with it disabled, or vice versa. This is possible
    because variables are float32 regardless of whether mixed precision is
    enabled.

    Args:
      strategy_fn: Zero-argument callable returning an object whose `scope()`
        is entered while each layer is created.
      mixed_prec_when_saving: If truthy, the saved layer is created under the
        'mixed_float16' policy; otherwise under 'float32'.
      mixed_prec_when_loading: Same choice for the layer that is restored.
    """

    def pick_policy_and_input_dtype(use_mixed_precision):
      # Returns (policy name, dtype handed to the layer as `assert_type`).
      if use_mixed_precision:
        return 'mixed_float16', 'float16'
      return 'float32', 'float32'

    save_policy, save_input_dtype = pick_policy_and_input_dtype(
        mixed_prec_when_saving)
    load_policy, load_input_dtype = pick_policy_and_input_dtype(
        mixed_prec_when_loading)

    # Create a layer and save a checkpoint.
    inputs = tf.constant([1.])
    with strategy_fn().scope(), policy.policy_scope(save_policy):
      mul_layer = mp_test_util.MultiplyLayer(assert_type=save_input_dtype)
      mul_layer(inputs)  # Build layer
    mul_layer.set_weights([np.array(100.)])
    self.assertEqual(self.evaluate(mul_layer(inputs)), 100.)
    ckpt = tf.train.Checkpoint(layer=mul_layer)
    ckpt_prefix = os.path.join(self.get_temp_dir(), 'ckpt')
    save_path = ckpt.save(ckpt_prefix)

    # Create a new layer, give it a different weight, then restore the
    # checkpoint and check the saved weight is back.
    inputs = tf.constant([1.])
    with strategy_fn().scope(), policy.policy_scope(load_policy):
      mul_layer = mp_test_util.MultiplyLayer(assert_type=load_input_dtype)
      mul_layer(inputs)  # Build layer
    mul_layer.set_weights([np.array(200.)])
    self.assertEqual(self.evaluate(mul_layer(inputs)), 200.)
    ckpt = tf.train.Checkpoint(layer=mul_layer)
    ckpt.restore(save_path).assert_consumed().run_restore_ops()
    self.assertEqual(mul_layer.get_weights(), [100.])
    self.assertEqual(self.evaluate(mul_layer(inputs)), 100.)
 def test_error_if_policy_is_set(self):
   """Checks the graph rewrite is rejected only under a mixed global policy.

   Enabling the rewrite inside a 'mixed_float16' policy scope must raise a
   ValueError. Enabling it while the policy is the default, or inside a
   'float64' policy scope, must not raise.
   """
   expected_error = 'the global Keras dtype Policy has been set'
   with policy.policy_scope('mixed_float16'), self.assertRaisesRegex(
       ValueError, expected_error):
     enable_mixed_precision_graph_rewrite(gradient_descent_v2.SGD(1.0))

   # No error is expected while the policy is currently the default.
   default_policy_opt = gradient_descent_v2.SGD(1.0)
   enable_mixed_precision_graph_rewrite(default_policy_opt)

   # No error is expected under a non-mixed policy either.
   with policy.policy_scope('float64'):
     float64_policy_opt = gradient_descent_v2.SGD(1.0)
     enable_mixed_precision_graph_rewrite(float64_policy_opt)
Ejemplo n.º 3
0
 def test_policy_scope(self):
     """Checks that nested policy scopes set and then restore the global policy.

     The policy expected once every scope has exited depends on
     `base_layer_utils.v2_dtype_behavior_enabled()`: "float32" when it returns
     a truthy value, "_infer" otherwise.
     """
     default_policy = (
         "float32"
         if base_layer_utils.v2_dtype_behavior_enabled()
         else "_infer"
     )

     def current_policy_name():
         return mp_policy.global_policy().name

     with mp_policy.policy_scope("mixed_float16"):
         self.assertEqual(current_policy_name(), "mixed_float16")
         with mp_policy.policy_scope("_infer"):
             self.assertEqual(current_policy_name(), "_infer")
         # Leaving the inner scope brings back the outer scope's policy.
         self.assertEqual(current_policy_name(), "mixed_float16")
     # Leaving the outer scope brings back the default policy.
     self.assertEqual(current_policy_name(), default_policy)
Ejemplo n.º 4
0
 def test_policy_scope(self):
     """Verifies the global policy inside and after nested policy scopes.

     Outside any scope the expected policy name is 'float32' when
     `base_layer_utils.v2_dtype_behavior_enabled()` is truthy and '_infer'
     otherwise.
     """
     v2_behavior = base_layer_utils.v2_dtype_behavior_enabled()
     default_policy = 'float32' if v2_behavior else '_infer'

     def assert_global_policy_is(expected_name):
         self.assertEqual(mp_policy.global_policy().name, expected_name)

     with mp_policy.policy_scope('mixed_float16'):
         assert_global_policy_is('mixed_float16')
         with mp_policy.policy_scope('_infer'):
             assert_global_policy_is('_infer')
         # The outer scope's policy is active again after the inner one exits.
         assert_global_policy_is('mixed_float16')
     # With all scopes exited, the default policy is active again.
     assert_global_policy_is(default_policy)
Ejemplo n.º 5
0
 def test_v1_dtype_behavior(self):
     """Checks that entering a policy scope raises with V1 dtype behavior.

     Setting global policies is not allowed with V1 dtype behavior, so a
     ValueError is expected for each of the '_infer', 'float32' and
     'mixed_float16' policies.
     """
     expected_error = 'global policy can only be set in TensorFlow 2'
     for policy_name in ('_infer', 'float32', 'mixed_float16'):
         with self.assertRaisesRegex(ValueError, expected_error):
             with mp_policy.policy_scope(mp_policy.Policy(policy_name)):
                 pass
Ejemplo n.º 6
0
    def test_gradient(self, strategy_fn):
        """Checks one SGD step is applied to the float32 variable.

        Args:
            strategy_fn: Zero-argument callable returning an object whose
                `scope()` yields the strategy used to run the training step.
        """
        # The learning rate is small enough that if applied to a float16
        # variable, the variable will not change. So this tests the learning
        # rate is not applied to a float16 value, but instead the float32
        # variable.
        learning_rate = 2**-14
        x = tf.constant([1.])
        with strategy_fn().scope() as strategy, policy.policy_scope(
                'mixed_float16'):
            layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
            opt = gradient_descent.SGD(learning_rate)

            def step_fn():
                with tf.GradientTape() as tape:
                    # Divide by num_replicas_in_sync, as the effective total
                    # loss is the sum of each of the replica's losses.
                    y = layer(x) / strategy.num_replicas_in_sync

                grad = tape.gradient(y, layer.v)
                return opt.apply_gradients([(grad, layer.v)])

            train_op = strategy.experimental_run(step_fn)
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.evaluate(train_op)
            # The gradient with respect to the variable is 1. Since the
            # variable is initialized with 1, the new variable value should
            # be: init_val - gradient * learning_rate, i.e. 1 - 1 * 2**-14.
            self.assertEqual(self.evaluate(layer.v), 1 - learning_rate)
Ejemplo n.º 7
0
    def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
        """Checks the regularizer loss is float32 under 'mixed_float16'.

        The same checks run first on `MultiplyLayer` and then on
        `MultiplyLayerWithoutAutoCast`: the layer must expose exactly one
        loss, that loss must have dtype float32, and it must evaluate to 1.0.

        Args:
            strategy_fn: Zero-argument callable returning an object whose
                `scope()` is entered while the layers are created and called.
        """
        layer_classes = (
            mp_test_util.MultiplyLayer,
            mp_test_util.MultiplyLayerWithoutAutoCast,
        )
        x = tf.constant([1.0])
        with strategy_fn().scope(), policy.policy_scope("mixed_float16"):
            for layer_cls in layer_classes:
                layer = layer_cls(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                )
                layer(x)
                # Unpacking fails unless there is exactly one loss.
                (regularizer_loss,) = layer.losses
                self.assertEqual(regularizer_loss.dtype, tf.float32)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.0)
Ejemplo n.º 8
0
    def test_lstm_model_correctness_mixed_precision(self, distribution,
                                                    use_numpy,
                                                    use_validation_data):
        """Runs the correctness test under a mixed precision policy.

        The test is skipped for CentralStorageStrategy instances. For
        TPUStrategy instances the 'mixed_bfloat16' policy is used; every
        other strategy uses 'mixed_float16'.

        Args:
            distribution: Strategy instance, forwarded to
                `run_correctness_test`.
            use_numpy: Forwarded unchanged to `run_correctness_test`.
            use_validation_data: Forwarded unchanged to
                `run_correctness_test`.
        """
        central_storage_types = (
            tf.distribute.experimental.CentralStorageStrategy,
            tf.compat.v1.distribute.experimental.CentralStorageStrategy,
        )
        if isinstance(distribution, central_storage_types):
            self.skipTest(
                "CentralStorageStrategy is not supported by mixed precision.")

        tpu_types = (
            tf.distribute.experimental.TPUStrategy,
            tf.compat.v1.distribute.experimental.TPUStrategy,
        )
        on_tpu = isinstance(distribution, tpu_types)
        policy_name = "mixed_bfloat16" if on_tpu else "mixed_float16"

        with policy.policy_scope(policy_name):
            self.run_correctness_test(
                distribution, use_numpy, use_validation_data)
Ejemplo n.º 9
0
  def test_save_slot_variables_with_autocast_vars(self,
                                                  strategy_fn,
                                                  var_name='v'):
    """Checks the 'momentum' slot of `layer.v` survives save/load of weights.

    The model is fit once and its weights are saved. It is then fit again,
    which must change the slot value, and loading the weights must bring the
    saved slot value back.

    Args:
      strategy_fn: Zero-argument callable returning an object whose `scope()`
        is entered while the model is built and compiled.
      var_name: Passed to `MultiplyLayer` as `var_name`.
    """
    with strategy_fn().scope():
      with policy.policy_scope(policy.Policy('mixed_float16')):
        inputs = layers.Input(shape=(2,), batch_size=2)
        # Having a var_name other than 'v' tests that a fixed bug
        # (b/134713714) does not reoccur. The bug was that a crash would occur
        # when saving a checkpoint where an AutoCastVariable with a slot
        # variable would have a different name than the layer attribute's name
        # (layer.v in this case).
        layer = mp_test_util.MultiplyLayer(assert_type=tf.float16,
                                           var_name=var_name)
        outputs = layer(inputs)
        model = models.Model(inputs=inputs, outputs=outputs)
        opt = loss_scale_optimizer.LossScaleOptimizer(
            gradient_descent.SGD(1., 1.), dynamic=False, initial_scale=1)
        model.compile(
            optimizer=opt,
            loss='mse',
            run_eagerly=testing_utils.should_run_eagerly())

    def fit_once():
      model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)

    def momentum_slot_value():
      return backend.get_value(opt.get_slot(layer.v, 'momentum'))

    fit_once()
    weights_file = os.path.join(self.get_temp_dir(), 'weights')
    model.save_weights(weights_file)
    saved_slot = momentum_slot_value()

    # Another training step must move the slot away from the saved value.
    fit_once()
    self.assertNotEqual(momentum_slot_value(), saved_slot)

    # Loading the weights must bring the saved slot value back.
    model.load_weights(weights_file)
    self.assertEqual(momentum_slot_value(), saved_slot)
Ejemplo n.º 10
0
 def test_error_if_policy_is_set(self):
     """Checks the graph rewrite is rejected only under a mixed global policy.

     Enabling the rewrite inside a "mixed_float16" policy scope must raise a
     ValueError. Enabling it while the policy is the default, or inside a
     "float64" policy scope, must not raise.
     """
     enable_rewrite = (
         tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite
     )
     expected_error = "the global Keras dtype Policy has been set"

     with policy.policy_scope("mixed_float16"), self.assertRaisesRegex(
         ValueError, expected_error
     ):
         enable_rewrite(gradient_descent_v2.SGD(1.0))

     # No error is expected while the policy is currently the default.
     enable_rewrite(gradient_descent_v2.SGD(1.0))

     # No error is expected under a non-mixed policy either.
     with policy.policy_scope("float64"):
         enable_rewrite(gradient_descent_v2.SGD(1.0))
Ejemplo n.º 11
0
 def test_functional_model_loss_dtype(self):
     """Checks a float32 loss added under the "float16" policy stays float32."""
     with policy.policy_scope("float16"):
         inputs = layers.Input(shape=(1,))
         outputs = mp_test_util.MultiplyLayer()(inputs)
         model = models.Model(inputs, outputs)
         float32_loss = tf.cast(outputs, "float32")
         model.add_loss(float32_loss)
         # The loss should not be cast to the policy's dtype.
         first_loss = model.losses[0]
         self.assertEqual(first_loss.dtype, "float32")
Ejemplo n.º 12
0
 def test_mixed_float16_policy(self):
     """Calls a causal AdditiveAttention layer on float16 inputs.

     Test case for GitHub issue:
     https://github.com/tensorflow/tensorflow/issues/46064
     """

     def random_float16(seed):
         # Uniform random tensor of shape (2, 3, 4), cast to float16.
         return tf.cast(tf.random.uniform((2, 3, 4), seed=seed), 'float16')

     with policy.policy_scope('mixed_float16'):
         q = random_float16(1)
         v = random_float16(2)
         k = random_float16(3)
         attention = keras.layers.AdditiveAttention(causal=True)
         # The test only requires that the call completes.
         _ = attention([q, v, k])
Ejemplo n.º 13
0
 def test_layer_calling_tf_function(self, strategy_fn):
     """Checks `MultiplyLayerWithFunction` dtypes under 'mixed_float16'.

     The layer variable must be float32, while the output must be float16 and
     evaluate to 1.

     Args:
         strategy_fn: Zero-argument callable returning an object whose
             `scope()` is entered while the layer is created and called.
     """
     inputs = tf.constant([1.])
     with strategy_fn().scope(), policy.policy_scope('mixed_float16'):
         layer = MultiplyLayerWithFunction(assert_type=tf.float16)
         outputs = layer(inputs)
         self.assertEqual(layer.v.dtype, tf.float32)
         self.assertEqual(outputs.dtype, tf.float16)
         self.evaluate(tf.compat.v1.global_variables_initializer())
         self.assertEqual(self.evaluate(outputs), 1.)
Ejemplo n.º 14
0
 def test_loss_scale_optimizer_overrides_policy_v1_loss_scale(self):
   """Checks an explicit LossScaleOptimizer's scale wins over the policy's.

   The PolicyV1 in scope is created with loss_scale=10., the optimizer with
   initial_scale=5.; after compiling, the model optimizer's loss scale must
   be 5.
   """
   v1_policy = policy.PolicyV1('float32', loss_scale=10.)
   with policy.policy_scope(v1_policy):
     opt = loss_scale_optimizer.LossScaleOptimizer(
         gradient_descent.SGD(1.), dynamic=False, initial_scale=5.)
     inputs = layers.Input(shape=(1,))
     outputs = mp_test_util.MultiplyLayer()(inputs)
     model = models.Model(inputs, outputs)
     model.compile(opt, loss='mse')
     compiled_loss_scale = self.evaluate(model.optimizer.loss_scale)
     self.assertEqual(compiled_loss_scale, 5.)
Ejemplo n.º 15
0
 def test_policy_v1_without_loss_scale(self):
   """Checks compile adds no LossScaleOptimizer when loss_scale is None.

   The policy in scope is a 'mixed_float16' PolicyV1 created with
   loss_scale=None.
   """
   v1_policy = policy.PolicyV1('mixed_float16', loss_scale=None)
   with policy.policy_scope(v1_policy):
     sgd = gradient_descent.SGD(1.)
     inputs = layers.Input(shape=(1,))
     outputs = mp_test_util.MultiplyLayer()(inputs)
     model = models.Model(inputs, outputs)
     model.compile(sgd, loss='mse')
     lso_cls = loss_scale_optimizer.LossScaleOptimizer
     self.assertNotIsInstance(model.optimizer, lso_cls)
Ejemplo n.º 16
0
    def test_compile_wraps_with_loss_scale_optimizer(self):
        """Checks when `compile` wraps the optimizer in a LossScaleOptimizer.

        Under "mixed_float16" the compiled optimizer must be a
        LossScaleOptimizer (LSO) whether an optimizer instance, the string
        "sgd", or an existing LSO was passed. Under "mixed_bfloat16" the
        optimizer must stay a plain SGD.
        """
        inputs = layers.Input(shape=(1,))
        outputs = mp_test_util.MultiplyLayer()(inputs)
        lso_cls = loss_scale_optimizer.LossScaleOptimizer

        with policy.policy_scope("mixed_float16"):
            # An optimizer instance is automatically wrapped with an LSO.
            model = models.Model(inputs, outputs)
            model.compile(gradient_descent.SGD(1.0), "mse")
            self.assertIsInstance(model.optimizer, lso_cls)
            learning_rate = backend.get_value(model.optimizer.learning_rate)
            self.assertEqual(learning_rate, 1.0)

            # An optimizer given as a string is automatically wrapped too.
            model = models.Model(inputs, outputs)
            model.compile("sgd", "mse")
            self.assertIsInstance(model.optimizer, lso_cls)

            # An LSO that is passed in is not wrapped with another LSO; the
            # settings it was created with are still visible afterwards.
            model = models.Model(inputs, outputs)
            explicit_lso = lso_cls(
                gradient_descent.SGD(1.0), dynamic_growth_steps=2
            )
            model.compile(explicit_lso, "mse")
            self.assertIsInstance(model.optimizer, lso_cls)
            self.assertEqual(model.optimizer.dynamic_growth_steps, 2)

        with policy.policy_scope("mixed_bfloat16"):
            # mixed_bfloat16 models are not automatically wrapped with an LSO.
            model = models.Model(inputs, outputs)
            model.compile(gradient_descent.SGD(1.0), "mse")
            self.assertNotIsInstance(model.optimizer, lso_cls)
            self.assertIsInstance(model.optimizer, gradient_descent.SGD)
Ejemplo n.º 17
0
 def test_layer_with_non_autocast_variable(self, strategy_fn):
     """Checks `MultiplyLayerWithoutAutoCast` dtypes under 'mixed_float16'.

     The layer variable must be float32, while the output must be float16 and
     evaluate to 1.

     Args:
         strategy_fn: Zero-argument callable returning an object whose
             `scope()` is entered while the layer is created and called.
     """
     inputs = tf.constant([1.])
     with strategy_fn().scope(), policy.policy_scope('mixed_float16'):
         layer = mp_test_util.MultiplyLayerWithoutAutoCast(
             assert_type=tf.float16)
         outputs = layer(inputs)
         self.assertEqual(layer.v.dtype, tf.float32)
         self.assertEqual(outputs.dtype, tf.float16)
         self.evaluate(tf.compat.v1.global_variables_initializer())
         self.assertEqual(self.evaluate(outputs), 1.)
Ejemplo n.º 18
0
 def test_pass_invalid_optimizer_with_loss_scaling(self):
     """Checks compiling with an `optimizer_v1.SGD` raises a ValueError.

     The expected error message depends on whether TensorFlow is executing
     eagerly.
     """
     with policy.policy_scope(policy.Policy("mixed_float16")):
         inputs = layers.Input(shape=(1,))
         outputs = mp_test_util.MultiplyLayer()(inputs)
         model = models.Model(inputs, outputs)
         error_msg = (
             "Use a `tf.keras` Optimizer instead"
             if tf.executing_eagerly()
             else 'optimizer" must be an instance of '
         )
         with self.assertRaisesRegex(ValueError, error_msg):
             model.compile(optimizer_v1.SGD(1.0), "mse")
Ejemplo n.º 19
0
 def test_error_if_graph_rewrite_enabled(self):
   """Checks a mixed global policy is rejected once the rewrite is enabled.

   With the mixed precision graph rewrite enabled, setting the global policy
   to 'mixed_float16' must raise a ValueError, while entering a 'float64'
   policy scope must not. The rewrite is always disabled again on exit.
   """
   expected_error = (
       'cannot be set to "mixed_float16", .* the mixed precision graph '
       'rewrite has already been enabled')
   try:
     sgd = gradient_descent.SGD(1.)
     tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(sgd)
     with self.assertRaisesRegex(ValueError, expected_error):
       mp_policy.set_global_policy('mixed_float16')
     # Non-mixed policies are allowed.
     with mp_policy.policy_scope('float64'):
       pass
   finally:
     # Undo the rewrite even if an assertion above failed.
     tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
Ejemplo n.º 20
0
    def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False):
        """Checks model weights round-trip through save_weights/load_weights.

        The weight is set to 100 and saved, overwritten with 200, and must
        be 100 again after loading.

        Args:
            strategy_fn: Zero-argument callable returning an object whose
                `scope()` is entered while the model is built.
            h5: If truthy, the weights file name gets an ".h5" suffix.
        """
        with strategy_fn().scope(), policy.policy_scope("mixed_float16"):
            inputs = layers.Input(shape=(1,), batch_size=2)
            layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
            outputs = layer(inputs)
            model = models.Model(inputs=inputs, outputs=outputs)

        model.set_weights([np.array(100.0)])
        ones = np.ones((2, 1))

        def assert_output_is_ones_times(factor):
            self.assertAllClose(backend.get_value(model(ones)), ones * factor)

        assert_output_is_ones_times(100.0)
        file_name = "weights" + (".h5" if h5 else "")
        weights_file = os.path.join(self.get_temp_dir(), file_name)
        model.save_weights(weights_file)

        # Overwrite the weight so the load below has something to undo.
        model.set_weights([np.array(200.0)])
        assert_output_is_ones_times(200.0)
        model.load_weights(weights_file)
        assert_output_is_ones_times(100.0)
        self.assertEqual(model.get_weights(), [np.array(100.0)])
Ejemplo n.º 21
0
 def test_mixed_policies_(self, strategy_fn):
     """Checks layer dtypes under the 'mixed_float16'/'mixed_bfloat16' policies.

     For each policy the layer must report float32 as its dtype and variable
     dtype, the low-precision dtype as its compute and output dtype, and the
     policy name must be unchanged before and after the layer is called.

     Args:
         strategy_fn: Zero-argument callable returning an object whose
             `scope()` is entered for each policy that is tested.
     """
     strategy = strategy_fn()
     for low_precision_dtype in ('float16', 'bfloat16'):
         inputs = tf.constant([1.])
         policy_name = 'mixed_{}'.format(low_precision_dtype)
         with strategy.scope():
             with policy.policy_scope(policy_name):
                 layer = mp_test_util.MultiplyLayer(
                     assert_type=low_precision_dtype)
                 # Properties before the layer is called.
                 self.assertEqual(layer.dtype, tf.float32)
                 self.assertEqual(layer.dtype_policy.name, policy_name)
                 outputs = layer(inputs)
                 # Properties after the layer has been called.
                 self.assertEqual(layer.v.dtype, tf.float32)
                 self.assertEqual(outputs.dtype, low_precision_dtype)
                 self.assertEqual(layer.dtype_policy.name, policy_name)
                 self.assertIsInstance(layer.dtype_policy, policy.Policy)
                 self.assertEqual(layer.compute_dtype, low_precision_dtype)
                 self.assertEqual(layer.dtype, tf.float32)
                 self.assertEqual(layer.variable_dtype, tf.float32)
                 self.assertEqual(layer.dtype_policy.name, policy_name)
                 self.evaluate(tf.compat.v1.global_variables_initializer())
                 self.assertEqual(self.evaluate(outputs), 1.)
Ejemplo n.º 22
0
 def test_passing_policy_to_layer(self, strategy_fn):
     """Checks a Policy passed as a layer's `dtype` sets that layer's policy.

     Args:
         strategy_fn: Zero-argument callable returning an object whose
             `scope()` is entered for the whole test body.
     """
     float16_input = tf.constant([1.], dtype=tf.float16)
     with strategy_fn().scope():
         # Passing a Policy to 'dtype' sets the policy for that layer.
         mixed_policy = policy.Policy('mixed_float16')
         layer = mp_test_util.MultiplyLayer(
             assert_type=tf.float16, dtype=mixed_policy)
         # layer.dtype refers to the variable dtype
         self.assertEqual(layer.dtype, tf.float32)
         layer(float16_input)
         self.assertEqual(layer.v.dtype, tf.float32)

         with policy.policy_scope('mixed_float16'):
             # Passing a Policy to dtype overrides the global Policy
             float64_policy = policy.Policy('float64')
             layer = mp_test_util.MultiplyLayer(
                 assert_type=tf.float64, dtype=float64_policy)
             self.assertEqual(layer.dtype_policy.name, 'float64')
             self.assertIsInstance(layer.dtype_policy, policy.Policy)
             self.assertEqual(layer.compute_dtype, tf.float64)
             self.assertEqual(layer.dtype, tf.float64)
             self.assertEqual(layer.variable_dtype, tf.float64)
             outputs = layer(float16_input)
             self.assertEqual(outputs.dtype, tf.float64)
             self.assertEqual(layer.v.dtype, tf.float64)
Ejemplo n.º 23
0
    def test_dynamic_loss_scaling(self, strategy_fn, get_config=False):
        """Trains a mixed_float16 model with a dynamic LossScaleOptimizer.

        The model applies a `MultiplyLayer` followed by two Lambda layers
        built from `mp_test_util` helpers: one is given the
        `have_nan_gradients` variable and the other is given the expected
        gradient dtype and value. After each call to `fit` the test checks
        the value of the layer variable `layer.v`.

        Args:
            strategy_fn: Zero-argument callable returning an object whose
                `scope()` is entered while the model is built and compiled.
            get_config: If truthy, the model is rebuilt from
                `model.get_config()` before it is compiled, and `layer` is
                re-bound to the `MultiplyLayer` of the rebuilt model.
        """
        strategy = strategy_fn()
        initial_loss_scale = 2.0
        batch_size = 4
        # Updated with `backend.set_value` further down whenever the loss
        # scale is expected to have changed.
        expected_gradient = backend.variable(
            [initial_loss_scale / batch_size], dtype=tf.float16
        )
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=tf.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.0)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2
            )
            with policy.policy_scope("mixed_float16"):
                x = layers.Input(
                    shape=(1,), batch_size=batch_size, dtype=tf.float16
                )
                layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients
                    )
                )
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=tf.float16,
                        expected_gradient=expected_gradient,
                    )
                )
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    # Re-bind `layer` to the rebuilt model's MultiplyLayer;
                    # the unpacking fails unless there is exactly one.
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                # The loss ignores the labels and is the mean prediction.
                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        # The dataset holds a single batch, so each `fit` call is one step.
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        model.fit(dataset)
        # The variables starts with 1 and has a gradient of 1, so will go down by 1
        # each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient * 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(
            expected_gradient, backend.get_value(expected_gradient / 2)
        )
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Ejemplo n.º 24
0
    def test_model(
        self,
        strategy_fn,
        use_operator=False,
        use_regularizer=False,
        policy_name="mixed_float16",
        get_config=False,
        save_format=None,
        use_input_spec=False,
    ):
        """Trains a one-layer model for one step and checks the variable.

        Args:
            strategy_fn: Zero-argument callable returning an object whose
                `scope()` is entered while the model is built and compiled.
                It is also passed to `_skip_if_strategy_unsupported`.
            use_operator: Forwarded to `MultiplyLayer` as `use_operator`.
            use_regularizer: If truthy, the layer gets an
                `IdentityRegularizer` weight regularizer and a
                `ReduceSumRegularizer` activity regularizer; otherwise both
                are None.
            policy_name: Name of the policy scope the model is built under.
            get_config: If truthy, the model is rebuilt from
                `model.get_config()` before it is compiled, and `layer` is
                re-bound to the `MultiplyLayer` of the rebuilt model.
            save_format: If truthy, `_test_saving` is also run with this
                format. It is also passed to
                `_skip_if_save_format_unsupported`.
            use_input_spec: If truthy, `layer.input_spec` is set to an
                `InputSpec` with shape (None, 1).
        """
        # NOTE(review): these helpers are defined outside this view;
        # presumably they skip the test for unsupported configurations.
        self._skip_if_strategy_unsupported(strategy_fn)
        self._skip_if_save_format_unsupported(save_format)
        if use_regularizer:
            weight_regularizer = mp_test_util.IdentityRegularizer()
            activity_regularizer = mp_test_util.ReduceSumRegularizer()
        else:
            weight_regularizer = activity_regularizer = None
        with strategy_fn().scope():
            with policy.policy_scope(policy_name):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    use_operator=use_operator,
                    regularizer=weight_regularizer,
                    activity_regularizer=activity_regularizer,
                    input_shape=(1,),
                )
                if use_input_spec:
                    layer.input_spec = input_spec.InputSpec(shape=(None, 1))
                model = test_utils.get_model_from_layers(
                    [layer], input_shape=(1,), input_dtype=tf.float16
                )
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    # Re-bind `layer` to the rebuilt model's MultiplyLayer;
                    # the unpacking fails unless there is exactly one.
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                # The loss ignores the labels and is the mean prediction.
                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                # Learning rate is small enough that if applied to a float16 variable,
                # the variable will not change. So this tests the learning rate not
                # applied to a float16 value, but instead the float32 variable.
                opt = gradient_descent.SGD(2**-14)
                # Use a fixed loss scale, as this test will fail if gradients are
                # skipped for a step due to dynamic loss scaling.
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=8
                )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        # The dataset holds a single batch of two examples.
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 2 ** -14 subtracted
        # from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # Weight and activity regularizer each add another 2 ** -14 to the
            # gradient.
            expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            # Register the custom layer and loss by name while
            # `_test_saving` runs.
            with generic_utils.CustomObjectScope(
                {
                    "MultiplyLayer": mp_test_util.MultiplyLayer,
                    "loss_fn": loss_fn,
                }
            ):
                self._test_saving(model, dataset, save_format, use_regularizer)
Ejemplo n.º 25
0
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        """Trains a four-layer mixed_float16 model for one step.

        Args:
            strategy_fn: Zero-argument callable returning an object whose
                `scope()` is entered while the model is built and compiled.
            use_loss_scaling: If truthy, a gradient-check Lambda layer is
                appended to the model and the optimizer is wrapped in a
                `LossScaleOptimizer` with a fixed scale of 8.0.
        """
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some which use auto-cast variables and some which do
        #    not
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        strategy = strategy_fn()
        # `loss_scale` is only bound when loss scaling is requested; every
        # later use of it is guarded by the same flag.
        if use_loss_scaling:
            loss_scale = 8.0
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy("mixed_float16")):
                x = layers.Input(shape=(1,), batch_size=2)
                # layer1 and layer4 are the two layers with a regularizer.
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True,
                )
                layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16, use_operator=True
                )
                layer3 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16, use_operator=False
                )
                layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False,
                )
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=tf.float16,
                            expected_gradient=[expected_gradient],
                        )
                    )
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                # The loss ignores the labels and is the mean prediction.
                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale
                    )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        # The dataset holds a single batch of two examples.
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
Ejemplo n.º 26
0
 def test_application_model(self, app):
     """Builds `app` without weights under the "mixed_float16" policy.

     Args:
         app: Callable invoked as `app(weights=None)`.
     """
     with policy.policy_scope("mixed_float16"):
         # Run on CPU since model weights may exhaust GPU memory
         with tf.device("/CPU:0"):
             app(weights=None)
Ejemplo n.º 27
0
 def test_mixed_float16_policy(self):
     """Calls GlobalAveragePooling1D with two inputs under "mixed_float16".

     The first input is float16 with shape (36, 512), the second is bool with
     shape (36,). The test only requires that the call completes.
     """
     with policy.policy_scope("mixed_float16"):
         float16_input = keras.Input(shape=(36, 512), dtype="float16")
         bool_input = keras.Input(shape=(36,), dtype="bool")
         pooling = keras.layers.GlobalAveragePooling1D()
         _ = pooling(float16_input, bool_input)
Ejemplo n.º 28
0
 def test_application_model(self, app):
     """Builds `app` without weights under the 'mixed_float16' policy.

     Args:
         app: Callable invoked as `app(weights=None)`.
     """
     with policy.policy_scope('mixed_float16'):
         # Run on CPU since model weights may exhaust GPU memory
         with tf.compat.v1.device('/CPU:0'):
             app(weights=None)