Example #1
    def _test_checkpointing_layer_weights(self, strategy_fn,
                                          mixed_prec_when_saving,
                                          mixed_prec_when_loading):
        # In this test, we potentially save with mixed precision enabled and load
        # with mixed precision disabled, or vice versa. This is possible because
        # variables are float32 regardless of whether mixed precision is enabled.
        save_policy = 'infer_float32_vars' if mixed_prec_when_saving else 'infer'
        load_policy = 'infer_float32_vars' if mixed_prec_when_loading else 'infer'
        save_input_dtype = 'float16' if mixed_prec_when_saving else 'float32'
        load_input_dtype = 'float16' if mixed_prec_when_loading else 'float32'

        # Create a layer and save a checkpoint.
        x = constant_op.constant([1.], dtype=save_input_dtype)
        with strategy_fn().scope():
            with policy.policy_scope(save_policy):
                layer = AddLayer(assert_type=save_input_dtype)
                layer.build(())
        layer.set_weights([np.array(100.)])
        self.assertEqual(self.evaluate(layer(x)), 101.)
        checkpoint = trackable_utils.Checkpoint(layer=layer)
        prefix = os.path.join(self.get_temp_dir(), 'ckpt')
        save_path = checkpoint.save(prefix)

        # Create a new layer and restore the checkpoint.
        x = constant_op.constant([1.], dtype=load_input_dtype)
        with strategy_fn().scope():
            with policy.policy_scope(load_policy):
                layer = AddLayer(assert_type=load_input_dtype)
                layer.build(())
        layer.set_weights([np.array(200.)])
        self.assertEqual(self.evaluate(layer(x)), 201.)
        checkpoint = trackable_utils.Checkpoint(layer=layer)
        checkpoint.restore(save_path).assert_consumed().run_restore_ops()
        self.assertEqual(layer.get_weights(), [100.])
        self.assertEqual(self.evaluate(layer(x)), 101.)
Example #2
 def test_policy_scope(self):
   with mp_policy.policy_scope('infer_float32_vars'):
     self.assertEqual(mp_policy.global_policy().name, 'infer_float32_vars')
     with mp_policy.policy_scope('infer'):
       self.assertEqual(mp_policy.global_policy().name, 'infer')
     self.assertEqual(mp_policy.global_policy().name, 'infer_float32_vars')
   self.assertEqual(mp_policy.global_policy().name, 'infer')
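
The nesting assertions above rely on policy_scope saving the currently active global policy and restoring it on exit. A minimal sketch of that save-and-restore pattern, written as a hypothetical standalone context manager (not the actual tf.keras implementation), could look like this:

  import contextlib

  _global_policy = 'infer'  # hypothetical module-level default

  @contextlib.contextmanager
  def policy_scope(name):
    # Remember the active policy, install the new one, and always restore the
    # previous one on exit, so nested scopes unwind in the order asserted above.
    global _global_policy
    previous = _global_policy
    _global_policy = name
    try:
      yield
    finally:
      _global_policy = previous
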
Example #3
 def test_error_if_policy_is_set(self):
   with policy.policy_scope('mixed_float16'):
     with self.assertRaisesRegexp(
         ValueError, 'the global Keras dtype Policy has been set'):
       enable_mixed_precision_graph_rewrite(gradient_descent_v2.SGD(1.0))
   # Test no error is thrown when the policy is currently the default.
   enable_mixed_precision_graph_rewrite(gradient_descent_v2.SGD(1.0))
   # Test no error is thrown when the policy is a non-mixed policy.
   with policy.policy_scope('float64'):
     enable_mixed_precision_graph_rewrite(gradient_descent_v2.SGD(1.0))
Example #4
 def test_policy_scope(self):
   if base_layer_utils.v2_dtype_behavior_enabled():
     default_policy = 'float32'
   else:
     default_policy = '_infer'
   with mp_policy.policy_scope('mixed_float16'):
     self.assertEqual(mp_policy.global_policy().name, 'mixed_float16')
     with mp_policy.policy_scope('_infer'):
       self.assertEqual(mp_policy.global_policy().name, '_infer')
     self.assertEqual(mp_policy.global_policy().name, 'mixed_float16')
   self.assertEqual(mp_policy.global_policy().name, default_policy)
Example #5
 def test_v1_dtype_behavior(self):
   # Setting the global policy is not allowed with V1 dtype behavior
   with self.assertRaisesRegex(
       ValueError, 'global policy can only be set in TensorFlow 2'):
     with mp_policy.policy_scope(mp_policy.Policy('_infer')):
       pass
   with self.assertRaisesRegex(
       ValueError, 'global policy can only be set in TensorFlow 2'):
     with mp_policy.policy_scope(mp_policy.Policy('float32')):
       pass
   with self.assertRaisesRegex(
       ValueError, 'global policy can only be set in TensorFlow 2'):
     with mp_policy.policy_scope(mp_policy.Policy('mixed_float16')):
       pass
Example #6
  def test_gradient(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope() as strategy:
      with policy.policy_scope('infer_float32_vars'):
        layer = AddLayer(assert_type=dtypes.float16)
        def run_fn():
          with backprop.GradientTape() as tape:
            y = layer(x)
            # Divide by num_replicas_in_sync, as the effective total loss is
            # the sum of each replica's loss.
            y /= strategy.num_replicas_in_sync

          # The learning rate is small enough that, if applied to a float16
          # variable, the variable would not change. So this tests that the
          # learning rate is applied to the float32 variable, not a float16
          # value.
          opt = gradient_descent.SGD(2 ** -14)
          grad = tape.gradient(y, layer.v)
          return opt.apply_gradients([(grad, layer.v)])

        op = strategy.experimental_run(run_fn)
        if not context.executing_eagerly():
          self.evaluate(variables.global_variables_initializer())
          self.evaluate(op)
        # The gradient with respect to the variable is 1. Since the variable
        # is initialized to 1 and the learning rate is 2**-14, the new variable
        # value should be init_val - gradient * learning_rate, which is
        # 1 - 1 * 2**-14.
        self.assertEqual(self.evaluate(layer.v), 1 - 2 ** -14)
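
The assertion works because float16 cannot represent 1 - 2**-14: near 1.0 its closest representable values are 1 - 2**-11 and 1.0, so a float16 update rounds back to 1.0, while float32 keeps it. An illustrative NumPy check of that rounding, separate from the test itself:

  import numpy as np

  # The subtraction is exact in float32 but rounds back to 1.0 in float16.
  print(np.float32(1) - np.float32(2 ** -14))  # 0.99993896...
  print(np.float16(1) - np.float16(2 ** -14))  # 1.0
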
Example #7
 def testMixedPrecision(self, required_gpus):
     if test_util.is_xla_enabled():
         return  # Test gets NaNs with XLA
     with policy.policy_scope('mixed_float16'):
         self._run_between_graph_clients(self._test_mixed_precision,
                                         self._cluster_spec,
                                         num_gpus=required_gpus)
Example #8
    def test_gradient(self, strategy_fn):
        x = constant_op.constant([1.], dtype=dtypes.float16)
        with strategy_fn().scope() as strategy:
            with policy.policy_scope('infer_float32_vars'):
                layer = AddLayer(assert_type=dtypes.float16)

                def run_fn():
                    with backprop.GradientTape() as tape:
                        y = layer(x)
                        # Divide by num_replicas_in_sync, as the effective
                        # total loss is the sum of each replica's loss.
                        y /= strategy.num_replicas_in_sync

                    # The learning rate is small enough that, if applied to a
                    # float16 variable, the variable would not change. So this
                    # tests that the learning rate is applied to the float32
                    # variable, not a float16 value.
                    opt = gradient_descent.SGD(2**-14)
                    grad = tape.gradient(y, layer.v)
                    return opt.apply_gradients([(grad, layer.v)])

                op = strategy.experimental_run(run_fn)
                if not context.executing_eagerly():
                    self.evaluate(variables.global_variables_initializer())
                    self.evaluate(op)
                # The gradient with respect to the variable is 1. Since the
                # variable is initialized to 1 and the learning rate is 2**-14,
                # the new variable value should be
                # init_val - gradient * learning_rate, which is 1 - 1 * 2**-14.
                self.assertEqual(self.evaluate(layer.v), 1 - 2**-14)
Example #9
    def test_int32_with_float32_vars(self, strategy_fn):

        # The policy int32_with_float32_vars is not useful at all (nor is any other
        # non-float policy with float32 variables), but we have it for consistency,
        # and so we test it.

        class IdentityLayerWithVar(base_layer.Layer):
            def build(self, _):
                self.v = self.add_weight('v', ())

            def call(self, inputs):
                # Variables are only cast to other floats, not to ints.
                assert array_ops.identity(self.v).dtype == 'float32'
                return array_ops.identity(inputs)

        x = constant_op.constant([1])
        with strategy_fn().scope(), policy.policy_scope(
                'int32_with_float32_vars'):
            layer = IdentityLayerWithVar()
            self.assertEqual(layer.dtype, dtypes.float32)
            self.assertEqual(layer._dtype_policy._name,
                             'int32_with_float32_vars')
            y = layer(x)
            self.assertEqual(layer.v.dtype, dtypes.float32)
            self.assertEqual(y.dtype, dtypes.int32)
Example #10
  def test_model(self, strategy_fn, use_operator=False, use_regularizer=False,
                 cloning=True):
    regularizer = IdentityRegularizer() if use_regularizer else None
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        x = layers.Input(shape=(1,), batch_size=2, dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16, use_operator=use_operator,
                         regularizer=regularizer)
        y = layer(x)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          del y_true
          return math_ops.reduce_mean(y_pred)

        # The learning rate is small enough that, if applied to a float16
        # variable, the variable would not change. So this tests that the
        # learning rate is applied to the float32 variable, not a float16 value.
        opt = gradient_descent.SGD(2 ** -14)
        model.compile(opt, loss=loss_fn, cloning=cloning)

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((2, 1))
    y = np.ones((2, 1))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
    model.fit(dataset)
    # The variable starts at 1. The gradient is 1 and the learning rate is
    # 2 ** -14, so 2 ** -14 should be subtracted from it.
    expected = 1 - 2 ** -14
    if use_regularizer:
      # The regularizer adds 1 to the gradient, subtracting another 2 ** -14.
      expected -= 2 ** -14
    self.assertEqual(backend.eval(layer.v), expected)
Example #11
  def test_save_slot_variables_with_autocast_vars(self, strategy_fn,
                                                  var_name='v'):
    if not self._is_strategy_supported(strategy_fn):
      return
    with strategy_fn().scope(), policy.policy_scope('infer_float32_vars'):
      x = layers.Input(shape=(2,), batch_size=2, dtype=dtypes.float16)
      # Having a var_name other than 'v' tests that a fixed bug (b/134713714)
      # does not reoccur. The bug was a crash when saving a checkpoint in which
      # an AutoCastVariable with a slot variable had a different name than the
      # layer attribute's name (layer.v in this case).
      layer = AddLayer(assert_type=dtypes.float16, var_name=var_name)
      y = layer(x)
      y = math_ops.cast(y, dtypes.float32)
      model = models.Model(inputs=x, outputs=y)
      opt = gradient_descent.SGD(1., 1.)
      model.compile(optimizer=opt, loss='mse',
                    run_eagerly=testing_utils.should_run_eagerly())

    model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2)
    weights_file = os.path.join(self.get_temp_dir(), 'weights')
    model.save_weights(weights_file)
    saved_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))

    model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2)
    new_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))
    self.assertNotEqual(new_slot, saved_slot)

    model.load_weights(weights_file)
    restored_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))
    self.assertEqual(restored_slot, saved_slot)
Example #12
 def test_error_if_policy_is_set(self):
     with policy.policy_scope('infer_float32_vars'):
         with self.assertRaisesRegexp(
                 ValueError, 'a keras mixed precision Policy has been set'):
             enable_mixed_precision_graph_rewrite(
                 gradient_descent_v2.SGD(1.0))
     # Test no error is thrown when the policy is currently the default.
     enable_mixed_precision_graph_rewrite(gradient_descent_v2.SGD(1.0))
Example #13
 def test_pass_invalid_optimizer_with_loss_scaling(self):
     with policy.policy_scope(policy.Policy('float32', loss_scale=10.)):
         x = layers.Input(shape=(1, ))
         y = AddLayer()(x)
         model = models.Model(x, y)
         with self.assertRaisesRegexp(ValueError,
                                      'optimizer" must be an instance of '):
             model.compile(optimizers.SGD(1.), 'mse')
Example #14
    def test_v1_dtype_behavior(self):
        # These policies are allowed with V1 dtype behavior
        with mp_policy.policy_scope(mp_policy.Policy('infer')):
            pass
        with mp_policy.policy_scope(mp_policy.Policy('infer_float32_vars')):
            pass

        # These policies are not allowed with V1 dtype behavior
        with self.assertRaisesRegexp(
                ValueError, 'the V2 layer dtype behavior must be enabled'):
            with mp_policy.policy_scope(mp_policy.Policy('float32')):
                pass
        with self.assertRaisesRegexp(
                ValueError, 'the V2 layer dtype behavior must be enabled'):
            with mp_policy.policy_scope(
                    mp_policy.Policy('float16_with_float32_vars')):
                pass
Example #15
 def test_functional_model_loss_dtype(self):
     with policy.policy_scope('float16'):
         x = layers.Input(shape=(1, ))
         y = AddLayer()(x)
         model = models.Model(x, y)
         model.add_loss(math_ops.cast(y, 'float32'))
         # The loss should not be cast to the policy's dtype.
         self.assertEqual(model.losses[0].dtype, 'float32')
Example #16
 def test_variable_not_casted_for_int_inputs(self, strategy_fn):
     x = constant_op.constant([[1]], dtype=dtypes.int32)
     with strategy_fn().scope():
         with policy.policy_scope('infer_float32_vars'):
             layer = layers.Embedding(input_dim=10, output_dim=32)
             y = layer(x)
             self.assertEqual(layer.embeddings.dtype, dtypes.float32)
             self.assertEqual(y.dtype, dtypes.float32)
Example #17
    def test_model(self,
                   strategy_fn,
                   use_operator=False,
                   use_regularizer=False,
                   policy_name='mixed_float16',
                   experimental_run_tf_function=True):
        if not self._is_strategy_supported(strategy_fn, check_model_type=True):
            return
        regularizer = IdentityRegularizer() if use_regularizer else None
        with strategy_fn().scope():
            # Pass loss_scale=None, as this test will fail if the DynamicLossScale
            # skips applying gradients for a step
            with policy.policy_scope(
                    policy.Policy(policy_name, loss_scale=None)):
                layer_list = []
                if testing_utils.get_model_type() == 'subclass':
                    # Subclassed models do not have an Input layer, so the model does not
                    # cast inputs to the Input layer's dtype. Therefore, we need to
                    # manually insert a float16 cast.
                    cast_f16_layer = layers.Lambda(
                        lambda x: math_ops.cast(x, 'float16'),
                        input_shape=(1, ))
                    layer_list.append(cast_f16_layer)
                layer = AddLayer(assert_type=dtypes.float16,
                                 use_operator=use_operator,
                                 regularizer=regularizer,
                                 input_shape=(1, ))
                cast_f32_layer = layers.Lambda(
                    lambda x: math_ops.cast(x, 'float32'))
                layer_list += [layer, cast_f32_layer]
                model = testing_utils.get_model_from_layers(
                    layer_list, input_shape=(1, ), input_dtype=dtypes.float16)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                # The learning rate is small enough that, if applied to a
                # float16 variable, the variable would not change. So this
                # tests that the learning rate is applied to the float32
                # variable, not a float16 value.
                opt = gradient_descent.SGD(2**-14)
                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly(),
                              experimental_run_tf_function=testing_utils.
                              should_run_tf_function())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # The variable starts at 1. The gradient is 1 and the learning rate is
        # 2 ** -14, so 2 ** -14 should be subtracted from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # The regularizer adds 1 to the gradient, subtracting another
            # 2 ** -14.
            expected -= 2**-14
        self.assertEqual(backend.eval(layer.v), expected)
Example #18
 def test_loss_scale_optimizer_overrides_policy_loss_scale(self):
     with policy.policy_scope(policy.Policy('float32', loss_scale=10.)):
         opt = gradient_descent.SGD(1.)
         opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale=5.)
         x = layers.Input(shape=(1, ))
         y = AddLayer()(x)
         model = models.Model(x, y)
         model.compile(opt, loss='mse')
         self.assertEqual(self.evaluate(model.optimizer.loss_scale()), 5.)
Example #19
    def test_v1_dtype_behavior(self):
        # Only the "infer" policy is allowed with V1 dtype behavior
        with mp_policy.policy_scope(mp_policy.Policy('infer')):
            pass

        # Non-infer policies are not allowed with V1 dtype behavior
        with self.assertRaisesRegexp(
                ValueError,
                'global policy can only be set to a non-infer policy in TensorFlow 2'
        ):
            with mp_policy.policy_scope(mp_policy.Policy('float32')):
                pass
        with self.assertRaisesRegexp(
                ValueError,
                'global policy can only be set to a non-infer policy in TensorFlow 2'
        ):
            with mp_policy.policy_scope(mp_policy.Policy('mixed_float16')):
                pass
Example #20
 def testMixedPrecision(self, num_gpus):
     if context.num_gpus() < num_gpus:
         self.skipTest('Not enough GPUs')
     if test_util.is_xla_enabled():
         self.skipTest('Test gets NaNs with XLA')
     with policy.policy_scope('mixed_float16'):
         self._run_between_graph_clients(self._test_mixed_precision,
                                         self._cluster_spec,
                                         num_gpus=num_gpus)
Example #21
  def test_v1_dtype_behavior(self):
    # These policies are allowed with V1 dtype behavior
    with mp_policy.policy_scope(mp_policy.Policy('infer')):
      pass
    with mp_policy.policy_scope(mp_policy.Policy('infer_float32_vars')):
      pass

    # These policies are not allowed with V1 dtype behavior
    with self.assertRaisesRegexp(
        ValueError,
        'global policy can only be set to a non-infer policy in TensorFlow 2'):
      with mp_policy.policy_scope(mp_policy.Policy('float32')):
        pass
    with self.assertRaisesRegexp(
        ValueError,
        'global policy can only be set to a non-infer policy in TensorFlow 2'):
      with mp_policy.policy_scope(
          mp_policy.Policy('float16_with_float32_vars')):
        pass
Example #22
  def test_lstm_model_correctness_mixed_precision(self, distribution, use_numpy,
                                                  use_validation_data):
    if isinstance(distribution,
                  (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1)):
      policy_name = 'mixed_bfloat16'
    else:
      policy_name = 'mixed_float16'

    with policy.policy_scope(policy_name):
      self.run_correctness_test(distribution, use_numpy, use_validation_data)
Example #23
 def test_layer_calling_tf_function(self, strategy_fn):
     x = constant_op.constant([1.], dtype=dtypes.float16)
     with strategy_fn().scope():
         with policy.policy_scope('infer_float32_vars'):
             layer = AddLayerWithFunction(assert_type=dtypes.float16)
             y = layer(x)
             self.assertEqual(layer.v.dtype, dtypes.float32)
             self.assertEqual(y.dtype, dtypes.float16)
             self.evaluate(variables.global_variables_initializer())
             self.assertEqual(self.evaluate(y), 2.)
Example #24
 def test_layer_with_non_autocast_variable(self, strategy_fn):
   x = constant_op.constant([1.], dtype=dtypes.float16)
   with strategy_fn().scope():
     with policy.policy_scope('infer_float32_vars'):
       layer = AddLayerWithoutAutoCast(assert_type=dtypes.float16)
       y = layer(x)
       self.assertEqual(layer.v.dtype, dtypes.float32)
       self.assertEqual(y.dtype, dtypes.float16)
       self.evaluate(variables.global_variables_initializer())
       self.assertEqual(self.evaluate(y), 2.)
Example #25
 def test_layer_with_non_autocast_variable(self, strategy_fn):
     x = constant_op.constant([1.])
     with strategy_fn().scope():
         with policy.policy_scope('mixed_float16'):
             layer = AddLayerWithoutAutoCast(assert_type=dtypes.float16)
             y = layer(x)
             self.assertEqual(layer.v.dtype, dtypes.float32)
             self.assertEqual(y.dtype, dtypes.float16)
             self.evaluate(variables.global_variables_initializer())
             self.assertEqual(self.evaluate(y), 2.)
Example #26
 def test_pass_invalid_optimizer_with_loss_scaling(self):
     with policy.policy_scope(policy.Policy('float32', loss_scale=10.)):
         x = layers.Input(shape=(1, ))
         y = mp_test_util.MultiplyLayer()(x)
         model = models.Model(x, y)
         if context.executing_eagerly():
             error_msg = 'Use a `tf.keras` Optimizer instead'
         else:
             error_msg = 'optimizer" must be an instance of '
         with self.assertRaisesRegexp(ValueError, error_msg):
             model.compile(optimizers.SGD(1.), 'mse')
Example #27
    def test_advanced_model(self, strategy_fn):

        # The advanced model tests mixed-precision-related features that would
        # occur in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not.
        #  * Regularization on some variables and not others.

        strategy = strategy_fn()

        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(), batch_size=2, dtype=dtypes.float16)
                layer1 = AddLayer(assert_type=dtypes.float16,
                                  regularizer=IdentityRegularizer(),
                                  use_operator=True)
                layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                                 use_operator=True)
                layer3 = AddLayer(assert_type=dtypes.float16,
                                  use_operator=False)
                layer4 = AddLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=IdentityRegularizer(),
                    use_operator=False)
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    self.assertEqual(y_true.dtype, dtypes.float32)
                    self.assertEqual(y_pred.dtype, dtypes.float32)
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                model.compile(opt, loss=loss_fn)

            x = np.ones((2, 1))
            y = np.ones((2, 1))
            dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
            model.fit(dataset)
            for layer in (layer1, layer2, layer3, layer4):
                if layer.losses:
                    # Layer has weight regularizer
                    self.assertEqual(backend.eval(layer.v),
                                     1 - 2 * learning_rate)
                else:
                    # Layer does not have weight regularizer
                    self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
Example #28
 def test_error_if_graph_rewrite_enabled(self):
   try:
     mixed_precision.enable_mixed_precision_graph_rewrite(
         gradient_descent.SGD(1.))
     with self.assertRaisesRegex(
         ValueError, 'cannot be set to "mixed_float16", .* the mixed '
         'precision graph rewrite has already been enabled'):
       mp_policy.set_policy('mixed_float16')
     with mp_policy.policy_scope('float64'):
       pass  # Non-mixed policies are allowed
   finally:
     mixed_precision.disable_mixed_precision_graph_rewrite()
Example #29
    def test_model(self,
                   strategy_fn,
                   use_operator=False,
                   use_regularizer=False,
                   cloning=True):
        if testing_utils.should_run_distributed():
            self.skipTest('b/137397816')
        if not self._is_strategy_supported(strategy_fn):
            return
        regularizer = IdentityRegularizer() if use_regularizer else None
        with strategy_fn().scope():
            with policy.policy_scope('infer_float32_vars'):
                x = layers.Input(shape=(1, ),
                                 batch_size=2,
                                 dtype=dtypes.float16)
                layer = AddLayer(assert_type=dtypes.float16,
                                 use_operator=use_operator,
                                 regularizer=regularizer)
                y = layer(x)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                # The learning rate is small enough that, if applied to a
                # float16 variable, the variable would not change. So this
                # tests that the learning rate is applied to the float32
                # variable, not a float16 value.
                opt = gradient_descent.SGD(2**-14)
                model.compile(
                    opt,
                    loss=loss_fn,
                    cloning=cloning,
                    run_eagerly=testing_utils.should_run_eagerly(),
                    run_distributed=testing_utils.should_run_distributed())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # The variable starts at 1. The gradient is 1 and the learning rate is
        # 2 ** -14, so 2 ** -14 should be subtracted from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # The regularizer adds 1 to the gradient, subtracting another
            # 2 ** -14.
            expected -= 2**-14
        self.assertEqual(backend.eval(layer.v), expected)
Example #30
  def test_advanced_model(self, strategy_fn):

    # The advanced model tests mixed-precision-related features that would
    # occur in a resnet50 model. It tests a model that has:
    #  * Multiple layers, some of which use auto-cast variables and some of
    #    which do not.
    #  * Regularization on some variables and not others.

    strategy = strategy_fn()

    learning_rate = 2 ** -14

    with strategy.scope():
      with policy.policy_scope(policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(), batch_size=2, dtype=dtypes.float16)
        layer1 = AddLayer(assert_type=dtypes.float16,
                          regularizer=IdentityRegularizer(), use_operator=True)
        layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                         use_operator=True)
        layer3 = AddLayer(assert_type=dtypes.float16, use_operator=False)
        layer4 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                         regularizer=IdentityRegularizer(),
                                         use_operator=False)
        y = layer1(x)
        y = layer2(y)
        y = layer3(y)
        y = layer4(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          self.assertEqual(y_true.dtype, dtypes.float32)
          self.assertEqual(y_pred.dtype, dtypes.float32)
          return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(learning_rate)
        model.compile(opt, loss=loss_fn)

      x = np.ones((2, 1))
      y = np.ones((2, 1))
      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
      model.fit(dataset)
      for layer in (layer1, layer2, layer3, layer4):
        if layer.losses:
          # Layer has weight regularizer
          self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
        else:
          # Layer does not have weight regularizer
          self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
Example #31
 def test_mixed_policies_(self, strategy_fn):
     for dtype in 'float16', 'bfloat16':
         x = constant_op.constant([1.])
         policy_name = 'mixed_' + dtype
         with strategy_fn().scope(), policy.policy_scope(policy_name):
             layer = mp_test_util.AddLayer(assert_type=dtype)
             self.assertEqual(layer.dtype, dtypes.float32)
             self.assertEqual(layer._dtype_policy._name, policy_name)
             y = layer(x)
             self.assertEqual(layer.v.dtype, dtypes.float32)
             self.assertEqual(y.dtype, dtype)
             self.assertEqual(layer.dtype, dtypes.float32)
             self.assertEqual(layer._dtype_policy._name, policy_name)
             self.evaluate(variables.global_variables_initializer())
             self.assertEqual(self.evaluate(y), 2.)
Example #32
 def test_floating_point_policies_with_float32_vars(self, strategy_fn):
     for dtype in 'bfloat16', 'float16', 'float64':
         x = constant_op.constant([1.])
         policy_name = dtype + '_with_float32_vars'
         with strategy_fn().scope(), policy.policy_scope(policy_name):
             layer = AddLayer(assert_type=dtype)
             self.assertEqual(layer.dtype, dtypes.float32)
             self.assertEqual(layer._dtype_policy._name, policy_name)
             y = layer(x)
             self.assertEqual(layer.v.dtype, dtypes.float32)
             self.assertEqual(y.dtype, dtype)
             self.assertEqual(layer.dtype, dtypes.float32)
             self.assertEqual(layer._dtype_policy._name, policy_name)
             self.evaluate(variables.global_variables_initializer())
             self.assertEqual(self.evaluate(y), 2.)
Example #33
 def test_passing_policy_to_layer(self, strategy_fn):
     x = constant_op.constant([1.], dtype=dtypes.float16)
     with strategy_fn().scope():
         # Passing a Policy to 'dtype' sets the policy for that layer.
         layer = mp_test_util.AddLayer(assert_type=dtypes.float16,
                                       dtype=policy.Policy('mixed_float16'))
         # layer.dtype refers to the variable dtype
         self.assertEqual(layer.dtype, dtypes.float32)
         layer(x)
         self.assertEqual(layer.v.dtype, dtypes.float32)
         with policy.policy_scope('mixed_float16'):
             # Passing a Policy to dtype overrides the global Policy
             layer = mp_test_util.AddLayer(assert_type=dtypes.float64,
                                           dtype=policy.Policy('float64'))
             self.assertEqual(layer.dtype, 'float64')
             self.assertEqual(layer(x).dtype, dtypes.float64)
             self.assertEqual(layer.v.dtype, dtypes.float64)
Example #34
  def test_infer_with_float32_vars(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope(), policy.policy_scope('infer_float32_vars'):
      layer = AddLayer(assert_type=dtypes.float16)
      self.assertEqual(layer.dtype, dtypes.float32)
      y = layer(x)
      self.assertEqual(layer.v.dtype, dtypes.float32)
      self.assertEqual(y.dtype, dtypes.float16)
      self.assertEqual(layer.dtype, dtypes.float32)
      self.assertEqual(layer._dtype_policy._name, 'float16_with_float32_vars')
      self.evaluate(variables.global_variables_initializer())
      self.assertEqual(self.evaluate(y), 2.)

      if base_layer_utils.v2_dtype_behavior_enabled():
        # Layer should now cast inputs to float16
        x = constant_op.constant([1.], dtype=dtypes.float32)
        y = layer(x)
        self.assertEqual(y.dtype, dtypes.float16)
Example #35
 def test_passing_policy_to_layer(self, strategy_fn):
   x = constant_op.constant([1.], dtype=dtypes.float16)
   with strategy_fn().scope():
     # Passing a Policy to 'dtype' sets the policy for that layer.
     layer = AddLayer(assert_type=dtypes.float16,
                      dtype=policy.Policy('infer_float32_vars'))
     # layer.dtype refers to the variable dtype
     self.assertEqual(layer.dtype, dtypes.float32)
     layer(x)
     self.assertEqual(layer.v.dtype, dtypes.float32)
     with policy.policy_scope('infer_float32_vars'):
       # Passing a Policy to dtype overrides the global Policy
       layer = AddLayer(assert_type=dtypes.float16,
                        dtype=policy.Policy('infer'))
       # layer dtype is not yet known
       self.assertEqual(layer.dtype, None)
       layer(x)
       self.assertEqual(layer.v.dtype, dtypes.float16)
       self.assertEqual(layer.dtype, dtypes.float16)
Example #36
  def test_checkpointing_layer_weights(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        layer = AddLayer(assert_type=dtypes.float16)
        layer.build(())

    layer.set_weights([np.array(100.)])
    self.assertEqual(self.evaluate(layer(x)), 101.)

    checkpoint = trackable_utils.Checkpoint(layer=layer)
    prefix = os.path.join(self.get_temp_dir(), 'ckpt')
    save_path = checkpoint.save(prefix)

    layer.set_weights([np.array(200.)])
    self.assertEqual(self.evaluate(layer(x)), 201.)
    checkpoint.restore(save_path).assert_consumed().run_restore_ops()
    self.assertEqual(layer.get_weights(), [100.])
    self.assertEqual(self.evaluate(layer(x)), 101.)
Example #37
  def test_layer_regularizer_runs_in_float32(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        # Test on AddLayer
        layer = AddLayer(assert_type=dtypes.float16,
                         regularizer=IdentityRegularizer())
        layer(x)
        (regularizer_loss,) = layer.losses
        self.assertEqual(regularizer_loss.dtype, dtypes.float32)
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(self.evaluate(regularizer_loss), 1.)

        # Test on AddLayerWithoutAutoCast
        layer = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                        regularizer=IdentityRegularizer())
        layer(x)
        (regularizer_loss,) = layer.losses
        self.assertEqual(regularizer_loss.dtype, dtypes.float32)
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(self.evaluate(regularizer_loss), 1.)
Example #38
  def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False):
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        x = layers.Input(shape=(1,), batch_size=2, dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

    model.set_weights([np.array(100.)])
    x = np.ones((2, 1), dtype=np.float16)
    self.assertAllClose(backend.get_value(model(x)), x + 100.)
    suffix = '.h5' if h5 else ''
    weights_file = os.path.join(self.get_temp_dir(), 'weights' + suffix)
    model.save_weights(weights_file)

    model.set_weights([np.array(200.)])
    self.assertAllClose(backend.get_value(model(x)), x + 200.)
    model.load_weights(weights_file)
    self.assertAllClose(backend.get_value(model(x)), x + 100.)
    self.assertEqual(model.get_weights(), [np.array(100.)])
Example #39
  def test_advanced_model(self, strategy_fn, use_loss_scaling=False):

    # The advanced model tests mixed-precision-related features that would
    # occur in a resnet50 model. It tests a model that has:
    #  * Multiple layers, some of which use auto-cast variables and some of
    #    which do not.
    #  * Regularization on some variables and not others.
    #  * A fixed loss scale (if use_loss_scaling is True).

    strategy = strategy_fn()
    if use_loss_scaling:
      loss_scale = 8.
    learning_rate = 2 ** -14

    with strategy.scope():
      with policy.policy_scope(policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(1,), batch_size=2, dtype=dtypes.float16)
        layer1 = AddLayer(assert_type=dtypes.float16,
                          regularizer=IdentityRegularizer(), use_operator=True)
        layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                         use_operator=True)
        layer3 = AddLayer(assert_type=dtypes.float16, use_operator=False)
        layer4 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                         regularizer=IdentityRegularizer(),
                                         use_operator=False)
        y = layer1(x)
        y = layer2(y)
        y = layer3(y)
        y = layer4(y)
        if use_loss_scaling:
          # The gradient of 'y' at this point is 1. With loss scaling, the
          # gradient is 'loss_scale'. We divide by the batch size of 2 since the
          # loss is averaged across batch elements.
          expected_gradient = loss_scale / 2
          identity_with_grad_check_fn = (
              mp_test_util.create_identity_with_grad_check_fn(
                  expected_dtype=dtypes.float16,
                  expected_gradient=[expected_gradient]))
          y = core.Lambda(identity_with_grad_check_fn)(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          self.assertEqual(y_true.dtype, dtypes.float32)
          self.assertEqual(y_pred.dtype, dtypes.float32)
          return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(learning_rate)
        if use_loss_scaling:
          opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
        model.compile(opt, loss=loss_fn)

    x = np.ones((2, 1))
    y = np.ones((2, 1))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
    model.fit(dataset)
    for layer in (layer1, layer2, layer3, layer4):
      if layer.losses:
        # Layer has weight regularizer
        self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
      else:
        # Layer does not have weight regularizer
        self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
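
The expected_gradient arithmetic in the loss-scaling branch above follows from two facts: reduce_mean over a batch of 2 gives each batch element a gradient of 1/2, and scaling the loss multiplies every backpropagated gradient by the loss scale. A small illustrative calculation using the same numbers as the test:

  loss_scale, batch_size = 8., 2
  # Gradient that reaches the grad-check Lambda while the loss is still scaled.
  scaled_gradient = loss_scale * (1. / batch_size)  # 4.0 == loss_scale / 2
  # The LossScaleOptimizer divides by loss_scale before applying updates, so
  # the update to the variable is unaffected by the scale.
  unscaled_gradient = scaled_gradient / loss_scale  # 0.5
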
Example #40
  def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
    strategy = strategy_fn()
    initial_loss_scale = 2.
    batch_size = 4
    expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                         dtype=dtypes.float16)
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
    with strategy.scope():
      with policy.policy_scope(policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(1,), batch_size=batch_size,
                         dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        identity_with_nan_grads = (
            mp_test_util.create_identity_with_nan_gradients_fn(
                have_nan_gradients))
        y = core.Lambda(identity_with_nan_grads)(y)
        identity_with_grad_check_fn = (
            mp_test_util.create_identity_with_grad_check_fn(
                expected_dtype=dtypes.float16,
                expected_gradient=expected_gradient))
        y = core.Lambda(identity_with_grad_check_fn)(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          del y_true
          return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(1.)
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
        model.compile(opt, loss=loss_fn, cloning=cloning)

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    model.fit(dataset)
    # The variable starts at 1 and has a gradient of 1, so it will go down by 1
    # each step.
    self.assertEqual(backend.eval(layer.v), 0)

    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -1)

    # There have been two steps without NaNs, so the loss scale will double
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient * 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    model.fit(dataset)
    # Variable should not be updated
    self.assertEqual(backend.eval(layer.v), -2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient / 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -3)
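
The doubling after two finite steps and the halving after a NaN step asserted above follow the usual dynamic loss-scaling rule. A hypothetical standalone sketch of that update rule (not the DynamicLossScale implementation itself):

  def update_loss_scale(loss_scale, grads_are_finite, good_steps,
                        increment_period=2):
    # On non-finite gradients: skip the step, halve the scale, reset the counter.
    if not grads_are_finite:
      return loss_scale / 2., 0
    # After `increment_period` consecutive finite steps, double the scale.
    good_steps += 1
    if good_steps >= increment_period:
      return loss_scale * 2., 0
    return loss_scale, good_steps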