Ejemplo n.º 1
0
 def test_v1_dtype_behavior(self):
     """Checks that entering a policy scope fails under V1 dtype behavior.

     For each policy name tried, building the policy and entering
     `policy_scope` with it must raise a `ValueError` whose message matches
     the "global policy can only be set in TensorFlow 2" pattern.
     """
     expected_error = 'global policy can only be set in TensorFlow 2'
     for policy_name in ('_infer', 'float32', 'mixed_float16'):
         # The policy is constructed inside the assertion context so that an
         # error raised by either the constructor or the scope is captured.
         with self.assertRaisesRegex(ValueError, expected_error):
             with mp_policy.policy_scope(mp_policy.Policy(policy_name)):
                 pass
Ejemplo n.º 2
0
    def test_save_slot_variables_with_autocast_vars(self,
                                                    strategy_fn,
                                                    var_name='v'):
        """Verifies the momentum slot survives a save/load of model weights.

        A one-layer model is built under the 'mixed_float16' policy, trained
        once and saved. It is then trained again, which must change the
        momentum slot, and finally the saved weights are loaded, which must
        bring the slot back to its saved value.

        Args:
          strategy_fn: Callable returning the distribution strategy whose
            scope the model is built in.
          var_name: Name passed to the layer for its variable.
        """
        features = np.ones((2, 2))
        targets = np.zeros((2, 2))

        mixed_policy = policy.Policy('mixed_float16')
        with strategy_fn().scope(), policy.policy_scope(mixed_policy):
            inputs = layers.Input(shape=(2, ), batch_size=2)
            # Having a var_name other than 'v' tests that a fixed bug
            # (b/134713714) does not reoccur. The bug was a crash when saving
            # a checkpoint where an AutoCastVariable with a slot variable had
            # a different name than the layer attribute's name (layer.v here).
            layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                               var_name=var_name)
            model = models.Model(inputs=inputs, outputs=layer(inputs))
            opt = loss_scale_optimizer.LossScaleOptimizer(
                gradient_descent.SGD(1., 1.),
                dynamic=False,
                initial_scale=1)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=testing_utils.should_run_eagerly())

        def momentum_slot():
            # Current value of the optimizer's momentum slot for layer.v.
            return backend.get_value(opt.get_slot(layer.v, 'momentum'))

        model.fit(features, targets, batch_size=2)
        weights_file = os.path.join(self.get_temp_dir(), 'weights')
        model.save_weights(weights_file)
        saved_slot = momentum_slot()

        # A second training step must move the slot away from the saved value.
        model.fit(features, targets, batch_size=2)
        self.assertNotEqual(momentum_slot(), saved_slot)

        # Loading the checkpoint must restore the slot as well.
        model.load_weights(weights_file)
        self.assertEqual(momentum_slot(), saved_slot)
Ejemplo n.º 3
0
    def test_dtype_attributes(self):
        """Checks `name`, `compute_dtype` and `variable_dtype` of policies.

        Covers three groups: policies named after a single dtype, where all
        three attributes equal that name; 'mixed_*' policies, where the
        compute dtype is the suffix and the variable dtype is 'float32'; and
        the '_infer' policy, where both dtypes are None.
        """
        for dtype in ('int32', 'bool', 'float16', 'float32'):
            policy = mp_policy.Policy(dtype)
            self.assertEqual(policy.name, dtype)
            self.assertEqual(policy.compute_dtype, dtype)
            self.assertEqual(policy.variable_dtype, dtype)

        for dtype in ('float16', 'bfloat16'):
            mixed_name = 'mixed_' + dtype
            policy = mp_policy.Policy(mixed_name)
            self.assertEqual(policy.name, mixed_name)
            self.assertEqual(policy.compute_dtype, dtype)
            self.assertEqual(policy.variable_dtype, 'float32')

        policy = mp_policy.Policy('_infer')
        # Use identity checks: assertEqual(x, None) would also pass for any
        # object whose __eq__ claims equality with None.
        self.assertIsNone(policy.compute_dtype)
        self.assertIsNone(policy.variable_dtype)
Ejemplo n.º 4
0
    def test_config(self, strategy_fn):
        """Round-trips MultiplyLayer configs for several dtype policies.

        Checks the serialized 'dtype' entry of `get_config()` and the dtypes
        of a layer rebuilt with `from_config()` for single-dtype policies,
        the 'mixed_float16' policy and the '_infer' policy.

        Args:
          strategy_fn: Callable returning the distribution strategy whose
            scope the layers are created and called in.
        """
        x = constant_op.constant([1.], dtype=dtypes.float16)
        mixed_float16_config = {
            'class_name': 'Policy',
            'config': {
                'name': 'mixed_float16'
            }
        }
        with strategy_fn().scope():
            # Policies equivalent to one dtype serialize to a plain string.
            single_dtype_cases = (
                (mp_test_util.MultiplyLayer(), 'float32'),
                (mp_test_util.MultiplyLayer(dtype='float64'), 'float64'),
                (mp_test_util.MultiplyLayer(dtype=policy.Policy('float64')),
                 'float64'),
            )
            for layer, dtype in single_dtype_cases:
                config = layer.get_config()
                self.assertEqual(config['dtype'], dtype)
                self.assertIsInstance(config['dtype'], str)
                layer = mp_test_util.MultiplyLayer.from_config(config)
                self.assertEqual(layer.dtype, dtype)
                self.assertEqual(layer(x).dtype, dtype)
                self.assertEqual(layer.v.dtype, dtype)

            # A mixed policy serializes to a class_name/config dict, both
            # before and after a from_config round trip.
            layer = mp_test_util.MultiplyLayer(dtype='mixed_float16')
            config = layer.get_config()
            self.assertEqual(config['dtype'], mixed_float16_config)
            layer = mp_test_util.MultiplyLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float32')
            config = layer.get_config()
            self.assertEqual(config['dtype'], mixed_float16_config)

            layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('_infer'))
            config = layer.get_config()
            self.assertIsNone(config['dtype'])
            layer = mp_test_util.MultiplyLayer.from_config(config)
            # If a layer is serialized with the "_infer" policy, when
            # deserialized into TF 2 it will have the global policy instead of
            # "_infer". This is because "_infer" is serialized into None, and
            # passing dtype=None in TensorFlow 2 indicates to use the global
            # policy.
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float32')
            self.assertEqual(layer.v.dtype, 'float32')
Ejemplo n.º 5
0
 def test_batchnorm_mixed_precision_does_not_overflow(self, fused):
   """Checks mixed_float16 batch norm output for inputs of -1000 and 1000.

   Args:
     fused: Value forwarded to the `fused` argument of BatchNormalization.
   """
   batch_shape = (2, 1, 1, 1)
   norm = keras.layers.BatchNormalization(
       axis=-1,
       input_shape=(1, 1, 1),
       fused=fused,
       dtype=policy.Policy('mixed_float16'))
   x = np.array([-1000., 1000.]).reshape(batch_shape)
   expected_y = np.array([-1.0, 1.0]).reshape(batch_shape)
   # Run in training mode and compare against the expected -1 / +1 outputs.
   y = norm(x, training=True)
   self.assertAllClose(keras.backend.eval(y), expected_y)
Ejemplo n.º 6
0
 def test_batchnorm_mixed_precision(self):
   """Checks output and weight dtypes of a mixed_float16 batch norm layer.

   The layer output must be float16 while `beta` and `gamma` must have a
   float32 base dtype.
   """
   norm = keras.layers.BatchNormalization(
       axis=-1,
       input_shape=(4, 4, 3),
       momentum=0.8,
       dtype=policy.Policy('mixed_float16'))
   y = norm(np.random.normal(size=(10, 4, 4, 3)))
   self.assertEqual(y.dtype, 'float16')
   for weight in (norm.beta, norm.gamma):
     self.assertEqual(weight.dtype.base_dtype, 'float32')
Ejemplo n.º 7
0
    def test_device_compatibility_warning(self):
        """Checks when creating a mixed_float16 policy logs a warning.

        Skipped outside eager mode. When no GPU is listed, the first policy
        creation must log a message matching the compatibility-check WARNING
        pattern. When a GPU is listed, neither the first nor a second policy
        creation may call `tf_logging.warn`.
        """
        if not context.executing_eagerly():
            self.skipTest('Run in eager mode only.')

        # Clear the module-level flag before creating the first policy.
        # NOTE(review): presumably this flag suppresses repeated logging;
        # confirm against device_compatibility_check.
        device_compatibility_check._logged_compatibility_check = False
        with test.mock.patch.object(tf_logging, 'warn') as first_warn:
            mp_policy.Policy('mixed_float16')

        if not config_module.list_physical_devices('GPU'):
            self.assertRegex(
                first_warn.call_args[0][0],
                r'Mixed precision compatibility check \(mixed_float16\): WARNING.*'
            )
            return

        first_warn.assert_not_called()
        # Assert message is only logged once
        with test.mock.patch.object(tf_logging, 'warn') as second_warn:
            mp_policy.Policy('mixed_float16')
        second_warn.assert_not_called()
Ejemplo n.º 8
0
 def test_dense_with_policy(self):
     """Checks dtypes and output signature of a mixed_float16 Dense layer.

     The layer is called on random integer input; its output and computed
     output signature must be float16 while the kernel stays float32.
     """
     random_ints = np.random.randint(low=0, high=7, size=(2, 2))
     inputs = ops.convert_to_tensor_v2_with_dispatch(random_ints)
     layer = keras.layers.Dense(5, dtype=policy.Policy('mixed_float16'))
     outputs = layer(inputs)

     input_spec = tensor_spec.TensorSpec(dtype='float16', shape=(2, 2))
     output_signature = layer.compute_output_signature(input_spec)
     self.assertEqual(output_signature.dtype, dtypes.float16)
     self.assertEqual(output_signature.shape, (2, 5))

     self.assertEqual(outputs.dtype, 'float16')
     self.assertEqual(layer.kernel.dtype, 'float32')
Ejemplo n.º 9
0
 def test_unsupported_strategy(self):
     """Checks a mixed policy layer is rejected under central storage.

     Creating a 'mixed_float16' layer inside the strategy scope must raise a
     `ValueError` matching the expected message; creating a 'float64' layer
     afterwards must not raise.
     """
     expected_error = (
         'Mixed precision is not supported with the '
         'tf.distribute.Strategy: CentralStorageStrategy. Either '
         'stop using mixed precision by removing the use of the '
         '"mixed_float16" policy or use a different Strategy, e.g. '
         'a MirroredStrategy.')
     strategy = create_central_storage_strategy()
     with strategy.scope():
         with self.assertRaisesRegex(ValueError, expected_error):
             mp_test_util.MultiplyLayer(dtype='mixed_float16')
     # Non-mixed policies are fine
     mp_test_util.MultiplyLayer(dtype=policy.Policy('float64'))
Ejemplo n.º 10
0
    def test_get_layer_policy(self):
        """Checks `get_layer_policy` for default, Policy and string dtypes."""
        lookup = get_layer_policy.get_layer_policy

        # No dtype given: the reported policy is named 'float32'.
        default_layer = core.Dense(4)
        self.assertEqual(lookup(default_layer).name, 'float32')

        # A Policy instance passed as dtype is returned as the same object.
        mixed_policy = policy.Policy('mixed_float16')
        mixed_layer = core.Dense(4, dtype=mixed_policy)
        self.assertIs(lookup(mixed_layer), mixed_policy)

        # A dtype string yields a policy with that name.
        float64_layer = core.Dense(4, dtype='float64')
        self.assertEqual(lookup(float64_layer).name, 'float64')
Ejemplo n.º 11
0
 def test_global_policy_dtype_error(self):
     """Checks `set_policy` rejects non-floating-point policies.

     Both a policy name ('int32') and a Policy instance ('complex64') must
     raise a `ValueError` whose message names the offending policy.
     """
     error_prefix = (
         'set_policy can only be used to set the global policy to '
         'floating-point policies, such as "float32" and "mixed_float16", but '
         'got policy: ')
     with self.assertRaisesRegex(ValueError, error_prefix + 'int32'):
         mp_policy.set_policy('int32')
     with self.assertRaisesRegex(ValueError, error_prefix + 'complex64'):
         mp_policy.set_policy(mp_policy.Policy('complex64'))
Ejemplo n.º 12
0
    def test_build_and_call_layer_in_function(self):
        """Checks a mixed_float16 layer first called inside a function.

        The layer is called for the first time inside a
        `def_function.function`. The result must be float16 with value 1,
        and the layer variable must be float32.
        """
        mixed_policy = policy.Policy('mixed_float16')
        layer = mp_test_util.MultiplyLayer(dtype=mixed_policy)

        @def_function.function
        def call_layer():
            return layer(1.)

        result = call_layer()
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(result.dtype, 'float16')
        self.assertEqual(layer.v.dtype, 'float32')
        self.assertEqual(self.evaluate(result), 1.)
Ejemplo n.º 13
0
 def test_passing_policy_to_layer(self, strategy_fn):
     """Checks that a Policy passed as `dtype` becomes the layer's policy.

     Args:
       strategy_fn: Callable returning the distribution strategy whose scope
         the layers are created and called in.
     """
     x = constant_op.constant([1.], dtype=dtypes.float16)
     with strategy_fn().scope():
         # Passing a Policy to 'dtype' sets the policy for that layer.
         mixed_layer = mp_test_util.MultiplyLayer(
             assert_type=dtypes.float16,
             dtype=policy.Policy('mixed_float16'))
         # layer.dtype refers to the variable dtype
         self.assertEqual(mixed_layer.dtype, dtypes.float32)
         mixed_layer(x)
         self.assertEqual(mixed_layer.v.dtype, dtypes.float32)

         with policy.policy_scope('mixed_float16'):
             # Passing a Policy to dtype overrides the global Policy
             float64_layer = mp_test_util.MultiplyLayer(
                 assert_type=dtypes.float64, dtype=policy.Policy('float64'))
             self.assertEqual(float64_layer.dtype_policy.name, 'float64')
             self.assertIsInstance(float64_layer.dtype_policy, policy.Policy)
             for attribute in ('compute_dtype', 'dtype', 'variable_dtype'):
                 self.assertEqual(getattr(float64_layer, attribute),
                                  dtypes.float64)
             # The variable is only inspected after the layer has been called.
             self.assertEqual(float64_layer(x).dtype, dtypes.float64)
             self.assertEqual(float64_layer.v.dtype, dtypes.float64)
Ejemplo n.º 14
0
    def __init__(self, trainable=True, name=None, dtype=None, **kwargs):
        """Sets up legacy-layer state, then delegates to the parent constructor.

        Args:
          trainable: Forwarded unchanged to the parent constructor.
          name: Forwarded unchanged to the parent constructor.
          dtype: Forwarded to the parent constructor. When None, it is first
            replaced by an "_infer" policy.
          **kwargs: Extra keyword arguments for the parent constructor. The
            private `_scope` and `_reuse` keys are popped here and are not
            forwarded. `autocast` is set to False when the caller omits it.

        Raises:
          ValueError: If `_scope` or `_reuse` is given (not None) while
            `_is_in_keras_style_scope()` returns a true value.
        """
        # For backwards compatibility, legacy layers do not use `ResourceVariable`
        # by default.
        self._use_resource_variables = False
        # Private kwargs are removed before the remaining kwargs are forwarded.
        scope = kwargs.pop('_scope', None)
        self._reuse = kwargs.pop('_reuse', None)

        # Avoid an incorrect lint error
        self._trainable_weights = []
        self.built = False

        if dtype is None:
            # Indicates to infer dtype from inputs. When the V2 dtype behavior is
            # enabled, Keras layers default their dtype to floatx instead, so we pass
            # an "_infer" policy to keep the old V1 behavior.
            dtype = policy.Policy('_infer')

        # Only supply a default; an explicit `autocast` from the caller is kept.
        if 'autocast' not in kwargs:
            kwargs['autocast'] = False

        # Mark that legacy layers should not be instrumented as Keras usage
        self._disable_keras_instrumentation = True

        super(Layer, self).__init__(trainable=trainable,
                                    name=name,
                                    dtype=dtype,
                                    **kwargs)

        # In a keras-style scope, the scope/reuse arguments are rejected.
        if _is_in_keras_style_scope():
            if scope is not None:
                raise ValueError(
                    'scope argument not allowed when keras style layers are enabled, '
                    'but saw: {}'.format(scope))
            if self._reuse is not None:
                raise ValueError(
                    'reuse argument not allowed when keras style layers are enabled, '
                    'but saw: {}'.format(self._reuse))
            self._keras_style = True
        else:
            self._keras_style = False

        # Record whether the call function's arguments include 'scope'.
        self._call_has_scope_arg = 'scope' in self._call_fn_args
        if scope:
            # The variable scope is entered only to capture the scope object
            # that the context manager yields.
            with vs.variable_scope(scope) as captured_scope:
                self._scope = captured_scope
        else:
            self._scope = None
        self._current_scope = None
Ejemplo n.º 15
0
    def test_repr(self):
        """Checks the `repr` strings of Policy and PolicyV1 instances."""
        # Test Policy repr
        policy_names = ('float32', 'int8', 'mixed_float16', 'mixed_bfloat16',
                        '_infer')
        for name in policy_names:
            actual = repr(mp_policy.Policy(name))
            self.assertEqual(actual, '<Policy "%s">' % name)

        # Test PolicyV1 repr
        v1_names = ('float32', 'int8', 'mixed_bfloat16', '_infer')
        for name in v1_names:
            actual = repr(mp_policy.PolicyV1(name))
            self.assertEqual(actual,
                             '<PolicyV1 "%s", loss_scale=None>' % name)

        # An explicit loss scale shows up as a FixedLossScale.
        fixed_repr = repr(mp_policy.PolicyV1('float16', loss_scale=2))
        self.assertEqual(
            fixed_repr,
            '<PolicyV1 "float16", loss_scale=FixedLossScale(2.0)>')

        # 'mixed_float16' with no loss scale given reports a DynamicLossScale.
        dynamic_repr = repr(mp_policy.PolicyV1('mixed_float16'))
        self.assertStartsWith(
            dynamic_repr,
            '<PolicyV1 "mixed_float16", loss_scale=DynamicLossScale(')
Ejemplo n.º 16
0
 def test_config(self):
     """Checks policies survive a `get_config` / `from_config` round trip."""
     policy_names = (
         'float16',
         'float32',
         'int16',
         'mixed_float16',
         'mixed_bfloat16',
         '_infer',
     )
     for name in policy_names:
         original = mp_policy.Policy(name)
         restored = mp_policy.Policy.from_config(original.get_config())
         # Comparing strings is the easiest way to ensure the policies are
         # the same, as policy does not override the == operator.
         self.assertEqual(str(original), str(restored))
Ejemplo n.º 17
0
    def test_policy_errors(self):
        """Checks the errors raised for invalid `Policy` constructor input.

        Each case pairs an argument with the exception type and message
        pattern that constructing a Policy from it must produce.
        """
        removed_prefix = ("Policies ending in '_float32_vars' have been "
                          "removed from TensorFlow. ")
        cases = (
            # Test passing invalid strings
            (ValueError,
             'Cannot convert value abc to a mixed precision Policy.',
             'abc'),
            # Test passing a DType
            (TypeError,
             "'name' must be a string, not a DType. "
             'Instead, pass DType.name. Got: float16',
             dtypes.float16),
            # Test passing a non-DType invalid type
            (TypeError,
             "'name' must be a string, but got: 5",
             5),
            # Test passing a now-removed policy ending in float32_vars
            (ValueError,
             removed_prefix +
             "Please use the 'mixed_float16' or 'mixed_bfloat16' policy "
             "instead. Got policy name: 'infer_float32_vars'",
             'infer_float32_vars'),
            (ValueError,
             removed_prefix +
             "Please use the 'mixed_float16' policy "
             "instead. Got policy name: 'float16_with_float32_vars'",
             'float16_with_float32_vars'),
            (ValueError,
             removed_prefix +
             "Please use the 'mixed_bfloat16' policy "
             "instead. Got policy name: 'bfloat16_with_float32_vars'",
             'bfloat16_with_float32_vars'),
            (ValueError,
             removed_prefix +
             "Got policy name: 'int8_with_float32_vars'",
             'int8_with_float32_vars'),
        )
        for error_type, message, bad_name in cases:
            with self.assertRaisesRegex(error_type, message):
                mp_policy.Policy(bad_name)
Ejemplo n.º 18
0
 def test_global_policy(self):
     """Checks `set_policy` / `global_policy` with names and instances.

     The initial global policy name depends on whether the V2 dtype behavior
     is enabled. `set_policy(None)` is always called at the end, even when an
     assertion fails.
     """
     default_policy = (
         'float32' if base_layer_utils.v2_dtype_behavior_enabled()
         else '_infer')
     self.assertEqual(mp_policy.global_policy().name, default_policy)
     try:
         mp_policy.set_policy('mixed_float16')
         self.assertEqual(mp_policy.global_policy().name, 'mixed_float16')

         # Policies are not associated with a graph
         with ops.Graph().as_default():
             self.assertEqual(mp_policy.global_policy().name,
                              'mixed_float16')

         mp_policy.set_policy('_infer')
         self.assertEqual(mp_policy.global_policy().name, '_infer')

         # Setting a Policy instance makes that exact object the global one.
         bfloat16_policy = mp_policy.Policy('mixed_bfloat16')
         mp_policy.set_policy(bfloat16_policy)
         self.assertIs(mp_policy.global_policy(), bfloat16_policy)
     finally:
         # NOTE(review): presumably None restores the default policy; confirm
         # against set_policy.
         mp_policy.set_policy(None)
Ejemplo n.º 19
0
  def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3,
                 input_data=None):
    """Compares a float32 layer against its mixed precision counterparts.

    Three otherwise identical layers are built: a float32 one, a
    'mixed_float16' one, and a 'mixed_float16' one whose model is created
    under a mirrored strategy. Their predict() outputs, and their weights
    after one fit() call, must agree within the given tolerances.

    Args:
      f32_layer_fn: A function returning a float32 layer. The two mixed
        precision layers are created from this layer's config.
      input_shape: The shape of the input to the layer, including the batch
        dimension. Or a list of shapes if the layer takes multiple inputs.
      rtol: The relative tolerance to be asserted.
      atol: The absolute tolerance to be asserted.
      input_data: A Numpy array with the data of the input. If None, input
        data will be randomly generated.
    """
    # ZeroPadding2D is not exercised on ROCm builds; return without testing.
    if (f32_layer_fn == convolutional.ZeroPadding2D and
        test.is_built_with_rocm()):
      return

    # A single shape starts with an int; normalize to a list of shapes.
    input_shapes = (
        [input_shape] if isinstance(input_shape[0], int) else input_shape)
    strategy = create_mirrored_strategy()
    f32_layer = f32_layer_fn()

    # Create the layers
    assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
    layer_cls = f32_layer.__class__
    mixed_config = f32_layer.get_config()
    mixed_config['dtype'] = policy.Policy('mixed_float16')
    mp_layer = layer_cls.from_config(mixed_config)
    distributed_mp_layer = layer_cls.from_config(mixed_config)

    # Compute per_replica_input_shapes for the distributed model
    global_batch_size = input_shapes[0][0]
    num_replicas = strategy.num_replicas_in_sync
    assert global_batch_size % strategy.num_replicas_in_sync == 0, (
        'The number of replicas, %d, does not divide the global batch size of '
        '%d' % (strategy.num_replicas_in_sync, global_batch_size))
    per_replica_batch_size = global_batch_size // num_replicas
    per_replica_input_shapes = [
        (per_replica_batch_size,) + shape[1:] for shape in input_shapes
    ]

    # Create the models
    f32_model = self._create_model_from_layer(f32_layer, input_shapes)
    mp_model = self._create_model_from_layer(mp_layer, input_shapes)
    with strategy.scope():
      distributed_mp_model = self._create_model_from_layer(
          distributed_mp_layer, per_replica_input_shapes)
    mixed_models = (mp_model, distributed_mp_model)

    # Set all model weights to the same values
    f32_weights = f32_model.get_weights()
    for mixed_model in mixed_models:
      mixed_model.set_weights(f32_weights)

    # Generate input data
    if input_data is None:
      # Cast inputs to float16 to avoid measuring error from having f16 layers
      # cast to float16.
      input_data = [
          np.random.normal(size=shape).astype('float16')
          for shape in input_shapes
      ]
      if len(input_data) == 1:
        input_data = input_data[0]

    # Assert all models have close outputs.
    f32_output = f32_model.predict(input_data)
    for mixed_model in mixed_models:
      self.assertAllClose(
          mixed_model.predict(input_data), f32_output, rtol=rtol, atol=atol)

    # Run fit() on models
    output_shape = f32_model.outputs[0].shape
    output = np.random.normal(size=output_shape).astype('float16')
    for model in (f32_model,) + mixed_models:
      model.fit(input_data, output, batch_size=global_batch_size)

    # Assert all models have close weights
    f32_weights = f32_model.get_weights()
    for mixed_model in mixed_models:
      self.assertAllClose(
          mixed_model.get_weights(), f32_weights, rtol=rtol, atol=atol)
Ejemplo n.º 20
0
    def test_serialization(self):
        """Checks `serialize` / `deserialize` across the policy flavours.

        Single-dtype policies must serialize to their name, '_infer' to None,
        and mixed or subclassed policies to a class_name/config dict. For V1
        policies only the serialized config is checked.
        """
        # Test policies that are equivalent to a single dtype
        for policy_name in ('float16', 'float32', 'int8', 'string', 'bool'):
            original = mp_policy.Policy(policy_name)
            config = mp_policy.serialize(original)
            self.assertEqual(config, policy_name)
            restored = mp_policy.deserialize(config)
            self.assertEqual(str(original), str(restored))

        # Test "_infer" policy
        infer_policy = mp_policy.Policy('_infer')
        config = mp_policy.serialize(infer_policy)
        self.assertIsNone(config)
        restored = mp_policy.deserialize(config)
        self.assertEqual(str(infer_policy), str(restored))

        class MyPolicy(mp_policy.Policy):
            pass

        # Test policies that are not equivalent to a single dtype
        dict_serialized = (mp_policy.Policy('mixed_float16'),
                           mp_policy.Policy('mixed_bfloat16'),
                           MyPolicy('float32'))
        for original in dict_serialized:
            config = mp_policy.serialize(original)
            expected_config = {
                'class_name': original.__class__.__name__,
                'config': {
                    'name': original.name
                }
            }
            self.assertEqual(config, expected_config)
            restored = mp_policy.deserialize(
                config, custom_objects={'MyPolicy': MyPolicy})
            self.assertEqual(str(original), str(restored))

        # Test V1 policies that override the loss scale
        fixed_loss_scale_config = {
            'class_name': 'FixedLossScale',
            'config': {
                'loss_scale_value': 2.
            }
        }
        v1_policies = tuple(
            mp_policy.PolicyV1(name, loss_scale=scale)
            for name in ('float32', 'mixed_float16', 'mixed_bfloat16')
            for scale in (2., None))
        for original in v1_policies:
            config = mp_policy.serialize(original)
            # A falsy loss scale is expected to serialize as None.
            expected_loss_scale_config = (
                fixed_loss_scale_config if original.loss_scale else None)
            self.assertEqual(
                config, {
                    'class_name': original.__class__.__name__,
                    'config': {
                        'name': original.name,
                        'loss_scale': expected_loss_scale_config
                    }
                })
Ejemplo n.º 21
0
    def test_dynamic_loss_scaling(self,
                                  strategy_fn,
                                  pass_loss_scale_to_policy=False,
                                  get_config=False,
                                  use_v1_loss_scale_optimizer=False):
        """Trains a one-layer model step by step under dynamic loss scaling.

        The model checks its own gradients against `expected_gradient` and
        can be switched to produce NaN gradients. After each `fit` call the
        layer variable is compared with the value expected for that step.

        Args:
          strategy_fn: Callable returning the distribution strategy whose
            scope the optimizer and model are built in.
          pass_loss_scale_to_policy: If True, a DynamicLossScale is passed to
            a PolicyV1 and the optimizer is not wrapped here.
          get_config: If True, the model is rebuilt from its own config
            before being compiled.
          use_v1_loss_scale_optimizer: If True (and
            `pass_loss_scale_to_policy` is False), the optimizer is wrapped
            in a LossScaleOptimizerV1 instead of a LossScaleOptimizer.
        """
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        # Held in a backend variable so it can be updated between fit() calls.
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.)
            # The three branches configure the same initial scale and a
            # growth period of 2 through different APIs.
            if pass_loss_scale_to_policy:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                p = policy.PolicyV1('mixed_float16', loss_scale=loss_scale)
            elif use_v1_loss_scale_optimizer:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                p = policy.Policy('mixed_float16')
                opt = loss_scale_optimizer.LossScaleOptimizerV1(
                    opt, loss_scale)
            else:
                p = policy.Policy('mixed_float16')
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt,
                    initial_scale=initial_loss_scale,
                    dynamic_growth_steps=2)
            with policy.policy_scope(p):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    # Rebind `layer` to the rebuilt model's MultiplyLayer. The
                    # one-element unpacking fails unless exactly one exists.
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    # The targets are ignored; the loss is the mean prediction.
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it goes down by
        # 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Ejemplo n.º 22
0
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        """Trains a four-layer mixed precision model for a single step.

        The advanced model tests mixed-precision-related features that would
        occur in a resnet50 model. It tests a model that has:
         * Multiple layers, some which use auto-cast variables and some which
           do not
         * Regularization on some variables and not others.
         * A fixed loss scale (if use_loss_scaling is True)

        Args:
          strategy_fn: Callable returning the distribution strategy whose
            scope the model is built in.
          use_loss_scaling: If True, the optimizer is wrapped in a
            LossScaleOptimizer with a fixed scale and the gradient reaching
            the model output is checked.
        """
        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.
        learning_rate = 2**-14

        with strategy.scope(), policy.policy_scope(
                policy.Policy('mixed_float16')):
            inputs = layers.Input(shape=(1, ), batch_size=2)
            layer1 = mp_test_util.MultiplyLayer(
                assert_type=dtypes.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=True)
            layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=dtypes.float16, use_operator=True)
            layer3 = mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                                use_operator=False)
            layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                assert_type=dtypes.float16,
                regularizer=mp_test_util.IdentityRegularizer(),
                use_operator=False)
            stacked_layers = (layer1, layer2, layer3, layer4)

            # Chain the layers in order, each one consuming the previous
            # layer's output.
            outputs = inputs
            for layer in stacked_layers:
                outputs = layer(outputs)

            if use_loss_scaling:
                # The gradient of the output at this point is 1. With loss
                # scaling, the gradient is 'loss_scale'. We divide by the
                # batch size of 2 since the loss is averaged across batch
                # elements.
                expected_gradient = loss_scale / 2
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=[expected_gradient]))
                outputs = core.Lambda(identity_with_grad_check_fn)(outputs)
            model = models.Model(inputs=inputs, outputs=outputs)

            def loss_fn(y_true, y_pred):
                # The targets are ignored; the loss is the mean prediction.
                del y_true
                return math_ops.reduce_mean(y_pred)

            opt = gradient_descent.SGD(learning_rate)
            if use_loss_scaling:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=loss_scale)
            model.compile(opt,
                          loss=loss_fn,
                          run_eagerly=testing_utils.should_run_eagerly())

        features = np.ones((2, 1))
        targets = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (features, targets)).batch(2)
        model.fit(dataset)
        for layer in stacked_layers:
            # A layer with a weight regularizer (non-empty `losses`) is
            # expected to move by two learning rates, any other layer by one.
            update_count = 2 if layer.losses else 1
            self.assertEqual(backend.eval(layer.v),
                             1 - update_count * learning_rate)