Example 1
 def test_unsupported_strategy(self):
     strategy = create_central_storage_strategy()
     with strategy.scope(), self.assertRaisesRegex(
             ValueError, 'Mixed precision is not supported with the '
             'tf.distribute.Strategy: CentralStorageStrategy. Either '
             'stop using mixed precision by removing the use of the '
             '"mixed_float16" policy or use a different Strategy, e.g. '
             'a MirroredStrategy.'):
         mp_test_util.MultiplyLayer(dtype='mixed_float16')
     # Non-mixed policies are fine
     mp_test_util.MultiplyLayer(dtype=policy.Policy('float64'))
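For contrast, here is a minimal sketch of the supported path using the public tf.keras.mixed_precision API rather than the internal test helpers (assuming TF 2.4+); MirroredStrategy is the kind of strategy the error message above recommends.

import tensorflow as tf

# Sketch only: set a global mixed_float16 policy and build a layer under a
# strategy that does support mixed precision.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    layer = tf.keras.layers.Dense(8)   # picks up the global policy
    y = layer(tf.ones((2, 4)))
    print(y.dtype)                     # float16: the compute dtype
    print(layer.kernel.dtype)          # float32: the variable dtype

tf.keras.mixed_precision.set_global_policy('float32')  # restore the default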
Example 2
    def test_input_spec_dtype(self):
        # Test that the InputSpec's dtype is compared against the inputs before
        # the layer casts them, not after.
        layer = mp_test_util.MultiplyLayer(dtype='float64')
        layer.input_spec = input_spec.InputSpec(dtype='float16')

        # Test passing Eager tensors
        x = array_ops.ones((2, 2), dtype='float16')
        layer(x)
        x = array_ops.ones((2, 2), dtype='float64')
        with self.assertRaisesRegex(
                ValueError, 'expected dtype=float16, found dtype=.*float64'):
            layer(x)

        # Test passing symbolic tensors
        x = layers.Input((2, ), dtype='float16')
        y = layer(x)
        model = models.Model(x, y)
        model(array_ops.ones((2, 2)))

        x = layers.Input((2, ), dtype='float64')
        with self.assertRaisesRegex(
                ValueError, 'expected dtype=float16, found dtype=.*float64'):
            # In TF2, the error is only raised when the model is run
            y = layer(x)
            model = models.Model(x, y)
            model(array_ops.ones((2, 2)))
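For reference, a standalone sketch of the same mechanism with public symbols (the DoubleLayer class is hypothetical): a layer can declare an expected input dtype via InputSpec, and Keras checks it against the raw inputs before any policy-driven cast.

import tensorflow as tf

class DoubleLayer(tf.keras.layers.Layer):
    """Hypothetical layer that only accepts float32 inputs."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = tf.keras.layers.InputSpec(dtype='float32')

    def call(self, inputs):
        return inputs * 2.0

layer = DoubleLayer()
layer(tf.ones((2, 2), dtype='float32'))    # passes the dtype check
# layer(tf.ones((2, 2), dtype='float16'))  # would raise ValueError: expected dtype=float32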
Example 3
    def test_save_slot_variables_with_autocast_vars(self,
                                                    strategy_fn,
                                                    var_name='v'):
        p = policy.Policy('mixed_float16')
        with strategy_fn().scope(), policy.policy_scope(p):
            x = layers.Input(shape=(2, ), batch_size=2)
            # Having a var_name other than 'v' tests that a fixed bug (b/134713714)
            # does not reoccur. The bug was that a crash would occur when saving a
            # checkpoint where an AutoCastVariable with a slot variable would have a
            # different name than the layer attribute's name (layer.v in this case).
            layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                               var_name=var_name)
            y = layer(x)
            model = models.Model(inputs=x, outputs=y)
            opt = gradient_descent.SGD(1., 1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(opt,
                                                          dynamic=False,
                                                          initial_scale=1)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=testing_utils.should_run_eagerly())

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        weights_file = os.path.join(self.get_temp_dir(), 'weights')
        model.save_weights(weights_file)
        saved_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))

        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))
        self.assertNotEqual(new_slot, saved_slot)

        model.load_weights(weights_file)
        restored_slot = backend.get_value(opt.get_slot(layer.v, 'momentum'))
        self.assertEqual(restored_slot, saved_slot)
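For orientation, a small sketch (public tf.keras.optimizers API, TF 2.x OptimizerV2 behavior assumed) of the slot variable being inspected above: SGD with momentum keeps one 'momentum' slot per trained variable, retrievable with get_slot.

import tensorflow as tf

var = tf.Variable(1.0)
opt = tf.keras.optimizers.SGD(learning_rate=1.0, momentum=0.9)

# Slots are created lazily on the first apply_gradients call.
opt.apply_gradients([(tf.constant(0.5), var)])
momentum_slot = opt.get_slot(var, 'momentum')
print(momentum_slot.numpy())   # the accumulated velocity for `var`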
Example 4
    def test_gradient(self, strategy_fn):
        x = constant_op.constant([1.])
        with strategy_fn().scope() as strategy:
            with policy.policy_scope('mixed_float16'):
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                # The learning rate is small enough that, if it were applied to a
                # float16 variable, the variable would not change. So this verifies
                # that the learning rate is applied to the float32 variable, not to
                # a float16 value.
                opt = gradient_descent.SGD(2**-14)

                def run_fn():
                    with backprop.GradientTape() as tape:
                        y = layer(x)
                        # Divide by num_replicas_in_sync, as the effective total
                        # loss is the sum of each replica's loss.
                        y /= strategy.num_replicas_in_sync

                    grad = tape.gradient(y, layer.v)
                    return opt.apply_gradients([(grad, layer.v)])

                op = strategy.experimental_run(run_fn)
                if not context.executing_eagerly():
                    self.evaluate(variables.global_variables_initializer())
                    self.evaluate(op)
                # The gradient with respect to the variable is 1. Since the
                # variable is initialized with 1 and the learning rate is 2**-14,
                # the new variable value should be:
                # init_val - gradient * learning_rate, which is 1 - 1 * 2**-14.
                self.assertEqual(self.evaluate(layer.v), 1 - 2**-14)
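A sketch of the same distribution and gradient plumbing with the non-experimental Strategy.run API (public symbols only, mixed precision omitted for brevity): per-replica gradients are aggregated and applied once to the float32 variable.

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    v = tf.Variable(1.0)
    opt = tf.keras.optimizers.SGD(2**-14)

@tf.function
def train_step(x):
    def replica_fn(x):
        with tf.GradientTape() as tape:
            # Divide by the replica count so the summed loss stays v * x.
            y = v * x / strategy.num_replicas_in_sync
        grad = tape.gradient(y, v)
        opt.apply_gradients([(grad, v)])
    strategy.run(replica_fn, args=(x,))

train_step(tf.constant(1.0))
print(v.numpy())   # 1 - 2**-14: the tiny update lands on the float32 variable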
Example 5
    def test_config(self, strategy_fn):
        x = constant_op.constant([1.], dtype=dtypes.float16)
        with strategy_fn().scope():
            for layer, dtype in (
                    (mp_test_util.MultiplyLayer(), 'float32'),
                    (mp_test_util.MultiplyLayer(dtype='float64'), 'float64'),
                    (mp_test_util.MultiplyLayer(dtype=policy.Policy('float64')),
                     'float64')):
                config = layer.get_config()
                self.assertEqual(config['dtype'], dtype)
                self.assertIsInstance(config['dtype'], str)
                layer = mp_test_util.MultiplyLayer.from_config(config)
                self.assertEqual(layer.dtype, dtype)
                self.assertEqual(layer(x).dtype, dtype)
                self.assertEqual(layer.v.dtype, dtype)

            layer = mp_test_util.MultiplyLayer(dtype='mixed_float16')
            config = layer.get_config()
            self.assertEqual(config['dtype'], {
                'class_name': 'Policy',
                'config': {
                    'name': 'mixed_float16'
                }
            })
            layer = mp_test_util.MultiplyLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float32')
            config = layer.get_config()
            self.assertEqual(config['dtype'], {
                'class_name': 'Policy',
                'config': {
                    'name': 'mixed_float16'
                }
            })

            layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('_infer'))
            config = layer.get_config()
            self.assertIsNone(config['dtype'])
            layer = mp_test_util.MultiplyLayer.from_config(config)
            # If a layer is serialized with the "_infer" policy, when deserialized
            # into TF 2 it will have the global policy instead of "_infer". This is
            # because "_infer" is serialized into None, and passing dtype=None in
            # TensorFlow 2 indicates to use the global policy.
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float32')
            self.assertEqual(layer.v.dtype, 'float32')
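The Policy serialization round trip exercised above, shown with public symbols (a sketch, TF 2.4+ assumed):

import tensorflow as tf

p = tf.keras.mixed_precision.Policy('mixed_float16')
print(p.compute_dtype, p.variable_dtype)   # float16 float32

config = p.get_config()                    # {'name': 'mixed_float16'}
restored = tf.keras.mixed_precision.Policy.from_config(config)
print(restored.name)                       # mixed_float16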
Example 6
 def test_functional_model_loss_dtype(self):
     with policy.policy_scope('float16'):
         x = layers.Input(shape=(1, ))
         y = mp_test_util.MultiplyLayer()(x)
         model = models.Model(x, y)
         model.add_loss(math_ops.cast(y, 'float32'))
         # The loss should not be cast to the policy's dtype.
         self.assertEqual(model.losses[0].dtype, 'float32')
Example 7
 def test_policy_v1_without_loss_scale(self):
     with policy.policy_scope(
             policy.PolicyV1('mixed_float16', loss_scale=None)):
         opt = gradient_descent.SGD(1.)
         x = layers.Input(shape=(1, ))
         y = mp_test_util.MultiplyLayer()(x)
         model = models.Model(x, y)
         model.compile(opt, loss='mse')
         self.assertNotIsInstance(model.optimizer,
                                  loss_scale_optimizer.LossScaleOptimizer)
Example 8
 def test_loss_scale_optimizer_overrides_policy_v1_loss_scale(self):
     with policy.policy_scope(policy.PolicyV1('float32', loss_scale=10.)):
         opt = gradient_descent.SGD(1.)
         opt = loss_scale_optimizer.LossScaleOptimizer(opt,
                                                       dynamic=False,
                                                       initial_scale=5.)
         x = layers.Input(shape=(1, ))
         y = mp_test_util.MultiplyLayer()(x)
         model = models.Model(x, y)
         model.compile(opt, loss='mse')
         self.assertEqual(self.evaluate(model.optimizer.loss_scale), 5.)
Example 9
 def test_pass_invalid_optimizer_with_loss_scaling(self):
     with policy.policy_scope(policy.PolicyV1('float32', loss_scale=10.)):
         x = layers.Input(shape=(1, ))
         y = mp_test_util.MultiplyLayer()(x)
         model = models.Model(x, y)
         if context.executing_eagerly():
             error_msg = 'Use a `tf.keras` Optimizer instead'
         else:
             error_msg = 'optimizer" must be an instance of '
         with self.assertRaisesRegex(ValueError, error_msg):
             model.compile(optimizer_v1.SGD(1.), 'mse')
Example 10
 def test_passing_policy_to_layer(self, strategy_fn):
     x = constant_op.constant([1.], dtype=dtypes.float16)
     with strategy_fn().scope():
         # Passing a Policy to 'dtype' sets the policy for that layer.
         layer = mp_test_util.MultiplyLayer(
             assert_type=dtypes.float16,
             dtype=policy.Policy('mixed_float16'))
         # layer.dtype refers to the variable dtype
         self.assertEqual(layer.dtype, dtypes.float32)
         layer(x)
         self.assertEqual(layer.v.dtype, dtypes.float32)
         with policy.policy_scope('mixed_float16'):
             # Passing a Policy to dtype overrides the global Policy
             layer = mp_test_util.MultiplyLayer(
                 assert_type=dtypes.float64, dtype=policy.Policy('float64'))
             self.assertEqual(layer.dtype_policy.name, 'float64')
             self.assertIsInstance(layer.dtype_policy, policy.Policy)
             self.assertEqual(layer.compute_dtype, dtypes.float64)
             self.assertEqual(layer.dtype, dtypes.float64)
             self.assertEqual(layer.variable_dtype, dtypes.float64)
             self.assertEqual(layer(x).dtype, dtypes.float64)
             self.assertEqual(layer.v.dtype, dtypes.float64)
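The same per-layer override with public symbols, as a minimal sketch (not from the test suite): a dtype or Policy passed to a layer takes precedence over the global policy.

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')

mixed_layer = tf.keras.layers.Dense(4)                  # uses the global policy
f64_layer = tf.keras.layers.Dense(4, dtype='float64')   # per-layer override

x = tf.ones((2, 3))
print(mixed_layer(x).dtype, mixed_layer.kernel.dtype)   # float16 float32
print(f64_layer(x).dtype, f64_layer.kernel.dtype)       # float64 float64

tf.keras.mixed_precision.set_global_policy('float32')   # restore the default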
Example 11
    def test_build_and_call_layer_in_function(self):
        layer = mp_test_util.MultiplyLayer(
            dtype=policy.Policy('mixed_float16'))

        @def_function.function
        def f():
            return layer(1.)

        y = f()
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(y.dtype, 'float16')
        self.assertEqual(layer.v.dtype, 'float32')
        self.assertEqual(self.evaluate(y), 1.)
Example 12
    def _test_checkpointing_layer_weights(self, strategy_fn,
                                          mixed_prec_when_saving,
                                          mixed_prec_when_loading):
        # In this test, we potentially save with mixed precision enabled and load
        # with mixed precision disabled, or vice versa. This is possible because
        # variables are float32 regardless of whether mixed precision is enabled.
        save_policy = 'mixed_float16' if mixed_prec_when_saving else 'float32'
        load_policy = 'mixed_float16' if mixed_prec_when_loading else 'float32'
        save_input_dtype = 'float16' if mixed_prec_when_saving else 'float32'
        load_input_dtype = 'float16' if mixed_prec_when_loading else 'float32'

        # Create a layer and save a checkpoint.
        x = constant_op.constant([1.])
        with strategy_fn().scope():
            with policy.policy_scope(save_policy):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=save_input_dtype)
                layer(x)  # Build layer
        layer.set_weights([np.array(100.)])
        self.assertEqual(self.evaluate(layer(x)), 100.)
        checkpoint = trackable_utils.Checkpoint(layer=layer)
        prefix = os.path.join(self.get_temp_dir(), 'ckpt')
        save_path = checkpoint.save(prefix)

        # Create a new layer and restore the checkpoint.
        x = constant_op.constant([1.])
        with strategy_fn().scope():
            with policy.policy_scope(load_policy):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=load_input_dtype)
                layer(x)  # Build layer
        layer.set_weights([np.array(200.)])
        self.assertEqual(self.evaluate(layer(x)), 200.)
        checkpoint = trackable_utils.Checkpoint(layer=layer)
        checkpoint.restore(save_path).assert_consumed().run_restore_ops()
        self.assertEqual(layer.get_weights(), [100.])
        self.assertEqual(self.evaluate(layer(x)), 100.)
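The property this checkpoint round trip relies on, shown as a sketch with public symbols (TF 2.4+ assumed): under mixed_float16 the weights themselves are still float32, so a checkpoint written under one policy can be read under the other.

import tensorflow as tf

mixed = tf.keras.layers.Dense(
    4, dtype=tf.keras.mixed_precision.Policy('mixed_float16'))
plain = tf.keras.layers.Dense(4)                 # default float32 policy
mixed.build((None, 3))
plain.build((None, 3))
print(mixed.kernel.dtype, plain.kernel.dtype)    # both float32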
Example 13
    def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False):
        with strategy_fn().scope():
            with policy.policy_scope('mixed_float16'):
                x = layers.Input(shape=(1, ), batch_size=2)
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                y = layer(x)
                model = models.Model(inputs=x, outputs=y)

        model.set_weights([np.array(100.)])
        x = np.ones((2, 1))
        self.assertAllClose(backend.get_value(model(x)), x * 100.)
        suffix = '.h5' if h5 else ''
        weights_file = os.path.join(self.get_temp_dir(), 'weights' + suffix)
        model.save_weights(weights_file)

        model.set_weights([np.array(200.)])
        self.assertAllClose(backend.get_value(model(x)), x * 200.)
        model.load_weights(weights_file)
        self.assertAllClose(backend.get_value(model(x)), x * 100.)
        self.assertEqual(model.get_weights(), [np.array(100.)])
Example 14
 def test_mixed_policies_(self, strategy_fn):
     strategy = strategy_fn()
     for dtype in 'float16', 'bfloat16':
         x = constant_op.constant([1.])
         policy_name = 'mixed_' + dtype
         with strategy.scope(), policy.policy_scope(policy_name):
             layer = mp_test_util.MultiplyLayer(assert_type=dtype)
             self.assertEqual(layer.dtype, dtypes.float32)
             self.assertEqual(
                 get_layer_policy.get_layer_policy(layer).name, policy_name)
             y = layer(x)
             self.assertEqual(layer.v.dtype, dtypes.float32)
             self.assertEqual(y.dtype, dtype)
             self.assertEqual(layer.dtype_policy.name, policy_name)
             self.assertIsInstance(layer.dtype_policy, policy.Policy)
             self.assertEqual(layer.compute_dtype, dtype)
             self.assertEqual(layer.dtype, dtypes.float32)
             self.assertEqual(layer.variable_dtype, dtypes.float32)
             self.assertEqual(
                 get_layer_policy.get_layer_policy(layer).name, policy_name)
             self.evaluate(variables.global_variables_initializer())
             self.assertEqual(self.evaluate(y), 1.)
Example 15
    def test_fixed_loss_scaling(self, strategy_fn):
        # Note: We do not test mixed precision in this method, only loss scaling.
        loss_scale = 8.
        batch_size = 4
        with strategy_fn().scope():
            x = layers.Input(shape=(1, ), batch_size=batch_size)
            layer = mp_test_util.MultiplyLayer()
            y = layer(x)

            # The gradient of 'y' at this point is 1. With loss scaling, the gradient
            # is 'loss_scale'. We divide by the batch size since the loss is averaged
            # across batch elements.
            expected_gradient = loss_scale / batch_size
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    [expected_gradient]))
            y = core.Lambda(identity_with_grad_check_fn)(y)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                del y_true
                return math_ops.reduce_mean(y_pred)

            opt = gradient_descent.SGD(1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, dynamic=False, initial_scale=loss_scale)
            model.compile(opt,
                          loss=loss_fn,
                          run_eagerly=testing_utils.should_run_eagerly())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 1 subtracted from it.
        expected = 0
        self.assertEqual(backend.eval(layer.v), expected)
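For orientation, a minimal custom-training-loop sketch (public API, TF 2.4+ assumed) of what a fixed loss scale does: the loss is multiplied by the scale before gradients are computed and the gradients are divided by it again before being applied, so the applied update is unchanged.

import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(1.0), dynamic=False, initial_scale=8.0)
var = tf.Variable(1.0)

with tf.GradientTape() as tape:
    loss = var * 1.0
    scaled_loss = opt.get_scaled_loss(loss)           # loss * 8
scaled_grad = tape.gradient(scaled_loss, var)         # 8.0
(grad,) = opt.get_unscaled_gradients([scaled_grad])   # back to 1.0
opt.apply_gradients([(grad, var)])
print(var.numpy())                                    # 0.0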
Example 16
    def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
        x = constant_op.constant([1.])
        with strategy_fn().scope():
            with policy.policy_scope('mixed_float16'):
                # Test on MultiplyLayer
                layer = mp_test_util.MultiplyLayer(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer())
                layer(x)
                (regularizer_loss, ) = layer.losses
                self.assertEqual(regularizer_loss.dtype, dtypes.float32)
                self.evaluate(variables.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.)

                # Test on MultiplyLayerWithoutAutoCast
                layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer())
                layer(x)
                (regularizer_loss, ) = layer.losses
                self.assertEqual(regularizer_loss.dtype, dtypes.float32)
                self.evaluate(variables.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.)
Example 17
    def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn):
        strategy = strategy_fn()
        if (isinstance(strategy, mirrored_strategy.MirroredStrategy)
                and not context.executing_eagerly()):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2, ), batch_size=2, dtype=dtypes.float32)
            y = mp_test_util.MultiplyLayer(assert_type=dtypes.float32)(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.)
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=1., dynamic_growth_steps=2.)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=testing_utils.should_run_eagerly())
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)

        # Save model weights.
        save_prefix = os.path.join(self.get_temp_dir(), 'ckpt')
        model.save_weights(save_prefix)

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model.load_weights(save_prefix)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
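The loss-scale state that the save_weights/load_weights round trip above restores lives on the optimizer; a sketch of the two trackable pieces (public attributes, TF 2.4+ assumed, checkpoint path hypothetical):

import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(1.0), initial_scale=1.0, dynamic_growth_steps=2)

print(opt.loss_scale.numpy())       # current scale, starts at initial_scale
print(opt.dynamic_counter.numpy())  # finite steps since the scale last changed

# Both live in variables tracked by the optimizer, so checkpoints capture them.
ckpt = tf.train.Checkpoint(optimizer=opt)
path = ckpt.save('/tmp/lso_ckpt')   # hypothetical path
ckpt.restore(path)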
Example 18
    def _test_mixed_precision(self, task_type, task_id, num_gpus):
        """Tests mixed precision works with the CollectiveAllReduceStrategy.

    This tests:
      1. Variables are in float32, by running with a small enough learning rate
         that if the variables are float16, their values wouldn't change when
         gradients are applied.
      2. The loss scale is doubled if there are no NaNs.
      3. The loss scale is halved if the first worker has a NaN, even if the
         other workers do not have NaNs.

    Args:
      task_type: A string, such as "worker", indicating the type of the replica.
      task_id: Zero-indexed ID of the task.
      num_gpus: The number of GPUs to use.
    """
        d, master_target, config = self._get_test_object(
            task_type, task_id, num_gpus)
        # Should be set to mixed_float16 by caller.
        self.assertEqual(policy.global_policy().name, 'mixed_float16')

        with ops.Graph().as_default(), \
             self.cached_session(config=config,
                                 target=master_target) as sess:
            # The loss on the first worker is multiplied by this value. Allows
            # testing the first worker having NaN loss and gradients while keeping the
            # other workers' losses and gradients finite.
            loss_multiplier_for_first_worker = variables.Variable(
                1., dtype='float16', trainable=False)
            with d.scope():
                model = sequential.Sequential([
                    mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                               input_shape=(1, )),
                ])
                loss_scale = loss_scale_module.DynamicLossScale(
                    2**10, increment_period=1)

                def model_fn():
                    """Simple model to test mixed precision."""
                    x = np.ones((1, 1))
                    loss = model(x, training=True)

                    if ((task_type == 'worker' and task_id == 0)
                            or task_type is task_id is None):
                        loss *= loss_multiplier_for_first_worker
                    # The learning rate is small enough that, if it were applied
                    # to a float16 variable, the variable would not change. So
                    # this verifies that the learning rate is applied to the
                    # float32 variable, not to a float16 value.
                    optimizer = gradient_descent.GradientDescentOptimizer(
                        2**-14)
                    optimizer = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                        optimizer, loss_scale)
                    train_op = optimizer.minimize(
                        loss, training_util.get_or_create_global_step())
                    return train_op

                train_op = d.extended.call_for_each_replica(model_fn)
                train_op = d.group(d.experimental_local_results(train_op))

            sess.run(variables.global_variables_initializer())
            sess.run(train_op)

            (var, ) = model.trainable_weights
            # Variable starts at 1. Each worker's gradient is 2 ** -14, the learning
            # rate, and each worker's gradient will be subtracted from the variable.
            expected = 1 - d.num_replicas_in_sync * 2**-14
            self.assertEqual(sess.run(var), expected)
            # Loss scale should double, as all gradients are finite.
            self.assertEqual(sess.run(loss_scale()), 2**11)

            # Set the first worker to have NaN loss and gradients.
            sess.run(loss_multiplier_for_first_worker.assign(float('NaN')))
            sess.run(train_op)
            # Variable should not change, since first worker had NaN
            self.assertEqual(sess.run(var), expected)
            # Loss scale should halve due to NaN
            self.assertEqual(sess.run(loss_scale()), 2**10)
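The TF2 LossScaleOptimizer implements the same dynamic behavior as the V1 DynamicLossScale used above; a sketch (public API, TF 2.4+ assumed) of the doubling/halving rules in isolation:

import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(1.0), initial_scale=2**10, dynamic_growth_steps=1)
var = tf.Variable(1.0)

opt.apply_gradients([(tf.constant(1.0), var)])            # finite step
print(opt.loss_scale.numpy())                             # 2**11: scale doubled
print(var.numpy())                                        # 0.0: update applied

opt.apply_gradients([(tf.constant(float('nan')), var)])   # non-finite step
print(opt.loss_scale.numpy())                             # 2**10: scale halved
print(var.numpy())                                        # 0.0: update skipped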
Example 19
    def test_save_model_with_dynamic_loss_scaling(
            self, strategy_fn, h5=False, use_v1_loss_scale_optimizer=False):
        # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy
        # as well.
        strategy = strategy_fn()
        if (isinstance(strategy, mirrored_strategy.MirroredStrategy)
                and not context.executing_eagerly()):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2, ), batch_size=2, dtype=dtypes.float32)
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.)
            if use_v1_loss_scale_optimizer:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=1., increment_period=2.)
                opt = loss_scale_optimizer.LossScaleOptimizerV1(
                    opt, loss_scale)
            else:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, initial_scale=1., dynamic_growth_steps=2.)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=testing_utils.should_run_eagerly())
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
        (weight, ) = model.trainable_weights
        orig_weight = backend.get_value(weight)

        # Save model weights.
        save_path = os.path.join(self.get_temp_dir(), 'model')
        model.save(save_path, save_format='h5' if h5 else 'tf')

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_weight = backend.get_value(weight)
        self.assertNotEqual(new_weight, orig_weight)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model = save.load_model(
            save_path,
            custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
        (weight, ) = model.trainable_weights
        loaded_weight = backend.get_value(weight)
        self.assertEqual(loaded_weight, orig_weight)
        # Currently the loss scale isn't always saved when the model is saved with
        # Model.save(). So we assert the loss scale has either the value it had
        # when it was saved, or the value it was initialized with.
        # TODO(reedwm): Always save/restore the loss scale with Model.save().
        self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2))
        self.assertIn(backend.get_value(model.optimizer.dynamic_counter),
                      (0, 1))

        # Test optimizer attributes and type
        self.assertEqual(model.optimizer.initial_scale, 1.)
        self.assertEqual(model.optimizer.dynamic_growth_steps, 2.)
        self.assertEqual(type(model.optimizer),
                         loss_scale_optimizer.LossScaleOptimizer)
Example 20
    def test_config_policy_v1(self, strategy_fn):
        x = constant_op.constant([1.], dtype=dtypes.float16)
        with strategy_fn().scope():

            layer = mp_test_util.MultiplyLayer(
                dtype=policy.PolicyV1('mixed_float16', loss_scale=None))
            config = layer.get_config()
            self.assertEqual(
                config['dtype'], {
                    'class_name': 'PolicyV1',
                    'config': {
                        'name': 'mixed_float16',
                        'loss_scale': None
                    }
                })
            layer = mp_test_util.MultiplyLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float32')
            # Restoring a PolicyV1 silently converts it to a Policy and drops the loss
            # scale.
            self.assertEqual(type(layer.dtype_policy), policy.Policy)
            config = layer.get_config()
            # The loss_scale is silently dropped
            self.assertEqual(config['dtype'], {
                'class_name': 'Policy',
                'config': {
                    'name': 'mixed_float16'
                }
            })

            layer = mp_test_util.MultiplyLayer(
                dtype=policy.PolicyV1('float64', loss_scale=2.))
            config = layer.get_config()
            self.assertEqual(
                config['dtype'], {
                    'class_name': 'PolicyV1',
                    'config': {
                        'name': 'float64',
                        'loss_scale': {
                            'class_name': 'FixedLossScale',
                            'config': {
                                'loss_scale_value': 2.0
                            }
                        }
                    }
                })
            layer = mp_test_util.MultiplyLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float64')
            self.assertEqual(layer(x).dtype, 'float64')
            self.assertEqual(layer.v.dtype, 'float64')
            self.assertEqual(type(layer.dtype_policy), policy.Policy)
            config = layer.get_config()
            self.assertEqual(config['dtype'], 'float64')

            layer = mp_test_util.MultiplyLayer(
                dtype=policy.PolicyV1('_infer', loss_scale=2.))
            config = layer.get_config()
            self.assertEqual(
                config['dtype'], {
                    'class_name': 'PolicyV1',
                    'config': {
                        'name': '_infer',
                        'loss_scale': {
                            'class_name': 'FixedLossScale',
                            'config': {
                                'loss_scale_value': 2.0
                            }
                        }
                    }
                })
            layer = mp_test_util.MultiplyLayer.from_config(config)
            self.assertEqual(layer.dtype, None)
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float16')
            self.assertEqual(type(layer.dtype_policy), policy.Policy)
            config = layer.get_config()
            self.assertEqual(config['dtype'], 'float16')
Example 21
    def test_model(self,
                   strategy_fn,
                   use_operator=False,
                   use_regularizer=False,
                   policy_name='mixed_float16',
                   get_config=False,
                   save_format=None,
                   use_input_spec=False,
                   use_v1_policy=False):
        self._skip_if_strategy_unsupported(strategy_fn)
        self._skip_if_save_format_unsupported(save_format)
        if use_regularizer:
            weight_regularizer = mp_test_util.IdentityRegularizer()
            activity_regularizer = mp_test_util.ReduceSumRegularizer()
        else:
            weight_regularizer = activity_regularizer = None
        with strategy_fn().scope():
            cls = policy.PolicyV1 if use_v1_policy else policy.Policy
            with policy.policy_scope(cls(policy_name)):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=dtypes.float16,
                    use_operator=use_operator,
                    regularizer=weight_regularizer,
                    activity_regularizer=activity_regularizer,
                    input_shape=(1, ))
                if use_input_spec:
                    layer.input_spec = input_spec.InputSpec(shape=(None, 1))
                model = testing_utils.get_model_from_layers(
                    [layer], input_shape=(1, ), input_dtype=dtypes.float16)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                # The learning rate is small enough that, if it were applied to a
                # float16 variable, the variable would not change. So this verifies
                # that the learning rate is applied to the float32 variable, not to
                # a float16 value.
                opt = gradient_descent.SGD(2**-14)
                # Use a fixed loss scale, as this test will fail if gradients are
                # skipped for a step due to dynamic loss scaling.
                opt = loss_scale_optimizer.LossScaleOptimizer(opt,
                                                              dynamic=False,
                                                              initial_scale=8)
                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 2 ** -14 subtracted
        # from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # Weight and activity regularizer each add another 2 ** -14 to the
            # gradient.
            expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            with generic_utils.CustomObjectScope({
                    'MultiplyLayer':
                    mp_test_util.MultiplyLayer,
                    'loss_fn':
                    loss_fn
            }):
                self._test_saving(model, dataset, save_format, use_regularizer)
Example 22
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not.
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy('mixed_float16')):
                x = layers.Input(shape=(1, ), batch_size=2)
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True)
                layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=dtypes.float16, use_operator=True)
                layer3 = mp_test_util.MultiplyLayer(assert_type=dtypes.float16,
                                                    use_operator=False)
                layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False)
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=dtypes.float16,
                            expected_gradient=[expected_gradient]))
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale)
                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
Example 23
    def test_dynamic_loss_scaling(self,
                                  strategy_fn,
                                  pass_loss_scale_to_policy=False,
                                  get_config=False,
                                  use_v1_loss_scale_optimizer=False):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.)
            if pass_loss_scale_to_policy:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                p = policy.PolicyV1('mixed_float16', loss_scale=loss_scale)
            elif use_v1_loss_scale_optimizer:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                p = policy.Policy('mixed_float16')
                opt = loss_scale_optimizer.LossScaleOptimizerV1(
                    opt, loss_scale)
            else:
                p = policy.Policy('mixed_float16')
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt,
                    initial_scale=initial_loss_scale,
                    dynamic_growth_steps=2)
            with policy.policy_scope(p):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down by
        # 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)