Example #1
    def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
        x = tf.constant([1.0])
        with strategy_fn().scope():
            with policy.policy_scope("mixed_float16"):
                # Test on MultiplyLayer
                layer = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                )
                layer(x)
                (regularizer_loss,) = layer.losses
                self.assertEqual(regularizer_loss.dtype, tf.float32)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.0)

                # Test on MultiplyLayerWithoutAutoCast
                layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                )
                layer(x)
                (regularizer_loss,) = layer.losses
                self.assertEqual(regularizer_loss.dtype, tf.float32)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.0)
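
Both assertions above hinge on the same behaviour: under the "mixed_float16" policy a layer computes in float16 but keeps its variables in float32, so a regularizer, which runs on the variable, produces a float32 loss. Below is a minimal standalone sketch of that behaviour using only the public tf.keras API; the Dense layer and L2 regularizer are illustrative stand-ins for the mp_test_util helpers used in the example.

import tensorflow as tf

# Make every new layer use float16 compute with float32 variables.
tf.keras.mixed_precision.set_global_policy("mixed_float16")

layer = tf.keras.layers.Dense(1, kernel_regularizer=tf.keras.regularizers.L2(1.0))
y = layer(tf.ones((1, 1)))

print(y.dtype)                # float16: outputs are computed in the compute dtype
print(layer.kernel.dtype)     # float32: variables are kept in the variable dtype
print(layer.losses[0].dtype)  # float32: the regularizer runs on the float32 variable

# Restore the default policy so later code is unaffected.
tf.keras.mixed_precision.set_global_policy("float32")
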
Example #2
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not.
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True).

        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.0
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy("mixed_float16")):
                x = layers.Input(shape=(1,), batch_size=2)
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True,
                )
                layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16, use_operator=True
                )
                layer3 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16, use_operator=False
                )
                layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False,
                )
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=tf.float16,
                            expected_gradient=[expected_gradient],
                        )
                    )
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale
                    )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
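
The fixed loss scale exercised above follows the usual pattern: the loss is multiplied by the scale before the backward pass and the gradients are divided by the same scale before they are applied, so the update itself is unchanged while small float16 gradients are kept away from underflow. Here is a small sketch of that round trip with a scale of 8, independent of the test code; the variable and loss are made up for illustration and assume a TF 2.x tf.keras setup.

import tensorflow as tf

opt = tf.keras.optimizers.SGD(2**-14)
opt = tf.keras.mixed_precision.LossScaleOptimizer(
    opt, dynamic=False, initial_scale=8.0
)

var = tf.Variable(1.0)
with tf.GradientTape() as tape:
    loss = var * 1.0                         # d(loss)/d(var) == 1
    scaled_loss = opt.get_scaled_loss(loss)  # loss * 8

scaled_grads = tape.gradient(scaled_loss, [var])  # [8.0]
grads = opt.get_unscaled_gradients(scaled_grads)  # back to [1.0]
opt.apply_gradients(zip(grads, [var]))            # var becomes 1 - 2**-14

When Model.fit is used, as in the test, Keras performs the scaling and unscaling inside its own training step, so get_scaled_loss / get_unscaled_gradients only appear in custom training loops.
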
Example #3
    def test_model(
        self,
        strategy_fn,
        use_operator=False,
        use_regularizer=False,
        policy_name="mixed_float16",
        get_config=False,
        save_format=None,
        use_input_spec=False,
    ):
        self._skip_if_strategy_unsupported(strategy_fn)
        self._skip_if_save_format_unsupported(save_format)
        if use_regularizer:
            weight_regularizer = mp_test_util.IdentityRegularizer()
            activity_regularizer = mp_test_util.ReduceSumRegularizer()
        else:
            weight_regularizer = activity_regularizer = None
        with strategy_fn().scope():
            with policy.policy_scope(policy_name):
                layer = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    use_operator=use_operator,
                    regularizer=weight_regularizer,
                    activity_regularizer=activity_regularizer,
                    input_shape=(1,),
                )
                if use_input_spec:
                    layer.input_spec = input_spec.InputSpec(shape=(None, 1))
                model = test_utils.get_model_from_layers(
                    [layer], input_shape=(1,), input_dtype=tf.float16
                )
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            "MultiplyLayer": mp_test_util.MultiplyLayer
                        },
                    )
                    (layer,) = (
                        layer
                        for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer)
                    )

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                # The learning rate is small enough that, if it were applied to a
                # float16 variable, the variable would not change. So this tests that
                # the learning rate is applied to the float32 variable rather than to
                # a float16 value.
                opt = gradient_descent.SGD(2**-14)
                # Use a fixed loss scale, as this test will fail if gradients are
                # skipped for a step due to dynamic loss scaling.
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, dynamic=False, initial_scale=8
                )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # The variable starts at 1 and, with a gradient of 1 and a learning rate of
        # 2 ** -14, should have 2 ** -14 subtracted from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # The weight and activity regularizers each contribute another 2 ** -14
            # to the update.
            expected -= 2 * 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            with generic_utils.CustomObjectScope(
                {
                    "MultiplyLayer": mp_test_util.MultiplyLayer,
                    "loss_fn": loss_fn,
                }
            ):
                self._test_saving(model, dataset, save_format, use_regularizer)
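
The _test_saving helper called at the end is internal to the test class, but the CustomObjectScope it sits inside mirrors the standard save/reload round trip: a model containing custom layers and a custom loss can only be deserialized if those objects are registered at load time, and the layer's dtype policy is part of its serialized config. Below is a hedged sketch of that flow using only public APIs and the H5 saving path; the MultiplyByScalar layer, loss_fn, and file path are illustrative, not the helpers used by the test.

import tensorflow as tf

class MultiplyByScalar(tf.keras.layers.Layer):
    def build(self, input_shape):
        self.v = self.add_weight(name="v", shape=(), initializer="ones")

    def call(self, inputs):
        return inputs * self.v

def loss_fn(y_true, y_pred):
    return tf.reduce_mean(y_pred)

tf.keras.mixed_precision.set_global_policy("mixed_float16")
inputs = tf.keras.Input(shape=(1,))
model = tf.keras.Model(inputs, MultiplyByScalar()(inputs))
model.compile(tf.keras.optimizers.SGD(2**-14), loss=loss_fn)
model.save("/tmp/mp_model.h5")

# Without the scope, load_model cannot resolve the custom layer or loss.
with tf.keras.utils.custom_object_scope(
    {"MultiplyByScalar": MultiplyByScalar, "loss_fn": loss_fn}
):
    reloaded = tf.keras.models.load_model("/tmp/mp_model.h5")

print(reloaded.layers[-1].dtype_policy.name)  # "mixed_float16": the policy survives
tf.keras.mixed_precision.set_global_policy("float32")
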