Example #1
    def test_layer_regularizer_runs_in_var_dtype(self, strategy_fn):
        x = tf.constant([1.0])
        with strategy_fn().scope():
            with policy.policy_scope("mixed_float16"):
                # Test on MultiplyLayer
                layer = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                )
                layer(x)
                (regularizer_loss,) = layer.losses
                self.assertEqual(regularizer_loss.dtype, tf.float32)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.0)

                # Test on MultiplyLayerWithoutAutoCast
                layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                )
                layer(x)
                (regularizer_loss,) = layer.losses
                self.assertEqual(regularizer_loss.dtype, tf.float32)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertEqual(self.evaluate(regularizer_loss), 1.0)
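The exact 1.0 values follow from how the test helpers are built: MultiplyLayer holds a single scalar weight initialized to 1, and IdentityRegularizer returns that weight unchanged as the regularization loss, so the loss is computed on the float32 variable even though the layer runs in float16. The real implementations live in Keras's mixed-precision test utilities (mp_test_util); the following is only a rough, simplified sketch of what they amount to, not the actual code:

import tensorflow as tf
from tensorflow import keras


class IdentityRegularizer(keras.regularizers.Regularizer):
    """Returns the (float32) weight itself as the regularization loss."""

    def __call__(self, x):
        assert x.dtype == tf.float32
        return tf.identity(x)


class MultiplyLayer(keras.layers.Layer):
    """Multiplies its input by a scalar weight 'v' initialized to 1."""

    def __init__(self, assert_type=None, regularizer=None, **kwargs):
        super().__init__(**kwargs)
        self._assert_type = assert_type
        self._regularizer = regularizer

    def build(self, input_shape):
        self.v = self.add_weight(
            "v", (), initializer="ones", regularizer=self._regularizer
        )

    def call(self, inputs):
        if self._assert_type is not None:
            assert inputs.dtype == self._assert_type
        # Under a "mixed_float16" policy, 'self.v' is an auto-cast variable:
        # it is stored in float32 but reads as float16 inside call().
        return inputs * self.v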
Example #2
    def test_layer_with_non_autocast_variable(self, strategy_fn):
        x = tf.constant([1.])
        with strategy_fn().scope():
            with policy.policy_scope('mixed_float16'):
                layer = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16)
                y = layer(x)
                self.assertEqual(layer.v.dtype, tf.float32)
                self.assertEqual(y.dtype, tf.float16)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertEqual(self.evaluate(y), 1.)
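Here the layer's weight is an ordinary float32 variable rather than an auto-cast one, so the multiplication has to cast it to the compute dtype by hand; the output is still float16 because the policy casts the input. Below is a simplified sketch of how such a non-auto-cast layer can be written, assuming the experimental_autocast=False argument that tf.keras's add_weight accepts for opting a weight out of automatic casting; the real mp_test_util helper may differ in detail:

import tensorflow as tf
from tensorflow import keras


class MultiplyLayerWithoutAutoCast(keras.layers.Layer):
    """Like MultiplyLayer, but its weight is never auto-cast to float16."""

    def build(self, input_shape):
        # experimental_autocast=False keeps 'v' as a plain float32 variable
        # even under a "mixed_float16" policy.
        self.v = self.add_weight(
            "v",
            (),
            initializer="ones",
            dtype=tf.float32,
            experimental_autocast=False,
        )

    def call(self, inputs):
        # The cast to the compute dtype (float16 here) must be done manually.
        return inputs * tf.cast(self.v, inputs.dtype)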
Example #3
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        # The advanced model tests mixed-precision-related features that would occur
        # in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not.
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.0
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy("mixed_float16")):
                x = layers.Input(shape=(1,), batch_size=2)
                layer1 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True,
                )
                layer2 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16, use_operator=True
                )
                layer3 = mp_test_util.MultiplyLayer(
                    assert_type=tf.float16, use_operator=False
                )
                layer4 = mp_test_util.MultiplyLayerWithoutAutoCast(
                    assert_type=tf.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False,
                )
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=tf.float16,
                            expected_gradient=[expected_gradient],
                        )
                    )
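                    # The Lambda layer below is an identity in the forward pass;
                    # on the backward pass the helper checks (per its name) that
                    # the incoming gradient is float16 and equal to
                    # expected_gradient, i.e. 8.0 / 2 = 4.0 with this loss scale.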
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return tf.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, dynamic=False, initial_scale=loss_scale
                    )
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=test_utils.should_run_eagerly(),
                )

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
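For reference, the values asserted above follow from one SGD step of plain arithmetic: every scalar weight starts at 1, the gradient of the mean prediction with respect to each weight is 1 (all other factors are 1), and the identity regularizer contributes one extra unit of gradient on the two regularized layers. A small standalone check of the expected numbers (ordinary Python, independent of the test):

learning_rate = 2 ** -14

# Unregularized layers: gradient of 1 from the loss, so v moves from 1.0 to
expected_without_reg = 1 - learning_rate      # 0.99993896484375

# Regularized layers: gradient of 1 from the loss plus 1 from the regularizer
expected_with_reg = 1 - 2 * learning_rate     # 0.9998779296875

print(expected_without_reg, expected_with_reg)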