Code Example #1
File: keras_test.py Project: wangjunbo2000/wangjb
    def test_dynamic_loss_scaling(self,
                                  strategy_fn,
                                  pass_loss_scale_to_policy=False,
                                  get_config=False,
                                  experimental_run_tf_function=True):
        self._skip_if_strategy_unsupported(strategy_fn)
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.)
            if pass_loss_scale_to_policy:
                p = policy.Policy('mixed_float16', loss_scale=loss_scale)
            else:
                p = policy.Policy('mixed_float16', loss_scale=None)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
            with policy.policy_scope(p):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = mp_test_util.AddLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={'AddLayer': mp_test_util.AddLayer})
                    (layer, ) = (layer for layer in model.layers
                                 if isinstance(layer, mp_test_util.AddLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=testing_utils.should_run_eagerly(),
                    experimental_run_tf_function=testing_utils.should_run_tf_function())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
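
The sequence of assertions above follows from a simple update rule. Below is a minimal pure-Python sketch of that rule (an illustration of the behavior the test exercises, not TensorFlow's DynamicLossScale implementation): the scale doubles after increment_period consecutive steps with finite gradients, and halves whenever non-finite gradients appear, in which case the weight update is skipped.

class DynamicLossScaleSketch:
    """Toy model of the update rule exercised by the test above."""

    def __init__(self, initial_loss_scale=2., increment_period=2):
        self.loss_scale = initial_loss_scale
        self.increment_period = increment_period
        self.num_good_steps = 0

    def update(self, grads_are_finite):
        """Returns True if this step's weight update should be applied."""
        if not grads_are_finite:
            self.loss_scale /= 2   # halve on NaN/Inf gradients
            self.num_good_steps = 0
            return False           # and skip applying the gradients
        self.num_good_steps += 1
        if self.num_good_steps >= self.increment_period:
            self.loss_scale *= 2   # double after increment_period good steps
            self.num_good_steps = 0
        return True


scale = DynamicLossScaleSketch(initial_loss_scale=2., increment_period=2)
scale.update(True); scale.update(True)
assert scale.loss_scale == 4.   # doubled after two finite steps
assert not scale.update(False)  # NaN step: the update is skipped...
assert scale.loss_scale == 2.   # ...and the scale is halved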
Code Example #2
File: keras_test.py Project: zzg-china/tensorflow
 def test_delete_variable(self):
   layer = base_layer.Layer(dtype=policy.Policy('mixed_float16'))
   layer.x = layer.add_weight('x')
   self.assertEqual(layer.trainable_weights, [layer.x])
   del layer.x
   self.assertEqual(layer.trainable_weights, [])
Code Example #3
File: keras_test.py Project: wangjunbo2000/wangjb
    def test_model(self,
                   strategy_fn,
                   use_operator=False,
                   use_regularizer=False,
                   policy_name='mixed_float16',
                   get_config=False,
                   save_format=None,
                   experimental_run_tf_function=True):
        self._skip_if_strategy_unsupported(strategy_fn, check_model_type=True)
        self._skip_if_save_format_unsupported(save_format)
        regularizer = (mp_test_util.IdentityRegularizer()
                       if use_regularizer else None)
        with strategy_fn().scope():
            # Pass loss_scale=None, as this test will fail if the DynamicLossScale
            # skips applying gradients for a step
            with policy.policy_scope(
                    policy.Policy(policy_name, loss_scale=None)):
                layer = mp_test_util.AddLayer(assert_type=dtypes.float16,
                                              use_operator=use_operator,
                                              regularizer=regularizer,
                                              input_shape=(1, ))
                cast_f32_layer = layers.Lambda(
                    lambda x: math_ops.cast(x, 'float32'))
                model = testing_utils.get_model_from_layers(
                    [layer, cast_f32_layer],
                    input_shape=(1, ),
                    input_dtype=dtypes.float16)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={'AddLayer': mp_test_util.AddLayer})
                    (layer, ) = (layer for layer in model.layers
                                 if isinstance(layer, mp_test_util.AddLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                # The learning rate is small enough that, if applied to a
                # float16 variable, the variable would not change. So this
                # tests that the learning rate is applied to the float32
                # variable, not to a float16 value.
                opt = gradient_descent.SGD(2**-14)
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=testing_utils.should_run_eagerly(),
                    experimental_run_tf_function=testing_utils.should_run_tf_function())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # The variable starts at 1, and should have the gradient of 2 ** -14
        # subtracted from it.
        expected = 1 - 2**-14
        if use_regularizer:
            # Regularizer adds another 2 ** -14 to the gradient.
            expected -= 2**-14
        self.assertEqual(backend.eval(layer.v), expected)

        if save_format:
            with generic_utils.CustomObjectScope({
                    'AddLayer': mp_test_util.AddLayer,
                    'loss_fn': loss_fn
            }):
                self._test_saving(model, dataset, save_format, use_regularizer)
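
The choice of 2 ** -14 as the learning rate can be verified in isolation: near 1.0, adjacent float16 values are 2 ** -10 apart, so subtracting 2 ** -14 from a float16 variable rounds back to 1.0 and the update is silently lost, while a float32 variable changes. A quick NumPy check of that precision argument:

import numpy as np

# In float16, 1 - 2 ** -14 rounds back to 1.0, so the update would be lost.
assert np.float16(1) - np.float16(2 ** -14) == np.float16(1)
# In float32, 1 - 2 ** -14 is exactly representable and the variable changes.
assert np.float32(1) - np.float32(2 ** -14) == np.float32(1 - 2 ** -14)
assert np.float32(1) - np.float32(2 ** -14) < np.float32(1)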
Code Example #4
File: keras_test.py Project: wangjunbo2000/wangjb
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):
        # The advanced model tests mixed-precision-related features that would
        # occur in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not
        #  * Regularization on some variables and not others.
        #  * A fixed loss scale (if use_loss_scaling is True)

        self._skip_if_strategy_unsupported(strategy_fn)
        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.
        else:
            loss_scale = None
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(
                    policy.Policy('mixed_float16', loss_scale=loss_scale)):
                x = layers.Input(shape=(1, ), batch_size=2)
                layer1 = mp_test_util.AddLayer(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=True)
                layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                                 use_operator=True)
                layer3 = mp_test_util.AddLayer(assert_type=dtypes.float16,
                                               use_operator=False)
                layer4 = AddLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=mp_test_util.IdentityRegularizer(),
                    use_operator=False)
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss scaling, the
                    # gradient is 'loss_scale'. We divide by the batch size of 2 since the
                    # loss is averaged across batch elements.
                    expected_gradient = loss_scale / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=dtypes.float16,
                            expected_gradient=[expected_gradient]))
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    self.assertEqual(y_true.dtype, dtypes.float32)
                    self.assertEqual(y_pred.dtype, dtypes.float32)
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                model.compile(
                    opt,
                    loss=loss_fn,
                    run_eagerly=testing_utils.should_run_eagerly(),
                    experimental_run_tf_function=testing_utils.should_run_tf_function())

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
            if layer.losses:
                # Layer has weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
            else:
                # Layer does not have weight regularizer
                self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
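
The expected gradient in the use_loss_scaling branch follows from one line of arithmetic: the loss is mean(y_pred) over a batch of 2, multiplied by the fixed loss scale, so each element of y receives a gradient of loss_scale * (1 / batch_size). Spelled out with the test's values:

loss_scale = 8.   # the fixed loss scale when use_loss_scaling is True
batch_size = 2
d_mean_d_element = 1. / batch_size       # d(mean(y)) / d(y_i)
expected_gradient = loss_scale * d_mean_d_element
assert expected_gradient == 4.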
Code Example #5
    def test_serialization(self):
        # Test policies that are equivalent to a single dtype
        for policy_name in 'float16', 'float32', 'int8', 'string', 'bool':
            policy = mp_policy.Policy(policy_name)
            config = mp_policy.serialize(policy)
            self.assertEqual(config, policy_name)
            new_policy = mp_policy.deserialize(config)
            self.assertEqual(str(policy), str(new_policy))

        # Test "_infer" policy
        policy = mp_policy.Policy('_infer')
        config = mp_policy.serialize(policy)
        self.assertIsNone(config)
        new_policy = mp_policy.deserialize(config)
        self.assertEqual(str(policy), str(new_policy))

        class MyPolicy(mp_policy.Policy):
            pass

        # Test policies that do not override the loss scale
        for policy in (mp_policy.Policy('mixed_float16'),
                       mp_policy.Policy('mixed_bfloat16'),
                       MyPolicy('float32')):
            config = mp_policy.serialize(policy)
            self.assertEqual(
                config, {
                    'class_name': policy.__class__.__name__,
                    'config': {
                        'name': policy.name
                    }
                })
            new_policy = mp_policy.deserialize(
                config, custom_objects={'MyPolicy': MyPolicy})
            self.assertEqual(str(policy), str(new_policy))

        # Test policies that override the loss scale
        for policy in (
                mp_policy.Policy('float32', loss_scale=2.),
                mp_policy.Policy('float32', loss_scale=None),
                mp_policy.Policy('mixed_float16', loss_scale=2.),
                mp_policy.Policy('mixed_float16', loss_scale=None),
                mp_policy.Policy('mixed_bfloat16', loss_scale=2.),
                mp_policy.Policy('mixed_bfloat16', loss_scale=None),
        ):
            config = mp_policy.serialize(policy)
            expected_loss_scale_config = None
            if policy.loss_scale:
                expected_loss_scale_config = {
                    'class_name': 'FixedLossScale',
                    'config': {
                        'loss_scale_value': 2.
                    }
                }
            self.assertEqual(
                config, {
                    'class_name': policy.__class__.__name__,
                    'config': {
                        'name': policy.name,
                        'loss_scale': expected_loss_scale_config
                    }
                })
            new_policy = mp_policy.deserialize(
                config, custom_objects={'MyPolicy': MyPolicy})
            self.assertEqual(str(policy), str(new_policy))
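
The assertions above pin down a three-way convention: policies equivalent to a single dtype serialize to a bare string, '_infer' serializes to None, and everything else (including subclasses and any policy with an explicit loss-scale override) serializes to a class_name/config dict. A minimal sketch restating that branching (illustrative code, not the Keras implementation; 'unset' is a sentinel meaning no override was passed):

SINGLE_DTYPE_NAMES = ('float16', 'float32', 'int8', 'string', 'bool')


def serialize_policy_sketch(name, cls_name='Policy', loss_scale_config='unset'):
    if name == '_infer':
        return None   # '_infer' round-trips through None
    if (name in SINGLE_DTYPE_NAMES and cls_name == 'Policy'
            and loss_scale_config == 'unset'):
        return name   # single-dtype policies serialize to a bare string
    config = {'name': name}
    if loss_scale_config != 'unset':
        config['loss_scale'] = loss_scale_config
    return {'class_name': cls_name, 'config': config}


assert serialize_policy_sketch('float32') == 'float32'
assert serialize_policy_sketch('_infer') is None
assert serialize_policy_sketch('mixed_float16') == {
    'class_name': 'Policy', 'config': {'name': 'mixed_float16'}}
assert serialize_policy_sketch('float32', loss_scale_config=None) == {
    'class_name': 'Policy', 'config': {'name': 'float32', 'loss_scale': None}}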
Code Example #6
File: keras_test.py Project: wangjunbo2000/wangjb
    def test_config(self, strategy_fn):
        x = constant_op.constant([1.], dtype=dtypes.float16)
        with strategy_fn().scope():
            for layer, dtype in (
                    (mp_test_util.AddLayer(), 'float32'),
                    (mp_test_util.AddLayer(dtype='float64'), 'float64'),
                    (mp_test_util.AddLayer(dtype=policy.Policy('float64')),
                     'float64')):
                config = layer.get_config()
                self.assertEqual(config['dtype'], dtype)
                self.assertIsInstance(config['dtype'], str)
                layer = mp_test_util.AddLayer.from_config(config)
                self.assertEqual(layer.dtype, dtype)
                self.assertEqual(layer(x).dtype, dtype)
                self.assertEqual(layer.v.dtype, dtype)

            layer = mp_test_util.AddLayer(dtype=policy.Policy('mixed_float16'))
            config = layer.get_config()
            self.assertEqual(config['dtype'], {
                'class_name': 'Policy',
                'config': {
                    'name': 'mixed_float16'
                }
            })
            layer = mp_test_util.AddLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float32')

            layer = mp_test_util.AddLayer(
                dtype=policy.Policy('mixed_float16', loss_scale=None))
            config = layer.get_config()
            self.assertEqual(
                config['dtype'], {
                    'class_name': 'Policy',
                    'config': {
                        'name': 'mixed_float16',
                        'loss_scale': None
                    }
                })
            layer = mp_test_util.AddLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float32')

            layer = mp_test_util.AddLayer(
                dtype=policy.Policy('float64', loss_scale=2.))
            config = layer.get_config()
            self.assertEqual(
                config['dtype'], {
                    'class_name': 'Policy',
                    'config': {
                        'name': 'float64',
                        'loss_scale': {
                            'class_name': 'FixedLossScale',
                            'config': {
                                'loss_scale_value': 2.0
                            }
                        }
                    }
                })
            layer = mp_test_util.AddLayer.from_config(config)
            self.assertEqual(layer.dtype, 'float64')
            self.assertEqual(layer(x).dtype, 'float64')
            self.assertEqual(layer.v.dtype, 'float64')

            layer = mp_test_util.AddLayer(dtype=policy.Policy('infer'))
            config = layer.get_config()
            self.assertIsNone(config['dtype'])
            layer = mp_test_util.AddLayer.from_config(config)
            # If a layer is serialized with the "infer" policy, when deserialized into
            # TF 2 it will have the global policy instead of "infer". This is because
            # "infer" is serialized into None, and passing dtype=None in TensorFlow 2
            # indicates to use the global policy.
            self.assertEqual(layer.dtype, 'float32')
            self.assertEqual(layer(x).dtype, 'float32')
            self.assertEqual(layer.v.dtype, 'float32')

            layer = mp_test_util.AddLayer(
                dtype=policy.Policy('infer', loss_scale=2.))
            config = layer.get_config()
            self.assertEqual(
                config['dtype'], {
                    'class_name': 'Policy',
                    'config': {
                        'name': 'infer',
                        'loss_scale': {
                            'class_name': 'FixedLossScale',
                            'config': {
                                'loss_scale_value': 2.0
                            }
                        }
                    }
                })
            layer = mp_test_util.AddLayer.from_config(config)
            self.assertEqual(layer.dtype, None)
            self.assertEqual(layer(x).dtype, 'float16')
            self.assertEqual(layer.v.dtype, 'float16')
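
One subtlety in the assertions above is worth isolating: only a bare 'infer' policy serializes its dtype entry to None, and since dtype=None means "use the global policy" in TF 2, that round trip lands on float32. An 'infer' policy with a loss-scale override keeps a full Policy dict, so the 'infer' name survives deserialization. A sketch of just that branching (illustrative helper, not Keras internals; the loss-scale sub-config is elided):

def dtype_config_entry(policy_name, loss_scale_overridden=False):
    # A bare 'infer' policy serializes to None; a loss-scale override forces
    # a full Policy dict, so the 'infer' name survives the round trip.
    if policy_name == 'infer' and not loss_scale_overridden:
        return None
    return {'class_name': 'Policy', 'config': {'name': policy_name}}


assert dtype_config_entry('infer') is None
assert dtype_config_entry('infer', loss_scale_overridden=True) == {
    'class_name': 'Policy', 'config': {'name': 'infer'}}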
Code Example #7
    def _test_layer(self, f32_layer, input_shape):
        """Tests a layer by comparing the float32 and mixed precision weights.

    A float32 layer, a mixed precision layer, a distributed float32 layer, and a
    distributed mixed precision layer are run. The four layers are identical
    other than their dtypes and distribution strategies. The weights after
    running fit() are asserted to be close.

    Running the distributed float32 layer does not test mixed precision but we
    still test it for debugging purposes. If the distributed mixed precision
    layer fails, it's easier to debug if you know whether the issue also occurs
    in the distributed float32 layer.

    Args:
      f32_layer: A float32 layer. The other three layers will automatically
        be created from this.
      input_shape: The shape of the inputs to the layer, including the batch
        dimension.
    """
        strategy = create_mirrored_strategy()

        # Create the layers
        assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
        config = f32_layer.get_config()
        distributed_f32_layer = f32_layer.__class__.from_config(config)
        config['dtype'] = policy.Policy('mixed_float16')
        mp_layer = f32_layer.__class__.from_config(config)
        distributed_mp_layer = f32_layer.__class__.from_config(config)

        # Compute per_replica_input_shape for the distributed models
        global_batch_size = input_shape[0]
        assert global_batch_size % strategy.num_replicas_in_sync == 0
        per_replica_batch_size = (global_batch_size //
                                  strategy.num_replicas_in_sync)
        per_replica_input_shape = list(input_shape)
        per_replica_input_shape[0] = per_replica_batch_size

        # Create the models
        f32_model = self._create_model_from_layer(f32_layer, input_shape)
        mp_model = self._create_model_from_layer(mp_layer, input_shape)
        with strategy.scope():
            distributed_f32_model = self._create_model_from_layer(
                distributed_f32_layer, per_replica_input_shape)
            distributed_mp_model = self._create_model_from_layer(
                distributed_mp_layer, per_replica_input_shape)

        # Set all model weights to the same values
        f32_weights = f32_model.get_weights()
        for model in mp_model, distributed_f32_model, distributed_mp_model:
            model.set_weights(f32_weights)

        # Run fit() on models
        x = np.random.normal(size=input_shape)
        y = np.random.normal(size=input_shape)
        for model in (f32_model, mp_model, distributed_f32_model,
                      distributed_mp_model):
            model.fit(x, y, batch_size=global_batch_size)

        # Assert all models have close weights
        f32_weights = f32_model.get_weights()
        self.assertAllClose(mp_model.get_weights(),
                            f32_weights,
                            rtol=1e-2,
                            atol=1e-4)
        self.assertAllClose(distributed_f32_model.get_weights(),
                            f32_weights,
                            rtol=1e-2,
                            atol=1e-4)
        self.assertAllClose(distributed_mp_model.get_weights(),
                            f32_weights,
                            rtol=1e-2,
                            atol=1e-4)
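
The tolerances rtol=1e-2 and atol=1e-4 are deliberately loose because the mixed precision models do part of their arithmetic in float16, whose relative resolution is roughly 5e-4. A standalone NumPy illustration of why weights that differ only by float16 rounding pass these checks:

import numpy as np

f32_weights = np.array([0.1, 0.33, 1.7], dtype=np.float32)
# Simulate the rounding a weight picks up from float16 computation.
mp_weights = f32_weights.astype(np.float16).astype(np.float32)
np.testing.assert_allclose(mp_weights, f32_weights, rtol=1e-2, atol=1e-4)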
Code Example #8
 def test_config(self):
     for policy in (
             mp_policy.Policy('float16'),
             mp_policy.Policy('float32'),
             mp_policy.Policy('int16'),
             mp_policy.Policy('mixed_float16'),
             mp_policy.Policy('mixed_bfloat16'),
             mp_policy.Policy('_infer'),
             mp_policy.Policy('float32', loss_scale=2.),
             mp_policy.Policy('float32', loss_scale=None),
             mp_policy.Policy('mixed_float16', loss_scale=2.),
             mp_policy.Policy('mixed_float16', loss_scale=None),
             mp_policy.Policy('mixed_bfloat16', loss_scale=2.),
             mp_policy.Policy('mixed_bfloat16', loss_scale=None),
     ):
         config = policy.get_config()
         new_policy = mp_policy.Policy.from_config(config)
         # Comparing strings is the easiest way to ensure the policies are the
         # same, as policy does not override the == operator.
         self.assertEqual(str(policy), str(new_policy))
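
The loop body is a plain get_config()/from_config() round trip, with str() used as the equality proxy. A hypothetical helper distilling the pattern (not part of the test utilities):

def config_roundtrips(obj, cls):
    """True if serializing then deserializing reproduces the object.

    Compared via str() because the class defines no __eq__."""
    return str(cls.from_config(obj.get_config())) == str(obj)

# Inside the loop above this would read:
#     self.assertTrue(config_roundtrips(policy, mp_policy.Policy))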
Code Example #9
    def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = AddLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(1.)
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
                model.compile(opt, loss=loss_fn, cloning=cloning)

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Code Example #10
  def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3,
                 input_data=None):
    """Tests a layer by comparing the float32 and mixed precision weights.

    A float32 layer, a mixed precision layer, and a distributed mixed precision
    layer are run. The three layers are identical other than their dtypes and
    distribution strategies. The outputs after predict() and weights after fit()
    are asserted to be close.

    Args:
      f32_layer_fn: A function returning a float32 layer. The other two layers
        will automatically be created from this.
      input_shape: The shape of the input to the layer, including the batch
        dimension. Or a list of shapes if the layer takes multiple inputs.
      rtol: The relative tolerance to be asserted.
      atol: The absolute tolerance to be asserted.
      input_data: A Numpy array with the data of the input. If None, input data
        will be randomly generated.
    """

    # Skip testing ZeroPadding2D on ROCm builds.
    if f32_layer_fn == convolutional.ZeroPadding2D and \
       test.is_built_with_rocm():
      return
    if isinstance(input_shape[0], int):
      input_shapes = [input_shape]
    else:
      input_shapes = input_shape
    strategy = create_mirrored_strategy()
    f32_layer = f32_layer_fn()

    # Create the layers
    assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
    config = f32_layer.get_config()
    config['dtype'] = policy.Policy('mixed_float16')
    mp_layer = f32_layer.__class__.from_config(config)
    distributed_mp_layer = f32_layer.__class__.from_config(config)

    # Compute per_replica_input_shapes for the distributed model
    global_batch_size = input_shapes[0][0]
    assert global_batch_size % strategy.num_replicas_in_sync == 0, (
        'The number of replicas, %d, does not divide the global batch size of '
        '%d' % (strategy.num_replicas_in_sync, global_batch_size))
    per_replica_batch_size = (
        global_batch_size // strategy.num_replicas_in_sync)
    per_replica_input_shapes = [(per_replica_batch_size,) + s[1:]
                                for s in input_shapes]

    # Create the models
    f32_model = self._create_model_from_layer(f32_layer, input_shapes)
    mp_model = self._create_model_from_layer(mp_layer, input_shapes)
    with strategy.scope():
      distributed_mp_model = self._create_model_from_layer(
          distributed_mp_layer, per_replica_input_shapes)

    # Set all model weights to the same values
    f32_weights = f32_model.get_weights()
    mp_model.set_weights(f32_weights)
    distributed_mp_model.set_weights(f32_weights)

    # Generate input data
    if input_data is None:
      # Generate the inputs in float16 so the comparison does not also measure
      # the error introduced when the mixed precision models cast their inputs
      # to float16.
      input_data = [np.random.normal(size=s).astype('float16')
                    for s in input_shapes]
      if len(input_data) == 1:
        input_data = input_data[0]

    # Assert all models have close outputs.
    f32_output = f32_model.predict(input_data)
    mp_output = mp_model.predict(input_data)
    self.assertAllClose(
        mp_output, f32_output, rtol=rtol, atol=atol)
    self.assertAllClose(
        distributed_mp_model.predict(input_data), f32_output, rtol=rtol,
        atol=atol)

    # Run fit() on models
    output = np.random.normal(size=f32_model.outputs[0].shape).astype('float16')
    for model in f32_model, mp_model, distributed_mp_model:
      model.fit(input_data, output, batch_size=global_batch_size)

    # Assert all models have close weights
    f32_weights = f32_model.get_weights()
    self.assertAllClose(
        mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol)
    self.assertAllClose(
        distributed_mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol)
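
The per-replica shape computation near the top of the method stands on its own: under a MirroredStrategy, each replica receives global_batch_size / num_replicas_in_sync examples, so only the leading batch dimension of each input shape shrinks. A standalone version (hypothetical helper name):

def per_replica_input_shapes(input_shapes, num_replicas):
    global_batch_size = input_shapes[0][0]
    assert global_batch_size % num_replicas == 0, (
        'The number of replicas, %d, does not divide the global batch size '
        'of %d' % (num_replicas, global_batch_size))
    per_replica_batch_size = global_batch_size // num_replicas
    return [(per_replica_batch_size,) + tuple(s[1:]) for s in input_shapes]


assert per_replica_input_shapes([(8, 32, 32, 3)], num_replicas=2) == \
    [(4, 32, 32, 3)]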
Code Example #11
 def test_infer_float32_vars(self):
     policy = mp_policy.Policy('infer_float32_vars')
     self.assertEqual(policy.name, 'infer_float32_vars')
     self.assertEqual(policy.default_variable_dtype, 'float32')
Code Example #12
 def test_infer(self):
     policy = mp_policy.Policy('infer')
     self.assertEqual(policy.name, 'infer')
     self.assertEqual(policy.default_variable_dtype, None)
Code Example #13
    def test_advanced_model(self, strategy_fn, use_loss_scaling=False):

        # The advanced model tests mixed-precision-related features that would
        # occur in a resnet50 model. It tests a model that has:
        #  * Multiple layers, some of which use auto-cast variables and some of
        #    which do not
        #  * Regularization on some variables and not others.
        #  * Loss scaling (if use_loss_scaling is True)

        strategy = strategy_fn()
        if use_loss_scaling:
            loss_scale = 8.
        learning_rate = 2**-14

        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(), batch_size=2, dtype=dtypes.float16)
                layer1 = AddLayer(assert_type=dtypes.float16,
                                  regularizer=IdentityRegularizer(),
                                  use_operator=True)
                layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                                 use_operator=True)
                layer3 = AddLayer(assert_type=dtypes.float16,
                                  use_operator=False)
                layer4 = AddLayerWithoutAutoCast(
                    assert_type=dtypes.float16,
                    regularizer=IdentityRegularizer(),
                    use_operator=False)
                y = layer1(x)
                y = layer2(y)
                y = layer3(y)
                y = layer4(y)
                if use_loss_scaling:
                    # The gradient of 'y' at this point is 1. With loss
                    # scaling, the gradient is 'loss_scale'. The
                    # DistributionStrategy additionally scales the gradient by
                    # 1 / num_replicas_in_sync. We divide by the batch size of
                    # 2 since the loss is averaged across batch elements.
                    expected_gradient = loss_scale / strategy.num_replicas_in_sync / 2
                    identity_with_grad_check_fn = (
                        mp_test_util.create_identity_with_grad_check_fn(
                            expected_dtype=dtypes.float16,
                            expected_gradient=[expected_gradient] * 2))
                    y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    self.assertEqual(y_true.dtype, dtypes.float32)
                    self.assertEqual(y_pred.dtype, dtypes.float32)
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(learning_rate)
                if use_loss_scaling:
                    opt = loss_scale_optimizer.LossScaleOptimizer(
                        opt, loss_scale)
                model.compile(opt, loss=loss_fn)

            x = np.ones((2, 1))
            y = np.ones((2, 1))
            dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
            model.fit(dataset)
            for layer in (layer1, layer2, layer3, layer4):
                if layer.losses:
                    # Layer has weight regularizer
                    self.assertEqual(backend.eval(layer.v),
                                     1 - 2 * learning_rate)
                else:
                    # Layer does not have weight regularizer
                    self.assertEqual(backend.eval(layer.v), 1 - learning_rate)
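
The expected_gradient expression in this older variant adds one factor to the arithmetic from Code Example #4: the DistributionStrategy also divides the gradient by num_replicas_in_sync. With illustrative values (the actual replica count depends on strategy_fn):

loss_scale = 8.
num_replicas_in_sync = 2   # illustrative; determined by strategy_fn in the test
batch_size = 2
expected_gradient = loss_scale / num_replicas_in_sync / batch_size
assert expected_gradient == 2.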