Code Example #1
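This test verifies that LossScaleOptimizerV1 rejects a DynamicLossScale whose multiplier is not 2 with a ValueError, and rejects custom LossScale subclasses (anything other than FixedLossScale or DynamicLossScale) with a TypeError.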
    def testPassingV1LossScaleErrors(self):
        opt = gradient_descent.SGD()
        loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
            multiplier=4)
        with self.assertRaisesRegex(
                ValueError, 'When passing a DynamicLossScale to "loss_scale", '
                'DynamicLossScale.multiplier must be 2. Got: '
                'DynamicLossScale'):
            loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)

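        # A LossScale subclass that is neither a FixedLossScale nor a DynamicLossScale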
        class MyLossScale(tf.mixed_precision.experimental.LossScale):
            def __call__(self):
                return 1.

            def update(self, grads):
                return None, True

            def get_config(self):
                return {}

        with self.assertRaisesRegex(
                TypeError,
                'Passing a LossScale that is not a FixedLossScale or a '
                'DynamicLossScale is no longer supported. Got:'):
            loss_scale_optimizer.LossScaleOptimizerV1(opt, MyLossScale())
Code Example #2
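This test serializes a loss scale optimizer with optimizers.serialize and deserializes it again, checking that the hyperparameters and loss-scale state survive the round trip, that the result is always a V2 LossScaleOptimizer, and that the deserialized optimizer can still run a training step.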
    def testSerializationWithBuiltInOptimizer(self, use_v1):
        opt = gradient_descent.SGD(2., momentum=0.5)
        if use_v1:
            loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
                initial_loss_scale=2., increment_period=3.)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
        else:
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=2., dynamic_growth_steps=3.)
        config = optimizers.serialize(opt)
        opt = optimizers.deserialize(config)
        # Force hyperparameters to be created
        opt.lr  # pylint: disable=pointless-statement
        self.evaluate(tf.compat.v1.global_variables_initializer())

        self.assertEqual(self.evaluate(opt.lr), 2.)
        self.assertEqual(self.evaluate(opt.inner_optimizer.momentum), 0.5)
        self.assertEqual(self.evaluate(opt.loss_scale), 2.)
        self.assertEqual(opt.dynamic_growth_steps, 3.)
        self.assertTrue(opt.dynamic)
        # Deserializing a LossScaleOptimizer always results in a V2
        # LossScaleOptimizer, even if serialized with a LossScaleOptimizerV1.
        self.assertAllEqual(type(opt), loss_scale_optimizer.LossScaleOptimizer)

        # Ensure the optimizer can be used
        var = tf.Variable([5.0])
        run_op = self._run_fn_with_grad_check(tf.distribute.get_strategy(),
                                              var, opt, 2)()
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self._run_if_in_graph_mode(run_op)
        self.assertEqual(self.evaluate(var), [3.])
        self.assertEqual(self.evaluate(opt.dynamic_counter), 1)
Code Example #3
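This test checks that enable_mixed_precision_graph_rewrite raises a ValueError when the optimizer passed in is already a LossScaleOptimizer, and that the graph rewrite stays disabled.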
 def test_optimizer_errors(self):
     opt = gradient_descent_v2.SGD(1.0)
     opt = loss_scale_optimizer_v2.LossScaleOptimizerV1(opt, 'dynamic')
     with self.assertRaisesRegex(
             ValueError, '"opt" must not already be an instance of a '
             'LossScaleOptimizer.'):
         enable_mixed_precision_graph_rewrite(opt)
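     # The graph rewrite should remain disabled after the failed call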
     self.assertFalse(tf.config.optimizer.get_experimental_options().get(
         'auto_mixed_precision', False))
Code Example #4
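This test constructs LossScaleOptimizerV1 from V1 FixedLossScale and DynamicLossScale objects under a distribution strategy, verifies how their state maps onto the V2 attributes (loss_scale, initial_scale, dynamic_growth_steps, dynamic_counter), and confirms a training step updates the variable as expected.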
    def testPassingV1LossScale(self, strategy_fn):
        strategy = strategy_fn()
        learning_rate = 2.
        with strategy.scope():
            # Test FixedLossScale
            var = tf.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            loss_scale = tf.mixed_precision.experimental.FixedLossScale(2.)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
            self.assertIsInstance(opt.loss_scale, tf.Tensor)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(opt.loss_scale), 2)
            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, 2 / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The loss is the identity of the variable. Therefore the gradient is 1,
            # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3
            self.assertAllClose([3.], self.evaluate(var))

            # Test DynamicLossScale
            var = tf.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
                initial_loss_scale=4, increment_period=1, multiplier=2)
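            # Change the current loss scale to verify below that it is not carried over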
            loss_scale._current_loss_scale.assign(2)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
            self.assertEqual(opt.initial_scale, 4)
            self.assertEqual(opt.dynamic_growth_steps, 1)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Current loss scale is not copied so loss scale is reinitialized to 4
            self.assertEqual(self.evaluate(opt.loss_scale), 4)
            for s in strategy.experimental_local_results(opt.dynamic_counter):
                self.assertEqual(self.evaluate(s), 0)

            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, 4 / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            self.assertAllClose([3.], self.evaluate(var))
Code Example #5
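This test exercises LossScaleOptimizerV1 with a numeric (fixed) loss scale and with the 'dynamic' shorthand, including the NaN-gradient path: the variable update is skipped and the loss scale is halved.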
    def testV1Optimizer(self, strategy_fn):
        strategy = strategy_fn()
        learning_rate = 2.
        with strategy.scope():
            # Test FixedLossScale
            var = tf.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale=2)
            self.assertIsInstance(opt.loss_scale, tf.Tensor)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(opt.loss_scale), 2)
            self.assertEqual(opt.initial_scale, 2)
            self.assertIsNone(opt.dynamic_growth_steps)
            run_fn = self._run_fn_with_grad_check(
                strategy, var, opt, 2 / strategy.num_replicas_in_sync)
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The loss is the identity of the variable. Therefore the gradient is 1,
            # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3
            self.assertAllClose([3.], self.evaluate(var))

            # Test DynamicLossScale
            var = tf.Variable([5.0])
            opt = gradient_descent.SGD(learning_rate)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, 'dynamic')
            self.assertEqual(opt.initial_scale, 2**15)
            self.assertEqual(opt.dynamic_growth_steps, 2000)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.assertEqual(self.evaluate(opt.loss_scale), 2**15)
            for s in strategy.experimental_local_results(opt.dynamic_counter):
                self.assertEqual(self.evaluate(s), 0)

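            # A NaN loss produces NaN gradients, so the update is skipped and the loss scale is halved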
            loss = lambda: var * float('NaN')
            run_fn = lambda: opt.minimize(loss, var_list=[var])
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            self.assertAllClose([5.], self.evaluate(var))
            self.assertEqual(self.evaluate(opt.loss_scale), 2**14)
            for s in strategy.experimental_local_results(opt.dynamic_counter):
                self.assertEqual(self.evaluate(s), 0)
Code Example #6
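This end-to-end Keras test trains a small functional model under a mixed_float16 policy with dynamic loss scaling: the loss scale doubles after two finite-gradient steps, while NaN gradients skip the variable update and halve the scale.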
  def test_dynamic_loss_scaling(self,
                                strategy_fn,
                                pass_loss_scale_to_policy=False,
                                get_config=False,
                                use_v1_loss_scale_optimizer=False):
    strategy = strategy_fn()
    initial_loss_scale = 2.
    batch_size = 4
    expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                         dtype=tf.float16)
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=tf.bool)
    with strategy.scope():
      opt = gradient_descent.SGD(1.)
      if pass_loss_scale_to_policy:
        loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        p = policy.PolicyV1('mixed_float16', loss_scale=loss_scale)
      elif use_v1_loss_scale_optimizer:
        loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        p = policy.Policy('mixed_float16')
        opt = loss_scale_optimizer.LossScaleOptimizerV1(
            opt, loss_scale)
      else:
        p = policy.Policy('mixed_float16')
        opt = loss_scale_optimizer.LossScaleOptimizer(
            opt, initial_scale=initial_loss_scale, dynamic_growth_steps=2)
      with policy.policy_scope(p):
        x = layers.Input(
            shape=(1,), batch_size=batch_size, dtype=tf.float16)
        layer = mp_test_util.MultiplyLayer(assert_type=tf.float16)
        y = layer(x)
        identity_with_nan_grads = (
            mp_test_util.create_identity_with_nan_gradients_fn(
                have_nan_gradients))
        y = core.Lambda(identity_with_nan_grads)(y)
        identity_with_grad_check_fn = (
            mp_test_util.create_identity_with_grad_check_fn(
                expected_dtype=tf.float16,
                expected_gradient=expected_gradient))
        y = core.Lambda(identity_with_grad_check_fn)(y)
        model = models.Model(inputs=x, outputs=y)
        if get_config:
          config = model.get_config()
          model = model.__class__.from_config(
              config,
              custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
          (layer,) = (layer for layer in model.layers
                      if isinstance(layer, mp_test_util.MultiplyLayer))

        def loss_fn(y_true, y_pred):
          del y_true
          return tf.reduce_mean(y_pred)

        model.compile(
            opt,
            loss=loss_fn,
            run_eagerly=testing_utils.should_run_eagerly())

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    model.fit(dataset)
    # The variable starts at 1 and has a gradient of 1, so it will go down by 1
    # each step.
    self.assertEqual(backend.eval(layer.v), 0)

    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -1)

    # There have been two steps without NaNs, so the loss scale will double
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient * 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    model.fit(dataset)
    # Variable should not be updated
    self.assertEqual(backend.eval(layer.v), -2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient / 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -3)
Code Example #7
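This test saves a model compiled with a dynamic LossScaleOptimizer (in H5 or TF format), reloads it, and checks which parts of the loss-scale state are restored along with the model weights.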
  def test_save_model_with_dynamic_loss_scaling(
      self, strategy_fn, h5=False, use_v1_loss_scale_optimizer=False):
    # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy
    # as well.
    strategy = strategy_fn()
    if (isinstance(strategy, tf.distribute.MirroredStrategy) and
        not tf.executing_eagerly()):
      # TODO(b/121381184): Enable running the test in this case.
      return

    # Create and run model.
    with strategy.scope():
      x = layers.Input(shape=(2,), batch_size=2, dtype=tf.float32)
      y = mp_test_util.MultiplyLayer()(x)
      model = models.Model(inputs=x, outputs=y)

      opt = gradient_descent.SGD(1.)
      if use_v1_loss_scale_optimizer:
        loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
            initial_loss_scale=1., increment_period=2.)
        opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
      else:
        opt = loss_scale_optimizer.LossScaleOptimizer(opt, initial_scale=1.,
                                                      dynamic_growth_steps=2.)
      model.compile(
          optimizer=opt,
          loss='mse',
          run_eagerly=testing_utils.should_run_eagerly())
    # Run for 3 steps (6 examples with a batch size of 2)
    model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2)
    self.assertEqual(backend.get_value(opt.loss_scale), 2)
    self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
    (weight,) = model.trainable_weights
    orig_weight = backend.get_value(weight)

    # Save model weights.
    save_path = os.path.join(self.get_temp_dir(), 'model')
    model.save(save_path, save_format='h5' if h5 else 'tf')

    # Run model again for 1 step (2 examples with a batch size of 2)
    model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
    new_weight = backend.get_value(weight)
    self.assertNotEqual(new_weight, orig_weight)
    self.assertEqual(backend.get_value(opt.loss_scale), 4)
    self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

    # Load model weights and ensure loss scale weights are restored.
    model = save.load_model(
        save_path, custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
    (weight,) = model.trainable_weights
    loaded_weight = backend.get_value(weight)
    self.assertEqual(loaded_weight, orig_weight)
    # Currently the loss scale isn't always saved when the model is saved with
    # Model.save(). So we assert the loss scale either has the value when it was
    # saved, or the value it was initialized with.
    # TODO(reedwm): Always save/restore the loss scale with Model.save().
    self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2))
    self.assertIn(backend.get_value(model.optimizer.dynamic_counter), (0, 1))

    # Test optimizer attributes and type
    self.assertEqual(model.optimizer.initial_scale, 1.)
    self.assertEqual(model.optimizer.dynamic_growth_steps, 2.)
    self.assertEqual(type(model.optimizer),
                     loss_scale_optimizer.LossScaleOptimizer)
Code Example #8
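This test round-trips a dynamic loss scale optimizer config through get_config/from_config, including a hard-coded TF 2.3-style config, then verifies the restored hyperparameters and runs a training step with the restored optimizer.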
    def testGetConfigDynamic(self, get_config, from_config):
        # Get a config from LossScaleOptimizerV1, LossScaleOptimizer, or the
        # LossScaleOptimizer from TF 2.3. Then restore the config into a
        # LossScaleOptimizerV1 or LossScaleOptimizer
        opt = gradient_descent.SGD(2., momentum=0.5)
        if get_config == 'v1':
            loss_scale = tf.mixed_precision.experimental.DynamicLossScale(
                initial_loss_scale=2, increment_period=3)
            opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
            config = opt.get_config()
        elif get_config == 'v2':
            opt = loss_scale_optimizer.LossScaleOptimizer(
                opt, initial_scale=2, dynamic_growth_steps=3)
            config = opt.get_config()
        else:
            self.assertEqual(get_config, 'tf2_3')
            config = {
                'optimizer': {
                    'class_name': 'SGD',
                    'config': {
                        'learning_rate': 2.0,
                        'momentum': 0.5,
                        'decay': 0.0,
                        'nesterov': False,
                        'name': 'SGD',
                    }
                },
                'loss_scale': {
                    'class_name': 'DynamicLossScale',
                    'config': {
                        'initial_loss_scale': 2.0,
                        'increment_period': 3,
                        'multiplier': 2.0,
                    }
                },
            }

        if from_config == 'v1':
            opt = loss_scale_optimizer.LossScaleOptimizerV1.from_config(config)
        else:
            self.assertEqual(from_config, 'v2')
            opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config)

        # Force hyperparameters to be created
        opt.lr  # pylint: disable=pointless-statement
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Test attributes on the optimizer
        self.assertEqual(self.evaluate(opt.lr), 2.)
        self.assertEqual(self.evaluate(opt.inner_optimizer.lr), 2.)
        self.assertEqual(self.evaluate(opt.momentum), 0.5)
        self.assertEqual(self.evaluate(opt.loss_scale), 2.)
        self.assertEqual(opt.initial_scale, 2.)
        self.assertEqual(opt.dynamic_growth_steps, 3.)
        self.assertTrue(opt.dynamic)

        # Ensure the optimizer can be used
        var = tf.Variable([5.0])
        run_op = self._run_fn_with_grad_check(tf.distribute.get_strategy(),
                                              var, opt, 2)()
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self._run_if_in_graph_mode(run_op)
        self.assertEqual(self.evaluate(var), [3.])
        self.assertEqual(self.evaluate(opt.dynamic_counter), 1)