Code example #1
    def _test_helper(self,
                     inputs,
                     expected_outputs,
                     initial_loss_scale=1.,
                     increment_period=2,
                     multiplier=2):
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale,
            increment_period=increment_period,
            multiplier=multiplier)
        itr = _get_example_iter(inputs)

        def update():
            is_finite = itr.get_next()
            grad = self._get_tensor(is_finite)
            update_op, should_apply_gradients = loss_scale.update([grad])
            assert_op = check_ops.assert_equal(should_apply_gradients,
                                               is_finite)
            if context.executing_eagerly():
                return
            with ops.control_dependencies([assert_op]):
                return array_ops.identity(update_op)

        actual_outputs = []

        if not context.executing_eagerly():
            update_op = update()
            self.evaluate(variables.global_variables_initializer())
        for _ in range(len(inputs)):
            if context.executing_eagerly():
                update()
            else:
                self.evaluate(update_op)
            actual_outputs.append(self.evaluate(loss_scale()))
        self.assertEqual(actual_outputs, expected_outputs)
Code example #2
  def testDynamicLossScaleWithSlots(self, strategy_fn):
    with strategy_fn().scope() as strategy:
      var = variables.Variable([1.0, 2.0])
      # An SGD optimizer with momentum has slot variables.
      opt = gradient_descent.SGD(1.0, momentum=1.)
      initial_loss_scale = 2.
      loss_scale = loss_scale_module.DynamicLossScale(
          initial_loss_scale=initial_loss_scale, increment_period=1,
          multiplier=4)
      opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
      loss = lambda: var / strategy.num_replicas_in_sync
      run_fn = lambda: opt.minimize(loss, var_list=[var])
      run_op = strategy.experimental_run(run_fn)
      self.evaluate(variables.global_variables_initializer())
      self._run_if_in_graph_mode(run_op)
      # The momentum accumulator starts at 0 and the gradient is 1. The
      # accumulator is incremented by the gradient, so it is now 1. Then the
      # accumulator is subtracted from the variable, so the variable decreases
      # by 1.
      self.assertAllClose([0.0, 1.0], self.evaluate(var))
      self.assertEqual(self.evaluate(opt._loss_scale()), initial_loss_scale * 4)

      run_op = strategy.experimental_run(run_fn)
      self._run_if_in_graph_mode(run_op)
      # The momentum accumulator was 1 before this step and the gradient is 1.
      # The accumulator is incremented by the gradient, so it is now 2. Then the
      # accumulator is subtracted from the variable, so the variable decreases
      # by 2.
      self.assertAllClose([-2., -1.], self.evaluate(var))
      self.assertEqual(self.evaluate(opt._loss_scale()),
                       initial_loss_scale * 16)
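
The arithmetic in the comments above can be checked with plain Python. A small illustrative sketch (not part of the test file), assuming lr = 1, momentum = 1, and a per-element gradient of 1:

# Plain-Python check of the momentum bookkeeping described in the comments above
# (illustrative only; assumes lr = 1, momentum = 1, per-element gradient = 1).
var = [1.0, 2.0]
accumulator = [0.0, 0.0]
for _ in range(2):
    accumulator = [a + 1.0 for a in accumulator]     # accumulator += gradient
    var = [v - a for v, a in zip(var, accumulator)]  # var -= lr * accumulator
print(var)  # [0.0, 1.0] after the first step, [-2.0, -1.0] after the second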
Code example #3
  def testDynamicUpdate(self, strategy_fn):
    with strategy_fn().scope() as strategy:
      var = variables.Variable([1.0, 2.0])
      opt = gradient_descent.SGD(1.0)
      loss_scale = loss_scale_module.DynamicLossScale(
          initial_loss_scale=2, increment_period=1, multiplier=2)
      opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)

      # Test optimizer with finite gradients
      loss = lambda: var * 2.0 / strategy.num_replicas_in_sync
      run_fn = lambda: opt.minimize(loss, var_list=[var])
      run_op = strategy.experimental_run(run_fn)
      self.evaluate(variables.global_variables_initializer())
      self._run_if_in_graph_mode(run_op)
      # Gradient is 2, so variable will have 2 subtracted from it
      self.assertAllClose([-1.0, 0.0], self.evaluate(var))
      # Loss scale has doubled from 2 to 4
      self.assertEqual(4., self.evaluate(opt._loss_scale()))

      # Test optimizer with NaN gradients
      loss = lambda: var * float('NaN')
      run_fn = lambda: opt.minimize(loss, var_list=[var])
      run_op = strategy.experimental_run(run_fn)
      self._run_if_in_graph_mode(run_op)
      # Variable should not change from before, due to NaN gradients.
      self.assertAllClose(self.evaluate(var), [-1.0, 0.0])
      # Loss scale should be halved due to NaN gradients.
      self.assertEqual(2., self.evaluate(opt._loss_scale()))
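
The doubling-and-halving rule exercised above can also be observed on a DynamicLossScale directly, without an optimizer or distribution strategy. A minimal eager-mode sketch, assuming the internal import paths used by these tests:

# Illustrative sketch only; assumes eager execution and the internal
# tensorflow.python import paths used in the tests above.
from tensorflow.python.framework import constant_op
from tensorflow.python.training.experimental import loss_scale as loss_scale_module

loss_scale = loss_scale_module.DynamicLossScale(
    initial_loss_scale=2, increment_period=1, multiplier=2)

# Finite gradients: they should be applied, and with increment_period=1 the
# scale is multiplied by `multiplier`.
_, should_apply = loss_scale.update([constant_op.constant(1.0)])
assert bool(should_apply) and float(loss_scale()) == 4.0

# Non-finite gradients: they should be skipped, and the scale is halved.
_, should_apply = loss_scale.update([constant_op.constant(float('nan'))])
assert not bool(should_apply) and float(loss_scale()) == 2.0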
Code example #4
  def testDynamicLossScale(self, strategy_fn):
    strategy = strategy_fn()
    learning_rate = 2.
    expected_gradient = resource_variable_ops.ResourceVariable(
        learning_rate / strategy.num_replicas_in_sync)
    with strategy.scope():
      var = variables.Variable([5.0])
      opt = gradient_descent.SGD(learning_rate)
      loss_scale = loss_scale_module.DynamicLossScale(
          initial_loss_scale=2, increment_period=1, multiplier=2)
      opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
      self.assertEqual(
          loss_scale.initial_loss_scale % strategy.num_replicas_in_sync, 0)

      run_fn = self._run_fn_with_grad_check(strategy, var, opt,
                                            expected_gradient)
      run_op = strategy.experimental_run(run_fn)
      self.evaluate(variables.global_variables_initializer())
      self._run_if_in_graph_mode(run_op)
      # The loss is the identity of the variable. Therefore the gradient is 1,
      # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3
      self.assertAllClose([3.], self.evaluate(var))

      # Loss scale will be doubled, so the expected gradient is also doubled.
      self.evaluate(expected_gradient.assign(
          2 * learning_rate / strategy.num_replicas_in_sync))
      run_op = strategy.experimental_run(run_fn)
      self._run_if_in_graph_mode(run_op)
      # As before, 2 is subtracted from the variable, making its new value 1.
      self.assertAllClose([1.], self.evaluate(var))
Code example #5
  def test_serialization(self):
    loss_scale = loss_scale_module.DynamicLossScale(
        initial_loss_scale=1, increment_period=2, multiplier=3)
    config = loss_scale_module.serialize(loss_scale)
    loss_scale = loss_scale_module.deserialize(config)
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(self.evaluate(loss_scale()), 1)
    self.assertEqual(loss_scale.increment_period, 2)
    self.assertEqual(loss_scale.multiplier, 3)
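
serialize() and deserialize() above are assumed to wrap the loss scale's get_config()/from_config() round trip; a short illustrative sketch of the same round trip using those methods directly:

# Illustrative sketch only; assumes the internal import path used by these tests
# and the get_config()/from_config() convention of the LossScale classes.
from tensorflow.python.training.experimental import loss_scale as loss_scale_module

original = loss_scale_module.DynamicLossScale(
    initial_loss_scale=1, increment_period=2, multiplier=3)
config = original.get_config()
restored = loss_scale_module.DynamicLossScale.from_config(config)
assert restored.increment_period == 2
assert restored.multiplier == 3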
Code example #6
  def testCheckpoint(self, strategy_fn):
    strategy = strategy_fn()
    if (isinstance(strategy, mirrored_strategy.MirroredStrategy) and
        not context.executing_eagerly()):
      # TODO(b/121381184): Enable running the test in this case.
      return

    with self.test_session(), strategy.scope():
      # Build and run a simple model.
      var = variables.Variable([2.0])
      loss_scale = loss_scale_module.DynamicLossScale(
          initial_loss_scale=1., increment_period=2.,
          multiplier=2.)
      opt = gradient_descent.SGD(1., momentum=1.)
      opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
      run_fn = lambda: opt.minimize(lambda: var + 1., var_list=[var])
      opt_op = strategy.experimental_run(run_fn)
      self.evaluate(variables.global_variables_initializer())
      self.evaluate(opt_op)
      self.assertEqual(self.evaluate(loss_scale()), 1.)
      self.assertEqual(self.evaluate(loss_scale._num_good_steps), 1)
      slot_var = opt._optimizer.get_slot(var, 'momentum')
      slot_value = self.evaluate(slot_var).item()

      # Save a checkpoint.
      checkpoint = trackable_utils.Checkpoint(optimizer=opt, var=var)
      prefix = os.path.join(self.get_temp_dir(), 'ckpt')
      save_path = checkpoint.save(prefix)

      # Run model again.
      self.evaluate(strategy.experimental_run(run_fn))
      self.assertEqual(self.evaluate(loss_scale()), 2.)
      self.assertEqual(self.evaluate(loss_scale._num_good_steps), 0)
      self.assertNotAlmostEqual(self.evaluate(slot_var).item(), slot_value)

      # Load checkpoint and ensure loss scale is back to its original value.
      status = checkpoint.restore(save_path)
      status.assert_consumed()
      status.run_restore_ops()
      self.assertEqual(self.evaluate(loss_scale()), 1.)
      self.assertEqual(self.evaluate(loss_scale._num_good_steps), 1)
      self.assertAlmostEqual(self.evaluate(slot_var).item(), slot_value)
Code example #7
File: keras_test.py  Project: zxzheng/tensorflow
    def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn):
        with context.eager_mode():
            strategy = strategy_fn()
            if (isinstance(strategy, mirrored_strategy.MirroredStrategy)
                    and not context.executing_eagerly()):
                # TODO(b/121381184): Enable running the test in this case.
                return

            # Create and run model.
            with strategy.scope():
                x = layers.Input(shape=(2, ),
                                 batch_size=2,
                                 dtype=dtypes.float32)
                y = AddLayer(assert_type=dtypes.float32)(x)
                model = models.Model(inputs=x, outputs=y)

                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=1., increment_period=2., multiplier=2.)
                opt = gradient_descent.SGD(1.)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
                model.compile(optimizer=opt, loss='mse')
            # Run for 3 steps (6 examples with a batch size of 2)
            model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2)
            self.assertEqual(backend.get_value(loss_scale()), 2)
            self.assertEqual(backend.get_value(loss_scale._num_good_steps), 1)

            # Save model weights.
            save_prefix = os.path.join(self.get_temp_dir(), 'ckpt')
            model.save_weights(save_prefix)

            # Run model again for 1 step (2 examples with a batch size of 2)
            model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2)
            self.assertEqual(backend.get_value(loss_scale()), 4)
            self.assertEqual(backend.get_value(loss_scale._num_good_steps), 0)

            # Load model weights and ensure loss scale weights are restored.
            model.load_weights(save_prefix)
            self.assertEqual(backend.get_value(loss_scale()), 2)
            self.assertEqual(backend.get_value(loss_scale._num_good_steps), 1)
Code example #8
File: keras_test.py  Project: zxzheng/tensorflow
    def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = AddLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(1.)
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
                model.compile(opt, loss=loss_fn, cloning=cloning)

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down
        # by 1 each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Code example #9
  def test_get(self):
    scalar = loss_scale_module.get('dynamic')
    scalar2 = loss_scale_module.DynamicLossScale()
    self.assertEqual(scalar.initial_loss_scale, scalar2.initial_loss_scale)
    self.assertEqual(scalar.increment_period, scalar2.increment_period)
    self.assertEqual(scalar.multiplier, scalar2.multiplier)
Code example #10
File: loss_scale_test.py  Project: flavz27/master_PA
  def test_update_with_none_gradients(self):
    loss_scale = loss_scale_module.DynamicLossScale()
    loss_scale.update([None])