  def testIsInstance(self):
    optimizer = create_lso(sgd_experimental.SGD())
    self.assertIsInstance(optimizer,
                          loss_scale_optimizer.BaseLossScaleOptimizer)

    optimizer = create_lso(gradient_descent.SGD())
    self.assertIsInstance(optimizer,
                          loss_scale_optimizer.BaseLossScaleOptimizer)
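
The create_lso helper used above is not shown in this excerpt. Below is a plausible minimal sketch, inferred only from how testIsInstance uses it and reusing the module aliases of these snippets; the dispatch on the wrapped optimizer's type is an assumption, not the original helper:

def create_lso(inner_optimizer, **kwargs):
  # Hypothetical reconstruction: wrap optimizer_v2-based optimizers in a
  # LossScaleOptimizer and experimental optimizers in a LossScaleOptimizerV3.
  if isinstance(inner_optimizer, optimizer_experimental.Optimizer):
    return loss_scale_optimizer.LossScaleOptimizerV3(inner_optimizer, **kwargs)
  return loss_scale_optimizer.LossScaleOptimizer(inner_optimizer, **kwargs)
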
  def testGetConfigFixed(self, config_version):
    # Get a config from LossScaleOptimizer, LossScaleOptimizerV3, or the
    # LossScaleOptimizer from TF 2.3. Then restore the config into a
    # LossScaleOptimizer or LossScaleOptimizerV3.
    if config_version == 'v2':
      opt = gradient_descent.SGD(2., momentum=0.5)
      opt = loss_scale_optimizer.LossScaleOptimizer(
          opt, dynamic=False, initial_scale=2)
      config = opt.get_config()
      opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config)
    elif config_version == 'v3':
      opt = sgd_experimental.SGD(2., momentum=0.5)
      opt = loss_scale_optimizer.LossScaleOptimizerV3(
          opt, dynamic=False, initial_scale=2)
      config = opt.get_config()
      opt = loss_scale_optimizer.LossScaleOptimizerV3.from_config(config)
    else:
      self.assertEqual(config_version, 'tf2_3')
      config = {
          'optimizer': {
              'class_name': 'SGD',
              'config': {
                  'learning_rate': 2.0,
                  'momentum': 0.5,
                  'decay': 0.0,
                  'nesterov': False,
                  'name': 'SGD',
              }
          },
          'loss_scale': {
              'class_name': 'FixedLossScale',
              'config': {'loss_scale_value': 2.0}
          },
      }
      opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config)

    # Force hyperparameters to be created
    opt.learning_rate  # pylint: disable=pointless-statement
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Test attributes on the optimizer
    self.assertEqual(self.evaluate(opt.learning_rate), 2.)
    self.assertEqual(self.evaluate(opt.inner_optimizer.learning_rate), 2.)
    self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5)
    self.assertEqual(self.evaluate(opt.loss_scale), 2.)
    self.assertEqual(opt.initial_scale, 2.)
    self.assertIsNone(opt.dynamic_growth_steps)
    self.assertIsNone(opt.dynamic_counter)
    self.assertFalse(opt.dynamic)

    # Ensure the optimizer can be used
    var = tf.Variable([5.0])
    run_op = self._run_fn_with_grad_check(
        tf.distribute.get_strategy(), var, opt, 2)()
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self._run_if_in_graph_mode(run_op)
    self.assertEqual(self.evaluate(var), [3.])

  def testSerializationWithBuiltInOptimizer(self, lso_type):
    if lso_type in ('v1', 'v2'):
      opt = gradient_descent.SGD(2., momentum=0.5)
      opt = loss_scale_optimizer.LossScaleOptimizer(
          opt, initial_scale=2., dynamic_growth_steps=3.)
      config = optimizers.serialize(opt)
      if lso_type == 'v1':
        # LossScaleOptimizerV1 was an older experimental version of LSO that is
        # now deleted. Its config had the same format as LSO's, but the class
        # name was different. This tests that LSO V1 configs can still be
        # deserialized; they deserialize to a (non-V1) LSO.
        config['class_name'] = 'LossScaleOptimizerV1'
    else:
      opt = sgd_experimental.SGD(2., momentum=0.5)
      opt = loss_scale_optimizer.LossScaleOptimizerV3(
          opt, initial_scale=2., dynamic_growth_steps=3)
      config = optimizers.serialize(opt)
    opt = optimizers.deserialize(config)
    # Force hyperparameters to be created
    opt.learning_rate  # pylint: disable=pointless-statement
    self.evaluate(tf.compat.v1.global_variables_initializer())

    self.assertEqual(self.evaluate(opt.learning_rate), 2.)
    self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5)
    self.assertEqual(self.evaluate(opt.loss_scale), 2.)
    self.assertEqual(opt.dynamic_growth_steps, 3.)
    self.assertTrue(opt.dynamic)
    if lso_type in ('v1', 'v2'):
      self.assertEqual(type(opt), loss_scale_optimizer.LossScaleOptimizer)
    else:
      self.assertEqual(type(opt), loss_scale_optimizer.LossScaleOptimizerV3)

    # Ensure the optimizer can be used
    var = tf.Variable([5.0])
    run_op = self._run_fn_with_grad_check(
        tf.distribute.get_strategy(), var, opt, 2)()
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self._run_if_in_graph_mode(run_op)
    self.assertEqual(self.evaluate(var), [3.])
    self.assertEqual(self.evaluate(opt.dynamic_counter), 1)
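
Outside the test harness, the same round trip can be sketched with the public API. The snippet below mirrors the 'v1' branch above: serialize, rewrite the class name to the deleted LossScaleOptimizerV1, and deserialize back to a regular LossScaleOptimizer. It is only a sketch, and it assumes a TF version in which tf.keras.optimizers.SGD is still the optimizer_v2-based class, as in the tests above:

import tensorflow as tf

opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(2., momentum=0.5),
    initial_scale=2., dynamic_growth_steps=3)
config = tf.keras.optimizers.serialize(opt)
config['class_name'] = 'LossScaleOptimizerV1'  # old, now-deleted class name
restored = tf.keras.optimizers.deserialize(config)
assert isinstance(restored, tf.keras.mixed_precision.LossScaleOptimizer)
assert restored.dynamic_growth_steps == 3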

Example #4

    def testMovingAverageOptimizer(self):
        optimizer = sgd_new.SGD(learning_rate=1,
                                use_ema=True,
                                ema_momentum=0.5,
                                ema_overwrite_frequency=3)

        var1, var2 = tf.Variable(2.0), tf.Variable(2.0)
        with tf.GradientTape() as tape:
            loss = var1 + var2
        grads = tape.gradient(loss, [var1, var2])
        # First iteration: [var1, var2] = [1.0, 1.0]
        optimizer.apply_gradients(zip(grads, [var1, var2]))
        self.assertAllEqual([var1.numpy(), var2.numpy()], [1.0, 1.0])

        # Second iteration: [var1, var2] = [0.0, 0.0]
        optimizer.apply_gradients(zip(grads, [var1, var2]))
        self.assertAllEqual([var1.numpy(), var2.numpy()], [0.0, 0.0])

        # Third iteration: without EMA overwriting we would see
        # [var1, var2] = [-1.0, -1.0], but because ema_overwrite_frequency=3,
        # the variables are overwritten with their EMA values, giving
        # [var1, var2] = [-0.125, -0.125].
        optimizer.apply_gradients(zip(grads, [var1, var2]))
        self.assertAllEqual([var1.numpy(), var2.numpy()], [-0.125, -0.125])
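
To see where -0.125 comes from, here is a standalone re-derivation of the EMA bookkeeping that the assertions above rely on. It is plain Python rather than Keras internals, and it assumes the EMA starts at the variable's initial value:

var, ema = 2.0, 2.0              # EMA assumed to start at the initial value
for _ in range(3):
  var -= 1.0 * 1.0               # SGD update: learning_rate * gradient
  ema = 0.5 * ema + 0.5 * var    # ema_momentum * ema + (1 - ema_momentum) * var
print(var, ema)                  # -1.0, -0.125
# With ema_overwrite_frequency=3, the variables are overwritten by their EMA on
# the third step, which is why the test expects -0.125 rather than -1.0.
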
def create_sgd(base_optimizer_cls, *args, **kwargs):
  """Creates an SGD optimizer.

  Will return either the new experimental SGD optimizer subclassing from
  `optimizer_experimental.Optimizer` or the old SGD optimizer subclassing from
  `optimizer_v2.OptimizerV2`, depending on `base_optimizer_cls`.

  Args:
    base_optimizer_cls: What the superclass of the returned SGD optimizer will
      be. Either `optimizer_experimental.Optimizer` or
      `optimizer_v2.OptimizerV2`.
    *args: Arguments to pass to the SGD constructor.
    **kwargs: Keyword arguments to pass to the SGD constructor.

  Returns:
    An SGD optimizer.
  """
  if base_optimizer_cls == optimizer_v2.OptimizerV2:
    return gradient_descent.SGD(*args, **kwargs)
  else:
    assert base_optimizer_cls == optimizer_experimental.Optimizer, (
        f'Got invalid base_optimizer_cls: {base_optimizer_cls}')
    return sgd_experimental.SGD(*args, **kwargs)
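
A brief usage sketch for create_sgd (hypothetical call sites, reusing the module aliases from the function itself):

# Returns a gradient_descent.SGD, i.e. the OptimizerV2-based implementation.
old_sgd = create_sgd(optimizer_v2.OptimizerV2, 0.1, momentum=0.9)

# Returns an sgd_experimental.SGD, i.e. the experimental implementation.
new_sgd = create_sgd(optimizer_experimental.Optimizer, 0.1, momentum=0.9)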

Example #6

  def testSgd(self, nesterov):
    self._compare_numerical(sgd_old.SGD(nesterov=nesterov),
                            sgd_new.SGD(nesterov=nesterov))

Example #7

adadelta_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladadelta",
    lambda: adadelta_new.Adadelta(  # pylint: disable=g-long-lambda
        0.002,
        use_ema=True,
        ema_overwrite_frequency=None))
adagrad_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002))
adam_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladam", lambda: adam_new.Adam(0.002))
adamw_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004))
rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002))
sgd_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalsgdaverage",
    lambda: sgd_new.SGD(  # pylint: disable=g-long-lambda
        0.002,
        use_ema=True,
        ema_overwrite_frequency=1))

OPTIMIZER_FN = [
    adadelta_new_fn,
    adagrad_new_fn,
    adam_new_fn,
    adamw_new_fn,
    rmsprop_new_fn,
    sgd_new_fn,
]


class OptimizerFuntionalityTest(tf.test.TestCase, parameterized.TestCase):
    """Test the functionality of optimizer."""
    def testAddVariableFromReference(self):

Example #8

  def testErrorWhenV2LsoWrapsV3Optimizer(self):
    sgd = sgd_experimental.SGD()
    with self.assertRaisesRegex(
        TypeError, 'only the classic optimizers subclassing from '
                   '`tf.keras.optimizers.Optimizer` can be passed'):
      loss_scale_optimizer.LossScaleOptimizer(sgd)
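
For contrast with the error case above, a short sketch of the pairings that are accepted, reusing the module aliases from these snippets:

# Classic wrapper around an optimizer_v2-based optimizer.
lso_v2 = loss_scale_optimizer.LossScaleOptimizer(gradient_descent.SGD())

# V3 wrapper around an experimental optimizer.
lso_v3 = loss_scale_optimizer.LossScaleOptimizerV3(sgd_experimental.SGD())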

Example #9

    "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002))
adam_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladam", lambda: adam_new.Adam(0.002))
adamax_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladamax", lambda: adamax_new.Adamax(0.002))
adamw_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004))
ftrl_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalftrl", lambda: ftrl_new.Ftrl(0.002))
nadam_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentnadam", lambda: nadam_new.Nadam(0.002))
rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002))
sgd_new_fn = tf.__internal__.test.combinations.NamedObject(
    "experimentalsgdaverage",
    lambda: sgd_new.SGD(0.002, use_ema=True, ema_overwrite_frequency=1),
)

OPTIMIZER_FN = [
    adadelta_new_fn,
    adagrad_new_fn,
    adam_new_fn,
    adamax_new_fn,
    adamw_new_fn,
    ftrl_new_fn,
    nadam_new_fn,
    rmsprop_new_fn,
    sgd_new_fn,
]
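
A hedged sketch of how an OPTIMIZER_FN list like the two above is typically consumed: the combinations framework parameterizes a test with each named factory, and calling the NamedObject forwards to the wrapped lambda. The test class, test name, and body below are illustrative, not taken from the original files:

class OptimizerSmokeTest(tf.test.TestCase, parameterized.TestCase):

  @tf.__internal__.test.combinations.generate(
      tf.__internal__.test.combinations.combine(optimizer_fn=OPTIMIZER_FN))
  def testApplyGradients(self, optimizer_fn):
    optimizer = optimizer_fn()  # NamedObject delegates the call to its lambda
    var = tf.Variable(1.0)
    with tf.GradientTape() as tape:
      loss = var * var
    grads = tape.gradient(loss, [var])
    optimizer.apply_gradients(zip(grads, [var]))
    self.assertNotEqual(var.numpy(), 1.0)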