def testIsInstance(self):
  """Wrapping either SGD flavor yields a BaseLossScaleOptimizer instance."""
  # Both the experimental SGD and the classic (OptimizerV2) SGD must be
  # accepted by create_lso and produce a loss-scale optimizer.
  for inner_optimizer in (sgd_experimental.SGD(), gradient_descent.SGD()):
    wrapped = create_lso(inner_optimizer)
    self.assertIsInstance(
        wrapped, loss_scale_optimizer.BaseLossScaleOptimizer)
def testGetConfigFixed(self, config_version):
  """Round-trips a fixed-loss-scale optimizer through its config.

  Takes a config from LossScaleOptimizer, LossScaleOptimizerV3, or the
  LossScaleOptimizer from TF 2.3, then restores it into a
  LossScaleOptimizer or LossScaleOptimizerV3 and checks the restored
  optimizer's attributes and behavior.
  """
  if config_version == 'v2':
    inner = gradient_descent.SGD(2., momentum=0.5)
    wrapped = loss_scale_optimizer.LossScaleOptimizer(
        inner, dynamic=False, initial_scale=2)
    opt = loss_scale_optimizer.LossScaleOptimizer.from_config(
        wrapped.get_config())
  elif config_version == 'v3':
    inner = sgd_experimental.SGD(2., momentum=0.5)
    wrapped = loss_scale_optimizer.LossScaleOptimizerV3(
        inner, dynamic=False, initial_scale=2)
    opt = loss_scale_optimizer.LossScaleOptimizerV3.from_config(
        wrapped.get_config())
  else:
    self.assertEqual(config_version, 'tf2_3')
    # Hand-written config in the TF 2.3 format, which serialized the loss
    # scale as a separate FixedLossScale object.
    config = {
        'optimizer': {
            'class_name': 'SGD',
            'config': {
                'learning_rate': 2.0,
                'momentum': 0.5,
                'decay': 0.0,
                'nesterov': False,
                'name': 'SGD',
            }
        },
        'loss_scale': {
            'class_name': 'FixedLossScale',
            'config': {'loss_scale_value': 2.0}
        },
    }
    opt = loss_scale_optimizer.LossScaleOptimizer.from_config(config)

  # Force hyperparameters to be created
  opt.learning_rate  # pylint: disable=pointless-statement
  self.evaluate(tf.compat.v1.global_variables_initializer())

  # Attributes restored from the config.
  self.assertEqual(self.evaluate(opt.learning_rate), 2.)
  self.assertEqual(self.evaluate(opt.inner_optimizer.learning_rate), 2.)
  self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5)
  self.assertEqual(self.evaluate(opt.loss_scale), 2.)
  self.assertEqual(opt.initial_scale, 2.)
  self.assertIsNone(opt.dynamic_growth_steps)
  self.assertIsNone(opt.dynamic_counter)
  self.assertFalse(opt.dynamic)

  # The restored optimizer must still apply gradients correctly.
  var = tf.Variable([5.0])
  run_op = self._run_fn_with_grad_check(
      tf.distribute.get_strategy(), var, opt, 2)()
  self.evaluate(tf.compat.v1.global_variables_initializer())
  self._run_if_in_graph_mode(run_op)
  self.assertEqual(self.evaluate(var), [3.])
def testSerializationWithBuiltInOptimizer(self, lso_type):
  """Serializes and deserializes a loss-scale optimizer wrapping SGD."""
  if lso_type in ('v1', 'v2'):
    inner = gradient_descent.SGD(2., momentum=0.5)
    wrapped = loss_scale_optimizer.LossScaleOptimizer(
        inner, initial_scale=2., dynamic_growth_steps=3.)
    config = optimizers.serialize(wrapped)
    if lso_type == 'v1':
      # LossScaleOptimizerV1 was an older experimental version of LSO that is
      # now deleted. The config had the same format as LSO but the class
      # name was different. This tests that LSO V1 configs can still be
      # deserialized, which are deserialized as a (non-V1) LSO
      config['class_name'] = 'LossScaleOptimizerV1'
  else:
    inner = sgd_experimental.SGD(2., momentum=0.5)
    wrapped = loss_scale_optimizer.LossScaleOptimizerV3(
        inner, initial_scale=2., dynamic_growth_steps=3)
    config = optimizers.serialize(wrapped)
  opt = optimizers.deserialize(config)

  # Force hyperparameters to be created
  opt.learning_rate  # pylint: disable=pointless-statement
  self.evaluate(tf.compat.v1.global_variables_initializer())

  # Hyperparameters and loss-scale state survive the round trip.
  self.assertEqual(self.evaluate(opt.learning_rate), 2.)
  self.assertEqual(self._eval_if_tensor(opt.inner_optimizer.momentum), 0.5)
  self.assertEqual(self.evaluate(opt.loss_scale), 2.)
  self.assertEqual(opt.dynamic_growth_steps, 3.)
  self.assertTrue(opt.dynamic)
  expected_cls = (
      loss_scale_optimizer.LossScaleOptimizer if lso_type in ('v1', 'v2')
      else loss_scale_optimizer.LossScaleOptimizerV3)
  self.assertEqual(type(opt), expected_cls)

  # Ensure the optimizer can be used
  var = tf.Variable([5.0])
  run_op = self._run_fn_with_grad_check(
      tf.distribute.get_strategy(), var, opt, 2)()
  self.evaluate(tf.compat.v1.global_variables_initializer())
  self._run_if_in_graph_mode(run_op)
  self.assertEqual(self.evaluate(var), [3.])
  self.assertEqual(self.evaluate(opt.dynamic_counter), 1)
def testMovingAverageOptimizer(self):
  """EMA overwrite replaces variables with their averages on step 3."""
  optimizer = sgd_new.SGD(
      learning_rate=1,
      use_ema=True,
      ema_momentum=0.5,
      ema_overwrite_frequency=3)
  var1, var2 = tf.Variable(2.0), tf.Variable(2.0)
  with tf.GradientTape() as tape:
    loss = var1 + var2
  grads = tape.gradient(loss, [var1, var2])

  # The first two iterations are plain SGD updates (lr=1, grad=1 each):
  # [2.0, 2.0] -> [1.0, 1.0] -> [0.0, 0.0].
  for expected in ([1.0, 1.0], [0.0, 0.0]):
    optimizer.apply_gradients(zip(grads, [var1, var2]))
    self.assertAllEqual([var1.numpy(), var2.numpy()], expected)

  # Third iteration, without EMA, we should see [var1, var2] = [-1.0, -1.0],
  # but overwriting results in [var1, var2] = [-0.125, -0.125].
  optimizer.apply_gradients(zip(grads, [var1, var2]))
  self.assertAllEqual([var1.numpy(), var2.numpy()], [-0.125, -0.125])
def create_sgd(base_optimizer_cls, *args, **kwargs):
  """Creates an SGD optimizer of the requested flavor.

  Returns either the new experimental SGD optimizer subclassing from
  `optimizer_experimental.Optimizer` or the old SGD optimizer subclassing
  from `optimizer_v2.OptimizerV2`, depending on `base_optimizer_cls`.

  Args:
    base_optimizer_cls: The superclass of the returned SGD optimizer.
      Either `optimizer_experimental.Optimizer` or
      `optimizer_v2.OptimizerV2`.
    *args: Arguments to pass to the SGD constructor
    **kwargs: Keyword arguments to pass to the SGD constructor.

  Returns:
    An SGD optimizer.
  """
  # Classic flavor: early return keeps the experimental branch flat.
  if base_optimizer_cls == optimizer_v2.OptimizerV2:
    return gradient_descent.SGD(*args, **kwargs)
  assert base_optimizer_cls == optimizer_experimental.Optimizer, (
      f'Got invalid base_optimizer_cls: {base_optimizer_cls}')
  return sgd_experimental.SGD(*args, **kwargs)
def testSgd(self, nesterov):
  """Numerically compares old and new SGD for the given `nesterov` flag.

  Fixes a bug where the parameterized `nesterov` argument was ignored:
  both optimizers were hard-coded to `nesterov=True`, so the
  `nesterov=False` parameterization never actually tested that setting.

  Args:
    nesterov: Whether to enable Nesterov momentum on both optimizers.
  """
  self._compare_numerical(
      sgd_old.SGD(nesterov=nesterov), sgd_new.SGD(nesterov=nesterov))
lambda: adadelta_new.Adadelta( # pylint: disable=g-long-lambda 0.002, use_ema=True, ema_overwrite_frequency=None)) adagrad_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentaladagrad", lambda: adagrad_new.Adagrad(0.002)) adam_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentaladam", lambda: adam_new.Adam(0.002)) adamw_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004)) rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002)) sgd_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentalsgdaverage", lambda: sgd_new.SGD( # pylint: disable=g-long-lambda 0.002, use_ema=True, ema_overwrite_frequency=1)) OPTIMIZER_FN = [ adadelta_new_fn, adagrad_new_fn, adam_new_fn, adamw_new_fn, rmsprop_new_fn, sgd_new_fn, ] class OptimizerFuntionalityTest(tf.test.TestCase, parameterized.TestCase): """Test the functionality of optimizer.""" def testAddVariableFromReference(self):
def testErrorWhenV2LsoWrapsV3Optimizer(self):
  """The V2 LossScaleOptimizer rejects experimental (V3) optimizers."""
  experimental_sgd = sgd_experimental.SGD()
  expected_message = ('only the classic optimizers subclassing from '
                      '`tf.keras.optimizers.Optimizer` can be passed')
  with self.assertRaisesRegex(TypeError, expected_message):
    loss_scale_optimizer.LossScaleOptimizer(experimental_sgd)
"experimentaladagrad", lambda: adagrad_new.Adagrad(0.002)) adam_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentaladam", lambda: adam_new.Adam(0.002)) adamax_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentaladamax", lambda: adamax_new.Adamax(0.002)) adamw_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentaladamw", lambda: adamw_new.AdamW(0.002, weight_decay=0.004)) ftrl_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentalftrl", lambda: ftrl_new.Ftrl(0.002)) nadam_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentnadam", lambda: nadam_new.Nadam(0.002)) rmsprop_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentalrmsprop", lambda: rmsprop_new.RMSprop(0.002)) sgd_new_fn = tf.__internal__.test.combinations.NamedObject( "experimentalsgdaverage", lambda: sgd_new.SGD(0.002, use_ema=True, ema_overwrite_frequency=1), ) OPTIMIZER_FN = [ adadelta_new_fn, adagrad_new_fn, adam_new_fn, adamax_new_fn, adamw_new_fn, ftrl_new_fn, nadam_new_fn, rmsprop_new_fn, sgd_new_fn, ]