def test_constant_lr_with_warmup_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'constant',
          'constant': {'learning_rate': 0.1}
      },
      'warmup': {
          'type': 'linear',
          'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
      }
  }
  # Linear warmup from 0.01 to 0.1 over 500 steps, then constant at 0.1.
  expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5000, 0.1],
                             [10000, 0.1], [20000, 0.1]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_stepwise_lr_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              'boundaries': [10000, 20000],
              'values': [0.1, 0.01, 0.001]
          }
      }
  }
  # Boundaries are inclusive: the boundary step itself still uses the
  # previous value.
  expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1],
                             [10001, 0.01], [20000, 0.01], [20001, 0.001]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_polynomial_lr_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'polynomial',
          'polynomial': {
              'initial_learning_rate': 0.1,
              'decay_steps': 1000,
              'end_learning_rate': 0.001
          }
      }
  }
  # With the default power of 1.0 the rate decays linearly from 0.1 to 0.001
  # over 1000 steps.
  expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_cosine_lr_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'cosine',
          'cosine': {'initial_learning_rate': 0.1, 'decay_steps': 1000}
      }
  }
  # Cosine decay: lr(step) = 0.1 * 0.5 * (1 + cos(pi * step / 1000)).
  expected_lr_step_values = [[0, 0.1], [250, 0.08535534], [500, 0.04999999],
                             [750, 0.01464466], [1000, 0]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_exponential_lr_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'exponential',
          'exponential': {
              'initial_learning_rate': 0.1,
              'decay_steps': 1000,
              'decay_rate': 0.96,
              'staircase': True
          }
      }
  }
  # With staircase=True the rate drops by a factor of 0.96 only at each full
  # multiple of 1000 steps.
  expected_lr_step_values = [
      [0, 0.1],
      [999, 0.1],
      [1000, 0.096],
      [1999, 0.096],
      [2000, 0.09216],
  ]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_power_linear_lr_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'power_linear',
          'power_linear': {
              'initial_learning_rate': 1.0,
              'power': -1.0,
              'linear_decay_fraction': 0.5,
              'total_decay_steps': 100,
          }
      }
  }
  # lr = initial * step**power, scaled linearly towards zero over the final
  # `linear_decay_fraction` of `total_decay_steps`.
  expected_lr_step_values = [[1, 1.0], [40, 1. / 40.], [60, 1. / 60. * 0.8]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_step_cosine_lr_schedule_with_warmup(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'step_cosine_with_offset',
          'step_cosine_with_offset': {
              'values': (0.0001, 0.00005),
              'boundaries': (0, 500000),
              'offset': 10000,
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {'warmup_steps': 10000, 'warmup_learning_rate': 0.0}
      }
  }
  # Linear warmup from 0 to 1e-4 over the first 10000 steps (the offset),
  # then a stepwise cosine decay from 1e-4 towards 5e-5.
  expected_lr_step_values = [[0, 0.0], [5000, 1e-4 / 2.0], [10000, 1e-4],
                             [20000, 9.994863e-05], [499999, 5e-05]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_power_with_offset_lr_schedule(self):
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {
          'type': 'power_with_offset',
          'power_with_offset': {
              'initial_learning_rate': 1.0,
              'power': -1.0,
              'offset': 10,
              'pre_offset_learning_rate': 3.0,
          }
      }
  }
  # Constant 3.0 for the first `offset` steps, then (step - offset)**power.
  expected_lr_step_values = [[1, 3.0], [10, 3.0], [20, 1. / 10.]]
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  for step, value in expected_lr_step_values:
    self.assertAlmostEqual(lr(step).numpy(), value)

def test_missing_types(self):
  # Optimizer config without a learning rate.
  params = {'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}}}
  with self.assertRaises(ValueError):
    optimizer_factory.OptimizerFactory(
        optimization_config.OptimizationConfig(params))

  # Learning rate config without an optimizer.
  params = {
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {
              'boundaries': [10000, 20000],
              'values': [0.1, 0.01, 0.001]
          }
      }
  }
  with self.assertRaises(ValueError):
    optimizer_factory.OptimizerFactory(
        optimization_config.OptimizationConfig(params))

def test_lamb_optimizer(self):
  params = {'optimizer': {'type': 'lamb'}}
  expected_optimizer_config = tfa_optimizers.LAMB().get_config()
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)

  self.assertIsInstance(optimizer, tfa_optimizers.LAMB)
  self.assertEqual(expected_optimizer_config, optimizer.get_config())

def test_adam_weight_decay_optimizer(self):
  params = {'optimizer': {'type': 'adamw'}}
  expected_optimizer_config = nlp_optimization.AdamWeightDecay().get_config()
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)

  self.assertIsInstance(optimizer, nlp_optimization.AdamWeightDecay)
  self.assertEqual(expected_optimizer_config, optimizer.get_config())

def test_optimizers(self, optimizer_type):
  # `optimizer_type` is presumably supplied by a parameterized decorator on
  # the enclosing test class.
  params = {'optimizer': {'type': optimizer_type}}
  optimizer_cls = optimizer_factory.OPTIMIZERS_CLS[optimizer_type]
  expected_optimizer_config = optimizer_cls().get_config()
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)

  self.assertIsInstance(optimizer, optimizer_cls)
  self.assertEqual(expected_optimizer_config, optimizer.get_config())

def test_adam_optimizer(self):
  # Define adam optimizer with default values.
  params = {'optimizer': {'type': 'adam'}}
  expected_optimizer_config = tf.keras.optimizers.Adam().get_config()
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)

  self.assertIsInstance(optimizer, tf.keras.optimizers.Adam)
  self.assertEqual(expected_optimizer_config, optimizer.get_config())

def test_config(self):
  opt_config = optimization_config.OptimizationConfig({
      'optimizer': {
          'type': 'sgd',
          'sgd': {}  # default config
      },
      'learning_rate': {
          'type': 'polynomial',
          'polynomial': {}
      },
      'warmup': {
          'type': 'linear'
      }
  })
  self.assertEqual(opt_config.optimizer.get(), opt_cfg.SGDConfig())
  self.assertEqual(opt_config.learning_rate.get(),
                   lr_cfg.PolynomialLrConfig())
  self.assertEqual(opt_config.warmup.get(), lr_cfg.LinearWarmupConfig())

def test_wrong_return_type(self):
  optimizer_type = 'sgd'
  params = {
      'optimizer': {'type': optimizer_type},
      'learning_rate': {
          'type': 'constant',
          'constant': {'learning_rate': 0.1}
      }
  }
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  # A postprocessor that does not return an optimizer should raise TypeError.
  with self.assertRaises(TypeError):
    _ = opt_factory.build_optimizer(0.1, postprocessor=lambda x: None)

def test_sgd_optimizer(self):
  params = {
      'optimizer': {
          'type': 'sgd',
          'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
      }
  }
  expected_optimizer_config = {
      'name': 'SGD',
      'learning_rate': 0.1,
      'decay': 0.0,
      'momentum': 0.9,
      'nesterov': False
  }
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)

  self.assertIsInstance(optimizer, tf.keras.optimizers.SGD)
  self.assertEqual(expected_optimizer_config, optimizer.get_config())

def test_gradient_clipping(self, clipnorm, clipvalue):
  # `clipnorm` and `clipvalue` are presumably supplied by a parameterized
  # decorator; the expected values below assume a clip threshold of 1.0.
  params = {
      'optimizer': {
          'type': 'sgd',
          'sgd': {'clipnorm': clipnorm, 'clipvalue': clipvalue}
      },
      'learning_rate': {
          'type': 'constant',
          'constant': {'learning_rate': 1.0}
      }
  }
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)

  var0 = tf.Variable([1.0, 2.0])
  var1 = tf.Variable([3.0, 4.0])
  grads0 = tf.constant([0.1, 0.1])
  grads1 = tf.constant([2.0, 3.0])
  grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
  optimizer.apply_gradients(grads_and_vars)

  # Small gradients are unaffected by clipping.
  self.assertAllClose(np.array([0.9, 1.9]), var0.numpy())
  if clipvalue is not None:
    # Each component of grads1 is clipped to 1.0.
    self.assertAllClose(np.array([2.0, 3.0]), var1.numpy())
  elif clipnorm is not None:
    # grads1 is rescaled to unit norm: [2, 3] / sqrt(13).
    self.assertAllClose(np.array([2.4452999, 3.1679497]), var1.numpy())
  else:
    self.assertAllClose(np.array([1.0, 1.0]), var1.numpy())

def test_optimizers(self, optimizer_type):
  # `optimizer_type` is presumably supplied by a parameterized decorator on
  # the enclosing test class.
  params = {
      'optimizer': {'type': optimizer_type},
      'learning_rate': {
          'type': 'constant',
          'constant': {'learning_rate': 0.1}
      }
  }
  optimizer_cls = optimizer_factory.OPTIMIZERS_CLS[optimizer_type]
  expected_optimizer_config = optimizer_cls().get_config()
  expected_optimizer_config['learning_rate'] = 0.1
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr, postprocessor=lambda x: x)

  self.assertIsInstance(optimizer, optimizer_cls)
  self.assertEqual(expected_optimizer_config, optimizer.get_config())

def test_gradient_aggregator(self):
  params = {
      'optimizer': {'type': 'adam'},
      'learning_rate': {
          'type': 'constant',
          'constant': {'learning_rate': 1.0}
      }
  }
  opt_config = optimization_config.OptimizationConfig(params)
  opt_factory = optimizer_factory.OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  # Dummy aggregator that zeroes out all gradients.
  zero_grads = lambda gv: [(tf.zeros_like(g), v) for g, v in gv]
  optimizer = opt_factory.build_optimizer(lr, gradient_aggregator=zero_grads)
  if isinstance(optimizer, tf.keras.optimizers.experimental.Optimizer):
    self.skipTest('New Keras optimizer does not support '
                  '`gradient_aggregator` arg.')

  var0 = tf.Variable([1.0, 2.0])
  var1 = tf.Variable([3.0, 4.0])
  grads0 = tf.constant([1.0, 1.0])
  grads1 = tf.constant([1.0, 1.0])
  grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
  optimizer.apply_gradients(grads_and_vars)

  # With zeroed gradients the variables are unchanged.
  self.assertAllClose(np.array([1.0, 2.0]), var0.numpy())
  self.assertAllClose(np.array([3.0, 4.0]), var1.numpy())

def test_no_warmup_schedule(self):
  warmup = optimization_config.OptimizationConfig({}).warmup.get()
  self.assertEqual(warmup, None)

def test_no_lr_schedule(self):
  lr = optimization_config.OptimizationConfig({}).learning_rate.get()
  self.assertEqual(lr, None)

def test_no_optimizer(self):
  optimizer = optimization_config.OptimizationConfig({}).optimizer.get()
  self.assertEqual(optimizer, None)