# Tests for the Model Garden optimizer factory and optimization config.
# The imports below assume the standard TensorFlow Model Garden package layout.
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers

from official.modeling.optimization import optimizer_factory
from official.modeling.optimization.configs import learning_rate_config as lr_cfg
from official.modeling.optimization.configs import optimization_config
from official.modeling.optimization.configs import optimizer_config as opt_cfg
from official.nlp import optimization as nlp_optimization


class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):

    def test_constant_lr_with_warmup_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'constant',
                'constant': {
                    'learning_rate': 0.1
                }
            },
            'warmup': {
                'type': 'linear',
                'linear': {
                    'warmup_steps': 500,
                    'warmup_learning_rate': 0.01
                }
            }
        }

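        # Linear warmup interpolates from warmup_learning_rate to the constant
        # rate over warmup_steps, e.g. at step 250:
        # 0.01 + (0.1 - 0.01) * 250 / 500 = 0.055.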
        expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1],
                                   [5000, 0.1], [10000, 0.1], [20000, 0.1]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_stepwise_lr_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'stepwise',
                'stepwise': {
                    'boundaries': [10000, 20000],
                    'values': [0.1, 0.01, 0.001]
                }
            }
        }
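        # The stepwise schedule holds each value until its boundary step has
        # passed, so the rate drops at steps 10001 and 20001.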
        expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1],
                                   [10001, 0.01], [20000, 0.01],
                                   [20001, 0.001]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_polynomial_lr_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'polynomial',
                'polynomial': {
                    'initial_learning_rate': 0.1,
                    'decay_steps': 1000,
                    'end_learning_rate': 0.001
                }
            }
        }

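        # With the default power of 1.0, polynomial decay is linear, e.g. at
        # step 500: (0.1 - 0.001) * (1 - 500 / 1000) + 0.001 = 0.0505.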
        expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_cosine_lr_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'cosine',
                'cosine': {
                    'initial_learning_rate': 0.1,
                    'decay_steps': 1000
                }
            }
        }
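        # Cosine decay follows 0.5 * initial_lr * (1 + cos(pi * step / decay_steps)),
        # e.g. at step 250: 0.05 * (1 + cos(pi / 4)) ~= 0.0853553.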
        expected_lr_step_values = [[0, 0.1], [250, 0.08535534],
                                   [500, 0.04999999], [750, 0.01464466],
                                   [1000, 0]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_exponential_lr_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'exponential',
                'exponential': {
                    'initial_learning_rate': 0.1,
                    'decay_steps': 1000,
                    'decay_rate': 0.96,
                    'staircase': True
                }
            }
        }
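        # With staircase=True the rate decays in discrete jumps:
        # lr = 0.1 * 0.96 ** (step // 1000).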
        expected_lr_step_values = [
            [0, 0.1],
            [999, 0.1],
            [1000, 0.096],
            [1999, 0.096],
            [2000, 0.09216],
        ]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_power_linear_lr_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'power_linear',
                'power_linear': {
                    'initial_learning_rate': 1.0,
                    'power': -1.0,
                    'linear_decay_fraction': 0.5,
                    'total_decay_steps': 100,
                }
            }
        }
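        # The power schedule yields initial_learning_rate * step ** power
        # (1 / step here); over the final linear_decay_fraction of
        # total_decay_steps it is additionally scaled down linearly,
        # e.g. (1 / 60) * (100 - 60) / (100 * 0.5) at step 60.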
        expected_lr_step_values = [[1, 1.0], [40, 1. / 40.],
                                   [60, 1. / 60. * 0.8]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_step_cosine_lr_schedule_with_warmup(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'step_cosine_with_offset',
                'step_cosine_with_offset': {
                    'values': (0.0001, 0.00005),
                    'boundaries': (0, 500000),
                    'offset': 10000,
                }
            },
            'warmup': {
                'type': 'linear',
                'linear': {
                    'warmup_steps': 10000,
                    'warmup_learning_rate': 0.0
                }
            }
        }
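        # Warmup ramps linearly from 0 to 1e-4 over the first 10000 steps (the
        # offset); the rate then follows a cosine decay from 1e-4 toward the
        # 5e-5 value at the 500000-step boundary.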
        expected_lr_step_values = [[0, 0.0], [5000, 1e-4 / 2.0], [10000, 1e-4],
                                   [20000, 9.994863e-05], [499999, 5e-05]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_power_with_offset_lr_schedule(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'momentum': 0.9
                }
            },
            'learning_rate': {
                'type': 'power_with_offset',
                'power_with_offset': {
                    'initial_learning_rate': 1.0,
                    'power': -1.0,
                    'offset': 10,
                    'pre_offset_learning_rate': 3.0,
                }
            }
        }
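        # Steps up to the offset use pre_offset_learning_rate; afterwards the
        # rate is initial_learning_rate * (step - offset) ** power,
        # e.g. 1 / (20 - 10) = 0.1 at step 20.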
        expected_lr_step_values = [[1, 3.0], [10, 3.0], [20, 1. / 10.]]
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        for step, value in expected_lr_step_values:
            self.assertAlmostEqual(lr(step).numpy(), value)

    def test_missing_types(self):
        # An optimizer config without a learning rate is rejected.
        params = {'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}}}
        with self.assertRaises(ValueError):
            optimizer_factory.OptimizerFactory(
                optimization_config.OptimizationConfig(params))
        # A learning rate config without an optimizer is rejected as well.
        params = {
            'learning_rate': {
                'type': 'stepwise',
                'stepwise': {
                    'boundaries': [10000, 20000],
                    'values': [0.1, 0.01, 0.001]
                }
            }
        }
        with self.assertRaises(ValueError):
            optimizer_factory.OptimizerFactory(
                optimization_config.OptimizationConfig(params))

    def test_lamb_optimizer(self):
        params = {'optimizer': {'type': 'lamb'}}
        expected_optimizer_config = tfa_optimizers.LAMB().get_config()
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        optimizer = opt_factory.build_optimizer(lr)

        self.assertIsInstance(optimizer, tfa_optimizers.LAMB)
        self.assertEqual(expected_optimizer_config, optimizer.get_config())

    def test_adam_weight_decay_optimizer(self):
        params = {'optimizer': {'type': 'adamw'}}
        expected_optimizer_config = nlp_optimization.AdamWeightDecay(
        ).get_config()
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        optimizer = opt_factory.build_optimizer(lr)

        self.assertIsInstance(optimizer, nlp_optimization.AdamWeightDecay)
        self.assertEqual(expected_optimizer_config, optimizer.get_config())

    def test_optimizers(self, optimizer_type):
        params = {'optimizer': {'type': optimizer_type}}
        optimizer_cls = optimizer_factory.OPTIMIZERS_CLS[optimizer_type]
        expected_optimizer_config = optimizer_cls().get_config()

        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        optimizer = opt_factory.build_optimizer(lr)

        self.assertIsInstance(optimizer, optimizer_cls)
        self.assertEqual(expected_optimizer_config, optimizer.get_config())

    def test_adam_optimizer(self):

        # Define adam optimizer with default values.
        params = {'optimizer': {'type': 'adam'}}
        expected_optimizer_config = tf.keras.optimizers.Adam().get_config()

        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        optimizer = opt_factory.build_optimizer(lr)

        self.assertIsInstance(optimizer, tf.keras.optimizers.Adam)
        self.assertEqual(expected_optimizer_config, optimizer.get_config())

    def test_config(self):
        opt_config = optimization_config.OptimizationConfig({
            'optimizer': {
                'type': 'sgd',
                'sgd': {}  # default config
            },
            'learning_rate': {
                'type': 'polynomial',
                'polynomial': {}
            },
            'warmup': {
                'type': 'linear'
            }
        })
        self.assertEqual(opt_config.optimizer.get(), opt_cfg.SGDConfig())
        self.assertEqual(opt_config.learning_rate.get(),
                         lr_cfg.PolynomialLrConfig())
        self.assertEqual(opt_config.warmup.get(), lr_cfg.LinearWarmupConfig())

    def test_wrong_return_type(self):
        optimizer_type = 'sgd'
        params = {
            'optimizer': {
                'type': optimizer_type
            },
            'learning_rate': {
                'type': 'constant',
                'constant': {
                    'learning_rate': 0.1
                }
            }
        }

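        # The postprocessor below returns None rather than an optimizer, so
        # build_optimizer should raise a TypeError.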
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        with self.assertRaises(TypeError):
            _ = opt_factory.build_optimizer(0.1, postprocessor=lambda x: None)

    def test_sgd_optimizer(self):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
            }
        }
        expected_optimizer_config = {
            'name': 'SGD',
            'learning_rate': 0.1,
            'decay': 0.0,
            'momentum': 0.9,
            'nesterov': False
        }
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        optimizer = opt_factory.build_optimizer(lr)

        self.assertIsInstance(optimizer, tf.keras.optimizers.SGD)
        self.assertEqual(expected_optimizer_config, optimizer.get_config())

    def test_gradient_clipping(self, clipnorm, clipvalue):
        params = {
            'optimizer': {
                'type': 'sgd',
                'sgd': {
                    'clipnorm': clipnorm,
                    'clipvalue': clipvalue
                }
            },
            'learning_rate': {
                'type': 'constant',
                'constant': {
                    'learning_rate': 1.0
                }
            }
        }

        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        optimizer = opt_factory.build_optimizer(lr)

        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([3.0, 4.0])

        grads0 = tf.constant([0.1, 0.1])
        grads1 = tf.constant([2.0, 3.0])

        grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
        optimizer.apply_gradients(grads_and_vars)

        self.assertAllClose(np.array([0.9, 1.9]), var0.numpy())
        # The expected var1 values below correspond to a clipvalue or clipnorm
        # of 1.0: value clipping caps each gradient component at 1.0, norm
        # clipping rescales [2.0, 3.0] to unit norm, and with no clipping the
        # full gradient is applied.
        if clipvalue is not None:
            self.assertAllClose(np.array([2.0, 3.0]), var1.numpy())
        elif clipnorm is not None:
            self.assertAllClose(np.array([2.4452999, 3.1679497]), var1.numpy())
        else:
            self.assertAllClose(np.array([1.0, 1.0]), var1.numpy())

    def test_optimizers(self, optimizer_type):
        params = {
            'optimizer': {
                'type': optimizer_type
            },
            'learning_rate': {
                'type': 'constant',
                'constant': {
                    'learning_rate': 0.1
                }
            }
        }
        optimizer_cls = optimizer_factory.OPTIMIZERS_CLS[optimizer_type]
        expected_optimizer_config = optimizer_cls().get_config()
        expected_optimizer_config['learning_rate'] = 0.1

        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()
        # An identity postprocessor should return the built optimizer unchanged.
        optimizer = opt_factory.build_optimizer(lr, postprocessor=lambda x: x)

        self.assertIsInstance(optimizer, optimizer_cls)
        self.assertEqual(expected_optimizer_config, optimizer.get_config())

    def test_gradient_aggregator(self):
        params = {
            'optimizer': {
                'type': 'adam',
            },
            'learning_rate': {
                'type': 'constant',
                'constant': {
                    'learning_rate': 1.0
                }
            }
        }
        opt_config = optimization_config.OptimizationConfig(params)
        opt_factory = optimizer_factory.OptimizerFactory(opt_config)
        lr = opt_factory.build_learning_rate()

        # Dummy function to zero out gradients.
        zero_grads = lambda gv: [(tf.zeros_like(g), v) for g, v in gv]

        optimizer = opt_factory.build_optimizer(lr,
                                                gradient_aggregator=zero_grads)
        if isinstance(optimizer, tf.keras.optimizers.experimental.Optimizer):
            self.skipTest('New Keras optimizer does not support '
                          '`gradient_aggregator` arg.')

        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([3.0, 4.0])

        grads0 = tf.constant([1.0, 1.0])
        grads1 = tf.constant([1.0, 1.0])

        grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
        optimizer.apply_gradients(grads_and_vars)

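        # Every gradient was zeroed by the aggregator, so the variables keep
        # their initial values.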
        self.assertAllClose(np.array([1.0, 2.0]), var0.numpy())
        self.assertAllClose(np.array([3.0, 4.0]), var1.numpy())

    def test_no_warmup_schedule(self):
        warmup = optimization_config.OptimizationConfig({}).warmup.get()
        self.assertEqual(warmup, None)

    def test_no_lr_schedule(self):
        lr = optimization_config.OptimizationConfig({}).learning_rate.get()
        self.assertEqual(lr, None)

    def test_no_optimizer(self):
        optimizer = optimization_config.OptimizationConfig({}).optimizer.get()
        self.assertEqual(optimizer, None)