def testSerializationWithBuiltInOptimizer(self, use_v1):
  """Tests a LossScaleOptimizer survives a serialize/deserialize round trip.

  Args:
    use_v1: If True, build the wrapper as a LossScaleOptimizerV1 with an
      explicit DynamicLossScale; otherwise build a V2 LossScaleOptimizer
      with the equivalent constructor arguments.
  """
  opt = gradient_descent.SGD(2., momentum=0.5)
  if use_v1:
    loss_scale = tf_loss_scale_module.DynamicLossScale(
        initial_loss_scale=2., increment_period=3.)
    opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, loss_scale)
  else:
    opt = loss_scale_optimizer.LossScaleOptimizer(
        opt, initial_scale=2., dynamic_growth_steps=3.)
  config = optimizers.serialize(opt)
  opt = optimizers.deserialize(config)
  # Force hyperparameters to be created
  opt.lr  # pylint: disable=pointless-statement
  self.evaluate(variables.global_variables_initializer())

  self.assertEqual(self.evaluate(opt.lr), 2.)
  self.assertEqual(self.evaluate(opt._optimizer.momentum), 0.5)
  self.assertEqual(self.evaluate(opt.loss_scale), 2.)
  self.assertEqual(opt.dynamic_growth_steps, 3.)
  # Bug fix: the original call was `self.assertTrue(opt.dynamic, 4.)`. The
  # second argument of assertTrue is the failure *message*, so the stray
  # `4.` was never checked and only masked the intent of the assertion.
  self.assertTrue(opt.dynamic)
  # Deserializing a LossScaleOptimizer always results in a V2
  # LossScaleOptimizer, even if serialized with a LossScaleOptimizerV1.
  self.assertAllEqual(type(opt), loss_scale_optimizer.LossScaleOptimizer)

  # Ensure the deserialized optimizer is still usable: one step on a scalar
  # variable with gradient 1 and lr 2 moves it from 5 to 3, and the dynamic
  # counter advances by one.
  var = variables.Variable([5.0])
  run_op = self._run_fn_with_grad_check(
      distribution_strategy_context.get_strategy(), var, opt, 2)()
  self.evaluate(variables.global_variables_initializer())
  self._run_if_in_graph_mode(run_op)
  self.assertEqual(self.evaluate(var), [3.])
  self.assertEqual(self.evaluate(opt.dynamic_counter), 1)
def testGetConfigFixed(self, get_config, from_config):
  """Round-trips a fixed-loss-scale optimizer through config serialization.

  The config may come from a LossScaleOptimizerV1, a LossScaleOptimizer, or
  a hard-coded dict matching the format emitted by TF 2.3, and is restored
  into either a LossScaleOptimizerV1 or a LossScaleOptimizer.
  """
  opt = gradient_descent.SGD(2., momentum=0.5)
  if get_config == 'v1':
    opt = loss_scale_optimizer.LossScaleOptimizerV1(opt, 2)
    cfg = opt.get_config()
  elif get_config == 'v2':
    opt = loss_scale_optimizer.LossScaleOptimizer(
        opt, dynamic=False, initial_scale=2)
    cfg = opt.get_config()
  else:
    self.assertEqual(get_config, 'tf2_3')
    # Hand-written config in the exact layout TF 2.3 serialized.
    inner_cfg = {
        'learning_rate': 2.0,
        'momentum': 0.5,
        'decay': 0.0,
        'nesterov': False,
        'name': 'SGD',
    }
    cfg = {
        'optimizer': {'class_name': 'SGD', 'config': inner_cfg},
        'loss_scale': {
            'class_name': 'FixedLossScale',
            'config': {'loss_scale_value': 2.0}
        },
    }

  if from_config == 'v1':
    opt = loss_scale_optimizer.LossScaleOptimizerV1.from_config(cfg)
  else:
    self.assertEqual(from_config, 'v2')
    opt = loss_scale_optimizer.LossScaleOptimizer.from_config(cfg)

  # Force hyperparameters to be created
  opt.lr  # pylint: disable=pointless-statement
  self.evaluate(variables.global_variables_initializer())

  # The restored optimizer must carry a fixed loss scale of 2 and the
  # inner SGD's original hyperparameters.
  self.assertEqual(self.evaluate(opt.lr), 2.)
  self.assertEqual(self.evaluate(opt.inner_optimizer.lr), 2.)
  self.assertEqual(self.evaluate(opt.momentum), 0.5)
  self.assertEqual(self.evaluate(opt.loss_scale), 2.)
  self.assertEqual(opt.initial_scale, 2.)
  self.assertIsNone(opt.dynamic_growth_steps)
  self.assertIsNone(opt.dynamic_counter)
  self.assertFalse(opt.dynamic)

  # The restored optimizer must still be able to apply a step: gradient 1
  # with lr 2 moves the variable from 5 to 3.
  v = variables.Variable([5.0])
  step_op = self._run_fn_with_grad_check(
      distribution_strategy_context.get_strategy(), v, opt, 2)()
  self.evaluate(variables.global_variables_initializer())
  self._run_if_in_graph_mode(step_op)
  self.assertEqual(self.evaluate(v), [3.])
def test_optimizer_errors(self):
  """The graph rewrite rejects an already-wrapped LossScaleOptimizer."""
  sgd = gradient_descent_v2.SGD(1.0)
  already_wrapped = loss_scale_optimizer_v2.LossScaleOptimizerV1(
      sgd, 'dynamic')
  expected_regex = ('"opt" must not already be an instance of a '
                    'LossScaleOptimizer.')
  with self.assertRaisesRegex(ValueError, expected_regex):
    enable_mixed_precision_graph_rewrite(already_wrapped)
  # The failed call must not have enabled the rewrite as a side effect.
  self.assertFalse(
      config.get_optimizer_experimental_options().get(
          'auto_mixed_precision', False))
def testPassingV1LossScale(self, strategy_fn):
  """Tests passing V1 FixedLossScale/DynamicLossScale objects to the V1 wrapper."""
  strategy = strategy_fn()
  lr = 2.
  with strategy.scope():
    # --- FixedLossScale case ---
    v = variables.Variable([5.0])
    inner = gradient_descent.SGD(lr)
    fixed_ls = tf_loss_scale_module.FixedLossScale(2.)
    opt = loss_scale_optimizer.LossScaleOptimizerV1(inner, fixed_ls)
    self.assertIsInstance(opt.loss_scale, ops.Tensor)
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(self.evaluate(opt.loss_scale), 2)
    step_fn = self._run_fn_with_grad_check(
        strategy, v, opt, 2 / strategy.num_replicas_in_sync)
    step_op = strategy.experimental_run(step_fn)
    self.evaluate(variables.global_variables_initializer())
    self._run_if_in_graph_mode(step_op)
    # The loss is the identity of the variable, so the gradient is 1 and
    # the variable becomes init_val - grad * lr == 5 - 1 * 2 == 3.
    self.assertAllClose([3.], self.evaluate(v))

    # --- DynamicLossScale case ---
    v = variables.Variable([5.0])
    inner = gradient_descent.SGD(lr)
    dynamic_ls = tf_loss_scale_module.DynamicLossScale(
        initial_loss_scale=4, increment_period=1, multiplier=2)
    dynamic_ls._current_loss_scale.assign(2)
    opt = loss_scale_optimizer.LossScaleOptimizerV1(inner, dynamic_ls)
    self.assertEqual(opt.initial_scale, 4)
    self.assertEqual(opt.dynamic_growth_steps, 1)
    self.evaluate(variables.global_variables_initializer())
    # The current loss scale is not copied over, so the wrapper
    # reinitializes the loss scale to 4 (not the assigned 2).
    self.assertEqual(self.evaluate(opt.loss_scale), 4)
    for counter in strategy.experimental_local_results(opt.dynamic_counter):
      self.assertEqual(self.evaluate(counter), 0)
    step_fn = self._run_fn_with_grad_check(
        strategy, v, opt, 4 / strategy.num_replicas_in_sync)
    step_op = strategy.experimental_run(step_fn)
    self.evaluate(variables.global_variables_initializer())
    self._run_if_in_graph_mode(step_op)
    self.assertAllClose([3.], self.evaluate(v))
def testV1Optimizer(self, strategy_fn):
  """Tests LossScaleOptimizerV1 with fixed and 'dynamic' loss_scale args."""
  strategy = strategy_fn()
  lr = 2.
  with strategy.scope():
    # --- Fixed loss scale (loss_scale=2) ---
    v = variables.Variable([5.0])
    opt = loss_scale_optimizer.LossScaleOptimizerV1(
        gradient_descent.SGD(lr), loss_scale=2)
    self.assertIsInstance(opt.loss_scale, ops.Tensor)
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(self.evaluate(opt.loss_scale), 2)
    self.assertEqual(opt.initial_scale, 2)
    self.assertIsNone(opt.dynamic_growth_steps)
    step_fn = self._run_fn_with_grad_check(
        strategy, v, opt, 2 / strategy.num_replicas_in_sync)
    step_op = strategy.experimental_run(step_fn)
    self.evaluate(variables.global_variables_initializer())
    self._run_if_in_graph_mode(step_op)
    # The loss is the identity of the variable, so the gradient is 1 and
    # the variable becomes init_val - grad * lr == 5 - 1 * 2 == 3.
    self.assertAllClose([3.], self.evaluate(v))

    # --- Dynamic loss scale (loss_scale='dynamic') ---
    v = variables.Variable([5.0])
    opt = loss_scale_optimizer.LossScaleOptimizerV1(
        gradient_descent.SGD(lr), 'dynamic')
    # 'dynamic' implies the default initial scale of 2**15 and a growth
    # period of 2000 steps.
    self.assertEqual(opt.initial_scale, 2 ** 15)
    self.assertEqual(opt.dynamic_growth_steps, 2000)
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(self.evaluate(opt.loss_scale), 2 ** 15)
    for counter in strategy.experimental_local_results(opt.dynamic_counter):
      self.assertEqual(self.evaluate(counter), 0)

    def nan_loss():
      return v * float('NaN')

    def step_fn():
      return opt.minimize(nan_loss, var_list=[v])

    step_op = strategy.experimental_run(step_fn)
    self.evaluate(variables.global_variables_initializer())
    self._run_if_in_graph_mode(step_op)
    # A NaN gradient must skip the update and halve the loss scale while
    # leaving the counter at zero.
    self.assertAllClose([5.], self.evaluate(v))
    self.assertEqual(self.evaluate(opt.loss_scale), 2 ** 14)
    for counter in strategy.experimental_local_results(opt.dynamic_counter):
      self.assertEqual(self.evaluate(counter), 0)
def testPassingV1LossScaleErrors(self):
  """Tests invalid V1 LossScale arguments raise the documented errors."""
  opt = gradient_descent.SGD()

  # A DynamicLossScale with any multiplier other than 2 is rejected.
  bad_multiplier = tf_loss_scale_module.DynamicLossScale(multiplier=4)
  with self.assertRaisesRegex(
      ValueError,
      'When passing a DynamicLossScale to "loss_scale", '
      'DynamicLossScale.multiplier must be 2. Got: '
      'DynamicLossScale'):
    loss_scale_optimizer.LossScaleOptimizerV1(opt, bad_multiplier)

  class CustomLossScale(tf_loss_scale_module.LossScale):
    """A LossScale subclass that is neither Fixed nor Dynamic."""

    def __call__(self):
      return 1.

    def update(self, grads):
      return None, True

    def get_config(self):
      return {}

  # Arbitrary LossScale subclasses are no longer supported.
  with self.assertRaisesRegex(
      TypeError,
      'Passing a LossScale that is not a FixedLossScale or a '
      'DynamicLossScale is no longer supported. Got:'):
    loss_scale_optimizer.LossScaleOptimizerV1(opt, CustomLossScale())