def __init__(self, opt, loss_scale):
  """Initializes this loss scale optimizer.

  Args:
    opt: The Optimizer instance to wrap.
    loss_scale: The loss scale to scale the loss and gradients. This can
      either be an int/float to use a fixed loss scale, the string "dynamic"
      to use dynamic loss scaling, or an instance of a LossScale. The string
      "dynamic" is equivalent to passing `DynamicLossScale()`, and passing an
      int/float is equivalent to passing a FixedLossScale with the given loss
      scale.
  """
  if not isinstance(opt, optimizer_v2.OptimizerV2):
    raise ValueError(
        '"opt" must be an instance of OptimizerV2, but got: %s' % opt)
  if hasattr(opt, 'clipnorm'):
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipnorm. Optimizer %s has clipnorm '
                     '%s' % (opt, opt.clipnorm))
  if hasattr(opt, 'clipvalue'):
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipvalue. Optimizer %s has '
                     'clipvalue %s' % (opt, opt.clipvalue))
  self._optimizer = opt
  self._loss_scale = loss_scale_module.get(loss_scale)
  self._track_trackable(self._loss_scale, 'loss_scale')
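
# Usage sketch for the constructor above (not part of the original file).
# Assumes the TF 2.x experimental mixed precision API, where this
# constructor was exposed as
# tf.keras.mixed_precision.experimental.LossScaleOptimizer; the import path
# is an assumption, not taken from the snippet.
import tensorflow as tf

# An int/float loss_scale is equivalent to passing a FixedLossScale:
lso_fixed = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.SGD(learning_rate=0.1), loss_scale=1024)

# The string 'dynamic' is equivalent to passing DynamicLossScale():
lso_dynamic = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.SGD(learning_rate=0.1), loss_scale='dynamic')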
def test_get(self):
  scalar = loss_scale_module.get('dynamic')
  scalar2 = loss_scale_module.DynamicLossScale()
  self.assertEqual(scalar.initial_loss_scale, scalar2.initial_loss_scale)
  self.assertEqual(scalar.increment_period, scalar2.increment_period)
  self.assertEqual(scalar.multiplier, scalar2.multiplier)
def __init__(self, optimizer, loss_scale):
  """Initializes this loss scale optimizer.

  Args:
    optimizer: The Optimizer instance to wrap.
    loss_scale: The loss scale to scale the loss and gradients. This can
      either be an int/float to use a fixed loss scale, the string "dynamic"
      to use dynamic loss scaling, or an instance of a LossScale. The string
      "dynamic" is equivalent to passing `DynamicLossScale()`, and passing an
      int/float is equivalent to passing a FixedLossScale with the given loss
      scale.
  """
  if not isinstance(optimizer, optimizer_v2.OptimizerV2):
    raise ValueError('"optimizer" must be an instance of OptimizerV2, but '
                     'got: %s' % optimizer)
  if optimizer.clipnorm is not None:
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipnorm. Optimizer %s has clipnorm '
                     '%s' % (optimizer, optimizer.clipnorm))
  if optimizer.clipvalue is not None:
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipvalue. Optimizer %s has '
                     'clipvalue %s' % (optimizer, optimizer.clipvalue))

  self._raise_if_strategy_unsupported()
  self.clipnorm = None
  self.clipvalue = None

  self._optimizer = optimizer
  self._loss_scale = keras_loss_scale_module.get(loss_scale)
  if self._loss_scale is None:
    raise ValueError('loss_scale cannot be None.')

  # We don't call super().__init__, since we do not want to call
  # OptimizerV2's constructor.
  _DelegatingTrackableMixin.__init__(self, self._optimizer)

  for weight in loss_scale_module.get_loss_scale_weights(self._loss_scale):
    # We cannot call `track_variable` in the LossScale class itself, because
    # a file outside of Keras cannot depend on a Keras file. Calling it here
    # instead is OK, because a variable only needs to be tracked if used
    # with a Keras class, and the only way to use LossScale with a Keras
    # class is through the LossScaleOptimizer.
    backend.track_variable(weight)
  self._track_trackable(self._loss_scale, 'loss_scale')

  # Needed because the superclass's __getattribute__ checks this.
  self._hyper = {}

  # To support restoring TensorFlow 2.2 checkpoints.
  self._track_trackable(FakeOptimizerForRestoration(self._optimizer),
                        'base_optimizer')
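
# Sketch of the clipnorm/clipvalue rejection in the constructor above (not
# from the original file; assumes the same public LossScaleOptimizer entry
# point as in the earlier sketch).
import tensorflow as tf

clipped = tf.keras.optimizers.SGD(learning_rate=0.1, clipnorm=1.0)
try:
  tf.keras.mixed_precision.experimental.LossScaleOptimizer(
      clipped, loss_scale='dynamic')
except ValueError as e:
  # "LossScaleOptimizer does not support wrapping optimizers with a
  # clipnorm. ..."
  print(e)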
def __init__(self, name, loss_scale=USE_DEFAULT):
  """Constructs the policy.

  The `name` argument determines the compute and variable dtype, the default
  loss scale, and has no additional effect on the Policy. The compute and
  variable dtypes can only be specified through `name`, and cannot be
  specified directly.

  Args:
    name: A string. Can be one of the following values:
      * Any dtype name, such as 'float32' or 'float64'. Both the variable
        and compute dtypes will be that dtype.
      * 'mixed_float16' or 'mixed_bfloat16': The compute dtype is float16 or
        bfloat16, while the variable dtype is float32. With 'mixed_float16',
        a dynamic loss scale is used. These policies are used for mixed
        precision training.
      * 'infer' (deprecated): Infer the compute and variable dtype from the
        input dtype.
    loss_scale: A `tf.mixed_precision.experimental.LossScale`, an int (which
      uses a `FixedLossScale`), or the string "dynamic" (which uses a
      `DynamicLossScale`). Defaults to using no loss scaling unless `name`
      is "mixed_float16", in which case this defaults to "dynamic". Only
      `tf.keras.Model`s, not layers, use the loss scale, and it is only used
      during `Model.fit`, `Model.train_on_batch`, and other similar methods.
  """
  if isinstance(name, dtypes.DType):
    raise TypeError("'name' must be a string, not a DType. "
                    "Instead, pass DType.name. Got: %s" % (name.name,))
  elif not isinstance(name, six.string_types):
    raise TypeError("'name' must be a string, but got: %s" % (name,))
  self._name = name
  self._compute_dtype, self._variable_dtype = self._parse_name(name)
  if loss_scale == USE_DEFAULT:
    loss_scale = 'dynamic' if name == 'mixed_float16' else None
    self._using_default_loss_scale = True
  else:
    self._using_default_loss_scale = False
  if loss_scale and self._compute_dtype not in (None, 'float16'):
    tf_logging.warn('Creating a Policy with a loss scale is only useful for '
                    'float16 policies. You passed loss_scale=%r for policy '
                    '%s. Consider not passing any loss_scale instead.' %
                    (loss_scale, name))
  self._loss_scale = keras_loss_scale_module.get(loss_scale)

  if name in ('mixed_float16', 'mixed_bfloat16'):
    device_compatibility_check.log_device_compatibility_check(
        name, skip_local=True)
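
# Sketch of the default loss scale behavior above (illustrative; assumes
# this constructor was exposed as
# tf.keras.mixed_precision.experimental.Policy with a `loss_scale`
# property, which is an assumption not shown in the snippet).
import tensorflow as tf

policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
print(policy.loss_scale)  # A DynamicLossScale: the default for mixed_float16.

policy = tf.keras.mixed_precision.experimental.Policy('float32')
print(policy.loss_scale)  # None: no loss scaling by default for float32.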
def __init__(self, optimizer, loss_scale):
  """Initializes this loss scale optimizer.

  Args:
    optimizer: The Optimizer instance to wrap.
    loss_scale: The loss scale to scale the loss and gradients. This can
      either be an int/float to use a fixed loss scale, the string "dynamic"
      to use dynamic loss scaling, or an instance of a LossScale. The string
      "dynamic" is equivalent to passing `DynamicLossScale()`, and passing an
      int/float is equivalent to passing a FixedLossScale with the given loss
      scale.
  """
  if not isinstance(optimizer, optimizer_v2.OptimizerV2):
    raise ValueError('"optimizer" must be an instance of OptimizerV2, but '
                     'got: %s' % optimizer)
  if hasattr(optimizer, 'clipnorm'):
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipnorm. Optimizer %s has clipnorm '
                     '%s' % (optimizer, optimizer.clipnorm))
  if hasattr(optimizer, 'clipvalue'):
    raise ValueError('LossScaleOptimizer does not support wrapping '
                     'optimizers with a clipvalue. Optimizer %s has '
                     'clipvalue %s' % (optimizer, optimizer.clipvalue))

  self._optimizer = optimizer
  self._loss_scale = keras_loss_scale_module.get(loss_scale)
  for weight in loss_scale_module.get_loss_scale_weights(self._loss_scale):
    # We cannot call `track_variable` in the LossScale class itself, because
    # a file outside of Keras cannot depend on a Keras file. Calling it here
    # instead is OK, because a variable only needs to be tracked if used
    # with a Keras class, and the only way to use LossScale with a Keras
    # class is through the LossScaleOptimizer.
    backend.track_variable(weight)
  self._track_trackable(self._optimizer, 'base_optimizer')
  self._track_trackable(self._loss_scale, 'loss_scale')

  # Needed because the superclass's __getattribute__ checks this.
  self._hyper = {}
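
# Sketch of a consequence of the trackable bookkeeping above: because the
# loss scale is tracked via `_track_trackable`, checkpointing the wrapper
# also saves dynamic loss scale state. Illustrative only; assumes the same
# public entry points as the earlier sketches.
import tensorflow as tf

opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.SGD(learning_rate=0.1), loss_scale='dynamic')
ckpt = tf.train.Checkpoint(optimizer=opt)
save_path = ckpt.save('/tmp/lso_ckpt')  # Loss scale state is included.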
def __init__(self, name, loss_scale='auto'):
  """Constructs the policy.

  The `name` argument determines the compute and variable dtype, the default
  loss scale, and has no additional effect on the Policy. The compute and
  variable dtypes can only be specified through `name`, and cannot be
  specified directly.

  Args:
    name: A string. Can be one of the following values:
      * Any dtype name, such as 'float32' or 'float64'. Both the variable
        and compute dtypes will be that dtype.
      * 'mixed_float16' or 'mixed_bfloat16': The compute dtype is float16 or
        bfloat16, while the variable dtype is float32. With 'mixed_float16',
        a dynamic loss scale is used. These policies are used for mixed
        precision training.
    loss_scale: A `tf.compat.v1.mixed_precision.LossScale`, an int (which
      uses a `FixedLossScale`), the string "dynamic" (which uses a
      `DynamicLossScale`), or None (which uses no loss scale). Defaults to
      `"auto"`. In the `"auto"` case: 1) if `name` is `"mixed_float16"`,
      then use `loss_scale="dynamic"`. 2) otherwise, do not use a loss
      scale. Only `tf.keras.Model`s, not layers, use the loss scale, and it
      is only used during `Model.fit`, `Model.train_on_batch`, and other
      similar methods.
  """
  super(PolicyV1, self).__init__(name)
  if loss_scale == 'auto':
    loss_scale = 'dynamic' if name == 'mixed_float16' else None
    self._using_default_loss_scale = True
  else:
    self._using_default_loss_scale = False
  if loss_scale and self._compute_dtype not in (None, 'float16'):
    tf_logging.warn('Creating a Policy with a loss scale is only useful for '
                    'float16 policies. You passed loss_scale=%r for policy '
                    '%s. Consider not passing any loss_scale instead.' %
                    (loss_scale, name))
  self._loss_scale = keras_loss_scale_module.get(loss_scale)
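
# Sketch of the 'auto' default above (not from the original file; it uses
# the PolicyV1 class directly, since the snippet does not show its public
# export path).
p = PolicyV1('mixed_float16')                   # 'auto' -> dynamic loss scaling.
q = PolicyV1('float32')                         # 'auto' -> no loss scale.
r = PolicyV1('mixed_float16', loss_scale=128)   # Explicit FixedLossScale(128).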
def test_serialization(self):
  loss_scale = loss_scale_module.get(123)
  config = loss_scale_module.serialize(loss_scale)
  loss_scale = loss_scale_module.deserialize(config)
  self.assertEqual(self.evaluate(loss_scale()), 123.)
def __init__(self, name, loss_scale=USE_DEFAULT):
  """Constructs the policy.

  The `name` argument determines the compute and variable dtype, and has no
  additional effect on the Policy. The compute and variable dtypes can only
  be specified through `name`, and cannot be specified directly.

  Args:
    name: A string. Can be one of the following values:
      * Any dtype name, such as 'float32' or 'float64'. Both the variable
        and compute dtypes will be that dtype.
      * 'mixed_float16' or 'mixed_bfloat16': The compute dtype is float16 or
        bfloat16, while the variable dtype is float32. With 'mixed_float16',
        a dynamic loss scale is used. These policies are used for mixed
        precision training.
      * 'infer' (deprecated): Infer the compute and variable dtype from the
        input dtype.
    loss_scale: A `tf.mixed_precision.experimental.LossScale`, or a value
      convertible to one such as "dynamic". Defaults to using no loss
      scaling unless `name` is "mixed_float16", in which case this defaults
      to "dynamic". Only `tf.keras.Model`s, not layers, use the loss scale,
      and it is only used during `Model.fit`, `Model.train_on_batch`, and
      other similar methods.
  """
  if isinstance(name, dtypes.DType):
    raise TypeError("'name' must be a string, not a DType. "
                    "Instead, pass DType.name. Got: %s" % (name.name,))
  elif not isinstance(name, six.string_types):
    raise TypeError("'name' must be a string, but got: %s" % (name,))

  if name == 'infer_float32_vars':
    # For backwards compatibility. TODO(reedwm): Remove this.
    name = 'infer_with_float32_vars'
  if name == 'float32_with_float32_vars':
    # Doesn't affect correctness, but causes "float32" instead of
    # "float32_with_float32_vars" to be printed in __repr__.
    name = 'float32'

  self._name = name
  self._compute_dtype, self._variable_dtype = self._parse_name(name)

  if name.endswith('_with_float32_vars') and self._warn_about_float32_vars:
    warning = ("WARNING: The '%s' policy is deprecated and will be removed "
               "in TensorFlow 2.1." % name)
    if name == 'infer_with_float32_vars':
      warning += (" Please use the 'mixed_float16' or 'mixed_bfloat16' "
                  "policy instead.")
    elif name == 'float16_with_float32_vars':
      warning += " Please use the 'mixed_float16' policy instead."
    elif name == 'bfloat16_with_float32_vars':
      warning += " Please use the 'mixed_bfloat16' policy instead."
    tf_logging.warn(warning)

  if loss_scale == USE_DEFAULT:
    loss_scale = 'dynamic' if name == 'mixed_float16' else None
    self._using_default_loss_scale = True
  else:
    self._using_default_loss_scale = False
  if loss_scale and self._compute_dtype not in (None, 'float16'):
    tf_logging.warn('Creating a Policy with a loss scale is only useful for '
                    'float16 policies. You passed loss_scale=%r for policy '
                    '%s. Consider not passing any loss_scale instead.' %
                    (loss_scale, name))
  self._loss_scale = keras_loss_scale_module.get(loss_scale)
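
# Sketch of the backwards-compatibility renames above (illustrative; it uses
# the Policy class directly and assumes the usual `name` property backed by
# `self._name`, neither of which is shown in the snippet).
p = Policy('infer_float32_vars')   # Rewritten to 'infer_with_float32_vars';
                                   # a deprecation warning is logged.
q = Policy('float32_with_float32_vars')
print(q.name)  # 'float32', per the __repr__ normalization above.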