Example #1
import tensorflow as tf

# `compat`, `RevisedDynamicLossScale` and `HorovodDistributedLossScaleOptimizer`
# are provided by the enclosing project; their imports are omitted in this excerpt.
def handle_fp16_and_distributed_optimizer(optimizer,
                                          lr_schedule,
                                          hvd_backend=None):
    if hvd_backend == "horovod":
        import horovod.tensorflow.keras as hvd
        from horovod.tensorflow import Compression
    elif hvd_backend == "byteps":
        import byteps.tensorflow.keras as hvd
        from byteps.tensorflow import Compression

    if hvd_backend:
        compression = Compression.none
        if compat.CUSTOM_GLOBAL_FLOATX == "float16":
            compression = Compression.fp16

    if lr_schedule is not None and hvd_backend is None:
        # TODO(ZhaoChengqi): pay attention to API changes
        optimizer._set_hyper("learning_rate", lr_schedule)
    # specify the following scenario
    if compat.CUSTOM_GLOBAL_FLOATX == "float16":
        if compat.IS_PREV_TF_2_4_0:
            from tensorflow.keras.mixed_precision.experimental import LossScaleOptimizer
            from tensorflow.python.keras import backend
            from tensorflow.python.training.experimental.loss_scale import get_loss_scale_weights

            revised_loss_scale = RevisedDynamicLossScale()
            if hvd_backend:
                opt = LossScaleOptimizer(optimizer, loss_scale=1)
                opt = hvd.DistributedOptimizer(opt,
                                               compression=compression,
                                               sparse_as_dense=True)
                opt._loss_scale = revised_loss_scale
                for weight in get_loss_scale_weights(opt._loss_scale):
                    backend.track_variable(weight)
                opt._track_trackable(opt._loss_scale,
                                     'loss_scale',
                                     overwrite=True)
            else:
                opt = LossScaleOptimizer(optimizer,
                                         loss_scale=revised_loss_scale)
        else:
            if hvd_backend:
                opt = HorovodDistributedLossScaleOptimizer(
                    inner_optimizer=optimizer,
                    compression=compression,
                    sparse_as_dense=True,
                    hvd_backend=hvd_backend)
            else:
                opt = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
                opt._loss_scale = RevisedDynamicLossScale(
                    initial_loss_scale=2**15, growth_steps=2000, multiplier=2)
                opt._track_trackable(opt._loss_scale,
                                     "loss_scale",
                                     overwrite=True)
        return opt

    return optimizer
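
A minimal usage sketch for the helper above (hypothetical values; not part of the original example, and assuming the project's compat flags already control whether the float16 branch is taken):

import tensorflow as tf

# Non-distributed case: the helper attaches the schedule to the optimizer's
# "learning_rate" hyperparameter and, under float16, wraps it in a loss scale
# optimizer with the project's RevisedDynamicLossScale.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3, decay_steps=1000, decay_rate=0.96)
opt = handle_fp16_and_distributed_optimizer(tf.keras.optimizers.Adam(),
                                            lr_schedule, hvd_backend=None)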
Example #2
    def __init__(self, optimizer, loss_scale):
        """Initializes this loss scale optimizer.

    Args:
      optimizer: The Optimizer instance to wrap.
      loss_scale: The loss scale to scale the loss and gradients. This can
        either be an int/float to use a fixed loss scale, the string "dynamic"
        to use dynamic loss scaling, or an instance of a LossScale. The string
        "dynamic" equivalent to passing `DynamicLossScale()`, and passing an
        int/float is equivalent to passing a FixedLossScale with the given loss
        scale.
    """
        if not isinstance(optimizer, optimizer_v2.OptimizerV2):
            raise ValueError(
                '"optimizer" must be an instance of OptimizerV2, but '
                'got: %s' % optimizer)
        if optimizer.clipnorm is not None:
            raise ValueError(
                'LossScaleOptimizer does not support wrapping '
                'optimizers with a clipnorm. Optimizer %s has clipnorm '
                '%s' % (optimizer, optimizer.clipnorm))

        if optimizer.clipvalue is not None:
            raise ValueError('LossScaleOptimizer does not support wrapping '
                             'optimizers with a clipvalue. Optimizer %s has '
                             'clipvalue %s' % (optimizer, optimizer.clipvalue))
        self._raise_if_strategy_unsupported()

        self.clipnorm = None
        self.clipvalue = None

        self._optimizer = optimizer
        self._loss_scale = keras_loss_scale_module.get(loss_scale)
        if self._loss_scale is None:
            raise ValueError('loss_scale cannot be None.')

        # We don't call super().__init__, since we do not want to call OptimizerV2's
        # constructor.
        _DelegatingTrackableMixin.__init__(self, self._optimizer)

        for weight in loss_scale_module.get_loss_scale_weights(
                self._loss_scale):
            # We cannot call `track_variable` in the LossScale class itself, because a
            # file outside of Keras cannot depend on a Keras file. Calling it here
            # instead is OK, because a variable only needs to be tracked if used with
            # a Keras class, and the only way to use LossScale with a Keras class is
            # through the LossScaleOptimizer.
            backend.track_variable(weight)
        self._track_trackable(self._loss_scale, 'loss_scale')

        # Needed because the superclass's __getattribute__ checks this.
        self._hyper = {}

        # To support restoring TensorFlow 2.2 checkpoints.
        self._track_trackable(FakeOptimizerForRestoration(self._optimizer),
                              'base_optimizer')
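
The constructor above mirrors the pre-2.4 tf.keras.mixed_precision.experimental.LossScaleOptimizer. A sketch, not taken from the original source, of how such a wrapper is typically driven in a custom training step: scale the loss before differentiating, then unscale the gradients before applying them.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.SGD(0.1), "dynamic")

@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        loss = tf.reduce_mean(tf.square(model(x) - y))
        scaled_loss = opt.get_scaled_loss(loss)        # multiply loss by the loss scale
    scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
    grads = opt.get_unscaled_gradients(scaled_grads)   # divide gradients by the loss scale
    opt.apply_gradients(zip(grads, model.trainable_variables))
    return loss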
Example #3
    def __call__(opt, loss_scale):
        """Initializes a loss scaled optimizer.

        Args:
          opt: The Optimizer instance to wrap.
          loss_scale: The loss scale to scale the loss and gradients. This can
            either be an int/float to use a fixed loss scale, the string "dynamic"
            to use dynamic loss scaling, or an instance of a LossScale. The string
            "dynamic" equivalent to passing `DynamicLossScale()`, and passing an
            int/float is equivalent to passing a FixedLossScale with the given loss
            scale.
        Returns:
          Keras Optimizer with loss scaling
        """

        opt._loss_scale = loss_scale_module.get(loss_scale)

        for weight in loss_scale_module.get_loss_scale_weights(
                opt._loss_scale):
            # We cannot call `track_variable` in the LossScale class itself, because a
            # file outside of Keras cannot depend on a Keras file. Calling it here
            # instead is OK, because a variable only needs to be tracked if used with
            # a Keras class, and the only way to use LossScale with a Keras class is
            # through the LossScaleOptimizer.
            backend.track_variable(weight)

        opt._track_trackable(opt._loss_scale, 'loss_scale')

        class BaseOptimizer(object):
            _class = opt.__class__
            _classname = "%s.%s" % (opt.__module__, opt.__class__.__name__)
            _compute_gradients = opt._compute_gradients
            get_gradients = opt.get_gradients
            apply_gradients = opt.apply_gradients
            get_config = opt.get_config
            from_config = opt.from_config

        opt.loss_scale_base_opt = BaseOptimizer

        # Generate a fake class named "LossScaleOptimizer"; this is essential
        # to avoid modifying the optimizer's original class.

        base_opt_class_dict = dict(opt.__class__.__dict__)
        base_opt_class_dict.update(dict(LossScaleOptimizer.__dict__))

        del base_opt_class_dict["__call__"]
        del base_opt_class_dict["__dict__"]
        del base_opt_class_dict["__weakref__"]

        opt.__class__ = type(LossScaleOptimizer.__name__,
                             (opt.loss_scale_base_opt._class, ),
                             base_opt_class_dict)

        return opt
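
The __call__ above rebases the wrapped optimizer's class at runtime via type(name, bases, dict), so the instance keeps its state while picking up the loss-scaling methods. Below is an isolated sketch of that class-rewriting pattern using hypothetical classes, not the library's actual API.

class Counter(object):
    def __init__(self):
        self.n = 0
    def step(self):
        self.n += 1
        return self.n

class LoggedCounter(object):
    def step(self):
        value = self._base_step()          # call the original, pre-rewrite method
        print("stepped to", value)
        return value

c = Counter()
c._base_step = c.step                      # keep a bound handle to the original method
patched = dict(Counter.__dict__)
patched.update(dict(LoggedCounter.__dict__))
for key in ("__dict__", "__weakref__"):    # drop descriptors that would break the new type
    patched.pop(key, None)
c.__class__ = type("LoggedCounter", (Counter,), patched)
c.step()                                   # now routes through LoggedCounter.step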
Example #4
    def __init__(self, optimizer, loss_scale):
        """Initializes this loss scale optimizer.

    Args:
      optimizer: The Optimizer instance to wrap.
      loss_scale: The loss scale to scale the loss and gradients. This can
        either be an int/float to use a fixed loss scale, the string "dynamic"
        to use dynamic loss scaling, or an instance of a LossScale. The string
        "dynamic" equivalent to passing `DynamicLossScale()`, and passing an
        int/float is equivalent to passing a FixedLossScale with the given loss
        scale.
    """
        if not isinstance(optimizer, optimizer_v2.OptimizerV2):
            raise ValueError(
                '"optimizer" must be an instance of OptimizerV2, but '
                'got: %s' % optimizer)
        if hasattr(optimizer, 'clipnorm'):
            raise ValueError(
                'LossScaleOptimizer does not support wrapping '
                'optimizers with a clipnorm. Optimizer %s has clipnorm '
                '%s' % (optimizer, optimizer.clipnorm))

        if hasattr(optimizer, 'clipvalue'):
            raise ValueError('LossScaleOptimizer does not support wrapping '
                             'optimizers with a clipvalue. Optimizer %s has '
                             'clipvalue %s' % (optimizer, optimizer.clipvalue))

        self._optimizer = optimizer
        self._loss_scale = keras_loss_scale_module.get(loss_scale)
        for weight in loss_scale_module.get_loss_scale_weights(
                self._loss_scale):
            # We cannot call `track_variable` in the LossScale class itself, because a
            # file outside of Keras cannot depend on a Keras file. Calling it here
            # instead is OK, because a variable only needs to be tracked if used with
            # a Keras class, and the only way to use LossScale with a Keras class is
            # through the LossScaleOptimizer.
            backend.track_variable(weight)
        self._track_trackable(self._optimizer, 'base_optimizer')
        self._track_trackable(self._loss_scale, 'loss_scale')

        # Needed because the superclass's __getattribute__ checks this.
        self._hyper = {}
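
As the docstring states, loss_scale may be an int/float, the string "dynamic", or a LossScale instance. A brief sketch of the three forms against the pre-2.4 experimental API (fresh optimizers per wrapper; not from the original source):

import tensorflow as tf

# An int/float yields a fixed loss scale, "dynamic" a default DynamicLossScale,
# and a LossScale instance is used as-is.
fixed = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.Adam(), 128)
dynamic = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.Adam(), "dynamic")
explicit = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.Adam(),
    tf.mixed_precision.experimental.DynamicLossScale(initial_loss_scale=2 ** 15))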
Example #5
def _handle_fp16_and_distributed_optimizer(optimizer,
                                           lr_schedule,
                                           hvd_backend=None):
    if hvd_backend == "horovod":
        import horovod.tensorflow.keras as hvd
        from horovod.tensorflow import Compression
    elif hvd_backend == "byteps":
        import byteps.tensorflow.keras as hvd
        from byteps.tensorflow import Compression

    if hvd_backend:
        compression = Compression.none
        if compat.CUSTOM_GLOBAL_FLOATX == "float16":
            compression = Compression.fp16

    if lr_schedule is not None and hvd_backend is None:
        # TODO(ZhaoChengqi): pay attention to API changes
        optimizer._set_hyper("learning_rate", lr_schedule)
    # specify the following scenario
    # there is a bug under TF2.3+Horovod+fp16+XLA
    if compat.CUSTOM_GLOBAL_FLOATX == "float16":
        logging.info("NOTICE: using revised DynamicLossScale under fp16")
        revised_loss_scale = training_utils.RevisedDynamicLossScale()
        if hvd_backend:
            opt = LossScaleOptimizer(optimizer, loss_scale=1)
            opt = hvd.DistributedOptimizer(opt,
                                           compression=compression,
                                           sparse_as_dense=True)
            opt._loss_scale = revised_loss_scale
            for weight in loss_scale_module.get_loss_scale_weights(
                    opt._loss_scale):
                backend.track_variable(weight)
            opt._track_trackable(opt._loss_scale, 'loss_scale', overwrite=True)
        else:
            opt = LossScaleOptimizer(optimizer, loss_scale=revised_loss_scale)
        return opt
    return optimizer
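
A sketch of the Horovod path for the helper above (hypothetical setup, not part of the original example; assumes Horovod is installed and the project's compat flags report float16):

import tensorflow as tf
import horovod.tensorflow.keras as hvd

hvd.init()
base_opt = tf.keras.optimizers.Adam(learning_rate=1e-3 * hvd.size())
opt = _handle_fp16_and_distributed_optimizer(base_opt, lr_schedule=None,
                                             hvd_backend="horovod")
# `opt` is now a Horovod DistributedOptimizer with fp16 gradient compression,
# and its loss scale has been replaced by the project's RevisedDynamicLossScale.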