Example #1
 def add_slot(self, var, slot_name, initializer="zeros"):
   """Add a new slot variable for `var`."""
   if slot_name not in self._slot_names:
     self._slot_names.append(slot_name)
   var_key = _var_key(var)
   slot_dict = self._slots.setdefault(var_key, {})
   weight = slot_dict.get(slot_name, None)
   if weight is None:
     if isinstance(initializer, six.string_types) or callable(initializer):
       initializer = initializers.get(initializer)
       initial_value = functools.partial(
           initializer, shape=var.shape, dtype=var.dtype)
     else:
       initial_value = initializer
     weight = tf_variables.Variable(
         name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
         dtype=var.dtype,
         trainable=False,
         initial_value=initial_value)
     backend.track_variable(weight)
     slot_dict[slot_name] = weight
     self._restore_slot_variable(
         slot_name=slot_name, variable=var,
         slot_variable=weight)
     self._weights.append(weight)
   return weight
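A minimal usage sketch for the method above, assuming a TF 2.x optimizer built on OptimizerV2 (in newer TF releases these live under tf.keras.optimizers.legacy); the variable and slot names are illustrative:

import tensorflow as tf

var = tf.Variable(tf.zeros([3, 4]), name="kernel")
opt = tf.keras.optimizers.SGD(momentum=0.9)
# add_slot creates (or returns the cached) non-trainable slot variable
# shaped and typed like `var`.
slot = opt.add_slot(var, "momentum")
print(slot.shape, slot.trainable)  # (3, 4) False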
Example #2
  def _add_weight(self,
                  name,
                  shape=(),
                  dtype=None,
                  initializer='zeros'):
    """Adds a weight to this loss scale.

    Args:
      name: Variable name.
      shape: Variable shape.
      dtype: The type of the variable.
      initializer: The initializer to use.

    Returns:
      A variable.
    """
    if isinstance(initializer, six.string_types) or callable(initializer):
      initializer = initializers.get(initializer)
    variable = self._add_variable_with_custom_getter(
        name=name,
        shape=shape,
        getter=base_layer_utils.make_variable,
        overwrite=True,
        initializer=initializer,
        dtype=dtype,
        trainable=False,
        use_resource=True,
        synchronization=variables.VariableSynchronization.AUTO,
        # Set aggregation to NONE, as loss scaling variables should never be
        # aggregated.
        aggregation=variables.VariableAggregation.NONE)
    backend.track_variable(variable)
    return variable
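Both examples so far normalize the `initializer` argument through `initializers.get`; a small sketch of that behavior with the public Keras API:

import tensorflow as tf

# A string name resolves to an Initializer instance, which is then called
# with an explicit shape and dtype.
init = tf.keras.initializers.get("zeros")
print(init(shape=(2, 2), dtype=tf.float32))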
Example #3
    def opt_variable(value, dtype=None, name=None, constraint=None):
      """Instantiates a variable and returns it."""
      if dtype is None:
        dtype = backend.floatx()

      variables = []
      for i in range(num_replicas):
        # Keras holds the variables in the optimizer class instance, so the name
        # does not matter here. ResourceVariable constructor will find a unique
        # name (including name=None) for each replica.
        with ops.device("device:TPU:{}".format(i)):
          v = resource_variable_ops.ResourceVariable(
              value,
              dtype=dtypes_module.as_dtype(dtype),
              name=name,
              constraint=constraint)
          variables.append(v)
      name = "replicate_{}_{}".format("variable" if name is None else name,
                                      ops.uid())
      v = ReplicatedVariable(name, variables)

      # pylint: disable=protected-access

      if isinstance(value, np.ndarray):
        v._keras_shape = value.shape
      elif hasattr(value, "shape"):
        v._keras_shape = backend.int_shape(value)
      v._uses_learning_phase = False
      backend.track_variable(v)
      return v
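A toy sketch of the per-replica placement loop above, with a CPU device standing in for the TPU cores so it runs anywhere (the ReplicatedVariable wrapper itself is internal to the TF TPU/Keras integration):

import tensorflow as tf

replicas = []
for i in range(2):
    # The real code uses "device:TPU:{}".format(i) to pin one copy per core.
    with tf.device("/CPU:0"):
        replicas.append(tf.Variable(0.0))
print(len(replicas))  # 2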
Example #4
 def _variable_creator(self, next_creator, **kwargs):
   name = kwargs['name']
   if name in self._variable_dict:
     return self._variable_dict[name]
   var = next_creator(**kwargs)
   self._variable_dict[name] = var
   if var.trainable:
     self._trainable_weights.append(var)
   else:
     self._non_trainable_weights.append(var)
   K.track_variable(var)
   return var
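A self-contained sketch of the creator-caching pattern above, using the public `tf.variable_creator_scope` API; the cache and variable names here are illustrative:

import tensorflow as tf

_variable_dict = {}

def _creator(next_creator, **kwargs):
    # Reuse a previously created variable with the same name; otherwise
    # defer to the next creator in the chain.
    name = kwargs["name"]
    if name in _variable_dict:
        return _variable_dict[name]
    var = next_creator(**kwargs)
    _variable_dict[name] = var
    return var

with tf.variable_creator_scope(_creator):
    v1 = tf.Variable(1.0, name="v")
with tf.variable_creator_scope(_creator):
    v2 = tf.Variable(2.0, name="v")
print(v1 is v2)  # True: the second request is served from the cache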
Example #5
  def add_weight(self,
                 name,
                 shape,
                 dtype=None,
                 initializer="zeros",
                 trainable=None,
                 synchronization=tf_variables.VariableSynchronization.AUTO,
                 aggregation=tf_variables.VariableAggregation.NONE):

    if dtype is None:
      dtype = dtypes.float32
    if isinstance(initializer, six.string_types) or callable(initializer):
      initializer = initializers.get(initializer)

    if synchronization == tf_variables.VariableSynchronization.ON_READ:
      if trainable:
        raise ValueError(
            "Synchronization value can be set to "
            "VariableSynchronization.ON_READ only for non-trainable variables. "
            "You have specified trainable=True and "
            "synchronization=VariableSynchronization.ON_READ.")
      else:
        # Set trainable to be false when variable is to be synced on read.
        trainable = False
    elif trainable is None:
      trainable = True

    variable = self._add_variable_with_custom_getter(
        name=name,
        shape=shape,
        getter=base_layer_utils.make_variable,
        overwrite=True,
        initializer=initializer,
        dtype=dtype,
        trainable=trainable,
        use_resource=True,
        synchronization=synchronization,
        aggregation=aggregation)
    backend.track_variable(variable)

    return variable
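The ON_READ rule above mirrors a constraint that `tf.Variable` enforces itself: a variable synchronized on read must be non-trainable. A minimal sketch (TF 2.x):

import tensorflow as tf

# Valid: ON_READ with trainable=False. Passing trainable=True here would
# raise a ValueError, just like the explicit check above.
v = tf.Variable(
    0.0,
    trainable=False,
    synchronization=tf.VariableSynchronization.ON_READ,
    aggregation=tf.VariableAggregation.NONE)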
Example #6
  def __init__(self, optimizer, loss_scale):
    """Initializes this loss scale optimizer.

    Args:
      optimizer: The Optimizer instance to wrap.
      loss_scale: The loss scale to scale the loss and gradients. This can
        either be an int/float to use a fixed loss scale, the string "dynamic"
        to use dynamic loss scaling, or an instance of a LossScale. The string
        "dynamic" equivalent to passing `DynamicLossScale()`, and passing an
        int/float is equivalent to passing a FixedLossScale with the given loss
        scale.
    """
    if not isinstance(optimizer, optimizer_v2.OptimizerV2):
      raise ValueError('"optimizer" must be an instance of OptimizerV2, but '
                       'got: %s' % optimizer)
    self._raise_if_strategy_unsupported()

    self._optimizer = optimizer
    self._loss_scale = keras_loss_scale_module.get(loss_scale)
    if self._loss_scale is None:
      raise ValueError('loss_scale cannot be None.')

    # We don't call super().__init__, since we do not want to call OptimizerV2's
    # constructor.
    _DelegatingTrackableMixin.__init__(self, self._optimizer)

    for weight in self._loss_scale._weights.values():  # pylint: disable=protected-access
      # We cannot call `track_variable` in the LossScale class itself, because a
      # file outside of Keras cannot depend on a Keras file. Calling it here
      # instead is OK, because a variable only needs to be tracked if used with
      # a Keras class, and the only way to use LossScale with a Keras class is
      # through the LossScaleOptimizer.
      backend.track_variable(weight)
    self._track_trackable(self._loss_scale, 'loss_scale')

    # To support restoring TensorFlow 2.2 checkpoints.
    self._track_trackable(FakeOptimizerForRestoration(self._optimizer),
                          'base_optimizer')
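A hedged usage sketch for the wrapper above; the constructor spelling varies across TF versions (TF >= 2.4 shown, earlier releases used the experimental module as in Example #8 below):

import tensorflow as tf

opt = tf.keras.optimizers.SGD(0.1)
# Dynamic loss scaling is the default for the non-experimental wrapper.
lso = tf.keras.mixed_precision.LossScaleOptimizer(opt)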
Example #7
 def add_slot(var, slot_name, initializer="zeros"):
     """Add a new slot variable for `var`."""
     # `self` below is the optimizer instance, captured from the enclosing
     # scope in which this patched method is defined.
     if slot_name not in self._slot_names:
         self._slot_names.append(slot_name)
     var_key = optimizer_v2._var_key(var)
     slot_dict = self._slots.setdefault(var_key, {})
     weight = slot_dict.get(slot_name, None)
     if weight is None:
         if isinstance(initializer,
                       six.string_types) or callable(initializer):
             initializer = initializers.get(initializer)
             initial_value = functools.partial(initializer,
                                               shape=var.shape,
                                               dtype=var.dtype)
         else:
             initial_value = initializer
         strategy = distribute_ctx.get_strategy()
         with strategy.extended.colocate_vars_with(var):
             if isinstance(var, de.TrainableWrapper):
                 weight = de.create_slots(var, initial_value, slot_name,
                                          var._shared_name)
             else:
                 weight = variables.Variable(
                     name="%s/%s" % (
                         var._shared_name,
                         slot_name,
                     ),  # pylint: disable=protected-access
                     dtype=var.dtype,
                     trainable=False,
                     initial_value=initial_value,
                 )
         backend.track_variable(weight)
         slot_dict[slot_name] = weight
         self._restore_slot_variable(slot_name=slot_name,
                                     variable=var,
                                     slot_variable=weight)
         self._weights.append(weight)
     return weight
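A sketch of the colocation pattern used above, with a plain `tf.Variable` standing in for the `de.TrainableWrapper` case (tf.distribute API; the default no-op strategy supports it as well):

import tensorflow as tf

strategy = tf.distribute.get_strategy()
v = tf.Variable(tf.zeros([4]))
with strategy.extended.colocate_vars_with(v):
    # The slot is placed on the same device(s) as `v`.
    slot = tf.Variable(tf.zeros_like(v), trainable=False)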
Example #8
    def __init__(self, opt, loss_scale):
        """Initializes this loss scale optimizer.

        Args:
          opt: The Optimizer instance to wrap.
          loss_scale: The loss scale to scale the loss and gradients. This can
            either be an int/float to use a fixed loss scale, the string "dynamic"
            to use dynamic loss scaling, or an instance of a LossScale. The string
            "dynamic" is equivalent to passing `DynamicLossScale()`, and passing an
            int/float is equivalent to passing a FixedLossScale with the given loss
            scale.
        """
        if not isinstance(opt, optimizer_v2.OptimizerV2):
            raise ValueError(
                '"opt" must be an instance of OptimizerV2, but got: %s' % opt)
        if hasattr(opt, 'clipnorm'):
            raise ValueError(
                'LossScaleOptimizer does not support wrapping '
                'optimizers with a clipnorm. Optimizer %s has clipnorm '
                '%s' % (opt, opt.clipnorm))

        if hasattr(opt, 'clipvalue'):
            raise ValueError('LossScaleOptimizer does not support wrapping '
                             'optimizers with a clipvalue. Optimizer %s has '
                             'clipvalue %s' % (opt, opt.clipvalue))

        self._optimizer = opt
        self._loss_scale = loss_scale_module.get(loss_scale)
        for weight in loss_scale_module.get_loss_scale_weights(
                self._loss_scale):
            # We cannot call `track_variable` in the LossScale class itself, because a
            # file outside of Keras cannot depend on a Keras file. Calling it here
            # instead is OK, because a variable only needs to be tracked if used with
            # a Keras class, and the only way to use LossScale with a Keras class is
            # through the LossScaleOptimizer.
            backend.track_variable(weight)
        self._track_trackable(self._optimizer, 'base_optimizer')
        self._track_trackable(self._loss_scale, 'loss_scale')
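Unlike the newer version in Example #6, this older wrapper rejects inner optimizers configured with gradient clipping; a sketch of the failure mode (experimental mixed precision API, roughly TF 2.0-2.3):

import tensorflow as tf

inner = tf.keras.optimizers.SGD(0.1, clipnorm=1.0)
# Raises ValueError: LossScaleOptimizer does not support wrapping optimizers
# with a clipnorm, per the check above.
lso = tf.keras.mixed_precision.experimental.LossScaleOptimizer(inner, "dynamic")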
Example #9
def _clone_and_build_model(mode,
                           keras_model,
                           custom_objects,
                           features=None,
                           labels=None):
  """Clone and build the given keras_model.

  Args:
    mode: training mode.
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    features: Dict of tensors.
    labels: Dict of tensors, or single tensor instance.

  Returns:
    The newly built model.
  """
  # Set to True during training, False for inference or testing.
  K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)
  input_tensors, target_tensors = _convert_estimator_io_to_keras(
      keras_model, features, labels)

  compile_clone = (mode != model_fn_lib.ModeKeys.PREDICT)

  global_step = None
  if compile_clone:
    # Set iterations to the global step created by tf.train.create_global_step()
    # which is automatically run in the estimator framework.
    global_step = training_util.get_or_create_global_step()
    K.track_variable(global_step)

  clone = models.clone_and_build_model(
      keras_model, input_tensors, target_tensors, custom_objects,
      compile_clone=compile_clone,
      in_place_reset=(not keras_model._is_graph_network),
      optimizer_iterations=global_step)

  return clone
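This helper is internal to the Keras-to-Estimator bridge; a hedged sketch of the public entry point that exercises it (requires the tensorflow_estimator package):

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="sgd", loss="mse")
# The resulting estimator's model_fn clones and rebuilds the Keras model
# once per mode (train/eval/predict).
estimator = tf.keras.estimator.model_to_estimator(keras_model=model)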
Example #10
    def _variable_creator(self, next_creator, **kwargs):
        name = kwargs['name']

        # A variable with this name was already created in this invocation of `call`.
        if name in self._variables_added_in_call:
            raise RuntimeError(
                '`Variable`s in a `Lambda` layer must have unique '
                'names, found duplicate name: {}'.format(name))
        self._variables_added_in_call.add(name)

        # Reuse Variables across invocations of `call`.
        if name in self._variable_dict:
            return self._variable_dict[name]

        # Variable was never created before.
        var = next_creator(**kwargs)
        self._variable_dict[name] = var
        if var.trainable:
            self._trainable_weights.append(var)
        else:
            self._non_trainable_weights.append(var)
        K.track_variable(var)
        return var
Example #11
  def add_slot(self, var, slot_name, initializer="zeros"):
    """Add a new slot variable for `var`."""
    if slot_name not in self._slot_names:
      self._slot_names.append(slot_name)
    var_key = _var_key(var)
    slot_dict = self._slots.setdefault(var_key, {})
    weight = slot_dict.get(slot_name, None)
    if weight is None:
      if isinstance(initializer, six.string_types) or callable(initializer):
        initializer = initializers.get(initializer)
        initial_value = functools.partial(
            initializer, shape=var.shape, dtype=var.dtype)
      else:
        initial_value = initializer
      strategy = distribute_ctx.get_strategy()
      if not strategy.extended.variable_created_in_scope(var):
        raise ValueError(
            "Trying to create optimizer slot variable under the scope for "
            "tf.distribute.Strategy ({}), which is different from the scope "
            "used for the original variable ({}). Make sure the slot "
            "variables are created under the same strategy scope. This may "
            "happen if you're restoring from a checkpoint outside the scope"
            .format(strategy, var))

      with strategy.extended.colocate_vars_with(var):
        weight = tf_variables.Variable(
            name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
            dtype=var.dtype,
            trainable=False,
            initial_value=initial_value)
      backend.track_variable(weight)
      slot_dict[slot_name] = weight
      self._restore_slot_variable(
          slot_name=slot_name, variable=var,
          slot_variable=weight)
      self._weights.append(weight)
    return weight
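A sketch of the strategy-scope check above in isolation, using the public tf.distribute API:

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    v = tf.Variable(1.0)
# True: `v` was created under this strategy's scope, so creating slot
# variables for it under the same strategy passes the check above.
print(strategy.extended.variable_created_in_scope(v))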
Example #12
def _handle_fp16_and_distributed_optimizer(optimizer,
                                           lr_schedule,
                                           hvd_backend=None):
    if hvd_backend == "horovod":
        import horovod.tensorflow.keras as hvd
        from horovod.tensorflow import Compression
    elif hvd_backend == "byteps":
        import byteps.tensorflow.keras as hvd
        from byteps.tensorflow import Compression

    if hvd_backend:
        compression = Compression.none
        if compat.CUSTOM_GLOBAL_FLOATX == "float16":
            compression = Compression.fp16

    if lr_schedule is not None and hvd_backend is None:
        # TODO(ZhaoChengqi): pay attention to API changes
        optimizer._set_hyper("learning_rate", lr_schedule)
    # Handle the following scenario specifically:
    # there is a bug under TF2.3 + Horovod + fp16 + XLA.
    if compat.CUSTOM_GLOBAL_FLOATX == "float16":
        logging.info("NOTICE: using revised DynamicLossScale under fp16")
        revised_loss_scale = training_utils.RevisedDynamicLossScale()
        if hvd_backend:
            opt = LossScaleOptimizer(optimizer, loss_scale=1)
            opt = hvd.DistributedOptimizer(opt,
                                           compression=compression,
                                           sparse_as_dense=True)
            opt._loss_scale = revised_loss_scale
            for weight in loss_scale_module.get_loss_scale_weights(
                    opt._loss_scale):
                backend.track_variable(weight)
            opt._track_trackable(opt._loss_scale, 'loss_scale', overwrite=True)
        else:
            opt = LossScaleOptimizer(optimizer, loss_scale=revised_loss_scale)
        return opt
    return optimizer
Example #13
def clone_and_build_model(
    model, input_tensors=None, target_tensors=None, custom_objects=None,
    compile_clone=True, in_place_reset=False):
  """Clone a `Model` and build/compile it with the same settings used before.

  This function should be run in the same graph as the model.

  Args:
    model: `tf.keras.Model` object. Can be Functional, Sequential, or
      sub-classed.
    input_tensors: Optional list of input tensors to build the model upon. If
      not provided, placeholders will be created.
    target_tensors: Optional list of target tensors for compiling the model. If
      not provided, placeholders will be created.
    custom_objects: Optional dictionary mapping string names to custom classes
      or functions.
    compile_clone: Boolean, whether to compile model clone (default `True`).
    in_place_reset: Boolean, whether to reset the model in place. Only used if
      the model is not a graph network. If the model is a subclassed model, then
      this argument must be set to `True` (default `False`). To restore the
      original model, use the function
      `in_place_subclassed_model_state_restoration(model)`.

  Returns:
    Clone of the model.

  Raises:
    ValueError: if trying to clone a subclassed model, and `in_place_reset` is
      set to False.
  """
  if model._is_graph_network:
    if custom_objects:
      with CustomObjectScope(custom_objects):
        clone = clone_model(model, input_tensors=input_tensors)
    else:
      clone = clone_model(model, input_tensors=input_tensors)
  else:
    if not in_place_reset:
      raise ValueError(
          'Model is not a graph network (usually means that it is a subclassed '
          'model). The model cannot be cloned, but there is a workaround where '
          'the model is reset in-place. To use this, please set the argument '
          '`in_place_reset` to `True`. This will reset the attributes in the '
          'original model. To restore the attributes, call '
          '`in_place_subclassed_model_state_restoration(model)`.')
    clone = model
    _in_place_subclassed_model_reset(clone)
    if input_tensors is not None:
      clone._set_inputs(input_tensors)

  # Compile/Build model
  if not compile_clone:
    if isinstance(clone, Sequential):
      clone.build()
  elif model.optimizer:
    if isinstance(model.optimizer, optimizers.TFOptimizer):
      optimizer = model.optimizer
      K.track_tf_optimizer(optimizer)
    else:
      optimizer_config = model.optimizer.get_config()
      optimizer = model.optimizer.__class__.from_config(optimizer_config)
    global_step = training_util.get_or_create_global_step()
    K.track_variable(global_step)
    optimizer.iterations = global_step

    clone.compile(
        optimizer,
        model.loss,
        metrics=model.metrics,
        loss_weights=model.loss_weights,
        sample_weight_mode=model.sample_weight_mode,
        weighted_metrics=model.weighted_metrics,
        target_tensors=target_tensors)

  return clone
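A hedged usage sketch for the function above, assuming a compiled Sequential (graph-network) model so no in-place reset is needed:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="sgd", loss="mse")
# Clone and recompile with the same settings; placeholders are created for
# inputs and targets since none are passed.
clone = clone_and_build_model(model, compile_clone=True)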