Example #1
  def CreateVariables(self):
    """Create variables for this layer and child layers.

    DO NOT OVERRIDE. Override self._CreateVariables instead.
    """
    if self._create_variables_called:
      return
    self._create_variables_called = True

    self._global_step = py_utils.GetGlobalStep()

    if self._is_variable_free:
      for child in self._children_list:
        if not child._is_variable_free:  # pylint: disable=protected-access
          raise ValueError(
              'Variable free layer %s(%s) child %s(%s) has variables.' %
              (self.params.name, self.params.cls, child.params.name,
               child.params.cls))
    else:
      self.AddExtraTheta('global_step', self._global_step)
      self._CreateChildrenVariables()
      with tf.variable_scope(
          py_utils.SanitizeScopeKey(self.params.name),
          auxiliary_name_scope=False):
        for name, meta in list(self._variables_to_create.items()):
          self._CreateVariable(name, meta)
        self._CreateVariables()
    self._VerifyVarsAndTheta()
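
The docstring above points subclasses at _CreateVariables as the override hook. Below is a minimal, hedged sketch of that pattern, assuming the Lingvo BaseLayer / py_utils.WeightParams API of this vintage; the layer name SimpleProjection and its dimension params are made up for illustration.

from lingvo.core import base_layer
from lingvo.core import py_utils


class SimpleProjection(base_layer.BaseLayer):
  """Hypothetical layer used only to illustrate the override point."""

  @classmethod
  def Params(cls):
    p = super().Params()
    p.Define('input_dim', 0, 'Input dimension.')
    p.Define('output_dim', 0, 'Output dimension.')
    return p

  def _CreateVariables(self):
    super()._CreateVariables()
    p = self.params
    # CreateVariable() registers the weight so that CreateVariables() above
    # can build it inside this layer's variable scope.
    self.CreateVariable(
        'w',
        py_utils.WeightParams(
            shape=[p.input_dim, p.output_dim],
            init=py_utils.WeightInit.Gaussian(),
            dtype=p.dtype))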
Example #2
  def InstantiateVariables(self):
    """Create variables for this layer and child layers.

    DO NOT OVERRIDE. Override self._CreateLayerVariables instead.
    """
    if self._create_variables_status != _CreateLayerVariablesStatus.NOT_CALLED:
      return
    self._create_variables_status = _CreateLayerVariablesStatus.IN_PROGRESS

    stack_size = len(_CREATE_VARIABLES_STACK.stack)
    _CREATE_VARIABLES_STACK.stack.append(self)
    try:
      self._global_step = py_utils.GetGlobalStep()
      self._CreateChildrenVariables()

      if not self._is_variable_free:
        self.AddExtraTheta('global_step', self._global_step)
        with tf.variable_scope(
            py_utils.SanitizeScopeKey(self.params.name),
            auxiliary_name_scope=False):
          for name, meta in list(self._variables_to_create.items()):
            self._CreateVariableInternal(name, meta)
          self._CreateLayerVariables()
    finally:
      assert _CREATE_VARIABLES_STACK.stack[-1] is self
      _CREATE_VARIABLES_STACK.stack.pop()
      assert len(_CREATE_VARIABLES_STACK.stack) == stack_size

    self._create_variables_status = _CreateLayerVariablesStatus.COMPLETED

    if not _CREATE_VARIABLES_STACK.stack:
      # Outermost layer just finished InstantiateVariables.
      self._VerifyVarsAndTheta()
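
Example #2 is the newer form of the same entry point: InstantiateVariables replaces CreateVariables, and _CreateLayerVariables replaces _CreateVariables as the override hook. A hedged usage sketch of the driver side, reusing the hypothetical SimpleProjection layer from the sketch under Example #1 (assumed here to override _CreateLayerVariables instead):

# Construction does not create variables; InstantiateVariables() does.
params = SimpleProjection.Params().Set(name='proj', input_dim=8, output_dim=4)
layer = params.Instantiate()
layer.InstantiateVariables()
# Once instantiation completes, the variable and its theta counterpart exist.
w_var = layer.vars.w
w_theta = layer.theta.w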
Example #3
 @contextlib.contextmanager
 def _SelfVariableScope(self):
     """Internal. Used to ensure the same variable & name scopes are used."""
     if not self._self_variable_scope:
         with tf.variable_scope(py_utils.SanitizeScopeKey(
                 self.params.name)) as scope:
             self._self_variable_scope = scope
     with contextlib.ExitStack() as stack:
         stack.enter_context(
             tf.variable_scope(self._self_variable_scope,
                               auxiliary_name_scope=False))
         stack.enter_context(
             tf.name_scope(self._self_variable_scope.original_name_scope))
         yield stack
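
Stripped of the Lingvo plumbing, the idiom above is: cache the tf.variable_scope the first time it is entered, then re-enter it later with auxiliary_name_scope=False plus a tf.name_scope on original_name_scope, so repeated calls reuse the same variable names and op-name prefix instead of getting suffixed scopes. A minimal standalone TF1-style sketch of that idiom (not Lingvo code; class and variable names are made up):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()


class ScopeCachingExample:
  """Hypothetical helper illustrating the scope-reuse idiom."""

  def __init__(self, name):
    # Enter the scope once so TensorFlow creates it, and remember the object.
    with tf.variable_scope(name) as scope:
      self._scope = scope

  def build(self):
    # Re-enter the cached scope. auxiliary_name_scope=False avoids opening a
    # new (suffixed) name scope, and name_scope(original_name_scope) restores
    # the op-name prefix from the first entry.
    with tf.variable_scope(self._scope, auxiliary_name_scope=False), \
         tf.name_scope(self._scope.original_name_scope):
      return tf.get_variable('w', shape=[2, 2])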
Example #4
def CollectVarHistogram(vs_gs):
    """Adds histogram summaries for variables and gradients."""

    for name, (var, grad) in vs_gs.FlattenItems():
        name = py_utils.SanitizeScopeKey(name)
        with tf.device(var.device), tf.name_scope(name + '/summary'):
            if isinstance(grad, tf.IndexedSlices):
                var = tf.gather(var, grad.indices)
                grad = grad.values
            if var.dtype.is_complex:
                var = tf.abs(var)
                grad = tf.abs(grad)

            histogram('var_hist/' + name, var)
            histogram('grad_hist/' + name, grad)
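
A hedged usage sketch for CollectVarHistogram, assuming the Lingvo py_utils and summary_utils modules: the NestedMap only needs (var, grad) pairs as values, for example variables paired with the output of tf.gradients.

import tensorflow.compat.v1 as tf
from lingvo.core import py_utils
from lingvo.core import summary_utils

tf.disable_eager_execution()

w = tf.get_variable('w', shape=[4, 2], dtype=tf.float32)
x = tf.random.uniform([3, 4])
loss = tf.reduce_sum(tf.matmul(x, w))
grad = tf.gradients(loss, [w])[0]

# Leaf values are (var, grad) pairs, matching what FlattenItems() yields.
vs_gs = py_utils.NestedMap(w=(w, grad))
summary_utils.CollectVarHistogram(vs_gs)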
Example #5
 @contextlib.contextmanager
 def _SelfVariableScope(self, params=None, enter_name_scope=True):
     """Internal. Used to ensure the same variable & name scopes are used."""
     if not hasattr(self, '_self_variable_scope'):
         params = params or self.params
         self._parent_variable_scope = tf.get_variable_scope()
         with tf.variable_scope(py_utils.SanitizeScopeKey(
                 params.name)) as scope:
             self._self_variable_scope = scope
     with contextlib.ExitStack() as stack:
         stack.enter_context(
             tf.variable_scope(self._self_variable_scope,
                               auxiliary_name_scope=False))
         if enter_name_scope:
             stack.enter_context(
                 tf.name_scope(
                     self._self_variable_scope.original_name_scope))
         yield stack
Example #6
  def _CreateChildrenVariables(self):
    """Create variables for child layers.

    Should be rarely overridden, only in cases where control over the context of
    children's CreateVariables calls is needed, e.g. if child variables need to
    be created inside a specific context manager.

    There are a few cases of this in the codebase marked as being for backwards
    compatibility. This is only to ensure that variable scopes remain compatible
    through the code migration. New layers should not copy that pattern, and
    should instead follow the standard pattern of self.CreateChild() in
    __init__() and self.CreateVariable() in _CreateVariables(). If you are okay
    with breaking old checkpoints, you can go ahead and delete those functions.
    """
    with tf.variable_scope(
        py_utils.SanitizeScopeKey(self.params.name),
        auxiliary_name_scope=False):
      for _ in self._children_list:
        # For now each layer is responsible for calling its CreateVariables.
        pass
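
The "standard pattern" the docstring refers to, as a hedged sketch: declare children in __init__() with CreateChild() and create this layer's own variables in _CreateVariables(). The ParentLayer below is hypothetical and reuses the SimpleProjection layer and imports from the sketch under Example #1.

class ParentLayer(base_layer.BaseLayer):
  """Hypothetical parent layer illustrating the standard pattern."""

  @classmethod
  def Params(cls):
    p = super().Params()
    p.Define('proj', SimpleProjection.Params(), 'Child projection layer.')
    return p

  def __init__(self, params):
    super().__init__(params)
    # Children are declared in __init__() ...
    self.CreateChild('proj', self.params.proj)

  def _CreateVariables(self):
    super()._CreateVariables()
    # ... and this layer's own variables in _CreateVariables().
    self.CreateVariable(
        'bias',
        py_utils.WeightParams(
            shape=[self.params.proj.output_dim],
            init=py_utils.WeightInit.Constant(0.0),
            dtype=self.params.dtype))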
Example #7
    def _CreateChildrenVariables(self):
        """Create variables for child layers.

    Should be rarely overridden, only in cases when control over the context of
    children InstantiateVariables calls are needed. eg, if children variables
    need to be created inside of a specific context manager.

    There are a few cases of this in the codebase marked as for backwards
    compability. This is only to ensure that variable scopes remain compatible
    through the code migration. New layers should not copy that pattern, and
    instead follow the standard pattern of self.CreateChild() in __init__() and
    self.CreateVariable() in _CreateLayerVariables(). If you are okay with
    breaking old checkpoints, you can go ahead and delete those functions.
    """
        with tf.variable_scope(py_utils.SanitizeScopeKey(self.params.name),
                               auxiliary_name_scope=False):
            for child in self._children_list:
                if self._is_variable_free and not child._is_variable_free:  # pylint: disable=protected-access
                    raise ValueError(
                        'Variable free layer %s(%s) child %s(%s) has variables.'
                        % (self.params.name, self.params.cls,
                           child.params.name, child.params.cls))
                child.InstantiateVariables()
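
The one case the docstring allows for overriding _CreateChildrenVariables is wrapping the children's InstantiateVariables() calls in a specific context manager. A hedged sketch of such an override, reusing the hypothetical SimpleProjection layer from the sketch under Example #1; the tf.device placement is only an illustrative choice, not something prescribed by the source.

class CpuChildrenLayer(base_layer.BaseLayer):
  """Hypothetical layer that only changes the children-creation context."""

  def __init__(self, params):
    super().__init__(params)
    # Hypothetical child; any child layer params would do here.
    self.CreateChild(
        'proj', SimpleProjection.Params().Set(input_dim=8, output_dim=4))

  def _CreateChildrenVariables(self):
    # Wrap the default behavior, which enters this layer's scope and calls
    # child.InstantiateVariables() for each child, in a device context.
    with tf.device('/cpu:0'):
      super()._CreateChildrenVariables()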
Example #8
    def ScaleGradients(self, var_grads, gradient_adjuster=None):
        """Scales gradients according to training params.

        Args:
          var_grads: a `.NestedMap` whose values are (var, grad) pairs.
          gradient_adjuster: if not None, a function that mutates a given
            var_grads.

        Returns:
          A `.NestedMap` containing

          - final_var_grads: a `.NestedMap` whose values are (var, grad) pairs,
            where gradients have already been scaled.
          - grad_scale: the gradient scale. 0 if gradient updates should be
            skipped for the step. (Optional, only returned in case global norm
            clipping is used.)
        """
        p = self.params

        # Computes gradients' norm and adds their summaries. Note that all_grad_norm
        # may be nan, which may cause grad_scale to be nan.
        for name, vg in var_grads.FlattenItems():
            summary_utils.AddNormSummary(
                py_utils.SanitizeScopeKey(name) + '/' + p.name, vg)
        flatten = py_utils.Flatten(var_grads)
        all_grad_norm = tf.sqrt(py_utils.SumSquared([g for (_, g) in flatten]))
        all_var_norm = tf.sqrt(py_utils.SumSquared([v for (v, _) in flatten]))
        grad_norm_is_nan_or_inf = tf.logical_or(tf.is_nan(all_grad_norm),
                                                tf.is_inf(all_grad_norm))

        # Optional gradient adjustment. Note that this happens after computing
        # all_grad_norm.
        if gradient_adjuster is not None:
            tf.logging.info('gradient_adjuster=%s', gradient_adjuster)
            var_grads = gradient_adjuster(var_grads)

        # Handles NaN/Inf gradients.
        has_nan_or_inf = py_utils.HasNanOrInfGradient(var_grads)
        # Grad norm can still be inf even if none of the individual grad is inf.
        has_nan_or_inf = tf.logical_or(has_nan_or_inf, grad_norm_is_nan_or_inf)
        self._AddEvalMetric('has_nan_or_inf', has_nan_or_inf, tf.constant(1.0))

        return_values = py_utils.NestedMap()
        if p.clip_gradient_single_norm_to_value:
            # Currently using both types of clipping simultaneously is unsupported.
            if p.clip_gradient_norm_to_value:
                raise ValueError(
                    'Cannot use clip_gradient_single_norm_to_value=%f and '
                    'clip_gradient_norm_to_value=%f.' %
                    (p.clip_gradient_single_norm_to_value,
                     p.clip_gradient_norm_to_value))
            final_var_grads = py_utils.ApplyGradNormClipping(
                var_grads, p.clip_gradient_single_norm_to_value)

        else:
            grad_scale = self._GetGlobalGradScale(all_grad_norm,
                                                  has_nan_or_inf)
            self._AddEvalMetric('grad_norm/all', all_grad_norm,
                                tf.constant(1.0))
            self._AddEvalMetric('var_norm/all', all_var_norm, tf.constant(1.0))
            self._AddEvalMetric('grad_scale_all', grad_scale, tf.constant(1.0))
            final_var_grads = py_utils.ApplyGradMultiplier(
                var_grads, grad_scale)
            return_values.grad_scale = grad_scale

        return_values.final_var_grads = final_var_grads
        return return_values
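
A hedged sketch of a gradient_adjuster as accepted by ScaleGradients above: a callable that takes the var_grads NestedMap of (var, grad) pairs and returns an adjusted map. The elementwise clipping below is an arbitrary illustrative choice, and it assumes NestedMap.Transform applies the function to each (var, grad) leaf.

def ClipElementwise(var_grads, limit=1.0):
  """Hypothetical adjuster: clips each gradient's values to [-limit, limit]."""
  return var_grads.Transform(
      lambda vg: (vg[0], tf.clip_by_value(vg[1], -limit, limit)))

# Plugged in after the global norm summaries have been computed:
# scaled = self.ScaleGradients(var_grads, gradient_adjuster=ClipElementwise)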