Example #1
def _invalid_model_fn(features, labels):
  # pylint: disable=unused-argument
  w = variables_lib.Variable(42.0, 'weight')
  update_global_step = variables.get_global_step().assign_add(1)
  with control_flow_ops.control_dependencies([update_global_step]):
    loss = 100.0 - w
  return None, loss, None
Example #2
def mean_baseline(_, loss):
  with ops.name_scope(name):
    reduced_loss = math_ops.reduce_mean(loss)
    ema = training.ExponentialMovingAverage(decay=ema_decay)
    update_op = ema.apply([reduced_loss])
    with control_flow_ops.control_dependencies([update_op]):
      # TODO(rsepassi): Possibly implement the initialization bias correction
      # term from Adam (section 3 of https://arxiv.org/pdf/1412.6980v8.pdf).
      baseline = ema.average(reduced_loss)
    return baseline
Example #3
def mean_baseline(_, loss):
  with ops.name_scope(name):
    reduced_loss = math_ops.reduce_mean(loss)
    ema = training.ExponentialMovingAverage(decay=ema_decay)
    update_op = ema.apply([reduced_loss])
    with control_flow_ops.control_dependencies([update_op]):
      # TODO(rsepassi): Possibly implement the initialization bias correction
      # term from Adam (section 3 of https://arxiv.org/pdf/1412.6980v8.pdf).
      baseline = ema.average(reduced_loss)
    return baseline
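The baseline above is just an exponential moving average of the scalar mean loss, updated as shadow = decay * shadow + (1 - decay) * value, which is the update ExponentialMovingAverage performs on each apply(). A plain-Python sketch of that arithmetic, with a hypothetical helper name and a simplified initialization, only for illustration:

def ema_baseline(losses, decay=0.99):
  """Illustrative stand-in for the moving-average baseline above.

  losses: a sequence of scalar mean losses, one per training step.
  Returns the baseline value after each update.
  """
  shadow = None
  baselines = []
  for value in losses:
    # First update seeds the shadow with the first value; the TF op instead
    # initializes its shadow variable before the first apply().
    shadow = value if shadow is None else decay * shadow + (1 - decay) * value
    baselines.append(shadow)
  return baselines

print(ema_baseline([10.0, 8.0, 6.0], decay=0.9))  # [10.0, 9.8, 9.42]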
Example #4
def _invalid_model_fn(features, labels, mode):
  # pylint: disable=unused-argument
  w = variables_lib.Variable(42.0, 'weight')
  loss = 100.0 - w
  update_global_step = variables.get_global_step().assign_add(1)
  with control_flow_ops.control_dependencies([update_global_step]):
    train_op = w.assign_add(loss / 100.0)
  predictions = loss
  if mode == model_fn.ModeKeys.EVAL:
    loss = None
  return predictions, loss, train_op
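Example #4 appears to be "invalid" because it nulls out the loss whenever mode is EVAL, leaving evaluation with nothing to compute. Assuming the same old-style (predictions, loss, train_op) tuple contract and the same module aliases (variables_lib, variables, control_flow_ops, model_fn) as the fixture above, a sketch of a well-formed variant keeps the loss in every mode and makes only the train op conditional:

def _valid_model_fn(features, labels, mode):
  # pylint: disable=unused-argument
  # Hypothetical counterpart to the fixture above: the loss is always
  # returned, and only the train op depends on the mode.
  w = variables_lib.Variable(42.0, name='weight')
  loss = 100.0 - w
  update_global_step = variables.get_global_step().assign_add(1)
  with control_flow_ops.control_dependencies([update_global_step]):
    train_op = w.assign_add(loss / 100.0)
  predictions = loss
  if mode != model_fn.ModeKeys.TRAIN:
    train_op = None
  return predictions, loss, train_op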
Example #5
def create_train_op(
    total_loss,
    optimizer,
    global_step=None,
    update_ops=None,
    variables_to_train=None,
    clip_gradient_norm=0,
    summarize_gradients=False,
    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
    aggregation_method=None,
    colocate_gradients_with_ops=False):
  """Creates an `Operation` that evaluates the gradients and returns the loss.

  Args:
    total_loss: A `Tensor` representing the total loss.
    optimizer: A tf.Optimizer to use for computing the gradients.
    global_step: A `Tensor` representing the global step variable. If left as
      `None`, then slim.variables.global_step() is used.
    update_ops: An optional list of updates to execute. Note that the update_ops
      that are used are the union of those update_ops passed to the function and
      the value of slim.ops.GetUpdateOps(). Therefore, if `update_ops` is None,
      then the value of slim.ops.GetUpdateOps() is still used.
    variables_to_train: An optional list of variables to train. If None, it
      defaults to all tf.trainable_variables().
    clip_gradient_norm: If greater than 0, gradients are clipped to this
      maximum norm.
    summarize_gradients: Whether or not to add summaries for each gradient.
    gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: Whether or not to try colocating the gradients
      with the ops that generated them.

  Returns:
    A `Tensor` that, when evaluated, computes the gradients and returns the
      total loss value.
  """
  if global_step is None:
    global_step = variables.get_or_create_global_step()

  update_ops = set(update_ops or [])

  # Make sure update_ops are computed before total_loss.
  if update_ops:
    with control_flow_ops.control_dependencies(update_ops):
      barrier = control_flow_ops.no_op(name='update_barrier')
    total_loss = control_flow_ops.with_dependencies([barrier], total_loss)

  if variables_to_train is None:
    # Default to tf.trainable_variables()
    variables_to_train = tf_variables.trainable_variables()
  else:
    # Make sure that variables_to_train are in tf.trainable_variables()
    for v in variables_to_train:
      assert v in tf_variables.trainable_variables()

  assert variables_to_train

  # Create the gradients. Note that apply_gradients adds the gradient
  # computation to the current graph.
  grads = optimizer.compute_gradients(
      total_loss, variables_to_train, gate_gradients=gate_gradients,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)

  # Clip gradients.
  if clip_gradient_norm > 0:
    grads = clip_gradient_norms(grads, clip_gradient_norm)

  # Summarize gradients.
  if summarize_gradients:
    for grad, var in grads:
      if grad is not None:
        if isinstance(grad, ops.IndexedSlices):
          grad_values = grad.values
        else:
          grad_values = grad
        logging_ops.histogram_summary(var.op.name + ':gradient', grad_values)
        logging_ops.histogram_summary(var.op.name + ':gradient_norm',
                                      clip_ops.global_norm([grad_values]))
      else:
        logging.info('Var %s has no gradient', var.op.name)

  # Create gradient updates.
  grad_updates = optimizer.apply_gradients(grads, global_step=global_step)

  # Make sure total_loss is valid.
  total_loss = array_ops.check_numerics(total_loss, 'LossTensor is inf or nan')

  # Ensure the train_tensor computes grad_updates.
  return control_flow_ops.with_dependencies([grad_updates], total_loss)
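A sketch of how the returned train tensor might be driven in a TF 1.x-style training loop. The tiny quadratic loss and GradientDescentOptimizer are illustrative stand-ins, and create_train_op is assumed to be reachable as tf.contrib.slim.learning.create_train_op (where this function historically lived):

import tensorflow as tf  # TF 1.x-style API assumed

slim = tf.contrib.slim

# Illustrative objective: pull a single weight toward 7.0.
w = tf.Variable(0.0, name='w')
total_loss = tf.reduce_mean(tf.square(w - 7.0))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# The returned tensor applies the gradient update and evaluates to total_loss.
train_op = slim.learning.create_train_op(total_loss, optimizer)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(100):
    loss_value = sess.run(train_op)  # one gradient step; returns the loss
  print('final loss:', loss_value)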