Example #1
    def mean_baseline(_, loss):
        with vs.variable_scope(name, default_name="MeanBaseline"):
            reduced_loss = math_ops.reduce_mean(loss)

            ema = training.ExponentialMovingAverage(decay=ema_decay)
            update_op = ema.apply([reduced_loss])

            # The bias correction term requires keeping track of how many times the
            # EMA has been updated. Creating a variable here to do so. The global step
            # is not used because it may or may not track exactly the number of times
            # the EMA is updated.
            ema_var = ema.average(reduced_loss)
            assert ema_var is not None
            with ops.colocate_with(ema_var):
                num_updates = vs.get_variable("local_ema_step",
                                              initializer=0,
                                              trainable=False)
            num_updates = num_updates.assign_add(1)
            bias_correction = 1. - math_ops.pow(
                ema_decay, math_ops.cast(num_updates, reduced_loss.dtype))

            with ops.control_dependencies([update_op]):
                baseline = ema.average(reduced_loss) / bias_correction

            return baseline
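The bias-correction term mirrors Adam's initialization fix: the EMA shadow value starts at zero, so early averages underestimate the true mean loss until divided by 1 - decay**t. A small plain-Python sketch of the arithmetic (illustrative only, not part of the snippet above):

decay = 0.9
ema = 0.0                                   # shadow value starts at zero
for t, loss in enumerate([5.0, 5.0, 5.0], start=1):
    ema = decay * ema + (1.0 - decay) * loss
    corrected = ema / (1.0 - decay ** t)    # bias-corrected estimate
    print(t, round(ema, 3), round(corrected, 3))
# At t=1 the raw EMA is 0.5, but the corrected value is already 5.0.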
def mean_baseline(_, loss):
    with ops.name_scope(name):
        reduced_loss = math_ops.reduce_mean(loss)

        ema = training.ExponentialMovingAverage(decay=ema_decay)
        update_op = ema.apply([reduced_loss])

        with ops.control_dependencies([update_op]):
            # TODO(rsepassi): Possibly implement the initialization bias correction
            # term from Adam (section 3 of https://arxiv.org/pdf/1412.6980v8.pdf).
            baseline = ema.average(reduced_loss)

        return baseline
  def mean_baseline(_, loss):
    with vs.variable_scope(name, default_name="MeanBaseline"):
      reduced_loss = math_ops.reduce_mean(loss)

      ema = training.ExponentialMovingAverage(decay=ema_decay)
      update_op = ema.apply([reduced_loss])

      with ops.control_dependencies([update_op]):
        # Using `identity` causes an op to be added in this context, which
        # triggers the update. Removing the `identity` means nothing is updated.
        baseline = array_ops.identity(ema.average(reduced_loss))

      return baseline
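A minimal graph-mode sketch of the identity/control-dependencies pattern the comment describes (hypothetical toy variable, assuming TF 1.x-style execution):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.get_variable("x", initializer=0.0)
update = tf.assign_add(x, 1.0)

with tf.control_dependencies([update]):
    # `identity` creates an op inside this context, so fetching `y` forces
    # `update` to run first; returning `x` directly would add no such op.
    y = tf.identity(x)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(y))  # 1.0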
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or Tensor, magnitude of update per each training step.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of tf.Optimizer that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be an instantiation of a tf.Optimizer sub-class
                 and have `compute_gradients` and `apply_gradients` functions.
    clip_gradients: float or None, clips gradients by this value.
    moving_average_decay: float or None, takes into account previous loss
                          to make learning smoother due to outliers.
    learning_rate_decay_fn: function, takes learning_rate and global_step
                            Tensors, returns Tensor. Can be used to implement
                            any learning rate decay functions.
                            For example: tf.train.exponential_decay.
    variables: list of variables to optimize or `None` to use all trainable variables.

  Returns:
    Training op.

  Raises:
    ValueError: if optimizer is wrong type.
  """
  # Moving average of the loss with decay.
  if moving_average_decay is not None:
    # Generate moving averages of the loss.
    loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                   name="avg")
    loss_averages_op = loss_averages.apply([loss])
    logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
    loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

  # Learning rate variable, with possible decay.
  if isinstance(learning_rate, ops.Tensor) and len(learning_rate.get_shape()) == 0:
    lr = learning_rate
  elif isinstance(learning_rate, float):
    lr = vs.get_variable("learning_rate",
                         [],
                         trainable=False,
                         initializer=init_ops.constant_initializer(learning_rate))
  else:
    raise ValueError("Learning rate should be 0d Tensor or float. Got %s" %
        str(learning_rate))
  if learning_rate_decay_fn is not None:
    lr = learning_rate_decay_fn(lr, global_step)

  # Create optimizer, given specified parameters.
  if isinstance(optimizer, six.string_types):
    if optimizer not in OPTIMIZER_CLS_NAMES:
      raise ValueError("Optimizer name should be one of [%s], you provided %s."
                       % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
    opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
  elif isinstance(optimizer, type) and issubclass(optimizer,
                                                  optimizer_.Optimizer):
    opt = optimizer(learning_rate=lr)
  elif isinstance(optimizer, optimizer_.Optimizer):
    opt = optimizer
  else:
    raise ValueError("Unrecognized optimizer: should be string, "
                     "subclass of Optimizer or instance of "
                     "subclass of Optimizer. Got %s." % str(optimizer))

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients and clip them if provided.
  gradients = opt.compute_gradients(loss, variables)
  if clip_gradients is not None:
    gradients, variables = zip(*gradients)
    clipped_gradients, _ = clip_ops.clip_by_global_norm(gradients,
                                                        clip_gradients)
    gradients = list(zip(clipped_gradients, variables))

  # Add scalar summary for loss.
  logging_ops.scalar_summary("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    if grad_values is not None:
      logging_ops.histogram_summary(variable.name, variable)
      logging_ops.histogram_summary(variable.name + "/gradients", grad_values)
      logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                    clip_ops.global_norm([grad_values]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(gradients,
                                     global_step=global_step,
                                     name="train")
  # Make sure the loss is valid.
  final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss)

  return train_tensor
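A small end-to-end usage sketch for this variant (TF 1.x graph mode, toy loss; assumes the function is importable, e.g. as the old tf.contrib.layers.optimize_loss):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

w = tf.get_variable("w", initializer=2.0)
loss = tf.square(w - 1.0)                        # toy 0-d loss
global_step = tf.train.get_or_create_global_step()

train_op = optimize_loss(loss,
                         global_step=global_step,
                         learning_rate=0.1,
                         optimizer="SGD",
                         clip_gradients=1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        print(sess.run(train_op))                # returns the (smoothed) loss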
Example #5
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  moving_average_decay=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None):
    """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or Tensor, magnitude of update per each training step.
    optimizer: string, class or optimizer instance, used as trainer.
               string should be name of optimizer, like 'SGD',
                 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
               class should be sub-class of tf.Optimizer that implements
                 `compute_gradients` and `apply_gradients` functions.
               optimizer instance should be an instantiation of a tf.Optimizer sub-class
                 and have `compute_gradients` and `apply_gradients` functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
                          value.
    gradient_multipliers: dict of variables or variable names to floats.
                          If present, gradients for specified
                          variables will be multiplied by given constant.
    clip_gradients: float or `None`, clips gradients by this value.
    moving_average_decay: Deprecated. float or None, takes into account previous
                          loss to make learning smoother due to outliers.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
                            `Tensor`s, returns `Tensor`.
                            Can be used to implement any learning rate decay
                            functions.
                            For example: tf.train.exponential_decay.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
                uses elements of UPDATE_OPS collection. The order of execution
                between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or
               `None` to use all trainable variables.
    name: The name for this operation is used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
               set only the loss and the learning rate will be reported. The
               complete list is in OPTIMIZER_SUMMARIES.

  Returns:
    Training op.

  Raises:
    ValueError: if optimizer is wrong type.
  """
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Update ops take UPDATE_OPS collection if not provided.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Make sure update ops are ran before computing loss.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Moving average of the loss with decay.
        # TODO(b/30439864): moving_average_decay should be removed.
        if moving_average_decay is not None:
            logging.warn("'moving_average_decay' is deprecated. Please use "
                         "tensorboard's builtin averaging instead.")
            # Generate moving averages of the loss.
            loss_averages = train.ExponentialMovingAverage(
                moving_average_decay, name="avg")
            loss_averages_op = loss_averages.apply([loss])
            logging_ops.scalar_summary("loss/mean",
                                       loss_averages.average(loss))
            loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

        # Learning rate variable, with possible decay.
        if (isinstance(learning_rate, ops.Tensor)
                and learning_rate.get_shape().ndims == 0):
            lr = learning_rate
        elif isinstance(learning_rate, float):
            lr = vs.get_variable(
                "learning_rate", [],
                trainable=False,
                initializer=init_ops.constant_initializer(learning_rate))
        else:
            raise ValueError("Learning rate should be 0d Tensor or float. "
                             "Got %s of type %s" %
                             (str(learning_rate), str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate"]
        if learning_rate_decay_fn is not None:
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                logging_ops.scalar_summary("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif isinstance(optimizer, type) and issubclass(
                optimizer, optimizer_.Optimizer):
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        else:
            raise ValueError("Unrecognized optimizer: should be string, "
                             "subclass of Optimizer or instance of "
                             "subclass of Optimizer. Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(loss, variables)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(gradients,
                                                       gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)

        # Optionally clip gradients by global norm.
        if clip_gradients is not None:
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)

        # Add scalar summary for loss.
        if "loss" in summaries:
            logging_ops.scalar_summary("loss", loss)

        # Add histograms for variables, gradients and gradient norms.
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                if "gradients" in summaries:
                    logging_ops.histogram_summary(variable.name + "/gradients",
                                                  grad_values)
                if "gradient_norm" in summaries:
                    logging_ops.histogram_summary(
                        variable.name + "/gradient_norm",
                        clip_ops.global_norm([grad_values]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(gradients,
                                           global_step=global_step,
                                           name="train")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

        return train_tensor
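The richer variant is wired up the same way. A sketch (reusing the toy `loss` and `global_step` from the previous usage example; argument values are illustrative only) showing a decay schedule and selective summaries:

train_op = optimize_loss(
    loss,
    global_step=global_step,
    learning_rate=0.1,
    optimizer="Adam",
    learning_rate_decay_fn=lambda lr, step: tf.train.exponential_decay(
        lr, step, decay_steps=1000, decay_rate=0.96),
    clip_gradients=5.0,
    summaries=["loss", "learning_rate", "gradient_norm"])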
Example #6
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or Tensor, magnitude of update per each training step.
    optimizer: string or function, used as optimizer for training.
    clip_gradients: float or None, clips gradients by this value.
    moving_average_decay: float or None, takes into account previous loss
                          to make learning smoother due to outliers.
    learning_rate_decay_fn: function, takes learning_rate and global_step
                            Tensors, returns Tensor. Can be used to implement
                            any learning rate decay functions.
                            For example: tf.train.exponential_decay.
    variables: list of variables to optimize or `None` to use all trainable variables.

  Returns:
    Training op.

  Raises:
    ValueError: if optimizer is wrong type.
  """
  # Moving average of the loss with decay.
  if moving_average_decay is not None:
    # Generate moving averages of the loss.
    loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                   name="avg")
    loss_averages_op = loss_averages.apply([loss])
    logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
    loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

  # Convert optimizer into the optimizer class.
  if isinstance(optimizer, str):
    opt_cls = OPTIMIZER_CLS_NAMES[optimizer]
  elif callable(optimizer):
    opt_cls = optimizer
  else:
    raise ValueError("Unrecognized optimizer: should be string or function.")

  # Learning rate variable, with possible decay.
  lr = vs.get_variable("learning_rate",
                       [],
                       trainable=False,
                       initializer=init_ops.constant_initializer(learning_rate))
  if learning_rate_decay_fn is not None:
    lr = learning_rate_decay_fn(lr, global_step)

  # Create optimizer.
  opt = opt_cls(learning_rate=lr)

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients and clip them if provided.
  gradients = opt.compute_gradients(loss, variables)
  if clip_gradients is not None:
    gradients, variables = zip(*gradients)
    clipped_gradients, _ = clip_ops.clip_by_global_norm(gradients,
                                                        clip_gradients)
    gradients = list(zip(clipped_gradients, variables))

  # Add scalar summary for loss.
  logging_ops.scalar_summary("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    if grad_values is not None:
      logging_ops.histogram_summary(variable.name, variable)
      logging_ops.histogram_summary(variable.name + "/gradients", grad_values)
      logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                    clip_ops.global_norm([grad_values]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(gradients,
                                     global_step=global_step,
                                     name="train")
  # Make sure the loss is valid.
  final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss)

  return train_tensor
Example #7
def _batch_norm(_input,
                trnFlag,
                eps=1e-3,
                name="batch_norm",
                ema_decay=0.5,
                dtype=dtypes.float32):
    """
    A wrapped BN operation used for 2D or 3D convolution as described in:

        https://gist.github.com/tomokishii/0ce3bdac1588b5cca9fa5fbdf6e1c412
        https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow?answertab=votes#tab-top

    :param _input: tensor, always convolution result before Relu
    :param eps: scalar,
    :param trnFlag: bool, whether training or not
    :param ema_decay: scalar, moving average used of BN's beta and gamma
    :param dtype: tf.dtype, data type
    :return:
        tensor, BN reuslt which has the same shape as _input
    """
    shape = _input.get_shape().as_list()
    with variable_scope.variable_scope(name) as scope:
        beta = variable_scope.get_variable(
            "beta", [shape[-1]],
            dtype=dtype,
            initializer=init_ops.constant_initializer(0., dtype=dtype),
            trainable=True)
        gamma = variable_scope.get_variable(
            "gamma", [shape[-1]],
            dtype=dtype,
            initializer=init_ops.random_normal_initializer(1.,
                                                           0.01,
                                                           dtype=dtype,
                                                           seed=20170705),
            trainable=True)

        if len(shape) == 2:  # fc, [batch_size, num_dim]
            batch_mean, batch_var = nn_impl.moments(_input, [0],
                                                    name="moments")
        elif len(shape) == 4:  # conv, [batch_size, width, height, channel]
            batch_mean, batch_var = nn_impl.moments(_input, [0, 1, 2],
                                                    name="moments")
        elif len(shape) == 5:  # conv, [batch_size, depth, width, height, channel]
            batch_mean, batch_var = nn_impl.moments(_input, [0, 1, 2, 3],
                                                    name="moments")
        else:
            raise ValueError(
                "wrong _input shape, it must have dim of 2 or 4 or 5")

        ema = training.ExponentialMovingAverage(decay=ema_decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with ops.control_dependencies([ema_apply_op]):
                return array_ops.identity(batch_mean), array_ops.identity(
                    batch_var)

        mean, var = control_flow_ops.cond(
            trnFlag, mean_var_with_update, lambda:
            (ema.average(batch_mean), ema.average(batch_var)))

        bn_out = nn_impl.batch_normalization(_input, mean, var, beta, gamma,
                                             eps)
    return bn_out
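A usage sketch (TF 1.x graph mode; shapes and placeholder names are illustrative only, assuming `_batch_norm` above is in scope):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

conv_out = tf.placeholder(tf.float32, [None, 32, 32, 16])  # conv feature map
is_training = tf.placeholder(tf.bool, [])                   # feeds trnFlag

bn_out = _batch_norm(conv_out, trnFlag=is_training, name="conv1_bn")
activations = tf.nn.relu(bn_out)  # BN is applied before the ReLU, as noted above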