Example #1
def gradient_summaries(gvs, suppress_inf_and_nans=False):
    """Creates summaries for norm, mean and var of gradients."""
    gs = [gv[0] for gv in gvs]
    grad_global_norm = tf.global_norm(gs, 'gradient_global_norm')

    if suppress_inf_and_nans:
        is_nan_or_inf = tf.logical_or(tf.is_nan(grad_global_norm),
                                      tf.is_inf(grad_global_norm))

        grad_global_norm = tf.where(is_nan_or_inf,
                                    tf.zeros_like(grad_global_norm) - 1.,
                                    grad_global_norm)

    grad_abs_max, grad_abs_mean, grad_mean, grad_var = [0.] * 4
    n_grads = 1e-8
    for g, _ in gvs:
        if isinstance(g, tf.IndexedSlices):
            g = g.values

        if g is not None:
            current_n_grads = np.prod(g.shape.as_list())
            abs_g = abs(g)
            mean, var = tf.nn.moments(g, list(range(len(g.shape))))
            grad_abs_max = tf.maximum(grad_abs_max, tf.reduce_max(abs_g))
            grad_abs_mean += tf.reduce_sum(abs_g)
            grad_mean += mean * current_n_grads
            grad_var += var
            n_grads += current_n_grads

    tf.summary.scalar('grad/abs_max', grad_abs_max)
    tf.summary.scalar('grad/abs_mean', grad_abs_mean / n_grads)
    tf.summary.scalar('grad/mean', grad_mean / n_grads)
    tf.summary.scalar('grad/var', grad_var / n_grads)

    return dict(grad_global_norm=grad_global_norm)
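
A minimal usage sketch for the function above, assuming a TF1-style graph with a scalar loss tensor and an AdamOptimizer; the loss here is a placeholder, not part of the original snippet.

optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
grads_and_vars = optimizer.compute_gradients(loss)

# Record gradient statistics before applying the update.
summaries = gradient_summaries(grads_and_vars, suppress_inf_and_nans=True)
train_op = optimizer.apply_gradients(grads_and_vars)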
Example #2
def _get_cubic_root(self):
    """Get the cubic root."""
    # We minimize the expression x^2 D^2 + (1-x)^4 * C / h_min^2
    # over x = sqrt(mu). Setting its derivative to zero and substituting
    # x = y + 1 gives the depressed cubic y^3 + p*y = q,
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use Vieta's substitution to compute the root.
    # There is only one real root y; it lies in (-1, 0), so x = y + 1
    # stays in (0, 1).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    assert_array = [
        tf.Assert(tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), [
            self._dist_to_opt_avg,
        ]),
        tf.Assert(tf.logical_not(tf.is_nan(self._h_min)), [
            self._h_min,
        ]),
        tf.Assert(tf.logical_not(tf.is_nan(self._grad_var)), [
            self._grad_var,
        ]),
        tf.Assert(tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), [
            self._dist_to_opt_avg,
        ]),
        tf.Assert(tf.logical_not(tf.is_inf(self._h_min)), [
            self._h_min,
        ]),
        tf.Assert(tf.logical_not(tf.is_inf(self._grad_var)), [
            self._grad_var,
        ])
    ]
    with tf.control_dependencies(assert_array):
        p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
        w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
        w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0 / 3.0)
        y = w - p / 3.0 / w
        x = y + 1
    return x
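
A quick numerical sanity check of the Vieta substitution above, in plain NumPy with made-up values for dist_to_opt_avg (D), h_min, and grad_var (C); the root y should satisfy y^3 + p*y + p = 0, and x = y + 1 is the sqrt(mu) estimate.

import numpy as np

D, h_min, C = 1.5, 0.2, 0.8  # placeholder statistics, not from a real run

p = D**2 * h_min**2 / (2.0 * C)
w3 = (-np.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
w = np.sign(w3) * np.abs(w3) ** (1.0 / 3.0)
y = w - p / (3.0 * w)
x = y + 1.0

print(y**3 + p * y + p)  # ~0 up to floating-point error
print(x)                 # lies in (0, 1)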
Example #3
def psnr(labels, predictions):
  """Computes average peak signal-to-noise ratio of `predictions`.

  Here PSNR is defined with respect to the maximum value of 1. All image tensors
  must be within the range [0, 1].

  Args:
    labels: Tensor of shape [B, H, W, N].
    predictions: Tensor of shape [B, H, W, N].

  Returns:
    Tuple of (psnr, update_op) as returned by tf.metrics.
  """
  predictions.shape.assert_is_compatible_with(labels.shape)
  with tf.control_dependencies([tf.assert_greater_equal(labels, 0.0),
                                tf.assert_less_equal(labels, 1.0)]):
    psnrs = tf.image.psnr(labels, predictions, max_val=1.0)
    psnrs = tf.boolean_mask(psnrs, tf.logical_not(tf.is_inf(psnrs)))
    return tf.metrics.mean(psnrs, name='psnr')
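
A sketch of how the returned (value, update_op) pair could be consumed in a TF1 evaluation loop; labels_batch, preds_batch, and num_eval_batches are hypothetical placeholders, not part of the original snippet.

mean_psnr, update_op = psnr(labels_batch, preds_batch)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric accumulators are local variables
    for _ in range(num_eval_batches):
        sess.run(update_op)                     # accumulate per-batch PSNR values
    print(sess.run(mean_psnr))                  # mean over all processed batches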
Example #4
def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
    """Auto-correlation with NaN/Inf results replaced by ones."""
    res = tfp.stats.auto_correlation(x, axis, *args, **kwargs)
    res = tf.where(tf.is_nan(res), tf.ones_like(res), res)
    res = tf.where(tf.is_inf(res), tf.ones_like(res), res)
    return res
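
A usage sketch for the wrapper above, assuming TensorFlow Probability is available as tfp; the chains are synthetic, and a zero-variance chain is the typical case where the raw auto-correlation degenerates and the replacement with ones kicks in.

import tensorflow as tf
import tensorflow_probability as tfp

chain = tf.random_normal([1000])     # synthetic MCMC-style chain
acf = SanitizedAutoCorrelation(chain, axis=0, max_lags=20)

flat = tf.zeros([1000])              # zero-variance chain; raw ACF is not finite
acf_flat = SanitizedAutoCorrelation(flat, axis=0, max_lags=20)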
Example #5
  def _apply_gradients(self, grads_and_vars, learning_rate):
    """See base class."""
    print('_apply_gradients is called!!!')  # Debug trace.

    # Create slot variables
    var_list = []
    for (grad, param) in grads_and_vars:
      if grad is None or param is None:
        continue
      var_list.append(param)
    with ops.init_scope():
      self._create_slots(var_list)

    # Build training operations
    assignments = []
    check_values = []
    for (grad, param) in grads_and_vars:
      if grad is None or param is None:
        continue

      param_name = self._get_variable_name(param.name)

      #m, v = self.mv_lookup[param_name]
      m = self.get_slot(param, param_name + "/adam_m")
      v = self.get_slot(param, param_name + "/adam_v")

      # Standard Adam update.
      next_m = (
          tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
      next_v = (
          tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
                                                    tf.square(grad)))
      update = next_m / (tf.sqrt(next_v) + self.epsilon)

      # reduce_any: fire the assert if any element is NaN/Inf, not only
      # when every element is.
      check_update_nan = tf.Assert(
          tf.logical_not(tf.reduce_any(tf.is_nan(update))),
          [param_name, 'NAN update', update])
      check_update_inf = tf.Assert(
          tf.logical_not(tf.reduce_any(tf.is_inf(update))),
          [param_name, 'INF update', update])
      check_values.append(check_update_nan)
      check_values.append(check_update_inf)
      #update = 0

      # Just adding the square of the weights to the loss function is *not*
      # the correct way of using L2 regularization/weight decay with Adam,
      # since that will interact with the m and v parameters in strange ways.
      #
      # Instead we want to decay the weights in a manner that doesn't interact
      # with the m/v parameters. This is equivalent to adding the square
      # of the weights to the loss with plain (non-momentum) SGD.
      # (A standalone sketch of this decoupled update follows the example.)
      if self.weight_decay_rate > 0:
        if self._do_use_weight_decay(param_name):
          update += self.weight_decay_rate * param

      update_with_lr = learning_rate * update
      # update_with_lr = tf.Print(update_with_lr, ['\nupdate_with_lr', param_name, tf.shape(update_with_lr), update_with_lr], summarize=32)
      max_update_with_lr = tf.reduce_max(update_with_lr)
      min_update_with_lr = tf.reduce_min(update_with_lr)
      # update_with_lr = tf.Print(update_with_lr, ['\nupdate_with_lr', param_name, tf.shape(update_with_lr), min_update_with_lr, max_update_with_lr], summarize=32)

      check_update_with_lr_nan = tf.Assert(
          tf.logical_not(tf.reduce_any(tf.is_nan(update_with_lr))),
          [param_name, 'NAN update_with_lr', update_with_lr])
      check_update_with_lr_inf = tf.Assert(
          tf.logical_not(tf.reduce_any(tf.is_inf(update_with_lr))),
          [param_name, 'INF update_with_lr', update_with_lr])
      check_values.append(check_update_with_lr_nan)
      check_values.append(check_update_with_lr_inf)

      next_param = param - update_with_lr

      check_next_param_nan = tf.Assert(
          tf.logical_not(tf.reduce_any(tf.is_nan(next_param))),
          [param_name, 'NAN next_param', next_param])
      check_next_param_inf = tf.Assert(
          tf.logical_not(tf.reduce_any(tf.is_inf(next_param))),
          [param_name, 'INF next_param', next_param])
      check_values.append(check_next_param_nan)
      check_values.append(check_next_param_inf)

      # Ensure that the debug operations are executed.
      for op in check_values:
        op.mark_used()

      assignments.extend([
          param.assign(next_param),
          m.assign(next_m),
          v.assign(next_v),
      ])

    # Attach the NaN/Inf checks once, outside the per-parameter loop, so the
    # same assert ops are not re-added on every iteration.
    assignments.extend(check_values)

    return assignments
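
The long comment inside the loop above describes decoupled (AdamW-style) weight decay: the decay term is added to the normalized Adam update instead of to the gradient, so it never touches the m/v statistics. A standalone NumPy sketch of one such step, with illustrative hyperparameters that are not taken from the optimizer above:

import numpy as np

def adamw_style_step(param, grad, m, v, lr,
                     beta_1=0.9, beta_2=0.999, eps=1e-6, weight_decay=0.01):
    """One Adam step with decoupled weight decay (no bias correction)."""
    m = beta_1 * m + (1.0 - beta_1) * grad
    v = beta_2 * v + (1.0 - beta_2) * grad ** 2
    update = m / (np.sqrt(v) + eps)
    update += weight_decay * param   # decay bypasses the m/v statistics
    param = param - lr * update
    return param, m, v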