import numpy as np
import tensorflow as tf


def gradient_summaries(gvs, suppress_inf_and_nans=False):
  """Creates summaries for norm, mean and var of gradients."""
  gs = [gv[0] for gv in gvs]
  grad_global_norm = tf.global_norm(gs, 'gradient_global_norm')

  if suppress_inf_and_nans:
    is_nan_or_inf = tf.logical_or(tf.is_nan(grad_global_norm),
                                  tf.is_inf(grad_global_norm))
    # Report -1 instead of a NaN/Inf global norm so the summary stays plottable.
    grad_global_norm = tf.where(is_nan_or_inf,
                                tf.zeros_like(grad_global_norm) - 1.,
                                grad_global_norm)

  grad_abs_max, grad_abs_mean, grad_mean, grad_var = [0.] * 4
  n_grads = 1e-8
  for g, _ in gvs:
    if isinstance(g, tf.IndexedSlices):
      g = g.values

    if g is not None:
      current_n_grads = np.prod(g.shape.as_list())
      abs_g = abs(g)
      mean, var = tf.nn.moments(g, list(range(len(g.shape))))
      grad_abs_max = tf.maximum(grad_abs_max, tf.reduce_max(abs_g))
      grad_abs_mean += tf.reduce_sum(abs_g)
      grad_mean += mean * current_n_grads
      grad_var += var
      n_grads += current_n_grads

  tf.summary.scalar('grad/abs_max', grad_abs_max)
  tf.summary.scalar('grad/abs_mean', grad_abs_mean / n_grads)
  tf.summary.scalar('grad/mean', grad_mean / n_grads)
  tf.summary.scalar('grad/var', grad_var / n_grads)

  return dict(grad_global_norm=grad_global_norm)
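
# Hedged usage sketch (an assumption, not part of the original code): the
# summaries are built from the (gradient, variable) pairs returned by a
# TF1-style optimizer's compute_gradients. The variable `x`, the toy loss and
# the optimizer choice below are hypothetical.
def _example_gradient_summaries():
  x = tf.get_variable('x', shape=[3], initializer=tf.zeros_initializer())
  loss = tf.reduce_sum(tf.square(x - 1.0))
  optimizer = tf.train.AdamOptimizer(1e-3)
  gvs = optimizer.compute_gradients(loss)
  summaries = gradient_summaries(gvs, suppress_inf_and_nans=True)
  train_op = optimizer.apply_gradients(gvs)
  merged_summary = tf.summary.merge_all()  # picks up the grad/* scalars
  return train_op, merged_summary, summaries['grad_global_norm']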
def _get_cubic_root(self):
  """Get the cubic root."""
  # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
  # where x = sqrt(mu).
  # We substitute x, which is sqrt(mu), with x = y + 1.
  # It gives y^3 + py = q
  # where p = (D^2 h_min^2)/(2*C) and q = -p.
  # We use Vieta's substitution to compute the root.
  # There is only one real solution y (which is in [0, 1]).
  # http://mathworld.wolfram.com/VietasSubstitution.html
  assert_array = [
      tf.Assert(
          tf.logical_not(tf.is_nan(self._dist_to_opt_avg)),
          [self._dist_to_opt_avg]),
      tf.Assert(
          tf.logical_not(tf.is_nan(self._h_min)),
          [self._h_min]),
      tf.Assert(
          tf.logical_not(tf.is_nan(self._grad_var)),
          [self._grad_var]),
      tf.Assert(
          tf.logical_not(tf.is_inf(self._dist_to_opt_avg)),
          [self._dist_to_opt_avg]),
      tf.Assert(
          tf.logical_not(tf.is_inf(self._h_min)),
          [self._h_min]),
      tf.Assert(
          tf.logical_not(tf.is_inf(self._grad_var)),
          [self._grad_var]),
  ]
  with tf.control_dependencies(assert_array):
    p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
    w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0 / 3.0)
    y = w - p / 3.0 / w
    x = y + 1
    return x
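
# Hedged sanity check (an assumption, not part of the original code): verifies
# numerically that the closed form above solves the depressed cubic
# y^3 + p*y + p = 0 (i.e. y^3 + p*y = q with q = -p), and that x = y + 1 lands
# in [0, 1]. The value of p is arbitrary.
def _check_cubic_root(p=1.0):
  w3 = (-np.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
  w = np.sign(w3) * np.abs(w3)**(1.0 / 3.0)
  y = w - p / 3.0 / w
  x = y + 1.0
  assert abs(y**3 + p * y + p) < 1e-6, 'y is not a root of the depressed cubic'
  assert 0.0 <= x <= 1.0, 'x = sqrt(mu) should lie in [0, 1]'
  return x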
def psnr(labels, predictions):
  """Computes average peak signal-to-noise ratio of `predictions`.

  Here PSNR is defined with respect to the maximum value of 1.
  All image tensors must be within the range [0, 1].

  Args:
    labels: Tensor of shape [B, H, W, N].
    predictions: Tensor of shape [B, H, W, N].

  Returns:
    Tuple of (psnr, update_op) as returned by tf.metrics.
  """
  predictions.shape.assert_is_compatible_with(labels.shape)
  with tf.control_dependencies([tf.assert_greater_equal(labels, 0.0),
                                tf.assert_less_equal(labels, 1.0)]):
    psnrs = tf.image.psnr(labels, predictions, max_val=1.0)
    psnrs = tf.boolean_mask(psnrs, tf.logical_not(tf.is_inf(psnrs)))
    return tf.metrics.mean(psnrs, name='psnr')
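
# Hedged usage sketch (an assumption, not part of the original code): running
# the streaming PSNR metric in a TF1 session on random images in [0, 1]. The
# placeholder shapes and noise level are hypothetical.
def _example_psnr():
  labels_ph = tf.placeholder(tf.float32, [None, 64, 64, 3])
  predictions_ph = tf.placeholder(tf.float32, [None, 64, 64, 3])
  mean_psnr, update_op = psnr(labels_ph, predictions_ph)

  labels_np = np.random.uniform(size=(2, 64, 64, 3)).astype(np.float32)
  predictions_np = np.clip(
      labels_np + np.random.normal(scale=0.05, size=labels_np.shape),
      0.0, 1.0).astype(np.float32)

  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # tf.metrics state lives in local variables
    sess.run(update_op, {labels_ph: labels_np, predictions_ph: predictions_np})
    return sess.run(mean_psnr)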
import tensorflow_probability as tfp


def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
  """Auto-correlation along `axis`, with NaN/Inf entries replaced by 1."""
  res = tfp.stats.auto_correlation(x, axis, *args, **kwargs)
  res = tf.where(tf.is_nan(res), tf.ones_like(res), res)
  res = tf.where(tf.is_inf(res), tf.ones_like(res), res)
  return res
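
# Hedged usage sketch (an assumption, not part of the original code):
# auto-correlation of a batch of chains along the time axis. Entries that come
# out NaN/Inf (e.g. for a constant chain, whose variance is zero) are replaced
# by 1 by the wrapper above. The shapes and max_lags value are hypothetical.
def _example_sanitized_auto_correlation():
  chains = tf.concat(
      [tf.random_normal([3, 1000]),  # three well-behaved chains
       tf.zeros([1, 1000])],         # one constant chain -> NaN auto-correlation
      axis=0)
  return SanitizedAutoCorrelation(chains, axis=1, max_lags=100)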
def _apply_gradients(self, grads_and_vars, learning_rate):
  """See base class."""
  print('_apply_gradients is called!!!')

  # Create slot variables.
  var_list = []
  for (grad, param) in grads_and_vars:
    if grad is None or param is None:
      continue
    var_list.append(param)
  with ops.init_scope():
    self._create_slots(var_list)

  # Build training operations.
  assignments = []
  check_values = []
  for (grad, param) in grads_and_vars:
    if grad is None or param is None:
      continue

    param_name = self._get_variable_name(param.name)
    m = self.get_slot(param, param_name + "/adam_m")
    v = self.get_slot(param, param_name + "/adam_v")

    # Standard Adam update.
    next_m = (
        tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
    next_v = (
        tf.multiply(self.beta_2, v) +
        tf.multiply(1.0 - self.beta_2, tf.square(grad)))
    update = next_m / (tf.sqrt(next_v) + self.epsilon)

    # Debug assertions: fire if any element of the update is NaN or Inf.
    check_update_nan = tf.Assert(
        tf.logical_not(tf.reduce_any(tf.is_nan(update))),
        [param_name, 'NAN update', update])
    check_update_inf = tf.Assert(
        tf.logical_not(tf.reduce_any(tf.is_inf(update))),
        [param_name, 'INF update', update])
    check_values.append(check_update_nan)
    check_values.append(check_update_inf)

    # Just adding the square of the weights to the loss function is *not* the
    # correct way of using L2 regularization/weight decay with Adam, since
    # that will interact with the m and v parameters in strange ways.
    #
    # Instead we want to decay the weights in a manner that doesn't interact
    # with the m/v parameters. This is equivalent to adding the square of the
    # weights to the loss with plain (non-momentum) SGD.
    if self.weight_decay_rate > 0:
      if self._do_use_weight_decay(param_name):
        update += self.weight_decay_rate * param

    update_with_lr = learning_rate * update

    check_update_with_lr_nan = tf.Assert(
        tf.logical_not(tf.reduce_any(tf.is_nan(update_with_lr))),
        [param_name, 'NAN update_with_lr', update_with_lr])
    check_update_with_lr_inf = tf.Assert(
        tf.logical_not(tf.reduce_any(tf.is_inf(update_with_lr))),
        [param_name, 'INF update_with_lr', update_with_lr])
    check_values.append(check_update_with_lr_nan)
    check_values.append(check_update_with_lr_inf)

    next_param = param - update_with_lr

    check_next_param_nan = tf.Assert(
        tf.logical_not(tf.reduce_any(tf.is_nan(next_param))),
        [param_name, 'NAN next_param', next_param])
    check_next_param_inf = tf.Assert(
        tf.logical_not(tf.reduce_any(tf.is_inf(next_param))),
        [param_name, 'INF next_param', next_param])
    check_values.append(check_next_param_nan)
    check_values.append(check_next_param_inf)

    # Mark the assert ops as used so tf_should_use does not warn; they are
    # appended to `assignments` below, which is what actually runs them.
    for op in check_values:
      op.mark_used()

    assignments.extend(
        [param.assign(next_param),
         m.assign(next_m),
         v.assign(next_v)])

  assignments.extend(check_values)
  return assignments
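
# Hedged usage sketch (an assumption, not part of the original code): the
# method above belongs to an AdamW-style optimizer class (it relies on
# self.beta_1 / self.beta_2 / self.epsilon / self.weight_decay_rate, the slot
# helpers, and on `ops` being tensorflow.python.framework.ops). The list it
# returns -- parameter/slot assignments plus the NaN/Inf assert ops -- is
# typically grouped into a single training op.
def _example_build_train_op(optimizer, loss, learning_rate):
  trainable = tf.trainable_variables()
  grads_and_vars = list(zip(tf.gradients(loss, trainable), trainable))
  assignments = optimizer._apply_gradients(grads_and_vars, learning_rate)
  return tf.group(*assignments, name='train_op')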