Beispiel #1
0
def fucking_deep_gaze_logsumexp(input_tensor, axis=None, keepdims=False,
                                name=None):
    """Compute a numerically stabilised log-mean-exp of `input_tensor`.

    Adapted from `reduce_logsumexp` in
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/math_ops.py.
    The only difference from the classic logsumexp is that the shifted
    exponentials are averaged (`reduce_mean`) instead of summed, which equals
    logsumexp minus log(N), where N is the number of entries reduced over
    (e.g. N=10 when reducing over 10 readout nets). According to the original
    author's note this modification comes from DeepGaze.

    Args:
        input_tensor: Value convertible to a tensor.
        axis: Axis or axes to reduce over; forwarded to the reductions.
        keepdims: Whether the reduced axes are kept in the result. `None` is
            treated as `False`.
        name: Optional name for the op scope.

    Returns:
        Tensor holding `log(mean(exp(input_tensor)))` along `axis`.
    """
    if keepdims is None:
        keepdims = False
    input_tensor = ops.convert_to_tensor(input_tensor)
    with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name:
        raw_max = tf.reduce_max(input_tensor, axis=axis, keep_dims=True)
        # A non-finite maximum is replaced by zero so that subtracting it
        # cannot produce NaN; the shift is excluded from the gradient.
        max_is_finite = gen_math_ops.is_finite(raw_max)
        shift = array_ops.stop_gradient(
            array_ops.where(max_is_finite, raw_max,
                            array_ops.zeros_like(raw_max)))
        shifted_exp = gen_math_ops.exp(tf.subtract(input_tensor, shift))
        # Mean instead of sum: this is the deviation from plain logsumexp.
        log_mean = gen_math_ops.log(
            tf.reduce_mean(shifted_exp, axis, keep_dims=keepdims))
        if not keepdims:
            # The shift was computed with kept dims; match the reduced shape.
            shift = array_ops.reshape(shift, array_ops.shape(log_mean))
        return gen_math_ops.add(log_mean, shift)
 def incr_loss_scale():
   """Build the op that multiplies the loss scale by `self._incr_ratio`.

   The increased value is only adopted when it is finite; otherwise the
   current loss scale is written back unchanged. `self` is taken from the
   enclosing scope.

   Returns:
     A grouped op containing the loss-scale assignment and the result of
     `self._reset_stats()`.
   """
   def scaled_up():
     return self._loss_scale * self._incr_ratio

   def unchanged():
     return self._loss_scale

   candidate_is_finite = gen_math_ops.is_finite(
       self._loss_scale * self._incr_ratio)
   next_loss_scale = control_flow_ops.cond(
       candidate_is_finite, scaled_up, unchanged)
   # Whenever loss_scale is (re)assigned, the good/bad step stats are reset.
   return control_flow_ops.group(
       state_ops.assign(self._loss_scale, next_loss_scale),
       self._reset_stats())
 def incr_loss_scale():
   """Return an op that raises the loss scale if the raised value is finite.

   Uses `self` from the enclosing scope. The loss scale is assigned either
   `self._loss_scale * self._incr_ratio` (when that product is finite) or its
   current value, and `self._reset_stats()` is grouped with that assignment.
   """
   increase_is_safe = gen_math_ops.is_finite(
       self._loss_scale * self._incr_ratio)
   chosen_scale = control_flow_ops.cond(
       increase_is_safe,
       lambda: self._loss_scale * self._incr_ratio,
       lambda: self._loss_scale)
   assign_op = state_ops.assign(self._loss_scale, chosen_scale)
   # Updating loss_scale resets both the good-step and bad-step statistics.
   reset_op = self._reset_stats()
   return control_flow_ops.group(assign_op, reset_op)
Beispiel #4
0
 def get_ix_slices_values_without_nan():
     """Return `input_tensor.values` with NaN/Inf entries replaced by zero.

     `input_tensor` and `name` come from the enclosing scope. A warning print
     op is created as well; the returned identity op has control dependencies
     on both the cleaned values and that print op.
     """
     raw_values = input_tensor.values
     finite_mask = gen_math_ops.is_finite(raw_values)
     cleaned_values = tf.where(finite_mask,
                               raw_values,
                               gen_array_ops.zeros_like(raw_values))
     warning_text = ('WARNING - Tensor %s has NaN or Inf values. %s' %
                     (input_tensor.name, name or ''))
     warning_op = logging_ops.print_v2(warning_text)
     with ops.control_dependencies([cleaned_values, warning_op]):
         return array_ops.identity(cleaned_values)
Beispiel #5
0
def ensure_finite(input_tensor, name=None):
    """ Replaces NaN and Inf in the input tensor with `0` and prints a warning

        The check is only performed when the module-level flag `LOG_INF_NAN_TENSORS` is truthy;
        otherwise the input is returned untouched.

        :param input_tensor: The input to check. `ops.Tensor` and `ops.IndexedSlices` are handled;
                             any other type is returned unchanged.
        :param name: Optional text appended to the warning message.
        :return: For a tensor, the tensor with NaN and Inf replaced with `0`. For indexed slices, a new
                 `ops.IndexedSlices` with cleaned values and the original indices and dense shape.
    """
    if not LOG_INF_NAN_TENSORS:
        return input_tensor

    def zero_non_finite(values):
        """ Zeroes the non-finite entries of `values` and attaches the warning print op """
        # `gen_math_ops.is_finite` is used for every finite check in this function; the former
        # `tf.is_finite` alias on the dense path is not available in the TF 2.x top-level namespace.
        cleaned = tf.where(gen_math_ops.is_finite(values), values, tf.zeros_like(values))
        print_op = logging_ops.print_v2('WARNING - Tensor %s has NaN or Inf values. %s' %
                                        (input_tensor.name, name or ''))
        # The identity op forces both the cleaned values and the print to run when it is evaluated.
        with ops.control_dependencies([cleaned, print_op]):
            return array_ops.identity(cleaned)

    # Tensor
    if isinstance(input_tensor, ops.Tensor):
        return control_flow_ops.cond(math_ops.reduce_all(gen_math_ops.is_finite(input_tensor)),
                                     true_fn=lambda: input_tensor,
                                     false_fn=lambda: zero_non_finite(input_tensor))

    # Indexed Slices - only the values can hold NaN / Inf, indices and dense shape are passed through
    if isinstance(input_tensor, ops.IndexedSlices):
        values = control_flow_ops.cond(math_ops.reduce_all(gen_math_ops.is_finite(input_tensor.values)),
                                       true_fn=lambda: input_tensor.values,
                                       false_fn=lambda: zero_non_finite(input_tensor.values))
        return ops.IndexedSlices(values=values,
                                 indices=input_tensor.indices,
                                 dense_shape=input_tensor.dense_shape)

    # Unknown type
    return input_tensor
Beispiel #6
0
def softmax_2d(input_tensor, axis=None, keepdims=False, name=None):
    """Softmax of `input_tensor` normalised over `axis`.

    Adapted from https://gist.github.com/raingo/a5808fe356b8da031837

    Args:
        input_tensor: Value convertible to a tensor.
        axis: Axis or axes over which the exponentials are normalised.
        keepdims: Accepted and normalised (`None` -> `False`) but not used by
            the computation; both reductions always keep the reduced dims.
        name: Optional name for the op scope.

    Returns:
        `exp(x - max) / sum(exp(x - max))`, where max and sum are taken over
        `axis`.
    """
    if keepdims is None:
        keepdims = False
    input_tensor = ops.convert_to_tensor(input_tensor)
    with ops.name_scope(name, "softmax_2d", [input_tensor]) as name:
        peak = tf.reduce_max(input_tensor, axis=axis, keep_dims=True)
        # A non-finite maximum is swapped for zero before it is used as the
        # stabilising shift; the shift does not take part in the gradient.
        finite_peak = array_ops.where(
            gen_math_ops.is_finite(peak), peak, array_ops.zeros_like(peak))
        shift = array_ops.stop_gradient(finite_peak)
        exponentials = gen_math_ops.exp(tf.subtract(input_tensor, shift))
        partition = tf.reduce_sum(exponentials, axis, keep_dims=True)
        return exponentials / partition
Beispiel #7
0
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients. See base class @{tf.train.Optimizer}.

    The wrapped optimizer is only invoked when every gradient is finite;
    otherwise the variable update is replaced by a no-op. In both cases the
    loss-scale manager is informed about the outcome.

    Args:
      grads_and_vars: Iterable of (gradient, variable) pairs. Pairs whose
        gradient is `None` are skipped by the finite check but still
        forwarded to the wrapped optimizer.
      global_step: Forwarded to the wrapped optimizer.
      name: Forwarded to the wrapped optimizer.

    Returns:
      A grouped op containing the conditional variable update and the
      loss-scale update.
    """
    # Materialise once: the pairs are read here and again by the wrapped
    # optimizer, so a one-shot iterable (e.g. `zip(...)`) would otherwise be
    # exhausted before it is forwarded.
    grads_and_vars = tuple(grads_and_vars)

    # `None` marks a variable without gradient; it cannot be checked for
    # finiteness and must not reach `is_finite`.
    is_finite_grad = [
        math_ops.reduce_all(gen_math_ops.is_finite(g))
        for g, _ in grads_and_vars
        if g is not None
    ]
    is_overall_finite = math_ops.reduce_all(is_finite_grad)

    # Only update gradients when all grads are finite.
    def true_apply_gradients_fn():
      return self._opt.apply_gradients(grads_and_vars, global_step, name)

    update_vars = control_flow_ops.cond(
        is_overall_finite, true_apply_gradients_fn, gen_control_flow_ops.no_op)
    # Potentially adjust gradient scale in case of finite gradients.
    return control_flow_ops.group(
        update_vars,
        self._loss_scale_manager.update_loss_scale(is_overall_finite))
Beispiel #8
0
    def _conjugate_gradient(self,
                            loss,
                            z,
                            grads_and_vars,
                            cg_iter,
                            fix_first_step=False,
                            init_deltas=None):
        """Build `cg_iter` unrolled conjugate-gradient iterations.

        The right-hand side is the first element of every pair in
        `grads_and_vars` (called `minus_gradient` here, presumably the
        negated gradient -- confirm with the caller). Matrix-vector products
        come from `self._Hv(loss, z, variables, vectors, self._damping)`.

        Args:
            loss: Forwarded to `self._Hv`.
            z: Forwarded to `self._Hv`.
            grads_and_vars: Sequence of (g, v) pairs; it is iterated twice.
            cg_iter: Number of iterations to unroll.
            fix_first_step: If true, every step size is divided by the step
                size of the first iteration.
            init_deltas: Optional starting point; when given, the initial
                residual is `g - Hv(init_deltas)` instead of `g`.

        Returns:
            A tuple `(deltas_and_vars, deltas_history, residuals_history)`:
            the accumulated deltas zipped with their variables, the list of
            per-iteration scaled directions, and the list of residuals as
            they were at the start of each iteration.

        Raises:
            ValueError: If `self._Hv` returns a list whose length differs
                from the number of variables.
        """
        def _flatten_all(tensors):
            # Reshape every tensor to rank 1 so the list can be concatenated.
            return [gen_array_ops.reshape(t, [-1]) for t in tensors]

        minus_gradient = [grad for grad, _ in grads_and_vars]
        variables = [var for _, var in grads_and_vars]

        H_vars = [array_ops.zeros_like(grad) for grad in minus_gradient]
        if init_deltas is not None:
            H_vars = self._Hv(loss, z, variables, init_deltas, self._damping)

        # Search directions and residuals start out equal: g - H * delta_0.
        curr_dirs = [grad - hv for grad, hv in zip(minus_gradient, H_vars)]
        curr_residuals = [
            grad - hv for grad, hv in zip(minus_gradient, H_vars)
        ]
        deltas = [array_ops.zeros_like(direction) for direction in curr_dirs]

        deltas_history, residuals_history = [], []
        first_alpha = 1
        for step in range(cg_iter):
            Hvs = self._Hv(loss, z, variables, curr_dirs, self._damping)
            if len(Hvs) != len(variables):
                raise ValueError("xs and Hvs must have the same length.")

            # Treat the per-variable tensors as one long vector each.
            residuals_flat = _flatten_all(curr_residuals)
            dirs_flat = _flatten_all(curr_dirs)
            Hvs_flat = _flatten_all(Hvs)
            residual_vec = array_ops.concat(residuals_flat, 0)
            dir_vec = array_ops.concat(dirs_flat, 0)
            Hv_vec = array_ops.concat(Hvs_flat, 0)

            # Step size r.r / d.Hd, floored at 1e-6; a non-finite quotient
            # (e.g. zero denominator) falls back to 1.0.
            raw_alpha = _dot(residual_vec, residual_vec) / _dot(
                dir_vec, Hv_vec)
            alpha = control_flow_ops.cond(
                gen_math_ops.is_finite(raw_alpha),
                lambda: gen_math_ops.maximum(raw_alpha, 1e-6),
                lambda: ops.convert_to_tensor(1.0))
            if fix_first_step and step == 0:
                first_alpha = alpha

            curr_deltas = [
                direction * (alpha / first_alpha) for direction in curr_dirs
            ]
            deltas = [
                total + update for update, total in zip(curr_deltas, deltas)
            ]
            deltas_history.append(curr_deltas)
            residuals_history.append(curr_residuals)

            new_residuals = [
                res - alpha * hv for res, hv in zip(curr_residuals, Hvs)
            ]
            new_residual_vec = array_ops.concat(
                _flatten_all(new_residuals), 0)

            # Direction update coefficient r'.r' / r.r; non-finite -> 0.0,
            # which restarts from the plain residual direction.
            raw_beta = _dot(new_residual_vec, new_residual_vec) / _dot(
                residual_vec, residual_vec)
            beta = control_flow_ops.cond(gen_math_ops.is_finite(raw_beta),
                                         lambda: raw_beta,
                                         lambda: ops.convert_to_tensor(0.0))

            curr_dirs = [
                res + beta * direction
                for res, direction in zip(new_residuals, curr_dirs)
            ]
            curr_residuals = new_residuals

        return list(zip(deltas, variables)), deltas_history, residuals_history
Beispiel #9
0
def huber_loss(labels,
               predictions,
               weights=1.0,
               delta=1.0,
               scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Adds a Huber Loss term to the training procedure.

    For each value x in `error=labels-predictions`, the following is
    calculated:

    ```
      0.5 * x^2                  if |x| <= d
      0.5 * d^2 + d * (|x| - d)  if |x| > d
    ```

    where d is `delta`.

    See: https://en.wikipedia.org/wiki/Huber_loss

    If `delta` is a concrete number that is not finite (inf or NaN), the
    Huber formula is skipped and the result of `mean_squared_error` is
    returned instead, because the linear term `delta * 0` would be NaN.

    `weights` acts as a coefficient for the loss. If a scalar is provided,
    then the loss is simply scaled by the given value. If `weights` is a
    tensor of size `[batch_size]`, then the total loss for each sample of the
    batch is rescaled by the corresponding element in the `weights` vector.
    If the shape of `weights` matches the shape of `predictions`, then the
    loss of each measurable element of `predictions` is scaled by the
    corresponding value of `weights`.

    Args:
      labels: The ground truth output tensor, same dimensions as
        'predictions'.
      predictions: The predicted outputs.
      weights: Optional `Tensor` whose rank is either 0, or the same rank as
        `labels`, and must be broadcastable to `labels` (i.e., all dimensions
        must be either `1`, or the same as the corresponding `losses`
        dimension).
      delta: `float`, the point where the huber loss function
        changes from a quadratic to linear.
      scope: The scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the
      same shape as `labels`; otherwise, it is scalar.

    Raises:
      ValueError: If the shape of `predictions` doesn't match that of
        `labels` or if the shape of `weights` is invalid.  Also if `labels`
        or `predictions` is None.

    @compatibility(eager)
    The `loss_collection` argument is ignored when executing eagerly. Consider
    holding on to the return value or collecting losses via a
    `tf.keras.Model`.
    @end_compatibility
    """
    # Function-scope import keeps this block self-contained.
    import math

    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")
    with ops.name_scope(scope, "huber_loss",
                        (predictions, labels, weights)) as scope:
        # Decide the fallback in Python: coercing the Tensor returned by
        # `gen_math_ops.is_finite` to a bool raises during graph
        # construction. Values that cannot be converted to a Python float
        # (e.g. symbolic tensors) are treated as finite.
        try:
            delta_is_finite = math.isfinite(delta)
        except (TypeError, ValueError):
            delta_is_finite = True
        if not delta_is_finite:
            return mean_squared_error(labels, predictions, weights, scope,
                                      loss_collection, reduction)
        predictions = math_ops.cast(predictions, dtype=dtypes.float32)
        labels = math_ops.cast(labels, dtype=dtypes.float32)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        error = math_ops.subtract(predictions, labels)
        abs_error = math_ops.abs(error)
        quadratic = math_ops.minimum(abs_error, delta)
        # The following expression is the same in value as
        # tf.maximum(abs_error - delta, 0), but importantly the gradient for
        # the expression when abs_error == delta is 0 (for tf.maximum it would
        # be 1). This is necessary to avoid doubling the gradient, since there
        # is already a nonzero contribution to the gradient from the quadratic
        # term.
        linear = math_ops.subtract(abs_error, quadratic)
        losses = math_ops.add(
            math_ops.multiply(
                ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
                math_ops.multiply(quadratic, quadratic)),
            math_ops.multiply(delta, linear))
        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)