Example #1
    def result(self):
        """Compute the mean intersection-over-union via the confusion matrix."""
        sum_over_row = math_ops.cast(math_ops.reduce_sum(self.total_cm,
                                                         axis=0),
                                     dtype=self._dtype)
        sum_over_col = math_ops.cast(math_ops.reduce_sum(self.total_cm,
                                                         axis=1),
                                     dtype=self._dtype)
        true_positives = math_ops.cast(array_ops.diag_part(self.total_cm),
                                       dtype=self._dtype)

        # sum_over_row + sum_over_col =
        #     2 * true_positives + false_positives + false_negatives.
        denominator = sum_over_row + sum_over_col

        numerator = true_positives + true_positives
        # The mean is only computed over classes that appear in the
        # label or prediction tensor. If the denominator is 0, we need to
        # ignore the class.
        num_valid_entries = math_ops.reduce_sum(
            math_ops.cast(math_ops.not_equal(denominator, 0),
                          dtype=self._dtype))

        dice = math_ops.div_no_nan(numerator, denominator)

        return math_ops.div_no_nan(math_ops.reduce_sum(dice, name='mean_dice'),
                                   num_valid_entries)
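The snippet above reads 2*TP and TP+FP+FN per class straight off the accumulated confusion matrix. As a quick sanity check, here is a minimal, self-contained sketch of the same mean-Dice computation on a tiny confusion matrix, using only the public TF API (the labels and predictions below are made up for illustration):

import tensorflow as tf

labels      = tf.constant([0, 0, 1, 1, 2, 2])
predictions = tf.constant([0, 1, 1, 1, 2, 0])
cm = tf.cast(tf.math.confusion_matrix(labels, predictions, num_classes=3),
             tf.float32)

sum_over_row   = tf.reduce_sum(cm, axis=0)
sum_over_col   = tf.reduce_sum(cm, axis=1)
true_positives = tf.linalg.diag_part(cm)

denominator = sum_over_row + sum_over_col            # 2*TP + FP + FN per class
dice = tf.math.divide_no_nan(2.0 * true_positives, denominator)
num_valid = tf.reduce_sum(tf.cast(tf.not_equal(denominator, 0.0), tf.float32))
mean_dice = tf.math.divide_no_nan(tf.reduce_sum(dice), num_valid)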
    def compute_loss(labels, predictions, weights, loss_collection):
      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss
Example #3
def _ragged_embedding_lookup_with_reduce(table: tf_variables.Variable,
                                         ragged: ragged_tensor.RaggedTensor,
                                         weights: ragged_tensor.RaggedTensor,
                                         combiner: Text) -> core.Tensor:
    """Compute a ragged lookup followed by a reduce on axis 1.

  Args:
    table: The embedding table.
    ragged: A RaggedTensor of ids to look up.
    weights: A RaggedTensor of weights (or None).
    combiner: One of "mean", "sum", "sqrtn".

  Returns:
    A Tensor.
  """
    if weights is None:
        weights = array_ops.ones_like(ragged, dtype=table.dtype)
    weights = array_ops.expand_dims(weights, axis=2)
    ragged_result = embedding_ops.embedding_lookup_ragged(table, ragged)
    ragged_result = math_ops.reduce_sum(ragged_result * weights, axis=1)
    if combiner == "mean":
        ragged_result = math_ops.div_no_nan(
            ragged_result, math_ops.reduce_sum(weights, axis=1))
    elif combiner == "sqrtn":
        ragged_result = math_ops.div_no_nan(
            ragged_result,
            math_ops.sqrt(math_ops.reduce_sum(weights * weights, axis=1)))
    return ragged_result
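A minimal usage sketch of the "mean" combiner path, written against the public TF API rather than the internal modules above (the table and ids are made up; tf.ragged.map_flat_values stands in for embedding_lookup_ragged):

import tensorflow as tf

table = tf.Variable(tf.random.normal((10, 4)))                 # 10 ids, dim 4
ids   = tf.ragged.constant([[1, 2, 3], [5]], dtype=tf.int64)

looked_up = tf.ragged.map_flat_values(tf.nn.embedding_lookup, table, ids)
summed    = tf.reduce_sum(looked_up, axis=1)                   # sum over the ragged axis
counts    = tf.cast(ids.row_lengths(), tf.float32)[:, tf.newaxis]
mean_combined = tf.math.divide_no_nan(summed, counts)          # safe for empty rows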
    def _get_reduction(self, loss):
        with self._name_scope:
            if ndim(loss) <= 1 or self.reduction == 'none':
                return loss
            num_present = math_ops.cast(array_ops.size(loss,
                                                       name='num_elements'),
                                        dtype=loss.dtype)
            batch_size = math_ops.cast(array_ops.shape(loss, name='shape')[0],
                                       dtype=loss.dtype)

            if ndim(loss) >= 2 and self.reduction == 'batch_sum':
                total_loss = math_ops.div_no_nan(math_ops.reduce_sum(loss),
                                                 batch_size,
                                                 name='value')
                return total_loss
            elif ndim(loss) >= 2 and self.reduction == 'batch_mean':
                total_loss = math_ops.reduce_sum(loss)
                return math_ops.div_no_nan(total_loss,
                                           math_ops.div_no_nan(
                                               num_present, batch_size),
                                           name='value')
            elif self.reduction in ('mean', 'batch_mean'):
                total_loss = math_ops.reduce_sum(loss)
                return math_ops.div_no_nan(total_loss,
                                           num_present,
                                           name='value')
            elif self.reduction in ('sum', 'batch_sum'):
                return math_ops.reduce_sum(loss)
            else:
                total_loss = math_ops.reduce_sum(loss)
                return math_ops.div_no_nan(total_loss,
                                           num_present,
                                           name='value')
 def _get_reduction(self, loss):
     with self._name_scope:
         num_present = math_ops.cast(array_ops.size(loss,
                                                    name='num_elements'),
                                     dtype=loss.dtype)
         if ndim(loss) == 0 or self.reduction == 'none':
             return loss
         if ndim(loss) >= 2 and self.reduction == 'batch_sum':
             loss = reshape(loss, (int_shape(loss)[0], -1))
             return math_ops.reduce_sum(math_ops.reduce_mean(loss, axis=1))
         elif ndim(loss) >= 2 and self.reduction == 'batch_mean':
             loss = reshape(loss, (int_shape(loss)[0], -1))
             return math_ops.reduce_mean(math_ops.reduce_mean(loss, axis=1))
         elif self.reduction in ['mean', 'batch_mean']:
             total_loss = math_ops.reduce_sum(loss)
             return math_ops.div_no_nan(total_loss,
                                        num_present,
                                        name='value')
         elif self.reduction in ['sum', 'batch_sum']:
             return math_ops.reduce_sum(loss)
         else:
             total_loss = math_ops.reduce_sum(loss)
             return math_ops.div_no_nan(total_loss,
                                        num_present,
                                        name='value')
    def result(self):
        precision = math_ops.div_no_nan(self.tp, self.tp + self.fp)
        recall = math_ops.div_no_nan(self.tp, self.tp + self.fn)
        numerator = math_ops.multiply(precision, recall)
        denominator = math_ops.add(precision, recall)
        frac = math_ops.div_no_nan(numerator, denominator)
        result = math_ops.multiply(tf.constant(2.), frac)

        return result[0] if len(self.thresholds) == 1 else result
Example #7
    def result(self):

        recall = math_ops.div_no_nan(
            self.true_positives, self.true_positives + self.false_negatives)
        precision = math_ops.div_no_nan(
            self.true_positives, self.true_positives + self.false_positives)
        f1 = 2 / (1 / recall[0] + 1 / precision[0])

        return f1
Example #8
  def result(self):
    """Add option to remove summary."""
    if (self.curve == metrics_utils.AUCCurve.PR and
        self.summation_method == metrics_utils.AUCSummationMethod.INTERPOLATION):
      # This use case is different and is handled separately.
      return self.interpolate_pr_auc()

    # Set `x` and `y` values for the curves based on `curve` config.
    recall = math_ops.div_no_nan(self.true_positives,
                                 self.true_positives + self.false_negatives)
    if self.curve == metrics_utils.AUCCurve.ROC:
      fp_rate = math_ops.div_no_nan(self.false_positives,
                                    self.false_positives + self.true_negatives)
      x = fp_rate
      y = recall
    else:  # curve == 'PR'.
      precision = math_ops.div_no_nan(
          self.true_positives, self.true_positives + self.false_positives)
      x = recall
      y = precision

    # Find the rectangle heights based on `summation_method`.
    if self.summation_method == metrics_utils.AUCSummationMethod.INTERPOLATION:
      # Note: the case ('PR', 'interpolation') has been handled above.
      heights = (y[:self.num_thresholds - 1] + y[1:]) / 2.
    elif self.summation_method == metrics_utils.AUCSummationMethod.MINORING:
      heights = math_ops.minimum(y[:self.num_thresholds - 1], y[1:])
    else:  # self.summation_method = metrics_utils.AUCSummationMethod.MAJORING:
      heights = math_ops.maximum(y[:self.num_thresholds - 1], y[1:])

    # Sum up the areas of all the rectangles.
    if self.multi_label:
      riemann_terms = math_ops.multiply(x[:self.num_thresholds - 1] - x[1:],
                                        heights)
      by_label_auc = math_ops.reduce_sum(
          riemann_terms, name=self.name + '_by_label', axis=0)

      if self._summarize:
        if self.label_weights is None:
          # Unweighted average of the label AUCs.
          return math_ops.reduce_mean(by_label_auc, name=self.name)
        else:
          # Weighted average of the label AUCs.
          return math_ops.div_no_nan(
              math_ops.reduce_sum(
                  math_ops.multiply(by_label_auc, self.label_weights)),
              math_ops.reduce_sum(self.label_weights),
              name=self.name)
      else:
        return by_label_auc
    else:
      if self._summarize:
        return math_ops.reduce_sum(
            math_ops.multiply(x[:self.num_thresholds-1] - x[1:], heights),
            name=self.name)
      else:
        return math_ops.multiply(x[:self.num_thresholds-1] - x[1:], heights)
Example #9
 def testNonFiniteInNumerator(self, dtype):
   nums = constant_op.constant([np.nan, np.inf, np.NINF], dtype=dtype)
   zeros = constant_op.constant([0, 0, 0], dtype=dtype)
   ones = constant_op.constant([1, 1, 1], dtype=dtype)
   with test_util.use_gpu():
     tf_result_zeros = math_ops.div_no_nan(nums, zeros)
     self.assertAllEqual([0, 0, 0], tf_result_zeros)
     tf_result_ones = math_ops.div_no_nan(nums, ones)
     self.assertAllEqual(nums / ones, tf_result_ones)
Example #10
 def result(self):
     '''
     Compute the value for the F1 score. Calculates precision and recall, then F1 score.
     F1 = 2 * precision * recall / (precision + recall)
     :return: F1 score
     '''
     precision = math_ops.div_no_nan(self.true_positives, self.true_positives + self.false_positives)
     recall = math_ops.div_no_nan(self.true_positives, self.true_positives + self.false_negatives)
     result = math_ops.div_no_nan(2 * precision * recall, precision + recall)
     return result[0] if len(self.thresholds) == 1 else result
 def f_dummy(x):
     # This dummy function is an implementation of RGB to HSV using
     # primitive TF functions for the particular case where R>G>B.
     r = x[..., 0]
     g = x[..., 1]
     b = x[..., 2]
     # Since MAX = r and MIN = b, we get the following h,s,v values.
     v = r
     s = 1 - math_ops.div_no_nan(b, r)
     h = 60 * math_ops.div_no_nan(g - b, r - b)
     h = h / 360
     return array_ops.stack([h, s, v], axis=-1)
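For reference, a self-contained version of the same R > G > B special case using only public TF ops (a sketch with a made-up pixel; f_dummy above is not required):

import tensorflow as tf

pixel = tf.constant([[0.8, 0.5, 0.2]], dtype=tf.float32)   # R > G > B
r, g, b = pixel[..., 0], pixel[..., 1], pixel[..., 2]
v = r
s = 1.0 - tf.math.divide_no_nan(b, r)
h = 60.0 * tf.math.divide_no_nan(g - b, r - b) / 360.0
manual    = tf.stack([h, s, v], axis=-1)
reference = tf.image.rgb_to_hsv(pixel)                     # agrees for this case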
Example #12
    def interpolate_pr_auc(self):
        """Add option to remove summary."""
        dtp = self.true_positives[:self.num_thresholds -
                                  1] - self.true_positives[1:]
        p = self.true_positives + self.false_positives
        dp = p[:self.num_thresholds - 1] - p[1:]
        prec_slope = math_ops.div_no_nan(dtp,
                                         math_ops.maximum(dp, 0),
                                         name='prec_slope')
        intercept = self.true_positives[1:] - math_ops.multiply(
            prec_slope, p[1:])

        safe_p_ratio = array_ops.where(
            math_ops.logical_and(p[:self.num_thresholds - 1] > 0, p[1:] > 0),
            math_ops.div_no_nan(p[:self.num_thresholds - 1],
                                math_ops.maximum(p[1:], 0),
                                name='recall_relative_ratio'),
            array_ops.ones_like(p[1:]))

        pr_auc_increment = math_ops.div_no_nan(
            prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)),
            math_ops.maximum(
                self.true_positives[1:] + self.false_negatives[1:], 0),
            name='pr_auc_increment')

        if self.multi_label:
            by_label_auc = math_ops.reduce_sum(pr_auc_increment,
                                               name=self.name + '_by_label',
                                               axis=0)

            if self._summarize:
                if self.label_weights is None:
                    # Evenly weighted average of the label AUCs.
                    return math_ops.reduce_mean(by_label_auc, name=self.name)
                else:
                    # Weighted average of the label AUCs.
                    return math_ops.div_no_nan(math_ops.reduce_sum(
                        math_ops.multiply(by_label_auc, self.label_weights)),
                                               math_ops.reduce_sum(
                                                   self.label_weights),
                                               name=self.name)
            else:
                return by_label_auc
        else:
            if self._summarize:
                return math_ops.reduce_sum(pr_auc_increment,
                                           name='interpolate_pr_auc')
            else:
                return pr_auc_increment
Example #13
def _DivNoNanGrad(op, grad):
    """DivNoNan op gradient."""
    x = op.inputs[0]
    y = op.inputs[1]
    sx = array_ops.shape(x)
    sy = array_ops.shape(y)
    rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
    x = math_ops.conj(x)
    y = math_ops.conj(y)
    return (array_ops.reshape(
        math_ops.reduce_sum(math_ops.div_no_nan(grad, y), rx), sx),
            array_ops.reshape(
                math_ops.reduce_sum(
                    grad * math_ops.div_no_nan(math_ops.div_no_nan(-x, y), y),
                    ry), sy))
Example #14
def _DivNoNanGrad(op, grad):
  """DivNoNan op gradient."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(
      math_ops.reduce_sum(math_ops.div_no_nan(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(
                  grad * math_ops.div_no_nan(math_ops.div_no_nan(-x, y), y),
                  ry), sy))
Example #15
 def result(self):
     # if tf.equal(tf.cast(0.0, dtype=self.total.dtype), self.total):
     #     return tf.cast(0.0, dtype=self.dtype)
     # if tf.equal(tf.cast(0.0, dtype=self.count.dtype), self.count):
     #     return tf.cast(0.0, dtype=self.dtype)
     # return tf.cast(self.total / self.count, self.dtype)
     return math_ops.div_no_nan(self.total, self.count)
def span_precision(pred_begin, pred_end, gold_begin, gold_end):
    """Calculates the precision metric given prediction and labelled spans.

  Computes an Estimator-style metric for precision given begin and end spans of
  predictions and golden labels.

  Args:
    pred_begin: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the starting positions of the predicted spans.
    pred_end: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the ending positions of the predicted spans.
    gold_begin: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the starting positions of the golden labelled spans.
    gold_end: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the ending positions of the golden labelled spans.

  Returns:
    A tuple of (precision_value, precision_update_op) where `precision_value`
    returns the precision metric value and `precision_update_op` updates the
    internal variables.
  """
    with ops.name_scope("Precision"):
        counts, update_op = _update_confusion_matrix(pred_begin, pred_end,
                                                     gold_begin, gold_end)
        tp, fp, _ = counts
        value = math_ops.div_no_nan(math_ops.cast(tp, dtypes.float32),
                                    math_ops.cast(tp + fp, dtypes.float32))
        return value, update_op
Example #17
    def call(self, values, denominator):
        """Computes the rate since the last call.

    Args:
      values: Tensor with the per-example value.
      denominator: Measure to take the rate with respect to.

    Returns:
      The rate or 0 if denominator is unchanged since last call.
    """
        if denominator.dtype != dtypes.float64:
            denominator = math_ops.cast(denominator, dtypes.float64)
        if values.dtype != dtypes.float64:
            values = math_ops.cast(values, dtypes.float64)

        state_ops.assign(self.numer, math_ops.subtract(values,
                                                       self.prev_values))
        state_ops.assign(self.denom,
                         math_ops.subtract(denominator, self.prev_denominator))
        state_ops.assign(self.prev_values, values)
        state_ops.assign(self.prev_denominator, denominator)

        return math_ops.div_no_nan(self.numer,
                                   math_ops.maximum(self.denom, 0),
                                   name="safe_rate")
Example #18
def jaccard_index(y_true, y_pred, data_format=None):
    '''Jaccard index, or Intersection over Union (IoU). (metric)
    The IoU is often considered a better measure of segmentation accuracy.
    If both y_true and y_pred are binary, the intersection I(y_true, y_pred) marks the part
    where the prediction is correct, while the union U(y_true, y_pred) contains both correct
    and wrong predictions. I/U gives the proportion of correct predictions.
    Compared to other error functions (like MSE), it concentrates on the regions where
    y_true=1 or y_pred=1.
    This function is implemented as:
        jacc = sum(logical_and(y_true, y_pred)) / sum(logical_or(y_true, y_pred))
    Arguments:
        data_format: 'channels_first' or 'channels_last'. The default setting is generally
                     'channels_last' like other tf.keras APIs.
    Input:
        y_true: label, tensor in any shape, should have at least 3 axes.
        y_pred: prediction, tensor in any shape, should have at least 3 axes.
    Output:
        scalar, the mean Jaccard index between y_true and y_pred over all channels.
    '''
    get_reduced_axes = get_channels(y_true, data_format)
    bin_y_true = gen_math_ops.greater(y_true, 0.5)
    bin_y_pred = gen_math_ops.greater(y_pred, 0.5)
    valNumer = gen_math_ops.logical_and(bin_y_pred, bin_y_true)
    valDomin = gen_math_ops.logical_or(bin_y_pred, bin_y_true)
    valNumer = math_ops.reduce_sum(math_ops.cast(valNumer, dtype=y_pred.dtype), axis=get_reduced_axes)
    valDomin = math_ops.reduce_sum(math_ops.cast(valDomin, dtype=y_pred.dtype), axis=get_reduced_axes)
    return math_ops.reduce_mean(math_ops.div_no_nan(valNumer, valDomin))
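A tiny numeric sketch of the same IoU idea with public TF ops (get_channels and the function above are not needed; the tensors are made up):

import tensorflow as tf

y_true = tf.constant([[1., 0., 1., 1.]])
y_pred = tf.constant([[1., 1., 0., 1.]])
inter = tf.reduce_sum(tf.cast(tf.logical_and(y_true > 0.5, y_pred > 0.5), tf.float32))
union = tf.reduce_sum(tf.cast(tf.logical_or(y_true > 0.5, y_pred > 0.5), tf.float32))
iou = tf.math.divide_no_nan(inter, union)   # 2 / 4 = 0.5 here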
Example #19
def linear_jaccard_loss(y_true, y_pred, data_format=None):
    '''Simple linear approximation for Jaccard index, 
           or Intersection over Union (IoU). (loss)
    This function is a simple, linear approximation of IoU. The main idea is:
        1. logical_and(y_true, y_pred) can be approximated by y_true * y_pred;
        2. logical_or(y_true, y_pred) can be approximated by
           y_true + y_pred - y_true * y_pred.
    This approximation guarantees that when both y_true and y_pred are binary,
    it returns exactly the same value as the original metric, IoU.
    It can be shown that for x, y in [0, 1],
        x * y <= x + y - x * y.
    To learn more about IoU, please check mdnt.metrics.jaccard_index.
    This function is implemented as:
        appx_jacc = 1 - [ sum(y_true * y_pred) ] / [ sum(y_true + y_pred - y_true * y_pred) ]
    The division above is a zero-safe division: when the denominator is 0, it
    returns 0.
    NOTE THAT THIS IMPLEMENTATION IS THE COMPLEMENT OF THE JACCARD INDEX.
    Arguments:
        data_format: 'channels_first' or 'channels_last'. The default setting is generally
                     'channels_last' like other tf.keras APIs.
    Input:
        y_true: label, tensor in any shape, should have at least 3 axes.
        y_pred: prediction, tensor in any shape, should have at least 3 axes.
    Output:
        scalar, the approximated and complementary mean Jaccard index between y_true and
        y_pred over all channels.
    '''
    get_reduced_axes = get_channels(y_true, data_format)
    get_mul = y_true * y_pred
    valNumer = math_ops.reduce_sum(get_mul, axis=get_reduced_axes)
    valDomin = math_ops.reduce_sum(y_true + y_pred - get_mul,
                                   axis=get_reduced_axes)
    return 1 - math_ops.reduce_mean(math_ops.div_no_nan(valNumer, valDomin))
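On the same binary example as before, the linear surrogate reproduces the exact IoU, as the docstring claims (a quick check, not part of the original module):

import tensorflow as tf

y_true = tf.constant([[1., 0., 1., 1.]])
y_pred = tf.constant([[1., 1., 0., 1.]])
inter = tf.reduce_sum(y_true * y_pred)                    # 2
union = tf.reduce_sum(y_true + y_pred - y_true * y_pred)  # 4
loss  = 1.0 - tf.math.divide_no_nan(inter, union)         # 0.5, same as 1 - IoU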
def span_f1(pred_begin, pred_end, gold_begin, gold_end):
    """Calculates the F1 metric given prediction and labelled spans.

  Computes an Estimator-style metric for F1 given begin and end spans of
  predictions and golden labels.

  Args:
    pred_begin: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the starting positions of the predicted spans.
    pred_end: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the ending positions of the predicted spans.
    gold_begin: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the starting positions of the golden labelled spans.
    gold_end: A `RaggedTensor` w/ `ragged_rank`=1 of type int64. This contains
      the ending positions of the golden labelled spans.

  Returns:
    A tuple of (f1_value, f1_update_op) where `f1_value` returns the F1 metric
    value and `f1_update_op` updates the internal variables.
  """
    with ops.name_scope("F1"):
        precision_value, prec_update_op = span_precision(
            pred_begin, pred_end, gold_begin, gold_end)
        recall_value, recall_update_op = span_recall(pred_begin, pred_end,
                                                     gold_begin, gold_end)
        value = 2 * math_ops.div_no_nan(precision_value * recall_value,
                                        precision_value + recall_value)
        update_op = control_flow_ops.group(prec_update_op, recall_update_op)
        return value, update_op
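The F1 combination above is just the divide_no_nan-protected harmonic mean of precision and recall; a two-line sketch with made-up values:

import tensorflow as tf

precision = tf.constant(0.8)
recall    = tf.constant(0.5)
f1 = 2.0 * tf.math.divide_no_nan(precision * recall, precision + recall)  # ~0.615
# With precision = recall = 0, divide_no_nan keeps f1 at 0 instead of NaN.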
Example #21
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if compat.forward_compatible(2018, 11, 1):
    return math_ops.div_no_nan(numerator, denominator, name=name)
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator,
                   array_ops.where(
                       math_ops.equal(denominator, 0),
                       array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
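The safe divide above reduces to element-wise divide_no_nan; a minimal behaviour sketch with made-up values:

import tensorflow as tf

num = tf.constant([1.0, 2.0, 3.0])
den = tf.constant([2.0, 0.0, 4.0])
print(tf.math.divide_no_nan(num, den))   # [0.5, 0.0, 0.75]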
Example #22
def _safe_div(numerator, denominator, name="value"):
    """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
    if compat.forward_compatible(2018, 11, 1):
        return math_ops.div_no_nan(numerator, denominator, name=name)
    return array_ops.where(math_ops.greater(denominator, 0),
                           math_ops.div(
                               numerator,
                               array_ops.where(
                                   math_ops.equal(denominator, 0),
                                   array_ops.ones_like(denominator),
                                   denominator)),
                           array_ops.zeros_like(numerator),
                           name=name)
 def testGradientWithDenominatorIsZero(self):
     x = constant_op.constant(np.arange(-3, 3), dtype=dtypes.float32)
     y = array_ops.zeros_like(x, dtype=dtypes.float32)
     outputs = math_ops.div_no_nan(x, y)
     with self.cached_session():
         dx, dy = gradients.gradients(outputs, [x, y])
         self.assertAllClose(dx, np.zeros(x.shape.as_list()))
         self.assertAllClose(dy, np.zeros(y.shape.as_list()))
Example #24
  def testSmall(self, dtype):
    # Choose values whose squared magnitude underflows to zero/subnormal.
    zero = constant_op.constant([0, 0, 0, 0], dtype=dtype)
    divs = constant_op.constant([1e-25, -1e-20, 1e-165, -1e-160], dtype=dtype)
    tf_result = math_ops.div_no_nan(zero, divs)

    # Results should always be exactly zero.
    self.assertAllEqual(tf_result, zero)
Example #25
def _CustomReciprocal(x):
    """Wrapper function around `math_ops.div_no_nan()` to perform a "safe" reciprocal incase the input is zero. Avoids divide by zero and NaNs.

  Input:
    x -> input tensor to be reciprocat-ed.
  Returns:
    x_reciprocal -> reciprocal of x without NaNs.
  """
    return math_ops.div_no_nan(1.0, x)
    def weighted(y_true, y_pred, weights, mask=None):
        """Wrapper function.

    Arguments:
        y_true: `y_true` argument of `fn`.
        y_pred: `y_pred` argument of `fn`.
        weights: Weights tensor.
        mask: Mask tensor.

    Returns:
        Scalar tensor.
    """
        # score_array has ndim >= 2
        score_array = fn(y_true, y_pred)
        if mask is not None:
            mask = math_ops.cast(mask, y_pred.dtype)
            # Update weights with mask.
            if weights is None:
                weights = mask
            else:
                # Update shape of weights if possible before adding mask.
                # Update dimensions of weights to match with mask if possible.
                mask, _, weights = metrics_module.squeeze_or_expand_dimensions(
                    mask, None, weights)
                try:
                    # Broadcast weights if possible.
                    weights = weights_broadcast_ops.broadcast_weights(
                        weights, mask)
                    weights *= mask
                except ValueError:
                    score_array *= mask
                    score_array /= K.mean(mask)
                    # TODO(psv): Handle case when mask and weight shapes are not
                    # compatible.

        # Apply sample weighting.
        if weights is not None:

            # Update dimensions of weights to match with values if possible.
            score_array, _, weights = metrics_module.squeeze_or_expand_dimensions(
                score_array, None, weights)
            try:
                # Broadcast weights if possible.
                weights = weights_broadcast_ops.broadcast_weights(
                    weights, score_array)
            except ValueError:
                # Reduce values to same ndim as weight array.
                ndim = K.ndim(score_array)
                weight_ndim = K.ndim(weights)
                score_array = K.mean(score_array,
                                     axis=list(range(weight_ndim, ndim)))

            score_array = math_ops.multiply(score_array, weights)
            score_array = math_ops.reduce_sum(score_array)
            weights = math_ops.reduce_sum(weights)
            score_array = math_ops.div_no_nan(score_array, weights)
        return K.mean(score_array)
 def testBasicGradient(self):
     inputs = constant_op.constant(np.arange(-3, 3), dtype=dtypes.float32)
     outputs = math_ops.div_no_nan(inputs, 1 + math_ops.abs(inputs))
     with self.cached_session():
         error = gradient_checker.compute_gradient_error(
             inputs,
             inputs.get_shape().as_list(), outputs,
             outputs.get_shape().as_list())
         self.assertLess(error, 1e-4)
    def testBasic(self, dtype):
        nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1)
        divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24)

        np_result = np.true_divide(nums, divs)
        np_result[:, divs[0] == 0] = 0

        with test_util.use_gpu():
            tf_result = math_ops.div_no_nan(nums, divs)
            self.assertAllClose(tf_result, np_result)
 def testBasicGradient(self):
   inputs = constant_op.constant(np.arange(-3, 3),
                                 dtype=dtypes.float32)
   outputs = math_ops.div_no_nan(inputs, 1 + math_ops.abs(inputs))
   with self.cached_session():
     error = gradient_checker.compute_gradient_error(
         inputs,
         inputs.get_shape().as_list(), outputs,
         outputs.get_shape().as_list())
     self.assertLess(error, 1e-4)
 def testGradientWithDenominatorIsZero(self):
   x = constant_op.constant(np.arange(-3, 3),
                            dtype=dtypes.float32)
   y = array_ops.zeros_like(x,
                            dtype=dtypes.float32)
   outputs = math_ops.div_no_nan(x, y)
   with self.cached_session():
     dx, dy = gradients.gradients(outputs, [x, y])
     self.assertAllClose(dx.eval(), np.zeros(x.shape.as_list()))
     self.assertAllClose(dy.eval(), np.zeros(y.shape.as_list()))
Example #31
    def update_state(self, y_true, y_pred, sample_weight=None):

        #         y_true = tf.convert_to_tensor(sc.inverse_transform(y_true))
        #         y_pred = tf.convert_to_tensor(sc.inverse_transform(y_pred))

        match_count = tf.reduce_sum(
            tf.cast(tf.less_equal(tf.abs(y_true - y_pred), 0.02),
                    dtype=tf.float32))
        total_count = y_true.shape[0]
        self.matches_rate = math_ops.div_no_nan(match_count, total_count)
    def testBasic(self):
        for dtype in [np.float32, np.float64]:
            nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1)
            divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24)

            np_result = np.true_divide(nums, divs)
            np_result[:, divs[0] == 0] = 0

            with self.cached_session(use_gpu=True):
                tf_result = math_ops.div_no_nan(nums, divs).eval()
                self.assertAllEqual(tf_result, np_result)
Example #33
def _safe_mean(losses, num_present):
    """Computes a safe mean of the losses.
    Args:
      losses: `Tensor` whose elements contain individual loss measurements.
      num_present: The number of measurable elements in `losses`.
    Returns:
      A scalar representing the mean of `losses`. If `num_present` is zero,
        then zero is returned.
    """
    total_loss = math_ops.reduce_sum(losses)
    return math_ops.div_no_nan(total_loss, num_present, name="value")
  def testBasic(self):
    for dtype in [np.float32, np.float64]:
      nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1)
      divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24)

      np_result = np.true_divide(nums, divs)
      np_result[:, divs[0] == 0] = 0

      with self.cached_session(use_gpu=True):
        tf_result = math_ops.div_no_nan(nums, divs).eval()
        self.assertAllEqual(tf_result, np_result)
 def call(self, y_true, y_pred):
     loss = tf.keras.losses.sparse_categorical_crossentropy(
         y_true, y_pred, from_logits=self.from_logits, axis=self.axis)
     # masking positions where value equals `mask_id` (usually padding token's id)
     mask = tf.logical_not(tf.equal(
         y_true, self.mask_id))  # shape (batch_size, time_steps)
     mask = tf.cast(mask, dtype=loss.dtype)
     numerator = tf.reduce_sum(loss * mask)  # total loss of valid positions
      denominator = tf.reduce_sum(mask)  # total number of valid positions
     loss = math_ops.div_no_nan(
         numerator, denominator)  # average loss over valid positions
     return loss
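A minimal usage sketch of the masked average above, with made-up logits and a hypothetical padding id of 0 (the class wrapper is not needed):

import tensorflow as tf

y_true = tf.constant([[3, 1, 0, 0]], dtype=tf.int32)       # 0 = padding id (assumption)
logits = tf.random.normal((1, 4, 5))                        # (batch, time, vocab)
loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, logits, from_logits=True)
mask = tf.cast(tf.not_equal(y_true, 0), loss.dtype)
mean_loss = tf.math.divide_no_nan(tf.reduce_sum(loss * mask), tf.reduce_sum(mask))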
Example #36
def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name="value")
  def weighted(y_true, y_pred, weights, mask=None):
    """Wrapper function.

    Arguments:
        y_true: `y_true` argument of `fn`.
        y_pred: `y_pred` argument of `fn`.
        weights: Weights tensor.
        mask: Mask tensor.

    Returns:
        Scalar tensor.
    """
    # score_array has ndim >= 2
    score_array = fn(y_true, y_pred)
    if mask is not None:
      mask = math_ops.cast(mask, y_pred.dtype)
      # Update weights with mask.
      if weights is None:
        weights = mask
      else:
        # Update dimensions of weights to match with mask if possible.
        mask, _, weights = metrics_module.squeeze_or_expand_dimensions(
            mask, None, weights)
        weights *= mask

    # Apply sample weighting.
    if weights is not None:

      # Update dimensions of weights to match with values if possible.
      score_array, _, weights = metrics_module.squeeze_or_expand_dimensions(
          score_array, None, weights)
      try:
        # Broadcast weights if possible.
        weights = weights_broadcast_ops.broadcast_weights(weights, score_array)
      except ValueError:
        # Reduce values to same ndim as weight array.
        ndim = K.ndim(score_array)
        weight_ndim = K.ndim(weights)
        score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))

      score_array = math_ops.multiply(score_array, weights)
      score_array = math_ops.reduce_sum(score_array)
      weights = math_ops.reduce_sum(weights)
      score_array = math_ops.div_no_nan(score_array, weights)
    return K.mean(score_array)
Example #38
  def call(self, values, denominator):
    """Computes the rate since the last call.

    Args:
      values: Tensor with the per-example value.
      denominator: Measure to take the rate with respect to.

    Returns:
      The rate or 0 if denominator is unchanged since last call.
    """
    if denominator.dtype != dtypes.float64:
      denominator = math_ops.cast(denominator, dtypes.float64)
    if values.dtype != dtypes.float64:
      values = math_ops.cast(values, dtypes.float64)

    state_ops.assign(self.numer, math_ops.subtract(values, self.prev_values))
    state_ops.assign(self.denom,
                     math_ops.subtract(denominator, self.prev_denominator))
    state_ops.assign(self.prev_values, values)
    state_ops.assign(self.prev_denominator, denominator)

    return math_ops.div_no_nan(self.numer,
                               math_ops.maximum(self.denom, 0),
                               name="safe_rate")
Example #39
def sequence_loss(logits,
                  targets,
                  weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  sum_over_timesteps=False,
                  sum_over_batch=False,
                  softmax_loss_function=None,
                  name=None):
  """Weighted cross-entropy loss for a sequence of logits.

  Depending on the values of `average_across_timesteps` / `sum_over_timesteps`
  and `average_across_batch` / `sum_over_batch`, the returned Tensor will have
  rank 0, 1, or 2 as these arguments reduce the cross-entropy at each target,
  which has shape `[batch_size, sequence_length]`, over their respective
  dimensions. For example, if `average_across_timesteps` is `True` and
  `average_across_batch` is `False`, then the returned Tensor will have shape
  `[batch_size]`.

  Note that `average_across_timesteps` and `sum_over_timesteps` cannot both be
  True at the same time. The same holds for `average_across_batch` and
  `sum_over_batch`.

  The recommended loss reduction in TF 2.0 is a sum-over reduction instead of a
  weighted average. Users are recommended to use `sum_over_timesteps` and
  `sum_over_batch` for reduction.

  Args:
    logits: A Tensor of shape
      `[batch_size, sequence_length, num_decoder_symbols]` and dtype float.
      The logits correspond to the prediction across all classes at each
      timestep.
    targets: A Tensor of shape `[batch_size, sequence_length]` and dtype
      int. The target represents the true class at each timestep.
    weights: A Tensor of shape `[batch_size, sequence_length]` and dtype
      float. `weights` constitutes the weighting of each prediction in the
      sequence. When using `weights` as masking, set all valid timesteps to 1
      and all padded timesteps to 0, e.g. a mask returned by `tf.sequence_mask`.
    average_across_timesteps: If set, sum the cost across the sequence
      dimension and divide the cost by the total label weight across timesteps.
    average_across_batch: If set, sum the cost across the batch dimension and
      divide the returned cost by the batch size.
    sum_over_timesteps: If set, sum the cost across the sequence dimension and
      divide by the size of the sequence. Note that any element with 0 weights
      will be excluded from the size calculation.
    sum_over_batch: If set, sum the cost across the batch dimension and divide
      the total cost by the batch size. Note that any element with 0 weights
      will be excluded from the size calculation.
    softmax_loss_function: Function (labels, logits) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
      **Note that to avoid confusion, it is required for the function to accept
      named arguments.**
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A float Tensor of rank 0, 1, or 2 depending on the
    `average_across_timesteps` and `average_across_batch` arguments. By default,
    it has rank 0 (scalar) and is the weighted average cross-entropy
    (log-perplexity) per symbol.

  Raises:
    ValueError: logits does not have 3 dimensions or targets does not have 2
                dimensions or weights does not have 2 dimensions.
  """
  if len(logits.get_shape()) != 3:
    raise ValueError("Logits must be a "
                     "[batch_size x sequence_length x logits] tensor")
  if len(targets.get_shape()) != 2:
    raise ValueError("Targets must be a [batch_size x sequence_length] tensor")
  if len(weights.get_shape()) != 2:
    raise ValueError("Weights must be a [batch_size x sequence_length] tensor")
  if average_across_timesteps and sum_over_timesteps:
    raise ValueError("average_across_timesteps and sum_over_timesteps cannot "
                     "be set to True at same time.")
  if average_across_batch and sum_over_batch:
    raise ValueError("average_across_batch and sum_over_batch cannot be set "
                     "to True at same time.")
  with ops.name_scope(name, "sequence_loss", [logits, targets, weights]):
    num_classes = array_ops.shape(logits)[2]
    logits_flat = array_ops.reshape(logits, [-1, num_classes])
    targets = array_ops.reshape(targets, [-1])
    if softmax_loss_function is None:
      crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=targets, logits=logits_flat)
    else:
      crossent = softmax_loss_function(labels=targets, logits=logits_flat)
    crossent *= array_ops.reshape(weights, [-1])
    if average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_size = math_ops.reduce_sum(weights)
      crossent = math_ops.div_no_nan(crossent, total_size)
    elif sum_over_timesteps and sum_over_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_count = math_ops.cast(math_ops.count_nonzero(weights),
                                  crossent.dtype)
      crossent = math_ops.div_no_nan(crossent, total_count)
    else:
      crossent = array_ops.reshape(crossent, array_ops.shape(logits)[0:2])
      if average_across_timesteps or average_across_batch:
        reduce_axis = [0] if average_across_batch else [1]
        crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
        total_size = math_ops.reduce_sum(weights, axis=reduce_axis)
        crossent = math_ops.div_no_nan(crossent, total_size)
      elif sum_over_timesteps or sum_over_batch:
        reduce_axis = [0] if sum_over_batch else [1]
        crossent = math_ops.reduce_sum(crossent, axis=reduce_axis)
        total_count = math_ops.cast(
            math_ops.count_nonzero(weights, axis=reduce_axis),
            dtype=crossent.dtype)
        crossent = math_ops.div_no_nan(crossent, total_count)
    return crossent
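A minimal call sketch for the sum-over reduction recommended in the docstring (assuming sequence_loss above is in scope; shapes and values are made up):

import tensorflow as tf

batch, time, vocab = 2, 3, 5
logits  = tf.random.normal((batch, time, vocab))
targets = tf.constant([[1, 2, 0], [3, 0, 0]], dtype=tf.int32)
weights = tf.constant([[1., 1., 0.], [1., 0., 0.]])   # mask out padded steps

# loss = sequence_loss(logits, targets, weights,
#                      average_across_timesteps=False, average_across_batch=False,
#                      sum_over_timesteps=True, sum_over_batch=True)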
Example #40
 def result(self):
   return math_ops.div_no_nan(self.total, self.count)
Example #41
def mean_pairwise_squared_error(predictions,
                                labels=None,
                                weights=1.0,
                                scope=None):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences that are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of size [batch_size, d0, ... dN], the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN]
      where N+1 is the total number of dimensions in `predictions`.
    labels: The ground truth output tensor, whose shape must match the shape of
      the `predictions` tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape [batch_size]
      or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    weights = math_ops.to_float(ops.convert_to_tensor(weights))

    diffs = math_ops.subtract(predictions, labels)

    # Need to verify here since the function doesn't use compute_weighted_loss
    if diffs.get_shape().ndims is None:
      raise ValueError("diffs.get_shape().ndims cannot be None")
    if weights.get_shape().ndims is None:
      raise ValueError("weights.get_shape().ndims cannot be None")

    reduction_indices = list(range(1, diffs.get_shape().ndims))

    sum_squares_diff_per_batch = math_ops.reduce_sum(
        math_ops.square(diffs), reduction_indices=reduction_indices)
    num_present_per_batch = _num_present(diffs, weights, per_batch=True)

    term1 = 2.0 * math_ops.div_no_nan(
        sum_squares_diff_per_batch, num_present_per_batch, name="value")

    sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
    term2 = 2.0 * math_ops.div_no_nan(
        math_ops.square(sum_diff),
        math_ops.square(num_present_per_batch),
        name="value")

    loss = _scale_losses(term1 - term2, weights)

    mean_loss = array_ops.where(
        math_ops.reduce_sum(num_present_per_batch) > 0,
        loss,
        array_ops.zeros_like(loss),
        name="value")
    add_loss(mean_loss)
    return mean_loss
Example #42
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences that are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.to_float(weights)
    labels = math_ops.to_float(labels)
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, labels),)):
      predictions = math_ops.to_float(predictions)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss
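A worked numeric sketch of the pairwise-errors formula from the docstring (not the full weighted implementation; the values are made up):

import tensorflow as tf

labels      = tf.constant([[1.0, 2.0, 4.0]])   # [a, b, c]
predictions = tf.constant([[1.5, 2.0, 3.0]])   # [x, y, z]
d = predictions - labels                        # per-element differences
# ((a-b)-(x-y))^2 + ((a-c)-(x-z))^2 + ((b-c)-(y-z))^2, averaged over the 3 pairs
pairs = [(0, 1), (0, 2), (1, 2)]
loss = tf.add_n([tf.square(d[0, i] - d[0, j]) for i, j in pairs]) / 3.0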