Example #1
def triplet_semihard_loss(embeddings1,
                          embeddings2,
                          img_labels,
                          txt_labels,
                          margin=0.4):
    pdist_matrix = pairwise_distances(embeddings1, embeddings2)
    adjacency = _get_anchor_positive_triplet_mask(img_labels, txt_labels)
    adjacency_not = _get_anchor_negative_triplet_mask(img_labels, txt_labels)
    batch_size = tf.shape(img_labels)[0]
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.less(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keep_dims=True), 0.0),
        tf.convert_to_tensor([batch_size, batch_size]))
    mask_final = array_ops.transpose(mask_final)
    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)
    # semihard
    negatives_outside = array_ops.reshape(
        masked_maximum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)
    negatives_inside = array_ops.tile(
        masked_minimum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)
    loss_mat = math_ops.add(margin, -pdist_matrix + semi_hard_negatives)
    mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32)
    num_positives = math_ops.reduce_sum(mask_positives)
    triplet_loss = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
                                    num_positives,
                                    name='triplet_semihard_loss')

    margin_p = config.margin_p
    margin_n = config.margin_n
    loss_mat1 = math_ops.add(margin_p, -pdist_matrix)
    loss_mat2 = math_ops.add(-margin_n, semi_hard_negatives)
    triplet_loss1 = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat1, mask_positives), 0.0)),
                                     num_positives,
                                     name='triplet_semihard_loss1')
    triplet_loss2 = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat2, mask_positives), 0.0)),
                                     num_positives,
                                     name='triplet_semihard_loss2')
    # final boundary controlled triplet loss
    # we equally treat these two additional items
    return triplet_loss + 0.5 * (triplet_loss1 + triplet_loss2)
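The two extra terms above act as absolute boundaries on the pairwise matrix: one penalizes positive pairs that violate margin_p, the other penalizes semi-hard negatives that violate margin_n. A minimal NumPy sketch of just those terms, assuming a precomputed pairwise matrix `pdist`, the selected `semi_hard_negatives`, and a positive-pair mask `mask_pos` (all hypothetical stand-ins for the tensors built above):

import numpy as np

def boundary_terms(pdist, semi_hard_negatives, mask_pos, margin_p=0.4, margin_n=0.2):
    # margin_p / margin_n are placeholders; the original reads them from config.
    num_positives = mask_pos.sum()
    # Positive pairs beyond the margin_p boundary.
    loss1 = np.maximum((margin_p - pdist) * mask_pos, 0.0).sum() / num_positives
    # Semi-hard negatives beyond the margin_n boundary.
    loss2 = np.maximum((semi_hard_negatives - margin_n) * mask_pos, 0.0).sum() / num_positives
    return 0.5 * (loss1 + loss2)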
Example #2
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            collections=None,
                            name=None):
  """Compute the weighted moving average of `value`.

  Conceptually, the weighted moving average is:
    `moving_average(value * weight) / moving_average(weight)`,
  where a moving average updates by the rule
    `new_value = decay * old_value + (1 - decay) * update`
  Internally, this Op keeps moving average variables of both `value * weight`
  and `weight`.

  Args:
    value: A numeric `Tensor`.
    decay: A float `Tensor` or float value.  The moving average decay.
    weight:  `Tensor` that keeps the current value of a weight.
      Shape should be able to multiply `value`.
    truediv:  Boolean, if `True`, dividing by `moving_average(weight)` is
      floating point division.  If `False`, use division implied by dtypes.
    collections:  List of graph collections keys to add the internal variables
      `value * weight` and `weight` to.
      Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
    name: Optional name of the returned operation.
      Defaults to "WeightedMovingAvg".

  Returns:
    An Operation that updates and returns the weighted moving average.
  """
  # Unlike assign_moving_average, the weighted moving average doesn't modify
  # user-visible variables. It is the ratio of two internal variables, which are
  # moving averages of the updates.  Thus, the signature of this function is
  # quite different than assign_moving_average.
  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  with variable_scope.variable_scope(name, "WeightedMovingAvg",
                                     [value, weight, decay]) as scope:
    value_x_weight_var = variable_scope.get_variable(
        "value_x_weight",
        initializer=init_ops.zeros_initializer(value.get_shape(),
                                               dtype=value.dtype),
        trainable=False,
        collections=collections)
    weight_var = variable_scope.get_variable(
        "weight",
        initializer=init_ops.zeros_initializer(weight.get_shape(),
                                               dtype=weight.dtype),
        trainable=False,
        collections=collections)
    numerator = assign_moving_average(
        value_x_weight_var, value * weight, decay, zero_debias=False)
    denominator = assign_moving_average(
        weight_var, weight, decay, zero_debias=False)

    if truediv:
      return math_ops.truediv(numerator, denominator, name=scope.name)
    else:
      return math_ops.div(numerator, denominator, name=scope.name)
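As a sanity check on the docstring's formula, here is the same recurrence in plain Python (a sketch with no TF variables; names are illustrative):

import numpy as np

def weighted_moving_average_np(values, weights, decay):
    # Keep moving averages of value * weight and of weight, return their ratio.
    num = den = 0.0
    out = []
    for v, w in zip(values, weights):
        num = decay * num + (1 - decay) * v * w
        den = decay * den + (1 - decay) * w
        out.append(num / den)
    return np.array(out)

# weighted_moving_average_np([1.0, 2.0, 3.0], [1.0, 1.0, 2.0], decay=0.9)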
Example #3
def _estimate_data_distribution(labels, num_classes, smoothing_constant=10):
  """Estimate data distribution as labels are seen."""
  # Variable to track running count of classes. Smooth by a nonzero value to
  # avoid division-by-zero. Higher values provide more stability at the cost of
  # slower convergence.
  if smoothing_constant <= 0:
    raise ValueError('smoothing_constant must be positive.')
  num_examples_per_class_seen = variables.Variable(
      initial_value=[smoothing_constant] * num_classes, trainable=False,
      name='class_count', dtype=dtypes.int64)

  # Update the class-count based on what labels are seen in batch.
  num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
      math_ops.reduce_sum(array_ops.one_hot(labels, num_classes,
                                            dtype=dtypes.int64), 0))

  # Normalize count into a probability.
  # NOTE: Without the `+= 0` line below, the test
  # `testMultiThreadedEstimateDataDistribution` fails. The reason is that
  # before this line, `num_examples_per_class_seen` is a Tensor that shares a
  # buffer with an underlying `ref` object. When the `ref` is changed by another
  # thread, `num_examples_per_class_seen` changes as well. Since this can happen
  # in the middle of the normalization computation, we get probabilities that
  # are very far from summing to one. Adding `+= 0` copies the contents of the
  # tensor to a new buffer, which will be consistent from the start to the end
  # of the normalization computation.
  num_examples_per_class_seen += 0
  init_prob_estimate = math_ops.truediv(
      num_examples_per_class_seen,
      math_ops.reduce_sum(num_examples_per_class_seen))

  # Must return float32 (not float64) to agree with downstream `_verify_input`
  # checks.
  return math_ops.cast(init_prob_estimate, dtypes.float32)
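The same estimate in plain NumPy, with the smoothing constant seeding the class counts (a sketch, not the contrib implementation):

import numpy as np

def estimate_data_distribution_np(labels, num_classes, counts=None, smoothing_constant=10):
    if counts is None:
        counts = np.full(num_classes, smoothing_constant, dtype=np.int64)
    # Accumulate one count per label seen in this batch.
    counts = counts + np.bincount(labels, minlength=num_classes)
    # Normalize counts into a probability estimate (float32, as above).
    return counts, (counts / counts.sum()).astype(np.float32)

# counts, dist = estimate_data_distribution_np([0, 2, 2], num_classes=3)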
Example #4
def triplet_loss_adapted_from_tf(labels, embeddings):
    margin = 1
    labels = tf.argmax(labels, axis=1)
    # Code from Tensorflow function [tf.contrib.losses.metric_learning.triplet_semihard_loss] starts here:

    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    # lshape=array_ops.shape(labels)
    # assert lshape.shape == 1
    # labels = array_ops.reshape(labels, [lshape[0], 1])
    labels = array_ops.reshape(labels, [-1, 1])
    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(pdist_matrix_tile, array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1]))
    )
    mask_final = array_ops.reshape(
        math_ops.greater(math_ops.reduce_sum(math_ops.cast(mask, dtype=tf.float32), 1, keepdims=True), 0.0),
        [batch_size, batch_size]
    )

    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=tf.float32)
    mask = math_ops.cast(mask, dtype=tf.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside, negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency, dtype=tf.float32) - \
        math_ops.cast(array_ops.diag(array_ops.ones([batch_size])), dtype=tf.float32)

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
        math_ops.cast(num_positives, dtype=tf.float32),
        name='triplet_semihard_loss')

    # Code from Tensorflow function semi-hard triplet loss ENDS here.
    return semi_hard_triplet_loss_distance
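The selection rule in the comments above, reduced to a one-anchor NumPy illustration: take the smallest negative distance greater than the positive distance, falling back to the largest negative distance when none exists (an illustrative helper, not project code):

import numpy as np

def semi_hard_negative(d_ap, d_an):
    # d_ap: positive distance; d_an: array of negative distances for this anchor.
    harder = d_an[d_an > d_ap]  # negatives_outside candidates
    return harder.min() if harder.size else d_an.max()  # else negatives_inside

# semi_hard_negative(0.5, np.array([0.3, 0.7, 1.2]))  ->  0.7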
Example #5
def weighted_resample(inputs, weights, overall_rate, scope=None,
                      mean_decay=0.999, warmup=10, seed=None):
  """Performs an approximate weighted resampling of `inputs`.

  This method chooses elements from `inputs` where each item's rate of
  selection is proportional to its value in `weights`, and the average
  rate of selection across all inputs (and many invocations!) is
  `overall_rate`.

  Args:
    inputs: A list of tensors whose first dimension is `batch_size`.
    weights: A `[batch_size]`-shaped tensor with each batch member's weight.
    overall_rate: Desired overall rate of resampling.
    scope: Scope to use for the op.
    mean_decay: How quickly to decay the running estimate of the mean weight.
    warmup: Until the resulting tensor has been evaluated `warmup`
      times, the resampling method uses the true mean over all calls
      as its weight estimate, rather than a decayed mean.
    seed: Random seed.

  Returns:
    A list of tensors exactly like `inputs`, but with an unknown (and
      possibly zero) first dimension.
    A tensor containing the effective resampling rate used for each output.

  """
  # Algorithm: Just compute rates as weights/mean_weight *
  # overall_rate. This way the average weight corresponds to the
  # overall rate, and a weight twice the average has twice the rate,
  # etc.
  with ops.name_scope(scope, 'weighted_resample', inputs) as opscope:
    # First: Maintain a running estimated mean weight, with decay
    # adjusted (by also maintaining an invocation count) during the
    # warmup period so that at the beginning, there aren't too many
    # zeros mixed in, throwing the average off.

    with variable_scope.variable_scope(scope, 'estimate_mean', inputs):
      count_so_far = variable_scope.get_local_variable(
          'resample_count', initializer=0)

      estimated_mean = variable_scope.get_local_variable(
          'estimated_mean', initializer=0.0)

      count = count_so_far.assign_add(1)
      real_decay = math_ops.minimum(
          math_ops.truediv((count - 1), math_ops.minimum(count, warmup)),
          mean_decay)

      batch_mean = math_ops.reduce_mean(weights)
      mean = moving_averages.assign_moving_average(
          estimated_mean, batch_mean, real_decay, zero_debias=False)

    # Then, normalize the weights into rates using the mean weight and
    # overall target rate:
    rates = weights * overall_rate / mean

    results = resample_at_rate([rates] + inputs, rates,
                               scope=opscope, seed=seed, back_prop=False)

    return (results[1:], results[0])
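The rate computation that the algorithm comment describes, in isolation (a sketch; the batch mean stands in for the decayed running estimate):

import numpy as np

weights = np.array([0.5, 1.0, 2.0, 0.5])
overall_rate = 0.25
rates = weights * overall_rate / weights.mean()
# An average-weight element is resampled at overall_rate; a double-weight
# element at twice that, and so on.
print(rates)  # [0.125 0.25  0.5   0.125]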
Example #6
def histogram_fixed_width(values, value_range, nbins=100):
    from tensorflow.python.framework import dtypes
    from tensorflow.python.ops import clip_ops
    from tensorflow.python.ops import math_ops
    from tensorflow.python.ops import array_ops

    nbins_float = math_ops.to_float(nbins)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(values - value_range[0],
                                     value_range[1] - value_range[0],
                                     name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

    #counts = tf.Variable(...) <= array_ops.zeros_like(indices, dtype=dtypes.int32))
    #return tf.scatter_add(counts, indices, array_ops.ones_like(indices, dtype=dtypes.int32)), indices

    return math_ops.unsorted_segment_sum(
        array_ops.ones_like(indices, dtype=dtypes.float32), indices,
        nbins), indices
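The scale / floor / clip binning used above, as a NumPy equivalent (a sketch under the same conventions):

import numpy as np

def histogram_fixed_width_np(values, value_range, nbins=100):
    lo, hi = value_range
    scaled = (np.asarray(values, dtype=np.float64) - lo) / (hi - lo)  # map to [0, 1]
    indices = np.floor(nbins * scaled)                         # map to {0, ..., nbins-1}
    indices = np.clip(indices, 0, nbins - 1).astype(np.int32)  # clip edge cases
    return np.bincount(indices, minlength=nbins).astype(np.float32), indices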
Example #8
def _ReinforceNormalGrad(op, _):
    # f: normal probability density function
    # x: the sampled values (output)
    # u: mean (input)
    # s: standard deviation (stddev)
    #
    # Derivative of log normal w.r.t. means:
    # d ln(f(x,u,s))   (x - u)
    # -------------- = -------
    #       d u          s^2
    #
    # Derivative of log normal w.r.t. stddevs:
    # d ln(f(x,u,s))   (x - u)^2 - s^2
    # -------------- = ---------------
    #       d s             s^3
    #
    means = op.inputs[0]
    stddevs = op.inputs[1]

    # x - u
    grad_means = op.outputs[0] - means
    # divide by squared standard deviation
    grad_means = math_ops.truediv(grad_means, math_ops.square(stddevs))
    # multiply by reward
    grad_means = grad_means * _get_reward(means)
    # multiply by -1 (gradient descent on mean)
    grad_means = -grad_means

    grad_stddevs = None
    if (stddevs.get_shape().ndims > 0 and
            stddevs.get_shape()[0] == means.get_shape()[0]):
        # (x - u)^2
        grad_stddevs = math_ops.square(op.outputs[0] - means)
        # subtract s^2
        stddevs_squared = math_ops.square(stddevs)
        grad_stddevs = grad_stddevs - stddevs_squared
        # divide by s^3
        stddevs_cubed = math_ops.add(
            stddevs_squared * stddevs,
            ops.convert_to_tensor(0.00001, dtype=stddevs.dtype))
        grad_stddevs = math_ops.truediv(grad_stddevs, stddevs_cubed)
        # multiply by reward
        grad_stddevs = grad_stddevs * _get_reward(stddevs)
        # multiply by -1 (gradient descent on stddev)
        grad_stddevs = -grad_stddevs

    return grad_means, grad_stddevs, None
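The two derivatives written out in the comment can be checked numerically against the log-density itself; a standalone sketch:

import numpy as np

def logpdf(x, u, s):
    return -0.5 * np.log(2.0 * np.pi * s**2) - (x - u)**2 / (2.0 * s**2)

x, u, s, eps = 1.3, 1.0, 0.5, 1e-6
# d ln f / d u = (x - u) / s^2
assert np.isclose((x - u) / s**2,
                  (logpdf(x, u + eps, s) - logpdf(x, u - eps, s)) / (2 * eps))
# d ln f / d s = ((x - u)^2 - s^2) / s^3
assert np.isclose(((x - u)**2 - s**2) / s**3,
                  (logpdf(x, u, s + eps) - logpdf(x, u, s - eps)) / (2 * eps))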
Example #9
def compute_cdf(values, value_range, **kwargs):
    """Returns the normalized cumulative distribution of the given values tensor.

  Uses tf.while_loop to directly compute the cdf of the values. Number of bins
  for histogram is fixed at _NBINS=255

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`
    **kwargs: keyword arguments: name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.

  """
    nbins = _NBINS
    name = kwargs.get('name', None)
    with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
        values = ops.convert_to_tensor(values, name='values')
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins_float = np.float32(nbins)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

        cdf = array_ops.zeros(nbins)
        i = constant_op.constant(0)

        def loop_cond(loop_count, _):
            return math_ops.less(loop_count, nbins)

        def loop_body(loop_count, cdf):
            temp = math_ops.reduce_sum(
                math_ops.cast(math_ops.less_equal(indices, loop_count),
                              dtypes.float32))
            cdf = math_ops.add(
                cdf,
                array_ops.one_hot(loop_count,
                                  depth=_NBINS,
                                  on_value=temp,
                                  off_value=0.0))
            return [loop_count + 1, cdf]

        _, cdf = control_flow_ops.while_loop(loop_cond,
                                             loop_body, [i, cdf],
                                             maximum_iterations=nbins)

        return math_ops.div(cdf, math_ops.reduce_max(cdf))
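The same normalized CDF without the while_loop, in NumPy (a sketch; _NBINS is fixed at 255 per the docstring):

import numpy as np

def compute_cdf_np(values, value_range, nbins=255):
    lo, hi = value_range
    scaled = (np.asarray(values, dtype=np.float64) - lo) / (hi - lo)
    indices = np.clip(np.floor(nbins * scaled), 0, nbins - 1).astype(np.int32)
    # cdf[k] counts how many indices are <= k, matching the loop body above.
    cdf = np.cumsum(np.bincount(indices, minlength=nbins)).astype(np.float32)
    return cdf / cdf.max()  # normalize so the final bin equals 1.0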
Example #10
 def mean(self):
   """Class means for every batch member."""
   with ops.name_scope('mean'):
     alpha_sum = math_ops.reduce_sum(self._alpha,
                                     reduction_indices=self._dist_indices,
                                     keep_dims=True)
     mean = math_ops.truediv(self._alpha, alpha_sum)
     mean.set_shape(self._alpha.get_shape())
     return mean
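This is the Dirichlet mean, alpha_i / sum_j alpha_j, along the distribution axis; numerically (illustrative):

import numpy as np

alpha = np.array([1.0, 2.0, 3.0])
print(alpha / alpha.sum(keepdims=True))  # approximately [0.1667 0.3333 0.5]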
Example #11
def loss(y_true, y_pred):
    del y_true
    margin = 1.
    labels = y_pred[:, :1]

    labels = tf.cast(labels, dtype='int32')

    embeddings = y_pred[:, 1:]

    pdist_matrix = pairwise_distance(embeddings, squared=True)
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0)),
        num_positives,
        name='triplet_semihard_loss')

    ### Code from Tensorflow function semi-hard triplet loss ENDS here.
    return semi_hard_triplet_loss_distance
Example #12
def compute_cdf(values, value_range, **kwargs):
  """Returns the normalized cumulative distribution of the given values tensor.

  Uses tf.while_loop to directly compute the cdf of the values. Number of bins
  for histogram is fixed at _NBINS=255

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`
    **kwargs: keyword arguments: name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.

  """
  nbins = _NBINS
  name = kwargs.get('name', None)
  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
    values = ops.convert_to_tensor(values, name='values')
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins_float = np.float32(nbins)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

    cdf = array_ops.zeros(nbins)
    i = constant_op.constant(0)

    def loop_cond(loop_count, _):
      return math_ops.less(loop_count, nbins)

    def loop_body(loop_count, cdf):
      temp = math_ops.reduce_sum(
          math_ops.cast(
              math_ops.less_equal(indices, loop_count), dtypes.float32))
      cdf = math_ops.add(
          cdf,
          array_ops.one_hot(
              loop_count, depth=_NBINS, on_value=temp, off_value=0.0))
      return [loop_count + 1, cdf]

    _, cdf = control_flow_ops.while_loop(
        loop_cond, loop_body, [i, cdf], maximum_iterations=nbins)

    return math_ops.div(cdf, math_ops.reduce_max(cdf))
Example #13
 def mean(self):
     """Class means for every batch member."""
     with ops.name_scope('mean'):
         alpha_sum = math_ops.reduce_sum(
             self._alpha,
             reduction_indices=self._dist_indices,
             keep_dims=True)
         mean = math_ops.truediv(self._alpha, alpha_sum)
         mean.set_shape(self._alpha.get_shape())
         return mean
Example #14
def _auc_convert_hist_to_auc(hist_true_acc, hist_false_acc, nbins):
    """Convert histograms to auc.

  Args:
    hist_true_acc:  `Tensor` holding accumulated histogram of scores for records
      that were `True`.
    hist_false_acc:  `Tensor` holding accumulated histogram of scores for
      records that were `False`.
    nbins:  Integer number of bins in the histograms.

  Returns:
    Scalar `Tensor` estimating AUC.
  """
    # Note that this follows the "Approximating AUC" section in:
    # Efficient AUC learning curve calculation, R. R. Bouckaert,
    # AI'06 Proceedings of the 19th Australian joint conference on Artificial
    # Intelligence: advances in Artificial Intelligence
    # Pages 181-191.
    # Note that the above paper has an error, and we need to re-order our bins to
    # go from high to low score.

    # Normalize histogram so we get fraction in each bin.
    normed_hist_true = math_ops.truediv(hist_true_acc,
                                        math_ops.reduce_sum(hist_true_acc))
    normed_hist_false = math_ops.truediv(hist_false_acc,
                                         math_ops.reduce_sum(hist_false_acc))

    # These become delta x, delta y from the paper.
    delta_y_t = array_ops.reverse_v2(normed_hist_true, [0], name='delta_y_t')
    delta_x_t = array_ops.reverse_v2(normed_hist_false, [0], name='delta_x_t')

    # strict_1d_cumsum requires float32 args.
    delta_y_t = math_ops.cast(delta_y_t, dtypes.float32)
    delta_x_t = math_ops.cast(delta_x_t, dtypes.float32)

    # Trapezoidal integration, \int_0^1 0.5 * (y_t + y_{t-1}) dx_t
    y_t = _strict_1d_cumsum(delta_y_t, nbins)
    first_trap = delta_x_t[0] * y_t[0] / 2.0
    other_traps = delta_x_t[1:] * (y_t[1:] + y_t[:nbins - 1]) / 2.0
    return math_ops.add(first_trap,
                        math_ops.reduce_sum(other_traps),
                        name='auc')
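The same histogram-to-AUC conversion in NumPy: reverse to high-to-low scores, normalize, and apply the trapezoidal rule (a sketch; names are illustrative):

import numpy as np

def auc_from_histograms(hist_true, hist_false):
    delta_y = (hist_true / hist_true.sum())[::-1]    # positives per bin, high scores first
    delta_x = (hist_false / hist_false.sum())[::-1]  # negatives per bin
    y = np.cumsum(delta_y)                           # true-positive rate after each bin
    first_trap = delta_x[0] * y[0] / 2.0
    other_traps = delta_x[1:] * (y[1:] + y[:-1]) / 2.0
    return first_trap + np.sum(other_traps)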
Example #15
 def f(x1, x2):
     if x1.dtype == dtypes.bool:
         assert x2.dtype == dtypes.bool
         float_ = np_dtypes.default_float_type()
         x1 = math_ops.cast(x1, float_)
         x2 = math_ops.cast(x2, float_)
     if not np_dtypes.is_allow_float64():
         # math_ops.truediv in Python3 produces float64 when both inputs are int32
         # or int64. We want to avoid that when is_allow_float64() is False.
         x1, x2 = _avoid_float64(x1, x2)
     return math_ops.truediv(x1, x2)
Example #16
def _auc_convert_hist_to_auc(hist_true_acc, hist_false_acc, nbins):
  """Convert histograms to auc.

  Args:
    hist_true_acc:  `Tensor` holding accumulated histogram of scores for records
      that were `True`.
    hist_false_acc:  `Tensor` holding accumulated histogram of scores for
      records that were `False`.
    nbins:  Integer number of bins in the histograms.

  Returns:
    Scalar `Tensor` estimating AUC.
  """
  # Note that this follows the "Approximating AUC" section in:
  # Efficient AUC learning curve calculation, R. R. Bouckaert,
  # AI'06 Proceedings of the 19th Australian joint conference on Artificial
  # Intelligence: advances in Artificial Intelligence
  # Pages 181-191.
  # Note that the above paper has an error, and we need to re-order our bins to
  # go from high to low score.

  # Normalize histogram so we get fraction in each bin.
  normed_hist_true = math_ops.truediv(hist_true_acc,
                                      math_ops.reduce_sum(hist_true_acc))
  normed_hist_false = math_ops.truediv(hist_false_acc,
                                       math_ops.reduce_sum(hist_false_acc))

  # These become delta x, delta y from the paper.
  delta_y_t = array_ops.reverse(normed_hist_true, [True], name='delta_y_t')
  delta_x_t = array_ops.reverse(normed_hist_false, [True], name='delta_x_t')

  # strict_1d_cumsum requires float32 args.
  delta_y_t = math_ops.cast(delta_y_t, dtypes.float32)
  delta_x_t = math_ops.cast(delta_x_t, dtypes.float32)

  # Trapezoidal integration, \int_0^1 0.5 * (y_t + y_{t-1}) dx_t
  y_t = _strict_1d_cumsum(delta_y_t, nbins)
  first_trap = delta_x_t[0] * y_t[0] / 2.0
  other_traps = delta_x_t[1:] * (y_t[1:] + y_t[:nbins - 1]) / 2.0
  return math_ops.add(first_trap, math_ops.reduce_sum(other_traps), name='auc')
Example #17
def _safe_div(numerator, denominator, name):
    """Divides two values, returning 0 if the denominator is <= 0.
  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.
  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
    return array_ops.where(math_ops.greater(denominator, 0),
                           math_ops.truediv(numerator, denominator),
                           0,
                           name=name)
Example #18
def _safe_div(numerator, denominator, name):
    """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`
    """
    t = math_ops.truediv(numerator, denominator)
    zero = array_ops.zeros_like(t, dtype=denominator.dtype)
    condition = math_ops.greater(denominator, zero)
    zero = math_ops.cast(zero, t.dtype)
    return array_ops.where(condition, t, zero, name=name)
Example #19
def safe_div(numerator, denominator):
  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.

  Args:
    numerator: A `Tensor`.
    denominator: A `Tensor`, with dtype matching `numerator`.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  t = math_ops.truediv(numerator, denominator)
  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
  condition = math_ops.greater(denominator, zero)
  zero = math_ops.cast(zero, t.dtype)
  return array_ops.where(condition, t, zero)
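Behaviorally, all three helpers compute an elementwise a / b that falls back to 0 wherever the denominator is non-positive; a NumPy sketch of the same contract:

import numpy as np

def safe_div_np(numerator, denominator):
    numerator = np.asarray(numerator, dtype=np.float64)
    denominator = np.asarray(denominator, dtype=np.float64)
    with np.errstate(divide='ignore', invalid='ignore'):
        t = numerator / denominator
    # 0 wherever denominator <= 0, else numerator / denominator.
    return np.where(denominator > 0, t, 0.0)

# safe_div_np([1.0, 2.0, 3.0], [2.0, 0.0, -1.0])  ->  [0.5, 0.0, 0.0]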
Example #20
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            name="WeightedMovingAvg"):
    """Compute the weighted moving average of `value`.

    Conceptually, the weighted moving average is:
      moving_average(value * weight) / moving_average(weight),
    where a moving average updates by the rule
      new_value = decay * old_value + (1 - decay) * update

    Args:
      value: A tensor.
      decay: A float Tensor or float value.  The moving average decay.
      weight:  A tensor that keeps the current value of a weight.
        Shape should be able to multiply `value`.
      truediv:  Boolean, if True, dividing by moving_average(weight) is floating
        point division.  If False, use division implied by dtypes.
      name: Optional name of the returned operation.

    Returns:
      An Operation that updates the weighted moving average.
    """
    # Unlike assign_moving_average, the weighted moving average doesn't modify
    # user-visible variables. It is the ratio of two internal variables, which are
    # moving averages of the updates.  Thus, the signature of this function is
    # quite different than assign_moving_average.
    with variable_scope.variable_op_scope([value, weight, decay], name,
                                          name) as scope:
        value_variable = variable_scope.get_variable(
            "value",
            initializer=array_ops.zeros_initializer(value.get_shape(),
                                                    dtype=value.dtype),
            trainable=False)
        weight_variable = variable_scope.get_variable(
            "weight",
            initializer=array_ops.zeros_initializer(weight.get_shape(),
                                                    dtype=weight.dtype),
            trainable=False)
        numerator = assign_moving_average(value_variable, value * weight,
                                          decay)
        denominator = assign_moving_average(weight_variable, weight, decay)

        if truediv:
            return math_ops.truediv(numerator, denominator, name=scope.name)
        else:
            return math_ops.div(numerator, denominator, name=scope.name)
Example #21
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
    """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  """
    with ops.name_scope(name, 'histogram',
                        [values, value_range, nbins]) as scope:
        values = ops.convert_to_tensor(values, name='values')
        values = gen_array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
        nbins_float = math_ops.cast(nbins, values.dtype)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32)

        return math_ops.unsorted_segment_sum(array_ops.ones_like(indices,
                                                                 dtype=dtype),
                                             indices,
                                             nbins,
                                             name=scope)
Example #22
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  output_shape = op.outputs[0]._shape_tuple()  # pylint: disable=protected-access
  if (input_shape is not None and output_shape is not None and
      None not in input_shape and None not in output_shape):
    input_size = np.prod(input_shape)
    output_size = np.prod(output_shape)
    factor = input_size // max(output_size, 1)
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  else:
    input_shape = array_ops.shape(op.inputs[0])
    output_shape = array_ops.shape(op.outputs[0])
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  return math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), None
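The factor is simply how many input elements feed each output element, so the mean distributes an upstream gradient g as g / factor; a quick NumPy check (illustrative):

import numpy as np

x = np.arange(6.0).reshape(2, 3)
g = np.array([1.0, 2.0])          # upstream gradient for a mean over axis 1
factor = x.size // g.size         # 3 inputs per output element
grad_x = np.repeat(g[:, None], 3, axis=1) / factor
print(grad_x)                     # [[0.333 ...], [0.666 ...]] per row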
Example #23
def weighted_moving_average(
    value, decay, weight, truediv=True, name="WeightedMovingAvg"):
  """Compute the weighted moving average of `value`.

  Conceptually, the weighted moving average is:
    moving_average(value * weight) / moving_average(weight),
  where a moving average updates by the rule
    new_value = decay * old_value + (1 - decay) * update

  Args:
    value: A tensor.
    decay: A float Tensor or float value.  The moving average decay.
    weight:  A tensor that keeps the current value of a weight.
      Shape should be able to multiply `value`.
    truediv:  Boolean, if True, dividing by moving_average(weight) is floating
      point division.  If False, use division implied by dtypes.
    name: Optional name of the returned operation.

  Returns:
    An Operation that updates the weighted moving average.
  """
  # Unlike assign_moving_average, the weighted moving average doesn't modify
  # user-visible variables. It is the ratio of two internal variables, which are
  # moving averages of the updates.  Thus, the signature of this function is
  # quite different than assign_moving_average.
  with variable_scope.variable_op_scope(
      [value, weight, decay], name, name) as scope:
    value_variable = variable_scope.get_variable(
        "value",
        initializer=array_ops.zeros_initializer(
            value.get_shape(), dtype=value.dtype),
        trainable=False
    )
    weight_variable = variable_scope.get_variable(
        "weight",
        initializer=array_ops.zeros_initializer(
            weight.get_shape(), dtype=weight.dtype),
        trainable=False
    )
    numerator = assign_moving_average(value_variable, value * weight, decay)
    denominator = assign_moving_average(weight_variable, weight, decay)

    if truediv:
      return math_ops.truediv(numerator, denominator, name=scope.name)
    else:
      return math_ops.div(numerator, denominator, name=scope.name)
Example #25
def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
  """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  """
  with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope:
    values = ops.convert_to_tensor(values, name='values')
    values = gen_array_ops.reshape(values, [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
    nbins_float = math_ops.cast(nbins, values.dtype)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32)

    return math_ops.unsorted_segment_sum(
        array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
Example #26
def _estimate_data_distribution(c, num_examples_per_class_seen):
  """Estimate data distribution as labels are seen.

  Args:
    c: The class labels.  Type `int32`, shape `[batch_size]`.
    num_examples_per_class_seen: A `ResourceVariable` containing counts.
      Type `int64`, shape `[num_classes]`.

  Returns:
    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
  """
  num_classes = num_examples_per_class_seen.get_shape()[0].value
  # Update the class-count based on what labels are seen in
  # batch.  But do this asynchronously to avoid performing a
  # cross-device round-trip.  Just use the cached value.
  num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
      math_ops.reduce_sum(
          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
  init_prob_estimate = math_ops.truediv(
      num_examples_per_class_seen,
      math_ops.reduce_sum(num_examples_per_class_seen))
  return math_ops.cast(init_prob_estimate, dtypes.float32)
Example #27
def _estimate_data_distribution(c, num_examples_per_class_seen):
    """Estimate data distribution as labels are seen.

  Args:
    c: The class labels.  Type `int32`, shape `[batch_size]`.
    num_examples_per_class_seen: A `ResourceVariable` containing counts.
      Type `int64`, shape `[num_classes]`.

  Returns:
    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
  """
    num_classes = num_examples_per_class_seen.get_shape()[0].value
    # Update the class-count based on what labels are seen in
    # batch.  But do this asynchronously to avoid performing a
    # cross-device round-trip.  Just use the cached value.
    num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
        math_ops.reduce_sum(
            array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
    init_prob_estimate = math_ops.truediv(
        num_examples_per_class_seen,
        math_ops.reduce_sum(num_examples_per_class_seen))
    return math_ops.cast(init_prob_estimate, dtypes.float32)
Example #28
def _estimate_data_distribution(c, num_examples_per_class_seen):
  """Estimate data distribution as labels are seen.

  Args:
    c: The class labels.  Type `int32`, shape `[batch_size]`.
    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
      containing counts.

  Returns:
    num_examples_per_class_seen: Updated counts.  Type `int64`, shape
      `[num_classes]`.
    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
  """
  num_classes = num_examples_per_class_seen.get_shape()[0].value
  # Update the class-count based on what labels are seen in batch.
  num_examples_per_class_seen = math_ops.add(
      num_examples_per_class_seen, math_ops.reduce_sum(
          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
  init_prob_estimate = math_ops.truediv(
      num_examples_per_class_seen,
      math_ops.reduce_sum(num_examples_per_class_seen))
  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
  return num_examples_per_class_seen, dist
Example #29
def triplet_semihard_loss(labels, embeddings, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

  The loss encourages each positive distance (between a pair of embeddings
  with the same labels) to be smaller than the semi-hard negative distance:
  the minimum negative distance among those that are greater than the
  positive distance in the mini-batch. If no such negative exists, the
  largest negative distance is used instead.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """

    # Reshape the [batch_size] label tensor to [batch_size, 1] so the equality
    # below yields a pairwise [batch_size, batch_size] adjacency matrix.
    labels = array_ops.reshape(labels, [array_ops.shape(labels)[0], 1])
    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    triplet_loss = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
                                    num_positives,
                                    name='triplet_semihard_loss')

    return triplet_loss
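A minimal call sketch in a TF1-style graph, assuming the pairwise_distance / masked_minimum / masked_maximum helpers from this page are in scope (the dense layer and shapes are illustrative):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

labels = tf.placeholder(tf.int32, [None])
features = tf.placeholder(tf.float32, [None, 128])
# The docstring asks for l2-normalized embeddings.
embeddings = tf.nn.l2_normalize(tf.layers.dense(features, 64), axis=1)
loss = triplet_semihard_loss(labels, embeddings, margin=1.0)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)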
Example #30
def lifted_struct_loss(labels, embeddings, margin=1.0):
  """Computes the lifted structured loss.

  The loss encourages the positive distances (between a pair of embeddings
  with the same labels) to be smaller than any negative distances (between a
  pair of embeddings with different labels) in the mini-batch in a way
  that is differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  # Build pairwise squared distance matrix.
  pairwise_distances = pairwise_distance(embeddings)

  # Build pairwise binary adjacency matrix.
  adjacency = math_ops.equal(labels, array_ops.transpose(labels))
  # Invert so we can select negatives only.
  adjacency_not = math_ops.logical_not(adjacency)

  batch_size = array_ops.size(labels)

  diff = margin - pairwise_distances
  mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
  # Safe maximum: Temporarily shift negative distances
  #   above zero before taking max.
  #     this is to take the max only among negatives.
  row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
  row_negative_maximums = math_ops.reduce_max(
      math_ops.multiply(diff - row_minimums, mask), 1,
      keepdims=True) + row_minimums

  # Compute the loss.
  # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
  #   where m_i is the max of alpha - negative D_i's.
  # This matches the Caffe loss layer implementation at:
  #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

  max_elements = math_ops.maximum(
      row_negative_maximums, array_ops.transpose(row_negative_maximums))
  diff_tiled = array_ops.tile(diff, [batch_size, 1])
  mask_tiled = array_ops.tile(mask, [batch_size, 1])
  max_elements_vect = array_ops.reshape(
      array_ops.transpose(max_elements), [-1, 1])

  loss_exp_left = array_ops.reshape(
      math_ops.reduce_sum(
          math_ops.multiply(
              math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
          1,
          keepdims=True), [batch_size, batch_size])

  loss_mat = max_elements + math_ops.log(
      loss_exp_left + array_ops.transpose(loss_exp_left))
  # Add the positive distance.
  loss_mat += pairwise_distances

  mask_positives = math_ops.cast(
      adjacency, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))

  # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
  num_positives = math_ops.reduce_sum(mask_positives) / 2.0

  lifted_loss = math_ops.truediv(
      0.25 * math_ops.reduce_sum(
          math_ops.square(
              math_ops.maximum(
                  math_ops.multiply(loss_mat, mask_positives), 0.0))),
      num_positives,
      name='liftedstruct_loss')
  return lifted_loss
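The "safe maximum" comment is the usual max-shift that keeps a masked log-sum-exp from overflowing; the same trick in NumPy (illustrative):

import numpy as np

def masked_logsumexp(v, mask):
    # log(sum_j mask_j * exp(v_j)), shifted by the masked max for stability.
    shift = np.max(np.where(mask > 0, v, -np.inf))
    return shift + np.log(np.sum(mask * np.exp(v - shift)))

v = np.array([1000.0, 999.0, -5.0])
mask = np.array([1.0, 1.0, 0.0])
print(masked_logsumexp(v, mask))  # ~1000.313, with no overflow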
Example #31
def histogram_fixed_width(values,
                          value_range,
                          nbins=100,
                          dtype=dtypes.int32,
                          name=None):
  """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor`.  new_values <= value_range[0] will be
      mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
      Must be same dtype as new_values.
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  Examples:
  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.Session() as sess:
    hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
    variables.initialize_all_variables().run()
    sess.run(hist) => [2, 1, 1, 0, 2]
  ```
  """
  with ops.op_scope([values, value_range, nbins], name,
                    'histogram_fixed_width') as scope:
    values = ops.convert_to_tensor(values, name='values')
    values = array_ops.reshape(values, [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
    nbins_float = math_ops.to_float(nbins)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(values - value_range[0],
                                     value_range[1] - value_range[0],
                                     name='scaled_values')

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

    # TODO(langmore) This creates an array of ones to add up and place in the
    # bins.  This is inefficient, so replace when a better Op is available.
    return math_ops.unsorted_segment_sum(
        array_ops.ones_like(indices, dtype=dtype),
        indices,
        nbins,
        name=scope)
Example #32
 def _safe_div(self, numerator, denominator, name):
     t = math_ops.truediv(numerator, denominator)
     zero = array_ops.zeros_like(t, dtype=denominator.dtype)
     condition = math_ops.greater(denominator, zero)
     zero = math_ops.cast(zero, t.dtype)
     return array_ops.where(condition, t, zero, name=name)
Example #33
def histogram_fixed_width(hist,
                          new_values,
                          value_range,
                          use_locking=False,
                          name='histogram_fixed_width'):
  """Update histogram Variable with new values.

  This Op fills histogram with counts of values falling within fixed-width,
  half-open bins.

  Args:
    hist:  1-D mutable `Tensor`, e.g. a `Variable`.
    new_values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor`.  new_values <= value_range[0] will be
      mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
      Must be same dtype as new_values.
    use_locking:  Boolean.
      If `True`, use locking during the operation (optional).
    name:  A name for this operation (optional).

  Returns:
    An op that updates `hist` with `new_values` when evaluated.

  Examples:
  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.Session() as sess:
    hist = variables.Variable(array_ops.zeros(nbins, dtype=tf.int32))
    hist_update = histogram_ops.histogram_fixed_width(hist, new_values,
                                                      value_range)
    variables.initialize_all_variables().run()
    sess.run(hist_update) => [2, 1, 1, 0, 2]
  ```
  """
  with ops.op_scope([hist, new_values, value_range], name) as scope:
    new_values = ops.convert_to_tensor(new_values, name='new_values')
    new_values = array_ops.reshape(new_values, [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    dtype = hist.dtype

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(new_values - value_range[0],
                                     value_range[1] - value_range[0],
                                     name='scaled_values')
    nbins = math_ops.cast(hist.get_shape()[0], scaled_values.dtype)

    # map tensor values within the open interval value_range to {0,.., nbins-1},
    # values outside the open interval will be zero or less, or nbins or more.
    indices = math_ops.floor(nbins * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins - 1), dtypes.int32)

    # Dummy vector to scatter.
    # TODO(langmore) Replace non-ideal creation of large dummy vector once an
    # alternative to scatter is available.
    updates = array_ops.ones([indices.get_shape()[0]], dtype=dtype)
    return state_ops.scatter_add(hist,
                                 indices,
                                 updates,
                                 use_locking=use_locking,
                                 name=scope)
Example #34
0
 def div_x2(x2):
     # NOTE: x1 is a free variable captured from the enclosing (not shown)
     # scope; this fragment is not runnable on its own.
     return math_ops.truediv(x1, x2) * math_ops.cast(1.1,
                                                     dtype=x2.dtype)
Example #35
0
def streaming_covariance(predictions,
                         labels,
                         weights=None,
                         metrics_collections=None,
                         updates_collections=None,
                         name=None):
    """Computes the unbiased sample covariance between `predictions` and `labels`.

  The `streaming_covariance` function creates four local variables,
  `comoment`, `mean_prediction`, `mean_label`, and `count`, which are used to
  compute the sample covariance between predictions and labels across multiple
  batches of data. The covariance is ultimately returned as an idempotent
  operation that simply divides `comoment` by `count` - 1. We use `count` - 1
  in order to get an unbiased estimate.

  The algorithm used for this online computation is described in
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
  Specifically, the formula used to combine two sample comoments is
  `C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB`
  The comoment for a single batch of data is simply
  `sum((x - E[x]) * (y - E[y]))`, optionally weighted.

  If `weights` is not None, then it is used to compute weighted comoments,
  means, and count. NOTE: these weights are treated as "frequency weights", as
  opposed to "reliability weights". See discussion of the difference on
  https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

  To facilitate the computation of covariance across multiple batches of data,
  the function creates an `update_op` operation, which updates underlying
  variables and returns the updated covariance.

  Args:
    predictions: A `Tensor` of arbitrary size.
    labels: A `Tensor` of the same size as `predictions`.
    weights: Optional `Tensor` indicating the frequency with which an example is
      sampled. Rank must be 0, or the same rank as `labels`, and must be
      broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
      same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that the metric value
      variable should be added to.
    updates_collections: An optional list of collections that the metric update
      ops should be added to.
    name: An optional variable_scope name.

  Returns:
    covariance: A `Tensor` representing the current unbiased sample covariance,
      `comoment` / (`count` - 1).
    update_op: An operation that updates the local variables appropriately.

  Raises:
    ValueError: If labels and predictions are of different sizes or if either
      `metrics_collections` or `updates_collections` are not a list or tuple.
  """
    with variable_scope.variable_scope(name, 'covariance',
                                       (predictions, labels, weights)):
        predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
            predictions, labels, weights)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        count_ = metrics_impl.metric_variable([], dtypes.float32, name='count')
        mean_prediction = metrics_impl.metric_variable([],
                                                       dtypes.float32,
                                                       name='mean_prediction')
        mean_label = metrics_impl.metric_variable([],
                                                  dtypes.float32,
                                                  name='mean_label')
        comoment = metrics_impl.metric_variable(  # C_A in update equation
            [], dtypes.float32, name='comoment')

        if weights is None:
            batch_count = math_ops.cast(array_ops.size(labels),
                                        dtypes.float32)  # n_B in eqn
            weighted_predictions = predictions
            weighted_labels = labels
        else:
            weights = weights_broadcast_ops.broadcast_weights(weights, labels)
            batch_count = math_ops.reduce_sum(weights)  # n_B in eqn
            weighted_predictions = math_ops.multiply(predictions, weights)
            weighted_labels = math_ops.multiply(labels, weights)

        update_count = state_ops.assign_add(count_, batch_count)  # n_AB in eqn
        prev_count = update_count - batch_count  # n_A in update equation

        # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
        # batch_mean_prediction is E[x_B] in the update equation
        batch_mean_prediction = math_ops.div_no_nan(
            math_ops.reduce_sum(weighted_predictions), batch_count)
        delta_mean_prediction = math_ops.div_no_nan(
            (batch_mean_prediction - mean_prediction) * batch_count,
            update_count)
        update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                      delta_mean_prediction)
        # prev_mean_prediction is E[x_A] in the update equation
        prev_mean_prediction = update_mean_prediction - delta_mean_prediction

        # batch_mean_label is E[y_B] in the update equation
        batch_mean_label = math_ops.div_no_nan(
            math_ops.reduce_sum(weighted_labels), batch_count)
        delta_mean_label = math_ops.div_no_nan(
            (batch_mean_label - mean_label) * batch_count, update_count)
        update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
        # prev_mean_label is E[y_A] in the update equation
        prev_mean_label = update_mean_label - delta_mean_label

        unweighted_batch_coresiduals = ((predictions - batch_mean_prediction) *
                                        (labels - batch_mean_label))
        # batch_comoment is C_B in the update equation
        if weights is None:
            batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals)
        else:
            batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals *
                                                 weights)

        # View delta_comoment as = C_AB - C_A in the update equation above.
        # Since C_A is stored in a var, by how much do we need to increment that var
        # to make the var = C_AB?
        delta_comoment = (batch_comoment +
                          (prev_mean_prediction - batch_mean_prediction) *
                          (prev_mean_label - batch_mean_label) *
                          (prev_count * batch_count / update_count))
        update_comoment = state_ops.assign_add(comoment, delta_comoment)

        covariance = array_ops.where(math_ops.less_equal(count_, 1.),
                                     float('nan'),
                                     math_ops.truediv(comoment, count_ - 1),
                                     name='covariance')
        with ops.control_dependencies([update_comoment]):
            update_op = array_ops.where(math_ops.less_equal(count_, 1.),
                                        float('nan'),
                                        math_ops.truediv(comoment, count_ - 1),
                                        name='update_op')

    if metrics_collections:
        ops.add_to_collections(metrics_collections, covariance)

    if updates_collections:
        ops.add_to_collections(updates_collections, update_op)

    return covariance, update_op
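# NumPy check (illustrative only) of the comoment merge formula quoted in the
# docstring: combining per-batch comoments via
#   C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB
# matches the comoment computed directly over the concatenated data.
import numpy as np

def comoment(x, y):
    return np.sum((x - x.mean()) * (y - y.mean()))

rng = np.random.default_rng(0)
x_a, y_a = rng.normal(size=10), rng.normal(size=10)
x_b, y_b = rng.normal(size=7), rng.normal(size=7)
n_a, n_b = len(x_a), len(x_b)

merged = (comoment(x_a, y_a) + comoment(x_b, y_b) +
          (x_a.mean() - x_b.mean()) * (y_a.mean() - y_b.mean()) *
          n_a * n_b / (n_a + n_b))
direct = comoment(np.concatenate([x_a, x_b]), np.concatenate([y_a, y_b]))
assert np.isclose(merged, direct)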
Example #36
0
def histogram_fixed_width_bins(values,
                               value_range,
                               nbins=100,
                               dtype=dtypes.int32,
                               name=None):
    """Bins the given values for use in a histogram.

  Given the tensor `values`, this operation returns a rank 1 `Tensor`
  representing the indices of a histogram into which each element
  of `values` would be binned. The bins are equal width and
  determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width').

  Returns:
    A `Tensor` holding the indices of the binned values whose shape matches
    `values`.

  Raises:
    TypeError: If any unsupported dtype is provided.
    tf.errors.InvalidArgumentError: If value_range does not
        satisfy value_range[0] < value_range[1].

  Examples:

  >>> # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  ...
  >>> nbins = 5
  >>> value_range = [0.0, 5.0]
  >>> new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
  >>> indices = tf.histogram_fixed_width_bins(new_values, value_range, nbins=5)
  >>> indices.numpy()
  array([0, 0, 1, 2, 4, 4], dtype=int32)
  """
    with ops.name_scope(name, 'histogram_fixed_width_bins',
                        [values, value_range, nbins]):
        values = ops.convert_to_tensor(values, name='values')
        shape = array_ops.shape(values)

        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.cast(nbins, values.dtype)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
        return array_ops.reshape(indices, shape)
Example #37
0
def histogram_fixed_width(values, value_range, nbins=100, use_locking=True, dtype=dtypes.int32, name=None):
    """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor`.  values <= value_range[0] will be
      mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
      Must be same dtype as values.
    nbins:  Integer number of bins in this histogram.
    use_locking:  Boolean.
      If `True`, use locking during the operation (optional).
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width').

  Returns:
    A `Variable` holding histogram of values.

  Examples:
  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.Session() as sess:
    hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
    variables.initialize_all_variables().run()
    sess.run(hist) => [2, 1, 1, 0, 2]
  ```
  """
    with variable_scope.variable_op_scope([values, value_range], name, "histogram_fixed_width") as scope:
        values = ops.convert_to_tensor(values, name="values")
        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name="value_range")

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0], value_range[1] - value_range[0], name="scaled_values")

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins * scaled_values, name="indices")

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(clip_ops.clip_by_value(indices, 0, nbins - 1), dtypes.int32)

        # Dummy vector to scatter.
        # TODO(langmore) Replace non-ideal creation of large dummy vector once an
        # alternative to scatter is available.
        updates = array_ops.ones_like(indices, dtype=dtype)

        hist = variable_scope.get_variable(
            "hist", initializer=array_ops.zeros_initializer([nbins], dtype=dtype), trainable=False
        )
        hist_assign_zero = hist.assign(array_ops.zeros_like(hist))

        with ops.control_dependencies([hist_assign_zero]):
            return state_ops.scatter_add(hist, indices, updates, use_locking=use_locking, name=scope.name)
Example #38
0
def streaming_pearson_correlation(predictions,
                                  labels,
                                  weights=None,
                                  metrics_collections=None,
                                  updates_collections=None,
                                  name=None):
    """Computes Pearson correlation coefficient between `predictions`, `labels`.

  The `streaming_pearson_correlation` function delegates to
  `streaming_covariance` the tracking of three [co]variances:

  - `streaming_covariance(predictions, labels)`, i.e. covariance
  - `streaming_covariance(predictions, predictions)`, i.e. variance
  - `streaming_covariance(labels, labels)`, i.e. variance

  The product-moment correlation ultimately returned is an idempotent operation
  `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`. To
  facilitate correlation computation across multiple batches, the function
  groups the `update_op`s of the underlying streaming_covariance and returns an
  `update_op`.

  If `weights` is not None, then it is used to compute a weighted correlation.
  NOTE: these weights are treated as "frequency weights", as opposed to
  "reliability weights". See discussion of the difference on
  https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

  Args:
    predictions: A `Tensor` of arbitrary size.
    labels: A `Tensor` of the same size as predictions.
    weights: Optional `Tensor` indicating the frequency with which an example is
      sampled. Rank must be 0, or the same rank as `labels`, and must be
      broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
      same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that the metric value
      variable should be added to.
    updates_collections: An optional list of collections that the metric update
      ops should be added to.
    name: An optional variable_scope name.

  Returns:
    pearson_r: A `Tensor` representing the current Pearson product-moment
      correlation coefficient, the value of
      `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`.
    update_op: An operation that updates the underlying variables appropriately.

  Raises:
    ValueError: If `labels` and `predictions` are of different sizes, or if
      `weights` is the wrong size, or if either `metrics_collections` or
      `updates_collections` are not a `list` or `tuple`.
  """
    with variable_scope.variable_scope(name, 'pearson_r',
                                       (predictions, labels, weights)):
        predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
            predictions, labels, weights)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        # Broadcast weights here to avoid duplicate broadcasting in each call to
        # `streaming_covariance`.
        if weights is not None:
            weights = weights_broadcast_ops.broadcast_weights(weights, labels)
        cov, update_cov = streaming_covariance(predictions,
                                               labels,
                                               weights=weights,
                                               name='covariance')
        var_predictions, update_var_predictions = streaming_covariance(
            predictions,
            predictions,
            weights=weights,
            name='variance_predictions')
        var_labels, update_var_labels = streaming_covariance(
            labels, labels, weights=weights, name='variance_labels')

        pearson_r = math_ops.truediv(cov,
                                     math_ops.multiply(
                                         math_ops.sqrt(var_predictions),
                                         math_ops.sqrt(var_labels)),
                                     name='pearson_r')
        update_op = math_ops.truediv(update_cov,
                                     math_ops.multiply(
                                         math_ops.sqrt(update_var_predictions),
                                         math_ops.sqrt(update_var_labels)),
                                     name='update_op')

    if metrics_collections:
        ops.add_to_collections(metrics_collections, pearson_r)

    if updates_collections:
        ops.add_to_collections(updates_collections, update_op)

    return pearson_r, update_op
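# Illustrative NumPy check: the value returned above is just
# cov(p, l) / sqrt(var(p) * var(l)) built from the three streamed
# [co]variances, which agrees with np.corrcoef on the accumulated data.
import numpy as np

p = np.array([0.1, 0.4, 0.35, 0.8])
l = np.array([0.0, 1.0, 0.0, 1.0])
cov = np.sum((p - p.mean()) * (l - l.mean())) / (len(p) - 1)
pearson_r = cov / np.sqrt(p.var(ddof=1) * l.var(ddof=1))
assert np.isclose(pearson_r, np.corrcoef(p, l)[0, 1])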
Example #39
0
def triplet_semihard_loss_hash(labels,
                               embeddings,
                               objectives,
                               pairwise_distance,
                               margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.
    Args:
	labels - 1-D tensor [batch_size] as tf.int32
                multiclass integer labels.
	embeddings - 2-D tensor [batch_size, feature dimensions] as tf.float32
                     `Tensor` of embedding vectors. Embeddings should be l2 normalized.
        pairwise_distance - func 
            args : 
                feature - 2D tensor [ndata, feature dims], 
                objective - 2D tensor [ndata, feature_dims]
            return :
                2D tensor [ndata, ndata] 
	margin - float defaults to be 1.0
                margin term in the loss definition.

    Returns:
    	triplet_loss - tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # pdist_matrix[i][j] = dist(i, j)
    pdist_matrix = pairwise_distance(embeddings,
                                     objectives)  # [batch_size, batch_size]
    # adjacency[i][j]=1 if label[i]==label[j] else 0
    adjacency = math_ops.equal(
        labels, array_ops.transpose(labels))  # [batch_size, batch_size]
    # adjacency_not[i][j]=1 if label[i]!=label[j] else 0
    adjacency_not = math_ops.logical_not(adjacency)  # [batch_size, batch_size]
    batch_size = array_ops.size(labels)

    # Compute the mask.
    # pdist_matrix_tile[batch_size*i+j, k] = distance(j, k)
    pdist_matrix_tile = array_ops.tile(
        pdist_matrix, [batch_size, 1])  # [batch_size*batch_size, batch_size]
    # reshape(transpose(pdist_matrix), [-1, 1])[batch_size*i+j][0] = distance(j, i)
    # tile(adjacency_not, [batch_size, 1])[batch_size*i+j][k] = 1 if label[j]!=label[k] otherwise 0
    # mask[batch_size*i+j][k] = 1 if label[j]!=label[k] and distance(j,k)>distance(j,i)
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix),
                              [-1, 1])))  # [batch_size*batch_size, batch_size]
    # mask_final[i][j]=1 if there exists k s.t. label[j]!=label[k] and distance(j,k)>distance(j,i)
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keep_dims=True), 0.0),
        [batch_size, batch_size])  # [batch_size, batch_size]
    # mask_final[i][j]=1 if there exists k s.t. label[i]!=label[k] and distance(i,k)>distance(i,j)
    mask_final = array_ops.transpose(mask_final)  # [batch_size, batch_size]

    adjacency_not = math_ops.cast(
        adjacency_not, dtype=dtypes.float32)  # [batch_size, batch_size]
    mask = math_ops.cast(
        mask, dtype=dtypes.float32)  # [batch_size*batch_size, batch_size]

    # masked_minimum(pdist_matrix_tile, mask)[batch_size*i+j][0] = min over k of pdist_matrix[j][k] s.t. label[j]!=label[k] and distance(j,k)>distance(j,i)
    # negatives_outside[i][j] = min over k of pdist_matrix[j][k] s.t. label[j]!=label[k] and distance(j,k)>distance(j,i)
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask),
        [batch_size, batch_size])  # [batch_size, batch_size]
    # negatives_outside[i][j] = min over k of pdist_matrix[i][k] s.t. label[i]!=label[k] and distance(i,k)>distance(i,j)
    negatives_outside = array_ops.transpose(
        negatives_outside)  # [batch_size, batch_size]

    # negatives_inside[i][j] = max over k of pdist_matrix[i][k] s.t. label[i]!=label[k]
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not),
        [1, batch_size])  # [batch_size, batch_size]

    # semi_hard_negatives[i][j] = negatives_outside[i][j] if such a k exists (mask_final[i][j]), otherwise negatives_inside[i][j]
    semi_hard_negatives = array_ops.where(
        mask_final, negatives_outside,
        negatives_inside)  # [batch_size, batch_size]
    loss_mat = math_ops.add(margin, pdist_matrix -
                            semi_hard_negatives)  # [batch_size, batch_size]

    # mask_positives[i][j] = 1 if label[i]==label[j], and i!=j
    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    # in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    triplet_loss = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
                                    num_positives,
                                    name='triplet_semihard_loss')  # hinge
    return triplet_loss
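# The metric-learning examples in this file call masked_minimum and
# masked_maximum without defining them. A sketch following the helpers in
# TensorFlow contrib's metric-learning losses: subtracting the row extremum
# first means the subsequent max/min is effectively taken only over entries
# where mask == 1.
from tensorflow.python.ops import math_ops

def masked_maximum(data, mask, dim=1):
    # Max of data along dim, restricted to entries where mask == 1.
    axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
    return math_ops.reduce_max(
        math_ops.multiply(data - axis_minimums, mask), dim,
        keepdims=True) + axis_minimums

def masked_minimum(data, mask, dim=1):
    # Min of data along dim, restricted to entries where mask == 1.
    axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
    return math_ops.reduce_min(
        math_ops.multiply(data - axis_maximums, mask), dim,
        keepdims=True) + axis_maximums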
Example #40
0
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.

  The loss encourages the positive distances (between a pair of embeddings
  with the same labels) to be smaller than any negative distances (between a
  pair of embeddings with different labels) in the mini-batch in a way
  that is differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pairwise_distances = pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    diff = margin - pairwise_distances
    mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    # Safe maximum: Temporarily shift negative distances
    #   above zero before taking max.
    #     this is to take the max only among negatives.
    row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
    row_negative_maximums = math_ops.reduce_max(
        math_ops.multiply(diff - row_minimums,
                          mask), 1, keepdims=True) + row_minimums

    # Compute the loss.
    # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
    #   where m_i is the max of alpha - negative D_i's.
    # This matches the Caffe loss layer implementation at:
    #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

    max_elements = math_ops.maximum(row_negative_maximums,
                                    array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    mask_tiled = array_ops.tile(mask, [batch_size, 1])
    max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements),
                                          [-1, 1])

    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(math_ops.multiply(
            math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
                            1,
                            keepdims=True), [batch_size, batch_size])

    loss_mat = max_elements + math_ops.log(loss_exp_left +
                                           array_ops.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(mask_positives) / 2.0

    lifted_loss = math_ops.truediv(0.25 * math_ops.reduce_sum(
        math_ops.square(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0))),
                                   num_positives,
                                   name='liftedstruct_loss')
    return lifted_loss
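# The max_elements subtraction above is the standard log-sum-exp
# stabilization; an illustrative NumPy check:
# log(sum(exp(d))) == m + log(sum(exp(d - m))) for m = max(d).
import numpy as np

d = np.array([50.0, 49.0, 10.0])
m = d.max()
assert np.isclose(m + np.log(np.sum(np.exp(d - m))),
                  np.log(np.sum(np.exp(d))))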
Example #42
0
def triplet_semihard_loss(labels, embeddings, margin=1.0):
  """Computes the triplet loss with semi-hard negative mining.

  The loss encourages each positive distance (between a pair of embeddings
  with the same labels) to be smaller, by at least the margin constant, than
  the smallest negative distance that exceeds it (the semi-hard negative) in
  the mini-batch. If no such negative exists, the largest negative distance is
  used instead.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """
  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  # Build pairwise squared distance matrix.
  pdist_matrix = pairwise_distance(embeddings, squared=True)
  # Build pairwise binary adjacency matrix.
  adjacency = math_ops.equal(labels, array_ops.transpose(labels))
  # Invert so we can select negatives only.
  adjacency_not = math_ops.logical_not(adjacency)

  batch_size = array_ops.size(labels)

  # Compute the mask.
  pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
  mask = math_ops.logical_and(
      array_ops.tile(adjacency_not, [batch_size, 1]),
      math_ops.greater(
          pdist_matrix_tile, array_ops.reshape(
              array_ops.transpose(pdist_matrix), [-1, 1])))
  mask_final = array_ops.reshape(
      math_ops.greater(
          math_ops.reduce_sum(
              math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
          0.0), [batch_size, batch_size])
  mask_final = array_ops.transpose(mask_final)

  adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
  mask = math_ops.cast(mask, dtype=dtypes.float32)

  # negatives_outside: smallest D_an where D_an > D_ap.
  negatives_outside = array_ops.reshape(
      masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
  negatives_outside = array_ops.transpose(negatives_outside)

  # negatives_inside: largest D_an.
  negatives_inside = array_ops.tile(
      masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
  semi_hard_negatives = array_ops.where(
      mask_final, negatives_outside, negatives_inside)

  loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

  mask_positives = math_ops.cast(
      adjacency, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))

  # In lifted-struct, the authors multiply 0.5 for upper triangular
  #   in semihard, they take all positive pairs except the diagonal.
  num_positives = math_ops.reduce_sum(mask_positives)

  triplet_loss = math_ops.truediv(
      math_ops.reduce_sum(
          math_ops.maximum(
              math_ops.multiply(loss_mat, mask_positives), 0.0)),
      num_positives,
      name='triplet_semihard_loss')

  return triplet_loss
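# Sketch of the pairwise_distance helper these losses rely on (simplified
# from the contrib metric-learning version, which additionally masks the
# diagonal and guards the sqrt more carefully): D[i, j] = ||f_i - f_j||^2,
# optionally square-rooted, via ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2.
from tensorflow.python.ops import array_ops, math_ops

def pairwise_distance(feature, squared=False):
    squared_norms = math_ops.reduce_sum(math_ops.square(feature), axis=1,
                                        keepdims=True)
    distances = (squared_norms -
                 2.0 * math_ops.matmul(feature, array_ops.transpose(feature)) +
                 array_ops.transpose(squared_norms))
    # Clamp small negatives introduced by floating-point cancellation.
    distances = math_ops.maximum(distances, 0.0)
    return distances if squared else math_ops.sqrt(distances)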
Example #43
0
def histogram_fixed_width_bins(values,
                               value_range,
                               nbins=100,
                               dtype=dtypes.int32,
                               name=None):
    """Bins the given values for use in a histogram.

  Given the tensor `values`, this operation returns a rank 1 `Tensor`
  representing the indices of a histogram into which each element
  of `values` would be binned. The bins are equal width and
  determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width').

  Returns:
    A `Tensor` holding the indices of the binned values whose shape matches
    `values`.

  Examples:

  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.compat.v1.get_default_session() as sess:
    indices = tf.histogram_fixed_width_bins(new_values, value_range, nbins=5)
    variables.global_variables_initializer().run()
    sess.run(indices) => [0, 0, 1, 2, 4, 4]
  ```
  """
    with ops.name_scope(name, 'histogram_fixed_width_bins',
                        [values, value_range, nbins]):
        values = ops.convert_to_tensor(values, name='values')
        shape = array_ops.shape(values)

        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.cast(nbins, values.dtype)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
        return array_ops.reshape(indices, shape)
Example #45
0
def histogram_fixed_width(values,
                          value_range,
                          nbins=100,
                          dtype=dtypes.int32,
                          name=None):
    """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram counting
  the number of entries in `values` that fell into every bin.  The bins are
  equal width and determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor`.  values <= value_range[0] will be
      mapped to hist[0], values >= value_range[1] will be mapped to hist[-1].
      Must be same dtype as values.
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width').

  Returns:
    A 1-D `Tensor` holding histogram of values.

  Examples:

  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.Session() as sess:
    hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
    variables.global_variables_initializer().run()
    sess.run(hist) => [2, 1, 1, 0, 2]
  ```
  """
    with ops.name_scope(name, 'histogram_fixed_width',
                        [values, value_range, nbins]) as scope:
        values = ops.convert_to_tensor(values, name='values')
        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.to_float(nbins)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # map tensor values within the open interval value_range to {0,.., nbins-1},
        # values outside the open interval will be zero or less, or nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)

        # TODO(langmore) This creates an array of ones to add up and place in the
        # bins.  This is inefficient, so replace when a better Op is available.
        return math_ops.unsorted_segment_sum(array_ops.ones_like(indices,
                                                                 dtype=dtype),
                                             indices,
                                             nbins,
                                             name=scope)
Example #46
0
 def div_x1(x1):
     # NOTE: x2 is a free variable captured from the enclosing (not shown)
     # scope; this fragment is not runnable on its own.
     return math_ops.truediv(x1, x2) * math_ops.cast(1.1,
                                                     dtype=x1.dtype)