Example #1
    def test_sort_by_scores_3d(self):
        with tf.Graph().as_default():
            scores = [[1., 3., 2.], [1., 2., 3.]]
            example_feature = [[[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]],
                               [[10., 20., 30.], [40., 50., 60.],
                                [70., 80., 90.]]]
            with tf.compat.v1.Session() as sess:
                sorted_example_feature = sess.run(
                    utils.sort_by_scores(scores, [example_feature])[0])
                self.assertAllEqual(
                    sorted_example_feature,
                    [[[4., 5., 6.], [7., 8., 9.], [1., 2., 3.]],
                     [[70., 80., 90.], [40., 50., 60.], [10., 20., 30.]]])

                sorted_example_feature = sess.run(
                    utils.sort_by_scores(scores, [example_feature], topn=2)[0])
                self.assertAllEqual(sorted_example_feature,
                                    [[[4., 5., 6.], [7., 8., 9.]],
                                     [[70., 80., 90.], [40., 50., 60.]]])

                sorted_example_feature = sess.run(
                    utils.sort_by_scores([scores[0]],
                                         [[example_feature[0]]])[0])
                self.assertAllEqual(
                    sorted_example_feature,
                    [[[4., 5., 6.], [7., 8., 9.], [1., 2., 3.]]])
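For orientation, a minimal eager-mode sketch of the same call (a sketch only, assuming TF 2.x and the usual TF-Ranking import path `from tensorflow_ranking.python import utils`): each feature tensor in the list is reordered row-wise by descending score.

import tensorflow as tf
from tensorflow_ranking.python import utils  # assumed import path

scores = tf.constant([[1., 3., 2.]])        # higher score ranks earlier
features = tf.constant([[10., 20., 30.]])   # one feature value per item
sorted_features = utils.sort_by_scores(scores, [features])[0]
print(sorted_features.numpy())              # [[20. 30. 10.]]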
Example #2
 def test_sort_by_scores_shuffle_ties(self):
   with tf.Graph().as_default():
     tf.compat.v1.set_random_seed(589)
     scores = [[2., 1., 1.]]
     names = [['a', 'b', 'c']]
     with tf.compat.v1.Session() as sess:
       sorted_names = sess.run(
           utils.sort_by_scores(scores, [names], shuffle_ties=False))[0]
       self.assertAllEqual(sorted_names, [[b'a', b'b', b'c']])
       sorted_names = sess.run(
           utils.sort_by_scores(scores, [names], shuffle_ties=True, seed=2))[0]
       self.assertAllEqual(sorted_names, [[b'a', b'c', b'b']])
Example #3
def _sort_and_normalize(labels, logits, weights=None):
    """Sorts `labels` and `logits` and normalize `weights`.

  Args:
    labels: A `Tensor` of the same shape as `logits` representing graded
      relevance.
    logits: A `Tensor` with shape [batch_size, list_size]. Each value is the
      ranking score of the corresponding item.
    weights: A scalar, a `Tensor` with shape [batch_size, 1], or a `Tensor` with
      the same shape as `labels`.

  Returns:
    A tuple of (sorted_labels, sorted_logits, sorted_weights).
  """
    labels = tf.convert_to_tensor(value=labels)
    logits = tf.convert_to_tensor(value=logits)
    logits.get_shape().assert_has_rank(2)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    weights = 1.0 if weights is None else tf.convert_to_tensor(value=weights)
    weights = tf.ones_like(labels) * weights
    topn = tf.shape(input=logits)[1]

    # Only sort entries with valid labels that are >= 0.
    scores = tf.where(
        tf.greater_equal(labels, 0.), logits, -1e-6 * tf.ones_like(logits) +
        tf.reduce_min(input_tensor=logits, axis=1, keepdims=True))
    sorted_labels, sorted_logits, sorted_weights = utils.sort_by_scores(
        scores, [labels, logits, weights], topn=topn)
    return sorted_labels, sorted_logits, sorted_weights
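The score construction above is worth a closer look: entries whose label is negative (the padding convention) receive a score just below the per-list minimum logit, so they sort behind every valid entry without disturbing the order of the valid ones. A small standalone sketch with hypothetical values:

import tensorflow as tf

labels = tf.constant([[2., -1., 1.]])   # -1. marks an invalid (padded) entry
logits = tf.constant([[0.3, 9.9, 0.7]])
scores = tf.where(
    tf.greater_equal(labels, 0.), logits,
    -1e-6 * tf.ones_like(logits) +
    tf.reduce_min(logits, axis=1, keepdims=True))
# The invalid entry gets min(logits) - 1e-6 = 0.3 - 1e-6, so it sorts last.
print(scores.numpy())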
Example #4
    def _compute_impl(self, labels, predictions, weights, mask):
        """See `_RankingMetric`."""
        topn = tf.shape(predictions)[1] if self._topn is None else self._topn
        # Relevance = 1.0 when labels >= 1.0.
        relevance = tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32)
        sorted_relevance, sorted_weights = utils.sort_by_scores(
            predictions, [relevance, weights], topn=topn, mask=mask)
        per_list_relevant_counts = tf.cumsum(sorted_relevance, axis=1)
        per_list_cutoffs = tf.cumsum(tf.ones_like(sorted_relevance), axis=1)
        per_list_precisions = tf.math.divide_no_nan(per_list_relevant_counts,
                                                    per_list_cutoffs)
        total_precision = tf.reduce_sum(input_tensor=per_list_precisions *
                                        sorted_weights * sorted_relevance,
                                        axis=1,
                                        keepdims=True)

        # Compute the total relevance regardless of self._topn.
        total_relevance = tf.reduce_sum(input_tensor=weights * relevance,
                                        axis=1,
                                        keepdims=True)

        per_list_map = tf.math.divide_no_nan(total_precision, total_relevance)
        # per_list_weights are computed from the whole list to avoid the problem of
        # 0 when there is no relevant example in topn.
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights, relevance)
        return per_list_map, per_list_weights
Example #5
  def compute_unreduced_loss(self, labels, logits):
    """See `_RankingLoss`."""
    is_valid = utils.is_label_valid(labels)
    # Reset the invalid labels to 0 and reset the invalid logits to a logit with
    # ~= 0 contribution.
    labels = tf.compat.v1.where(is_valid, labels, tf.zeros_like(labels))
    logits = tf.compat.v1.where(is_valid, logits,
                                tf.math.log(_EPSILON) * tf.ones_like(logits))
    scores = tf.compat.v1.where(
        is_valid, labels,
        tf.reduce_min(input_tensor=labels, axis=1, keepdims=True) -
        1e-6 * tf.ones_like(labels))
    # Use a fixed ops-level seed and the randomness is controlled by the
    # graph-level seed.
    sorted_labels, sorted_logits = utils.sort_by_scores(
        scores, [labels, logits], shuffle_ties=True, seed=37)

    raw_max = tf.reduce_max(input_tensor=sorted_logits, axis=1, keepdims=True)
    sorted_logits = sorted_logits - raw_max
    sums = tf.cumsum(tf.exp(sorted_logits), axis=1, reverse=True)
    sums = tf.math.log(sums) - sorted_logits

    if self._lambda_weight is not None and isinstance(self._lambda_weight,
                                                      ListMLELambdaWeight):
      batch_size, list_size = tf.unstack(tf.shape(input=sorted_labels))
      sums *= self._lambda_weight.individual_weights(
          sorted_labels,
          tf.tile(tf.expand_dims(tf.range(list_size) + 1, 0), [batch_size, 1]))

    negative_log_likelihood = tf.reduce_sum(
        input_tensor=sums, axis=1, keepdims=True)
    return negative_log_likelihood, tf.ones_like(negative_log_likelihood)
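In equation form (a reading of the code above, not taken from the library docs): with \pi the permutation obtained by sorting items by their labels (ties shuffled) and s_{\pi(i)} the logit of the item placed at position i, the loss is the negative Plackett-Luce log-likelihood

-\log P(\pi \mid s) \;=\; \sum_{i=1}^{n} \Big[ \log \sum_{j=i}^{n} \exp\big(s_{\pi(j)}\big) \;-\; s_{\pi(i)} \Big],

which is exactly the reverse cumulative log-sum-exp minus the sorted logits computed above; subtracting raw_max is only for numerical stability.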
Example #6
def average_relevance_position(labels, predictions, weights=None, name=None):
    """Computes average relevance position (ARP).

    This could also be called average_relevance_rank, but that acronym is easy
    to confuse with mean_reciprocal_rank. The present name is more distinctive
    and has been used historically for binary relevance as
    average_click_position.

    Args:
      labels: A `Tensor` of the same shape as `predictions`.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
        former case is per-example and the latter case is per-list.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted average relevance position.
    """
    with ops.name_scope(name, 'average_relevance_position',
                        (labels, predictions, weights)):
        _, list_size = array_ops.unstack(array_ops.shape(predictions))
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, list_size)
        sorted_labels, sorted_weights = utils.sort_by_scores(
            predictions, [labels, weights], topn=topn)
        relevance = sorted_labels * sorted_weights
        position = math_ops.to_float(math_ops.range(1, topn + 1))
        # TODO(xuanhui): Consider adding a cap position of topn + 1 when there are
        # no relevant examples.
        return math_ops.reduce_mean(
            position * array_ops.ones_like(relevance) * relevance)
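For reference, the per-list quantity this metric targets (written here from the code, with y_{(i)} and w_{(i)} the label and weight of the item ranked at position i by the predictions) is

\mathrm{ARP} \;=\; \frac{\sum_{i=1}^{k} i \cdot y_{(i)} \, w_{(i)}}{\sum_{i=1}^{k} y_{(i)} \, w_{(i)}},

i.e. the weighted average rank of relevant items. The normalized per-list form appears in Example #23 below; this legacy version takes a plain mean over all weighted entries.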
Example #7
def precision(labels, predictions, weights=None, topn=None, name=None):
    """Computes precision as weighted average of relevant examples.

    Args:
      labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
        relevant example.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
        former case is per-example and the latter case is per-list.
      topn: A cutoff for how many examples to consider for this metric.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted precision of the batch.
    """
    with ops.name_scope(name, 'precision', (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(
            predictions, [labels, weights], topn=topn)
        # Relevance = 1.0 when labels >= 1.0.
        relevance = math_ops.to_float(
            math_ops.greater_equal(
                sorted_labels, 1.0))
        per_list_precision = _safe_div(
            math_ops.reduce_sum(relevance * sorted_weights, 1, keepdims=True),
            math_ops.reduce_sum(
                array_ops.ones_like(relevance) * sorted_weights, 1, keepdims=True))
        # per_list_weights are computed from the whole list to avoid the problem of
        # 0 when there is no relevant example in topn.
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights, math_ops.to_float(math_ops.greater_equal(labels, 1.0)))
        return math_ops.reduce_mean(per_list_precision * per_list_weights)
Example #8
def _per_list_precision(labels, predictions, weights, topn):
  """Computes the precision for each query in the batch.

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
      relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
      former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.

  Returns:
    A `Tensor` of size [batch_size, 1] containing the precision of each query
    respectively.
  """
  sorted_labels, sorted_weights = utils.sort_by_scores(
      predictions, [labels, weights], topn=topn)
  # Relevance = 1.0 when labels >= 1.0.
  relevance = tf.cast(tf.greater_equal(sorted_labels, 1.0), dtype=tf.float32)
  per_list_precision = tf.compat.v1.math.divide_no_nan(
      tf.reduce_sum(
          input_tensor=relevance * sorted_weights, axis=1, keepdims=True),
      tf.reduce_sum(
          input_tensor=tf.ones_like(relevance) * sorted_weights,
          axis=1,
          keepdims=True))
  return per_list_precision
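Written out, the per-list value computed above is weighted precision at the cutoff k = topn,

P@k \;=\; \frac{\sum_{i=1}^{k} w_{(i)} \,\mathbf{1}[\,y_{(i)} \ge 1\,]}{\sum_{i=1}^{k} w_{(i)}},

with y_{(i)} and w_{(i)} the label and weight of the item ranked at position i; divide_no_nan returns 0 for lists whose top-k weights sum to zero.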
Example #9
def discounted_cumulative_gain(labels,
                               predictions,
                               weights=None,
                               topn=None,
                               name=None):
    """Computes discounted cumulative gain (DCG).

    Args:
      labels: A `Tensor` of the same shape as `predictions`.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
        former case is per-example and the latter case is per-list.
      topn: A cutoff for how many examples to consider for this metric.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted discounted cumulative gain of the batch.
    """
    with ops.name_scope(name, 'discounted_cumulative_gain',
                        (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(
            predictions, [labels, weights], topn=topn)
        dcg = _discounted_cumulative_gain(sorted_labels,
                                          sorted_weights) * math_ops.log1p(1.0)
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights=weights,
            relevance=math_ops.pow(2.0, math_ops.to_float(labels)) - 1.0)
        return math_ops.reduce_mean(
            _safe_div(dcg, per_list_weights) * per_list_weights)
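Assuming `_discounted_cumulative_gain` uses the library defaults (gain 2^y - 1, discount 1/ln(1 + rank)), the extra factor log1p(1.0) = ln 2 turns the discount into a base-2 logarithm, so the per-list quantity is the familiar

\mathrm{DCG}@k \;=\; \sum_{i=1}^{k} w_{(i)} \, \frac{2^{y_{(i)}} - 1}{\log_2(1 + i)}.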
Example #10
    def compute_unreduced_loss(self, labels, logits, weights):
        """See `_RankingLoss`."""
        is_label_valid = utils.is_label_valid(labels)
        # Reset the invalid labels to 0 and reset the invalid logits to a logit with
        # ~= 0 contribution.
        labels = tf.where(is_label_valid, labels, tf.zeros_like(labels))
        logits = tf.where(is_label_valid, logits,
                          tf.math.log(_EPSILON) * tf.ones_like(logits))
        weights = 1.0 if weights is None else tf.convert_to_tensor(
            value=weights)
        weights = tf.squeeze(weights)

        # Shuffle labels and logits to add randomness to sort.
        shuffled_indices = utils.shuffle_valid_indices(is_label_valid,
                                                       self._seed)
        shuffled_labels = tf.gather_nd(labels, shuffled_indices)
        shuffled_logits = tf.gather_nd(logits, shuffled_indices)

        sorted_labels, sorted_logits = utils.sort_by_scores(
            shuffled_labels, [shuffled_labels, shuffled_logits])

        raw_max = tf.reduce_max(input_tensor=sorted_logits,
                                axis=1,
                                keepdims=True)
        sorted_logits = sorted_logits - raw_max
        sums = tf.cumsum(tf.exp(sorted_logits), axis=1, reverse=True)
        sums = tf.math.log(sums) - sorted_logits

        if self._lambda_weight is not None and isinstance(
                self._lambda_weight, ListMLELambdaWeight):
            sums *= self._lambda_weight.individual_weights(sorted_labels)

        negative_log_likelihood = tf.reduce_sum(input_tensor=sums, axis=1)
        return negative_log_likelihood, weights
Example #11
def mean_reciprocal_rank(labels, predictions, weights=None, name=None):
    """Computes mean reciprocal rank (MRR).

    Args:
      labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
        relevant example.
      predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
        the ranking score of the corresponding example.
      weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
        former case is per-example and the latter case is per-list.
      name: A string used as the name for this metric.

    Returns:
      A metric for the weighted mean reciprocal rank of the batch.
    """
    with ops.name_scope(name, 'mean_reciprocal_rank',
                        (labels, predictions, weights)):
        _, list_size = array_ops.unstack(array_ops.shape(predictions))
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, list_size)
        sorted_labels, = utils.sort_by_scores(predictions, [labels], topn=topn)
        # Relevance = 1.0 when labels >= 1.0 to accommodate graded relevance.
        relevance = math_ops.to_float(
            math_ops.greater_equal(
                sorted_labels, 1.0))
        reciprocal_rank = 1.0 / math_ops.to_float(math_ops.range(1, topn + 1))
        # MRR has a shape of [batch_size, 1]
        mrr = math_ops.reduce_max(
            relevance * reciprocal_rank, axis=1, keepdims=True)
        return math_ops.reduce_mean(
            mrr * array_ops.ones_like(weights) * weights)
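Per list, the quantity reduced above is the reciprocal rank of the best-ranked relevant item (0 if none is relevant),

\mathrm{RR} \;=\; \max_{1 \le i \le k} \frac{\mathbf{1}[\,y_{(i)} \ge 1\,]}{i} \;=\; \frac{1}{\text{rank of the first relevant item}},

and MRR is its weight-scaled mean over the batch.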
Example #12
def _per_list_precision(labels, predictions, topn, mask):
    """Computes the precision for each query in the batch.

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
      relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    topn: A cutoff for how many examples to consider for this metric.
    mask: A `Tensor` of the same shape as predictions indicating which entries
      are valid for computing the metric.

  Returns:
    A `Tensor` of size [batch_size, 1] containing the precision of each query
    respectively.
  """
    sorted_labels = utils.sort_by_scores(predictions, [labels],
                                         topn=topn,
                                         mask=mask)[0]
    # Relevance = 1.0 when labels >= 1.0.
    relevance = tf.cast(tf.greater_equal(sorted_labels, 1.0), dtype=tf.float32)
    if topn is None:
        topn = tf.shape(relevance)[1]
    valid_topn = tf.minimum(
        topn,
        tf.reduce_sum(tf.cast(mask, dtype=tf.int32), axis=1, keepdims=True))
    per_list_precision = tf.compat.v1.math.divide_no_nan(
        tf.reduce_sum(input_tensor=relevance, axis=1, keepdims=True),
        tf.cast(valid_topn, dtype=tf.float32))
    return per_list_precision
Example #13
def _per_list_recall(labels, predictions, topn, mask):
    """Computes the recall@k for each query in the batch.

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
      relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    topn: A cutoff for how many examples to consider for this metric.
    mask: A mask indicating which entries are valid for computing the metric.

  Returns:
    A `Tensor` of size [batch_size, 1] containing the recall of each query
    respectively.
  """
    sorted_labels = utils.sort_by_scores(predictions, [labels],
                                         topn=topn,
                                         mask=mask)[0]
    topn_positives = tf.cast(tf.greater_equal(sorted_labels, 1.0),
                             dtype=tf.float32)
    labels = tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32)
    per_list_recall = tf.compat.v1.math.divide_no_nan(
        tf.reduce_sum(input_tensor=topn_positives, axis=1, keepdims=True),
        tf.reduce_sum(input_tensor=labels, axis=1, keepdims=True))
    return per_list_recall
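In other words, with relevance defined as label >= 1, the value per list is

\mathrm{Recall}@k \;=\; \frac{\sum_{i=1}^{k} \mathbf{1}[\,y_{(i)} \ge 1\,]}{\sum_{j=1}^{n} \mathbf{1}[\,y_j \ge 1\,]},

the fraction of all relevant items that appear in the top k; divide_no_nan makes it 0 for lists with no relevant items.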
Example #14
 def compute(self, labels, predictions, weights):
     """See `_RankingMetric`."""
     labels, predictions, weights, topn = _prepare_and_validate_params(
         labels, predictions, weights, self._topn)
     sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                          [labels, weights],
                                                          topn=topn)
     # Relevance = 1.0 when labels >= 1.0.
     sorted_relevance = tf.cast(tf.greater_equal(sorted_labels, 1.0),
                                dtype=tf.float32)
     per_list_relevant_counts = tf.cumsum(sorted_relevance, axis=1)
     per_list_cutoffs = tf.cumsum(tf.ones_like(sorted_relevance), axis=1)
     per_list_precisions = tf.math.divide_no_nan(per_list_relevant_counts,
                                                 per_list_cutoffs)
     total_precision = tf.reduce_sum(input_tensor=per_list_precisions *
                                     sorted_weights * sorted_relevance,
                                     axis=1,
                                     keepdims=True)
     total_relevance = tf.reduce_sum(input_tensor=sorted_weights *
                                     sorted_relevance,
                                     axis=1,
                                     keepdims=True)
     per_list_map = tf.math.divide_no_nan(total_precision, total_relevance)
     # per_list_weights are computed from the whole list to avoid the problem of
     # 0 when there is no relevant example in topn.
     per_list_weights = _per_example_weights_to_per_list_weights(
         weights, tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32))
     return per_list_map, per_list_weights
Example #15
 def _compute_per_list_metric(self, labels, predictions, weights, topn,
                              mask):
     """See `_DivRankingMetric`."""
     sorted_labels = utils.sort_by_scores(predictions, [labels],
                                          topn=topn,
                                          mask=mask)[0]
     # relevance shape = [batch_size, topn].
     relevance = tf.reduce_sum(tf.cast(tf.greater_equal(sorted_labels, 1.0),
                                       dtype=tf.float32),
                               axis=-1)
     # num_subtopics shape = [batch_size, 1].
     num_subtopics = tf.reduce_sum(tf.cast(tf.reduce_any(tf.greater_equal(
         labels, 1.0),
                                                         axis=1,
                                                         keepdims=True),
                                           dtype=tf.float32),
                                   axis=-1)
     if topn is None:
         topn = tf.shape(relevance)[1]
     # valid_topn shape = [batch_size, 1].
     valid_topn = tf.minimum(
         topn,
         tf.reduce_sum(tf.cast(mask, dtype=tf.int32), axis=1,
                       keepdims=True))
     return tf.compat.v1.math.divide_no_nan(
         tf.reduce_sum(input_tensor=relevance, axis=1, keepdims=True),
         tf.reduce_sum(input_tensor=tf.cast(valid_topn, dtype=tf.float32) *
                       num_subtopics,
                       axis=1,
                       keepdims=True))
Example #16
    def _compute_impl(self, labels, predictions, weights, mask):
        """See `_RankingMetric`."""
        topn = tf.shape(predictions)[1] if self._topn is None else self._topn

        # Relevance = 1.0 when labels >= 1.0 to accommodate graded relevance.
        relevance = tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32)
        irrelevance = tf.cast(mask, tf.float32) - relevance

        total_relevance = tf.reduce_sum(relevance, axis=1, keepdims=True)
        total_irrelevance = tf.reduce_sum(irrelevance, axis=1, keepdims=True)

        sorted_relevance, sorted_irrelevance = utils.sort_by_scores(
            predictions, [relevance, irrelevance], mask=mask, topn=topn)

        numerator = tf.minimum(tf.cumsum(sorted_irrelevance, axis=1),
                               total_relevance)
        denominator = tf.minimum(
            total_irrelevance,
            total_relevance) if self._use_trec_version else total_relevance

        bpref = tf.math.divide_no_nan(
            tf.reduce_sum(
                ((1. - tf.math.divide_no_nan(numerator, denominator)) *
                 sorted_relevance),
                axis=1,
                keepdims=True), total_relevance)

        per_list_weights = _per_example_weights_to_per_list_weights(
            weights=weights,
            relevance=tf.cast(tf.greater_equal(relevance, 1.0),
                              dtype=tf.float32))

        return bpref, per_list_weights
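A compact reading of the computation above: with R the number of relevant items, N the number of valid non-relevant items, and n_r the number of non-relevant items ranked above a relevant item r,

\mathrm{bpref} \;=\; \frac{1}{R} \sum_{r \in \text{relevant}} \Big( 1 - \frac{\min(n_r,\, R)}{D} \Big),

where D = min(N, R) in the TREC variant (self._use_trec_version) and D = R otherwise.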
Example #17
def inverse_max_dcg(labels,
                    gain_fn=lambda labels: tf.pow(2.0, labels) - 1.,
                    rank_discount_fn=lambda rank: 1. / tf.math.log1p(rank),
                    topn=None):
    """Computes the inverse of max DCG.

  Args:
    labels: A `Tensor` with shape [batch_size, list_size]. Each value is the
      graded relevance of the corresponding item.
    gain_fn: A gain function. By default this is set to: 2^label - 1.
    rank_discount_fn: A discount function. By default this is set to:
      1/log(1+rank).
    topn: An integer as the cutoff of examples in the sorted list.

  Returns:
    A `Tensor` with shape [batch_size, 1].
  """
    ideal_sorted_labels, = utils.sort_by_scores(labels, [labels], topn=topn)
    rank = tf.range(tf.shape(input=ideal_sorted_labels)[1]) + 1
    discounted_gain = gain_fn(ideal_sorted_labels) * rank_discount_fn(
        tf.cast(rank, dtype=tf.float32))
    discounted_gain = tf.reduce_sum(input_tensor=discounted_gain,
                                    axis=1,
                                    keepdims=True)
    return tf.compat.v1.where(tf.greater(discounted_gain, 0.),
                              1. / discounted_gain,
                              tf.zeros_like(discounted_gain))
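Equivalently, with y^*_{(i)} the labels sorted in decreasing order (the ideal ranking),

\mathrm{inverse\_max\_dcg}@k \;=\; \Big( \sum_{i=1}^{k} \mathrm{gain\_fn}\big(y^*_{(i)}\big)\, \mathrm{rank\_discount\_fn}(i) \Big)^{-1} \;=\; \Big( \sum_{i=1}^{k} \frac{2^{y^*_{(i)}} - 1}{\ln(1 + i)} \Big)^{-1}

with the default gain and discount, and 0 whenever the ideal DCG itself is 0 (no positive gains).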
Example #18
  def test_sort_by_scores_with_mask_and_shuffle_ties(self):
    with tf.Graph().as_default():
      tf.random.set_seed(42)
      scores = [[0., math.inf, 0., -math.inf, -math.inf]]
      names = [['a', 'b', 'c', 'd', 'e']]
      mask = [[True, False, True, True, False]]

      with tf.compat.v1.Session() as sess:
        result = utils.sort_by_scores(scores, [names], mask=mask,
                                      shuffle_ties=True, seed=13)
        sorted_names = sess.run(result)[0]
        self.assertAllEqual(sorted_names, [[b'a', b'c', b'd', b'b', b'e']])

        result = utils.sort_by_scores(scores, [names], mask=mask,
                                      shuffle_ties=True, seed=17)
        sorted_names = sess.run(result)[0]
        self.assertAllEqual(sorted_names, [[b'c', b'a', b'd', b'e', b'b']])
Example #19
 def compute(self, labels, predictions, weights):
   """See `_RankingMetric`."""
   labels, predictions, weights, topn = _prepare_and_validate_params(
       labels, predictions, weights, self._topn)
   sorted_labels, sorted_weights = utils.sort_by_scores(
       predictions, [labels, weights], topn=topn)
   dcg = _discounted_cumulative_gain(sorted_labels, sorted_weights)
   # Sorting over the weighted labels to get ideal ranking.
   ideal_sorted_labels, ideal_sorted_weights = utils.sort_by_scores(
       weights * labels, [labels, weights], topn=topn)
   ideal_dcg = _discounted_cumulative_gain(ideal_sorted_labels,
                                           ideal_sorted_weights)
   per_list_ndcg = tf.compat.v1.math.divide_no_nan(dcg, ideal_dcg)
   per_list_weights = _per_example_weights_to_per_list_weights(
       weights=weights,
       relevance=tf.pow(2.0, tf.cast(labels, dtype=tf.float32)) - 1.0)
   return tf.compat.v1.metrics.mean(per_list_ndcg, per_list_weights)
Example #20
 def test_sort_by_scores_with_mask(self):
   with tf.Graph().as_default():
     scores = [[0., math.inf, 2., -math.inf, 1.]]
     names = [['a', 'b', 'c', 'd', 'e']]
     mask_1 = [[True, False, True, True, False]]
     mask_2 = [[False, True, False, True, True]]
     with tf.compat.v1.Session() as sess:
       sorted_names = sess.run(
           utils.sort_by_scores(scores, [names], mask=mask_1,
                                shuffle_ties=False))[0]
       self.assertAllEqual(sorted_names, [[b'c', b'a', b'd', b'b', b'e']])
       sorted_names = sess.run(
           utils.sort_by_scores(scores, [names], mask=mask_2,
                                shuffle_ties=False))[0]
       self.assertAllEqual(sorted_names, [[b'b', b'e', b'd', b'a', b'c']])
       sorted_names = sess.run(
           utils.sort_by_scores(scores, [names], shuffle_ties=False))[0]
       self.assertAllEqual(sorted_names, [[b'b', b'c', b'e', b'a', b'd']])
Example #21
  def test_sort_by_scores(self):
    scores = [[1., 3., 2.], [1., 2., 3.]]
    positions = [[1, 2, 3], [4, 5, 6]]
    names = [['a', 'b', 'c'], ['d', 'e', 'f']]
    with tf.compat.v1.Session() as sess:
      sorted_positions, sorted_names = sess.run(
          utils.sort_by_scores(scores, [positions, names]))
      self.assertAllEqual(sorted_positions, [[2, 3, 1], [6, 5, 4]])
      self.assertAllEqual(sorted_names,
                          [[b'b', b'c', b'a'], [b'f', b'e', b'd']])

      sorted_positions, sorted_names = sess.run(
          utils.sort_by_scores(scores, [positions, names], topn=2))
      self.assertAllEqual(sorted_positions, [[2, 3], [6, 5]])
      self.assertAllEqual(sorted_names, [[b'b', b'c'], [b'f', b'e']])

      sorted_positions, sorted_names = sess.run(
          utils.sort_by_scores([scores[0]], [[positions[0]], [names[0]]]))
      self.assertAllEqual(sorted_positions, [[2, 3, 1]])
      self.assertAllEqual(sorted_names, [[b'b', b'c', b'a']])
Example #22
 def _compute_per_list_metric(self, labels, predictions, weights, topn):
     """See `_DivRankingMetric`."""
     sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                          [labels, weights],
                                                          topn=topn,
                                                          seed=self._seed)
     alpha_dcg = _discounted_cumulative_gain(sorted_labels, sorted_weights,
                                             self._gain_fn,
                                             self._rank_discount_fn)
     per_list_weights = self._compute_per_list_weights(weights, labels)
     return tf.compat.v1.math.divide_no_nan(alpha_dcg, per_list_weights)
Example #23
 def compute(self, labels, predictions, weights):
   """See `_RankingMetric`."""
   list_size = tf.shape(input=predictions)[1]
   labels, predictions, weights, topn = _prepare_and_validate_params(
       labels, predictions, weights, list_size)
   sorted_labels, sorted_weights = utils.sort_by_scores(
       predictions, [labels, weights], topn=topn)
   relevance = sorted_labels * sorted_weights
   position = tf.cast(tf.range(1, topn + 1), dtype=tf.float32)
   # TODO: Consider adding a cap position of topn + 1 when there are no
   # relevant examples.
   return position * tf.ones_like(relevance), relevance
Example #24
 def compute(self, labels, predictions, weights):
   """See `_RankingMetric`."""
   labels, predictions, weights, topn = _prepare_and_validate_params(
       labels, predictions, weights, self._topn)
   sorted_labels, sorted_weights = utils.sort_by_scores(
       predictions, [labels, weights], topn=topn)
   dcg = _discounted_cumulative_gain(sorted_labels, sorted_weights,
                                     self._gain_fn, self._rank_discount_fn)
   per_list_weights = _per_example_weights_to_per_list_weights(
       weights=weights,
       relevance=self._gain_fn(tf.cast(labels, dtype=tf.float32)))
   per_list_dcg = tf.compat.v1.math.divide_no_nan(dcg, per_list_weights)
   return per_list_dcg, per_list_weights
Example #25
 def _compute_impl(self, labels, predictions, weights, mask):
     """See `_RankingMetric`."""
     topn = tf.shape(predictions)[1] if self._topn is None else self._topn
     sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                          [labels, weights],
                                                          topn=topn,
                                                          mask=mask)
     dcg = _discounted_cumulative_gain(sorted_labels, sorted_weights,
                                       self._gain_fn,
                                       self._rank_discount_fn)
     # Sorting over the weighted labels to get ideal ranking.
     ideal_sorted_labels, ideal_sorted_weights = utils.sort_by_scores(
         weights * labels, [labels, weights], topn=topn, mask=mask)
     ideal_dcg = _discounted_cumulative_gain(ideal_sorted_labels,
                                             ideal_sorted_weights,
                                             self._gain_fn,
                                             self._rank_discount_fn)
     per_list_ndcg = tf.compat.v1.math.divide_no_nan(dcg, ideal_dcg)
     per_list_weights = _per_example_weights_to_per_list_weights(
         weights=weights,
         relevance=self._gain_fn(tf.cast(labels, dtype=tf.float32)))
     return per_list_ndcg, per_list_weights
Example #26
def mean_average_precision(labels,
                           predictions,
                           weights=None,
                           topn=None,
                           name=None):
    """Computes mean average precision (MAP).
  The implementation of MAP is based on Equation (1.7) in the following:
  Liu, T-Y "Learning to Rank for Information Retrieval" found at
  https://www.nowpublishers.com/article/DownloadSummary/INR-016

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
      relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
      former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.

  Returns:
    A metric for the mean average precision.
  """
    with tf.compat.v1.name_scope(name, 'mean_average_precision',
                                 (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                             [labels, weights],
                                                             topn=topn)
        # Relevance = 1.0 when labels >= 1.0.
        sorted_relevance = tf.cast(tf.greater_equal(sorted_labels, 1.0),
                                   dtype=tf.float32)
        per_list_relevant_counts = tf.cumsum(sorted_relevance, axis=1)
        per_list_cutoffs = tf.cumsum(tf.ones_like(sorted_relevance), axis=1)
        per_list_precisions = tf.math.divide_no_nan(per_list_relevant_counts,
                                                    per_list_cutoffs)
        total_precision = tf.reduce_sum(input_tensor=per_list_precisions *
                                        sorted_weights * sorted_relevance,
                                        axis=1,
                                        keepdims=True)
        total_relevance = tf.reduce_sum(input_tensor=sorted_weights *
                                        sorted_relevance,
                                        axis=1,
                                        keepdims=True)
        per_list_map = tf.math.divide_no_nan(total_precision, total_relevance)
        # per_list_weights are computed from the whole list to avoid the problem of
        # 0 when there is no relevant example in topn.
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights, tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32))
        return tf.compat.v1.metrics.mean(per_list_map, per_list_weights)
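In formula form, with rel_{(i)} = \mathbf{1}[\,y_{(i)} \ge 1\,] and P@i = \frac{1}{i}\sum_{j \le i} \mathrm{rel}_{(j)} the unweighted precision at cutoff i, the per-list value is the weighted average precision

\mathrm{AP}@k \;=\; \frac{\sum_{i=1}^{k} P@i \; w_{(i)} \, \mathrm{rel}_{(i)}}{\sum_{i=1}^{k} w_{(i)} \, \mathrm{rel}_{(i)}},

which reduces to the textbook AP (Liu, Eq. 1.7) when all weights are 1 and topn covers the whole list.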
Example #27
 def compute(self, labels, predictions, weights):
     """See `_RankingMetric`."""
     labels, predictions, weights, topn = _prepare_and_validate_params(
         labels, predictions, weights, self._topn)
     sorted_labels, sorted_weights = utils.sort_by_scores(predictions,
                                                          [labels, weights],
                                                          topn=topn)
     dcg = _discounted_cumulative_gain(sorted_labels,
                                       sorted_weights) * tf.math.log1p(1.0)
     per_list_weights = _per_example_weights_to_per_list_weights(
         weights=weights,
         relevance=tf.pow(2.0, tf.cast(labels, dtype=tf.float32)) - 1.0)
     return tf.compat.v1.metrics.mean(_safe_div(dcg, per_list_weights),
                                      per_list_weights)
Example #28
 def _inverse_max_dcg(self, labels):
     """Computes the inverse of max DCG."""
     ideal_sorted_labels, = utils.sort_by_scores(labels, [labels],
                                                 topn=self._topn)
     rank = math_ops.range(array_ops.shape(ideal_sorted_labels)[1]) + 1
     discounted_gain = self._gain_fn(
         ideal_sorted_labels) * self._rank_discount_fn(
             math_ops.to_float(rank))
     discounted_gain = math_ops.reduce_sum(discounted_gain,
                                           1,
                                           keepdims=True)
     return array_ops.where(math_ops.greater(discounted_gain, 0.),
                            1. / discounted_gain,
                            array_ops.zeros_like(discounted_gain))
Example #29
def bilingual_lexical_induction(labels, predictions, features):
    """Compute the BLI. We do not make all the needed verifications as they were already made for previous metrics."""

    if FLAGS.query_relevance_type == "binary":
        ground_truth = 2
    else:
        ground_truth = FLAGS.query_size
    # We get the label of the highest ranked word by the model
    sorted_labels = utils.sort_by_scores(predictions, [labels], topn=1)[0]
    # We check if the label is equal to ground truth
    relevance = tf.cast(tf.equal(sorted_labels, ground_truth),
                        dtype=tf.float32)
    # We return it
    return tf.compat.v1.metrics.mean(relevance)
Example #30
def normalized_discounted_cumulative_gain(labels,
                                          predictions,
                                          weights=None,
                                          topn=None,
                                          name=None):
  """Computes normalized discounted cumulative gain (NDCG).

  Args:
    labels: A `Tensor` of the same shape as `predictions`.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape as `predictions` or [batch_size, 1]. The
      former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted normalized discounted cumulative gain of the
    batch.
  """
  with tf.compat.v1.name_scope(name, 'normalized_discounted_cumulative_gain',
                               (labels, predictions, weights)):
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, topn)
    sorted_labels, sorted_weights = utils.sort_by_scores(
        predictions, [labels, weights], topn=topn)
    dcg = _discounted_cumulative_gain(sorted_labels, sorted_weights)
    # Sorting over the weighted labels to get ideal ranking.
    ideal_sorted_labels, ideal_sorted_weights = utils.sort_by_scores(
        weights * labels, [labels, weights], topn=topn)
    ideal_dcg = _discounted_cumulative_gain(ideal_sorted_labels,
                                            ideal_sorted_weights)
    per_list_ndcg = _safe_div(dcg, ideal_dcg)
    per_list_weights = _per_example_weights_to_per_list_weights(
        weights=weights,
        relevance=tf.pow(2.0, tf.cast(labels, dtype=tf.float32)) - 1.0)
    return tf.compat.v1.metrics.mean(per_list_ndcg, per_list_weights)
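The per-list quantity here is the usual normalization of DCG by the DCG of an ideal ordering (items re-sorted by the weighted label w \cdot y),

\mathrm{NDCG}@k \;=\; \frac{\mathrm{DCG}@k}{\mathrm{DCG}^{*}@k}, \qquad \mathrm{DCG}@k \;=\; \sum_{i=1}^{k} w_{(i)} \, \frac{2^{y_{(i)}} - 1}{\ln(1 + i)},

assuming the default gain and discount in `_discounted_cumulative_gain`; since numerator and denominator share the same discount, the choice of logarithm base cancels and does not affect the metric.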