Example #1
0
def compute_mean(metric_key,
                 labels,
                 predictions,
                 weights=None,
                 topn=None,
                 name=None):
    """Returns the mean of the specified metric given the inputs.

  Args:
    metric_key: A key in `RankingMetricKey` selecting which metric to compute.
    labels: A `Tensor` of the same shape as `predictions` representing
      relevance.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The
      former case is per-example and the latter case is per-list.
    topn: An `integer` specifying the cutoff of how many items are considered in
      the metric.
    name: A `string` used as the name for this metric.

  Returns:
    A scalar `Tensor`: the weight-averaged metric over the batch (0 when the
    total weight is 0).
  """
    # Map each supported key to a zero-arg factory so only the requested
    # metric object is constructed. (Previously every metric was instantiated
    # on each call just to pick one out of the dict.)
    # NOTE(review): ARP/MRR/HITS receive `metric_key` where the others receive
    # `name` as the first constructor argument — looks inconsistent; confirm
    # against metrics_impl before "fixing", kept as-is here.
    metric_factories = {
        RankingMetricKey.ARP:
            lambda: metrics_impl.ARPMetric(metric_key),
        RankingMetricKey.MRR:
            lambda: metrics_impl.MRRMetric(metric_key, topn),
        RankingMetricKey.NDCG:
            lambda: metrics_impl.NDCGMetric(name, topn),
        RankingMetricKey.DCG:
            lambda: metrics_impl.DCGMetric(name, topn),
        RankingMetricKey.PRECISION:
            lambda: metrics_impl.PrecisionMetric(name, topn),
        RankingMetricKey.RECALL:
            lambda: metrics_impl.RecallMetric(name, topn),
        RankingMetricKey.MAP:
            lambda: metrics_impl.MeanAveragePrecisionMetric(name, topn),
        RankingMetricKey.ORDERED_PAIR_ACCURACY:
            lambda: metrics_impl.OPAMetric(name),
        RankingMetricKey.BPREF:
            lambda: metrics_impl.BPrefMetric(name, topn),
        RankingMetricKey.HITS:
            lambda: metrics_impl.HitsMetric(metric_key, topn),
    }
    assert metric_key in metric_factories, ('metric_key %s not supported.' %
                                            metric_key)
    # TODO: Add mask argument for metric.compute() call
    metric, weight = metric_factories[metric_key]().compute(
        labels, predictions, weights)
    # Weighted mean of the per-item metric; div_no_nan yields 0 instead of NaN
    # when the total weight is 0.
    return tf.compat.v1.div_no_nan(tf.reduce_sum(input_tensor=metric * weight),
                                   tf.reduce_sum(input_tensor=weight))
Example #2
0
def binary_preference(labels,
                      predictions,
                      weights=None,
                      topn=None,
                      name=None,
                      use_trec_version=True):
    """Computes binary preference (BPref).

  The implementation of BPref follows the description in:
  https://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf
  BPref = 1 / R SUM_r(1 - |n ranked higher than r| / min(R, N))

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
      relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The
      former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.
    use_trec_version: A boolean selecting the formula variant. If False, the
      alternative BPref formula is used:
      BPref = 1 / R SUM_r(1 - |n ranked higher than r| / R)

  Returns:
    A metric for binary preference metric of the batch.
  """
    bpref_metric = metrics_impl.BPrefMetric(
        name, topn, use_trec_version=use_trec_version)
    scope_tensors = (labels, predictions, weights)
    with tf.compat.v1.name_scope(bpref_metric.name, 'binary_preference',
                                 scope_tensors):
        # TODO: Add mask argument for metric.compute() call
        per_list_scores, per_list_weights = bpref_metric.compute(
            labels, predictions, weights)
    # Streaming weighted mean over the per-list BPref values.
    return tf.compat.v1.metrics.mean(per_list_scores, per_list_weights)