Example 1
 def test_make_mean_reciprocal_rank_fn(self):
   with tf.Graph().as_default():
     scores = [[1., 3., 2.], [1., 2., 3.]]
     # Note that scores are ranked in descending order.
     # ranks = [[3, 1, 2], [3, 2, 1]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     # Note that the definition of MRR only uses the highest ranked
     # relevant item, where an item is relevant if its label is > 0.
     rel_rank = [2, 1]
     weights = [[1., 2., 3.], [4., 5., 6.]]
     num_queries = len(scores)
     weights_feature_name = 'weights'
     features = {weights_feature_name: weights}
     m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.MRR)
     m_w = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MRR,
         weights_feature_name=weights_feature_name)
     m_2 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MRR, topn=1)
     self._check_metrics([
         (m([labels[0]], [scores[0]], features), 0.5),
         (m(labels, scores, features), (0.5 + 1.0) / 2),
         (m_w(labels, scores, features),
          (3. * 0.5 + (6. + 5.) / 2. * 1.) / (3. + (6. + 5.) / 2.)),
         (m_2(labels, scores,
              features), (sum([0., 1. / rel_rank[1], 0.]) / num_queries)),
     ])
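For reference, a plain-Python sketch of the arithmetic behind the expected values above, assuming (as the test's expressions suggest) that an item is relevant when its label is > 0 and that the per-list weight is the mean weight of that list's relevant items:

def reciprocal_rank(labels, scores):
  # Rank of the highest-scored relevant item, as a reciprocal.
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  for rank, i in enumerate(order, start=1):
    if labels[i] > 0:
      return 1.0 / rank
  return 0.0

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[0., 0., 1.], [0., 1., 2.]]
weights = [[1., 2., 3.], [4., 5., 6.]]

rrs = [reciprocal_rank(l, s) for l, s in zip(labels, scores)]
print(sum(rrs) / len(rrs))  # (0.5 + 1.0) / 2 = 0.75

per_list_w = [
    sum(w for w, l in zip(ws, ls) if l > 0) / sum(l > 0 for l in ls)
    for ws, ls in zip(weights, labels)
]
print(sum(w * r for w, r in zip(per_list_w, rrs)) / sum(per_list_w))
# (3 * 0.5 + 5.5 * 1.0) / (3 + 5.5), matching the weighted case above.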
Example 2
 def test_make_discounted_cumulative_gain_fn(self):
     with tf.Graph().as_default():
         scores = [[1., 3., 2.], [1., 2., 3.]]
         labels = [[0., 0., 1.], [0., 1., 2.]]
         weights = [[1., 1., 1.], [2., 2., 1.]]
         weights_feature_name = 'weights'
         features = {weights_feature_name: weights}
         m = metrics_lib.make_ranking_metric_fn(
             metrics_lib.RankingMetricKey.DCG)
         m_w = metrics_lib.make_ranking_metric_fn(
             metrics_lib.RankingMetricKey.DCG,
             weights_feature_name=weights_feature_name)
         expected_dcg_1 = _dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)
         self._check_metrics([
             (m([labels[0]], [scores[0]], features), expected_dcg_1),
         ])
         expected_dcg_2 = _dcg(2., 1) + _dcg(1., 2)
         expected_dcg_2_weighted = _dcg(2., 1) + _dcg(1., 2) * 2.
         expected_weight_2 = ((4 - 1) * 1. + (2 - 1) * 2.) / (4 - 1 + 2 - 1)
         self._check_metrics([
             (m(labels, scores,
                features), (expected_dcg_1 + expected_dcg_2) / 2.0),
             (m_w(labels, scores,
                  features), (expected_dcg_1 + expected_dcg_2_weighted) /
              (1. + expected_weight_2)),
         ])
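The tests above call a `_dcg` helper that is not shown; below is a plausible reconstruction, assuming the standard exponential gain and log2 rank discount (consistent with the expected values, but not taken from the source file):

import math

# Hypothetical reconstruction of the _dcg test helper: one item's weighted
# DCG contribution at a given 1-based rank.
def _dcg(label, rank, weight=1.0,
         gain_fn=lambda label: math.pow(2.0, label) - 1.0,
         rank_discount_fn=lambda rank: 1.0 / math.log(rank + 1.0, 2.0)):
  """Returns a single item's DCG contribution."""
  return weight * gain_fn(label) * rank_discount_fn(rank)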
Example 3
  def test_make_bpref_fn(self):
    with tf.Graph().as_default():
      scores = [[1., 3., 2.], [1., 2., 3.]]
      # Note that scores are ranked in descending order.
      # ranks = [[3, 1, 2], [3, 2, 1]]
      labels = [[0., 0., 1.], [1., 0., 2.]]
      weights = [[1., 2., 3.], [4., 5., 6.]]
      weights_feature_name = 'weights'
      features = {weights_feature_name: weights}
      # BPref = 1/R * SUM_r (1 - |n ranked higher than r| / min(R, N))

      m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.BPREF)
      m_w = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.BPREF,
          weights_feature_name=weights_feature_name)
      m_2 = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.BPREF, topn=1)
      m_alt = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.BPREF, use_trec_version=False)
      self._check_metrics([
          (m([labels[0]], [scores[0]],
             features), 1. / 2. * (1. - 1. / 1.)),  # = 0.
          (m(labels, scores, features),
           (1. / 2. * (1. - 1. / 1.) +
            (1. / 2. * ((1. - 0. / 1.) + (1. - 1. / 1.)))) / 2),  # = 0.25
          (m_w(labels, scores, features),
           (3. * (1. / 2. * (1. - 1. / 1.)) +
            5. * (1. / 2. * ((1. - 0. / 1.) + (1. - 1. / 1.)))) / (3. + 5.)),
          (m_2(labels, scores, features), (0. +
                                           (1. / 2. * (1. - 0. / 1.))) / 2.),
          (m_alt(labels, scores, features),
           (1. / 2. * (1. - 1. / 1.) +
            (1. / 2. * ((1. - 0. / 2.) + (1. - 1. / 2.)))) / 2),  # = 0.5
      ])
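A minimal sketch of the TREC-style BPref formula quoted in the comment above, treating an item as relevant when its label is > 0; it reproduces the unweighted expected values (0 for the first list, 0.5 for the second, mean 0.25):

def bpref_trec(labels, scores):
  # Sort items by descending score and walk the ranking.
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  rel_flags = [labels[i] > 0 for i in order]
  num_rel = sum(rel_flags)
  num_nonrel = len(rel_flags) - num_rel
  denom = min(num_rel, num_nonrel)
  total, nonrel_above = 0.0, 0
  for is_rel in rel_flags:
    if is_rel:
      total += 1.0 - (min(nonrel_above, denom) / denom if denom else 0.0)
    else:
      nonrel_above += 1
  return total / num_rel if num_rel else 0.0

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[0., 0., 1.], [1., 0., 2.]]
per_list = [bpref_trec(l, s) for l, s in zip(labels, scores)]
print(per_list)                       # [0.0, 0.5]
print(sum(per_list) / len(per_list))  # 0.25, as in the unweighted case above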
Example 4
  def test_make_hits_fn(self):
    with tf.Graph().as_default():
      scores = [[1., 3., 2.], [1., 2., 3.]]
      # Note that scores are ranked in descending order.
      # ranks = [[3, 1, 2], [3, 2, 1]]
      labels = [[0., 0., 1.], [0., 1., 1.]]
      # Note that the definition of Hits considers an item relevant
      # if its label is >= 1.0.
      weights = [[1., 2., 3.], [4., 5., 6.]]
      num_queries = len(scores)
      weights_feature_name = 'weights'
      features = {weights_feature_name: weights}
      m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.HITS)
      m_2 = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.HITS, topn=1)
      m_w = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.HITS, topn=1,
          weights_feature_name=weights_feature_name)

      self._check_metrics([
          (m([labels[0]], [scores[0]], features), 1.0),
          (m_2(labels, scores, features), ((0. + 1.) / num_queries)),
          (m_w(labels, scores, features),
           (3. * 0. + (6. + 5.) / 2. * 1.) / (3. + (6. + 5.) / 2.)),
      ])
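A minimal sketch of the Hits@k arithmetic implied by the expected values (an item counts as a hit when its label is >= 1):

def hits_at_k(labels, scores, k=None):
  # 1.0 if any relevant item appears among the top k scored items.
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  top = order[:k] if k else order
  return 1.0 if any(labels[i] >= 1.0 for i in top) else 0.0

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[0., 0., 1.], [0., 1., 1.]]
print(hits_at_k(labels[0], scores[0]))                                # 1.0
print(sum(hits_at_k(l, s, k=1) for l, s in zip(labels, scores)) / 2)  # 0.5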
Example 5
    def test_make_discounted_cumulative_gain_fn(self):
        with tf.Graph().as_default():
            scores = [[1., 3., 2.], [1., 2., 3.]]
            # Note that scores are ranked in descending order.
            ranks = [[3, 1, 2], [3, 2, 1]]
            labels = [[0., 0., 1.], [0., 1., 2.]]
            weights = [[1., 1., 1.], [2., 2., 1.]]
            weights_feature_name = 'weights'
            features = {weights_feature_name: weights}
            m = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.DCG)
            m_w = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.DCG,
                weights_feature_name=weights_feature_name)
            expected_dcg_1 = _dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)
            self._check_metrics([
                (m([labels[0]], [scores[0]], features), expected_dcg_1),
            ])
            expected_dcg_2 = _dcg(2., 1) + _dcg(1., 2)
            expected_dcg_2_weighted = _dcg(2., 1) + _dcg(1., 2) * 2.
            expected_weight_2 = ((4 - 1) * 1. + (2 - 1) * 2.) / (4 - 1 + 2 - 1)
            self._check_metrics([
                (m(labels, scores,
                   features), (expected_dcg_1 + expected_dcg_2) / 2.0),
                (m_w(labels, scores,
                     features), (expected_dcg_1 + expected_dcg_2_weighted) /
                 (1. + expected_weight_2)),
            ])
            # Testing different gain and discount functions
            gain_fn = lambda rel: rel
            rank_discount_fn = lambda rank: rank

            def mod_dcg_fn(l, r):
                return _dcg(l,
                            r,
                            gain_fn=gain_fn,
                            rank_discount_fn=rank_discount_fn)

            m_mod = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.DCG,
                gain_fn=gain_fn,
                rank_discount_fn=rank_discount_fn)
            list_size = len(scores[0])
            expected_modified_dcg_1 = sum([
                mod_dcg_fn(labels[0][ind], ranks[0][ind])
                for ind in range(list_size)
            ])
            self._check_metrics([
                (m_mod([labels[0]], [scores[0]],
                       features), expected_modified_dcg_1),
            ])
Example 6
 def test_make_mean_reciprocal_rank_fn(self):
   scores = [[1., 3., 2.], [1., 2., 3.]]
   labels = [[0., 0., 1.], [0., 1., 2.]]
   weights = [[1., 2., 3.], [4., 5., 6.]]
   weights_feature_name = 'weights'
   features = {weights_feature_name: weights}
   m = metrics.make_ranking_metric_fn(metrics.RankingMetricKey.MRR)
   m_w = metrics.make_ranking_metric_fn(
       metrics.RankingMetricKey.MRR, weights_feature_name=weights_feature_name)
   self._check_metrics([
       (m([labels[0]], [scores[0]], features), 0.5),
       (m(labels, scores, features), (0.5 + 1.0) / 2),
       (m_w(labels, scores, features), (6. * 0.5 + 15. * 1.) / (6. + 15.)),
   ])
Example 7
 def test_make_precision_fn(self):
   scores = [[1., 3., 2.], [1., 2., 3.]]
   labels = [[0., 0., 1.], [0., 1., 2.]]
   features = {}
   m = metrics.make_ranking_metric_fn(metrics.RankingMetricKey.PRECISION)
   m_top_1 = metrics.make_ranking_metric_fn(
       metrics.RankingMetricKey.PRECISION, topn=1)
   m_top_2 = metrics.make_ranking_metric_fn(
       metrics.RankingMetricKey.PRECISION, topn=2)
   self._check_metrics([
       (m([labels[0]], [scores[0]], features), 1. / 3.),
       (m_top_1([labels[0]], [scores[0]], features), 0. / 1.),
       (m_top_2([labels[0]], [scores[0]], features), 1. / 2.),
       (m(labels, scores, features), (1. / 3. + 2. / 3.) / 2.),
   ])
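A minimal sketch of Precision@k consistent with the expected values (an item is relevant when its label is > 0):

def precision_at_k(labels, scores, k=None):
  # Fraction of the top k scored items that are relevant.
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  k = k or len(order)
  top = order[:k]
  return sum(1.0 for i in top if labels[i] > 0) / k

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[0., 0., 1.], [0., 1., 2.]]
print(precision_at_k(labels[0], scores[0]))        # 1/3
print(precision_at_k(labels[0], scores[0], k=1))   # 0.0
print(precision_at_k(labels[0], scores[0], k=2))   # 0.5
print(sum(precision_at_k(l, s) for l, s in zip(labels, scores)) / 2)  # (1/3 + 2/3) / 2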
Example 8
 def test_make_average_relevance_position_fn(self):
   scores = [[1., 3., 2.], [1., 2., 3.]]
   labels = [[0., 0., 1.], [0., 1., 2.]]
   weights = [[1., 2., 3.], [4., 5., 6.]]
   weights_feature_name = 'weights'
   features = {weights_feature_name: weights}
   m = metrics.make_ranking_metric_fn(metrics.RankingMetricKey.ARP)
   m_w = metrics.make_ranking_metric_fn(
       metrics.RankingMetricKey.ARP, weights_feature_name=weights_feature_name)
   self._check_metrics([
       (m([labels[0]], [scores[0]], features), 2.),
       (m(labels, scores, features), (1. * 2. + 2. * 1. + 1. * 2.) / 4.),
       (m_w(labels, scores, features),
        (3. * 1. * 2. + 6. * 2. * 1. + 5 * 1. * 2.) / (3. + 12. + 5.)),
   ])
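The expected values suggest ARP is the (weight times label)-weighted mean rank pooled over the whole batch; a minimal sketch under that assumption:

def arp(labels_batch, scores_batch, weights_batch=None):
  # Pool numerator and denominator over all lists in the batch.
  if weights_batch is None:
    weights_batch = [[1.0] * len(l) for l in labels_batch]
  num = den = 0.0
  for labels, scores, weights in zip(labels_batch, scores_batch, weights_batch):
    order = sorted(range(len(scores)), key=lambda i: -scores[i])
    for rank, i in enumerate(order, start=1):
      num += weights[i] * labels[i] * rank
      den += weights[i] * labels[i]
  return num / den

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[0., 0., 1.], [0., 1., 2.]]
weights = [[1., 2., 3.], [4., 5., 6.]]
print(arp([labels[0]], [scores[0]]))   # 2.0
print(arp(labels, scores))             # 6/4 = 1.5
print(arp(labels, scores, weights))    # 28/20 = 1.4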
Example 9
    def test_multi_dim_weighted_eval(self):
        weights_feature_name = self._default_weights_feature_name
        metric_fns = {
            'metric/precision@1':
            metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.PRECISION, topn=1),
        }
        head = ranking_head.create_ranking_head(
            loss_fn=_make_loss_fn(weights_feature_name),
            eval_metric_fns=metric_fns)

        weights = self._default_weights

        # Create estimator spec.
        spec = head.create_estimator_spec(
            features={weights_feature_name: weights},
            mode=tf.estimator.ModeKeys.EVAL,
            logits=self._default_logits,
            labels=self._default_labels)

        expected_metrics = [
            'labels_mean',
            'logits_mean',
            'metric/precision@1',
        ]

        with self.cached_session() as sess:
            _initialize_variables(self, spec.scaffold)
            update_ops = {
                k: spec.eval_metric_ops[k][1]
                for k in spec.eval_metric_ops
            }
            loss, metrics = sess.run((spec.loss, update_ops))
            self.assertAllClose(self._default_weighted_loss, loss)
            self.assertItemsEqual(expected_metrics, metrics.keys())
Example 10
 def _eval_metric_fns(self):
   """Returns a dict from name to metric functions."""
   metric_fns = {}
   metric_fns.update({
       "metric/ndcg_%d" % topn: metrics.make_ranking_metric_fn(
           metrics.RankingMetricKey.NDCG, topn=topn) for topn in [5, 10]
   })
   metric_fns.update({
       "metric/mrr_%d" % topn:
       metrics.make_ranking_metric_fn(metrics.RankingMetricKey.MRR, topn=topn)
       for topn in [10]
   })
   metric_fns.update({
       "metric/%s" % name: metrics.make_ranking_metric_fn(name) for name in
       [metrics.RankingMetricKey.MRR, metrics.RankingMetricKey.NDCG]
   })
   return metric_fns
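Assuming the RankingMetricKey constants stringify to 'ndcg' and 'mrr' (an assumption, not verified here), the dict built above would expose these metric names:

# Hypothetical key listing under the assumption above.
expected_keys = [
    'metric/ndcg_5', 'metric/ndcg_10',  # NDCG at cutoffs 5 and 10
    'metric/mrr_10',                    # MRR at cutoff 10
    'metric/mrr', 'metric/ndcg',        # versions without a cutoff
]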
Example 11
 def test_make_recall_fn(self):
   with tf.Graph().as_default():
     scores = [[1., 3., 2.], [1., 2., 3.]]
     labels = [[1., 0., 1.], [0., 1., 2.]]
     features = {}
     m = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.RECALL)
     m_top_1 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.RECALL, topn=1)
     m_top_2 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.RECALL, topn=2)
     self._check_metrics([
         (m([labels[0]], [scores[0]], features), 2. / 2.),
         (m_top_1([labels[0]], [scores[0]], features), 0. / 2.),
         (m_top_2([labels[0]], [scores[0]], features), 1. / 2.),
         (m_top_2(labels, scores, features), (1. / 2. + 2. / 2.) / 2.),
     ])
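A minimal sketch of Recall@k consistent with the expected values: the fraction of all relevant items (label > 0) that appear in the top k scored items:

def recall_at_k(labels, scores, k=None):
  # Relevant items retrieved in the top k, over all relevant items.
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  top = order[:k] if k else order
  num_rel = sum(1 for l in labels if l > 0)
  return sum(1.0 for i in top if labels[i] > 0) / num_rel if num_rel else 0.0

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[1., 0., 1.], [0., 1., 2.]]
print(recall_at_k(labels[0], scores[0]))        # 1.0
print(recall_at_k(labels[0], scores[0], k=1))   # 0.0
print(recall_at_k(labels[0], scores[0], k=2))   # 0.5
print(sum(recall_at_k(l, s, k=2) for l, s in zip(labels, scores)) / 2)  # 0.75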
Example 12
    def test_eval(self):
        with tf.Graph().as_default():
            metric_fns = {
                'metric/precision@1':
                metrics_lib.make_ranking_metric_fn(
                    metrics_lib.RankingMetricKey.PRECISION, topn=1),
            }
            head1 = ranking_head.create_ranking_head(
                loss_fn=_make_loss_fn(),
                eval_metric_fns=metric_fns,
                name='head1')
            head2 = ranking_head.create_ranking_head(
                loss_fn=_make_loss_fn(),
                eval_metric_fns=metric_fns,
                name='head2')
            multi_head = ranking_head.create_multi_ranking_head([head1, head2])

            logits = {
                'head1': tf.convert_to_tensor(value=[[1., 3.], [1., 2.]]),
                'head2': tf.convert_to_tensor(value=[[2., 3.], [2., 2.]]),
            }
            labels = {
                'head1': tf.convert_to_tensor(value=[[0., 1.], [0., 2.]]),
                'head2': tf.convert_to_tensor(value=[[0., 1.], [0., 2.]]),
            }
            spec = multi_head.create_estimator_spec(
                features={},
                mode=tf.estimator.ModeKeys.EVAL,
                logits=logits,
                labels=labels)

            expected_metrics = [
                'head1/labels_mean',
                'head1/logits_mean',
                'head1/metric/precision@1',
                'head2/labels_mean',
                'head2/logits_mean',
                'head2/metric/precision@1',
            ]

            # Assert spec contains expected tensors.
            self.assertIsNotNone(spec.loss)
            self.assertIsNone(spec.train_op)
            self.assertIsNone(spec.export_outputs)
            self.assertCountEqual(spec.eval_metric_ops.keys(),
                                  expected_metrics)

            # Assert predictions, loss, and metrics.
            with self.cached_session() as sess:
                _initialize_variables(self, spec.scaffold)
                self.assertIsNone(spec.scaffold.summary_op)
                update_ops = {
                    k: spec.eval_metric_ops[k][1]
                    for k in spec.eval_metric_ops
                }
                loss, metrics = sess.run((spec.loss, update_ops))
                self.assertAllClose(loss, 10.)
                self.assertItemsEqual(metrics.keys(), expected_metrics)
Example 13
 def test_make_precision_ia_fn(self):
     with tf.Graph().as_default():
         scores = [[1., 3., 2.], [1., 2., 3.]]
         labels = [[[0., 0.], [0., 0.], [1., 0.]],
                   [[0., 0.], [1., 0.], [1., 1.]]]
         features = {}
         m = metrics_lib.make_ranking_metric_fn(
             metrics_lib.RankingMetricKey.PRECISION_IA)
         m_top_1 = metrics_lib.make_ranking_metric_fn(
             metrics_lib.RankingMetricKey.PRECISION_IA, topn=1)
         m_top_2 = metrics_lib.make_ranking_metric_fn(
             metrics_lib.RankingMetricKey.PRECISION_IA, topn=2)
         self._check_metrics([
             (m([labels[0]], [scores[0]], features), 1. / 3.),
             (m_top_1([labels[0]], [scores[0]], features), 0. / 1.),
             (m_top_2([labels[0]], [scores[0]], features), 1. / 2.),
             (m(labels, scores, features), (1. / 3. + 3. / 6.) / 2.),
         ])
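A minimal sketch of Precision-IA@k consistent with the expected values: per-subtopic precision averaged over the subtopics that have at least one relevant item in the list:

def precision_ia_at_k(labels, scores, k=None):
  # `labels` holds one relevance value per subtopic for each item.
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  k = k or len(order)
  top = order[:k]
  num_subtopics = len(labels[0])
  active = [t for t in range(num_subtopics) if any(item[t] > 0 for item in labels)]
  if not active:
    return 0.0
  hits = sum(labels[i][t] for i in top for t in active)
  return hits / (k * len(active))

scores = [[1., 3., 2.], [1., 2., 3.]]
labels = [[[0., 0.], [0., 0.], [1., 0.]],
          [[0., 0.], [1., 0.], [1., 1.]]]
print(precision_ia_at_k(labels[0], scores[0]))       # 1/3
print(precision_ia_at_k(labels[0], scores[0], k=1))  # 0.0
print(precision_ia_at_k(labels[0], scores[0], k=2))  # 0.5
print(sum(precision_ia_at_k(l, s) for l, s in zip(labels, scores)) / 2)  # (1/3 + 3/6) / 2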
Example 14
 def test_make_ordered_pair_accuracy_fn(self):
   scores = [[1., 3., 2.], [1., 2., 3.]]
   labels = [[0., 0., 1.], [0., 1., 2.]]
   m = metrics.make_ranking_metric_fn(
       metrics.RankingMetricKey.ORDERED_PAIR_ACCURACY)
   self._check_metrics([
       (m([labels[0]], [scores[0]], {}), 1. / 2.),
       (m([labels[1]], [scores[1]], {}), 1.),
       (m(labels, scores, {}), (1. + 3.) / (2. + 3.)),
   ])
Example 15
def _get_metric_pair(key, weight=None, topn=None):
  """Helper function to construct metric name and function."""
  name = "".join([
      "metric/",
      "weighted_" if weight else "",
      key,
      "_%s" % topn if topn else "",
  ])
  return name, metrics.make_ranking_metric_fn(
      key, weights_feature_name=weight, topn=topn)
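A hypothetical usage sketch; the metric-key strings ('ndcg', 'mrr') and the feature name 'example_weight' are illustrative assumptions, not taken from the source:

# Build a name -> metric_fn dict suitable for eval_metric_fns.
eval_metric_fns = dict([
    _get_metric_pair(metrics.RankingMetricKey.NDCG, topn=10),
    _get_metric_pair(metrics.RankingMetricKey.MRR, weight="example_weight"),
])
# Keys would be "metric/ndcg_10" and "metric/weighted_mrr".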
Example 16
 def test_make_mean_average_precision_fn(self):
   with tf.Graph().as_default():
     scores = [[1., 3., 2.], [1., 2., 3.]]
     # Note that scores are ranked in descending order, so the ranks are
     # [[3, 1, 2], [3, 2, 1]]
     labels = [[0., 0., 1.], [0., 1., 2.]]
     rels = [[0, 0, 1], [0, 1, 1]]
     features = {}
     m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.MAP)
     m_top_1 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MAP, topn=1)
     m_top_2 = metrics_lib.make_ranking_metric_fn(
         metrics_lib.RankingMetricKey.MAP, topn=2)
     self._check_metrics([
         (m([labels[0]], [scores[0]], features), _ap(rels[0], scores[0])),
         (m_top_1([labels[0]], [scores[0]],
                  features), _ap(rels[0], scores[0], topn=1)),
         (m_top_2([labels[0]], [scores[0]],
                  features), _ap(rels[0], scores[0], topn=2)),
         (m(labels, scores,
            features), sum(_ap(rels[i], scores[i]) for i in range(2)) / 2.),
     ])
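The test relies on an `_ap` helper that is not shown; below is a plausible reconstruction as a standard average-precision computation (hypothetical, not taken from the source file):

def _ap(relevances, scores, topn=None):
  """Average precision of one ranked list, optionally cut at topn."""
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  if topn is not None:
    order = order[:topn]
  hits, precision_sum = 0, 0.0
  for rank, i in enumerate(order, start=1):
    if relevances[i]:
      hits += 1
      precision_sum += hits / rank
  # Normalize by all relevant items in the full list.
  num_rel = sum(1 for r in relevances if r)
  return precision_sum / num_rel if num_rel else 0.0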
Example 17
    def test_make_normalized_discounted_cumulative_gain_fn(self):
        scores = [[1., 3., 2.], [1., 2., 3.]]
        labels = [[0., 0., 1.], [0., 1., 2.]]
        weights = [[1., 2., 3.], [4., 5., 6.]]
        weights_feature_name = 'weights'
        features = {weights_feature_name: [weights[0]]}
        m = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG)

        expected_ndcg = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
            _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
        self._check_metrics([
            (m([labels[0]], [scores[0]], features), expected_ndcg),
        ])
        expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
            _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
        expected_ndcg_2 = 1.0
        expected_ndcg = (expected_ndcg_1 + expected_ndcg_2) / 2.0
        self._check_metrics([
            (m(labels, scores, features), expected_ndcg),
        ])

        # With weights.
        m_top = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=weights_feature_name,
            topn=1)
        m_weight = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=weights_feature_name)
        self._check_metrics([
            (m_top([labels[0]], [scores[0]],
                   features), _dcg(0., 1, 2.) / _dcg(1., 1, 3.)),
            (m_weight([labels[0]], [scores[0]], features),
             (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
             (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
        ])
Example 18
    def test_eval(self):
        with tf.Graph().as_default():
            metric_fns = {
                'metric/precision@1':
                metrics_lib.make_ranking_metric_fn(
                    metrics_lib.RankingMetricKey.PRECISION, topn=1),
            }
            head = ranking_head.create_ranking_head(loss_fn=_make_loss_fn(),
                                                    eval_metric_fns=metric_fns)

            # Create estimator spec.
            spec = head.create_estimator_spec(
                features=self._default_features_dict,
                mode=tf.estimator.ModeKeys.EVAL,
                logits=self._default_logits,
                labels=self._default_labels)

            expected_metrics = [
                'labels_mean',
                'logits_mean',
                'metric/precision@1',
            ]

            # Assert spec contains expected tensors.
            self.assertIsNotNone(spec.loss)
            self.assertIsNone(spec.train_op)
            self.assertIsNone(spec.export_outputs)
            self.assertItemsEqual(expected_metrics,
                                  spec.eval_metric_ops.keys())

            # Assert predictions, loss, and metrics.
            with self.cached_session() as sess:
                _initialize_variables(self, spec.scaffold)
                self.assertIsNone(spec.scaffold.summary_op)
                update_ops = {
                    k: spec.eval_metric_ops[k][1]
                    for k in spec.eval_metric_ops
                }
                loss, metrics = sess.run((spec.loss, update_ops))
                self.assertAllClose(self._default_loss, loss)
                self.assertItemsEqual(expected_metrics, metrics.keys())
Example 19
  def test_make_normalized_discounted_cumulative_gain_fn(self):
    with tf.Graph().as_default():
      scores = [[1., 3., 2.], [1., 2., 3.]]
      # Note that scores are ranked in descending order.
      ranks = [[3, 1, 2], [3, 2, 1]]
      labels = [[0., 0., 1.], [0., 1., 2.]]
      weights = [[1., 2., 3.], [4., 5., 6.]]
      weights_3d = [[[1.], [2.], [3.]], [[4.], [5.], [6.]]]
      list_weights = [1., 0.]
      list_weights_2d = [[1.], [0.]]
      weights_feature_name = 'weights'
      weights_invalid_feature_name = 'weights_invalid'
      weights_3d_feature_name = 'weights_3d'
      list_weights_name = 'list_weights'
      list_weights_2d_name = 'list_weights_2d'
      features = {
          weights_feature_name: [weights[0]],
          weights_invalid_feature_name: weights[0],
          weights_3d_feature_name: [weights_3d[0]],
          list_weights_name: list_weights,
          list_weights_2d_name: list_weights_2d
      }
      m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.NDCG)

      expected_ndcg = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
          _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
      self._check_metrics([
          (m([labels[0]], [scores[0]], features), expected_ndcg),
      ])
      expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
          _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
      expected_ndcg_2 = 1.0
      expected_ndcg = (expected_ndcg_1 + expected_ndcg_2) / 2.0
      self._check_metrics([
          (m(labels, scores, features), expected_ndcg),
      ])

      # With item-wise weights.
      m_top = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.NDCG,
          weights_feature_name=weights_feature_name,
          topn=1)
      m_weight = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.NDCG,
          weights_feature_name=weights_feature_name)
      m_weights_3d = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.NDCG,
          weights_feature_name=weights_3d_feature_name)
      self._check_metrics([
          (m_top([labels[0]], [scores[0]],
                 features), _dcg(0., 1, 2.) / _dcg(1., 1, 3.)),
          (m_weight([labels[0]], [scores[0]], features),
           (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
           (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
          (m_weights_3d([labels[0]], [scores[0]], features),
           (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
           (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
      ])
      with self.assertRaises(ValueError):
        m_weight_invalid = metrics_lib.make_ranking_metric_fn(
            metrics_lib.RankingMetricKey.NDCG,
            weights_feature_name=weights_invalid_feature_name)
        m_weight_invalid([labels[0]], [scores[0]], features)

      # With list-wise weights.
      m_list_weight = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.NDCG,
          weights_feature_name=list_weights_name)
      m_list_weight_2d = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.NDCG,
          weights_feature_name=list_weights_2d_name)
      self._check_metrics([
          (m_list_weight(labels, scores, features),
           (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
           (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
          (m_list_weight_2d(labels, scores, features),
           (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
           (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
      ])

      # Testing different gain and discount functions
      gain_fn = lambda rel: rel
      rank_discount_fn = lambda rank: 1. / rank

      def mod_dcg_fn(l, r):
        return _dcg(l, r, gain_fn=gain_fn, rank_discount_fn=rank_discount_fn)

      m_mod = metrics_lib.make_ranking_metric_fn(
          metrics_lib.RankingMetricKey.NDCG,
          gain_fn=gain_fn,
          rank_discount_fn=rank_discount_fn)
      list_size = len(scores[0])
      expected_modified_dcg_1 = sum([
          mod_dcg_fn(labels[0][ind], ranks[0][ind]) for ind in range(list_size)
      ])
      self._check_metrics([
          (m_mod([labels[0]], [scores[0]], features), expected_modified_dcg_1),
      ])
Example 20
    def test_make_alpha_discounted_cumulative_gain_fn(self):
        with tf.Graph().as_default():
            scores = [[1., 3., 2.], [1., 2., 3.]]
            # Note that scores are ranked in descending order.
            # ranks = [[3, 1, 2], [3, 2, 1]]
            labels = [[[0., 0.], [0., 1.], [0., 1.]],
                      [[0., 0.], [1., 0.], [1., 1.]]]
            # cum_labels = [[[0., 2.], [0., 0.], [0., 1.]],
            #               [[2., 1.], [1., 1.], [0., 0.]]]
            weights = [[1., 2., 3.], [4., 5., 6.]]
            weights_3d = [[[1.], [2.], [3.]], [[4.], [5.], [6.]]]
            list_weights = [1., 0.]
            list_weights_2d = [[1.], [0.]]
            weights_feature_name = 'weights'
            weights_invalid_feature_name = 'weights_invalid'
            weights_3d_feature_name = 'weights_3d'
            list_weights_name = 'list_weights'
            list_weights_2d_name = 'list_weights_2d'
            features = {
                weights_feature_name: [weights[0]],
                weights_invalid_feature_name: weights[0],
                weights_3d_feature_name: [weights_3d[0]],
                list_weights_name: list_weights,
                list_weights_2d_name: list_weights_2d
            }
            m = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG)

            expected_alphadcg = (_alpha_dcg([0., 1.], [0., 0.], 1) +
                                 _alpha_dcg([0., 1.], [0., 1.], 2) +
                                 _alpha_dcg([0., 0.], [0., 2.], 3))
            self._check_metrics([
                (m([labels[0]], [scores[0]], features), expected_alphadcg),
            ])
            expected_alphadcg_1 = (_alpha_dcg([0., 1.], [0., 0.], 1) +
                                   _alpha_dcg([0., 1.], [0., 1.], 2) +
                                   _alpha_dcg([0., 0.], [0., 2.], 3))
            expected_alphadcg_2 = (_alpha_dcg([1., 1.], [0., 0.], 1) +
                                   _alpha_dcg([1., 0.], [1., 1.], 2) +
                                   _alpha_dcg([0., 0.], [2., 1.], 3))
            expected_alphadcg = (expected_alphadcg_1 +
                                 expected_alphadcg_2) / 2.0
            self._check_metrics([
                (m(labels, scores, features), expected_alphadcg),
            ])

            # With item-wise weights.
            m_top = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG,
                weights_feature_name=weights_feature_name,
                topn=1)
            m_weight = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG,
                weights_feature_name=weights_feature_name)
            m_weights_3d = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG,
                weights_feature_name=weights_3d_feature_name)
            self._check_metrics([
                (m_top([labels[0]], [scores[0]],
                       features), _alpha_dcg([0., 1.], [0., 0.], 1, 2.) / 2.5),
                (m_weight([labels[0]], [scores[0]], features),
                 (_alpha_dcg([0., 1.], [0., 0.], 1, 2.) +
                  _alpha_dcg([0., 1.], [0., 1.], 2, 3.) +
                  _alpha_dcg([0., 0.], [0., 2.], 3, 1.)) / 2.5),
                (m_weights_3d([labels[0]], [scores[0]], features),
                 (_alpha_dcg([0., 1.], [0., 0.], 1, 2.) +
                  _alpha_dcg([0., 1.], [0., 1.], 2, 3.) +
                  _alpha_dcg([0., 0.], [0., 2.], 3, 1.)) / 2.5),
            ])
            with self.assertRaises(ValueError):
                m_weight_invalid = metrics_lib.make_ranking_metric_fn(
                    metrics_lib.RankingMetricKey.ALPHA_DCG,
                    weights_feature_name=weights_invalid_feature_name)
                m_weight_invalid([labels[0]], [scores[0]], features)

            # With list-wise weights.
            m_list_weight = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG,
                weights_feature_name=list_weights_name)
            m_list_weight_2d = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG,
                weights_feature_name=list_weights_2d_name)
            self._check_metrics([
                (m_list_weight(labels, scores, features),
                 (_alpha_dcg([0., 1.], [0., 0.], 1, 1.) +
                  _alpha_dcg([0., 1.], [0., 1.], 2, 1.) +
                  _alpha_dcg([0., 0.], [0., 2.], 3, 1.))),
                (m_list_weight_2d(labels, scores, features),
                 (_alpha_dcg([0., 1.], [0., 0.], 1, 1.) +
                  _alpha_dcg([0., 1.], [0., 1.], 2, 1.) +
                  _alpha_dcg([0., 0.], [0., 2.], 3, 1.))),
            ])

            # Test different gain and discount functions.
            alpha = 0.2
            rank_discount_fn = lambda rank: 1. / rank

            mod_alpha_dcg_fn = functools.partial(
                _alpha_dcg, alpha=alpha, rank_discount_fn=rank_discount_fn)

            m_mod = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.ALPHA_DCG,
                rank_discount_fn=rank_discount_fn,
                alpha=alpha)

            expected_modified_alphadcg_1 = (
                mod_alpha_dcg_fn([0., 1.], [0., 0.], 1) +
                mod_alpha_dcg_fn([0., 1.], [0., 1.], 2) +
                mod_alpha_dcg_fn([0., 0.], [0., 2.], 3))
            self._check_metrics([
                (m_mod([labels[0]], [scores[0]],
                       features), expected_modified_alphadcg_1),
            ])
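The test relies on an `_alpha_dcg` helper that is not shown; below is a plausible reconstruction (hypothetical): one item's alpha-DCG contribution, where `label` holds the item's per-subtopic relevance, `cum_label` the counts of relevant items already ranked above it per subtopic, and each subtopic's gain is damped by (1 - alpha) for every earlier occurrence:

import math

# Hypothetical reconstruction of the _alpha_dcg test helper.
def _alpha_dcg(label, cum_label, rank, weight=1.0, alpha=0.5,
               rank_discount_fn=lambda r: 1.0 / math.log(r + 1.0, 2.0)):
  """Returns a single item's alpha-DCG contribution at a 1-based rank."""
  gain = sum(l * (1.0 - alpha) ** c for l, c in zip(label, cum_label))
  return weight * gain * rank_discount_fn(rank)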
Example 21
    def test_make_normalized_discounted_cumulative_gain_fn(self):
        with tf.Graph().as_default():
            scores = [[1., 3., 2.], [1., 2., 3.]]
            labels = [[0., 0., 1.], [0., 1., 2.]]
            weights = [[1., 2., 3.], [4., 5., 6.]]
            weights_3d = [[[1.], [2.], [3.]], [[4.], [5.], [6.]]]
            list_weights = [1., 0.]
            list_weights_2d = [[1.], [0.]]
            weights_feature_name = 'weights'
            weights_invalid_feature_name = 'weights_invalid'
            weights_3d_feature_name = 'weights_3d'
            list_weights_name = 'list_weights'
            list_weights_2d_name = 'list_weights_2d'
            features = {
                weights_feature_name: [weights[0]],
                weights_invalid_feature_name: weights[0],
                weights_3d_feature_name: [weights_3d[0]],
                list_weights_name: list_weights,
                list_weights_2d_name: list_weights_2d
            }
            m = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG)

            expected_ndcg = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
                _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
            self._check_metrics([
                (m([labels[0]], [scores[0]], features), expected_ndcg),
            ])
            expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
                _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
            expected_ndcg_2 = 1.0
            expected_ndcg = (expected_ndcg_1 + expected_ndcg_2) / 2.0
            self._check_metrics([
                (m(labels, scores, features), expected_ndcg),
            ])

            # With item-wise weights.
            m_top = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG,
                weights_feature_name=weights_feature_name,
                topn=1)
            m_weight = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG,
                weights_feature_name=weights_feature_name)
            m_weights_3d = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG,
                weights_feature_name=weights_3d_feature_name)
            self._check_metrics([
                (m_top([labels[0]], [scores[0]],
                       features), _dcg(0., 1, 2.) / _dcg(1., 1, 3.)),
                (m_weight([labels[0]], [scores[0]], features),
                 (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
                 (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
                (m_weights_3d([labels[0]], [scores[0]], features),
                 (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
                 (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
            ])
            with self.assertRaises(ValueError):
                m_weight_invalid = metrics_lib.make_ranking_metric_fn(
                    metrics_lib.RankingMetricKey.NDCG,
                    weights_feature_name=weights_invalid_feature_name)
                m_weight_invalid([labels[0]], [scores[0]], features)

            # With list-wise weights.
            m_list_weight = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG,
                weights_feature_name=list_weights_name)
            m_list_weight_2d = metrics_lib.make_ranking_metric_fn(
                metrics_lib.RankingMetricKey.NDCG,
                weights_feature_name=list_weights_2d_name)
            self._check_metrics([
                (m_list_weight(labels, scores, features),
                 (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
                 (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
                (m_list_weight_2d(labels, scores, features),
                 (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
                 (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
            ])