def test_make_mean_reciprocal_rank_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  # Note that scores are ranked in descending order.
  # ranks = [[3, 1, 2], [3, 2, 1]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  # Note that the definition of MRR only uses the highest ranked
  # relevant item, where an item is relevant if its label is > 0.
  rel_rank = [2, 1]
  weights = [[1., 2., 3.], [4., 5., 6.]]
  num_queries = len(scores)
  weights_feature_name = 'weights'
  features = {weights_feature_name: torch.tensor(weights)}
  m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.MRR)
  m_w = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.MRR,
      weights_feature_name=weights_feature_name)
  m_2 = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.MRR, topn=1)
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       0.5),
      (m(torch.tensor(labels), torch.tensor(scores), features),
       (0.5 + 1.0) / 2),
      (m_w(torch.tensor(labels), torch.tensor(scores), features),
       (3. * 0.5 + (6. + 5.) / 2. * 1.) / (3. + (6. + 5.) / 2.)),
      (m_2(torch.tensor(labels), torch.tensor(scores), features),
       sum([0., 1. / rel_rank[1], 0.]) / num_queries),
  ])
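# The tests in this section rely on a `_check_metrics` helper that is not
# shown here. A minimal sketch of what it might look like, assuming each
# metric fn returns a scalar torch tensor (the real helper in the full test
# module may differ):
def _check_metrics(self, metrics_and_values):
  """Asserts that each computed metric is close to its expected value."""
  for metric, expected in metrics_and_values:
    self.assertAlmostEqual(metric.item(), expected, places=5)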
def test_make_discounted_cumulative_gain_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  # Note that scores are ranked in descending order.
  ranks = [[3, 1, 2], [3, 2, 1]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  weights = [[1., 1., 1.], [2., 2., 1.]]
  weights_feature_name = 'weights'
  features = {weights_feature_name: torch.tensor(weights)}
  m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.DCG)
  m_w = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.DCG,
      weights_feature_name=weights_feature_name)
  expected_dcg_1 = _dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       expected_dcg_1),
  ])
  expected_dcg_2 = _dcg(2., 1) + _dcg(1., 2)
  expected_dcg_2_weighted = _dcg(2., 1) + _dcg(1., 2) * 2.
  expected_weight_2 = ((4 - 1) * 1. + (2 - 1) * 2.) / (4 - 1 + 2 - 1)
  self._check_metrics([
      (m(torch.tensor(labels), torch.tensor(scores), features),
       (expected_dcg_1 + expected_dcg_2) / 2.0),
      (m_w(torch.tensor(labels), torch.tensor(scores), features),
       (expected_dcg_1 + expected_dcg_2_weighted) /
       (1. + expected_weight_2)),
  ])
  # Testing different gain and discount functions.
  gain_fn = lambda rel: rel
  rank_discount_fn = lambda rank: rank

  def mod_dcg_fn(l, r):
    return _dcg(l, r, gain_fn=gain_fn, rank_discount_fn=rank_discount_fn)

  m_mod = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.DCG,
      gain_fn=gain_fn,
      rank_discount_fn=rank_discount_fn)
  list_size = len(scores[0])
  expected_modified_dcg_1 = sum([
      mod_dcg_fn(labels[0][ind], ranks[0][ind]) for ind in range(list_size)
  ])
  self._check_metrics([
      (m_mod(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       expected_modified_dcg_1),
  ])
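# The `_dcg` helper used in the expected values above is not defined in this
# section. A minimal sketch consistent with those expectations, assuming the
# standard exponential gain and log2 rank discount as defaults (the keyword
# arguments mirror how the tests call it; the real helper may differ):
import math

def _dcg(label,
         rank,
         weight=1.0,
         gain_fn=lambda l: math.pow(2.0, l) - 1.0,
         rank_discount_fn=lambda r: 1.0 / math.log2(r + 1.0)):
  """Returns one item's weighted contribution to DCG at the given rank."""
  return weight * gain_fn(label) * rank_discount_fn(rank)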
def test_make_average_relevance_position_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  weights = [[1., 2., 3.], [4., 5., 6.]]
  weights_feature_name = 'weights'
  features = {weights_feature_name: torch.tensor(weights)}
  m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.ARP)
  m_w = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.ARP,
      weights_feature_name=weights_feature_name)
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       2.),
      (m(torch.tensor(labels), torch.tensor(scores), features),
       (1. * 2. + 2. * 1. + 1. * 2.) / 4.),
      (m_w(torch.tensor(labels), torch.tensor(scores), features),
       (3. * 1. * 2. + 6. * 2. * 1. + 5. * 1. * 2.) / (3. + 12. + 5.)),
  ])
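# A brute-force reading of the ARP expectations above, as a hedged sketch:
# ARP averages each item's rank, weighted by its label and any per-item
# weight. The name `_arp` and its signature are illustrative only:
def _arp(labels, ranks, weights=None):
  """Label- and weight-averaged rank over all items in all lists."""
  weights = weights or [[1.] * len(lbls) for lbls in labels]
  num = sum(w * l * r
            for lbls, rks, wts in zip(labels, ranks, weights)
            for l, r, w in zip(lbls, rks, wts))
  den = sum(w * l
            for lbls, wts in zip(labels, weights)
            for l, w in zip(lbls, wts))
  return num / den

# E.g. _arp([[0., 0., 1.], [0., 1., 2.]], [[3, 1, 2], [3, 2, 1]])
# == (1. * 2. + 2. * 1. + 1. * 2.) / 4. == 1.5, matching the test above.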
def test_make_precision_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  features = {}
  m = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.PRECISION)
  m_top_1 = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.PRECISION, topn=1)
  m_top_2 = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.PRECISION, topn=2)
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       1. / 3.),
      (m_top_1(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
               features), 0. / 1.),
      (m_top_2(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
               features), 1. / 2.),
      (m(torch.tensor(labels), torch.tensor(scores), features),
       (1. / 3. + 2. / 3.) / 2.),
  ])
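# For reference, a hedged sketch of what PRECISION@k computes per list: the
# fraction of the top-k scored items whose label is > 0 (topn=None meaning
# the whole list). Illustrative only, not the library implementation:
def _precision(labels, scores, topn=None):
  """Precision at `topn` for a single ranked list."""
  k = topn or len(scores)
  top_k = sorted(range(len(scores)), key=lambda i: -scores[i])[:k]
  return sum(1. for i in top_k if labels[i] > 0) / k

# E.g. _precision([0., 0., 1.], [1., 3., 2.], topn=2) == 1. / 2., matching
# the m_top_2 expectation above.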
def test_make_ordered_pair_accuracy_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  m = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.ORDERED_PAIR_ACCURACY)
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), {}),
       1. / 2.),
      (m(torch.tensor([labels[1]]), torch.tensor([scores[1]]), {}), 1.),
      (m(torch.tensor(labels), torch.tensor(scores), {}),
       (1. + 3.) / (2. + 3.)),
  ])
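# A hedged brute-force check of the ordered-pair accuracy expectations:
# over all pairs (i, j) with label_i > label_j, count the fraction whose
# scores are ordered the same way. The name `_opa` is illustrative only:
def _opa(labels, scores):
  """Returns (correctly ordered pairs, total ordered pairs) for one list."""
  pairs = [(i, j)
           for i in range(len(labels))
           for j in range(len(labels))
           if labels[i] > labels[j]]
  correct = sum(1. for i, j in pairs if scores[i] > scores[j])
  return correct, len(pairs)

# List 1 has 1 of 2 ordered pairs correct; list 2 has 3 of 3. Micro-averaging
# over both lists gives (1. + 3.) / (2. + 3.), as asserted above.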
def test_eval(self):
  metric_fns = {
      'metric/precision@1':
          metrics_lib.make_ranking_metric_fn(
              metrics_lib.RankingMetricKey.PRECISION, topn=1),
  }
  head = ranking_head.Head(
      loss_fn=_make_loss_fn(), eval_metric_fns=metric_fns)
  loss, metrics_values = head.run(
      ranking_head.ModeKeys.EVAL,
      self._default_labels,
      self._default_logits,
      features={})
  self.assertAlmostEqual(loss.item(), self._default_loss, 5)
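# `_make_loss_fn` and the `self._default_*` fixtures are defined elsewhere
# in the test module and are not shown here. Purely as an illustration of
# the expected shape of the fixture, the loss fn could be as simple as a
# summed sigmoid cross-entropy over the logits (the real fixture may differ):
def _make_loss_fn():
  def _loss_fn(labels, logits, features):
    del features  # Unused in this sketch.
    return torch.nn.functional.binary_cross_entropy_with_logits(
        logits, labels, reduction='sum')
  return _loss_fn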
def test_make_mean_average_precision_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  # Note that scores are ranked in descending order, so the ranks are
  # [[3, 1, 2], [3, 2, 1]].
  labels = [[0., 0., 1.], [0., 1., 2.]]
  rels = [[0, 0, 1], [0, 1, 1]]
  features = {}
  m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.MAP)
  m_top_1 = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.MAP, topn=1)
  m_top_2 = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.MAP, topn=2)
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       _ap(rels[0], scores[0])),
      (m_top_1(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
               features), _ap(rels[0], scores[0], topn=1)),
      (m_top_2(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
               features), _ap(rels[0], scores[0], topn=2)),
      (m(torch.tensor(labels), torch.tensor(scores), features),
       sum(_ap(rels[i], scores[i]) for i in range(2)) / 2.),
  ])
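# The `_ap` helper is not shown in this section. A hedged sketch of a
# standard average-precision computation consistent with the calls above
# (binary relevances, scores, optional cutoff; the real helper may differ):
def _ap(relevances, scores, topn=None):
  """Average precision over a single ranked list."""
  order = sorted(range(len(scores)), key=lambda i: -scores[i])
  if topn is not None:
    order = order[:topn]
  num_rel, total_prec = 0, 0.
  for pos, i in enumerate(order, start=1):
    if relevances[i] >= 1:
      num_rel += 1
      total_prec += num_rel / float(pos)
  total_rel = sum(1 for rel in relevances if rel >= 1)
  return total_prec / total_rel if total_rel else 0.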
def test_make_normalized_discounted_cumulative_gain_fn(self):
  scores = [[1., 3., 2.], [1., 2., 3.]]
  # Note that scores are ranked in descending order.
  ranks = [[3, 1, 2], [3, 2, 1]]
  labels = [[0., 0., 1.], [0., 1., 2.]]
  weights = [[1., 2., 3.], [4., 5., 6.]]
  weights_3d = [[[1.], [2.], [3.]], [[4.], [5.], [6.]]]
  list_weights = [1., 0.]
  list_weights_2d = [[1.], [0.]]
  weights_feature_name = 'weights'
  weights_invalid_feature_name = 'weights_invalid'
  weights_3d_feature_name = 'weights_3d'
  list_weights_name = 'list_weights'
  list_weights_2d_name = 'list_weights_2d'
  features = {
      weights_feature_name: torch.tensor([weights[0]]),
      weights_invalid_feature_name: torch.tensor(weights[0]),
      weights_3d_feature_name: torch.tensor([weights_3d[0]]),
      list_weights_name: torch.tensor(list_weights),
      list_weights_2d_name: torch.tensor(list_weights_2d),
  }
  m = metrics_lib.make_ranking_metric_fn(metrics_lib.RankingMetricKey.NDCG)
  expected_ndcg = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
      _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
  self._check_metrics([
      (m(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       expected_ndcg),
  ])
  expected_ndcg_1 = (_dcg(0., 1) + _dcg(1., 2) + _dcg(0., 3)) / (
      _dcg(1., 1) + _dcg(0., 2) + _dcg(0., 3))
  expected_ndcg_2 = 1.0
  expected_ndcg = (expected_ndcg_1 + expected_ndcg_2) / 2.0
  self._check_metrics([
      (m(torch.tensor(labels), torch.tensor(scores), features),
       expected_ndcg),
  ])
  # With item-wise weights.
  m_top = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.NDCG,
      weights_feature_name=weights_feature_name,
      topn=1)
  m_weight = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.NDCG,
      weights_feature_name=weights_feature_name)
  m_weights_3d = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.NDCG,
      weights_feature_name=weights_3d_feature_name)
  self._check_metrics([
      (m_top(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       _dcg(0., 1, 2.) / _dcg(1., 1, 3.)),
      (m_weight(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                features),
       (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
       (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
      (m_weights_3d(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                    features),
       (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
       (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
  ])
  with self.assertRaises(ValueError):
    m_weight_invalid = metrics_lib.make_ranking_metric_fn(
        metrics_lib.RankingMetricKey.NDCG,
        weights_feature_name=weights_invalid_feature_name)
    m_weight_invalid(torch.tensor([labels[0]]), torch.tensor([scores[0]]),
                     features)
  # With list-wise weights.
  m_list_weight = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.NDCG,
      weights_feature_name=list_weights_name)
  m_list_weight_2d = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.NDCG,
      weights_feature_name=list_weights_2d_name)
  self._check_metrics([
      (m_list_weight(torch.tensor(labels), torch.tensor(scores), features),
       (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
       (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
      (m_list_weight_2d(torch.tensor(labels), torch.tensor(scores),
                        features),
       (_dcg(0., 1, 2.) + _dcg(1., 2, 3.) + _dcg(0., 3, 1.)) /
       (_dcg(1., 1, 3.) + _dcg(0., 2, 1.) + _dcg(0., 3, 2.))),
  ])
  # Testing different gain and discount functions.
  gain_fn = lambda rel: rel
  rank_discount_fn = lambda rank: 1. / rank

  def mod_dcg_fn(l, r):
    return _dcg(l, r, gain_fn=gain_fn, rank_discount_fn=rank_discount_fn)

  m_mod = metrics_lib.make_ranking_metric_fn(
      metrics_lib.RankingMetricKey.NDCG,
      gain_fn=gain_fn,
      rank_discount_fn=rank_discount_fn)
  list_size = len(scores[0])
  expected_modified_dcg_1 = sum([
      mod_dcg_fn(labels[0][ind], ranks[0][ind]) for ind in range(list_size)
  ])
  self._check_metrics([
      (m_mod(torch.tensor([labels[0]]), torch.tensor([scores[0]]), features),
       expected_modified_dcg_1),
  ])
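
# Two notes on the expectations above, inferred from the expected values
# themselves rather than stated in the source: (1) in the weighted cases,
# both the DCG numerator and the ideal-DCG denominator are computed with
# per-item weights, with the ideal ordering placing the relevant item
# (label 1., weight 3.) first; (2) the final check compares the modified
# NDCG against an unnormalized sum only because the custom gain/discount
# make the ideal DCG for labels [0., 0., 1.] equal to 1. * (1. / 1) = 1.,
# so normalization is a no-op there.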