Example #1
    def build_metrics(self, training=None):
        """Builds ranking metrics: MAP plus NDCG@k and MRR@k for k in {1, 5, 10}."""
        del training  # Unused: the same metrics are built for training and eval.
        metrics = [tfr_metrics.MeanAveragePrecisionMetric(name='MAP')]
        for topn in [1, 5, 10]:
            metrics.append(
                tfr_metrics.NDCGMetric(name='NDCG@{}'.format(topn), topn=topn))
        for topn in [1, 5, 10]:
            metrics.append(
                tfr_metrics.MRRMetric(name='MRR@{}'.format(topn), topn=topn))
        return metrics
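
For context, `build_metrics` above appears to be the metrics hook of a TF-Ranking Keras pipeline, but the same metric objects can also be driven by hand. A minimal sketch, assuming `tfr_metrics` is the `tensorflow_ranking.keras.metrics` module and using made-up toy labels and scores:

import tensorflow_ranking as tfr

tfr_metrics = tfr.keras.metrics  # assumed to be the module aliased as `tfr_metrics` above

# Build the same metric list: MAP plus NDCG@k and MRR@k for k in {1, 5, 10}.
metrics = [tfr_metrics.MeanAveragePrecisionMetric(name='MAP')]
for topn in [1, 5, 10]:
    metrics.append(tfr_metrics.NDCGMetric(name='NDCG@{}'.format(topn), topn=topn))
    metrics.append(tfr_metrics.MRRMetric(name='MRR@{}'.format(topn), topn=topn))

# Toy batch: two lists of three items each (graded relevance labels, model scores).
labels = [[0., 0., 1.], [0., 1., 2.]]
scores = [[1., 3., 2.], [1., 2., 3.]]

for metric in metrics:
    metric.update_state(labels, scores)
    print(metric.name, metric.result().numpy())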
Example #2
    def test_mean_average_precision(self):
        scores = [[1., 3., 2.], [1., 2., 3.]]
        # Note that scores are ranked in descending order, so the ranks are
        # [[3, 1, 2], [3, 2, 1]]
        labels = [[0., 0., 1.], [0., 1., 2.]]
        rels = [[0, 0, 1], [0, 1, 1]]

        metric_ = metrics_lib.MeanAveragePrecisionMetric()
        metric_.update_state([labels[0]], [scores[0]])
        expected_result = _ap(rels[0], scores[0])
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric(topn=1)
        metric_.update_state([labels[0]], [scores[0]])
        expected_result = _ap(rels[0], scores[0], topn=1)
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric(topn=2)
        metric_.update_state([labels[0]], [scores[0]])
        expected_result = _ap(rels[0], scores[0], topn=2)
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric()
        metric_.update_state(labels, scores)
        expected_result = sum(_ap(rels[i], scores[i]) for i in range(2)) / 2.
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric(topn=1)
        metric_.update_state(labels, scores)
        expected_result = sum(
            _ap(rels[i], scores[i], topn=1) for i in range(2)) / 2.
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)
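
The `_ap` helper behind the expected values is not shown in this snippet. Below is a plain-Python stand-in consistent with how it is called above; the name and the exact top-n handling are assumptions (this version counts only relevant items within the cutoff when normalizing):

def average_precision(relevances, scores, topn=None):
    """Hypothetical stand-in for the `_ap` helper used in the test above."""
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    cutoff = len(order) if topn is None else topn
    hits, precision_sum = 0, 0.0
    for rank, i in enumerate(order[:cutoff], start=1):
        if relevances[i] > 0:
            hits += 1
            precision_sum += hits / rank  # precision@rank at each relevant hit
    return precision_sum / hits if hits else 0.0

# Sanity check against the first case above: rels [0, 0, 1], ranks [3, 1, 2] -> AP = 0.5.
assert abs(average_precision([0, 0, 1], [1., 3., 2.]) - 0.5) < 1e-6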
Example #3
    def _calculate_aggregated_metrics(self, flattened_aggregated_logs,
                                      query_feature_name):
        """Calculates metrics where lists are grouped by `query_feature_name`."""
        qid2labels = {}
        qid2preds = {}

        qids = flattened_aggregated_logs[query_feature_name]
        preds = flattened_aggregated_logs[_PREDICTION]
        labels = flattened_aggregated_logs[_LABEL]
        for qid, pred, label in zip(qids, preds, labels):
            qid2labels[qid] = qid2labels.get(qid, []) + [label]
            qid2preds[qid] = qid2preds.get(qid, []) + [pred]

        metrics = [
            tfr_metrics.MeanAveragePrecisionMetric(name='Aggregated_MAP')
        ]
        for topn in [1, 5, 10]:
            metrics.append(
                tfr_metrics.NDCGMetric(name='Aggregated_NDCG@{}'.format(topn),
                                       topn=topn))
        for topn in [1, 5, 10]:
            metrics.append(
                tfr_metrics.MRRMetric(name='Aggregated_MRR@{}'.format(topn),
                                      topn=topn))

        output_results = {}
        for metric in metrics:
            for qid in qid2preds:
                preds = np.expand_dims(qid2preds[qid], 0)
                labels = np.expand_dims(qid2labels[qid], 0)
                metric.update_state(labels, preds)
            output_results.update({
                'aggregated_metrics/{}'.format(metric.name):
                metric.result().numpy()
            })
            logging.info('aggregated_metrics/%s = %f', metric.name,
                         metric.result().numpy())
        return output_results
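
The per-query grouping above can be exercised on its own. A small sketch with made-up data; the keys 'qid', 'prediction' and 'label' stand in for `query_feature_name`, `_PREDICTION` and `_LABEL`, whose actual values are not shown in the snippet:

import numpy as np
import tensorflow_ranking as tfr

# Made-up flattened logs: three examples spread over two queries.
flattened_logs = {
    'qid': ['q1', 'q1', 'q2'],
    'prediction': [0.9, 0.1, 0.4],
    'label': [1.0, 0.0, 1.0],
}

# Group per-example predictions and labels into per-query lists.
qid2preds, qid2labels = {}, {}
for qid, pred, label in zip(flattened_logs['qid'], flattened_logs['prediction'],
                            flattened_logs['label']):
    qid2preds.setdefault(qid, []).append(pred)
    qid2labels.setdefault(qid, []).append(label)

# Each query becomes a batch of one list, so lists of different lengths are fine.
metric = tfr.keras.metrics.MeanAveragePrecisionMetric(name='Aggregated_MAP')
for qid in qid2preds:
    metric.update_state(np.expand_dims(qid2labels[qid], 0),
                        np.expand_dims(qid2preds[qid], 0))
print(metric.result().numpy())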
Example #4
    def test_mean_average_precision_with_weights(self):
        scores = [[1., 3., 2.], [1., 2., 3.]]
        # Note that scores are ranked in descending order, so the ranks are
        # [[3, 1, 2], [3, 2, 1]]
        labels = [[0., 0., 1.], [0., 1., 2.]]
        rels = [[0, 0, 1], [0, 1, 1]]
        weights = [[1., 2., 3.], [4., 5., 6.]]
        list_weights = [[1.], [2.]]
        as_list_weights = _example_weights_to_list_weights(
            weights, labels, 'MAP')
        # See Equation (1.7) in the following reference to make sense of
        # the formulas that appear in the following expression:
        # Liu, T-Y "Learning to Rank for Information Retrieval" found at
        # https://www.nowpublishers.com/article/DownloadSummary/INR-016

        metric_ = metrics_lib.MeanAveragePrecisionMetric()
        metric_.update_state([labels[0]], [scores[0]], [weights[0]])
        expected_result = ((1. / 2.) * 3.) / (0 * 1 + 0 * 2 + 1 * 3)
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric()
        metric_.update_state([labels[1]], [scores[1]], [weights[1]])
        expected_result = ((1. / 1.) * 6. +
                           (2. / 2.) * 5.) / (0 * 4 + 1 * 5 + 1 * 6)
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric()
        metric_.update_state(labels, scores, weights)
        expected_result = (((1. / 2.) * 3.) /
                           (0 * 1 + 0 * 2 + 1 * 3) * as_list_weights[0] +
                           ((1. / 1.) * 6. +
                            (2. / 2.) * 5.) / (0 * 4 + 1 * 5 + 1 * 6) *
                           as_list_weights[1]) / sum(as_list_weights)
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric(topn=1)
        metric_.update_state(labels, scores, weights)
        expected_result = ((0 * as_list_weights[0] + ((1. / 1.) * 6.) /
                            (1 * 6) * as_list_weights[1]) /
                           sum(as_list_weights))
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        metric_ = metrics_lib.MeanAveragePrecisionMetric(topn=2)
        metric_.update_state(labels, scores, weights)
        expected_result = (
            ((1. / 2.) * 3.) / (0 * 1 + 1 * 3) * as_list_weights[0] +
            ((1. / 1.) * 6. + (2. / 2.) * 5.) /
            (1 * 5 + 1 * 6) * as_list_weights[1]) / sum(as_list_weights)
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        # Per list weight.
        metric_ = metrics_lib.MeanAveragePrecisionMetric()
        metric_.update_state(labels, scores, list_weights)
        expected_result = sum(
            _ap(rels[i], scores[i]) * list_weights[i][0]
            for i in range(2)) / sum(list_weights[i][0] for i in range(2))
        self.assertAlmostEqual(metric_.result().numpy(),
                               expected_result,
                               places=5)

        # All-zero example weights: the metric result collapses to zero.
        metric_ = metrics_lib.MeanAveragePrecisionMetric(topn=2)
        metric_.update_state(labels, scores, [0., 0., 0.])
        self.assertAlmostEqual(metric_.result().numpy(), 0., places=5)
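
To make the weighted expected values easier to follow, here is a per-list weighted average precision written directly from the expressions above. It is a sketch of the single-list cases only; the batch cases additionally weight each list by the derived `as_list_weights`, which this helper does not model:

def weighted_average_precision(relevances, scores, weights, topn=None):
    """Hypothetical per-list weighted AP matching the expected values above.

    Each relevant item within the top-n cutoff contributes precision@rank
    times its example weight; the total is normalized by the summed weights
    of those relevant items.
    """
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    cutoff = len(order) if topn is None else topn
    hits, numerator, denominator = 0, 0.0, 0.0
    for rank, i in enumerate(order[:cutoff], start=1):
        if relevances[i] > 0:
            hits += 1
            numerator += (hits / rank) * weights[i]
            denominator += weights[i]
    return numerator / denominator if denominator else 0.0

# The two single-list cases above.
assert abs(weighted_average_precision([0, 0, 1], [1., 3., 2.], [1., 2., 3.]) - 0.5) < 1e-6
assert abs(weighted_average_precision([0, 1, 1], [1., 2., 3.], [4., 5., 6.]) - 1.0) < 1e-6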