Example #1
    def test_precision_at_max_recs(self):
        """Tests Precision@k for the case when all users have exactly k recommendations.

        When all users have exactly k recommendations,
        there isn't an extra ``user_id`` index generated when sorting for the largest ``k`` scores.
        """
        actual = pd.DataFrame({
            Constants.user_id: [0],
            Constants.item_id: [0],
            'click': [True]
        })
        predicted = pd.DataFrame({
            Constants.user_id: [0, 0, 0],
            Constants.item_id: [0, 1, 2],
            'click': [0, -1, -2]
        })

        self.assertEqual(
            1.,
            RankingRecoMetrics.Precision('click',
                                         k=1).get_score(actual, predicted))
        self.assertEqual(
            0.5,
            RankingRecoMetrics.Precision('click',
                                         k=2).get_score(actual, predicted))
        self.assertEqual(
            1. / 3,
            RankingRecoMetrics.Precision('click',
                                         k=3).get_score(actual, predicted))
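
A quick back-of-the-envelope check of the asserted values (a minimal sketch, not jurity's implementation): user 0 has a single relevant item and the three recommendations are ranked by their predicted score, so Precision@k is simply the hit count divided by k.

relevant = {0}            # the only item user 0 actually clicked
ranked = [0, 1, 2]        # user 0's recommendations sorted by predicted score
for k in (1, 2, 3):
    hits = sum(item in relevant for item in ranked[:k])
    print(f"Precision@{k} = {hits / k}")   # 1.0, 0.5, 0.333...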
Example #2
    def test_reco_quick_start_example(self):
        # Data
        actual = pd.DataFrame({
            "user_id": [1, 2, 3, 4],
            "item_id": [1, 2, 0, 3],
            "clicks": [0, 1, 0, 0]
        })
        predicted = pd.DataFrame({
            "user_id": [1, 2, 3, 4],
            "item_id": [1, 2, 2, 3],
            "clicks": [0.8, 0.7, 0.8, 0.7]
        })

        # Metrics
        auc = BinaryRecoMetrics.AUC(click_column="clicks")
        ctr = BinaryRecoMetrics.CTR(click_column="clicks")
        ndcg_k = RankingRecoMetrics.NDCG(click_column="clicks", k=3)
        precision_k = RankingRecoMetrics.Precision(click_column="clicks", k=2)
        recall_k = RankingRecoMetrics.Recall(click_column="clicks", k=2)
        map_k = RankingRecoMetrics.MAP(click_column="clicks", k=2)

        # Scores
        self.assertEqual(auc.get_score(actual, predicted), 0.25)
        self.assertEqual(ctr.get_score(actual, predicted), 0.3333333333333333)
        self.assertEqual(ndcg_k.get_score(actual, predicted), 1)
        self.assertEqual(precision_k.get_score(actual, predicted), 1)
        self.assertEqual(recall_k.get_score(actual, predicted), 1)
        self.assertEqual(map_k.get_score(actual, predicted), 1)
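
The snippets on this page assume pandas, numpy, and the jurity metric classes have already been imported. A minimal sketch of the imports, where the exact module paths are an assumption about the jurity package layout and may differ between versions:

import numpy as np
import pandas as pd

# Assumed import locations; adjust to your installed jurity version.
from jurity.recommenders import (BinaryRecoMetrics, CombinedMetrics,
                                 DiversityRecoMetrics, RankingRecoMetrics)
from jurity.utils import Constants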
Example #3
    def test_precision(self):
        # User 1 was recommended items 1, 3, 2 and has items 1, 4 relevant
        # Users 2 and 3 check for no relevant items
        # User 4 checks for no recommendations
        actual = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 3, 4],
            Constants.item_id: [1, 2, 4, 1, 3],
            'click': [1, 0, 1, 0, 1]
        })

        predicted = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 2, 3],
            Constants.item_id: [1, 2, 3, 3, 1],
            'click': [0.8, 0.7, 0.75, 0.7, 0.5]
        })

        metric = RankingRecoMetrics.Precision(click_column='click', k=2)
        results = metric.get_score(actual,
                                   predicted,
                                   return_extended_results=True)

        self.assertEqual(0.5, results['precision'])
        self.assertEqual(1, results['support'])

        precision_3 = RankingRecoMetrics.Precision(click_column='click', k=3)
        result_3 = precision_3.get_score(actual, predicted)
        self.assertEqual(1. / 3, result_3)
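
Only user 1 ends up in the support here: users 2 and 3 have no relevant items and user 4 has no recommendations, so the reported precision is just user 1's precision. A small sketch of that arithmetic (not jurity's implementation):

relevant = {1, 4}        # user 1's relevant items
ranked = [1, 3, 2]       # user 1's recommendations sorted by score
print(sum(item in relevant for item in ranked[:2]) / 2)   # Precision@2 = 0.5
print(sum(item in relevant for item in ranked[:3]) / 3)   # Precision@3 = 0.333...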
Example #4
    def test_recall(self):
        # User 1 has items 1, 4, 2 relevant and was recommended items 1, 3, 2, so they should be included in the support
        # User 2 checks for no relevant items, so they shouldn't be included in the support
        # Users 3 & 4 check for no recommendations, but they should still be included in the support
        actual = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 1, 3, 4],
            Constants.item_id: [1, 2, 3, 4, 3, 1],
            'click': [1, 1, 0, 1, 1, 1]
        })

        predicted = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 2],
            Constants.item_id: [1, 2, 3, 3],
            'click': [0.8, 0.7, 0.75, 0.7]
        })

        metric = RankingRecoMetrics.Recall(click_column='click', k=2)
        results = metric.get_score(actual,
                                   predicted,
                                   return_extended_results=True)

        self.assertEqual(1. / 9, results['recall'])
        self.assertEqual(3, results['support'])

        recall_3 = RankingRecoMetrics.Recall(click_column='click',
                                             k=3).get_score(actual, predicted)
        self.assertEqual(2. / 9, recall_3)
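
The asserted Recall@2 of 1/9 can be reproduced by hand, assuming per-user recall is averaged over the support of three users: user 1 retrieves one of three relevant items in the top 2, while users 3 and 4 have relevant items but no recommendations and contribute 0.

per_user_recall = {
    1: 1 / 3,   # item 1 is the only one of user 1's three relevant items in the top 2
    3: 0.0,     # relevant items but no recommendations
    4: 0.0,     # relevant items but no recommendations
}
print(sum(per_user_recall.values()) / len(per_user_recall))   # 1/9 ≈ 0.111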
Example #5
    def test_map(self):
        # User 1 got items 1, 3, 2, 4 as recommendations. Items 1 and 4 are relevant.
        # User 2 checks for no relevant items
        # User 3 checks for no recommendations
        actual = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 3],
            Constants.item_id: [1, 2, 4, 3],
            'click': [1, 0, 1, 1]
        })

        predicted = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 1, 2],
            Constants.item_id: [1, 2, 3, 4, 3],
            'click': [0.8, 0.7, 0.75, 0.65, 0.7]
        })

        metric = RankingRecoMetrics.MAP(click_column='click', k=2)
        results = metric.get_score(actual,
                                   predicted,
                                   return_extended_results=True)

        self.assertEqual(0.5, results['map'])
        self.assertEqual(1, results['support'])

        map_3 = RankingRecoMetrics.MAP(click_column='click',
                                       k=3).get_score(actual, predicted)
        self.assertEqual(0.5, map_3)

        map_4 = RankingRecoMetrics.MAP(click_column='click',
                                       k=4).get_score(actual, predicted)
        self.assertEqual(0.75, map_4)
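
The asserted MAP values follow from the standard average-precision formula, assuming the sum of precisions at hit positions is divided by min(k, number of relevant items) and that only user 1 (the single user with both relevant items and recommendations) enters the average. A small sketch (not jurity's implementation):

relevant = {1, 4}
ranked = [1, 3, 2, 4]          # user 1's recommendations sorted by score
for k in (2, 3, 4):
    hits, ap = 0, 0.0
    for rank, item in enumerate(ranked[:k], start=1):
        if item in relevant:
            hits += 1
            ap += hits / rank
    print(f"MAP@{k} = {ap / min(k, len(relevant))}")   # 0.5, 0.5, 0.75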
Example #6
    def test_ndcg(self):
        # First, test the IDCG value
        idcg_val = idcg(3)
        self.assertEqual(1. / np.log2(2) + 1. / np.log2(3) + 1. / np.log2(4),
                         idcg_val)

        # Then, test NDCG
        # User 1 has items 1, 4, 2 relevant and was recommended items 1, 3, 2
        # User 2 checks for no relevant items, shouldn't contribute to the metric
        # User 3 checks for no recommendations, so their score is 0
        # User 4 has items 1, 2 relevant but was recommended items 3, 4, so their score is 0
        actual = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 1, 3, 4, 4],
            Constants.item_id: [1, 2, 3, 4, 3, 1, 2],
            'click': [1, 1, 0, 1, 1, 1, 1]
        })

        predicted = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 2, 4, 4],
            Constants.item_id: [1, 2, 3, 3, 3, 4],
            'click': [0.8, 0.7, 0.75, 0.7, 0.6, 0.4]
        })

        metric = RankingRecoMetrics.NDCG(click_column='click', k=3)
        results = metric.get_score(actual,
                                   predicted,
                                   return_extended_results=True)
        self.assertEqual(((1. / np.log2(2) + 1. / np.log2(4)) / idcg_val) / 3,
                         results['ndcg'])
        self.assertEqual(3, results['support'])
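
The expected value can be checked by hand: user 1's ranked list is items 1, 3, 2 with relevant items at ranks 1 and 3, users 3 and 4 score 0, and user 2 is excluded from the support of three. A minimal sketch assuming binary relevance and a log2 discount, consistent with the idcg helper above:

import numpy as np

idcg_3 = sum(1.0 / np.log2(rank + 1) for rank in (1, 2, 3))
dcg_user1 = 1.0 / np.log2(2) + 1.0 / np.log2(4)    # relevant items at ranks 1 and 3
print((dcg_user1 / idcg_3 + 0.0 + 0.0) / 3)        # users 3 and 4 contribute 0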
Example #7
    def test_change_column_names(self):
        user_id_column = 'uid'
        item_id_column = 'iid'
        actual = pd.DataFrame({
            user_id_column: [0, 0],
            item_id_column: [1, 2],
            'click': [True, True]
        })
        predicted = pd.DataFrame({
            user_id_column: [0, 0, 0, 0],
            item_id_column: [0, 1, 2, 3],
            'click': [0, -1, -2, -3]
        })

        # Test that the output matches the result with the default column names
        recall_1 = RankingRecoMetrics.Recall('click',
                                             k=1,
                                             user_id_column=user_id_column,
                                             item_id_column=item_id_column)
        self.assertEqual(0., recall_1.get_score(actual, predicted))

        recall_2 = RankingRecoMetrics.Recall('click',
                                             k=2,
                                             user_id_column=user_id_column,
                                             item_id_column=item_id_column)
        self.assertEqual(0.5, recall_2.get_score(actual, predicted))

        # Test that none of the metrics crash
        metrics = CombinedMetrics(
            BinaryRecoMetrics.CTR('click',
                                  k=1,
                                  user_id_column=user_id_column,
                                  item_id_column=item_id_column),
            RankingRecoMetrics.MAP('click',
                                   k=1,
                                   user_id_column=user_id_column,
                                   item_id_column=item_id_column),
            RankingRecoMetrics.NDCG('click',
                                    k=1,
                                    user_id_column=user_id_column,
                                    item_id_column=item_id_column),
            RankingRecoMetrics.Precision('click',
                                         k=1,
                                         user_id_column=user_id_column,
                                         item_id_column=item_id_column),
            RankingRecoMetrics.Recall('click',
                                      k=1,
                                      user_id_column=user_id_column,
                                      item_id_column=item_id_column))

        metrics.get_score(actual, predicted)
Example #8
    def test_accumulate_when_no_results_this_batch(self):
        metrics = CombinedMetrics(
            RankingRecoMetrics.Recall(click_column='click', k=1),
            BinaryRecoMetrics.CTR(click_column='click', k=1))

        actual = pd.DataFrame({
            Constants.user_id: [0],
            Constants.item_id: [0],
            'click': [True]
        })
        predicted = pd.DataFrame({
            Constants.user_id: [0],
            Constants.item_id: [0],
            'click': [0]
        })
        batch_res, acc_res = metrics.get_score(actual,
                                               predicted,
                                               batch_accumulate=True,
                                               return_extended_results=True)
        self.assertEqual(2, len(batch_res))
        self.assertEqual(2, len(acc_res))

        actual = pd.DataFrame({
            Constants.user_id: [1],
            Constants.item_id: [1],
            'click': [True]
        })
        predicted = pd.DataFrame({
            Constants.user_id: [2],
            Constants.item_id: [2],
            'click': [0]
        })
        batch_res, acc_res = metrics.get_score(actual,
                                               predicted,
                                               batch_accumulate=True,
                                               return_extended_results=True)
        self.assertEqual(2, len(batch_res))
        self.assertEqual(2, len(acc_res))
Example #9
    def test_inter_list_diversity_in_use(self):
        actual = pd.DataFrame({
            Constants.user_id: [0, 0, 1, 2],
            Constants.item_id: [4, 5, 1, 4],
            'score': [True, True, True, True]
        })
        predicted = pd.DataFrame({
            Constants.user_id: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
            Constants.item_id: [0, 1, 2, 3, 0, 1, 2, 4, 0, 1, 2, 5],
            'score':
            [0.9, 0.7, 0.6, 0.3, 0.9, 0.7, 0.4, 0.1, 0.9, 0.8, 0.6, 0.6]
        })
        metrics = CombinedMetrics(
            RankingRecoMetrics.Recall(click_column='score', k=4),
            BinaryRecoMetrics.CTR(click_column='score', k=4),
            DiversityRecoMetrics.InterListDiversity(click_column='score',
                                                    k=4,
                                                    user_sample_size=2,
                                                    num_runs=5))

        acc_res = metrics.get_score(actual,
                                    predicted,
                                    batch_accumulate=False,
                                    return_extended_results=True)

        self.assertEqual(3, len(acc_res))
        self.assertEqual(
            0.25, acc_res['Inter-List Diversity@4']['inter-list diversity'])

        with self.assertRaises(ValueError):
            # This should fail when `batch_accumulate=True` and `InterListDiversity` is used within combined metrics.
            batch_res, acc_res = metrics.get_score(
                actual,
                predicted,
                batch_accumulate=True,
                return_extended_results=True)
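
The 0.25 assertion is consistent with inter-list diversity computed as the average pairwise cosine distance between the users' top-4 recommendation sets (this definition is an assumption that happens to match the number): every pair of users shares 3 of their 4 items, so each pairwise distance is 1 - 3/4 = 0.25. The final block also shows that InterListDiversity does not support batch accumulation, which is why batch_accumulate=True raises a ValueError.

# Hypothetical check of the 0.25 value under the assumed cosine-distance definition.
top4 = {0: {0, 1, 2, 3}, 1: {0, 1, 2, 4}, 2: {0, 1, 2, 5}}   # each user's top-4 items
pairs = [(0, 1), (0, 2), (1, 2)]
distances = [1 - len(top4[a] & top4[b]) / (len(top4[a]) * len(top4[b])) ** 0.5
             for a, b in pairs]
print(sum(distances) / len(pairs))   # 0.25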