# Example #1
    def test_reco_quick_start_example(self):
        """Quick-start sanity check: run every reco metric on a tiny dataset.

        Four users, one recommendation each; only user 3's recommended item
        (2) differs from the actual item (0), and only user 2 clicked.
        """
        # Data
        actual = pd.DataFrame({
            "user_id": [1, 2, 3, 4],
            "item_id": [1, 2, 0, 3],
            "clicks": [0, 1, 0, 0]
        })
        predicted = pd.DataFrame({
            "user_id": [1, 2, 3, 4],
            "item_id": [1, 2, 2, 3],
            "clicks": [0.8, 0.7, 0.8, 0.7]
        })

        # Metrics
        auc = BinaryRecoMetrics.AUC(click_column="clicks")
        ctr = BinaryRecoMetrics.CTR(click_column="clicks")
        ndcg_k = RankingRecoMetrics.NDCG(click_column="clicks", k=3)  # was misspelled "ncdg_k"
        precision_k = RankingRecoMetrics.Precision(click_column="clicks", k=2)
        recall_k = RankingRecoMetrics.Recall(click_column="clicks", k=2)
        map_k = RankingRecoMetrics.MAP(click_column="clicks", k=2)

        # Scores. Float-valued scores are checked with assertAlmostEqual:
        # exact float equality (e.g. against 0.3333333333333333) is brittle
        # under any change in summation order or intermediate precision.
        self.assertAlmostEqual(auc.get_score(actual, predicted), 0.25)
        self.assertAlmostEqual(ctr.get_score(actual, predicted), 1. / 3)
        self.assertAlmostEqual(ndcg_k.get_score(actual, predicted), 1)
        self.assertAlmostEqual(precision_k.get_score(actual, predicted), 1)
        self.assertAlmostEqual(recall_k.get_score(actual, predicted), 1)
        self.assertAlmostEqual(map_k.get_score(actual, predicted), 1)
    def test_ndcg(self):
        """Test the IDCG helper and the NDCG metric on a hand-computed case."""
        # First, test the IDCG value. Compare floats with assertAlmostEqual:
        # the test recomputes the same log2 sum, but exact float equality is
        # brittle if the implementation ever changes its summation order.
        idcg_val = idcg(3)
        self.assertAlmostEqual(1. / np.log2(2) + 1. / np.log2(3) + 1. / np.log2(4),
                               idcg_val)

        # Then, test NDCG
        # User 1 has items 1, 4, 2 relevant and was recommended items 1, 3, 2
        # User 2 checks for no relevant items, shouldn't contribute to the metric
        # User 3 checks for no recommendations, is 0
        # User 4 has items 1, 2 relevant and was recommended items 3, 4; is 0
        actual = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 1, 3, 4, 4],
            Constants.item_id: [1, 2, 3, 4, 3, 1, 2],
            'click': [1, 1, 0, 1, 1, 1, 1]
        })

        predicted = pd.DataFrame({
            Constants.user_id: [1, 1, 1, 2, 4, 4],
            Constants.item_id: [1, 2, 3, 3, 3, 4],
            'click': [0.8, 0.7, 0.75, 0.7, 0.6, 0.4]
        })

        metric = RankingRecoMetrics.NDCG(click_column='click', k=3)
        results = metric.get_score(actual,
                                   predicted,
                                   return_extended_results=True)
        # Averaged over the 3 contributing users (user 2 is excluded).
        self.assertAlmostEqual(((1. / np.log2(2) + 1. / np.log2(4)) / idcg_val) / 3,
                               results['ndcg'])
        # Support is an integer count, so exact equality is appropriate here.
        self.assertEqual(3, results['support'])
# Example #3
    def test_change_column_names(self):
        """All metrics should honor custom user-id / item-id column names."""
        uid_col = 'uid'
        iid_col = 'iid'
        actual = pd.DataFrame({
            uid_col: [0, 0],
            iid_col: [1, 2],
            'click': [True, True]
        })
        predicted = pd.DataFrame({
            uid_col: [0, 0, 0, 0],
            iid_col: [0, 1, 2, 3],
            'click': [0, -1, -2, -3]
        })

        # Recall at k=1 misses both relevant items; at k=2 it finds one of two.
        for top_k, expected in ((1, 0.), (2, 0.5)):
            recall = RankingRecoMetrics.Recall('click',
                                               k=top_k,
                                               user_id_column=uid_col,
                                               item_id_column=iid_col)
            self.assertEqual(expected, recall.get_score(actual, predicted))

        # Every metric should run without crashing on the renamed columns.
        metric_classes = (BinaryRecoMetrics.CTR,
                          RankingRecoMetrics.MAP,
                          RankingRecoMetrics.NDCG,
                          RankingRecoMetrics.Precision,
                          RankingRecoMetrics.Recall)
        metrics = CombinedMetrics(*(metric_cls('click',
                                               k=1,
                                               user_id_column=uid_col,
                                               item_id_column=iid_col)
                                    for metric_cls in metric_classes))

        metrics.get_score(actual, predicted)