def test_perform_w_new_items_top_n(self, method: str):
        """Per-user and system correlation on a split containing new items,
        with a top_n cutoff applied to the metric."""
        metric = Correlation(method, top_n=self.top_n)

        result_df = metric.perform(split_w_new_items)

        # Expected values recomputed directly with pandas: actual rank
        # positions vs the ideal ranking for each user.
        expected_u1 = pd.Series([1, 5]).corr(pd.Series([0, 1, 2, 3, 4]), method)
        result_u1 = float(
            result_df.query('from_id == "u1"')[str(metric)])
        self.assertAlmostEqual(expected_u1, result_u1)

        expected_u2 = pd.Series([2, 0]).corr(pd.Series([0, 1, 2]), method)
        result_u2 = float(
            result_df.query('from_id == "u2"')[str(metric)])
        self.assertAlmostEqual(expected_u2, result_u2)

        # System row is the macro average over the two users.
        expected_sys = (expected_u1 + expected_u2) / 2
        result_sys = float(
            result_df.query('from_id == "sys"')[str(metric)])
        self.assertAlmostEqual(expected_sys, result_sys)
    def test_perform_only_one_top_n(self, method: str):
        """With a single prediction available the correlation is undefined,
        so every per-user value must come back as NaN."""
        metric = Correlation(method, top_n=self.top_n)

        result_df = metric.perform(split_only_one)

        computed = result_df[str(metric)]
        self.assertTrue(computed.isna().all())
    def test_perform_only_new(self, method: str):
        """When the split contains only items absent from the truth, no
        correlation can be computed and every value must be NaN."""
        metric = Correlation(method)

        result_df = metric.perform(split_only_new)

        computed = result_df[str(metric)]
        self.assertTrue(computed.isna().all())
    # 예제 #4 (Example #4)
    def test_all(self):
        """End-to-end check: a content-based recommender evaluated with the
        full battery of ranking, error, coverage and fairness metrics.

        The test passes if EvalModel.fit() completes without raising; the
        returned result frame is not inspected.
        """
        ratings_filename = os.path.join(contents_path, '..', 'datasets',
                                        'examples', 'new_ratings.csv')

        ratings_frame = RatingsImporter(
            CSVFile(ratings_filename)).import_ratings()

        rs = ContentBasedRS(
            LinearPredictor(
                {"Plot": ['tfidf', 'embedding']},
                SkLinearRegression(),
            ), ratings_frame, items_dir)

        # Catalog = ids of every serialized ('.xz') item in items_dir.
        # Set comprehension instead of set([...]) (flake8-comprehensions C403).
        catalog = {
            os.path.splitext(f)[0] for f in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')
        }

        em = EvalModel(rs,
                       KFoldPartitioning(),
                       metric_list=[
                           Precision(sys_average='micro'),
                           PrecisionAtK(1, sys_average='micro'),
                           RPrecision(),
                           Recall(),
                           RecallAtK(3),
                           FMeasure(1, sys_average='macro'),
                           FMeasureAtK(2, beta=1, sys_average='micro'),
                           NDCG(),
                           NDCGAtK(3),
                           MRR(),
                           MRRAtK(5),
                           Correlation('pearson', top_n=5),
                           Correlation('kendall', top_n=3),
                           Correlation('spearman', top_n=4),
                           MAE(),
                           MSE(),
                           RMSE(),
                           CatalogCoverage(catalog),
                           CatalogCoverage(catalog, k=2),
                           CatalogCoverage(catalog, top_n=3),
                           GiniIndex(),
                           GiniIndex(top_n=3),
                           DeltaGap({
                               'primo': 0.5,
                               'secondo': 0.5
                           })
                       ],
                       methodology=TestItemsMethodology())

        # The original bound this to an unused local; the call alone is the
        # assertion (it must not raise).
        em.fit()
    def test_eval_ranking_needed_metrics_implicit_split(self):
        """Ranking-based metrics can pick the split up implicitly from the
        RankingNeededMetric class attribute rather than an explicit argument."""
        calculator = MetricCalculator()

        # We set the split_list directly by the class attribute
        RankingNeededMetric.rank_truth_list = self.rank_split_list

        metrics = [
            Precision(),
            PrecisionAtK(2),
            RPrecision(),
            Recall(),
            RecallAtK(2),
            FMeasure(),
            FMeasureAtK(2),
            NDCG(),
            NDCGAtK(2),
            MRR(),
            MRRAtK(2),
            Correlation('pearson'),
            Correlation('kendall'),
            Correlation('spearman'),
            PredictionCoverage(self.catalog),
            CatalogCoverage(self.catalog, top_n=2),
            GiniIndex(),
            DeltaGap(user_groups={'a': 0.5, 'b': 0.5}),
            LongTailDistr(out_dir='test_plot'),
            PopProfileVsRecs(user_groups={'a': 0.5, 'b': 0.5},
                             out_dir='test_plot'),
            PopRecsCorrelation(out_dir='test_plot'),
        ]

        system_res, each_user_res = calculator.eval_metrics(metrics)

        # Both the system-level and the per-user results are DataFrames.
        self.assertIsInstance(system_res, pd.DataFrame)
        self.assertIsInstance(each_user_res, pd.DataFrame)
    # 예제 #6 (Example #6)
    def test_graph(self):
        """Full evaluation of a graph-based recommender built on an
        NXFullGraph enriched with exogenous item and user properties.

        Passes if EvalModel.fit() completes without raising.
        """
        catalog = set(ratings.to_id)

        users_dir = os.path.join(dir_test_files, 'complex_contents',
                                 'users_codified/')

        # '1' is the column of the users .DAT file which identifies
        # the gender.
        graph = NXFullGraph(
            ratings,
            user_contents_dir=users_dir,
            item_contents_dir=items_dir,
            item_exo_representation="dbpedia",
            user_exo_representation='local',
            item_exo_properties=['starring'],
            user_exo_properties=['1'],
        )

        recommender = GraphBasedRS(NXPageRank(), graph)

        metrics = [
            Precision(relevant_threshold=3),
            Recall(),
            FMeasure(beta=1),
            FMeasure(beta=2, sys_average='micro'),
            MRR(),
            Correlation('pearson'),
            GiniIndex(),
            DeltaGap({'popular': 0.5, 'niche': 0.5}),
            PredictionCoverage(catalog),
            PopProfileVsRecs(user_groups={'popular': 0.5, 'niche': 0.5},
                             out_dir='plots/'),
            LongTailDistr('plots/', format='svg'),
            PopRecsCorrelation('plots/'),
        ]

        em = EvalModel(recommender,
                       KFoldPartitioning(),
                       metric_list=metrics,
                       verbose_predictions=True,
                       methodology=TestItemsMethodology())

        em.fit()