def test_fit_graph_w_testrating_methodology(self):
        """Evaluate a PageRank graph recommender without an explicit methodology.

        A full graph is built from the module-level ``ratings``, wrapped in a
        ``GraphBasedRS`` and evaluated with k-fold partitioning and a single
        ``Precision`` metric.  No ``methodology`` argument is passed to
        ``EvalModel``, so whatever default it applies is the one exercised
        (presumably the test-ratings methodology, going by the test name).
        Both values returned by ``fit()`` must be pandas DataFrames.
        """
        full_graph = NXFullGraph(ratings)
        page_rank = NXPageRank()
        recommender = GraphBasedRS(page_rank, full_graph)

        partitioning = KFoldPartitioning()
        evaluator = EvalModel(
            recommender,
            partitioning,
            metric_list=[Precision()],
        )

        # fit() is expected to hand back exactly two results.
        system_frame, per_user_frame = evaluator.fit()

        for frame in (system_frame, per_user_frame):
            self.assertIsInstance(frame, pd.DataFrame)
    def test_fit_cb_w_testrating_methodology(self):
        """Evaluate a content-based recommender without an explicit methodology.

        The recommender uses a ``CentroidVector`` algorithm over the "Plot"
        field ("tfidf" representation) with cosine similarity, built on the
        module-level ``ratings`` and ``items_dir``.  ``EvalModel`` receives no
        ``methodology`` argument, so its default is the one exercised
        (presumably the test-ratings methodology, going by the test name).
        Both values returned by ``fit()`` must be pandas DataFrames.
        """
        similarity = CosineSimilarity()
        algorithm = CentroidVector({"Plot": "tfidf"}, similarity)
        recommender = ContentBasedRS(algorithm, ratings, items_dir)

        partitioning = KFoldPartitioning()
        evaluator = EvalModel(
            recommender,
            partitioning,
            metric_list=[Precision()],
        )

        # fit() is expected to hand back exactly two results.
        system_frame, per_user_frame = evaluator.fit()

        for frame in (system_frame, per_user_frame):
            self.assertIsInstance(frame, pd.DataFrame)
    def test_all(self):
        """Run a content-based evaluation with a broad list of metrics.

        Ratings are imported from ``new_ratings.csv`` and used, together with
        the module-level ``items_dir``, to build a ``ContentBasedRS`` around a
        ``LinearPredictor``.  The model is evaluated with k-fold partitioning
        and ``TestItemsMethodology`` against the metric list below.

        The two values returned by ``EvalModel.fit()`` are checked to be
        pandas DataFrames, mirroring the ``test_fit_*`` tests in this file,
        so the test verifies something beyond "did not raise".
        """
        ratings_filename = os.path.join(contents_path, '..', 'datasets',
                                        'examples', 'new_ratings.csv')

        ratings_frame = RatingsImporter(
            CSVFile(ratings_filename)).import_ratings()

        rs = ContentBasedRS(
            LinearPredictor(
                {"Plot": ['tfidf', 'embedding']},
                SkLinearRegression(),
            ), ratings_frame, items_dir)

        # Catalog of item ids: the names, extension stripped, of every
        # regular file in items_dir whose name ends with 'xz'.
        catalog = {
            os.path.splitext(f)[0]
            for f in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')
        }

        em = EvalModel(rs,
                       KFoldPartitioning(),
                       metric_list=[
                           Precision(sys_average='micro'),
                           PrecisionAtK(1, sys_average='micro'),
                           RPrecision(),
                           Recall(),
                           RecallAtK(3, ),
                           FMeasure(1, sys_average='macro'),
                           FMeasureAtK(2, beta=1, sys_average='micro'),
                           NDCG(),
                           NDCGAtK(3),
                           MRR(),
                           MRRAtK(5, ),
                           Correlation('pearson', top_n=5),
                           Correlation('kendall', top_n=3),
                           Correlation('spearman', top_n=4),
                           MAE(),
                           MSE(),
                           RMSE(),
                           CatalogCoverage(catalog),
                           CatalogCoverage(catalog, k=2),
                           CatalogCoverage(catalog, top_n=3),
                           GiniIndex(),
                           GiniIndex(top_n=3),
                           DeltaGap({
                               'primo': 0.5,
                               'secondo': 0.5
                           })
                       ],
                       methodology=TestItemsMethodology())

        # Same contract the sibling tests rely on: fit() yields a
        # system-level result and a per-user result.
        sys_result, users_result = em.fit()

        self.assertIsInstance(sys_result, pd.DataFrame)
        self.assertIsInstance(users_result, pd.DataFrame)
    # Example #4 (separator left over from the snippet listing, which also carried a bare "0" here)
    def test_graph(self):
        """Smoke-test a PageRank graph recommender with exogenous properties.

        Builds a full graph from the module-level ``ratings`` enriched with
        user and item exogenous properties, then evaluates a ``GraphBasedRS``
        with k-fold partitioning and ``TestItemsMethodology``.  Several of the
        metrics are given the 'plots/' directory as their output location.
        The result of ``fit()`` is not inspected; the test only checks that
        the evaluation runs to completion.
        """
        item_catalog = set(ratings.to_id)

        users_path = os.path.join(
            dir_test_files, 'complex_contents', 'users_codified/')

        full_graph = NXFullGraph(
            ratings,
            user_contents_dir=users_path,
            item_contents_dir=items_dir,
            item_exo_representation="dbpedia",
            user_exo_representation='local',
            item_exo_properties=['starring'],
            # '1' is the column in the users .DAT file which identifies
            # the gender.
            user_exo_properties=['1'],
        )

        recommender = GraphBasedRS(NXPageRank(), full_graph)

        partitioning = KFoldPartitioning()

        # Metrics are constructed in the same order as they are evaluated.
        metrics = [
            Precision(relevant_threshold=3),
            Recall(),
            FMeasure(beta=1),
            FMeasure(beta=2, sys_average='micro'),
            MRR(),
            Correlation('pearson'),
            GiniIndex(),
            DeltaGap({'popular': 0.5, 'niche': 0.5}),
            PredictionCoverage(item_catalog),
            PopProfileVsRecs(
                user_groups={'popular': 0.5, 'niche': 0.5},
                out_dir='plots/',
            ),
            LongTailDistr('plots/', format='svg'),
            PopRecsCorrelation('plots/'),
        ]

        evaluator = EvalModel(
            recommender,
            partitioning,
            metric_list=metrics,
            verbose_predictions=True,
            methodology=TestItemsMethodology(),
        )

        evaluator.fit()
    def test_fit_graph_w_allitems_methodology(self):
        """Evaluate a PageRank graph recommender with ``AllItemsMethodology``.

        The methodology is given the set of item ids found in the module-level
        ``items_dir``: the names, extension stripped, of every regular file
        whose name ends with 'xz'.  Both values returned by ``fit()`` must be
        pandas DataFrames.
        """
        full_graph = NXFullGraph(ratings)
        recommender = GraphBasedRS(NXPageRank(), full_graph)

        all_items = {
            os.path.splitext(entry)[0]
            for entry in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, entry))
            and entry.endswith('xz')
        }

        partitioning = KFoldPartitioning()
        metrics = [Precision()]
        evaluator = EvalModel(
            recommender,
            partitioning,
            metric_list=metrics,
            methodology=AllItemsMethodology(all_items),
        )

        # fit() is expected to hand back exactly two results.
        system_frame, per_user_frame = evaluator.fit()

        for frame in (system_frame, per_user_frame):
            self.assertIsInstance(frame, pd.DataFrame)
    def test_fit_cb_w_allitems_methodology(self):
        """Evaluate a content-based recommender with ``AllItemsMethodology``.

        The recommender uses a ``CentroidVector`` algorithm over the "Plot"
        field ("tfidf" representation) with cosine similarity.  The
        methodology is given the set of item ids found in the module-level
        ``items_dir``: the names, extension stripped, of every regular file
        whose name ends with 'xz'.  Both values returned by ``fit()`` must be
        pandas DataFrames.
        """
        similarity = CosineSimilarity()
        algorithm = CentroidVector({"Plot": "tfidf"}, similarity)
        recommender = ContentBasedRS(algorithm, ratings, items_dir)

        all_items = {
            os.path.splitext(entry)[0]
            for entry in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, entry))
            and entry.endswith('xz')
        }

        partitioning = KFoldPartitioning()
        metrics = [Precision()]
        evaluator = EvalModel(
            recommender,
            partitioning,
            metric_list=metrics,
            methodology=AllItemsMethodology(all_items),
        )

        # fit() is expected to hand back exactly two results.
        system_frame, per_user_frame = evaluator.fit()

        for frame in (system_frame, per_user_frame):
            self.assertIsInstance(frame, pd.DataFrame)