def test_perform_w_new_items_top_n(self, method: str):
    """Check per-user and system correlation on a split containing new items.

    The expected values are computed directly with pandas' ``Series.corr``
    using the same ``method`` ('pearson'/'kendall'/'spearman') as the metric.
    """
    metric = Correlation(method, top_n=self.top_n)
    result_w_new_items = metric.perform(split_w_new_items)
    metric_col = str(metric)

    # u1: correlation between the predicted ranking and the ideal one.
    expected_u1 = pd.Series([1, 5]).corr(pd.Series([0, 1, 2, 3, 4]), method)
    computed_u1 = float(result_w_new_items.query('from_id == "u1"')[metric_col])
    self.assertAlmostEqual(expected_u1, computed_u1)

    # u2: same check with its own predicted/ideal rankings.
    expected_u2 = pd.Series([2, 0]).corr(pd.Series([0, 1, 2]), method)
    computed_u2 = float(result_w_new_items.query('from_id == "u2"')[metric_col])
    self.assertAlmostEqual(expected_u2, computed_u2)

    # The system-level result is the macro average over the two users.
    expected_sys = (expected_u1 + expected_u2) / 2
    computed_sys = float(result_w_new_items.query('from_id == "sys"')[metric_col])
    self.assertAlmostEqual(expected_sys, computed_sys)
def test_perform_only_one_top_n(self, method: str):
    """A split with a single prediction per user must yield NaN correlations.

    Correlation is undefined for a single data point, so every per-user
    (and the system) value in the result column is expected to be NaN.
    """
    metric = Correlation(method, top_n=self.top_n)
    result_only_one = metric.perform(split_only_one)
    self.assertTrue(result_only_one[str(metric)].isna().all())
def test_perform_only_new(self, method: str):
    """A split containing only new (never-rated) items yields NaN correlations.

    With no overlap between predictions and truth there is nothing to
    correlate, so every value in the metric column is expected to be NaN.
    """
    metric = Correlation(method)
    result_only_new = metric.perform(split_only_new)
    self.assertTrue(result_only_new[str(metric)].isna().all())
def test_all(self):
    """End-to-end smoke test: run every metric on a content-based recommender.

    Builds a ContentBasedRS over the example ratings, evaluates the full
    metric list with K-fold partitioning and the TestItems methodology,
    and succeeds if ``fit()`` completes without raising.
    """
    ratings_filename = os.path.join(
        contents_path, '..', 'datasets', 'examples', 'new_ratings.csv')
    ratings_frame = RatingsImporter(
        CSVFile(ratings_filename)).import_ratings()

    rs = ContentBasedRS(
        LinearPredictor(
            {"Plot": ['tfidf', 'embedding']},
            SkLinearRegression(),
        ),
        ratings_frame,
        items_dir)

    # Catalog of all serialized item ids in items_dir.
    # Set comprehension instead of set([...]) (avoids building a throwaway list).
    # NOTE(review): endswith('xz') also matches names merely ending in 'xz'
    # without a '.xz' extension — confirm whether '.xz' was intended.
    catalog = {
        os.path.splitext(f)[0]
        for f in os.listdir(items_dir)
        if os.path.isfile(os.path.join(items_dir, f)) and f.endswith('xz')
    }

    em = EvalModel(rs,
                   KFoldPartitioning(),
                   metric_list=[
                       Precision(sys_average='micro'),
                       PrecisionAtK(1, sys_average='micro'),
                       RPrecision(),
                       Recall(),
                       RecallAtK(3, ),
                       FMeasure(1, sys_average='macro'),
                       FMeasureAtK(2, beta=1, sys_average='micro'),

                       NDCG(),
                       NDCGAtK(3),
                       MRR(),
                       MRRAtK(5, ),
                       Correlation('pearson', top_n=5),
                       Correlation('kendall', top_n=3),
                       Correlation('spearman', top_n=4),

                       MAE(),
                       MSE(),
                       RMSE(),

                       CatalogCoverage(catalog),
                       CatalogCoverage(catalog, k=2),
                       CatalogCoverage(catalog, top_n=3),
                       GiniIndex(),
                       GiniIndex(top_n=3),
                       DeltaGap({
                           'primo': 0.5,
                           'secondo': 0.5
                       })
                   ],
                   methodology=TestItemsMethodology())

    # fit() must complete without raising; its return value is not inspected,
    # so the previously unused `result = em.fit()` assignment was dropped.
    em.fit()
def test_eval_ranking_needed_metrics_implicit_split(self):
    """Ranking metrics can be evaluated with an implicitly-set split list.

    Instead of passing the splits explicitly, the split list is injected
    through the ``RankingNeededMetric.rank_truth_list`` class attribute;
    the calculator must still produce system- and per-user DataFrames.
    """
    calculator = MetricCalculator()
    # Inject the split list directly via the shared class attribute.
    RankingNeededMetric.rank_truth_list = self.rank_split_list

    metrics = [
        Precision(),
        PrecisionAtK(2),
        RPrecision(),
        Recall(),
        RecallAtK(2),
        FMeasure(),
        FMeasureAtK(2),

        NDCG(),
        NDCGAtK(2),
        MRR(),
        MRRAtK(2),
        Correlation('pearson'),
        Correlation('kendall'),
        Correlation('spearman'),

        PredictionCoverage(self.catalog),
        CatalogCoverage(self.catalog, top_n=2),
        GiniIndex(),
        DeltaGap(user_groups={'a': 0.5, 'b': 0.5}),

        LongTailDistr(out_dir='test_plot'),
        PopProfileVsRecs(user_groups={'a': 0.5, 'b': 0.5},
                         out_dir='test_plot'),
        PopRecsCorrelation(out_dir='test_plot'),
    ]
    system_res, each_user_res = calculator.eval_metrics(metrics)

    self.assertIsInstance(system_res, pd.DataFrame)
    self.assertIsInstance(each_user_res, pd.DataFrame)
def test_graph(self):
    """End-to-end evaluation of a graph-based recommender (PageRank on a full graph).

    Builds an NXFullGraph enriched with exogenous item/user properties,
    wraps it in a GraphBasedRS and runs an EvalModel over a mixed metric
    list; the test passes if ``fit()`` completes without raising.
    """
    catalog = set(ratings.to_id)

    users_dir = os.path.join(dir_test_files, 'complex_contents',
                             'users_codified/')
    graph = NXFullGraph(
        ratings,
        user_contents_dir=users_dir,
        item_contents_dir=items_dir,
        item_exo_representation="dbpedia",
        user_exo_representation='local',
        item_exo_properties=['starring'],
        # '1' is the column in the users .DAT file identifying the gender.
        user_exo_properties=['1'],
    )
    graph_rs = GraphBasedRS(NXPageRank(), graph)

    metrics = [
        Precision(relevant_threshold=3),
        Recall(),
        FMeasure(beta=1),
        FMeasure(beta=2, sys_average='micro'),
        MRR(),
        Correlation('pearson'),
        GiniIndex(),
        DeltaGap({'popular': 0.5, 'niche': 0.5}),
        PredictionCoverage(catalog),
        PopProfileVsRecs(user_groups={'popular': 0.5, 'niche': 0.5},
                         out_dir='plots/'),
        LongTailDistr('plots/', format='svg'),
        PopRecsCorrelation('plots/'),
    ]
    em = EvalModel(graph_rs,
                   KFoldPartitioning(),
                   metric_list=metrics,
                   verbose_predictions=True,
                   methodology=TestItemsMethodology())
    em.fit()