def test_rank_single_representation(self, model: Regressor):
    """Exercise ``LinearPredictor.rank`` with a single item representation.

    The predictor is trained on the ratings of user "A000" and then asked
    to rank in three ways: restricted to ``self.filter_list``, with no
    restriction, and with an explicit number of recommendations.

    Args:
        model: regressor handed to ``LinearPredictor``.
    """
    # Single representation: only 'tfidf' for the 'Plot' field
    predictor = LinearPredictor({'Plot': ['tfidf']}, model)

    a000_ratings = self.ratings.query('from_id == "A000"')
    predictor.process_rated(a000_ratings, self.movies_dir)
    predictor.fit()

    def assert_no_rated_item_in(ranking):
        # The alg should rank only unrated items (unless in filter list),
        # so rated and ranked ids are expected to share nothing.
        rated_ids = set(a000_ratings['to_id'])
        ranked_ids = set(ranking['to_id'])
        overlap = ranked_ids.intersection(rated_ids)
        self.assertEqual(len(overlap), 0)

    # 1) rank with filter_list: exactly the items of the filter list come back
    filtered_ranking = predictor.rank(
        a000_ratings, self.movies_dir, filter_list=self.filter_list
    )
    filtered_ids = set(filtered_ranking['to_id'])
    self.assertEqual(len(filtered_ids), len(self.filter_list))
    self.assertCountEqual(filtered_ids, self.filter_list)

    # 2) rank without filter_list
    unfiltered_ranking = predictor.rank(a000_ratings, self.movies_dir)
    assert_no_rated_item_in(unfiltered_ranking)

    # 3) rank with n_recs specified (passed positionally, as third argument)
    n_recs = 5
    limited_ranking = predictor.rank(a000_ratings, self.movies_dir, n_recs)
    self.assertEqual(len(limited_ranking), n_recs)
    assert_no_rated_item_in(limited_ranking)
def test_predict_multiple_representations(self, model: Regressor):
    """Exercise ``LinearPredictor.predict`` with several item representations.

    The predictor is trained on the ratings of user "A000" and then asked
    to predict scores twice: restricted to ``self.filter_list`` and with
    no restriction.

    Args:
        model: regressor handed to ``LinearPredictor``.
    """
    # Multiple representations, keeping only items with score >= 2
    representations = {
        'Plot': ['tfidf', 'embedding'],
        'Genre': ['tfidf', 'embedding'],
        'imdbRating': [0],
    }
    predictor = LinearPredictor(representations, model, only_greater_eq=2)

    a000_ratings = self.ratings.query('from_id == "A000"')
    predictor.process_rated(a000_ratings, self.movies_dir)
    predictor.fit()

    # predict with filter_list: exactly the items of the filter list are scored
    filtered_scores = predictor.predict(
        a000_ratings, self.movies_dir, filter_list=self.filter_list
    )
    filtered_ids = set(filtered_scores['to_id'])
    self.assertEqual(len(filtered_ids), len(self.filter_list))
    self.assertCountEqual(filtered_ids, self.filter_list)

    # predict without filter_list
    unfiltered_scores = predictor.predict(a000_ratings, self.movies_dir)
    rated_ids = set(a000_ratings['to_id'])
    scored_ids = set(unfiltered_scores['to_id'])

    # Expected to be empty: only unrated items should be scored
    # (unless they appear in a filter list)
    rated_and_scored = scored_ids.intersection(rated_ids)
    self.assertEqual(len(rated_and_scored), 0)
def test_all(self):
    """Run a full evaluation with a wide list of metrics.

    No assertion is made on the outcome: the test only fails if building
    the pipeline or calling ``fit`` raises.
    """
    ratings_path = os.path.join(
        contents_path, '..', 'datasets', 'examples', 'new_ratings.csv'
    )
    ratings = RatingsImporter(CSVFile(ratings_path)).import_ratings()

    algorithm = LinearPredictor(
        {"Plot": ['tfidf', 'embedding']},
        SkLinearRegression(),
    )
    recommender = ContentBasedRS(algorithm, ratings, items_dir)

    # Catalog: extension-stripped names of the regular files in items_dir
    # whose name ends with 'xz'
    catalog = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(items_dir)
        if os.path.isfile(os.path.join(items_dir, entry)) and entry.endswith('xz')
    }

    partitioning = KFoldPartitioning()
    metrics = [
        Precision(sys_average='micro'),
        PrecisionAtK(1, sys_average='micro'),
        RPrecision(),
        Recall(),
        RecallAtK(3),
        FMeasure(1, sys_average='macro'),
        FMeasureAtK(2, beta=1, sys_average='micro'),
        NDCG(),
        NDCGAtK(3),
        MRR(),
        MRRAtK(5),
        Correlation('pearson', top_n=5),
        Correlation('kendall', top_n=3),
        Correlation('spearman', top_n=4),
        MAE(),
        MSE(),
        RMSE(),
        CatalogCoverage(catalog),
        CatalogCoverage(catalog, k=2),
        CatalogCoverage(catalog, top_n=3),
        GiniIndex(),
        GiniIndex(top_n=3),
        DeltaGap({'primo': 0.5, 'secondo': 0.5}),
    ]
    methodology = TestItemsMethodology()

    evaluator = EvalModel(
        recommender,
        partitioning,
        metric_list=metrics,
        methodology=methodology,
    )
    # The returned value is not inspected; reaching this point without an
    # exception is the whole check.
    evaluator.fit()
def test_calc_scores_content_based(self):
    """Check ``calc_predictions`` with a content-based recsys and a score metric.

    Only ``MAE`` is requested, so after the call the score truth list is
    expected to be populated while the rank truth list stays empty.
    """
    predictor = LinearPredictor({'Plot': 'tfidf'}, SkLinearRegression())
    recsys = ContentBasedRS(predictor, self.ratings_original, movies_dir)

    # A single Metric of the ScoresNeededMetric class is enough for this test
    metric_list = [MAE()]

    calculator = PredictionCalculator(self.split_list, recsys)
    valid_metric = calculator.calc_predictions(self.test_items_list, metric_list)

    score_truth = ScoresNeededMetric.score_truth_list
    # Expected to be empty, since there is no RankingNeededMetric in the metric list
    rank_truth = RankingNeededMetric.rank_truth_list

    self.assertEqual(valid_metric, metric_list)
    self.assertGreater(len(score_truth), 0)
    self.assertEqual(len(rank_truth), 0)