def test_rank_single_representation(self):
    # Checks CentroidVector.rank() when items are described by a single
    # representation (the 'embedding' of the 'Genre' field), using the
    # ratings of user "A000". Three call shapes are covered: with a
    # filter list, with default arguments, and with an explicit n_recs.
    centroid_alg = CentroidVector({'Genre': ['embedding']}, CosineSimilarity(), threshold=0)
    ratings_a000 = self.ratings.query('from_id == "A000"')
    centroid_alg.process_rated(ratings_a000, self.movies_dir)
    centroid_alg.fit()

    def already_rated_in(ranking):
        # Ids present in `ranking` that the user has already rated.
        rated_ids = set(ratings_a000['to_id'])
        ranked_ids = set(ranking['to_id'])
        return ranked_ids.intersection(rated_ids)

    # --- rank restricted to filter_list ---
    # The ranking must contain exactly the items of the filter list.
    filtered_ranking = centroid_alg.rank(ratings_a000, self.movies_dir, filter_list=self.filter_list)
    filtered_ids = set(filtered_ranking['to_id'])
    self.assertEqual(len(filtered_ids), len(self.filter_list))
    self.assertCountEqual(filtered_ids, self.filter_list)

    # --- rank with default arguments ---
    # Only unrated items should be ranked (unless they are in a filter
    # list), so no rated item may show up in the result.
    unrated_ranking = centroid_alg.rank(ratings_a000, self.movies_dir)
    overlap = already_rated_in(unrated_ranking)
    self.assertEqual(len(overlap), 0)

    # --- rank with n_recs specified ---
    # Exactly n_recs rows are expected, again without any rated item.
    n_recs = 5
    top_n_ranking = centroid_alg.rank(ratings_a000, self.movies_dir, n_recs)
    self.assertEqual(len(top_n_ranking), n_recs)
    overlap = already_rated_in(top_n_ranking)
    self.assertEqual(len(overlap), 0)
def test_rank_multiple_representations(self):
    # Checks CentroidVector.rank() when several fields, each with several
    # representations, describe the items. No threshold is passed here;
    # per the original note the algorithm then derives it from the mean
    # of the user's ratings (not verifiable from this test alone).
    # Three call shapes are covered: with a filter list, with default
    # arguments, and with an explicit n_recs.
    centroid_alg = CentroidVector(
        {'Plot': ['tfidf', 'embedding'],
         "Genre": ['tfidf', 'embedding'],
         'imdbRating': [0]},
        CosineSimilarity())
    ratings_a000 = self.ratings.query('from_id == "A000"')
    centroid_alg.process_rated(ratings_a000, self.movies_dir)
    centroid_alg.fit()

    def already_rated_in(ranking):
        # Ids present in `ranking` that the user has already rated.
        rated_ids = set(ratings_a000['to_id'])
        ranked_ids = set(ranking['to_id'])
        return ranked_ids.intersection(rated_ids)

    # --- rank restricted to filter_list ---
    # The ranking must contain exactly the items of the filter list.
    filtered_ranking = centroid_alg.rank(ratings_a000, self.movies_dir, filter_list=self.filter_list)
    filtered_ids = set(filtered_ranking['to_id'])
    self.assertEqual(len(filtered_ids), len(self.filter_list))
    self.assertCountEqual(filtered_ids, self.filter_list)

    # --- rank with default arguments ---
    # Only unrated items should be ranked (unless they are in a filter
    # list), so no rated item may show up in the result.
    unrated_ranking = centroid_alg.rank(ratings_a000, self.movies_dir)
    overlap = already_rated_in(unrated_ranking)
    self.assertEqual(len(overlap), 0)

    # --- rank with n_recs specified ---
    # Exactly n_recs rows are expected, again without any rated item.
    n_recs = 5
    top_n_ranking = centroid_alg.rank(ratings_a000, self.movies_dir, n_recs)
    self.assertEqual(len(top_n_ranking), n_recs)
    overlap = already_rated_in(top_n_ranking)
    self.assertEqual(len(overlap), 0)
def test_predict(self):
    # CentroidVector is not a score prediction algorithm, so asking a
    # fitted instance for predictions must raise NotPredictionAlg.
    item_fields = {'Genre': ['embedding']}
    centroid_alg = CentroidVector(item_fields, CosineSimilarity(), threshold=0)

    ratings_a000 = self.ratings.query('from_id == "A000"')
    centroid_alg.process_rated(ratings_a000, self.movies_dir)
    centroid_alg.fit()

    # Only the predict() call sits inside the context manager, so an
    # exception raised during setup cannot satisfy the assertion.
    with self.assertRaises(NotPredictionAlg):
        centroid_alg.predict(ratings_a000, self.movies_dir)