Example #1
0
    def test_raise_errors(self):
        """Check that ``process_rated`` rejects degenerate user profiles.

        Each scenario builds a one-row ratings frame for user ``A000`` and
        expects ``ClassifierRecommender.process_rated`` to raise a specific
        exception:

        * a single rating with score ``1``  -> ``OnlyPositiveItems``
        * a single rating with score ``-1`` -> ``OnlyNegativeItems``
        * a single rating on the item id ``"non existent"`` -> ``NoRatedItems``
        """
        columns = ["from_id", "to_id", "score", "timestamp"]

        # One entry per scenario: (rated item id, score, exception expected)
        scenarios = [
            # Only positive available
            ("tt0112281", 1, OnlyPositiveItems),
            # Only negative available
            ("tt0112281", -1, OnlyNegativeItems),
            # No item available locally
            ("non existent", 0.5, NoRatedItems),
        ]

        for item_id, score, expected_error in scenarios:
            # The frame is stored on the instance, exactly as each scenario
            # did when it was written out by hand.
            self.ratings = pd.DataFrame.from_records(
                [("A000", item_id, score, "54654675")],
                columns=columns)

            # A fresh recommender is built for every scenario
            alg = ClassifierRecommender({'Plot': 'tfidf'}, SkKNN(), 0)
            user_ratings = self.ratings.query('from_id == "A000"')

            with self.assertRaises(expected_error):
                alg.process_rated(user_ratings, self.movies_dir)
Example #2
0
    def setUp(self) -> None:
        """Prepare the fixtures stored on the test instance.

        Sets four attributes:

        * ``self.ratings``: a DataFrame of (from_id, to_id, score, timestamp)
          rows for users ``A000`` to ``A003``.
        * ``self.filter_list``: three item ids.
        * ``self.movies_dir``: path built from ``dir_test_files``,
          ``'complex_contents'`` and ``'movies_codified/'``.
        * ``self.classifiers_list``: one instance of each classifier wrapper
          under test.
        """
        rating_columns = ["from_id", "to_id", "score", "timestamp"]
        rating_rows = [
            ("A000", "tt0112281", 0.5, "54654675"),
            ("A000", "tt0112302", 0.5, "54654675"),
            ("A001", "tt0114576", 0.8, "54654675"),
            ("A001", "tt0112896", -0.4, "54654675"),
            ("A000", "tt0112346", -0.5, "54654675"),
            ("A000", "tt0112453", -0.5, "54654675"),
            ("A002", "tt0112453", -0.2, "54654675"),
            ("A002", "tt0113497", 0.5, "54654675"),
            ("A003", "tt0112453", -0.8, "54654675"),
        ]
        self.ratings = pd.DataFrame.from_records(rating_rows,
                                                 columns=rating_columns)

        # tt0112281 is already rated by A000, but it is kept in the filter
        # list on purpose: let's suppose we still want to know its rank.
        self.filter_list = ['tt0112281', 'tt0112760', 'tt0112896']

        codified_movies = os.path.join(dir_test_files, 'complex_contents',
                                       'movies_codified/')
        self.movies_dir = codified_movies

        # IMPORTANT! To test a newly added classifier, append it to this list.
        classifiers = [
            SkSVC(),
            SkKNN(),
            SkRandomForest(),
            SkLogisticRegression(),
            SkDecisionTree(),
            SkGaussianProcess(),
        ]
        self.classifiers_list = classifiers
    def test_pop_invalid_metric(self):
        """Check that metrics the recommender cannot serve are dropped.

        ``calc_predictions`` is called twice on a ``ContentBasedRS`` wrapping
        a ``ClassifierRecommender``:

        1. with ``[MAE()]`` only: no metric is expected to be returned as
           valid and both truth lists are expected to be empty;
        2. with ``[MAE(), NDCG()]``: only the ``NDCG`` instance is expected to
           be valid, so only the ranking truth list is expected to be filled.
        """
        algorithm = ClassifierRecommender({'Plot': 'tfidf'}, SkKNN(),
                                          threshold=3)
        recsys = ContentBasedRS(algorithm, self.ratings_original, movies_dir)

        # --- Case 1 ---------------------------------------------------------
        # Tries to calc score predictions with a pure ranking algorithm
        score_only_metrics = [MAE()]

        calculator = PredictionCalculator(self.split_list, recsys)
        valid_metric = calculator.calc_predictions(self.test_items_list,
                                                   score_only_metrics)

        # The metric is excluded from the valid ones and nothing is calculated
        # since there aren't any others. The truth lists are class-level
        # attributes, read here after the predictions have been calculated.
        self.assertEqual(len(valid_metric), 0)
        self.assertEqual(len(ScoresNeededMetric.score_truth_list), 0)
        self.assertEqual(len(RankingNeededMetric.rank_truth_list), 0)

        # --- Case 2 ---------------------------------------------------------
        # Same pure ranking algorithm, but this time a metric of another type
        # is requested together with the score one
        metric_ranking = NDCG()
        metric_score = MAE()
        mixed_metrics = [metric_score, metric_ranking]

        calculator = PredictionCalculator(self.split_list, recsys)
        valid_metric = calculator.calc_predictions(self.test_items_list,
                                                   mixed_metrics)

        # The metric MAE is excluded from the valid ones but NDCG is valid, so
        # predictions for that metric (RankingNeededMetric) are calculated
        self.assertIn(metric_ranking, valid_metric)
        self.assertNotIn(metric_score, valid_metric)

        self.assertEqual(len(ScoresNeededMetric.score_truth_list), 0)
        self.assertGreater(len(RankingNeededMetric.rank_truth_list), 0)
    def test_calc_rank_content_based(self):
        """Check rank predictions for a content-based recommender.

        With only an ``NDCG`` metric requested, ``calc_predictions`` is
        expected to return the metric list unchanged, fill
        ``RankingNeededMetric.rank_truth_list`` and leave
        ``ScoresNeededMetric.score_truth_list`` empty.
        """
        algorithm = ClassifierRecommender({'Plot': 'tfidf'}, SkKNN(),
                                          threshold=3)
        recsys = ContentBasedRS(algorithm, self.ratings_original, movies_dir)

        # We just need a Metric of the RankingNeededMetric class to test
        metric_list = [NDCG()]

        calculator = PredictionCalculator(self.split_list, recsys)
        valid_metric = calculator.calc_predictions(self.test_items_list,
                                                   metric_list)

        # Every requested metric is expected to be accepted
        self.assertEqual(valid_metric, metric_list)

        # Ranking truth is expected to be populated after the call
        self.assertGreater(len(RankingNeededMetric.rank_truth_list), 0)

        # We expect this to be empty, since there are no ScoresNeededMetric in
        # the metric list
        self.assertEqual(len(ScoresNeededMetric.score_truth_list), 0)