def test_multiple(self):
        """Check the multi-user entry points of a PageRank-backed GraphBasedRS.

        Score prediction is expected to raise ``NotPredictionAlg``, while
        ranking must return rows for exactly the requested users, both when
        restricted to ``self.filter_list`` and when limited to a top-n.
        """
        top_n = 3
        users = ['A000', 'A001']

        full_graph = NXFullGraph(ratings)
        page_rank = NXPageRank()
        recsys = GraphBasedRS(page_rank, full_graph)

        # Prediction: the test expects NotPredictionAlg to be raised here
        self.assertRaises(NotPredictionAlg,
                          recsys.multiple_fit_predict,
                          users,
                          filter_list=self.filter_list)

        # Ranking restricted to the items in the filter list
        filtered = recsys.multiple_fit_rank(users,
                                            filter_list=self.filter_list)
        self.assertEqual(set(users), set(filtered['from_id']))
        # NOTE: the loop variable must stay named `user`, the query string
        # references it through `@user`
        for user in users:
            rows_for_user = filtered.query('from_id == @user')
            self.assertEqual(len(rows_for_user), len(self.filter_list))

        # Top-n ranking
        numbered = recsys.multiple_fit_rank(users, recs_number=top_n)
        self.assertEqual(set(users), set(numbered['from_id']))
        for user in users:
            rows_for_user = numbered.query('from_id == @user')
            self.assertEqual(len(rows_for_user), top_n)
    def test_nx_page_rank(self):
        """Exercise single-user ``fit_predict`` / ``fit_rank`` with NXPageRank.

        Every ``fit_predict`` call is expected to raise ``NotPredictionAlg``;
        the ``fit_rank`` calls are checked only for the size of their result.
        """
        # The graph is built from the module-level ratings frame: graph based
        # recommendation needs all items to predict to be in that dataframe
        top_n = 1

        full_graph = NXFullGraph(ratings)
        page_rank = NXPageRank()
        recsys = GraphBasedRS(page_rank, full_graph)

        # Plain prediction raises, plain ranking yields three rows
        self.assertRaises(NotPredictionAlg, recsys.fit_predict, 'A000')

        ranking = recsys.fit_rank('A000')
        self.assertEqual(len(ranking), 3)

        # Same checks on the items of the filter list: prediction still
        # raises, ranking yields two rows
        self.assertRaises(NotPredictionAlg,
                          recsys.fit_predict,
                          'A000',
                          filter_list=self.filter_list)

        ranking_filtered = recsys.fit_rank('A000',
                                           filter_list=self.filter_list)
        self.assertEqual(len(ranking_filtered), 2)

        # Top-n ranking yields exactly the requested number of rows
        ranking_top_n = recsys.fit_rank('A000', recs_number=top_n)
        self.assertEqual(len(ranking_top_n), top_n)
Example #3
0
    def setUp(self) -> None:
        """Create the fixture graph and the algorithm used by the tests.

        Sets ``self.graph`` (an NXFullGraph built from a small ratings frame,
        with an extra 'Nolan' property node) and ``self.alg`` (an NXPageRank).
        """
        column_names = ["from_id", "to_id", "score", "timestamp"]
        rating_rows = [
            ("A000", "tt0114576", 1, "54654675"),
            ("A000", "tt0112453", -0.2, "54654675"),
            ("A001", "tt0114576", 0.8, "54654675"),
            ("A001", "tt0112896", -0.4, "54654675"),
            ("A000", "tt0113041", 0.6, "54654675"),
            ("A002", "tt0112453", -0.2, "54654675"),
            ("A002", "tt0113497", 0.5, "54654675"),
            ("A003", "tt0112453", -0.8, "54654675"),
        ]
        ratings = pd.DataFrame.from_records(rating_rows, columns=column_names)

        fixture_graph = NXFullGraph(ratings)
        self.graph = fixture_graph
        fixture_graph.add_property_node('Nolan')

        # A concrete algorithm is instantiated so its methods can be tested
        # NOTE(review): presumably the base algorithm class is abstract - confirm
        self.alg = NXPageRank()
    def test_fit_graph_w_testrating_methodology(self):
        """Fit an EvalModel without an explicit methodology argument.

        Both values returned by ``fit`` must be pandas DataFrames.
        """
        full_graph = NXFullGraph(ratings)
        recsys = GraphBasedRS(NXPageRank(), full_graph)

        evaluation = EvalModel(recsys,
                               KFoldPartitioning(),
                               metric_list=[Precision()])

        sys_result, users_result = evaluation.fit()

        for frame in (sys_result, users_result):
            self.assertIsInstance(frame, pd.DataFrame)
Example #5
0
    def test_graph(self):
        """Smoke test: fit an EvalModel with many metrics on a graph with properties.

        No assertion is made on the results; the test passes as long as
        ``fit`` completes without raising.
        """
        catalog = set(ratings.to_id)

        users_dir = os.path.join(dir_test_files, 'complex_contents',
                                 'users_codified/')

        full_graph = NXFullGraph(
            ratings,
            user_contents_dir=users_dir,
            item_contents_dir=items_dir,
            item_exo_representation="dbpedia",
            user_exo_representation='local',
            item_exo_properties=['starring'],
            # '1' is the column in the users .DAT which identifies the gender
            user_exo_properties=['1'],
        )

        graph_rs = GraphBasedRS(NXPageRank(), full_graph)

        # Collaborators are created in the same order the EvalModel call
        # would evaluate them: partitioning, metrics, methodology
        partitioning = KFoldPartitioning()
        metrics = [
            Precision(relevant_threshold=3),
            Recall(),
            FMeasure(beta=1),
            FMeasure(beta=2, sys_average='micro'),
            MRR(),
            Correlation('pearson'),
            GiniIndex(),
            DeltaGap({'popular': 0.5, 'niche': 0.5}),
            PredictionCoverage(catalog),
            PopProfileVsRecs(user_groups={'popular': 0.5, 'niche': 0.5},
                             out_dir='plots/'),
            LongTailDistr('plots/', format='svg'),
            PopRecsCorrelation('plots/'),
        ]
        methodology = TestItemsMethodology()

        evaluation = EvalModel(graph_rs,
                               partitioning,
                               metric_list=metrics,
                               verbose_predictions=True,
                               methodology=methodology)

        evaluation.fit()
    def test_fit_graph_w_allitems_methodology(self):
        """Fit an EvalModel using AllItemsMethodology over the items on disk.

        The item ids are the extension-less names of the regular files in
        ``items_dir`` whose name ends with 'xz'. Both values returned by
        ``fit`` must be pandas DataFrames.
        """
        full_graph = NXFullGraph(ratings)
        recsys = GraphBasedRS(NXPageRank(), full_graph)

        item_ids = {
            os.path.splitext(filename)[0]
            for filename in os.listdir(items_dir)
            if os.path.isfile(os.path.join(items_dir, filename))
            and filename.endswith('xz')
        }

        evaluation = EvalModel(recsys,
                               KFoldPartitioning(),
                               metric_list=[Precision()],
                               methodology=AllItemsMethodology(item_ids))

        sys_result, users_result = evaluation.fit()

        for frame in (sys_result, users_result):
            self.assertIsInstance(frame, pd.DataFrame)
    def test_calc_rank_graph_based(self):
        """Run PredictionCalculator with a ranking metric on a graph based recsys.

        The metric list must be returned as valid, the ranking truth list
        must be non-empty and the score truth list must be empty.
        """
        full_graph = NXFullGraph(self.ratings_original)
        recsys = GraphBasedRS(NXPageRank(), full_graph)

        # A single metric of the RankingNeededMetric class is enough here
        metrics = [NDCG()]

        calculator = PredictionCalculator(self.split_list, recsys)
        valid_metrics = calculator.calc_predictions(self.test_items_list,
                                                    metrics)

        self.assertEqual(valid_metrics, metrics)

        # Ranking data is expected since a ranking metric was requested
        self.assertGreater(len(RankingNeededMetric.rank_truth_list), 0)

        # Expected to be empty: there is no ScoresNeededMetric in the metric list
        self.assertEqual(len(ScoresNeededMetric.score_truth_list), 0)
    def test_page_rank_with_feature_selection(self):
        """Check that NXPageRank runs when a Feature Selection algorithm is set.

        The Feature Selection itself is tested in its dedicated test file;
        here NXTopKPageRank is only plugged in and the size of each ranking
        for user '4' is verified.
        """
        contents_root = os.path.join(dir_test_files, 'complex_contents')
        movies_dir = os.path.join(contents_root, 'movies_codified/')
        user_dir = os.path.join(contents_root, 'users_codified/')

        interactions = pd.DataFrame.from_dict({
            'from_id': ["1", "1", "2", "2", "2", "3", "4", "4"],
            'to_id': ["tt0113228", "tt0113041", "tt0113228", "tt0112346",
                      "tt0112453", "tt0112453", "tt0112346", "tt0112453"],
            'score': [0.8, 0.7, -0.4, 1.0, 0.4, 0.1, -0.3, 0.7],
        })

        # NOTE(review): item_exo_properties is None here; which dbpedia
        # properties end up in the graph depends on NXFullGraph - confirm
        graph_with_properties: NXFullGraph = NXFullGraph(
            interactions,
            user_contents_dir=user_dir,
            item_contents_dir=movies_dir,
            item_exo_representation='dbpedia',
            user_exo_representation='local',
            item_exo_properties=None,
            user_exo_properties=['1'],
        )

        # (algorithm options, rank options, expected number of ranked rows)
        scenarios = [
            # fs standard algorithm
            ({}, {}, 2),
            # fs personalized algorithm
            ({'personalized': True}, {}, 2),
            # fs personalized algorithm and filter list
            ({'personalized': True}, {'filter_list': ['tt0113228']}, 1),
            # fs personalized algorithm and empty filter list
            ({'personalized': True}, {'filter_list': []}, 0),
        ]

        for alg_options, rank_options, expected_size in scenarios:
            # A fresh algorithm and a fresh feature selection for each scenario
            alg = NXPageRank(**alg_options,
                             feature_selection=NXTopKPageRank())
            ranking = alg.rank('4', graph_with_properties, **rank_options)
            self.assertEqual(len(ranking), expected_size)
    def test_rank(self):
        """Check NXPageRank.rank for user 'A000' on the fixture graph.

        Covers ranking with a filter list, without one, with a number of
        recommendations, and verifies that the personalized ranking differs
        from the non personalized one.
        """

        def assert_no_rated_item_in(ranking):
            # The alg should rank only unrated items (unless in filter list),
            # so rated and ranked items are expected not to overlap
            rated_items = set(
                self.ratings.query('from_id == "A000"')['to_id'])
            ranked_items = set(ranking['to_id'])
            overlap = ranked_items.intersection(rated_items)
            self.assertEqual(len(overlap), 0)

        # not personalized
        page_rank = NXPageRank()

        # rank with filter_list: exactly the filter list items are ranked
        ranking_filtered = page_rank.rank('A000',
                                          self.graph,
                                          filter_list=self.filter_list)
        filtered_items = set(ranking_filtered['to_id'])
        self.assertEqual(len(filtered_items), len(self.filter_list))
        self.assertCountEqual(filtered_items, self.filter_list)

        # rank without filter_list
        assert_no_rated_item_in(page_rank.rank('A000', self.graph))

        # rank with the number of recommendations specified
        top_n = 1
        ranking_top_n = page_rank.rank('A000', self.graph, top_n)
        self.assertEqual(len(ranking_top_n), top_n)
        assert_no_rated_item_in(ranking_top_n)

        # personalized vs not personalized
        ranking_personalized = NXPageRank(personalized=True).rank(
            'A000', self.graph)
        ranking_plain = NXPageRank().rank('A000', self.graph)

        # Both results are sorted along axis 0 before being compared
        sorted_personalized = np.sort(np.array(ranking_personalized), axis=0)
        sorted_plain = np.sort(np.array(ranking_plain), axis=0)

        self.assertFalse(np.array_equal(sorted_personalized, sorted_plain))
    def test_predict(self):
        """``predict`` on NXPageRank is expected to raise NotPredictionAlg."""
        page_rank = NXPageRank()

        # Expected to raise since it's not a Score Prediction Algorithm
        self.assertRaises(NotPredictionAlg,
                          page_rank.predict, 'A000', self.graph)
Example #11
0
class TestGraphBasedAlgorithm(TestCase):
    """Tests for clean_result, filter_result and extract_profile.

    The methods are exercised through an NXPageRank instance on a small
    NXFullGraph fixture.
    """

    def setUp(self) -> None:
        """Create the fixture graph (plus a 'Nolan' property node) and the algorithm."""
        column_names = ["from_id", "to_id", "score", "timestamp"]
        rating_rows = [
            ("A000", "tt0114576", 1, "54654675"),
            ("A000", "tt0112453", -0.2, "54654675"),
            ("A001", "tt0114576", 0.8, "54654675"),
            ("A001", "tt0112896", -0.4, "54654675"),
            ("A000", "tt0113041", 0.6, "54654675"),
            ("A002", "tt0112453", -0.2, "54654675"),
            ("A002", "tt0113497", 0.5, "54654675"),
            ("A003", "tt0112453", -0.8, "54654675"),
        ]
        ratings = pd.DataFrame.from_records(rating_rows, columns=column_names)

        fixture_graph = NXFullGraph(ratings)
        self.graph = fixture_graph
        fixture_graph.add_property_node('Nolan')

        # A concrete algorithm is instantiated so its methods can be tested
        # NOTE(review): presumably the base algorithm class is abstract - confirm
        self.alg = NXPageRank()

    def test_clean_rank(self):
        """clean_result keeps only the node kinds allowed by its flags."""
        rank = {
            UserNode("A000"): 0.5,
            ItemNode("tt0114576"): 0.5,
            UserNode("A001"): 0.5,
            ItemNode("tt0113497"): 0.5,
            ItemNode("tt0112453"): 0.5,
            PropertyNode("Nolan"): 0.5
        }

        # (extra keyword arguments, expected cleaned rank); the same `rank`
        # dict is passed to every call, in this order
        scenarios = [
            # remove from rank all nodes except Item nodes
            ({}, {"tt0113497": 0.5}),
            # remove from rank all nodes except Item nodes and User nodes
            ({'remove_users': False},
             {"tt0113497": 0.5, "A001": 0.5, "A000": 0.5}),
            # remove from rank all nodes except Item nodes and keep the
            # items rated by the user
            ({'remove_profile': False},
             {'tt0112453': 0.5, 'tt0113497': 0.5, 'tt0114576': 0.5}),
            # remove from rank all nodes except Item nodes and property nodes
            ({'remove_properties': False},
             {'tt0113497': 0.5, 'Nolan': 0.5}),
        ]

        for options, expected in scenarios:
            cleaned = self.alg.clean_result(self.graph,
                                            rank,
                                            user_id="A000",
                                            **options)
            self.assertEqual(expected, cleaned)

    def test_filter_result(self):
        """filter_result keeps only the nodes named in the filter list."""
        page_rank_scores = {
            ItemNode("i1"): 0.8,
            ItemNode("i2"): 0.7,
            UserNode('u1'): 0.2,
            PropertyNode("p1"): 0.1
        }

        # a single item
        only_item = self.alg.filter_result(page_rank_scores, ['i1'])
        self.assertEqual({ItemNode("i1"): 0.8}, only_item)

        # a user node and a property node
        user_and_property = self.alg.filter_result(page_rank_scores,
                                                   ['u1', 'p1'])
        self.assertEqual({UserNode('u1'): 0.2, PropertyNode("p1"): 0.1},
                         user_and_property)

        # filter with non existent nodes, result will be empty
        nothing = self.alg.filter_result(page_rank_scores,
                                         ['not exists', 'i20'])
        self.assertEqual({}, nothing)

    def test_extract_profile(self):
        """extract_profile returns the items rated by the user with their scores."""
        profile = self.alg.extract_profile(self.graph, "A000")

        plain_expected = {
            'tt0112453': -0.2,
            'tt0113041': 0.6,
            'tt0114576': 1.0
        }
        self.assertEqual(plain_expected, profile)

        # The comparison also holds with the keys wrapped in ItemNode
        item_wrapped = {
            ItemNode('tt0112453'): -0.2,
            ItemNode('tt0113041'): 0.6,
            ItemNode('tt0114576'): 1.0
        }
        self.assertEqual(item_wrapped, profile)

        # Keys wrapped in UserNode must not compare equal: they are not users
        user_wrapped = {
            UserNode('tt0112453'): -0.2,
            UserNode('tt0113041'): 0.6,
            UserNode('tt0114576'): 1.0
        }
        self.assertNotEqual(user_wrapped, profile)