Example #1
0
    def test_ratings_to_csv(self):
        """Exercise ``imported_ratings_to_csv`` file naming.

        Covers, in order: the default file name, the numbered names expected
        when saving again to the same directory, saving with
        ``overwrite=True`` and saving with a custom file name.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
        )
        importer.import_ratings()

        # Three consecutive saves without overwrite: the first is expected
        # to use the plain name, the next two a " (1)" / " (2)" suffix.
        paths_after_each_save = (
            'csv_test/ratings_frame.csv',
            'csv_test/ratings_frame (1).csv',
            'csv_test/ratings_frame (2).csv',
        )
        for expected_path in paths_after_each_save:
            importer.imported_ratings_to_csv('csv_test/')
            self.assertTrue(os.path.isfile(expected_path))

        # With overwrite enabled no further numbered copy must appear.
        importer.imported_ratings_to_csv('csv_test/', overwrite=True)
        self.assertTrue(os.path.isfile('csv_test/ratings_frame.csv'))
        self.assertFalse(os.path.isfile('csv_test/ratings_frame (3).csv'))

        # A custom name passed as second positional argument.
        importer.imported_ratings_to_csv('csv_test/', 'ratings_custom_name')
        self.assertTrue(os.path.isfile('csv_test/ratings_custom_name.csv'))
Example #2
0
    def test_import_ratings_by_index(self):
        """Import ratings using positional column indexes and check the result.

        Verifies the column names of the returned frame, the Python type of
        the values in each column, and that the values match those read from
        ``self.raw_source_content``.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
        )
        frame = importer.import_ratings()

        self.assertEqual(['from_id', 'to_id', 'score'], list(frame.columns))

        actual_from = list(frame['from_id'])
        actual_to = list(frame['to_id'])
        actual_score = list(frame['score'])

        wanted_from = [rec['user_id'] for rec in self.raw_source_content]
        wanted_to = [rec['item_id'] for rec in self.raw_source_content]
        wanted_score = [
            float(rec['stars']) for rec in self.raw_source_content
        ]

        # Ids must be strings, scores must be floats.
        typed_columns = (
            (actual_from, str),
            (actual_to, str),
            (actual_score, float),
        )
        for values, wanted_type in typed_columns:
            self.assertTrue(
                all(isinstance(value, wanted_type) for value in values))

        self.assertEqual(wanted_from, actual_from)
        self.assertEqual(wanted_to, actual_to)
        self.assertEqual(wanted_score, actual_score)
Example #3
0
    def test_add_score_column_w_score_processor(self):
        """Add a processed score column and check its name and value range.

        After importing the base ratings, ``add_score_column`` is called on
        the 'review_title' field with a ``TextBlobSentimentAnalysis``
        processor; the new 'text_polarity' column must be appended and all
        of its values must lie in [-1, 1].
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4)

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        ratings_w_added_column = ri.add_score_column(
            'review_title',
            column_name='text_polarity',
            score_processor=TextBlobSentimentAnalysis())

        expected_columns = ['from_id', 'to_id', 'score', 'text_polarity']
        result_columns = list(ratings_w_added_column.columns)

        self.assertEqual(expected_columns, result_columns)

        # NOTE(review): the column is read from the frame returned by
        # import_ratings(), which presumably is updated by
        # add_score_column() -- confirm against RatingsImporter.
        score_column_added = list(ratings['text_polarity'])

        # all() is required: a bare generator expression is always truthy,
        # so assertTrue(<generator>) would pass whatever the values are.
        self.assertTrue(
            all(-1 <= score <= 1 for score in score_column_added))
Example #4
0
    def test_add_score_column(self):
        """Add a second score column taken from the 'stars' field.

        Checks the column names before and after ``add_score_column`` and
        that the added 'score_duplicate' column holds the float value of
        'stars' for every entry of ``self.raw_source_content``.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
        )
        base_frame = importer.import_ratings()

        self.assertEqual(['from_id', 'to_id', 'score'],
                         list(base_frame.columns))

        extended_frame = importer.add_score_column(
            'stars', column_name='score_duplicate')

        self.assertEqual(['from_id', 'to_id', 'score', 'score_duplicate'],
                         list(extended_frame.columns))

        # The new column is read back from the frame returned by
        # import_ratings(), not from the one returned by add_score_column().
        duplicated_scores = list(base_frame['score_duplicate'])
        wanted_scores = [
            float(rec['stars']) for rec in self.raw_source_content
        ]

        self.assertEqual(wanted_scores, duplicated_scores)
Example #5
0
    def test_exception_import_ratings(self):
        """Check the exceptions ``import_ratings`` raises for bad columns."""
        # Column name that is not present in the raw source -> KeyError
        unknown_name_importer = RatingsImporter(
            source=self.raw_source,
            from_id_column='not_existent',
            to_id_column='item_id',
            score_column='stars',
        )
        self.assertRaises(KeyError, unknown_name_importer.import_ratings)

        # Column index that is not present in the raw source -> IndexError
        unknown_index_importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=99,
            to_id_column='item_id',
            score_column='stars',
        )
        self.assertRaises(IndexError, unknown_index_importer.import_ratings)

        # Score column that can't be converted into float -> ValueError
        text_score_importer = RatingsImporter(
            source=self.raw_source,
            from_id_column='user_id',
            to_id_column='item_id',
            score_column='review_title',
        )
        self.assertRaises(ValueError, text_score_importer.import_ratings)
Example #6
0
    def test_import_ratings_w_score_processor(self):
        """Import ratings with a score processor and check the score range.

        The importer is built with a ``NumberNormalizer`` as
        ``score_processor``; the resulting frame must expose the standard
        three columns and every value of 'score' must lie in [-1, 1].
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4,
                             score_processor=NumberNormalizer())

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        score_result = list(ratings['score'])

        # all() is required: a bare generator expression is always truthy,
        # so assertTrue(<generator>) would pass whatever the values are.
        self.assertTrue(all(-1 <= score <= 1 for score in score_result))
Example #7
0
    def test_import_ratings_w_timestamp_index(self):
        """Import ratings with a timestamp column addressed by index.

        Checks that a 'timestamp' column is appended to the frame, that its
        values are strings and that they match the 'timestamp' entries of
        ``self.raw_source_content``.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
            timestamp_column=5,
        )
        frame = importer.import_ratings()

        self.assertEqual(['from_id', 'to_id', 'score', 'timestamp'],
                         list(frame.columns))

        actual_timestamps = list(frame['timestamp'])
        wanted_timestamps = [
            rec['timestamp'] for rec in self.raw_source_content
        ]

        # Timestamps are expected to be kept as strings.
        self.assertTrue(
            all(isinstance(value, str) for value in actual_timestamps))
        self.assertEqual(wanted_timestamps, actual_timestamps)
Example #8
0
)
"""

# Configuration for the 'points' field of the ratings source: its values are
# passed through a NumberNormalizer built with min_=1 and max_=5.
points_review_config = RatingsFieldConfig(field_name='points',
                                          processor=NumberNormalizer(min_=1,
                                                                     max_=5))

# Importer reading the ratings from the CSV file named by `ratings_filename`
# (defined outside this section); 'user_id', 'item_id' and 'timestamp' are
# the field names used for the from/to ids and the timestamp.
ratings_importer = RatingsImporter(
    source=CSVFile(ratings_filename),  # change this
    rating_configs=[points_review_config],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp',
)

# Run the import and print the resulting ratings frame.
ratings_frame = ratings_importer.import_ratings()
print(ratings_frame)

# Classifier-based recommender working on the 'Plot' item field, using the
# representation identified by '0' and the 'random_forest' classifier.
# NOTE(review): the variable name suggests representation '0' is a TF-IDF
# one -- confirm against the content analyzer configuration.
tfidf_classifier_config = ClassifierRecommender(item_field='Plot',
                                                field_representation='0',
                                                classifier='random_forest')

# Recommender system configuration: users/items directories (both defined
# outside this section), the algorithm above and the imported ratings.
classifier_recsys_config = RecSysConfig(
    users_directory=users_ca_dir,
    items_directory=items_ca_dir,
    ranking_algorithm=tfidf_classifier_config,
    rating_frame=ratings_frame)

classifier_recommender = RecSys(config=classifier_recsys_config)

# Ask for a ranking of 5 recommendations for the user with id '1'.
rank = classifier_recommender.fit_ranking(user_id='1', recs_number=5)