def test_ratings_to_csv(self):
    # Saves the imported ratings to 'csv_test/' several times and checks
    # which files exist afterwards: plain name, numbered duplicates,
    # overwrite of the plain name, and a caller-supplied file name.
    importer = RatingsImporter(source=self.raw_source,
                               from_id_column=0,
                               to_id_column=1,
                               score_column=4)
    importer.import_ratings()

    target_dir = 'csv_test/'

    # Three consecutive saves without overwrite: the first one creates the
    # base file, the next two are expected to get a " (n)" suffix.
    paths_after_each_save = (
        'csv_test/ratings_frame.csv',
        'csv_test/ratings_frame (1).csv',
        'csv_test/ratings_frame (2).csv',
    )
    for expected_path in paths_after_each_save:
        importer.imported_ratings_to_csv(target_dir)
        self.assertTrue(os.path.isfile(expected_path))

    # Saving with overwrite must reuse the base name instead of producing
    # a third numbered duplicate.
    importer.imported_ratings_to_csv(target_dir, overwrite=True)
    self.assertTrue(os.path.isfile('csv_test/ratings_frame.csv'))
    self.assertFalse(os.path.isfile('csv_test/ratings_frame (3).csv'))

    # Saving with a custom file name (second positional argument).
    importer.imported_ratings_to_csv(target_dir, 'ratings_custom_name')
    self.assertTrue(os.path.isfile('csv_test/ratings_custom_name.csv'))
def test_import_ratings_by_index(self):
    # Imports ratings selecting the columns by positional index and checks
    # the frame's column names, the Python type of every value and that the
    # values match the raw source content row by row.
    importer = RatingsImporter(source=self.raw_source,
                               from_id_column=0,
                               to_id_column=1,
                               score_column=4)
    frame = importer.import_ratings()

    self.assertEqual(['from_id', 'to_id', 'score'], list(frame.columns))

    # Values actually produced by the importer.
    actual_from = list(frame['from_id'])
    actual_to = list(frame['to_id'])
    actual_scores = list(frame['score'])

    # Values expected from the raw source; only the score is cast to float.
    wanted_from = []
    wanted_to = []
    wanted_scores = []
    for row in self.raw_source_content:
        wanted_from.append(row['user_id'])
        wanted_to.append(row['item_id'])
        wanted_scores.append(float(row['stars']))

    # Type checks: ids are strings, scores are floats.
    self.assertTrue(all(isinstance(value, str) for value in actual_from))
    self.assertTrue(all(isinstance(value, str) for value in actual_to))
    self.assertTrue(all(isinstance(value, float) for value in actual_scores))

    # Content checks.
    self.assertEqual(wanted_from, actual_from)
    self.assertEqual(wanted_to, actual_to)
    self.assertEqual(wanted_scores, actual_scores)
def test_add_score_column_w_score_processor(self):
    # Imports the ratings, then adds an extra score column computed from
    # 'review_title' through TextBlobSentimentAnalysis, and checks both the
    # resulting column names and that every added value lies in [-1, 1].
    ri = RatingsImporter(source=self.raw_source,
                         from_id_column=0,
                         to_id_column=1,
                         score_column=4)
    ratings = ri.import_ratings()

    expected_columns = ['from_id', 'to_id', 'score']
    result_columns = list(ratings.columns)
    self.assertEqual(expected_columns, result_columns)

    ratings_w_added_column = ri.add_score_column(
        'review_title',
        column_name='text_polarity',
        score_processor=TextBlobSentimentAnalysis())

    expected_columns = ['from_id', 'to_id', 'score', 'text_polarity']
    result_columns = list(ratings_w_added_column.columns)
    self.assertEqual(expected_columns, result_columns)

    # NOTE(review): the new column is read from the frame returned by
    # import_ratings(), not from the one returned by add_score_column();
    # this relies on both exposing 'text_polarity' - confirm it is intended.
    score_column_added = list(ratings['text_polarity'])

    # all(...) is required here: a bare generator expression is always
    # truthy, so assertTrue() would pass without checking any score.
    self.assertTrue(all(-1 <= score <= 1 for score in score_column_added))
def test_add_score_column(self):
    # Imports the ratings, adds a second score column built from 'stars'
    # (no score processor) and checks the new column names and values.
    importer = RatingsImporter(source=self.raw_source,
                               from_id_column=0,
                               to_id_column=1,
                               score_column=4)
    frame = importer.import_ratings()

    # Freshly imported frame only has the three base columns.
    self.assertEqual(['from_id', 'to_id', 'score'], list(frame.columns))

    extended_frame = importer.add_score_column('stars',
                                               column_name='score_duplicate')

    # The returned frame carries the additional column as last one.
    self.assertEqual(['from_id', 'to_id', 'score', 'score_duplicate'],
                     list(extended_frame.columns))

    # The added values are read from the frame returned by import_ratings().
    added_values = list(frame['score_duplicate'])
    wanted_values = []
    for row in self.raw_source_content:
        wanted_values.append(float(row['stars']))
    self.assertEqual(wanted_values, added_values)
def test_exception_import_ratings(self):
    # Checks that import_ratings() raises the expected exception for three
    # invalid configurations. Each entry below is
    # (from_id_column, score_column, expected exception type);
    # to_id_column is always 'item_id'.
    invalid_setups = (
        # Column name not present in raw source
        ('not_existent', 'stars', KeyError),
        # Column index not present in raw source
        (99, 'stars', IndexError),
        # Score column can't be converted into float
        ('user_id', 'review_title', ValueError),
    )

    for from_column, score_column, expected_error in invalid_setups:
        # The importer is built outside the assertRaises context: only the
        # import itself is expected to fail.
        importer = RatingsImporter(source=self.raw_source,
                                   from_id_column=from_column,
                                   to_id_column='item_id',
                                   score_column=score_column)
        with self.assertRaises(expected_error):
            importer.import_ratings()
def test_import_ratings_w_score_processor(self):
    # Imports ratings with a NumberNormalizer as score processor and checks
    # the frame's column names and that every score lies in [-1, 1].
    ri = RatingsImporter(source=self.raw_source,
                         from_id_column=0,
                         to_id_column=1,
                         score_column=4,
                         score_processor=NumberNormalizer())
    ratings = ri.import_ratings()

    expected_columns = ['from_id', 'to_id', 'score']
    result_columns = list(ratings.columns)
    self.assertEqual(expected_columns, result_columns)

    score_result = list(ratings['score'])

    # all(...) is required here: a bare generator expression is always
    # truthy, so assertTrue() would pass without checking any score.
    self.assertTrue(all(-1 <= score <= 1 for score in score_result))
def test_import_ratings_w_timestamp_index(self):
    # Imports ratings selecting also a timestamp column by positional index
    # and checks column names plus type and content of the timestamps.
    importer = RatingsImporter(source=self.raw_source,
                               from_id_column=0,
                               to_id_column=1,
                               score_column=4,
                               timestamp_column=5)
    frame = importer.import_ratings()

    # The timestamp column is expected right after the three base columns.
    self.assertEqual(['from_id', 'to_id', 'score', 'timestamp'],
                     list(frame.columns))

    actual_timestamps = list(frame['timestamp'])
    wanted_timestamps = []
    for row in self.raw_source_content:
        wanted_timestamps.append(row['timestamp'])

    # Timestamps are compared as strings, exactly as found in the source.
    self.assertTrue(
        all(isinstance(value, str) for value in actual_timestamps))
    self.assertEqual(wanted_timestamps, actual_timestamps)
) """ points_review_config = RatingsFieldConfig(field_name='points', processor=NumberNormalizer(min_=1, max_=5)) ratings_importer = RatingsImporter( source=CSVFile(ratings_filename), #cambia rating_configs=[points_review_config], from_field_name='user_id', to_field_name='item_id', timestamp_field_name='timestamp', ) ratings_frame = ratings_importer.import_ratings() print(ratings_frame) tfidf_classifier_config = ClassifierRecommender(item_field='Plot', field_representation='0', classifier='random_forest') classifier_recsys_config = RecSysConfig( users_directory=users_ca_dir, items_directory=items_ca_dir, ranking_algorithm=tfidf_classifier_config, rating_frame=ratings_frame) classifier_recommender = RecSys(config=classifier_recsys_config) rank = classifier_recommender.fit_ranking(user_id='1', recs_number=5)