Example #1
0
    def test_exception_add_score_column(self):
        """Expect ValueError when the added score column is not numeric.

        The importer is built with named columns, then asked to add a score
        column taken from 'review_title'; the test only passes if that call
        raises ValueError.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column='user_id',
            to_id_column='item_id',
            score_column='stars',
        )

        # Scenario under test: the score column can't be converted into float.
        self.assertRaises(
            ValueError, importer.add_score_column, 'review_title', 'text')
Example #2
0
    def test_add_score_column_w_score_processor(self):
        """Check add_score_column when a score processor is supplied.

        Steps:
          1. Import the ratings using positional column indexes and verify
             the resulting frame exposes 'from_id', 'to_id' and 'score'.
          2. Add a 'text_polarity' column computed from 'review_title' by
             TextBlobSentimentAnalysis and verify it is appended last.
          3. Verify every value of the new column lies within [-1, 1].
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4)

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        ratings_w_added_column = ri.add_score_column(
            'review_title',
            column_name='text_polarity',
            score_processor=TextBlobSentimentAnalysis())

        expected_columns = ['from_id', 'to_id', 'score', 'text_polarity']
        result_columns = list(ratings_w_added_column.columns)

        self.assertEqual(expected_columns, result_columns)

        # NOTE(review): the new column is read from `ratings`, not from the
        # returned frame - this presumably relies on add_score_column
        # updating the imported frame in place; confirm against the importer.
        score_column_added = list(ratings['text_polarity'])

        # all(...) is required here: a bare generator expression is always
        # truthy, so assertTrue would pass without checking any score.
        self.assertTrue(all(-1 <= score <= 1 for score in score_column_added))
Example #3
0
    def test_add_score_column(self):
        """Check add_score_column without any score processor.

        After importing the ratings, the 'stars' field is added again under
        the name 'score_duplicate'. The test verifies the column layout
        before and after the addition, and that the new column holds the
        float value of 'stars' for every row of the raw source content.
        """
        importer = RatingsImporter(source=self.raw_source,
                                   from_id_column=0,
                                   to_id_column=1,
                                   score_column=4)

        frame = importer.import_ratings()

        # Layout right after the import: only the three base columns.
        self.assertEqual(['from_id', 'to_id', 'score'], list(frame.columns))

        extended_frame = importer.add_score_column(
            'stars', column_name='score_duplicate')

        # The added column must be appended after the base columns.
        self.assertEqual(
            ['from_id', 'to_id', 'score', 'score_duplicate'],
            list(extended_frame.columns))

        # The values are looked up on the frame returned by import_ratings.
        actual_scores = list(frame['score_duplicate'])
        wanted_scores = [
            float(record['stars']) for record in self.raw_source_content
        ]

        self.assertEqual(wanted_scores, actual_scores)