def test_load_data(self):
        """Check that each data-loading helper returns a pandas DataFrame.

        Covers ``load_data``, both elements of the ``get_datasets`` result,
        and the result of ``concatenate_datasets``. Reads ``self.df2`` and
        ``self.df3``, which must already be set on the test instance.
        """
        data_dir = os.path.join(os.path.pardir, 'src', 'data')
        tweets_csv = os.path.join(data_dir, 'tweets.csv')

        self.assertIsInstance(load_data(tweets_csv), pd.DataFrame)

        # Call get_datasets once and inspect both returned elements, rather
        # than re-reading the same two CSV files for every element checked.
        datasets = get_datasets(
            os.path.join(data_dir, 'labeled_data.csv'),
            os.path.join(data_dir,
                         'hatespeech_text_label_vote_RESTRICTED_100K.csv'))
        self.assertIsInstance(datasets[0], pd.DataFrame)
        self.assertIsInstance(datasets[1], pd.DataFrame)

        self.assertIsInstance(
            concatenate_datasets(tweets_csv, self.df2, self.df3),
            pd.DataFrame)
 def setUp(self):
     """Prepare the shared inputs used by the tests of this case.

     Stores on the instance: ``df`` (result of ``load_data``), ``df2`` and
     ``df3`` (the pair returned by ``get_datasets``) and
     ``df_concatenated`` (result of ``concatenate_datasets``).
     """
     data_dir = os.path.join(os.path.pardir, 'src', 'data')
     tweets_csv = os.path.join(data_dir, 'tweets.csv')
     labeled_csv = os.path.join(data_dir, 'labeled_data.csv')
     votes_csv = os.path.join(
         data_dir, 'hatespeech_text_label_vote_RESTRICTED_100K.csv')

     self.df = load_data(tweets_csv)
     self.df2, self.df3 = get_datasets(labeled_csv, votes_csv)
     self.df_concatenated = concatenate_datasets(
         tweets_csv, self.df2, self.df3)
    def setUp(self):
        """Prepare the loaded data sets and a train/test split of them.

        Stores on the instance: ``df``, ``df2``, ``df3``,
        ``df_concatenated`` and the four values returned by ``split_data``
        (``training_data``, ``testing_data``, ``training_y``,
        ``testing_y``).
        """
        data_dir = os.path.join(os.path.pardir, 'src', 'data')
        tweets_csv = os.path.join(data_dir, 'tweets.csv')
        labeled_csv = os.path.join(data_dir, 'labeled_data.csv')
        votes_csv = os.path.join(
            data_dir, 'hatespeech_text_label_vote_RESTRICTED_100K.csv')

        self.df = load_data(tweets_csv)
        self.df2, self.df3 = get_datasets(labeled_csv, votes_csv)
        self.df_concatenated = concatenate_datasets(
            tweets_csv, self.df2, self.df3)

        # NOTE(review): 0.25 is presumably the test-set fraction -- confirm
        # against split_data's signature.
        split = split_data(self.df_concatenated, 'text', 'hate_speech', 0.25)
        (self.training_data, self.testing_data,
         self.training_y, self.testing_y) = split
Ejemplo n.º 4
0
 def setUp(self):
     """Prepare the loaded data sets and a small hand-written text fixture.

     Stores on the instance: ``df``, ``df2``, ``df3``, ``df_concatenated``,
     a four-row ``test_set`` DataFrame with a single ``text`` column, and
     ``test_result_count``, a 4 x 9 list of integers.
     """
     # Data sets read from CSV files under ../src/data (relative to the
     # current working directory).
     self.df = load_data(
         os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'))
     self.df2, self.df3 = get_datasets(
         os.path.join(os.path.pardir, 'src', 'data', 'labeled_data.csv'),
         os.path.join(os.path.pardir, 'src', 'data',
                      'hatespeech_text_label_vote_RESTRICTED_100K.csv'))
     self.df_concatenated = concatenate_datasets(
         os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'),
         self.df2, self.df3)
     # Four short documents, one per row, in a single "text" column.
     self.test_set = pd.DataFrame([
         'This is the first document.',
         'This document is the second document.',
         'And this is the third one.', 'Is this the first document?'
     ],
                                  columns=["text"])
     # One row per document in test_set. The values match per-document word
     # counts over the lower-cased, alphabetically ordered vocabulary
     # (and, document, first, is, one, second, the, third, this).
     # NOTE(review): presumably the expected output of a count-vectorizing
     # helper -- confirm against the test that consumes it.
     self.test_result_count = [[0, 1, 1, 1, 0, 0, 1, 0, 1],
                               [0, 2, 0, 1, 0, 1, 1, 0, 1],
                               [1, 0, 0, 1, 1, 0, 1, 1, 1],
                               [0, 1, 1, 1, 0, 0, 1, 0, 1]]