# Excerpt from the project's unit tests. The methods below assume the usual
# test-module context: `import os`, `import pandas as pd`, a unittest.TestCase
# subclass, and the project's load_data, get_datasets, concatenate_datasets
# and split_data helpers imported from the source package.


def test_load_data(self):
    """Test case for correct loading of the dataframes."""
    # load_data should return a dataframe for the raw tweets dump.
    self.assertIsInstance(
        load_data(os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv')),
        pd.DataFrame)
    # get_datasets returns the two labelled datasets; both must be dataframes.
    df2, df3 = get_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'labeled_data.csv'),
        os.path.join(os.path.pardir, 'src', 'data',
                     'hatespeech_text_label_vote_RESTRICTED_100K.csv'))
    self.assertIsInstance(df2, pd.DataFrame)
    self.assertIsInstance(df3, pd.DataFrame)
    # concatenate_datasets joins the tweets file with the two labelled
    # dataframes prepared in setUp() and must also yield a dataframe.
    self.assertIsInstance(
        concatenate_datasets(
            os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'),
            self.df2, self.df3),
        pd.DataFrame)
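# Illustrative sketch only (not part of the original suite): the assertions
# above only check return types, so a follow-up sanity check could reuse the
# fixtures built in setUp() below and assert that the loaded frames are not
# empty. The method name and the non-empty expectation are assumptions about
# the project data, not something stated in the original tests.
def test_loaded_frames_not_empty(self):
    for frame in (self.df, self.df2, self.df3, self.df_concatenated):
        self.assertIsInstance(frame, pd.DataFrame)
        self.assertFalse(frame.empty)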
def setUp(self):
    # Fixtures for the loading tests: the raw tweets dump, the two labelled
    # datasets, and their concatenation.
    self.df = load_data(
        os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'))
    self.df2, self.df3 = get_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'labeled_data.csv'),
        os.path.join(os.path.pardir, 'src', 'data',
                     'hatespeech_text_label_vote_RESTRICTED_100K.csv'))
    self.df_concatenated = concatenate_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'),
        self.df2, self.df3)
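# Illustrative sketch only: given these fixtures, a schema check could confirm
# that the concatenated frame exposes the 'text' and 'hate_speech' columns that
# the split below relies on. The method name is hypothetical.
def test_concatenated_frame_columns(self):
    for column in ('text', 'hate_speech'):
        self.assertIn(column, self.df_concatenated.columns)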
def setUp(self):
    # Same fixtures as the loading tests, plus the train/test split of the
    # concatenated dataframe on the 'text' feature and 'hate_speech' label
    # (0.25 is presumably the held-out test fraction).
    self.df = load_data(
        os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'))
    self.df2, self.df3 = get_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'labeled_data.csv'),
        os.path.join(os.path.pardir, 'src', 'data',
                     'hatespeech_text_label_vote_RESTRICTED_100K.csv'))
    self.df_concatenated = concatenate_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'),
        self.df2, self.df3)
    self.training_data, self.testing_data, self.training_y, self.testing_y = \
        split_data(self.df_concatenated, 'text', 'hate_speech', 0.25)
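# Illustrative sketch only: assuming 0.25 is the held-out test fraction and
# that split_data drops no rows, a size check along these lines would verify
# the split proportions. The method name and both assumptions are not taken
# from the original suite.
def test_split_sizes(self):
    n_total = len(self.df_concatenated)
    n_test = len(self.testing_data)
    # The two partitions should add up to the full dataset ...
    self.assertEqual(len(self.training_data) + n_test, n_total)
    # ... and the test partition should be roughly a quarter of it.
    self.assertAlmostEqual(n_test / n_total, 0.25, delta=0.01)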
def setUp(self):
    # Fixtures for the vectorization tests: the project data plus a small,
    # self-contained corpus and its expected bag-of-words counts.
    self.df = load_data(
        os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'))
    self.df2, self.df3 = get_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'labeled_data.csv'),
        os.path.join(os.path.pardir, 'src', 'data',
                     'hatespeech_text_label_vote_RESTRICTED_100K.csv'))
    self.df_concatenated = concatenate_datasets(
        os.path.join(os.path.pardir, 'src', 'data', 'tweets.csv'),
        self.df2, self.df3)
    self.test_set = pd.DataFrame([
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?'
    ], columns=['text'])
    # Expected term counts for the corpus above, one row per document, with the
    # vocabulary in alphabetical order:
    # and, document, first, is, one, second, the, third, this.
    self.test_result_count = [[0, 1, 1, 1, 0, 0, 1, 0, 1],
                              [0, 2, 0, 1, 0, 1, 1, 0, 1],
                              [1, 0, 0, 1, 1, 0, 1, 1, 1],
                              [0, 1, 1, 1, 0, 0, 1, 0, 1]]
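# Illustrative sketch only: the corpus above is the classic scikit-learn
# CountVectorizer example, so self.test_result_count can be reproduced with a
# default CountVectorizer. The project's own vectorization helper is not shown
# in this excerpt, so this sketch calls sklearn directly; the method name is
# hypothetical.
def test_count_vectorizer_reference(self):
    from sklearn.feature_extraction.text import CountVectorizer
    counts = CountVectorizer().fit_transform(self.test_set['text'])
    self.assertEqual(counts.toarray().tolist(), self.test_result_count)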