def test_input_file_do_not_exist(self):
    """Check that processing a non-existent input file raises IOError.

    Builds a separate ``TweetCleaner`` pointing at ``foo.txt`` and expects
    ``process_tweets_file()`` to raise ``IOError`` whose text contains
    ``"Input file foo.txt does not exist"``.
    """
    filename = "foo.txt"
    cleaner1 = TweetCleaner(filename, self.test_output_file)
    with self.assertRaises(IOError) as context:
        cleaner1.process_tweets_file()
    msg = "Input file %s does not exist" % filename
    # Compare against the exception's text: "in" on the exception object
    # itself only matches a whole argument by equality on Python 2 and is
    # not supported at all on newer interpreters.
    self.assertTrue(msg in str(context.exception))
    print("Check program execution with a input file that does no exist [OK]")
class TestTweetCleaner(unittest.TestCase):
    """Unit tests for ``TweetCleaner``.

    Exercises ``clean_tweet``, ``check_unicode_chars``, ``read_json_data``,
    ``read_text_field``, ``read_created_at_field`` and
    ``process_tweets_file`` against small in-memory strings and temporary
    files created under ``tweet_input`` / ``tweet_output``.
    """

    # Run before each test case
    def setUp(self):
        """Compute the fixture paths and create the cleaner under test."""
        # NOTE(review): replace() strips every occurrence of "src" from the
        # working directory, not only a trailing component - confirm the
        # tests are always launched from <project>/src.
        self.current_path = os.getcwd().replace("src","")
        self.test_input_file = self.current_path + "/tweet_input/test.txt"
        self.test_output_file = self.current_path + "/tweet_output/test_ouput.txt"
        self.cleaner = TweetCleaner(self.test_input_file,self.test_output_file)

    def tearDown(self):
        """Drop the cleaner and remove any files the test created."""
        del self.cleaner
        self.delete_test_files()

    def test_constructor_state(self):
        """The constructor stores both filenames and starts the counter at 0."""
        # Check if class was initialized correctly
        self.assertEqual(self.cleaner.input_filename, self.test_input_file)
        self.assertEqual(self.cleaner.output_filename, self.test_output_file)
        self.assertEqual(self.cleaner.num_tweets_w_unicode, 0)
        print("Check state of the attributes in the class constructor [OK]")

    def assertWarns(self, warning, msg, callable, *args, **kwds):
        """Assert that calling ``callable`` emits a warning of type ``warning``.

        Args:
            warning: warning class (or tuple of classes) expected to be raised.
            msg: expected text of the last warning recorded during the call.
            callable: object to invoke.
            *args, **kwds: forwarded unchanged to ``callable``.
        """
        with warnings.catch_warnings(record=True) as warning_list:
            # Record every warning, even ones already shown once elsewhere.
            warnings.simplefilter('always')
            callable(*args, **kwds)
        # Honour the requested category (subclasses included) rather than
        # requiring the category to be exactly the base Warning class.
        self.assertTrue(
            any(issubclass(item.category, warning) for item in warning_list))
        self.assertEqual(msg, str(warning_list[-1].message))

    def create_test_file(self, data):
        """Write each item of ``data`` as one JSON line to the test input file.

        Args:
            data: iterable of JSON-serialisable objects (dicts in these tests).
        """
        # The fixture dicts carry literal backslash escapes; the
        # dumps/decode/encode chain is kept exactly as written because the
        # file content the cleaner reads depends on it.
        # NOTE(review): str.decode() exists only on Python 2 strings.
        with open(self.test_input_file, "w") as f:
            for line in data:
                f.write(json.dumps(line).decode('unicode-escape').encode('utf8'))
                f.write('\n')

    def delete_test_files(self):
        """Remove the test input and output files, ignoring missing ones."""
        # Remove each file on its own so that a missing input file does not
        # prevent the output file from being cleaned up.
        for path in (self.test_input_file, self.test_output_file):
            try:
                os.remove(path)
            except OSError:
                pass

    def _count_output_lines(self):
        """Return the number of lines currently in the test output file."""
        with open(self.test_output_file) as f:
            return sum(1 for _ in f)

    def test_no_unicode_char(self):
        """A plain ASCII tweet is returned unchanged by ``clean_tweet``."""
        # Create string with no unicode chars
        text = "Spark Summit East this week! #Spark #Apache"
        cleaned_text = "Spark Summit East this week! #Spark #Apache"
        self.assertEqual(self.cleaner.clean_tweet(text), cleaned_text)
        print("Check clean_tweet function with string with no unicode chars [OK]")

    def test_has_unicode_char(self):
        """Unicode escapes are dropped and escaped slashes are unescaped."""
        # Create string with unicode chars
        text = "I'm at Terminal de Integra\u00e7\u00e3o do Varadouro in Jo\u00e3o Pessoa, PB https:\/\/t.co\/HOl34REL1a"
        cleaned_text = "I'm at Terminal de Integrao do Varadouro in Joo Pessoa, PB https://t.co/HOl34REL1a"
        self.assertEqual(self.cleaner.clean_tweet(text), cleaned_text)
        print("Check clean_tweet function with unicode string [OK]")

    def test_empty_string(self):
        """Cleaning an empty string yields an empty string."""
        # Create empty string
        text = ""
        # assertFalse(value, "") would treat "" as the failure message, so
        # compare explicitly, as test_clean_empty_tweet does.
        self.assertEqual(self.cleaner.clean_tweet(text), "")
        print("Check clean_tweet function with empty string [OK]")

    def test_has_escape_char(self):
        """Newline and carriage-return escapes are removed from the tweet."""
        # Create string with escape chars
        text = '@KayKay121 dragged me to the library. Now I have to be\n\r productive https:\/\/t.co\/HjZR3d5QaQ\n'
        cleaned_text = '@KayKay121 dragged me to the library. Now I have to be productive https://t.co/HjZR3d5QaQ'
        self.assertEqual(self.cleaner.clean_tweet(text), cleaned_text)
        print("Check clean_tweet function with string with escape chars [OK]")

    def test_unicode_escape_char(self):
        """Unicode escapes and whitespace escapes are both removed."""
        # Create string with unicode and escape chars
        text = '@KayKay121 dragged me to the library. Now I have to be\n\r productive \ud83d\udc94 https:\/\/t.co\/HjZR3d5QaQ\n'
        cleaned_text = '@KayKay121 dragged me to the library. Now I have to be productive https://t.co/HjZR3d5QaQ'
        self.assertEqual(self.cleaner.clean_tweet(text), cleaned_text)
        print("Check clean_tweet function with string with unicode and escape chars [OK]")

    def test_input_file_do_not_exist(self):
        """Processing a non-existent input file raises IOError."""
        filename = "foo.txt"
        cleaner1 = TweetCleaner(filename, self.test_output_file)
        with self.assertRaises(IOError) as context:
            cleaner1.process_tweets_file()
        msg = "Input file %s does not exist" % filename
        # Match on the exception text, not on the exception object.
        self.assertTrue(msg in str(context.exception))
        print("Check program execution with a input file that does no exist [OK]")

    def test_incorrect_json_input_file(self):
        """A line that is not JSON makes ``read_json_data`` raise ValueError."""
        line = "Spark Summit East this week! #Spark #Apache"
        with self.assertRaises(ValueError) as context:
            self.cleaner.read_json_data(line)
        # Match on the exception text, not on the exception object.
        self.assertTrue(
            "Error: No JSON object could be decoded" in str(context.exception))
        print("Read a JSON object with incorrect format [OK]")

    def test_no_field_text(self):
        """A tweet without a ``text`` key reads back as an empty string."""
        # Create test data
        line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","id":659789756637822976,"id_str":"659789756637822976"}'
        data = self.cleaner.read_json_data(line)
        self.assertEqual(self.cleaner.read_text_field(data), "")
        print("Read a JSON object without text field [OK]")

    def test_no_field_timestamp(self):
        """A tweet without a ``created_at`` key reads back as ``None``."""
        line = '{"text":"Spark Summit East this week! #Spark #Apache","id":659789756637822976,"id_str":"659789756637822976"}'
        data = self.cleaner.read_json_data(line)
        self.assertEqual(self.cleaner.read_created_at_field(data), None)
        print("Read a JSON object without timestamp field [OK]")

    def test_correct_text_field(self):
        """The ``text`` field is returned verbatim when present."""
        line = '{"text":"Spark Summit East this week! #Spark #Apache","created_at":"Thu Oct 29 17:51:01 +0000 2015","id":659789756637822976,"id_str":"659789756637822976"}'
        data = self.cleaner.read_json_data(line)
        self.assertEqual(self.cleaner.read_text_field(data),
                         "Spark Summit East this week! #Spark #Apache")
        print("Read a JSON object with non empty text field [OK]")

    def test_correct_timestamp_field(self):
        """The ``created_at`` field is returned verbatim when present."""
        line = '{"text":"Spark Summit East this week! #Spark #Apache","created_at":"Thu Oct 29 17:51:01 +0000 2015","id":659789756637822976,"id_str":"659789756637822976"}'
        data = self.cleaner.read_json_data(line)
        self.assertEqual(self.cleaner.read_created_at_field(data),
                         "Thu Oct 29 17:51:01 +0000 2015")
        print("Read a JSON object with non empty timestamp format [OK]")

    def test_tweet_has_unicode(self):
        """A unicode tweet containing non-ASCII characters is detected."""
        text = u"@KayKay121 dragged me to the library. Now I have to be productive \ud83d\udc94 https:\/\/t.co\/HjZR3d5QaQ"
        self.assertTrue(self.cleaner.check_unicode_chars(text))
        print("Check if a tweet has unicode chars [OK]")

    def test_tweet_is_not_unicode(self):
        """Same tweet as a plain (non-``u``) string literal."""
        # NOTE(review): the assertion expects True while the printed message
        # says "no unicode chars" - confirm which one reflects the intended
        # behaviour of check_unicode_chars.
        # Create string
        text = "@KayKay121 dragged me to the library. Now I have to be productive \ud83d\udc94 https:\/\/t.co\/HjZR3d5QaQ"
        self.assertTrue(self.cleaner.check_unicode_chars(text))
        print("Check if a tweet has no unicode chars [OK]")

    def test_empty_string_has_unicode(self):
        """An empty string is reported as containing no unicode chars."""
        self.assertFalse(self.cleaner.check_unicode_chars(""))
        print("Check if a empty tweet has unicode chars [OK]")

    def test_clean_tweet_sucessfully(self):
        """``clean_tweet`` handles a mixed tweet and leaves plain text alone."""
        text = "I'm at Terminal de Integra\u00e7\u00e3o do Varadouro in Jo\u00e3o Pessoa\n\r, PB https:\/\/t.co\/HOl34REL1a"
        processed_text = "I'm at Terminal de Integrao do Varadouro in Joo Pessoa , PB https://t.co/HOl34REL1a"
        self.assertEqual(self.cleaner.clean_tweet(text), processed_text)
        self.assertEqual(self.cleaner.clean_tweet("This is a test"), "This is a test")
        print("Clean_tweet function with correct execution [OK]")

    def test_clean_empty_tweet(self):
        """Cleaning an empty tweet returns an empty string."""
        self.assertEqual(self.cleaner.clean_tweet(""), "")
        print("Clean_tweet function with empty text [OK]")

    def test_input_file_blank(self):
        """An empty input file produces an empty output file."""
        # Create empty file
        open(self.test_input_file, 'w').close()
        self.cleaner.process_tweets_file()
        # Check empty output file
        self.assertEqual(self._count_output_lines(), 0)
        print("Open empty file and check its output [OK]")

    def test_correct_output(self):
        """Processing three tweets writes the expected lines and summary."""
        # Create test data
        data = [
            {"created_at":"Thu Oct 29 17:51:01 +0000 2015","id":659789756637822976,"id_str":"659789756637822976","text":"Spark Summit East this week! #Spark #Apache"},
            {"created_at":"Thu Oct 29 18:10:49 +0000 2015","id":659794531844509700,"id_str":"659794531844509700","text":"I'm at Terminal de Integra\u00e7\u00e3o do Varadouro in Jo\u00e3o Pessoa, PB https:\/\/t.co\/HOl34REL1a"},
            {"created_at":"Thu Oct 29 17:51:50 +0000 2015","id":659789756637822976,"id_str":"659789756637822976","text":"@KayKay121 dragged me to the library. Now I have to be productive \ud83d\udc94 https:\/\/t.co\/HjZR3d5QaQ"},
        ]
        self.create_test_file(data)
        self.cleaner.process_tweets_file()
        # Expected content of the output file
        lines = (
            "Spark Summit East this week! #Spark #Apache (timestamp: Thu Oct 29 17:51:01 +0000 2015)\n"
            "I'm at Terminal de Integrao do Varadouro in Joo Pessoa, PB https://t.co/HOl34REL1a (timestamp: Thu Oct 29 18:10:49 +0000 2015)\n"
            "@KayKay121 dragged me to the library. Now I have to be productive https://t.co/HjZR3d5QaQ (timestamp: Thu Oct 29 17:51:50 +0000 2015)\n"
            "\n"
            "2 tweets contained unicode"
        )
        # Compare the produced file with the expected text directly: no
        # reference file has to be written (and possibly left behind when the
        # assertion fails), and a mismatch reports a readable diff. The test
        # files themselves are removed by tearDown.
        with open(self.test_output_file, 'r') as f:
            produced = f.read()
        self.assertEqual(produced, lines)
        print("Check correct execution of a tweet file [OK]")

    def test_process_tweets_file_empty_text_field(self):
        """A tweet lacking ``text`` still yields five output lines."""
        data = [
            {"created_at":"Thu Oct 29 17:51:01 +0000 2015","id":659789756637822976,"id_str":"659789756637822976"},
            {"created_at":"Thu Oct 29 18:10:49 +0000 2015","id":659794531844509700,"id_str":"659794531844509700","text":"I'm at Terminal de Integra\u00e7\u00e3o do Varadouro in Jo\u00e3o Pessoa, PB https:\/\/t.co\/HOl34REL1a"},
            {"created_at":"Thu Oct 29 17:51:50 +0000 2015","id":659789756637822976,"id_str":"659789756637822976","text":"@KayKay121 dragged me to the library. Now I have to be productive \ud83d\udc94 https:\/\/t.co\/HjZR3d5QaQ"},
        ]
        self.create_test_file(data)
        self.cleaner.process_tweets_file()
        self.assertEqual(self._count_output_lines(), 5)
        print("Check execution on tweet with no text field [OK]")

    def test_process_tweets_file_empty_timestamp_field(self):
        """A tweet lacking ``created_at`` still yields five output lines."""
        data = [
            {"created_at":"Thu Oct 29 17:51:01 +0000 2015","id":659789756637822976,"id_str":"659789756637822976","text":"Spark Summit East this week! #Spark #Apache"},
            {"id":659794531844509700,"id_str":"659794531844509700","text":"I'm at Terminal de Integra\u00e7\u00e3o do Varadouro in Jo\u00e3o Pessoa, PB https:\/\/t.co\/HOl34REL1a"},
            {"created_at":"Thu Oct 29 17:51:50 +0000 2015","id":659789756637822976,"id_str":"659789756637822976","text":"@KayKay121 dragged me to the library. Now I have to be productive \ud83d\udc94 https:\/\/t.co\/HjZR3d5QaQ"},
        ]
        self.create_test_file(data)
        self.cleaner.process_tweets_file()
        self.assertEqual(self._count_output_lines(), 5)
        print("Check execution on tweet with no timestamp field [OK]")
def setUp(self):
    """Prepare the per-test fixture.

    Derives the base directory from the current working directory with
    every "src" substring removed, builds the input/output test file paths
    under it, and creates the ``TweetCleaner`` instance used by the tests.
    """
    base_dir = os.getcwd().replace("src","")
    input_path = base_dir + "/tweet_input/test.txt"
    output_path = base_dir + "/tweet_output/test_ouput.txt"

    # Publish the computed paths on the test case before building the cleaner.
    self.current_path = base_dir
    self.test_input_file = input_path
    self.test_output_file = output_path
    self.cleaner = TweetCleaner(input_path, output_path)