import os
import unittest
import warnings

from graph import Graph  # assumed import path for the Graph class under test
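
# The suite leans on Graph.convert_timestamp_to_epoch to parse Twitter's
# created_at format, e.g. "Thu Oct 29 17:51:01 +0000 2015". A minimal sketch of
# one way that conversion could work (an illustrative assumption only; the real
# method lives in the Graph class and may differ, and nothing below calls this):
import calendar
import time

def _example_timestamp_to_epoch(timestamp):
    # The offset is always "+0000" in the test data, so it is matched literally
    parsed = time.strptime(timestamp, "%a %b %d %H:%M:%S +0000 %Y")
    # Interpret the UTC struct_time as seconds since the epoch
    return calendar.timegm(parsed)
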
class TestGraph(unittest.TestCase):

    # Runs before each test case
    def setUp(self):
        # Create an empty graph
        self.graph = Graph()

    def tearDown(self):
        del self.graph

    # Python 2's unittest has no assertWarns, so provide a minimal substitute
    def assertWarns(self, warning, msg, func, *args, **kwds):
        with warnings.catch_warnings(record=True) as warning_list:
            warnings.simplefilter('always')
            func(*args, **kwds)
            self.assertTrue(any(item.category == warning for item in warning_list))
            self.assertEqual(msg, str(warning_list[-1].message))

    def test_constructor_state(self):
        "Test the constructor method"
        # Check that the graph has no nodes
        self.assertEqual(len(self.graph.get_nodes()), 0)
        # Check that a node does not exist
        self.assertEqual(self.graph.get_node("spark"), None)
        # Check that the rolling average equals zero
        self.assertEqual(self.graph.compute_rolling_average(), 0.0)
        # Check that the first and last timestamps are set to zero
        self.assertEqual(self.graph.first_timestamp_window, 0)
        self.assertEqual(self.graph.last_timestamp_window, 0)
        print "Check state of the attributes in the Graph class constructor [OK]"

    # Check successful information extraction
    def test_extract_tweet_info_success(self):
        line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","text":"Spark Summit East this week! #Spark #Apache","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}'
        hashtags, epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
        self.assertEqual(hashtags, set([u'apache', u'spark']))
        self.assertEqual(epoch, Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
        print "Check if extract_tweet_info function executes correctly [OK]"

    # Check extraction with no hashtags
    def test_extract_tweet_info_success_no_hashtag(self):
        line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{"hashtags":[]}}'
        hashtags, epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
        self.assertEqual(hashtags, set())
        self.assertEqual(epoch, Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
        print "Check if extract_tweet_info function executes for tweets with no hashtags [OK]"

    # Check extraction with no hashtags field
    def test_extract_tweet_info_success_no_hashtag_field(self):
        line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{}}'
        hashtags, epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
        self.assertEqual(hashtags, set())
        self.assertEqual(epoch, Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
        print "Check if extract_tweet_info function executes for tweets with no hashtags field [OK]"
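
    # The three tests above expect hashtags lowercased and deduplicated into a
    # set. A minimal sketch of that normalization (an assumption about what
    # extract_tweet_info does internally; this helper is illustrative only and
    # is not called by any test):
    @staticmethod
    def _sketch_normalize_hashtags(entities):
        # {"hashtags": [{"text": "Spark"}, {"text": "Apache"}]} -> set([u'spark', u'apache'])
        return set(tag["text"].lower() for tag in entities.get("hashtags", []))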
    # Check unsuccessful information extraction - no timestamp field
    def test_extract_tweet_info_fail(self):
        data = self.graph.read_json_data('{"entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}')
        self.assertWarns(Warning, "Tweet was skipped because it has no timestamp", self.graph.extract_tweet_info, data)
        # Check the return value
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            hashtags, epoch = self.graph.extract_tweet_info(data)
        self.assertEqual(epoch, None)
        print "Check if extract_tweet_info function executes for tweets with no timestamp field [OK]"

    # Check unsuccessful information extraction - timestamp in the wrong format
    def test_extract_tweet_info_fail_bad_format(self):
        data = self.graph.read_json_data('{"created_at":"Thu Oct 29 17:51:01 +0000","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}')
        # The expected message must match the implementation's warning text verbatim
        self.assertWarns(Warning, "Tweet was skipped because timestamp has a incorrect format", self.graph.extract_tweet_info, data)
        # Check the return value
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            hashtags, epoch = self.graph.extract_tweet_info(data)
        self.assertEqual(epoch, None)
        print "Check if extract_tweet_info function executes for tweets with a badly formatted timestamp [OK]"

    # Check execution when the input file does not exist
    def test_input_file_does_not_exist(self):
        filename = "foo.txt"
        filename_out = "foo_out.txt"
        with self.assertRaises(IOError) as context:
            self.graph.create_graph_by_file(filename, filename_out)
        msg = "Input file %s does not exist" % filename
        self.assertIn(msg, str(context.exception))
        # Delete the output file created before the error was raised
        os.remove(filename_out)
        print "Test execution when input does not exist [OK]"

    # Check adding a new node to the graph
    def test_add_new_node_graph(self):
        node = self.graph.add_node("node1")
        self.assertIn(node.get_hashtag(), self.graph.nodes)
        print "Check if add_node function executes correctly [OK]"

    # Check adding an existing node to the graph
    def test_add_existing_node_graph(self):
        node1 = self.graph.add_node("node1")
        node2 = self.graph.add_node("node1")
        self.assertEqual(node1, node2)
        print "Check add_node function when adding nodes that are already in the graph [OK]"

    # Helper: add an edge from a raw timestamp and return its epoch time
    def create_edge(self, ht1, ht2, timestamp):
        epoch = Graph.convert_timestamp_to_epoch(timestamp)
        self.graph.add_edge(ht1, ht2, epoch)
        return epoch

    # Check if an edge was added to the graph
    def test_add_edge_correct_execution(self):
        epoch = self.create_edge("spark", "apache", "Thu Oct 29 17:51:01 +0000 2015")
        node1 = self.graph.get_node("spark")
        node2 = self.graph.get_node("apache")
        # Check that both nodes exist in the graph
        self.assertIn(node1.get_hashtag(), self.graph.nodes)
        self.assertIn(node2.get_hashtag(), self.graph.nodes)
        # Check that the nodes are connected
        self.assertIn(node2.get_hashtag(), node1.get_adjacency_structure())
        self.assertIn(node1.get_hashtag(), node2.get_adjacency_structure())
        # Check that the edge carries the correct timestamp
        self.assertEqual(node1.get_adjacency_structure()[node2.get_hashtag()][0], epoch)
        self.assertEqual(node2.get_adjacency_structure()[node1.get_hashtag()][0], epoch)
        # Check the node degrees
        self.assertEqual(node1.get_degree(), 1)
        self.assertEqual(node2.get_degree(), 1)
        # Check the rolling average
        self.assertEqual(self.graph.compute_rolling_average(), 1.0)
        print "Check if add_edge function executes correctly [OK]"

    # Check a duplicate edge with the same timestamp
    def test_add_edge_same_timestamp(self):
        epoch = self.create_edge("spark", "apache", "Thu Oct 29 17:51:01 +0000 2015")
        self.graph.add_edge("spark", "apache", epoch)
        node1 = self.graph.get_node("spark")
        self.assertEqual(len(node1.get_adjacency_structure()), 1)
        print "Check add_edge function when adding an edge with an existing timestamp [OK]"

    # Check a duplicate edge with a different timestamp
    def test_add_same_edge_different_timestamp(self):
        self.create_edge("spark", "apache", "Thu Oct 29 17:51:01 +0000 2015")
        self.create_edge("spark", "apache", "Thu Oct 29 17:52:01 +0000 2015")
        node1 = self.graph.get_node("spark")
        node2 = self.graph.get_node("apache")
        self.assertEqual(len(node1.get_adjacency_structure()[node2.get_hashtag()]), 2)
        self.assertEqual(len(node2.get_adjacency_structure()[node1.get_hashtag()]), 2)
        print "Check add_edge function when adding an edge with a distinct timestamp [OK]"

    # Check add_edge with a timestamp in the wrong format
    def test_add_edge_with_wrong_format_timestamp(self):
        with self.assertRaises(ValueError) as context:
            self.graph.add_edge("sun", "apple", "this is a test")
        msg = "Timestamp must be a float number (epoch time)"
        self.assertIn(msg, str(context.exception))
        print "Check add_edge function when timestamp has wrong format [OK]"
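
    # The rolling-average assertions in the tests below are consistent with the
    # standard average-degree formula: the sum of all node degrees divided by
    # the node count, i.e. 2*E/N for E edges over N nodes. A minimal sketch of
    # that formula (an assumption about how compute_rolling_average is defined;
    # this helper is illustrative only and is never called by the tests):
    @staticmethod
    def _sketch_average_degree(num_edges, num_nodes):
        # An empty graph averages to zero, matching test_constructor_state
        if num_nodes == 0:
            return 0.0
        # Each edge contributes 1 to the degree of both of its endpoints
        return 2.0 * num_edges / num_nodes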
    # Check convert_timestamp_to_epoch with a timestamp in the wrong format
    def test_convert_timestamp_to_epoch(self):
        with self.assertRaises(ValueError) as context:
            Graph.convert_timestamp_to_epoch("this is a test")
        # The expected message must match the implementation's error text verbatim
        msg = "Timestamp has a incorrect format"
        self.assertIn(msg, str(context.exception))
        print "Check convert_timestamp_to_epoch function with wrong timestamp format [OK]"

    # Check if the rolling average degree is computed correctly
    def test_compute_rolling_edge(self):
        # 1 edge over 2 nodes: 2*1/2 = 1.0
        self.graph.add_edge("spark", "apache", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
        self.assertEqual(self.graph.compute_rolling_average(), 1.0)
        epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
        self.graph.add_edge("apache", "hadoop", epoch)
        self.graph.add_edge("apache", "storm", epoch)
        self.graph.add_edge("hadoop", "storm", epoch)
        # 4 edges over 4 nodes: 2*4/4 = 2.0
        self.assertEqual(self.graph.compute_rolling_average(), 2.0)
        self.graph.add_edge("flink", "spark", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
        # 5 edges over 5 nodes: 2*5/5 = 2.0
        self.assertEqual(self.graph.compute_rolling_average(), 2.0)
        print "Check if rolling average degree in compute_rolling_edge function is computed correctly [OK]"

    # Check if the tweet window logic executes correctly
    def test_time_window(self):
        # Check the window when the first tweet arrives - update_time_window
        epoch_first = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015")
        self.assertFalse(self.graph.check_time_window(epoch_first))
        self.assertEqual(epoch_first, self.graph.first_timestamp_window)
        self.assertEqual(epoch_first, self.graph.last_timestamp_window)
        self.graph.add_node("a")
        # Check the window when a new tweet does not modify the time window
        epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:10 +0000 2015")
        self.assertFalse(self.graph.check_time_window(epoch))
        self.assertEqual(epoch_first, self.graph.first_timestamp_window)
        self.assertEqual(epoch, self.graph.last_timestamp_window)
        self.graph.add_node("b")
        # Check the window when a new tweet does modify the time window
        epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:53:10 +0000 2015")
        self.assertTrue(self.graph.check_time_window(epoch))
        self.assertEqual(epoch_first, self.graph.first_timestamp_window)
        self.assertEqual(epoch, self.graph.last_timestamp_window)
        print "Check if check_time_window function is computed correctly [OK]"

    # Check the update_graph_structure method
    def test_graph_structure_window(self):
        self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:01 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"b"}]}}'))
        self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:20 +0000 2015","entities":{"hashtags":[{"text":"b"},{"text":"a"}]}}'))
        self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:30 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"c"}]}}'))
        self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:58 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"f"}]}}'))
        self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:34:05 +0000 2015","entities":{"hashtags":[{"text":"g"},{"text":"h"}]}}'))
        # Once 00:34:05 arrives, the 00:33:01 tweet falls outside the 60-second
        # window, so the oldest surviving timestamp is 00:33:20
        epoch = Graph.convert_timestamp_to_epoch("Mon Nov 2 00:33:20 +0000 2015")
        self.assertEqual(self.graph.first_timestamp_window, epoch)
        self.assertEqual(len(self.graph.get_node("a").get_adjacency_structure()["b"]), 1)
        self.assertEqual(self.graph.get_node("a").get_adjacency_structure()["b"][0], epoch)
        print "Check if update_graph_structure_window function is computed correctly [OK]"
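
    # The window behaviour asserted above and below is consistent with a
    # 60-second sliding window: an incoming timestamp shifts the window when it
    # lies more than 60 seconds past the oldest retained timestamp. A minimal
    # sketch of that rule (an assumption about check_time_window's logic;
    # illustrative only, never called by the tests):
    @staticmethod
    def _sketch_window_shifted(first_timestamp_window, new_epoch, window_size=60):
        # e.g. 17:51:01 -> 17:53:10 is 129s apart, so the window shifts (True);
        # 17:51:01 -> 17:51:10 is 9s apart, so it does not (False)
        return new_epoch - first_timestamp_window > window_size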
    # Check if the graph was updated by the window change
    def test_graph_structure_update_window(self):
        tweets = ['{"created_at":"Mon Nov 2 00:33:01 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"b"}]}}',
                  '{"created_at":"Mon Nov 2 00:33:20 +0000 2015","entities":{"hashtags":[{"text":"b"},{"text":"a"}]}}',
                  '{"created_at":"Mon Nov 2 00:33:30 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"c"}]}}',
                  '{"created_at":"Mon Nov 2 00:33:58 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"f"}]}}',
                  '{"created_at":"Mon Nov 2 00:34:05 +0000 2015","entities":{"hashtags":[{"text":"g"},{"text":"h"}]}}',
                  '{"created_at":"Mon Nov 2 00:34:31 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"a"}]}}']
        for tweet in tweets:
            self.graph.process_data(self.graph.read_json_data(tweet))
        self.assertEqual(self.graph.first_timestamp_window, Graph.convert_timestamp_to_epoch("Mon Nov 2 00:33:58 +0000 2015"))
        # Nodes "b" and "c" only appear in evicted tweets, so they must be gone
        self.assertNotIn("b", self.graph.get_nodes())
        self.assertNotIn("b", self.graph.get_node("a").get_adjacency_structure())
        self.assertNotIn("c", self.graph.get_nodes())
        self.assertNotIn("c", self.graph.get_node("a").get_adjacency_structure())
        print "Check if the graph was updated with a tweet window change [OK]"

    # Check if the graph was created according to the input file
    def test_create_graph_by_file(self):
        lines = '\n'.join([
            '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}',
            '{"created_at":"Thu Oct 29 17:51:30 +0000 2015","entities":{"hashtags":[{"text":"Apache"},{"text":"Hadoop"},{"text":"Storm"}]}}',
            '{"created_at":"Thu Oct 29 17:51:55 +0000 2015","entities":{"hashtags":[{"text":"Apache"}]}}',
            '{"created_at":"Thu Oct 29 17:51:56 +0000 2015","entities":{"hashtags":[{"text":"Flink"},{"text":"Spark"}]}}',
            '{"created_at":"Thu Oct 29 17:51:59 +0000 2015","entities":{"hashtags":[{"text":"HBase"},{"text":"Spark"}]}}',
            '{"created_at":"Thu Oct 29 17:52:05 +0000 2015","entities":{"hashtags":[{"text":"Hadoop"},{"text":"Apache"}]}}'])
        current_path = os.getcwd().replace("src", "")
        # Create the input file
        with open(current_path + "/tweet_input/test_tweets.txt", 'w') as input_file:
            input_file.write(lines)
        # Create the graph using the created input file
        self.graph.create_graph_by_file(current_path + "/tweet_input/test_tweets.txt", current_path + "/tweet_output/test_tweets_out.txt")
        result = self.print_graph(self.graph)
        # Build the expected graph by hand; the 17:51:01 tweet has fallen out of
        # the 60-second window by the time the 17:52:05 tweet arrives
        correct_graph = Graph()
        epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
        correct_graph.add_edge("apache", "hadoop", epoch)
        correct_graph.add_edge("apache", "storm", epoch)
        correct_graph.add_edge("hadoop", "storm", epoch)
        correct_graph.add_edge("apache", "hadoop", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:52:05 +0000 2015"))
        correct_graph.add_edge("hbase", "spark", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:59 +0000 2015"))
        correct_graph.add_edge("flink", "spark", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
        correct_result = self.print_graph(correct_graph)
        # Check that the two graphs are equal
        self.assertEqual(result, correct_result)
        # Check that the output file is correct; the final 1.67 is 2*5/6 for the
        # 5 edges and 6 nodes left in the window, rounded to two decimals
        correct_output = ["1.00", "2.00", "2.00", "2.00", "2.00", "1.67"]
        with open(current_path + "/tweet_output/test_tweets_out.txt") as output_file:
            for i, line in enumerate(output_file):
                self.assertEqual(line.strip().split()[0], correct_output[i])
        # Delete test files
        os.remove(current_path + "/tweet_input/test_tweets.txt")
        os.remove(current_path + "/tweet_output/test_tweets_out.txt")
        print "Check the graph and the output file produced by tweets file [OK]"
    # Serialize a graph's structure as a string (helper, not a test); takes the
    # graph as a parameter so the built and the expected graphs can be compared
    def print_graph(self, graph):
        lines = ""
        for node_hashtag in graph.nodes:
            for neighbor in graph.nodes[node_hashtag].get_neighbors():
                timestamps = graph.get_node(node_hashtag).adjacent[neighbor]
                lines += "(%s, %s, %s)" % (node_hashtag, neighbor, ''.join(str(e) for e in timestamps))
        return lines

    # Clean a string with escape and unicode chars
    def test_clean_tweet_successfully(self):
        text = "spa\u00e7r\n\rk"
        processed_text = "spar k"
        self.assertEqual(self.graph.extract_unicode_escape_chars(text), processed_text)
        print "Check if unicode hashtags are cleaned [OK]"

    def test_clean_empty_tweet(self):
        self.assertEqual(self.graph.extract_unicode_escape_chars(""), "")
        print "Check if extract_unicode_escape_chars function executes correctly when there are no hashtags [OK]"
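
# Standard entry point so the suite can also be run directly (e.g.
# "python test_graph.py", assuming that is this file's name) rather than only
# through a runner; unittest.main() collects every test_* method defined above.
if __name__ == '__main__':
    unittest.main()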