def test_compute_rolling_edge(self):
    """Verify the rolling average degree as edges are added over time."""
    # One edge between two hashtags: both nodes have degree 1.
    self.graph.add_edge("spark", "apache",
                        Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
    self.assertEqual(self.graph.compute_rolling_average(), 1.0)

    # A triangle of hashtags arriving at the same instant raises the average to 2.
    triangle_epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
    for left, right in (("apache", "hadoop"), ("apache", "storm"), ("hadoop", "storm")):
        self.graph.add_edge(left, right, triangle_epoch)
    self.assertEqual(self.graph.compute_rolling_average(), 2.0)

    # A later disjoint edge (flink-spark) leaves the average at 2.
    self.graph.add_edge("flink", "spark",
                        Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
    self.assertEqual(self.graph.compute_rolling_average(), 2.0)
    print("Check if rolling average degree in compute_rolling_edge function is computed correctly [OK]")
def test_extract_tweet_info_success(self):
    """Check that hashtags and the epoch timestamp are extracted from a tweet.

    The hashtag set is expected lower-cased and de-duplicated, matching how
    other tests in this file feed hashtags into the graph.
    """
    line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","text":"Spark Summit East this week! #Spark #Apache","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}'
    hashtags, epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
    # assertEquals is a deprecated alias; assertEqual is the supported name.
    self.assertEqual(hashtags, set([u'apache', u'spark']))
    self.assertEqual(epoch, Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
    print("Check if extract_tweet_info function is executing correctly [OK]")
def test_add_neighbor_execute_correctly(self):
    """add_neighbor must register the neighbor and record its timestamp."""
    when = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015")
    neighbor = self.node4.get_hashtag()
    self.node3.add_neighbor(neighbor, when)
    adjacency = self.node3.get_adjacency_structure()
    # Node 4 must now appear among node 3's neighbors ...
    self.assertIn(neighbor, adjacency)
    # ... and its adjacency entry must contain the timestamp just added.
    self.assertIn(when, adjacency[neighbor])
    print("Check if add_neighbor function is executing correctly [OK]")
def test_time_window(self):
    """Exercise check_time_window: first tweet, in-window tweet, window shift."""
    # First tweet ever seen: both window boundaries snap to its timestamp
    # and no shift is reported.
    start = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015")
    self.assertFalse(self.graph.check_time_window(start))
    self.assertEqual(start, self.graph.first_timestamp_window)
    self.assertEqual(start, self.graph.last_timestamp_window)
    self.graph.add_node("a")

    # A tweet inside the window only advances the upper boundary.
    inside = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:10 +0000 2015")
    self.assertFalse(self.graph.check_time_window(inside))
    self.assertEqual(start, self.graph.first_timestamp_window)
    self.assertEqual(inside, self.graph.last_timestamp_window)
    self.graph.add_node("b")

    # A tweet well past the window makes check_time_window report a shift;
    # the lower boundary itself is unchanged by the check.
    late = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:53:10 +0000 2015")
    self.assertTrue(self.graph.check_time_window(late))
    self.assertEqual(start, self.graph.first_timestamp_window)
    self.assertEqual(late, self.graph.last_timestamp_window)
    print("Check if check_time_window function is computed correctly [OK]")
def test_graph_structure_update_window(self):
    """After the time window slides, evicted hashtags must be gone from the graph.

    The last tweet (00:34:31) pushes the window forward (presumably a
    60-second window — confirm against Graph); after processing, the oldest
    surviving timestamp is 00:33:58 and hashtags "b"/"c" should be evicted.
    """
    tweets = ['{"created_at":"Mon Nov 2 00:33:01 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"b"}]}}',
              '{"created_at":"Mon Nov 2 00:33:20 +0000 2015","entities":{"hashtags":[{"text":"b"},{"text":"a"}]}}',
              '{"created_at":"Mon Nov 2 00:33:30 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"c"}]}}',
              '{"created_at":"Mon Nov 2 00:33:58 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"f"}]}}',
              '{"created_at":"Mon Nov 2 00:34:05 +0000 2015","entities":{"hashtags":[{"text":"g"},{"text":"h"}]}}',
              '{"created_at":"Mon Nov 2 00:34:31 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"a"}]}}']
    for tweet in tweets:
        self.graph.process_data(self.graph.read_json_data(tweet))
    self.assertEqual(self.graph.first_timestamp_window,
                     Graph.convert_timestamp_to_epoch("Mon Nov 2 00:33:58 +0000 2015"))
    # BUG FIX: assertIsNot compares object identity ("b" is never the node
    # container, so it always passed). assertNotIn is the membership check
    # these assertions intended.
    self.assertNotIn("b", self.graph.get_nodes())
    self.assertNotIn("b", self.graph.get_node("a").get_adjacency_structure())
    self.assertNotIn("c", self.graph.get_nodes())
    self.assertNotIn("c", self.graph.get_node("a").get_adjacency_structure())
    print("Check if a graph was update with tweet window change [OK]")
def test_graph_structure_window(self):
    """Sliding the window must drop edge timestamps that fell out of range."""
    raw_tweets = (
        '{"created_at":"Mon Nov 2 00:33:01 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"b"}]}}',
        '{"created_at":"Mon Nov 2 00:33:20 +0000 2015","entities":{"hashtags":[{"text":"b"},{"text":"a"}]}}',
        '{"created_at":"Mon Nov 2 00:33:30 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"c"}]}}',
        '{"created_at":"Mon Nov 2 00:33:58 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"f"}]}}',
        '{"created_at":"Mon Nov 2 00:34:05 +0000 2015","entities":{"hashtags":[{"text":"g"},{"text":"h"}]}}',
    )
    for raw in raw_tweets:
        self.graph.process_data(self.graph.read_json_data(raw))
    surviving = Graph.convert_timestamp_to_epoch("Mon Nov 2 00:33:20 +0000 2015")
    # The 00:33:01 tweet has been aged out; the window now starts at 00:33:20.
    self.assertEqual(self.graph.first_timestamp_window, surviving)
    # Edge a-b was created twice, but only the 00:33:20 occurrence remains.
    edge_times = self.graph.get_node("a").get_adjacency_structure()["b"]
    self.assertEqual(len(edge_times), 1)
    self.assertEqual(edge_times[0], surviving)
    print("Check if update_graph_structure_window function is computed correctly [OK]")
def test_create_graph_by_file(self):
    """End-to-end: build a graph from a tweets file and verify graph + output.

    Writes a fixture file under tweet_input/, runs create_graph_by_file, and
    checks both the resulting graph structure and the rolling averages written
    to tweet_output/. Fixture files are removed at the end.
    """
    lines = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}\n{"created_at":"Thu Oct 29 17:51:30 +0000 2015","entities":{"hashtags":[{"text":"Apache"},{"text":"Hadoop"},{"text":"Storm"}]}}\n{"created_at":"Thu Oct 29 17:51:55 +0000 2015","entities":{"hashtags":[{"text":"Apache"}]}}\n{"created_at":"Thu Oct 29 17:51:56 +0000 2015","entities":{"hashtags":[{"text":"Flink"},{"text":"Spark"}]}}\n{"created_at":"Thu Oct 29 17:51:59 +0000 2015","entities":{"hashtags":[{"text":"HBase"},{"text":"Spark"}]}}\n{"created_at":"Thu Oct 29 17:52:05 +0000 2015","entities":{"hashtags":[{"text":"Hadoop"},{"text":"Apache"}]}}'
    current_path = os.getcwd().replace("src", "")
    # Write the fixture tweet file. ('tweet_file' — the original name 'input'
    # shadowed the builtin.)
    with open(current_path + "/tweet_input/test_tweets.txt", 'w') as tweet_file:
        tweet_file.write(lines)
    # Build the graph and the rolling-average output from the fixture file.
    self.graph.create_graph_by_file(current_path + "/tweet_input/test_tweets.txt",
                                    current_path + "/tweet_output/test_tweets_out.txt")
    result = self.print_graph()
    # Build the expected graph by hand.
    correct_graph = Graph()
    epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
    correct_graph.add_edge("apache", "hadoop", epoch)
    correct_graph.add_edge("apache", "storm", epoch)
    correct_graph.add_edge("hadoop", "storm", epoch)
    correct_graph.add_edge("apache", "hadoop", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:52:05 +0000 2015"))
    correct_graph.add_edge("hbase", "spark", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:59 +0000 2015"))
    correct_graph.add_edge("flink", "spark", Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
    # NOTE(review): print_graph() takes no argument here, so both calls appear
    # to render self.graph — meaning correct_graph is never actually compared
    # and the assertion below is trivially true. Confirm print_graph's
    # signature; it likely needs to accept the graph to render.
    correct_result = self.print_graph()
    self.assertEqual(result, correct_result)
    # Expected rolling average per input tweet, truncated to two decimals.
    correct_output = ["1.00", "2.00", "2.00", "2.00", "2.00", "1.67"]
    i = 0
    with open(current_path + "/tweet_output/test_tweets_out.txt") as output_file:
        for line in output_file:
            self.assertEqual(line.strip().split()[0], correct_output[i])
            i += 1
    # Remove the fixture and the generated output.
    os.remove(current_path + "/tweet_input/test_tweets.txt")
    os.remove(current_path + "/tweet_output/test_tweets_out.txt")
    print("Check the graph and the output file produced by tweets file [OK]")
def test_convert_timestamp_to_epoch(self):
    """A malformed timestamp must raise ValueError with the expected message."""
    with self.assertRaises(ValueError) as context:
        Graph.convert_timestamp_to_epoch("this is a test")
    # The message text is what Graph raises — do not "fix" its grammar here.
    msg = "Timestamp has a incorrect format"
    # BUG FIX: 'msg in context.exception' tested membership against the
    # exception object itself (TypeError on Python 3, exact-arg match on
    # Python 2); compare against the exception's string form instead.
    self.assertIn(msg, str(context.exception))
    print("Check convert_timestamp_to_epoch function with wrong timestamp format [OK]")
def create_edge(self, ht1, ht2, timestamp):
    """Add an edge between two hashtags at *timestamp*; return its epoch value."""
    moment = Graph.convert_timestamp_to_epoch(timestamp)
    self.graph.add_edge(ht1, ht2, moment)
    return moment
def test_extract_tweet_info_success_no_hashtag_field(self):
    """A tweet whose entities lack a 'hashtags' field yields an empty set."""
    line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{}}'
    hashtags, epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
    # assertEquals is a deprecated alias; assertEqual is the supported name.
    self.assertEqual(hashtags, set())
    self.assertEqual(epoch, Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
    print("Check if extract_tweet_info function is executing with tweets with no hashtags field [OK]")