# NOTE(review): stray module-level duplicate of TestGraph.test_create_graph_by_file
# (defined inside the class further down). It takes `self` but lives outside any
# class, so unittest never collects or runs it -- this is dead code and a candidate
# for deletion. Kept byte-identical apart from these comments.
def test_create_graph_by_file(self):
	"""Dead duplicate of TestGraph.test_create_graph_by_file; never executed by unittest."""
	lines ='{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}\n{"created_at":"Thu Oct 29 17:51:30 +0000 2015","entities":{"hashtags":[{"text":"Apache"},{"text":"Hadoop"},{"text":"Storm"}]}}\n{"created_at":"Thu Oct 29 17:51:55 +0000 2015","entities":{"hashtags":[{"text":"Apache"}]}}\n{"created_at":"Thu Oct 29 17:51:56 +0000 2015","entities":{"hashtags":[{"text":"Flink"},{"text":"Spark"}]}}\n{"created_at":"Thu Oct 29 17:51:59 +0000 2015","entities":{"hashtags":[{"text":"HBase"},{"text":"Spark"}]}}\n{"created_at":"Thu Oct 29 17:52:05 +0000 2015","entities":{"hashtags":[{"text":"Hadoop"},{"text":"Apache"}]}}' 
	
	current_path = os.getcwd().replace("src","")

	#Create input file
	with open(current_path + "/tweet_input/test_tweets.txt",'w') as input:	
		input.write(lines)
	
	# Create graph using the created input file
	self.graph.create_graph_by_file(current_path + "/tweet_input/test_tweets.txt",current_path + "/tweet_output/test_tweets_out.txt") 
	result = self.print_graph()

	#Create correct graph
	correct_graph = Graph()
	epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
	correct_graph.add_edge("apache","hadoop",epoch)
	correct_graph.add_edge("apache","storm",epoch)
	correct_graph.add_edge("hadoop","storm",epoch)
	correct_graph.add_edge("apache","hadoop",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:52:05 +0000 2015"))
	correct_graph.add_edge("hbase","spark",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:59 +0000 2015"))
	correct_graph.add_edge("flink","spark",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
	# NOTE(review): print_graph() reads self.graph, not correct_graph, so
	# correct_result is always identical to result and the assertion below
	# can never fail -- correct_graph is effectively unused.
	correct_result = self.print_graph()
	
	# Check if the two graphs are equal
	self.assertEqual(result,correct_result)	

	#Check output file is correct
	correct_output = ["1.00","2.00","2.00","2.00","2.00","1.67"]
	i = 0
	with open(current_path + "/tweet_output/test_tweets_out.txt") as output_file: 
		for line in output_file:
			self.assertEqual(line.strip().split()[0],correct_output[i])
			i+=1

	# Delete test files
	os.remove(current_path + "/tweet_input/test_tweets.txt")
	os.remove(current_path + "/tweet_output/test_tweets_out.txt")
	print "Check the graph and the output file produced by tweets file [OK]"
class TestGraph(unittest.TestCase):

	#Runs before each test case
	def setUp(self):
		"""Create a fresh, empty Graph so every test starts from a clean state."""
		#Create empty graph
		self.graph = Graph()
		
	def tearDown(self):
		"""Runs after each test case: drop the graph so tests stay independent."""
		# Original body was indented with spaces while the rest of the file
		# uses tabs; normalized to tabs (mixed indentation is fragile and a
		# hard error under Python 3).
		del self.graph

	def assertWarns(self,warning,msg,callable,*args, **kwds):
		"""Assert that invoking *callable* emits a warning of class *warning*
		whose most recent message equals *msg*.

		Fixes: the *warning* argument was previously ignored (the category
		check was hard-coded to the Warning base class); the unused `result`
		local is dropped; the mangled tab/space indentation is normalized.
		Note *callable* shadows the builtin of the same name -- kept so the
		positional call sites in this file stay valid.
		"""
		with warnings.catch_warnings(record=True) as warning_list:
			# 'always' ensures the warning is recorded even if already seen
			warnings.simplefilter('always')
			callable(*args, **kwds)
			# At least one recorded warning must match the expected category
			self.assertTrue(any(item.category == warning for item in warning_list))
			self.assertEqual(msg,str(warning_list[-1].message))

	def test_constructor_state(self):
		"Test the constructor method"
		#Checks if graph has no nodes	
		self.assertEqual(len(self.graph.get_nodes()),0)
		#Checks if node does not exist
		self.assertEqual(self.graph.get_node("spark"),None)
		#Checks if rolling average equals to zero
		self.assertEqual(self.graph.compute_rolling_average(),0.0)
		#Checks if first and last timestamps are set to zero 
		self.assertEqual(self.graph.first_timestamp_window,0)
		self.assertEqual(self.graph.last_timestamp_window,0)
		print "Check state of the attributes in the Graph class constructor [OK]"


	def test_extract_tweet_info_success(self):
		#Check successful information extraction 
		line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","text":"Spark Summit East this week! #Spark #Apache","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}'
		hashtags,epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
		self.assertEquals(hashtags,set([u'apache',u'spark']))
		self.assertEquals(epoch,Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
		print "Check if extract_tweet_info function is executing correctly [OK]"


	#Check extraction with no hashtags 
	def test_extract_tweet_info_success_no_hashtag(self):
		line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{"hashtags":[]}}'
		hashtags,epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
		self.assertEquals(hashtags,set())
		self.assertEquals(epoch,Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
		print "Check if extract_tweet_info function is executing with tweets with no hashtags [OK]"

	#Check extraction with no hashtags 
	def test_extract_tweet_info_success_no_hashtag_field(self):
		line = '{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{}}'
		hashtags,epoch = self.graph.extract_tweet_info(self.graph.read_json_data(line))
		self.assertEquals(hashtags,set())
		self.assertEquals(epoch,Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
		print "Check if extract_tweet_info function is executing with tweets with no hashtags field [OK]"

	#Check unsuccessful information extraction - no timestamp field
	def test_extract_tweet_info_fail(self):
		data = self.graph.read_json_data('{"entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}')
		self.assertWarns(Warning,"Tweet was skipped because it has no timestamp",self.graph.extract_tweet_info,data)
		#Check return 
		with warnings.catch_warnings():
			warnings.simplefilter("ignore")
			hashtags, epoch = self.graph.extract_tweet_info(data)
			self.assertEqual(epoch,None)
		print "Check if extract_tweet_info function is executing with tweets with no timestamp field [OK]"


	#Check unsuccessful information extraction - timestamp wrong format
	def test_extract_tweet_info_fail_bad_format(self):
		data = self.graph.read_json_data('{"created_at":"Thu Oct 29 17:51:01 +0000","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}')
		self.assertWarns(Warning,"Tweet was skipped because timestamp has a incorrect format",self.graph.extract_tweet_info,data)
		#Check return 
		with warnings.catch_warnings():
			warnings.simplefilter("ignore")
			hashtags, epoch = self.graph.extract_tweet_info(data)
			self.assertEqual(epoch,None)
		print "Check if extract_tweet_info function is executing with tweets with bad format timestamp [OK]"


	#Check execution when input file does not exist
	def test_input_file_do_not_exist(self):
		filename = "foo.txt"
		filename_out = "foo_out.txt"
		with self.assertRaises(IOError) as context:
			self.graph.create_graph_by_file(filename,filename_out)
		msg = "Input file %s does not exist" % filename 
		self.assertTrue(msg in context.exception)	
		# Delete test files
		os.remove(filename_out)
		print "Test execution when input does not exist [OK]"

	
	# Check add new node to the graph
	def test_add_new_node_graph(self):
		node = self.graph.add_node("node1")
		self.assertIn(node.get_hashtag(),self.graph.nodes)
		print "Check if add_node function is executing correctly [OK]"


	# Check existing node to the graph
	def test_add_existing_node_graph(self):
		node1 = self.graph.add_node("node1")
		node2 = self.graph.add_node("node1")
		self.assertEquals(node1,node2)
		print "Check add_node function when add nodes that are already in the graph [OK]"

	
	def create_edge(self,ht1,ht2,timestamp):
		"""Helper: add an edge between two hashtags at *timestamp* (a created_at
		string) and return the epoch value the timestamp converted to."""
		epoch = Graph.convert_timestamp_to_epoch(timestamp)
		self.graph.add_edge(ht1,ht2,epoch)
		return epoch

	#Check if edge was added to the graph
	def test_add_edge_correct_execution(self):
		epoch = self.create_edge("spark","apache","Thu Oct 29 17:51:01 +0000 2015")
		node1 = self.graph.get_node("spark")
		node2 = self.graph.get_node("apache")
		#Check if nodes exists in the graph
		self.assertIn(node1.get_hashtag(),self.graph.nodes)
		self.assertIn(node2.get_hashtag(),self.graph.nodes)
		#Check if nodes are connected
		self.assertIn(node2.get_hashtag(),node1.get_adjacency_structure())
		self.assertIn(node1.get_hashtag(),node2.get_adjacency_structure())
		#Check if edge has corrected timestamp
		self.assertEqual(node1.get_adjacency_structure()[node2.get_hashtag()][0],epoch)
		self.assertEqual(node2.get_adjacency_structure()[node1.get_hashtag()][0],epoch)
		#Check node degree
		self.assertEqual(node1.get_degree(),1)
		self.assertEqual(node2.get_degree(),1)
		#Check rolling_average
		self.assertEqual(self.graph.compute_rolling_average(),1.0)
		print "Check if add_edge function executes correctly [OK]"

	#Check duplicate edge with same timestamp
	def test_add_edge_same_timestamp(self):
		epoch = self.create_edge("spark","apache","Thu Oct 29 17:51:01 +0000 2015")
		self.graph.add_edge("spark","apache",epoch)
		node1 = self.graph.get_node("spark")
		self.assertEqual(len(node1.get_adjacency_structure()),1)
		print "Check add_edge function when add an edge with a existing timestamp  [OK]"

	#Check duplicate edge with different timestamp
	def test_add_same_edge_different_timestamp(self):
		self.create_edge("spark","apache","Thu Oct 29 17:51:01 +0000 2015")
		self.create_edge("spark","apache","Thu Oct 29 17:52:01 +0000 2015")
		node1 = self.graph.get_node("spark")
		node2 = self.graph.get_node("apache")
		self.assertEqual(len(node1.get_adjacency_structure()[node2.get_hashtag()]),2)
		self.assertEqual(len(node2.get_adjacency_structure()[node1.get_hashtag()]),2)
		print "Check add_edge function when add an edge with a distinct timestamp  [OK]"

	def test_add_edge_with_wrong_format_timestamp(self):
		#Check timestamp with wrong format
		with self.assertRaises(ValueError) as context:
			self.graph.add_edge("sun","apple","this is a test")
		msg = "Timestamp must be a float number (epoch time)"
		self.assertTrue(msg in context.exception)
		print "Check add_edge function when timestamp has wrong format [OK]"

	#Check timestamp with wrong format
	def test_convert_timestamp_to_epoch(self):
		with self.assertRaises(ValueError) as context:
			Graph.convert_timestamp_to_epoch("this is a test")
		msg = "Timestamp has a incorrect format"
		self.assertTrue(msg in context.exception)
		print "Check convert_timestamp_to_epoch function with wrong timestamp format [OK]"

	#Check if rolling average degree is computed correctly
	def test_compute_rolling_edge(self):
		self.graph.add_edge("spark","apache",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015"))
		self.assertEqual(self.graph.compute_rolling_average(),1.0)
		epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
		self.graph.add_edge("apache","hadoop",epoch)
		self.graph.add_edge("apache","storm",epoch)
		self.graph.add_edge("hadoop","storm",epoch)
		self.assertEqual(self.graph.compute_rolling_average(),2.0)
		self.graph.add_edge("flink","spark",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
		self.assertEqual(self.graph.compute_rolling_average(),2.0)
		print "Check if rolling average degree in compute_rolling_edge function is computed correctly [OK]"

	#Check if tweet window is executing correctly
	def test_time_window(self):
		#Check window when the first new tweet arrives - update_time_window
		epoch_first = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:01 +0000 2015")
		self.assertFalse(self.graph.check_time_window(epoch_first))
		self.assertEqual(epoch_first,self.graph.first_timestamp_window)
		self.assertEqual(epoch_first,self.graph.last_timestamp_window)
		self.graph.add_node("a")
		#Check window when new tweet does no modify time window
		epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:10 +0000 2015")
		self.assertFalse(self.graph.check_time_window(epoch))
		self.assertEqual(epoch_first,self.graph.first_timestamp_window)
		self.assertEqual(epoch,self.graph.last_timestamp_window)
		self.graph.add_node("b")
		#Check window when new tweet does no modify time window
		epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:53:10 +0000 2015")
		self.assertTrue(self.graph.check_time_window(epoch))
		self.assertEqual(epoch_first,self.graph.first_timestamp_window)
		self.assertEqual(epoch,self.graph.last_timestamp_window)
		print "Check if check_time_window function is computed correctly [OK]"


	#Check update_graph_structure method
	def test_graph_structure_window(self):	
		self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:01 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"b"}]}}'))
		self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:20 +0000 2015","entities":{"hashtags":[{"text":"b"},{"text":"a"}]}}'))
		self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:30 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"c"}]}}'))
		self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:33:58 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"f"}]}}'))
		self.graph.process_data(self.graph.read_json_data('{"created_at":"Mon Nov 2 00:34:05 +0000 2015","entities":{"hashtags":[{"text":"g"},{"text":"h"}]}}'))
		epoch = Graph.convert_timestamp_to_epoch("Mon Nov 2 00:33:20 +0000 2015")
		self.assertEqual(self.graph.first_timestamp_window,epoch)
		self.assertEqual(len(self.graph.get_node("a").get_adjacency_structure()["b"]),1)
		self.assertEqual(self.graph.get_node("a").get_adjacency_structure()["b"][0],epoch)
		print "Check if update_graph_structure_window function is computed correctly [OK]"


	#Check if graph was update by the window change
	def test_graph_structure_update_window(self):
		tweets = ['{"created_at":"Mon Nov 2 00:33:01 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"b"}]}}','{"created_at":"Mon Nov 2 00:33:20 +0000 2015","entities":{"hashtags":[{"text":"b"},{"text":"a"}]}}','{"created_at":"Mon Nov 2 00:33:30 +0000 2015","entities":{"hashtags":[{"text":"a"},{"text":"c"}]}}','{"created_at":"Mon Nov 2 00:33:58 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"f"}]}}','{"created_at":"Mon Nov 2 00:34:05 +0000 2015","entities":{"hashtags":[{"text":"g"},{"text":"h"}]}}','{"created_at":"Mon Nov 2 00:34:31 +0000 2015","entities":{"hashtags":[{"text":"e"},{"text":"a"}]}}']

		for tweet in tweets:
			self.graph.process_data(self.graph.read_json_data(tweet))	
		self.assertEqual(self.graph.first_timestamp_window,Graph.convert_timestamp_to_epoch("Mon Nov 2 00:33:58 +0000 2015"))
		self.assertIsNot("b",self.graph.get_nodes())
		self.assertIsNot("b",self.graph.get_node("a").get_adjacency_structure())
		self.assertIsNot("c",self.graph.get_nodes())
		self.assertIsNot("c",self.graph.get_node("a").get_adjacency_structure())
		print "Check if a graph was update with tweet window change [OK]"


	# Check if graph was created according to the input file	
	def test_create_graph_by_file(self):
		lines ='{"created_at":"Thu Oct 29 17:51:01 +0000 2015","entities":{"hashtags":[{"text":"Spark"},{"text":"Apache"}]}}\n{"created_at":"Thu Oct 29 17:51:30 +0000 2015","entities":{"hashtags":[{"text":"Apache"},{"text":"Hadoop"},{"text":"Storm"}]}}\n{"created_at":"Thu Oct 29 17:51:55 +0000 2015","entities":{"hashtags":[{"text":"Apache"}]}}\n{"created_at":"Thu Oct 29 17:51:56 +0000 2015","entities":{"hashtags":[{"text":"Flink"},{"text":"Spark"}]}}\n{"created_at":"Thu Oct 29 17:51:59 +0000 2015","entities":{"hashtags":[{"text":"HBase"},{"text":"Spark"}]}}\n{"created_at":"Thu Oct 29 17:52:05 +0000 2015","entities":{"hashtags":[{"text":"Hadoop"},{"text":"Apache"}]}}' 
		
		current_path = os.getcwd().replace("src","")

		#Create input file
		with open(current_path + "/tweet_input/test_tweets.txt",'w') as input:	
			input.write(lines)
		
		# Create graph using the created input file
		self.graph.create_graph_by_file(current_path + "/tweet_input/test_tweets.txt",current_path + "/tweet_output/test_tweets_out.txt") 
		result = self.print_graph()

		#Create correct graph
		correct_graph = Graph()
		epoch = Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:30 +0000 2015")
		correct_graph.add_edge("apache","hadoop",epoch)
		correct_graph.add_edge("apache","storm",epoch)
		correct_graph.add_edge("hadoop","storm",epoch)
		correct_graph.add_edge("apache","hadoop",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:52:05 +0000 2015"))
		correct_graph.add_edge("hbase","spark",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:59 +0000 2015"))
		correct_graph.add_edge("flink","spark",Graph.convert_timestamp_to_epoch("Thu Oct 29 17:51:56 +0000 2015"))
		correct_result = self.print_graph()
		
		# Check if the two graphs are equal
		self.assertEqual(result,correct_result)	

		#Check output file is correct
		correct_output = ["1.00","2.00","2.00","2.00","2.00","1.67"]
		i = 0
		with open(current_path + "/tweet_output/test_tweets_out.txt") as output_file: 
			for line in output_file:
				self.assertEqual(line.strip().split()[0],correct_output[i])
				i+=1

		# Delete test files
		os.remove(current_path + "/tweet_input/test_tweets.txt")
		os.remove(current_path + "/tweet_output/test_tweets_out.txt")
		print "Check the graph and the output file produced by tweets file [OK]"


	# Print graph structure as string
	def print_graph(self):
		"""Serialize self.graph as a string of (node, neighbor, timestamps) triples."""
		parts = []
		for hashtag in self.graph.nodes:
			node = self.graph.get_node(hashtag)
			for neighbor in node.get_neighbors():
				# Concatenate the edge's timestamps into one token
				stamps = ''.join(str(t) for t in node.adjacent[neighbor])
				parts.append("(%s, %s, %s)" % (hashtag,neighbor,stamps))
		return ''.join(parts)

	# Clean a string with escape and unicode chars
	def test_clean_tweet_sucessfully(self):
		text = "spa\u00e7r\n\rk"
		processed_text = "spar k"
		self.assertEqual(self.graph.extract_unicode_escape_chars(text),processed_text)	
		print "Check if unicode hashtags are cleaned [OK]"
		
	def test_clean_empty_tweet(self):
		self.assertEqual(self.graph.extract_unicode_escape_chars(""),"")	
		print "Check if extract_unicode_escape_chars function executes correctly when there is no hashtags [OK]"