Example 1
def output_block_pattern_list(block_pattern_list, input_file, prefix = ""):
#{{{
	file_mode = global_APIs.get_file_mode(input_file)
	block_pattern_list_file_path = "sql_database/"
	block_pattern_list_file_path += file_mode
	if not prefix == "":
		block_pattern_list_file_path += "/" + str(prefix)
	block_pattern_list_file_path += "/block_pattern_list.txt"
	fl = open (block_pattern_list_file_path, "w") 
	for block in block_pattern_list:
		fl.write(block)
		fl.write(" \n")
		pattern = block_pattern_list[block]["pattern"]
		tmp = ""
		for message in pattern:
			tmp += message
			tmp += " "
		fl.write(tmp)
		fl.write("\n")
		
		happen_count = block_pattern_list[block]["happen_count"]
		tmp = ""
		tmp += "correct"
		tmp += " "
		tmp += str(happen_count["correct"])
		tmp += " "
		tmp += "incorrect"
		tmp += " "
		tmp += str(happen_count["incorrect"])
		fl.write(tmp)
		fl.write("\n")
	fl.close()
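A minimal usage sketch follows (the block name, messages and input file are hypothetical, and it assumes global_APIs.get_file_mode resolves the input file to an existing sub-directory under sql_database/). It also shows the three-lines-per-block text layout that read_block_pattern_list in Example 16 parses back.

example_blocks = {
	"block_0": {
		"pattern": ["msg_a", "msg_b"],
		"happen_count": {"correct": 3, "incorrect": 1},
	},
}
output_block_pattern_list(example_blocks, "some_input_file", "0")
#resulting block_pattern_list.txt, three lines per block:
#	block_0
#	msg_a msg_b
#	correct 3 incorrect 1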
Example 2
def output_message_closest_message_list(message_closest_message_list, input_file, prefix = ""):
#{{{
	file_mode = global_APIs.get_file_mode(input_file)
	message_closest_message_file_path = "sql_database/"
	message_closest_message_file_path += file_mode
	if not prefix == "":
		message_closest_message_file_path += "/" + str(prefix)
	message_closest_message_file_path += "/message_closest_message_list.txt"
	fl = open (message_closest_message_file_path, "w") 
	for message_pattern in message_closest_message_list:
		fl.write(message_pattern)
		fl.write(" \n")
		closest_message_list = message_closest_message_list[message_pattern]
		tmp = ""
		if "prior" in closest_message_list:
			tmp += "prior"
			tmp += " "
			for message in closest_message_list["prior"]:
				tmp += message
				tmp += " "

		fl.write(tmp)
		fl.write("\n")
		tmp = ""
		if "follow" in closest_message_list:
			tmp += "follow"
			tmp += " "
			for message in closest_message_list["follow"]:
				tmp += message
				tmp += " "
		fl.write(tmp)
		fl.write("\n")
	fl.close()
Example 3
def output_single_line_pattern_db(single_line_pattern_db, input_file, prefix = ""):
#{{{
	file_mode = global_APIs.get_file_mode(input_file)
	node_id_last_file_path = "sql_database/"
	node_id_last_file_path += file_mode
	if not prefix == "":
		node_id_last_file_path += "/" + str(prefix)
	node_id_last_file_path += "/single_line_pattern_db.txt"
	fl = open (node_id_last_file_path, "w") 
	for line_pattern_name in single_line_pattern_db:
		fl.write(line_pattern_name)
		fl.write(" \n")
		line_pattern = single_line_pattern_db[line_pattern_name]["pattern"]
		tmp = ""
		for i in range (0, len(line_pattern)):
			element = line_pattern[i]
			pos = element[0]
			word = element[1]
			
			tmp += str(pos)
			tmp += " "
			tmp += str(word)
			tmp += " "
		fl.write(tmp)
		fl.write("\n")
		
		tmp = ""
		tmp += "belong_block"
		tmp += " "
		tmp += single_line_pattern_db[line_pattern_name]["belong_block"]
		fl.write(tmp)
		fl.write("\n")
	fl.close()
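The writer above produces three lines per single-line pattern: the pattern name, the position/word pairs of the pattern, and a belong_block line; read_single_line_pattern_db in Example 5 parses the same layout back (using convert_string_into_pattern for the pair line). A short sketch with hypothetical data:

example_db = {
	"pattern_3": {
		"pattern": [(0, "Interface"), (2, "down")],	#(position, word) pairs
		"belong_block": "block_0",
	},
}
output_single_line_pattern_db(example_db, "some_input_file", "0")
#resulting single_line_pattern_db.txt, three lines per pattern:
#	pattern_3
#	0 Interface 2 down
#	belong_block block_0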
Example 4
def read_message_closest_message_list (input_file, prefix = ""):
#{{{
	message_closest_message_list = {}
	file_mode = global_APIs.get_file_mode(input_file)
	message_closest_message_file_path = "sql_database/"
	message_closest_message_file_path += file_mode
	if not prefix == "":
		message_closest_message_file_path += "/" + str(prefix)
	message_closest_message_file_path += "/message_closest_message_list.txt"
	if os.path.isfile(message_closest_message_file_path):	
		fl = open (message_closest_message_file_path, "r") 
		while True:
			line_1 = fl.readline()
			line_2 = fl.readline()
			line_3 = fl.readline()
			if not line_1 or not line_2 or not line_3:	
				break
			message_pattern = line_1.split()[0]
			prior_pattern_list = line_2.split()
			follow_pattern_list = line_3.split()
			tmp = {}
			prior_list = []
			for i in range (1, len(prior_pattern_list)):
				prior_list.append(prior_pattern_list[i])
			follow_list = []
			for i in range (1, len(follow_pattern_list)):
				follow_list.append(follow_pattern_list[i])
			tmp["prior"] = prior_list	
			tmp["follow"] = follow_list	
			message_closest_message_list[message_pattern] = tmp
		fl.close()
	return message_closest_message_list
Example 5
def read_single_line_pattern_db(input_file, prefix = ""):
#{{{
	single_line_pattern_db = {}
	file_mode = global_APIs.get_file_mode(input_file)
	single_line_pattern_file_path = "sql_database/"
	single_line_pattern_file_path += file_mode
	if not prefix == "":
		single_line_pattern_file_path += "/" + str(prefix)
	single_line_pattern_file_path += "/single_line_pattern_db.txt"
	if os.path.isfile(single_line_pattern_file_path):	
		fl = open(single_line_pattern_file_path, "r")
		while True:
			line_1 = fl.readline()
			line_2 = fl.readline()
			line_3 = fl.readline()
			if not line_1 or not line_2 or not line_3:	
				break
			pattern_name = line_1.split()[0]
			pattern = convert_string_into_pattern(line_2)
			tmp = {}
			tmp["pattern"] = pattern
			belong_block = line_3.split()
			if len (belong_block) == 1:
				tmp["belong_block"] = ""
			else:
				tmp["belong_block"] = belong_block[1] 
			single_line_pattern_db[pattern_name] = tmp
		fl.close()	
		
	else:
		return {}
	return single_line_pattern_db
Example 6
def output_happen_matrix (happen_matrix, input_file, prefix = "", file_name_prefix = ""):
#{{{
	file_mode = global_APIs.get_file_mode(input_file)
	happen_matrix_path = "sql_database/"
	happen_matrix_path += file_mode
	happen_matrix_path += "/" 
	if not prefix == "":
		happen_matrix_path += "/" + str(prefix) + "/"
	if not file_name_prefix == "":
		happen_matrix_path += file_name_prefix + "_"
		
	happen_matrix_path += "happen_matrix"
	happen_matrix_path += ".txt"
	fl = open (happen_matrix_path, "w") 
	for line_pattern_name in happen_matrix:
		fl.write(line_pattern_name)
		fl.write(" \n")
		line_pattern_happen_list = happen_matrix[line_pattern_name]
		tmp = ""
		tmp += "happen_time"
		tmp += " "
		tmp += str(line_pattern_happen_list["happen_time"])
		tmp += " "
		for next_pattern in line_pattern_happen_list:
			if next_pattern == "happen_time":
				continue
			next_pattern_happen_time = line_pattern_happen_list[next_pattern]
			tmp += next_pattern
			tmp += " "
			tmp += str(next_pattern_happen_time)
			tmp += " "
		fl.write(tmp)
		fl.write("\n")
	fl.close()
Example 7
def output_this_round_affect_message_list(sub_folder, new_found_single_line_pattern, need_update_single_line_pattern_list, left_new_found_single_line_pattern_list, affected_message_list, prefix):
#{{{
	file_mode = global_APIs.get_file_mode(sub_folder)
	affect_message_file_path = "sql_database/"
	affect_message_file_path += file_mode
	if not prefix == "":
		 affect_message_file_path += "/" + str(prefix)
	affect_message_file_path += "/affected_message.txt" 
	fl = open (affect_message_file_path, "w")
	fl.write("new_found_single_line_pattern\n")
	tmp = ""
	for message in new_found_single_line_pattern:
		tmp += message + " "
	tmp += "\n"
	fl.write (tmp)
	fl.write("need_update_single_line_pattern_list\n") 
	tmp = ""
	for message in need_update_single_line_pattern_list:
		tmp += message + " "
	tmp += "\n"
	fl.write (tmp)
	fl.write("left_new_found_single_line_pattern_list\n") 
	tmp = ""
	for message in left_new_found_single_line_pattern_list:
		tmp += message + " "
	tmp += "\n"
	fl.write (tmp)
	fl.write("affected_message_list\n") 
	tmp = ""
	for message in affected_message_list:
		tmp += message + " "
	tmp += "\n"
	fl.write (tmp)
	fl.close()
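A minimal round-trip sketch (the folder name, messages and prefix are hypothetical, and it assumes the corresponding sql_database sub-directory already exists), calling the functions through the database_opt module as Example 19 does: the writer emits a label line followed by a space-separated value line for each of the four lists, and read_this_round_affect_message_list in Example 13 reads the values back from lines 1, 3, 5 and 7.

database_opt.output_this_round_affect_message_list(
	"some_sub_folder",
	["msg_x", "msg_y"],	#new_found_single_line_pattern
	["msg_x"],		#need_update_single_line_pattern_list
	["msg_y"],		#left_new_found_single_line_pattern_list
	["msg_x", "msg_z"],	#affected_message_list
	"0")			#prefix
record = database_opt.read_this_round_affect_message_list("some_sub_folder", "0")
#expected: record == [["msg_x", "msg_y"], ["msg_x"], ["msg_y"], ["msg_x", "msg_z"]]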
Example 8
def read_single_line_pattern_range_list (input_file, prefix = ""):
#{{{
	single_line_pattern_range_line_pattern_list = {}
	file_mode = global_APIs.get_file_mode(input_file)
	range_pattern_list_path = "sql_database/"
	range_pattern_list_path += file_mode
	if not prefix == "":
		range_pattern_list_path += "/" + str(prefix)
	range_pattern_list_path += "/range_pattern_list"
	range_pattern_list_path += ".txt"
	if os.path.isfile(range_pattern_list_path):	
		fl = open (range_pattern_list_path, "r") 
		while True:
			line_1 = fl.readline()
			line_2 = fl.readline()
			if not line_1 or not line_2:	
				break
			pattern_name = line_1.split()[0]

			range_pattern_file_list = line_2.split()
			range_pattern_list = {}
			i = 0
			while i < len(range_pattern_file_list):
				range_pattern_list[range_pattern_file_list[i]] = int(range_pattern_file_list[i+1])
				i = i + 2	
			single_line_pattern_range_line_pattern_list[pattern_name] = range_pattern_list

		fl.close()	
		
	else:
		return {}
	return single_line_pattern_range_line_pattern_list
Example 9
def output_done_sub_folder_list(done_sub_folder_list, sub_folder):
#{{{
	file_mode = global_APIs.get_file_mode(sub_folder)
	done_sub_folder_list_file_path = "sql_database/"
	done_sub_folder_list_file_path += file_mode
	done_sub_folder_list_file_path += "/done_sub_folder_list.txt" 
	fl = open (done_sub_folder_list_file_path, "w") 
	for sub_folder in done_sub_folder_list:
		tmp = ""
		tmp += sub_folder
		tmp += " \n"
		fl.write(tmp)
	fl.close()
Example 10
def block_pattern_list_summary (input_file, show_report = 0):
	#this input_file is only used to determine the file mode
	file_mode = global_APIs.get_file_mode(input_file)
	block_pattern_list_file_path = "sql_database/"
	block_pattern_list_file_path += file_mode
	file_list = global_APIs.get_folder_file_list(block_pattern_list_file_path)
	last_sub_folder = global_APIs.get_latest_sql_db_path(input_file)

	block_pattern_list = read_block_pattern_list(input_file, last_sub_folder)
	single_line_pattern_db = read_single_line_pattern_db(input_file, last_sub_folder)

	block_covered_message_list = {} 
	block_covered_message_num = 0
	for block in block_pattern_list:
		block_pattern = block_pattern_list[block]["pattern"]
		block_covered_message_num += len(block_pattern)
		for message in block_pattern:
			if not message in block_covered_message_list:
				tmp = [block]
				block_covered_message_list[message] = tmp
			else:
				#message already covered by another block; such duplicates are collected into error_list below
				print message
				tmp = block_covered_message_list[message]
				tmp.append(block)
				block_covered_message_list[message] = tmp
				

	message_have_block_info_list = []
	message_have_block_info_num = 0
	for single_line_pattern in single_line_pattern_db:
		if not single_line_pattern_db[single_line_pattern]["belong_block"] == "":
			message_have_block_info_num += 1
			if not single_line_pattern in message_have_block_info_list:
				message_have_block_info_list.append(single_line_pattern)
	if show_report == 1:	
		print "==================================================="
		print "EBD summary report"
		print "	block_covered_message_num: " + str(block_covered_message_num)
		print "	message_have_block_info_num: " + str(message_have_block_info_num)
		print "	total_single_line_message: " + str(len(single_line_pattern_db))
		print "	total_block: " + str(len(block_pattern_list))
		print "==================================================="
	error_list = {}
	
	for message in block_covered_message_list:
		if len(block_covered_message_list[message]) > 1:
			error_list [message] = block_covered_message_list[message]
		
	return error_list
Example 11
def output_node_id_last_list_file(node_id_last_list, input_file, prefix = ""):
#{{{
	file_mode = global_APIs.get_file_mode(input_file)
	node_id_last_file_path = "sql_database/"
	node_id_last_file_path += file_mode
	if not prefix == "":
		node_id_last_file_path += "/" + str(prefix)
	node_id_last_file_path += "/node_id_last_file.txt"
	fl = open (node_id_last_file_path, "w") 
	for node_id in node_id_last_list:
		tmp = node_id
		tmp += " "
		tmp += str(node_id_last_list[node_id])
		tmp += "\n"
		fl.write(tmp)
	fl.close()
Example 12
def read_done_sub_folder_list(sub_folder):
#{{{
	done_sub_folder_list = []
	file_mode = global_APIs.get_file_mode(sub_folder)
	done_sub_folder_list_file_path = "sql_database/"
	done_sub_folder_list_file_path += file_mode
	done_sub_folder_list_file_path += "/done_sub_folder_list.txt" 
	if os.path.isfile(done_sub_folder_list_file_path):	
		fl = open(done_sub_folder_list_file_path, "r")
		for line in fl.readlines():
			line = line.replace("\n", "")
			line = line.split(" ")
			done_sub_folder_list.append(line[0])
		fl.close()	
		
	else:
		return [] 
	return done_sub_folder_list
Example 13
def read_this_round_affect_message_list(sub_folder,  prefix):
	file_mode = global_APIs.get_file_mode(sub_folder)
	affect_message_file_path = "sql_database/"
	affect_message_file_path += file_mode
	if not prefix == "":
		 affect_message_file_path += "/" + str(prefix)
	affect_message_file_path += "/affected_message.txt"
	if os.path.isfile( affect_message_file_path):
		fl = open (affect_message_file_path, "r")
		line_list = fl.readlines()
		fl.close()
		new_found_single_line_pattern = []	
		need_update_single_line_pattern_list = []
		left_new_found_single_line_pattern_list = []
		affected_message_list = []

		new_found_line = line_list[1]
		need_update_line = line_list[3]
		left_new_line = line_list[5]
		affected_line = line_list[7]
		for message in new_found_line.split():
			new_found_single_line_pattern.append(message)
		for message in need_update_line.split():
			need_update_single_line_pattern_list.append(message)
		for message in left_new_line.split():
			left_new_found_single_line_pattern_list.append(message)
		for message in affected_line.split():
			affected_message_list.append(message)
		tmp = []
		tmp.append(new_found_single_line_pattern)	
		tmp.append(need_update_single_line_pattern_list)	
		tmp.append(left_new_found_single_line_pattern_list)	
		tmp.append(affected_message_list)	
		return tmp
	else:
		return []
Example 14
def read_node_id_last_list_file(input_file, prefix = ""):
#{{{
	node_id_last_list = {}
	file_mode = global_APIs.get_file_mode(input_file)
	node_id_last_file_path = "sql_database/"
	node_id_last_file_path += file_mode
	if not prefix == "":
		node_id_last_file_path += "/" + str(prefix)
	node_id_last_file_path += "/node_id_last_file.txt" 
	if os.path.isfile(node_id_last_file_path):	
		fl = open(node_id_last_file_path, "r")
		for line in fl.readlines():
			line = line.replace("\n", "")
			line = line.split(" ")
			node_id_last_list[line[0]] = line[1]
		fl.close()	
		
	else:
		return {}
	return node_id_last_list
Example 15
def whole_daily_folder_block_extract (folder_name):
	total_error_report = "extract_error_report.txt"
	error_report_fl = open(total_error_report, "w")	
	sub_folder_list = global_APIs.get_folder_file_list (folder_name)
	done_example_list = []
	done_example_report_fl = open("example_report.txt", "w")
	folder_num = -1 
	for sub_folder in sub_folder_list:
		folder_num += 1
		if folder_num < folder_control_lower_band:
			continue
		if folder_num >= folder_control_upper_band:
			break
		print "block extract " + sub_folder
		file_mode = global_APIs.get_file_mode(sub_folder)
		if file_mode == "":
			print "can't detect mode " + sub_folder
			continue
		folder_block_extract(sub_folder, error_report_fl, done_example_list, done_example_report_fl)
	error_report_fl.close()
	done_example_report_fl.close()
Example 16
def read_block_pattern_list (input_file, prefix = ""):
#{{{
	block_pattern_list = {}	
	
	file_mode = global_APIs.get_file_mode(input_file)
	block_pattern_list_file_path = "sql_database/"
	block_pattern_list_file_path += file_mode
	if not prefix == "":
		block_pattern_list_file_path += "/" + str(prefix)
	block_pattern_list_file_path += "/block_pattern_list.txt"
	if os.path.isfile(block_pattern_list_file_path):	
		fl = open (block_pattern_list_file_path, "r") 
		while True:
			line_1 = fl.readline()
			line_2 = fl.readline()
			line_3 = fl.readline()
			if not line_1 or not line_2 or not line_3:	
				break
			block_name = line_1.split()[0]

			block_pattern_list_tmp = {}
			pattern_list = line_2.split()
			pattern_tmp = []
			i = 0
			while i < len(pattern_list):
				pattern_tmp.append(pattern_list[i])
				i = i + 1	
			block_pattern_list_tmp["pattern"] = pattern_tmp

			happen_count_list = line_3.split()
			happen_count_tmp = {}
			i = 0
			while i < len(happen_count_list):
				happen_count_tmp[happen_count_list[i]] = happen_count_list[i + 1]
				i = i + 2
			block_pattern_list_tmp["happen_count"] = happen_count_tmp
			block_pattern_list[block_name] = block_pattern_list_tmp
		fl.close()
	return block_pattern_list
Example 17
def output_single_line_pattern_range_list (single_line_pattern_range_line_pattern_list, input_file, prefix = ""):
#{{{
	file_mode = global_APIs.get_file_mode(input_file)
	range_pattern_list_path = "sql_database/"
	range_pattern_list_path += file_mode
	if not prefix == "":
		range_pattern_list_path += "/" + str(prefix)
	range_pattern_list_path += "/range_pattern_list"
	range_pattern_list_path += ".txt"
	fl = open (range_pattern_list_path, "w") 
	for line_pattern_name in single_line_pattern_range_line_pattern_list:
		fl.write(line_pattern_name)
		fl.write(" \n")
		range_pattern_list = single_line_pattern_range_line_pattern_list[line_pattern_name]
		tmp = ""
		for pattern in range_pattern_list:
			tmp += pattern
			tmp += " "	
			tmp += str(range_pattern_list[pattern])
			tmp += " "	
		fl.write(tmp)
		fl.write("\n")
	fl.close()
Example 18
def read_happen_matrix (input_file, prefix = "", file_name_prefix = ""):
#{{{
	happen_matrix = {}
	file_mode = global_APIs.get_file_mode(input_file)
	happen_matrix_path = "sql_database/"
	happen_matrix_path += file_mode
	happen_matrix_path += "/" 
	
	if not prefix == "":
		happen_matrix_path += "/" + str(prefix) + "/"
	if not file_name_prefix == "":
		happen_matrix_path += file_name_prefix + "_"
		
	happen_matrix_path += "happen_matrix"
	happen_matrix_path += ".txt"
	if os.path.isfile(happen_matrix_path):	
		fl = open (happen_matrix_path, "r") 
		while True:
			line_1 = fl.readline()
			line_2 = fl.readline()
			if not line_1 or not line_2:	
				break
			pattern_name = line_1.split()[0]
			next_pattern_list = {}
			next_pattern_file_list = line_2.split()
			i = 0	
			while i < len(next_pattern_file_list) :
				next_pattern_name = next_pattern_file_list[i]
				next_pattern_happen_time = next_pattern_file_list[i+1]
				i = i + 2
				next_pattern_list[next_pattern_name] = int(next_pattern_happen_time)
			happen_matrix[pattern_name] = next_pattern_list
		fl.close()

	else:
		return {}
	return happen_matrix
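For reference, a sketch of the on-disk layout this reader expects, matching what output_happen_matrix in Example 6 writes; the pattern names and counts are hypothetical, and the call assumes the corresponding total_happen_matrix.txt already exists under sql_database/.

#total_happen_matrix.txt, two lines per single-line pattern:
#	pattern_7
#	happen_time 12 pattern_3 5 pattern_9 7
matrix = database_opt.read_happen_matrix("some_sub_folder", "0", "total")
#e.g. matrix == {"pattern_7": {"happen_time": 12, "pattern_3": 5, "pattern_9": 7}}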
Example 19
def whole_daily_folder_block_learning (folder_name):
	sub_folder_list = global_APIs.get_folder_file_list (folder_name)
	done_sub_folder_list = []
	folder_num = -1 
	report_fl = open("total_progress.txt", "w")
	last_prefix = -1
	#this last prefix is the number of the last successfully processed folder
	for sub_folder in sub_folder_list:
		report_tmp = ""
		report_tmp += "block learning " + sub_folder + "\n"
		print "block learning " + sub_folder + "\n"
		folder_num += 1
		file_mode = global_APIs.get_file_mode(sub_folder)
		if file_mode == "":
			print "Can't detect file mode from folder " + sub_folder
			continue
		if folder_num < folder_control_lower_band:
			last_prefix = folder_num
			continue
		if folder_num >= folder_control_upper_band:
			break
		#done_sub_folder		
		done_sub_folder_list = database_opt.read_done_sub_folder_list(sub_folder)
		
		if sub_folder in done_sub_folder_list:
			last_prefix = folder_num
			continue
		prefix_path = str(folder_num)
		global_APIs.sql_prefix_folder_initializer(sub_folder, prefix_path)
		#previous result read from database
		#{{{
		previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
		new_found_single_line_pattern = []
		single_line_pattern_db = database_opt.read_single_line_pattern_db(sub_folder, last_prefix)
		single_line_pattern_range_line_pattern_list = database_opt.read_single_line_pattern_range_list(sub_folder, last_prefix) 
		message_closest_message_list = database_opt.read_message_closest_message_list(sub_folder, last_prefix) 
		block_pattern_list =  database_opt.read_block_pattern_list(sub_folder, last_prefix)
		previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
		#}}}

		#generate this matrix
		#{{{
		ignore_previous_file = 0
		this_happen_matrix = {}
		result = folder_happen_matrix_analyze (
			sub_folder, 
			this_happen_matrix, 
			single_line_pattern_db, 
			new_found_single_line_pattern, 
			single_line_pattern_range_line_pattern_list, 
			previous_node_id_last_list, 
			ignore_previous_file)
		this_happen_matrix = result[0]
		single_line_pattern_db = result[1]
		new_found_single_line_pattern = result[2]
		single_line_pattern_range_line_pattern_list = result[3]
		node_id_last_list = result[4]
		previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
		
		total_happen_matrix = happen_matrix_merge(this_happen_matrix, previous_happen_matrix, previous_node_id_last_list, sub_folder)
	
		if global_APIs.invalid_message == 'invalid_message':	
			global_APIs.single_line_db_invalid_message_assign(single_line_pattern_db)
		global_APIs.generate_single_pattern_dynamic_similarity_threshold(total_happen_matrix)

		#store recent record	
		database_opt.output_happen_matrix(total_happen_matrix, sub_folder, folder_num, "total")
		database_opt.output_happen_matrix(this_happen_matrix, sub_folder, folder_num, "this")
		database_opt.output_single_line_pattern_range_list (single_line_pattern_range_line_pattern_list, sub_folder, folder_num)
		database_opt.output_node_id_last_list_file(node_id_last_list, sub_folder, folder_num)
		#}}}

		#new_found_single_line_pattern
		#{{{
		report_tmp += "	new found single line " + str (len(new_found_single_line_pattern)) + "\n"
		print "	new found single line " + str (len(new_found_single_line_pattern))
		orig_new_found_single_line_pattern_list = []
		for message in new_found_single_line_pattern:
			orig_new_found_single_line_pattern_list.append(message)
		previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
		need_update_single_line_pattern_list = anomaly_detection.this_happen_matrix_anomaly_detection(
			previous_happen_matrix, 
			total_happen_matrix, 
			new_found_single_line_pattern, 
			single_line_pattern_db, 
			message_closest_message_list)	

		new_found_single_line_num = 0
		left_new_found_single_line_pattern_list = []
		for message in orig_new_found_single_line_pattern_list:
			if message in need_update_single_line_pattern_list:
				new_found_single_line_num += 1
			else:
				left_new_found_single_line_pattern_list.append(message)
		update_length = len(need_update_single_line_pattern_list)

		report_tmp += "	new_found_single_line " + str(new_found_single_line_num) + "\n"
		print "	new_found_single_line " + str(new_found_single_line_num)
		report_tmp += "	need update previous single line " + str(update_length - new_found_single_line_num) + "\n"
		print "	need update previous single line " + str(update_length - new_found_single_line_num)
		#}}}

		affected_message_list = []
		if len(need_update_single_line_pattern_list) > 0:
			result = folder_block_learning (
					sub_folder, 
					total_happen_matrix, 
					single_line_pattern_db, 
					need_update_single_line_pattern_list, 
					single_line_pattern_range_line_pattern_list, 
					block_pattern_list, 
					message_closest_message_list)

			total_happen_matrix = result[0]
			single_line_pattern_range_line_pattern_list = result[1]
			message_closest_message_list = result[2]
			block_pattern_list = result[3]
			single_line_pattern_db = result[4]
			affected_message_list = result[5]
			
			report_tmp += "	affected_message_list_length: " + str(len(affected_message_list)) + "\n"
			print "	affected_message_list_length: " + str(len(affected_message_list))
			if len(affected_message_list) == new_found_single_line_num:
				report_tmp += "	previous block list have no change" + "\n"

		database_opt.output_message_closest_message_list(message_closest_message_list, sub_folder, folder_num)
		database_opt.output_block_pattern_list(block_pattern_list, sub_folder, folder_num)
		database_opt.output_single_line_pattern_db(single_line_pattern_db, sub_folder, folder_num)
		
		done_sub_folder_list.append(sub_folder)
		#ARES
		#database_opt.output_done_sub_folder_list(done_sub_folder_list, sub_folder )

		database_opt.output_this_round_affect_message_list(
			sub_folder,
			orig_new_found_single_line_pattern_list,
			need_update_single_line_pattern_list,
			left_new_found_single_line_pattern_list,
			affected_message_list,
			folder_num)

		last_prefix = folder_num
		
		block_merge_error_list = database_opt.block_pattern_list_summary(sub_folder)
		if not len(block_merge_error_list) == 0:
			report_tmp += str(block_merge_error_list)

		#print report_tmp
		report_fl.write(report_tmp)

	report_fl.close()
	return folder_num
Example 20
def affected_message_pattern_history_analyze(folder_name):
    #this routine is for testing the cutoff logic
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    folder_num = -1
    prefix = -1
    last_prefix = -1
    each_message_record_dict = {}
    cutoff_message_list = []
    each_interval_record = {}
    cut_off_happen_time_threshold = 100
    cut_off_folder_num_threshold = 10

    daily_num = 2
    interval_num = 0
    interval_control_num = 10
    total_day_num = 0

    this_interval_need_update_list = []
    this_interval_affected_list = []
    this_interval_new_found_num = 0
    this_interval_new_found_update_num = 0

    for sub_folder in sub_folder_list:
        folder_num += 1
        prefix = folder_num
        file_mode = global_APIs.get_file_mode(sub_folder)
        if file_mode == "":
            continue
        if folder_num < multi_file_folder.folder_control_lower_band:
            last_prefix = folder_num
            continue
        if folder_num >= multi_file_folder.folder_control_upper_band:
            break

        affect_record = database_opt.read_this_round_affect_message_list(
            sub_folder, prefix)
        new_found_single_line_pattern = affect_record[0]
        need_update_single_line_pattern_list = affect_record[1]
        left_new_found_single_line_pattern_list = affect_record[2]
        affected_message_list = affect_record[3]

        for need_update_pattern in need_update_single_line_pattern_list:
            if not need_update_pattern in this_interval_need_update_list:
                this_interval_need_update_list.append(need_update_pattern)
        for affected_message in affected_message_list:
            if not affected_message in this_interval_affected_list:
                this_interval_affected_list.append(affected_message)
        this_interval_new_found_num += len(new_found_single_line_pattern)
        this_interval_new_found_update_num += len(
            new_found_single_line_pattern) - len(
                left_new_found_single_line_pattern_list)

        total_day_num = (folder_num + 1) * daily_num
        if total_day_num % interval_control_num == 0:
            tmp = []
            tmp.append(this_interval_new_found_num)
            tmp.append(this_interval_new_found_update_num)
            tmp.append(len(this_interval_need_update_list))
            tmp.append(len(this_interval_affected_list))
            print total_day_num
            print tmp
            each_interval_record[interval_num] = tmp
            this_interval_need_update_list = []
            this_interval_affected_list = []
            this_interval_new_found_num = 0
            this_interval_new_found_update_num = 0
            interval_num += 1
        #NOTE: this unconditional continue skips the per-message cutoff analysis below
        continue

        for message in new_found_single_line_pattern:
            tmp = {}
            tmp["first_seen"] = folder_num
            tmp["last_need_update"] = -1
            tmp["last_change"] = -1
            each_message_record_dict[message] = tmp
        for message in need_update_single_line_pattern_list:
            each_message_record_dict[message]["last_need_update"] = folder_num
        for message in affected_message_list:
            previous_block_pattern_list = database_opt.read_block_pattern_list(
                sub_folder, last_prefix)
            previous_single_line_pattern_db = database_opt.read_single_line_pattern_db(
                sub_folder, last_prefix)
            this_block_pattern_list = database_opt.read_block_pattern_list(
                sub_folder, prefix)
            this_single_line_pattern_db = database_opt.read_single_line_pattern_db(
                sub_folder, prefix)
            if not message in previous_single_line_pattern_db or previous_single_line_pattern_db[
                    message]["belong_block"] == "":
                each_message_record_dict[message]["last_change"] = folder_num
                continue
            previous_belong_block = previous_single_line_pattern_db[message][
                "belong_block"]
            previous_belong_block_pattern = previous_block_pattern_list[
                previous_belong_block]
            this_belong_block = this_single_line_pattern_db[message][
                "belong_block"]
            if this_belong_block == "":
                #previous belong to a block, now disconnected
                if message in cutoff_message_list:
                    print "cutoff message update " + message + " " + str(
                        each_message_record_dict[message]
                        ["last_change"]) + " " + str(folder_num)

                each_message_record_dict[message]["last_change"] = folder_num
                continue
            this_belong_block_pattern = this_block_pattern_list[
                this_belong_block]
            #now this message have a belong block
            same_result = judge_two_block_pattern_list_same(
                previous_belong_block_pattern, this_belong_block_pattern)
            if same_result == 0:
                if message in cutoff_message_list:
                    print "cutoff message update " + message + " " + str(
                        each_message_record_dict[message]
                        ["last_change"]) + " " + str(folder_num)
                each_message_record_dict[message]["last_change"] = folder_num
                continue

        #here make cut off decision
        this_happen_matrix = database_opt.read_happen_matrix(
            sub_folder, prefix, "total")
        #happen_matrix is for count each message's happen time
        for message in each_message_record_dict:
            if message in cutoff_message_list:
                continue
            message_happen_time = this_happen_matrix[message]["happen_time"]
            if message_happen_time < cut_off_happen_time_threshold:
                continue
            last_change_folder_num = each_message_record_dict[message][
                "last_change"]
            if last_change_folder_num == -1:
                #continue
                #this means this message never merged or dismerged with any other messages
                first_seen_folder_num = each_message_record_dict[message][
                    "first_seen"]
                if folder_num - first_seen_folder_num > cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
            else:
                interval_to_last_change = folder_num - last_change_folder_num
                if interval_to_last_change >= cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
        #final test
        #for message in cutoff_message_list:
        #	last_change_folder_num = each_message_record_dict[message]["last_change"]
        #	interval_to_last_change = folder_num - last_change_folder_num
        #	if interval_to_last_change < cut_off_folder_num_threshold:
        #		print "error "	+ message + " " + str(folder_num)

        last_prefix = folder_num