Code example #1
def one_sub_daily_folder_report_node_block_summary (sub_folder):
#{{{
	node_file_list = {}
	sub_folder_file_list = global_APIs.get_folder_file_list (sub_folder)
	for sub_folder_file in sub_folder_file_list:
		sub_folder_file_real_name = global_APIs.get_real_file_name(sub_folder_file)
		single_line_file_name_pattern = r'([0-9]+)\_(.*)\.txt\_single\_line\_report\.txt$'
		block_file_name_pattern = r'([0-9]+)\_(.*)\.txt\_block\_report\.txt$'
		#block_report/0/0_c0-0c0s9n1.txt_single_line_report.txt
		#block_report/0/0_c0-0c0s9n1.txt_block_report.txt
		matchobj = re.match (single_line_file_name_pattern, sub_folder_file_real_name)
		if matchobj:
			node_name = matchobj.group(2)
			if node_name in node_file_list:
				tmp = node_file_list[node_name]
			else:
				tmp = {}
			tmp["single"] = sub_folder_file
			node_file_list[node_name] = tmp
		matchobj = re.match (block_file_name_pattern, sub_folder_file_real_name)
		if matchobj:
			node_name = matchobj.group(2)
			if node_name in node_file_list:
				tmp = node_file_list[node_name]
			else:
				tmp = {}
			tmp["block"] = sub_folder_file
			node_file_list[node_name] = tmp
	return node_file_list
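
A quick way to sanity-check the two filename patterns is to run them against the sample paths quoted in the comments above; this standalone sketch (sample names taken from those comments, nothing else assumed) shows group(2) carrying the node name:

import re

single_line_file_name_pattern = r'([0-9]+)\_(.*)\.txt\_single\_line\_report\.txt$'
block_file_name_pattern = r'([0-9]+)\_(.*)\.txt\_block\_report\.txt$'

for real_name in ["0_c0-0c0s9n1.txt_single_line_report.txt",
                  "0_c0-0c0s9n1.txt_block_report.txt"]:
    matchobj = re.match(single_line_file_name_pattern, real_name) \
        or re.match(block_file_name_pattern, real_name)
    if matchobj:
        print(matchobj.group(2))  # "c0-0c0s9n1" in both cases
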
Code example #2
def folder_happen_matrix_analyze (folder_name, happen_matrix, single_line_pattern_db, need_update_single_line_pattern_list, single_line_pattern_range_line_pattern_list, node_id_last_list, ignore_previous_file):
	file_list = global_APIs.get_folder_file_list (folder_name)
	file_count = 0
	for input_file in file_list:
		fl = open (input_file, "r")
		first_message = fl.readline()
		fl.close()	
		node_id = global_APIs.get_line_id(first_message)
		if node_id == "":
			#this is an empty file, do nothing
			continue

		file_count += 1
		if file_count % 20 == 0:
			print "generate matrix " + str(file_count)
		result = block_learning.generate_happen_matrix_from_message_list (input_file, happen_matrix, single_line_pattern_db, need_update_single_line_pattern_list, single_line_pattern_range_line_pattern_list, node_id_last_list, ignore_previous_file)
		happen_matrix = result[0]
		single_line_pattern_db = result[1]
		need_update_single_line_pattern_list = result[2]
		single_line_pattern_range_line_pattern_list = result[3]
		node_id_last_list = result[4]
	result = []
	result.append(happen_matrix)	
	result.append(single_line_pattern_db)	
	result.append(need_update_single_line_pattern_list)	
	result.append(single_line_pattern_range_line_pattern_list)
	result.append(node_id_last_list)
	return result
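
The per-file work here only ever needs the first line, which is enough both to detect an empty file and to recover the node id. A self-contained illustration of that peek (get_line_id is project-specific, so splitting off the first token is only a stand-in for it):

import os, tempfile

# a throwaway sample file; an empty file would yield an empty first line
fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, "w") as fl:
    fl.write("c0-0c0s9n1 boot message\n")

with open(path, "r") as fl:
    first_message = fl.readline()
# stand-in for global_APIs.get_line_id: take the first whitespace token
print(first_message.split()[0] if first_message else "<empty file>")
os.remove(path)
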
Code example #3
File: LSTM_code.py Project: zongzeliunt/SLEBD
def correct_sequences_extract (care_block_name, block_report_path):
#{{{
	sub_folder_list = global_APIs.get_folder_file_list(block_report_path)
	#be safe, sort it
	sub_folder_list = global_APIs.sub_folder_number_sort(block_report_path, sub_folder_list)
	total_event_sequence_list = []
	for sub_folder in sub_folder_list:
		if sub_folder == "block_report/150":
			break
		print sub_folder
		sub_file_list = global_APIs.get_folder_file_list(sub_folder)
		for file_name in sub_file_list:
			#print file_name
			if not global_APIs.this_report_file_is_block_file(file_name, block_report_path):
				continue
			fl = open (file_name, "r")
			event_list = []
			for line in fl.readlines():
				event_list.append(line.split()[0])
			fl.close()
			last_care_block_num = -1
			for i in range (0, len( event_list)):
				event = event_list[i]
				care_block_num = -1
				if event == care_block_name:
					care_block_num = i
				else:
					continue
				start_num = find_event_sequence_extract_range(care_block_num, last_care_block_num)
				
				last_care_block_num = care_block_num	
				if not start_num == -1:
					event_sequence = event_list[start_num: care_block_num]
					total_event_sequence_list.append(event_sequence)
	
	care_block_work_path_name = care_block_name + "_work_path"
	sequence_file_name = care_block_work_path_name + "/correct_sequences.txt"
	fl = open(sequence_file_name, "w")
	
	for event_sequence in total_event_sequence_list:
		new_event_sequence = ""
		for event in event_sequence:
			#new_event_sequence += event + " "	
			new_event_sequence += event_name_convert(event) + " "	
		new_event_sequence += ("\n")		
		fl.write(new_event_sequence)	
	fl.close()
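
At its core the extraction slices out the events seen since the previous occurrence of the care block. A toy version with a stand-in for find_event_sequence_extract_range (hypothetical; the real range policy lives in that helper):

def find_start(care_block_num, last_care_block_num):
    # stand-in policy: start right after the previous occurrence,
    # and report -1 on the first occurrence (mirroring the -1 guard above)
    if last_care_block_num == -1:
        return -1
    return last_care_block_num + 1

event_list = ["a", "b", "X", "c", "d", "X", "e"]
last_care_block_num = -1
for i, event in enumerate(event_list):
    if event != "X":
        continue
    start_num = find_start(i, last_care_block_num)
    last_care_block_num = i
    if start_num != -1:
        print(event_list[start_num:i])  # ['c', 'd'] on the second "X"
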
Code example #4
def block_length_average_summary(folder_name="block_report"):
    #{{{
    total_event_list = []
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    count = 0
    limit = 1
    total_event_length_count = {}
    for sub_folder in sub_folder_list:
        if sub_folder == "block_report/node_stack.txt":
            continue
        print sub_folder
        node_file_list = sequence_pattern_mining.one_sub_daily_folder_report_node_block_summary(
            sub_folder)
        for node_name in node_file_list:
            block_report_file = node_file_list[node_name]["block"]
            single_line_report_file = node_file_list[node_name]["single"]
            fl = open(block_report_file, "r")

            block_name_pattern = r'block\_([0-9]+)$'
            for line in fl.readlines():
                line = line.split()
                event_name = line[0]

                matchobj = re.match(block_name_pattern, event_name)
                if not matchobj:
                    continue
                start_line = int(line[1])
                finish_line = int(line[2])
                length = finish_line - start_line + 1
                if event_name in total_event_length_count:
                    tmp = total_event_length_count[event_name]
                    tmp["count"] += 1
                    tmp["total_length"] += length
                else:
                    tmp = {}
                    tmp["count"] = 1
                    tmp["total_length"] = length
                total_event_length_count[event_name] = tmp
            fl.close()

    event_name_average_length_list = {}
    for event_name in total_event_length_count:
        average_length = total_event_length_count[event_name][
            "total_length"] / total_event_length_count[event_name]["count"]
        event_name_average_length_list[event_name] = average_length

    length_count_list = {}
    for event_name in event_name_average_length_list:
        length = event_name_average_length_list[event_name]
        if length in length_count_list:
            length_count_list[length] += 1
        else:
            length_count_list[length] = 1
    for length in length_count_list:
        print "length " + str(length) + " count " + str(
            length_count_list[length])
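
The bookkeeping above reduces to a count/total pair per event name. A compact equivalent on toy data (the surrounding code is Python 2, where / on two ints already floors, which is why the averages fall into discrete length buckets; // makes that explicit):

records = [("block_3", 10), ("block_3", 15), ("block_7", 5)]
total_event_length_count = {}
for event_name, length in records:
    entry = total_event_length_count.setdefault(
        event_name, {"count": 0, "total_length": 0})
    entry["count"] += 1
    entry["total_length"] += length
for event_name, entry in total_event_length_count.items():
    print(event_name + " average " + str(entry["total_length"] // entry["count"]))
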
Code example #5
def test_file_event_count(folder_name):
	file_list = global_APIs.get_folder_file_list (folder_name)
	total_event_count = 0
	
	for block_report_file in file_list:
		print block_report_file
		fl = open(block_report_file, "r")
		node_block_lines = fl.readlines()
		fl.close()
		print len(node_block_lines)
		total_event_count += len(node_block_lines)
	print total_event_count
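
readlines() materializes each report in memory just to take its length; iterating the file handle counts lines in constant memory, which may matter on large daily logs. A streaming equivalent:

def count_lines(path):
    # stream the file instead of loading it wholesale
    with open(path, "r") as fl:
        return sum(1 for _ in fl)
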
Code example #6
def folder_block_extract (folder_name, error_report_fl, done_example_list, done_example_report_fl):
	last_sub_folder = global_APIs.get_latest_sql_db_path(folder_name)
	file_list = global_APIs.get_folder_file_list (folder_name)
	block_pattern_list = database_opt.read_block_pattern_list(folder_name, last_sub_folder)
	single_line_pattern_db = database_opt.read_single_line_pattern_db(folder_name, last_sub_folder)
	#we don't need to load closest message list
	file_list_error = {}
	file_count = 0
	for input_file in file_list:
		file_count += 1
		#if file_count % 5 == 0:
		#	print "file extract " + str(file_count)

		result = input_file_block_report_extraction(input_file, block_pattern_list, single_line_pattern_db)
		if result == []:
			continue
		block_list = result[0]
		file_single_line_report = result[1]
		summary = result[2]
		file_failed_list = result[3]
	
		database_opt.output_file_block_report_list(block_list, input_file)
		database_opt.output_file_single_line_list(file_single_line_report, input_file)

		if not file_failed_list == []:
			file_list_error[input_file] = file_failed_list
			error_report_fl.write(input_file)
			error_report_fl.write("\n")
			
			for file_record in file_failed_list:
				error_report_fl.write(str(file_record))
				error_report_fl.write("\n")
		#dump example
		for block in block_list:
			block_name = block[0]
			if not block_name in block_pattern_list:
				#this is not a block
				continue
			if block_name in done_example_list:
				continue
			done_example_list.append(block_name)
			done_example_report_fl.write(block_name)
			done_example_report_fl.write("\n")

			block_start_line = block[1]
			block_finish_line = block[2]
			fl = open(input_file, "r")
			fl_line = fl.readlines()
			fl.close()
			for i in range (block_start_line, block_finish_line + 1):
				done_example_report_fl.write(fl_line[i-1])
			done_example_report_fl.write("\n")
				
	
	database_opt.output_extract_error_list(file_list_error, input_file)
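
Block boundaries in the report are 1-based, inclusive line numbers, hence the fl_line[i - 1] indexing when dumping an example. The convention in isolation:

fl_line = ["first\n", "second\n", "third\n", "fourth\n"]
block_start_line, block_finish_line = 2, 3  # 1-based, inclusive
for i in range(block_start_line, block_finish_line + 1):
    print(fl_line[i - 1].rstrip())  # prints "second" then "third"
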
Code example #7
def block_report_file_merge (folder_name = "block_report"):
#{{{
	fl = open("analyze_block_node.txt", "r")
	node_line = fl.readline()
	node_list = node_line.split()
	fl.close()
	if not os.path.isdir("merged_files"):
		os.mkdir("merged_files")	
	
	analyze_lower_band = 0
	analyze_upper_band = 3 
	sub_folder_list = global_APIs.get_folder_file_list (folder_name)
	folder_num = -1
	for sub_folder in sub_folder_list:
		sub_folder_file_list = global_APIs.get_folder_file_list (sub_folder)
		print sub_folder
		folder_num += 1
		if folder_num < analyze_lower_band:
			continue
		if folder_num >= analyze_upper_band:
			break
		node_file_list = one_sub_daily_folder_report_node_block_summary (sub_folder)
		for node_name in node_file_list:
			if not node_name in node_list:
				continue
			src_file = folder_name + "/" + str(folder_num) + "/" + str(folder_num) + "_" + node_name + ".txt_block_report.txt"
			#block_report/0/0_c0-0c0s8n3.txt_block_report.txt
			dst_file = "merged_files" + "/" + node_name + "_" + str(analyze_lower_band) + "_" + str(analyze_upper_band) + "_block_report.txt"
			#merged_files/0_c0-0c0s8n3_0_5_block_report.txt
			print src_file
			print dst_file
			src_fl = open(src_file, "r")
			dst_fl = open(dst_file, "a")
			for line in src_fl.readlines():
				dst_fl.write(line)
			src_fl.close()
			dst_fl.close()
Code example #8
File: database_opt.py Project: zongzeliunt/SLEBD
def block_pattern_list_summary (input_file, show_report = 0):
	#this input_file is just a file mode trigger
	file_mode = global_APIs.get_file_mode(input_file)
	block_pattern_list_file_path = "sql_database/"
	block_pattern_list_file_path += file_mode
	file_list = global_APIs.get_folder_file_list(block_pattern_list_file_path)
	last_sub_folder = global_APIs.get_latest_sql_db_path(input_file)

	block_pattern_list = read_block_pattern_list(input_file, last_sub_folder)
	single_line_pattern_db = read_single_line_pattern_db(input_file, last_sub_folder)

	block_covered_message_list = {} 
	block_covered_message_num = 0
	for block in block_pattern_list:
		block_pattern = block_pattern_list[block]["pattern"]
		block_covered_message_num += len(block_pattern)
		for message in block_pattern:
			if not message in block_covered_message_list:
				tmp = [block]
				block_covered_message_list[message] = tmp
			else:
				print message
				tmp = block_covered_message_list[message]
				tmp.append(block)
				block_covered_message_list[message] = tmp
				

	message_have_block_info_list = []
	message_have_block_info_num = 0
	for single_line_pattern in single_line_pattern_db:
		if not single_line_pattern_db[single_line_pattern]["belong_block"] == "":
			message_have_block_info_num += 1
			if not single_line_pattern in message_have_block_info_list:
				message_have_block_info_list.append(single_line_pattern)
	if show_report == 1:	
		print "==================================================="
		print "EBD summary report"
		print "	block_covered_message_num: " + str(block_covered_message_num)
		print "	message_have_block_info_num: " + str(message_have_block_info_num)
		print "	total_single_line_message: " + str(len(single_line_pattern_db))
		print "	total_block: " + str(len(block_pattern_list))
		print "==================================================="
	error_list = {}
	
	for message in block_covered_message_list:
		if len(block_covered_message_list[message]) > 1:
			error_list [message] = block_covered_message_list[message]
		
	return error_list
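
The returned error_list is simply every message claimed by more than one block pattern; the same check in isolation:

block_covered_message_list = {"msg_A": ["block_1"],
                              "msg_B": ["block_2", "block_5"]}
error_list = {message: blocks
              for message, blocks in block_covered_message_list.items()
              if len(blocks) > 1}
print(error_list)  # {'msg_B': ['block_2', 'block_5']}
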
Code example #9
def error_sequence_test(folder_name, sequence_name):
	prior_matrix = prior_matrix_read()
	sub_path_list = sequence_list_read(0)
	sub_path = sub_path_list[sequence_name]
	support_report = get_sub_path_support_report (sub_path, prior_matrix)
	support_num = support_report[0]
	support_block = support_report[1]
	file_list = global_APIs.get_folder_file_list (folder_name)
	
	for block_report_file in file_list:
		block_file_list = []
		fl = open(block_report_file, "r")
		node_block_lines = fl.readlines()
		for line in node_block_lines:
			block_name = line.split()[0]
			block_file_list.append(block_name)
		fl.close()

		support_block_happen_this_round = 0

		sub_path_index = 0
		succeed = 0
		match_list = []
		for i in range(0, len(block_file_list)):
			block = block_file_list[i]
			if block == support_block:
				support_block_happen_this_round += 1
			path_block = sub_path[sub_path_index]
			if block == path_block:
				tmp = [block, i]
				match_list.append(tmp)
				sub_path_index += 1
			if sub_path_index == len(sub_path):
				sub_path_index = 0
				#succeed
				succeed += 1
		if not support_block_happen_this_round == succeed:
			print block_report_file
			print "support_block " + support_block + " " + str(support_block_happen_this_round)
			print "succeed " + str(succeed)
			for tmp in match_list:
				print tmp
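
The inner scan is a cursor walk: advance through sub_path whenever the stream matches the next expected block, and count one success each time the cursor wraps around. Extracted as a standalone helper:

def count_complete_passes(sub_path, block_stream):
    sub_path_index = 0
    succeed = 0
    for block in block_stream:
        if block == sub_path[sub_path_index]:
            sub_path_index += 1
            if sub_path_index == len(sub_path):
                sub_path_index = 0
                succeed += 1
    return succeed

print(count_complete_passes(["a", "b"], ["a", "x", "b", "a", "b"]))  # 2
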
Code example #10
def whole_daily_folder_block_extract (folder_name):
	total_error_report = "extract_error_report.txt"
	error_report_fl = open(total_error_report, "w")	
	sub_folder_list = global_APIs.get_folder_file_list (folder_name)
	done_example_list = []
	done_example_report_fl = open("example_report.txt", "w")
	folder_num = -1 
	for sub_folder in sub_folder_list:
		folder_num += 1
		if folder_num < folder_control_lower_band:
			continue
		if folder_num >= folder_control_upper_band:
			break
		print "block extract " + sub_folder
		file_mode = global_APIs.get_file_mode(sub_folder)
		if file_mode == "":
			print "can't detect mode " + sub_folder
			continue
		folder_block_extract(sub_folder, error_report_fl, done_example_list, done_example_report_fl)
	error_report_fl.close()
	done_example_report_fl.close()
Code example #11
def happen_matrix_merge(this_happen_matrix, previous_happen_matrix, previous_node_id_last_list, sub_folder):
	merged_matrix =  previous_happen_matrix
	
	file_list = global_APIs.get_folder_file_list (sub_folder)
	for input_file in file_list:
		fl = open (input_file, "r")
		first_message = fl.readline()
		fl.close()	
		node_id = global_APIs.get_line_id(first_message)
		if node_id == "" or not node_id in previous_node_id_last_list:
		#this is the same rule as happen matrix generation:
		#if the node id can't be detected, no matrix is generated for that file,
		#and if a node generated no matrix, its last-line info was never updated
			continue
		message_name = previous_node_id_last_list[node_id]
		merged_matrix[message_name]["last"] -= 1
		merged_matrix[message_name]["happen_time"] -= 1

		#explanation: the previous matrix's last line has already been counted in
		#this matrix (its happen time and its next-message info), but the previous
		#matrix still carries one extra happen_time and one "last" entry for it,
		#so we erase that one happen_time and one "last" to avoid double counting


	for message in this_happen_matrix:
		this_list = this_happen_matrix[message]
		merged_list = {}
		if message in merged_matrix:
			merged_list = merged_matrix[message]
		for this_message in this_list:
			if this_message in merged_list:
				merged_list[this_message] += this_list[this_message]
			else:
				merged_list[this_message] = this_list[this_message]
		
		merged_matrix[message] = merged_list

	return  merged_matrix
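
Setting aside the boundary correction, the second half of the merge is an element-wise sum of two nested count dicts:

def merge_counts(this_matrix, merged_matrix):
    for message, this_list in this_matrix.items():
        merged_list = merged_matrix.setdefault(message, {})
        for key, count in this_list.items():
            merged_list[key] = merged_list.get(key, 0) + count
    return merged_matrix

print(merge_counts({"m": {"next": 2}}, {"m": {"next": 3, "last": 1}}))
# {'m': {'next': 5, 'last': 1}}
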
Code example #12
def node_common_block_summary (folder_name):
#{{{
	file_list = global_APIs.get_folder_file_list (folder_name)
	file_list_length = len(file_list)
	block_happen_node_list = {}
	node_count = 0
	for block_report_file in file_list:
		fl = open(block_report_file, "r")
		node_block_lines = fl.readlines()
		for line in node_block_lines:
			block_name = line.split()[0]
			if not block_name in block_happen_node_list:
				block_node_list = []
				block_node_list.append(node_count)
				block_happen_node_list[block_name] = block_node_list
			else:
				block_node_list = block_happen_node_list[block_name]
				if not node_count in block_node_list:
					block_node_list.append(node_count)
					block_happen_node_list[block_name] = block_node_list
		node_count += 1
		fl.close()
	common_list = []
	for block_name in block_happen_node_list:
		if len(block_happen_node_list[block_name]) == file_list_length:
			common_list.append(block_name)
	print len(common_list)
	fl = open("sequence_tmp_report/analyze_block_node.txt", "w")
	fl.write("\n")
	tmp = ""
	for block_name in common_list:
		tmp += block_name
		tmp += " "
	fl.write(tmp)
	fl.write("\n")
	fl.close()
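
Tracking which files each block appears in and comparing against file_list_length is an intersection across files in disguise; with one set of block names per report file the same result is:

per_file_blocks = [{"block_1", "block_2"},
                   {"block_1", "block_3"}]
common_list = set.intersection(*per_file_blocks)
print(common_list)  # set(['block_1']) under Python 2
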
Code example #13
def whole_daily_folder_block_learning (folder_name):
	sub_folder_list = global_APIs.get_folder_file_list (folder_name)
	done_sub_folder_list = []
	folder_num = -1 
	report_fl = open("total_progress.txt", "w")
	last_prefix = -1
	#last_prefix is the number of the last successfully processed folder
	for sub_folder in sub_folder_list:
		report_tmp = ""
		report_tmp += "block learning " + sub_folder + "\n"
		print "block learning " + sub_folder + "\n"
		folder_num += 1
		file_mode = global_APIs.get_file_mode(sub_folder)
		if file_mode == "":
			print "Can't detect file mode from folder " + sub_folder
			continue
		if folder_num < folder_control_lower_band:
			last_prefix = folder_num
			continue
		if folder_num >= folder_control_upper_band:
			break
		#done_sub_folder		
		done_sub_folder_list = database_opt.read_done_sub_folder_list(sub_folder)
		
		if sub_folder in done_sub_folder_list:
			last_prefix = folder_num
			continue
		prefix_path = str(folder_num)
		global_APIs.sql_prefix_folder_initializer(sub_folder, prefix_path)
		#previous result read from database
		#{{{
		previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
		new_found_single_line_pattern = []
		single_line_pattern_db = database_opt.read_single_line_pattern_db(sub_folder, last_prefix)
		single_line_pattern_range_line_pattern_list = database_opt.read_single_line_pattern_range_list(sub_folder, last_prefix) 
		message_closest_message_list = database_opt.read_message_closest_message_list(sub_folder, last_prefix) 
		block_pattern_list =  database_opt.read_block_pattern_list(sub_folder, last_prefix)
		previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
		#}}}

		#generate this matrix
		#{{{
		ignore_previous_file = 0
		this_happen_matrix = {}
		result = folder_happen_matrix_analyze (
			sub_folder, 
			this_happen_matrix, 
			single_line_pattern_db, 
			new_found_single_line_pattern, 
			single_line_pattern_range_line_pattern_list, 
			previous_node_id_last_list, 
			ignore_previous_file)
		this_happen_matrix = result[0]
		single_line_pattern_db = result[1]
		new_found_single_line_pattern = result[2]
		single_line_pattern_range_line_pattern_list = result[3]
		node_id_last_list = result[4]
		previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
		
		total_happen_matrix = happen_matrix_merge(this_happen_matrix, previous_happen_matrix, previous_node_id_last_list, sub_folder)
	
		if global_APIs.invalid_message == 'invalid_message':	
			global_APIs.single_line_db_invalid_message_assign(single_line_pattern_db)
		global_APIs.generate_single_pattern_dynamic_similarity_threshold(total_happen_matrix)

		#store recent record	
		database_opt.output_happen_matrix(total_happen_matrix, sub_folder, folder_num, "total")
		database_opt.output_happen_matrix(this_happen_matrix, sub_folder, folder_num, "this")
		database_opt.output_single_line_pattern_range_list (single_line_pattern_range_line_pattern_list, sub_folder, folder_num)
		database_opt.output_node_id_last_list_file(node_id_last_list, sub_folder,folder_num )
		#}}}

		#new_found_single_line_pattern
		#{{{
		report_tmp += "	new found single line " + str (len(new_found_single_line_pattern)) + "\n"
		print "	new found single line " + str (len(new_found_single_line_pattern))
		orig_new_found_single_line_pattern_list = []
		for message in new_found_single_line_pattern:
			orig_new_found_single_line_pattern_list.append(message)
		previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
		need_update_single_line_pattern_list = anomaly_detection.this_happen_matrix_anomaly_detection(
			previous_happen_matrix, 
			total_happen_matrix, 
			new_found_single_line_pattern, 
			single_line_pattern_db, 
			message_closest_message_list)	

		new_found_single_line_num = 0
		left_new_found_single_line_pattern_list = []
		for message in 	orig_new_found_single_line_pattern_list:
			if message in need_update_single_line_pattern_list:
				new_found_single_line_num += 1
			else:
				left_new_found_single_line_pattern_list.append(message)
		update_length = len(need_update_single_line_pattern_list)

		report_tmp += "	new_found_single_line " + str(new_found_single_line_num) + "\n"
		print "	new_found_single_line " + str(new_found_single_line_num)
		report_tmp += "	need update previous single line " + str(update_length - new_found_single_line_num) + "\n"
		print "	need update previous single line " + str(update_length - new_found_single_line_num)
		#}}}

		affected_message_list = []
		if len(need_update_single_line_pattern_list) > 0:
			result = folder_block_learning (
					sub_folder, 
					total_happen_matrix, 
					single_line_pattern_db, 
					need_update_single_line_pattern_list, 
					single_line_pattern_range_line_pattern_list, 
					block_pattern_list, 
					message_closest_message_list)

			total_happen_matrix = result[0]
			single_line_pattern_range_line_pattern_list = result[1]
			message_closest_message_list = result[2]
			block_pattern_list = result[3]
			single_line_pattern_db = result[4]
			affected_message_list = result[5]
			
			report_tmp += "	affected_message_list_length: " + str(len(affected_message_list)) + "\n"
			print "	affected_message_list_length: " + str(len(affected_message_list))
			if len(affected_message_list) == new_found_single_line_num:
				report_tmp += "	previous block list have no change" + "\n"

		database_opt.output_message_closest_message_list(message_closest_message_list, sub_folder, folder_num)
		database_opt.output_block_pattern_list(block_pattern_list, sub_folder, folder_num)
		database_opt.output_single_line_pattern_db(single_line_pattern_db, sub_folder, folder_num)
		
		done_sub_folder_list.append(sub_folder)
		#ARES
		#database_opt.output_done_sub_folder_list(done_sub_folder_list, sub_folder )

		database_opt.output_this_round_affect_message_list(
			sub_folder,
			orig_new_found_single_line_pattern_list,
			need_update_single_line_pattern_list,
			left_new_found_single_line_pattern_list,
			affected_message_list,
			folder_num)

		last_prefix = folder_num
		
		block_merge_error_list = database_opt.block_pattern_list_summary(sub_folder)
		if not len(block_merge_error_list) == 0:
			report_tmp += 	str(block_merge_error_list)

		#print report_tmp
		report_fl.write(report_tmp)

	report_fl.close()
	return folder_num
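
Structurally this driver is a resumable pipeline: every folder's results are stored under its own numeric prefix, and last_prefix always points at the most recent successfully stored state, so a rerun can pick up where it stopped. A minimal sketch of that skeleton (the dict stands in for the database_opt load/store calls):

stored_state = {-1: 0}           # stands in for the sql_database prefixes
done_sub_folder_list = ["day0"]  # folders already processed in an earlier run
last_prefix = -1
for folder_num, sub_folder in enumerate(["day0", "day1", "day2"]):
    if sub_folder in done_sub_folder_list:
        last_prefix = folder_num
        continue
    # load the state written under last_prefix, update it, store under folder_num
    stored_state[folder_num] = stored_state.get(last_prefix, 0) + 1
    last_prefix = folder_num
print(stored_state)  # {-1: 0, 1: 1, 2: 2}
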
Code example #14
def block_sequence_detect (folder_name):
	fl = open("sequence_tmp_report/analyze_block_node.txt", "r")
	node_line = fl.readline()
	block_line = fl.readline()	
	node_list = node_line.split()
	care_block_list = block_line.split()
	
	fl.close()
	print "block list " + str(len(care_block_list))
	"""
	care_block_list = []
	care_block_list.append("block_A")	
	care_block_list.append("block_B")	
	care_block_list.append("block_C")	
	care_block_list.append("block_D")	
	care_block_list.append("block_E")
	"""

	#prior matrix 
	#{{{
	time_1 = time.time()	
	file_list = global_APIs.get_folder_file_list (folder_name)
	total_block_report_list = []
	for block_report_file in file_list:
		print block_report_file
		fl = open(block_report_file, "r")
		node_block_lines = fl.readlines()
		node_block_report = []
		for line in node_block_lines:
			block_name = line.split()[0]
			if block_name in care_block_list:
				node_block_report.append(block_name)
		total_block_report_list.append(node_block_report)
		fl.close()
	result = multi_file_sequence_prior_matrix_gen(total_block_report_list, care_block_list)
	prior_matrix = result[0]
	prior_matrix_store (prior_matrix)
	prior_matrix = prior_matrix_read()
	print "prior_matrix done"
	time_2 = time.time()	

	#}}}
	#status_matrix
	total_status_matrix = gen_status_matrix_based_on_prior_matrix (care_block_list, prior_matrix)
	status_matrix_store(total_status_matrix)
	total_status_matrix = status_matrix_read()
	print "status_matrix done"
	time_3 = time.time()	

	#for block in status_matrix:
	#	print block
	#	print status_matrix[block]


	#critical_path
	critical_path = []
	#could be nothing, don't be surprised
	#critical_path_candidate_list = find_critical_path_candidate_list_from_status_matrix(care_block_list, total_status_matrix)
	#critical_path = find_sequential_pattern_from_candidate_list(critical_path_candidate_list, total_status_matrix)
	#print "critical_path done"
	#sub_paths
	sub_path_block_candidate_list = []
	for block in care_block_list:
		if not block in critical_path:
			sub_path_block_candidate_list.append(block)
	print "sub_path_WIL_length " + str( len(sub_path_block_candidate_list))
	#generate rotation rounds (the range below currently runs a single round)
	for i in range (0, 1):
		sorted_candidate_list = []
		jump_step = 30 
		for j in range (i*jump_step, len(sub_path_block_candidate_list)):
			sorted_candidate_list.append(sub_path_block_candidate_list[j])
		for j in range (0, i*jump_step):
			sorted_candidate_list.append(sub_path_block_candidate_list[j])
		sub_path_list = find_sub_path(sorted_candidate_list, total_status_matrix, prior_matrix)
		
		print "round " + str(i)
		print len(sub_path_list)


		sub_path_result = {}
		sub_path_result["critical_path"] = critical_path
		sub_path_num = 0
		for sub_path in sub_path_list:
			sub_path_name = "sub_path_" + str(sub_path_num)
			sub_path_result[sub_path_name] = sub_path
			sub_path_num += 1
		
		sequence_list_store(sub_path_result , i)	
	print "sub_path done"
	time_4 = time.time()	
	print "prior matrix gen time: " + str(time_2 - time_1)
	print "status matrix gen time: " + str(time_3 - time_2)
	print "sub sequence gen time: " + str(time_4 - time_3)


	return sub_path_result
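
The two nested j-loops build a rotation of the candidate list by i * jump_step so each round starts mining from a different block; the same rotation as a slice:

sub_path_block_candidate_list = ["b0", "b1", "b2", "b3", "b4"]
i, jump_step = 1, 2
k = i * jump_step
sorted_candidate_list = (sub_path_block_candidate_list[k:]
                         + sub_path_block_candidate_list[:k])
print(sorted_candidate_list)  # ['b2', 'b3', 'b4', 'b0', 'b1']
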
Code example #15
def sequence_file_test (folder_name):
#{{{
	sub_path_list = sequence_list_read(0)
	sub_path_support_time_list = sub_path_support_time_list_gen(sub_path_list)

	file_list = global_APIs.get_folder_file_list (folder_name)
	total_block_report_list = {}
	report_fl = open("sequence_tmp_report/file_test_report.txt", "w")

	fl = open("sequence_tmp_report/analyze_block_node.txt", "r")
	node_line = fl.readline()
	block_line = fl.readline()	
	node_list = node_line.split()
	care_block_list = block_line.split()
	fl.close()


	
	for sub_path_name in sub_path_list:
		total_block_report_list[sub_path_name] = 0


	for block_report_file in file_list:
		report_fl.write("testing: " + block_report_file + "\n")
		report_fl.write("==================================================\n")
		print block_report_file
		block_file_list = []
		fl = open(block_report_file, "r")
		node_block_lines = fl.readlines()
		for line in node_block_lines:
			block_name = line.split()[0]
			if block_name in care_block_list:
				block_file_list.append(block_name)
		fl.close()
		
		for sub_path_name in sub_path_list:
			#if not sub_path_name == "sub_path_1":
			#	continue
			sub_path = sub_path_list[sub_path_name]
			if sub_path == []:
				continue

			total_matched_sub_path = match_one_sub_path_with_one_sequence(sub_path ,block_file_list)
#{{{
			"""
			sub_path_start_block = sub_path[0]
			sub_path_finish_block = sub_path[len(sub_path) - 1]
			sub_path_index = 0
			sub_path_forward_range = 4 
			matched_sub_path = []
			total_matched_sub_path = []
			for block_list_num in range(0, len(block_file_list)):
				block = block_file_list[block_list_num]
				if not sub_path_index == 0 and block == sub_path_start_block:
					total_matched_sub_path.append(matched_sub_path)
					matched_sub_path = [sub_path_start_block]
					continue

				for i in range (0, sub_path_forward_range):
					if sub_path_index + i >= len(sub_path):
						break
					if block == sub_path[sub_path_index + i]:
						#match
						matched_sub_path.append(block)
						sub_path_index += i + 1
						if block == sub_path_finish_block:
							total_matched_sub_path.append(matched_sub_path)
							sub_path_index = 0
							matched_sub_path = []
						break
			"""
#}}}

			error_count, not_fully_match_count, success_count = matched_sub_path_summary(sub_path, total_matched_sub_path)
			#{{{
			"""
			error_count = 0
			not_fully_match_count = 0
			success_count = 0 
			for matched_sub_path in total_matched_sub_path:
				if not len(matched_sub_path) == len(sub_path):
					cover_ratio = float(len(matched_sub_path)) / float(len(sub_path))
					if cover_ratio > 0.95:
						not_fully_match_count += 1
						#total_block_report_list[sub_path_name] += 1
					else:
						error_count += 1
				else:
					success_count += 1
			"""
			#}}}
			
			total_block_report_list[sub_path_name] += success_count 
			if error_count > 0:
				report_fl.write("	error: " + sub_path_name + " error count: " + str(error_count) + "\n")
			if not_fully_match_count > 0:
				report_fl.write("	warning: " + sub_path_name + " not fully match count: " + str(not_fully_match_count) + "\n")

	report_fl.write("====================================\n")
	for sub_path_name in total_block_report_list:
		real_happened_time = total_block_report_list[sub_path_name]
		support_time = sub_path_support_time_list[sub_path_name]
		if not real_happened_time >= support_time:
			#print "warning! " + sub_path_name + " " +  str(real_happened_time) + " " + str(support_time)
			report_fl.write("warning! " + sub_path_name + " " +  str(real_happened_time) + " " + str(support_time) + "\n")
			
	report_fl.close()	
Code example #16
def block_happen_node_summary (folder_name = "block_report"):
#{{{
	analyze_lower_band = 0
	analyze_upper_band = 1
	sub_folder_list = global_APIs.get_folder_file_list (folder_name)
	total_node_block_list = {}
	total_block_node_list = {}	
	folder_num = -1
	for sub_folder in sub_folder_list:
		folder_num += 1
		if folder_num < analyze_lower_band:
			continue
		if folder_num >= analyze_upper_band:
			break

		if sub_folder == "block_report/node_stack.txt":
			continue
		sub_folder_file_list = global_APIs.get_folder_file_list (sub_folder)
		node_file_list = one_sub_daily_folder_report_node_block_summary (sub_folder)
		for node_name in node_file_list:
			if not node_name in total_node_block_list:
				total_node_block_list[node_name] = []
			block_report_file = node_file_list[node_name]["block"]
			fl = open(block_report_file, "r")
			for line in fl.readlines():
				line = line.split()
				block_name = line[0]
				if not block_name in total_block_node_list:
					total_block_node_list[block_name] = []
				if not block_name in total_node_block_list[node_name]:
					total_node_block_list[node_name].append(block_name)
				if not node_name in total_block_node_list[block_name]:
					total_block_node_list[block_name].append(node_name)
		break
	block_cover_node_num_list = {}	
	for block_name in total_block_node_list:
		#print block_name
		#print len(total_block_node_list[block_name])
		block_cover_node_num_list[block_name] = len(total_block_node_list[block_name])
	care_threshold = 18
	care_block_list = []
	care_node_list = []
	for block_name in total_block_node_list:
		if len(total_block_node_list[block_name]) == care_threshold:
			for node in total_block_node_list[block_name]:
				if not node in care_node_list:
					care_node_list.append(node)
		if len(total_block_node_list[block_name]) >= care_threshold:
			if not block_name in care_block_list:
				care_block_list.append(block_name)
	fl = open("sequence_tmp_report/analyze_block_node.txt", "w")
	tmp = ""
	for node_name in care_node_list:
		tmp += node_name
		tmp += " "
	fl.write(tmp)
	fl.write("\n")
	tmp = ""
	for block_name in care_block_list:
		add = 0 
		for node_name in total_block_node_list[block_name]:
			if node_name in care_node_list:
				add = 1
				break
		if add == 1:
			tmp += block_name
			tmp += " "
	fl.write(tmp)
	fl.write("\n")
	tmp = ""
	for block_name in care_block_list:
		tmp += block_name
		tmp += " "
	fl.write(tmp)
	fl.close()
	fl = open("sequence_tmp_report/block_belong_node.txt", "w")
	for block in total_block_node_list:
		fl.write("main_block " + block + "\n")
		tmp = ""
		for node in total_block_node_list[block]:
			tmp += node + " "
		fl.write(tmp + "\n")
	fl.close()
Code example #17
def block_report_folder_total_event_analyze(
        folder_name="block_report",
        block_extract_report_folder_name="block_extract_report"):
    #{{{
    total_event_report_name = block_extract_report_folder_name + "/total_event_report.txt"
    total_single_line_report_name = block_extract_report_folder_name + "/single_line_report.txt"
    total_event_list = []
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    count = 0
    limit = 1
    total_event_happen_count = {}
    total_event_count = 0
    single_line_happen_count = {}
    total_single_line_count = 0
    for sub_folder in sub_folder_list:
        if sub_folder == "block_report/node_stack.txt":
            continue
        node_file_list = sequence_pattern_mining.one_sub_daily_folder_report_node_block_summary(
            sub_folder)
        for node_name in node_file_list:
            block_report_file = node_file_list[node_name]["block"]
            single_line_report_file = node_file_list[node_name]["single"]
            fl = open(block_report_file, "r")
            for line in fl.readlines():
                line = line.split()
                event_name = line[0]
                if event_name in total_event_happen_count:
                    tmp = total_event_happen_count[event_name]
                    tmp += 1
                else:
                    tmp = 1
                total_event_happen_count[event_name] = tmp
                total_event_count += 1
            fl.close()
            fl = open(single_line_report_file, "r")
            for line in fl.readlines():
                line = line.split()
                event_name = line[0]
                if event_name in single_line_happen_count:
                    tmp = single_line_happen_count[event_name]
                    tmp += 1
                else:
                    tmp = 1
                single_line_happen_count[event_name] = tmp
                total_single_line_count += 1
            fl.close()
        count += 1
    fl = open(total_event_report_name, "w")
    happen_only_once = []
    for event in total_event_happen_count:
        if total_event_happen_count[event] == 1:
            happen_only_once.append(event)
            continue
        tmp = event
        tmp += ": "
        tmp += str(total_event_happen_count[event])
        tmp += "\n"
        fl.write(tmp)
    tmp = "total_event_count: " + str(total_event_count) + ("\n")
    fl.write(tmp)
    fl.write("\n")
    fl.write("\n")
    fl.write("\n")
    for event in happen_only_once:
        tmp = event + ("\n")
        fl.write(tmp)
    fl.close()

    fl = open(total_single_line_report_name, "w")
    happen_only_once = []
    for event in single_line_happen_count:
        if single_line_happen_count[event] == 1:
            happen_only_once.append(event)
            continue
        tmp = event
        tmp += ": "
        tmp += str(single_line_happen_count[event])
        tmp += "\n"
        fl.write(tmp)
    tmp = "total_single_line_count: " + str(total_single_line_count) + ("\n")
    fl.write(tmp)
    fl.write("\n")
    fl.write("\n")
    fl.write("\n")
    for event in happen_only_once:
        tmp = event + ("\n")
        fl.write(tmp)
    fl.close()
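
Both tallies are plain frequency counts; collections.Counter expresses the same accumulation, including the happened-only-once bucket, directly:

from collections import Counter

events = ["block_1", "block_2", "block_1"]
total_event_happen_count = Counter(events)
happen_only_once = [e for e, n in total_event_happen_count.items() if n == 1]
print(total_event_happen_count["block_1"])  # 2
print(happen_only_once)                     # ['block_2']
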
Code example #18
def affected_message_pattern_history_analyze(folder_name):
    #this is testing the cutoff logic
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    folder_num = -1
    prefix = -1
    last_prefix = -1
    each_message_record_dict = {}
    cutoff_message_list = []
    each_interval_record = {}
    cut_off_happen_time_threshold = 100
    cut_off_folder_num_threshold = 10

    daily_num = 2
    interval_num = 0
    interval_control_num = 10
    total_day_num = 0

    this_interval_need_update_list = []
    this_interval_affected_list = []
    this_interval_new_found_num = 0
    this_interval_new_found_update_num = 0

    for sub_folder in sub_folder_list:
        folder_num += 1
        prefix = folder_num
        file_mode = global_APIs.get_file_mode(sub_folder)
        if file_mode == "":
            continue
        if folder_num < multi_file_folder.folder_control_lower_band:
            last_prefix = folder_num
            continue
        if folder_num >= multi_file_folder.folder_control_upper_band:
            break

        affect_record = database_opt.read_this_round_affect_message_list(
            sub_folder, prefix)
        new_found_single_line_pattern = affect_record[0]
        need_update_single_line_pattern_list = affect_record[1]
        left_new_found_single_line_pattern_list = affect_record[2]
        affected_message_list = affect_record[3]

        for need_update_pattern in need_update_single_line_pattern_list:
            if not need_update_pattern in this_interval_need_update_list:
                this_interval_need_update_list.append(need_update_pattern)
        for affected_message in affected_message_list:
            if not affected_message in this_interval_affected_list:
                this_interval_affected_list.append(affected_message)
        this_interval_new_found_num += len(new_found_single_line_pattern)
        this_interval_new_found_update_num += len(
            new_found_single_line_pattern) - len(
                left_new_found_single_line_pattern_list)

        total_day_num = (folder_num + 1) * daily_num
        if total_day_num % interval_control_num == 0:
            tmp = []
            tmp.append(this_interval_new_found_num)
            tmp.append(this_interval_new_found_update_num)
            tmp.append(len(this_interval_need_update_list))
            tmp.append(len(this_interval_affected_list))
            print total_day_num
            print tmp
            each_interval_record[interval_num] = tmp
            this_interval_need_update_list = []
            this_interval_affected_list = []
            this_interval_new_found_num = 0
            this_interval_new_found_update_num = 0
            interval_num += 1
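        #NOTE: this continue skips the per-message cutoff analysis below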
        continue

        for message in new_found_single_line_pattern:
            tmp = {}
            tmp["first_seen"] = folder_num
            tmp["last_need_update"] = -1
            tmp["last_change"] = -1
            each_message_record_dict[message] = tmp
        for message in need_update_single_line_pattern_list:
            each_message_record_dict[message]["last_need_update"] = folder_num
        for message in affected_message_list:
            previous_block_pattern_list = database_opt.read_block_pattern_list(
                sub_folder, last_prefix)
            previous_single_line_pattern_db = database_opt.read_single_line_pattern_db(
                sub_folder, last_prefix)
            this_block_pattern_list = database_opt.read_block_pattern_list(
                sub_folder, prefix)
            this_single_line_pattern_db = database_opt.read_single_line_pattern_db(
                sub_folder, prefix)
            if not message in previous_single_line_pattern_db or previous_single_line_pattern_db[
                    message]["belong_block"] == "":
                each_message_record_dict[message]["last_change"] = folder_num
                continue
            previous_belong_block = previous_single_line_pattern_db[message][
                "belong_block"]
            previous_belong_block_pattern = previous_block_pattern_list[
                previous_belong_block]
            this_belong_block = this_single_line_pattern_db[message][
                "belong_block"]
            if this_belong_block == "":
                #previous belong to a block, now disconnected
                if message in cutoff_message_list:
                    print "cutoff message update " + message + " " + str(
                        each_message_record_dict[message]
                        ["last_change"]) + " " + str(folder_num)

                each_message_record_dict[message]["last_change"] = folder_num
                continue
            this_belong_block_pattern = this_block_pattern_list[
                this_belong_block]
            #now this message have a belong block
            same_result = judge_two_block_pattern_list_same(
                previous_belong_block_pattern, this_belong_block_pattern)
            if same_result == 0:
                if message in cutoff_message_list:
                    print "cutoff message update " + message + " " + str(
                        each_message_record_dict[message]
                        ["last_change"]) + " " + str(folder_num)
                each_message_record_dict[message]["last_change"] = folder_num
                continue

        #here make cut off decision
        this_happen_matrix = database_opt.read_happen_matrix(
            sub_folder, prefix, "total")
        #happen_matrix is for count each message's happen time
        for message in each_message_record_dict:
            if message in cutoff_message_list:
                continue
            message_happen_time = this_happen_matrix[message]["happen_time"]
            if message_happen_time < cut_off_happen_time_threshold:
                continue
            last_change_folder_num = each_message_record_dict[message][
                "last_change"]
            if last_change_folder_num == -1:
                #continue
                #this means this message never merged with or split from any other message
                first_seen_folder_num = each_message_record_dict[message][
                    "first_seen"]
                if folder_num - first_seen_folder_num > cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
            else:
                interval_to_last_change = folder_num - last_change_folder_num
                if interval_to_last_change >= cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
        #final test
        #for message in cutoff_message_list:
        #	last_change_folder_num = each_message_record_dict[message]["last_change"]
        #	interval_to_last_change = folder_num - last_change_folder_num
        #	if interval_to_last_change < cut_off_folder_num_threshold:
        #		print "error "	+ message + " " + str(folder_num)

        last_prefix = folder_num
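
The cutoff decision distilled: a message is frozen once it is frequent enough and has been stable long enough, where stability is measured from last_change if the message ever changed and from first_seen otherwise (note the original uses a strict > on the first_seen branch):

def should_cut_off(record, happen_time, folder_num,
                   happen_threshold=100, folder_num_threshold=10):
    if happen_time < happen_threshold:
        return False
    if record["last_change"] == -1:
        # never merged with or split from another message
        return folder_num - record["first_seen"] > folder_num_threshold
    return folder_num - record["last_change"] >= folder_num_threshold

print(should_cut_off({"first_seen": 0, "last_change": -1}, 150, 12))  # True
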