def one_sub_daily_folder_report_node_block_summary (sub_folder): #{{{
    """Map each node name to its report files inside one daily sub folder.

    Every file in *sub_folder* is classified by its name as either the
    node's single-line report or its block report, e.g.:
        block_report/0/0_c0-0c0s9n1.txt_single_line_report.txt
        block_report/0/0_c0-0c0s9n1.txt_block_report.txt

    Returns a dict: node_name -> {"single": path, "block": path}; either
    key may be absent when that report file does not exist.
    """
    # Fix: patterns were re-created on every loop iteration and the two
    # match-and-insert branches were duplicated.  Compile once, then drive
    # both report kinds from one (pattern, dict-key) table.  The '\_'
    # escapes were redundant ('_' is not a regex metacharacter).
    report_kinds = (
        (re.compile(r'([0-9]+)_(.*)\.txt_single_line_report\.txt$'), "single"),
        (re.compile(r'([0-9]+)_(.*)\.txt_block_report\.txt$'), "block"),
    )
    node_file_list = {}
    for sub_folder_file in global_APIs.get_folder_file_list(sub_folder):
        sub_folder_file_real_name = global_APIs.get_real_file_name(sub_folder_file)
        for pattern, key in report_kinds:
            matchobj = pattern.match(sub_folder_file_real_name)
            if matchobj:
                # group(2) is the node name embedded in the file name.
                node_name = matchobj.group(2)
                node_file_list.setdefault(node_name, {})[key] = sub_folder_file
    return node_file_list
def folder_happen_matrix_analyze (folder_name, happen_matrix, single_line_pattern_db, need_update_single_line_pattern_list, single_line_pattern_range_line_pattern_list, node_id_last_list , ignore_previous_file):
    """Feed every identifiable file under *folder_name* into the matrix learner.

    For each file, the first line is used to detect the owning node; files
    with no detectable node id are skipped.  The learner's five outputs are
    threaded back into the inputs for the next file.

    Returns a 5-element list (original contract preserved):
    [happen_matrix, single_line_pattern_db,
     need_update_single_line_pattern_list,
     single_line_pattern_range_line_pattern_list, node_id_last_list].
    """
    file_count = 0
    for input_file in global_APIs.get_folder_file_list(folder_name):
        # Fix: use 'with' so the handle is closed even if readline() raises.
        with open(input_file, "r") as fl:
            first_message = fl.readline()
        node_id = global_APIs.get_line_id(first_message)
        if node_id == "":
            # Empty (or unidentifiable) file: nothing to learn from.
            continue
        file_count += 1
        if file_count % 20 == 0:
            # Progress heartbeat every 20 processed files.
            print("generate matrix " + str(file_count))
        result = block_learning.generate_happen_matrix_from_message_list (input_file, happen_matrix, single_line_pattern_db, need_update_single_line_pattern_list, single_line_pattern_range_line_pattern_list, node_id_last_list, ignore_previous_file)
        happen_matrix = result[0]
        single_line_pattern_db = result[1]
        need_update_single_line_pattern_list = result[2]
        single_line_pattern_range_line_pattern_list = result[3]
        node_id_last_list = result[4]
    # Fix: build the return list directly instead of five append() calls.
    return [happen_matrix, single_line_pattern_db,
            need_update_single_line_pattern_list,
            single_line_pattern_range_line_pattern_list,
            node_id_last_list]
def correct_sequences_extract (care_block_name, block_report_path): #{{{
    """Collect the event sequences that precede every occurrence of
    *care_block_name* across the block reports, and write them (one
    sequence per line, events converted via event_name_convert) to
    <care_block_name>_work_path/correct_sequences.txt.
    """
    sub_folder_list = global_APIs.get_folder_file_list(block_report_path)
    #be safe, sort it
    sub_folder_list = global_APIs.sub_folder_number_sort(block_report_path, sub_folder_list)
    total_event_sequence_list = []
    for sub_folder in sub_folder_list:
        # NOTE(review): hardcoded stop folder — ignores block_report_path;
        # presumably limits the scan to folders 0..149.  Confirm intent.
        if sub_folder == "block_report/150":
            break
        print sub_folder
        sub_file_list = global_APIs.get_folder_file_list(sub_folder)
        for file_name in sub_file_list:
            #print file_name
            # Only block-report files contribute event sequences.
            if not global_APIs.this_report_file_is_block_file(file_name, block_report_path):
                continue
            # First column of each line is the event (block) name.
            fl = open (file_name, "r")
            event_list = []
            for line in fl.readlines():
                event_list.append(line.split()[0])
            fl.close()
            # Track the previous occurrence index so each extracted window
            # starts after the prior hit (-1 means "no hit yet").
            last_care_block_num = -1
            for i in range (0, len( event_list)):
                event = event_list[i]
                care_block_num = -1
                if event == care_block_name:
                    care_block_num = i
                else:
                    continue
                # Helper decides where the window begins; -1 means "skip".
                start_num = find_event_sequence_extract_range(care_block_num, last_care_block_num)
                last_care_block_num = care_block_num
                if not start_num == -1:
                    # Events strictly before the care block occurrence.
                    event_sequence = event_list[start_num: care_block_num]
                    total_event_sequence_list.append(event_sequence)
    care_block_work_path_name = care_block_name + "_work_path"
    sequence_file_name = care_block_work_path_name + "/correct_sequences.txt"
    fl = open(sequence_file_name, "w")
    for event_sequence in total_event_sequence_list:
        new_event_sequence = ""
        for event in event_sequence:
            #new_event_sequence += event + " "
            new_event_sequence += event_name_convert(event) + " "
        new_event_sequence += ("\n")
        fl.write(new_event_sequence)
    fl.close()
def block_length_average_summary(folder_name="block_report"): #{{{
    """Print a histogram of average block lengths across all block reports.

    For every event named 'block_<n>' in every node's block report, the
    length (finish_line - start_line + 1) is accumulated; the per-event
    averages are then bucketed and a 'length X count Y' line is printed
    per bucket.
    """
    # Fix: dropped unused locals (total_event_list, count, limit,
    # single_line_report_file) and hoisted/compiled the regex that was
    # re-created for every file.
    block_name_re = re.compile(r'block_([0-9]+)$')
    total_event_length_count = {}
    for sub_folder in global_APIs.get_folder_file_list(folder_name):
        if sub_folder == "block_report/node_stack.txt":
            # Bookkeeping file, not a daily sub folder.
            continue
        print(sub_folder)
        node_file_list = sequence_pattern_mining.one_sub_daily_folder_report_node_block_summary( sub_folder)
        for node_name in node_file_list:
            block_report_file = node_file_list[node_name]["block"]
            # Fix: 'with' closes the handle even on a malformed line.
            with open(block_report_file, "r") as fl:
                for line in fl.readlines():
                    line = line.split()
                    event_name = line[0]
                    if not block_name_re.match(event_name):
                        # Only real block events carry start/finish lines.
                        continue
                    start_line = int(line[1])
                    finish_line = int(line[2])
                    length = finish_line - start_line + 1
                    if event_name in total_event_length_count:
                        tmp = total_event_length_count[event_name]
                        tmp["count"] += 1
                        tmp["total_length"] += length
                    else:
                        total_event_length_count[event_name] = {
                            "count": 1, "total_length": length}
    event_name_average_length_list = {}
    for event_name in total_event_length_count:
        # NOTE: '/' is integer (floor) division here under Python 2,
        # so averages are truncated to whole lines — kept as-is.
        average_length = total_event_length_count[event_name][ "total_length"] / total_event_length_count[event_name]["count"]
        event_name_average_length_list[event_name] = average_length
    length_count_list = {}
    for event_name in event_name_average_length_list:
        length = event_name_average_length_list[event_name]
        if length in length_count_list:
            length_count_list[length] += 1
        else:
            length_count_list[length] = 1
    for length in length_count_list:
        print("length " + str(length) + " count " + str(length_count_list[length]))
def test_file_event_count(folder_name):
    """Print each report file's name and line (event) count, then the
    grand total across all files in *folder_name*."""
    total_event_count = 0
    for block_report_file in global_APIs.get_folder_file_list(folder_name):
        print(block_report_file)
        # Fix: original never closed the handle — use 'with'.
        with open(block_report_file, "r") as fl:
            line_count = len(fl.readlines())
        print(line_count)
        total_event_count += line_count
    print(total_event_count)
def folder_block_extract (folder_name, error_report_fl, done_example_list, done_example_report_fl):
    """Run block extraction over every file in *folder_name*, persisting
    per-file block/single-line reports via database_opt, appending failed
    records to *error_report_fl*, and dumping one raw-text example per
    newly seen block into *done_example_report_fl*.

    done_example_list is mutated in place so examples are dumped at most
    once across successive calls.
    """
    # Load the most recent learned patterns for this folder's mode.
    last_sub_folder = global_APIs.get_latest_sql_db_path(folder_name)
    file_list = global_APIs.get_folder_file_list (folder_name)
    block_pattern_list = database_opt.read_block_pattern_list(folder_name, last_sub_folder)
    single_line_pattern_db = database_opt.read_single_line_pattern_db(folder_name, last_sub_folder)
    #we don't need to load closest message list
    file_list_error = {}
    file_count = 0
    for input_file in file_list:
        file_count += 1
        #if file_count % 5 == 0:
        #    print "file extract " + str(file_count)
        result = input_file_block_report_extraction(input_file, block_pattern_list, single_line_pattern_db)
        if result == []:
            # Extraction produced nothing for this file.
            continue
        block_list = result[0]
        file_single_line_report = result[1]
        summary = result[2]
        file_failed_list = result[3]
        database_opt.output_file_block_report_list(block_list, input_file)
        database_opt.output_file_single_line_list(file_single_line_report, input_file)
        if not file_failed_list == []:
            # Record failures both in the returned map and the report file.
            file_list_error[input_file] = file_failed_list
            error_report_fl.write(input_file)
            error_report_fl.write("\n")
            for file_record in file_failed_list:
                error_report_fl.write(str(file_record))
                error_report_fl.write("\n")
        # Dump one raw-text example per block name not yet exemplified.
        for block in block_list:
            block_name = block[0]
            if not block_name in block_pattern_list:
                continue #this is not a block
            if block_name in done_example_list:
                continue
            done_example_list.append(block_name)
            done_example_report_fl.write(block_name)
            done_example_report_fl.write("\n")
            # block is [name, start_line, finish_line]; lines are 1-based.
            block_start_line = block[1]
            block_finish_line = block[2]
            # NOTE(review): fl is never closed here (handle leak per example).
            fl = open(input_file, "r")
            fl_line = fl.readlines()
            for i in range (block_start_line, block_finish_line + 1):
                done_example_report_fl.write(fl_line[i-1])
            done_example_report_fl.write("\n")
    # NOTE(review): uses the loop variable input_file after the loop —
    # raises NameError on an empty folder; presumably only the path's mode
    # component matters to output_extract_error_list.  Confirm.
    database_opt.output_extract_error_list(file_list_error, input_file)
def block_report_file_merge (folder_name = "block_report", analyze_lower_band = 0, analyze_upper_band = 3):
    """Concatenate, per node listed in analyze_block_node.txt, the block
    reports from daily folders [analyze_lower_band, analyze_upper_band)
    into merged_files/<node>_<lo>_<hi>_block_report.txt.

    The band bounds were hardcoded (0 and 3); they are now keyword
    parameters with the same defaults, so existing callers are unaffected.
    """
    with open("analyze_block_node.txt", "r") as fl:
        node_list = fl.readline().split()
    if not os.path.isdir("merged_files"):
        os.mkdir("merged_files")
    sub_folder_list = global_APIs.get_folder_file_list (folder_name)
    folder_num = -1
    for sub_folder in sub_folder_list:
        # Fix: the original fetched sub_folder's file list twice and never
        # used it — both calls removed.
        print(sub_folder)
        folder_num += 1
        if folder_num < analyze_lower_band:
            continue
        if folder_num >= analyze_upper_band:
            break
        node_file_list = one_sub_daily_folder_report_node_block_summary (sub_folder)
        for node_name in node_file_list:
            if not node_name in node_list:
                continue
            # Fix: renamed the confusingly swapped file_out/file_in — the
            # per-day report is READ and the merged file is APPENDED to.
            #block_report/0/0_c0-0c0s8n3.txt_block_report.txt
            source_file = folder_name + "/" + str(folder_num) + "/" + str(folder_num) + "_" + node_name + ".txt_block_report.txt"
            #merged_files/c0-0c0s8n3_0_3_block_report.txt
            merged_file = "merged_files" + "/" + node_name + "_" + str(analyze_lower_band) + "_" + str(analyze_upper_band) + "_block_report.txt"
            print(source_file)
            print(merged_file)
            # Fix: 'with' guarantees both handles close even mid-copy.
            with open(source_file, "r") as src_fl:
                with open(merged_file, "a") as dst_fl:
                    for line in src_fl.readlines():
                        dst_fl.write(line)
def block_pattern_list_summary (input_file, show_report = 0):
    """Summarize the learned block patterns for *input_file*'s file mode.

    Counts how many single-line messages are covered by block patterns and
    how many carry belong_block info; optionally prints a summary when
    show_report == 1.

    Returns a dict of conflicts: message -> [blocks] for every message
    claimed by more than one block pattern.
    """
    #this input_file is just a file mode trigger
    file_mode = global_APIs.get_file_mode(input_file)
    block_pattern_list_file_path = "sql_database/"
    block_pattern_list_file_path += file_mode
    # NOTE(review): file_list is computed but never used below — confirm
    # whether get_folder_file_list has required side effects.
    file_list = global_APIs.get_folder_file_list(block_pattern_list_file_path)
    last_sub_folder = global_APIs.get_latest_sql_db_path(input_file)
    block_pattern_list = read_block_pattern_list(input_file, last_sub_folder)
    single_line_pattern_db = read_single_line_pattern_db(input_file, last_sub_folder)
    # message -> list of blocks whose pattern contains that message.
    block_covered_message_list = {}
    block_covered_message_num = 0
    for block in block_pattern_list:
        block_pattern = block_pattern_list[block]["pattern"]
        block_covered_message_num += len(block_pattern)
        for message in block_pattern:
            if not message in block_covered_message_list:
                tmp = [block]
                block_covered_message_list[message] = tmp
            else:
                # A message already owned by another block: print it so the
                # conflict is visible as it is discovered.
                print message
                tmp = block_covered_message_list[message]
                tmp.append(block)
                block_covered_message_list[message] = tmp
    # Count single-line patterns that already know their owning block.
    message_have_block_info_list = []
    message_have_block_info_num = 0
    for single_line_pattern in single_line_pattern_db:
        if not single_line_pattern_db[single_line_pattern]["belong_block"] == "":
            message_have_block_info_num += 1
            if not single_line_pattern in message_have_block_info_list:
                message_have_block_info_list.append(single_line_pattern)
    if show_report == 1:
        print "==================================================="
        print "EBD summary report"
        print " block_covered_message_num: " + str(block_covered_message_num)
        print " message_have_block_info_num: " + str(message_have_block_info_num)
        print " total_single_line_message: " + str(len(single_line_pattern_db))
        print " total_block: " + str(len(block_pattern_list))
        print "==================================================="
    # Only multiply-claimed messages constitute errors.
    error_list = {}
    for message in block_covered_message_list:
        if len(block_covered_message_list[message]) > 1:
            error_list [message] = block_covered_message_list[message]
    return error_list
def error_sequence_test(folder_name, sequence_name):
    """Check one learned sub-path against every block report file.

    The sub-path is matched cyclically through each file's block stream;
    a file is flagged (printed) when the number of complete sub-path
    traversals ('succeed') differs from how often the path's support
    block occurred in that file.
    """
    prior_matrix = prior_matrix_read()
    sub_path_list = sequence_list_read(0)
    sub_path = sub_path_list[sequence_name]
    # support_report: [support count, the block whose occurrences should
    # equal the number of full sub-path matches].
    support_report = get_sub_path_support_report (sub_path, prior_matrix)
    support_num = support_report[0]
    support_block = support_report[1]
    file_list = global_APIs.get_folder_file_list (folder_name)
    for block_report_file in file_list:
        # First column of each report line is the block name.
        block_file_list = []
        fl = open(block_report_file, "r")
        node_block_lines = fl.readlines()
        for line in node_block_lines:
            block_name = line.split()[0]
            block_file_list.append(block_name)
        fl.close()
        support_block_happen_this_round = 0
        sub_path_index = 0  # position of the next expected path element
        succeed = 0         # number of complete path traversals
        match_list = []     # [block, index] pairs for diagnostics
        for i in range(0, len(block_file_list)):
            block = block_file_list[i]
            if block == support_block:
                support_block_happen_this_round += 1
            path_block = sub_path[sub_path_index]
            if block == path_block:
                tmp = [block, i]
                match_list.append(tmp)
                sub_path_index += 1
                if sub_path_index == len(sub_path):
                    # Wrapped around: one full traversal completed.
                    sub_path_index = 0
                    #succeed
                    succeed += 1
        if not support_block_happen_this_round == succeed:
            # Mismatch: dump the file and the partial-match trace.
            print block_report_file
            print "support_block " + support_block + " " + str(support_block_happen_this_round)
            print "succeed " + str(succeed)
            for tmp in match_list:
                print tmp
def whole_daily_folder_block_extract (folder_name):
    """Drive folder_block_extract over the daily sub folders of
    *folder_name* within the module's folder-control band, collecting
    errors in extract_error_report.txt and examples in example_report.txt.
    """
    error_report_fl = open("extract_error_report.txt", "w")
    done_example_report_fl = open("example_report.txt", "w")
    done_example_list = []
    sub_folders = global_APIs.get_folder_file_list(folder_name)
    for folder_num, sub_folder in enumerate(sub_folders):
        # Only process folders inside the configured band.
        if folder_num < folder_control_lower_band:
            continue
        if folder_num >= folder_control_upper_band:
            break
        print("block extract " + sub_folder)
        if global_APIs.get_file_mode(sub_folder) == "":
            # Unknown file mode: skip this folder entirely.
            print("can't detect mode " + sub_folder)
            continue
        folder_block_extract(sub_folder, error_report_fl, done_example_list, done_example_report_fl)
    error_report_fl.close()
    done_example_report_fl.close()
def happen_matrix_merge(this_happen_matrix, previous_happen_matrix, previous_node_id_last_list, sub_folder):
    """Fold today's happen matrix into the accumulated previous matrix.

    NOTE: previous_happen_matrix is mutated in place (merged_matrix
    aliases it) and is also the return value.
    """
    merged_matrix = previous_happen_matrix
    file_list = global_APIs.get_folder_file_list (sub_folder)
    for input_file in file_list:
        # Node identity comes from the file's first line only.
        fl = open (input_file, "r")
        first_message = fl.readline()
        fl.close()
        node_id = global_APIs.get_line_id(first_message)
        if node_id == "" or not node_id in previous_node_id_last_list:
            #this is same as generate happen matrix
            #if can't detect node id, it will not generate matrix
            #if this node not generated matrix, its last line info will not update
            continue
        message_name = previous_node_id_last_list[node_id]
        # Undo the double-counting at the seam between the two matrices:
        merged_matrix[message_name]["last"] -= 1
        merged_matrix[message_name]["happen_time"] -= 1
        #should explain
        #last node's last line happen time and its next message info have been counted in this matrix
        #however last matrix still have its one time of happen time and the info that next line is last
        #we need to erase its one time of happen time and one time of last
    # Element-wise addition of today's counters into the merged matrix.
    for message in this_happen_matrix:
        this_list = this_happen_matrix[message]
        merged_list = {}
        if message in merged_matrix:
            merged_list = merged_matrix[message]
        for this_message in this_list:
            if this_message in merged_list:
                merged_list[this_message] += this_list[this_message]
            else:
                merged_list[this_message] = this_list[this_message]
        merged_matrix[message] = merged_list
    return merged_matrix
def node_common_block_summary (folder_name): #{{{
    """Find the blocks that occur in EVERY node's block report under
    *folder_name* and write them (space separated, after a blank first
    line) to sequence_tmp_report/analyze_block_node.txt."""
    file_list = global_APIs.get_folder_file_list(folder_name)
    # block name -> list of file indices (node identities) that mention it.
    block_happen_node_list = {}
    for node_count, block_report_file in enumerate(file_list):
        with open(block_report_file, "r") as fl:
            for line in fl.readlines():
                block_name = line.split()[0]
                seen_nodes = block_happen_node_list.setdefault(block_name, [])
                if node_count not in seen_nodes:
                    seen_nodes.append(node_count)
    # A block is common when all report files mentioned it.
    common_list = [name for name in block_happen_node_list
                   if len(block_happen_node_list[name]) == len(file_list)]
    print(len(common_list))
    fl = open("sequence_tmp_report/analyze_block_node.txt", "w")
    fl.write("\n")
    fl.write("".join(name + " " for name in common_list))
    fl.write("\n")
    fl.close()
def whole_daily_folder_block_learning (folder_name):
    """Incrementally learn block patterns over the daily sub folders of
    *folder_name*, one folder per round, persisting each round's state to
    the SQL database under a per-folder numeric prefix and appending a
    progress summary to total_progress.txt.

    Returns the index of the last visited sub folder.
    """
    sub_folder_list = global_APIs.get_folder_file_list (folder_name)
    done_sub_folder_list = []
    folder_num = -1
    report_fl = open("total_progress.txt", "w")
    last_prefix = -1
    #this last prefix is the last successed folder number
    for sub_folder in sub_folder_list:
        report_tmp = ""
        report_tmp += "block learning " + sub_folder + "\n"
        print "block learning " + sub_folder + "\n"
        folder_num += 1
        file_mode = global_APIs.get_file_mode(sub_folder)
        if file_mode == "":
            print "Can't detect file mode from folder " + sub_folder
            continue
        # Folders outside the control band are skipped; those below the
        # band still advance last_prefix so later reads use their state.
        if folder_num < folder_control_lower_band:
            last_prefix = folder_num
            continue
        if folder_num >= folder_control_upper_band:
            break
        #done_sub_folder: skip folders already learned in a previous run.
        done_sub_folder_list = database_opt.read_done_sub_folder_list(sub_folder)
        if sub_folder in done_sub_folder_list:
            last_prefix = folder_num
            continue
        prefix_path = str(folder_num)
        global_APIs.sql_prefix_folder_initializer(sub_folder, prefix_path)
        #previous result read from database #{{{
        previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
        new_found_single_line_pattern = []
        single_line_pattern_db = database_opt.read_single_line_pattern_db(sub_folder, last_prefix)
        single_line_pattern_range_line_pattern_list = database_opt.read_single_line_pattern_range_list(sub_folder, last_prefix)
        message_closest_message_list = database_opt.read_message_closest_message_list(sub_folder, last_prefix)
        block_pattern_list = database_opt.read_block_pattern_list(sub_folder, last_prefix)
        previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
        #}}}
        #generate this matrix #{{{
        ignore_previous_file = 0
        this_happen_matrix = {}
        result = folder_happen_matrix_analyze ( sub_folder, this_happen_matrix, single_line_pattern_db, new_found_single_line_pattern, single_line_pattern_range_line_pattern_list, previous_node_id_last_list, ignore_previous_file)
        this_happen_matrix = result[0]
        single_line_pattern_db = result[1]
        new_found_single_line_pattern = result[2]
        single_line_pattern_range_line_pattern_list = result[3]
        node_id_last_list = result[4]
        # Re-read the previous last-line map: the analyze step above may
        # have mutated the one read earlier.
        previous_node_id_last_list = database_opt.read_node_id_last_list_file(sub_folder, last_prefix)
        total_happen_matrix = happen_matrix_merge(this_happen_matrix, previous_happen_matrix, previous_node_id_last_list, sub_folder)
        if global_APIs.invalid_message == 'invalid_message':
            global_APIs.single_line_db_invalid_message_assign(single_line_pattern_db)
        global_APIs.generate_single_pattern_dynamic_similarity_threshold(total_happen_matrix)
        #store recent record
        database_opt.output_happen_matrix(total_happen_matrix, sub_folder, folder_num, "total")
        database_opt.output_happen_matrix(this_happen_matrix, sub_folder, folder_num, "this")
        database_opt.output_single_line_pattern_range_list (single_line_pattern_range_line_pattern_list, sub_folder, folder_num)
        database_opt.output_node_id_last_list_file(node_id_last_list, sub_folder,folder_num )
        #}}}
        #new_found_single_line_pattern #{{{
        report_tmp += " new found single line " + str (len(new_found_single_line_pattern)) + "\n"
        print " new found single line " + str (len(new_found_single_line_pattern))
        # Snapshot before anomaly detection mutates the list.
        orig_new_found_single_line_pattern_list = []
        for message in new_found_single_line_pattern:
            orig_new_found_single_line_pattern_list.append(message)
        previous_happen_matrix = database_opt.read_happen_matrix(sub_folder, last_prefix, "total")
        need_update_single_line_pattern_list = anomaly_detection.this_happen_matrix_anomaly_detection( previous_happen_matrix, total_happen_matrix, new_found_single_line_pattern, single_line_pattern_db, message_closest_message_list)
        # Split the update list into genuinely-new patterns vs. previously
        # known patterns that merely need updating.
        new_found_single_line_num = 0
        left_new_found_single_line_pattern_list = []
        for message in orig_new_found_single_line_pattern_list:
            if message in need_update_single_line_pattern_list:
                new_found_single_line_num += 1
            else:
                left_new_found_single_line_pattern_list.append(message)
        update_length = len(need_update_single_line_pattern_list)
        report_tmp += " new_found_single_line " + str(new_found_single_line_num) + "\n"
        print " new_found_single_line " + str(new_found_single_line_num)
        report_tmp += " need update previous single line " + str(update_length - new_found_single_line_num) + "\n"
        print " need update previous single line " + str(update_length - new_found_single_line_num)
        #}}}
        if len(need_update_single_line_pattern_list) > 0:
            result = folder_block_learning ( sub_folder, total_happen_matrix, single_line_pattern_db, need_update_single_line_pattern_list, single_line_pattern_range_line_pattern_list, block_pattern_list, message_closest_message_list)
            total_happen_matrix = result[0]
            single_line_pattern_range_line_pattern_list = result[1]
            message_closest_message_list = result[2]
            block_pattern_list = result[3]
            single_line_pattern_db = result[4]
            affected_message_list = result[5]
            report_tmp += " affected_message_list_length: " + str(len(affected_message_list)) + "\n"
            print " affected_message_list_length: " + str(len(affected_message_list))
            # NOTE(review): int == str compares an int to a string and is
            # always False in Python 2, so this report line never fires;
            # likely meant == new_found_single_line_num.  Confirm.
            if len(affected_message_list) == str(new_found_single_line_num):
                report_tmp += " previous block list have no change" + "\n"
            database_opt.output_message_closest_message_list(message_closest_message_list, sub_folder, folder_num)
            database_opt.output_block_pattern_list(block_pattern_list, sub_folder, folder_num)
            database_opt.output_single_line_pattern_db(single_line_pattern_db, sub_folder, folder_num)
            done_sub_folder_list.append(sub_folder)
            #ARES
            #database_opt.output_done_sub_folder_list(done_sub_folder_list, sub_folder )
            database_opt.output_this_round_affect_message_list( sub_folder, orig_new_found_single_line_pattern_list, need_update_single_line_pattern_list, left_new_found_single_line_pattern_list, affected_message_list, folder_num)
        last_prefix = folder_num
        # Sanity check: any message claimed by more than one block?
        block_merge_error_list = database_opt.block_pattern_list_summary(sub_folder)
        if not len(block_merge_error_list) == 0:
            report_tmp += str(block_merge_error_list)
        #print report_tmp
        report_fl.write(report_tmp)
    report_fl.close()
    return folder_num
def block_sequence_detect (folder_name):
    """Mine block-sequence patterns from the reports in *folder_name*.

    Pipeline: build the prior matrix from the filtered per-file block
    streams, derive the status matrix from it, then generate sub paths
    from the candidate blocks.  Results are persisted via the *_store
    helpers; timing for each stage is printed.

    Returns the sub-path dict from the final round:
    {"critical_path": [...], "sub_path_0": [...], ...}.
    """
    # Node and care-block lists come from the summary file's first two lines.
    fl = open("sequence_tmp_report/analyze_block_node.txt", "r")
    node_line = fl.readline()
    block_line = fl.readline()
    node_list = node_line.split()
    care_block_list = block_line.split()
    fl.close()
    print "block list " + str(len(care_block_list))
    """
    care_block_list = []
    care_block_list.append("block_A")
    care_block_list.append("block_B")
    care_block_list.append("block_C")
    care_block_list.append("block_D")
    care_block_list.append("block_E")
    """
    #prior matrix #{{{
    time_1 = time.time()
    file_list = global_APIs.get_folder_file_list (folder_name)
    total_block_report_list = []
    for block_report_file in file_list:
        print block_report_file
        # Keep only the cared-about blocks, preserving their order.
        fl = open(block_report_file, "r")
        node_block_lines = fl.readlines()
        node_block_report = []
        for line in node_block_lines:
            block_name = line.split()[0]
            if block_name in care_block_list:
                node_block_report.append(block_name)
        total_block_report_list.append(node_block_report)
        fl.close()
    result = multi_file_sequence_prior_matrix_gen(total_block_report_list, care_block_list)
    prior_matrix = result[0]
    # Store then re-read so the in-memory copy matches the persisted form.
    prior_matrix_store (prior_matrix)
    prior_matrix = prior_matrix_read()
    print "prior_matrix done"
    time_2 = time.time()
    #}}}
    #status_matrix
    total_status_matrix = gen_status_matrix_based_on_prior_matrix (care_block_list, prior_matrix)
    status_matrix_store(total_status_matrix)
    total_status_matrix = status_matrix_read()
    print "status_matrix done"
    time_3 = time.time()
    #for block in status_matrix:
    #    print block
    #    print status_matrix[block]
    #critical_path
    critical_path = []
    #could be nothing, don't be surprised
    #critical_path_candidate_list = find_critical_path_candidate_list_from_status_matrix(care_block_list, total_status_matrix)
    #critical_path = find_sequential_pattern_from_candidate_list(critical_path_candidate_list, total_status_matrix)
    #print "critical_path done"
    #sub_paths: blocks not on the (currently empty) critical path.
    sub_path_block_candidate_list = []
    for block in care_block_list:
        if not block in critical_path:
            sub_path_block_candidate_list.append(block)
    print "sub_path_WIL_length " + str( len(sub_path_block_candidate_list))
    # NOTE(review): comment said "generate 2 rounds" but range(0, 1) runs
    # exactly one round (i = 0), so the rotation below is a no-op.
    for i in range (0, 1):
        # Rotate the candidate list by i*jump_step before mining.
        sorted_candidate_list = []
        jump_step = 30
        for j in range (i*jump_step, len(sub_path_block_candidate_list)):
            sorted_candidate_list.append(sub_path_block_candidate_list[j])
        for j in range (0, i*jump_step):
            sorted_candidate_list.append(sub_path_block_candidate_list[j])
        sub_path_list = find_sub_path(sorted_candidate_list, total_status_matrix, prior_matrix)
        print "round " + str(i)
        print len(sub_path_list)
        sub_path_result = {}
        sub_path_result["critical_path"] = critical_path
        sub_path_num = 0
        for sub_path in sub_path_list:
            sub_path_name = "sub_path_" + str(sub_path_num)
            sub_path_result[sub_path_name] = sub_path
            sub_path_num += 1
        sequence_list_store(sub_path_result , i)
    print "sub_path done"
    time_4 = time.time()
    print "prior matrix gen time: " + str(time_2 - time_1)
    print "status matrix gen time: " + str(time_3 - time_2)
    print "sub sequence gen time: " + str(time_4 - time_3)
    return sub_path_result
def sequence_file_test (folder_name): #{{{
    """Validate every stored sub path against every block report file.

    For each file, the block stream is filtered to the cared-about blocks
    and each sub path is matched against it; per-file error/warning lines
    and a final support-count comparison are written to
    sequence_tmp_report/file_test_report.txt.
    """
    sub_path_list = sequence_list_read(0)
    # Expected (support) occurrence count per sub path.
    sub_path_support_time_list = sub_path_support_time_list_gen(sub_path_list)
    file_list = global_APIs.get_folder_file_list (folder_name)
    total_block_report_list = {}
    report_fl = open("sequence_tmp_report/file_test_report.txt", "w")
    fl = open("sequence_tmp_report/analyze_block_node.txt", "r")
    node_line = fl.readline()
    block_line = fl.readline()
    node_list = node_line.split()
    care_block_list = block_line.split()
    fl.close()
    # Per-sub-path tally of fully successful matches across all files.
    for sub_path_name in sub_path_list:
        total_block_report_list[sub_path_name] = 0
    for block_report_file in file_list:
        report_fl.write("testing: " + block_report_file + "\n")
        report_fl.write("==================================================\n")
        print block_report_file
        # Filter this file's block stream to the cared-about blocks.
        block_file_list = []
        fl = open(block_report_file, "r")
        node_block_lines = fl.readlines()
        for line in node_block_lines:
            block_name = line.split()[0]
            if block_name in care_block_list:
                block_file_list.append(block_name)
        fl.close()
        for sub_path_name in sub_path_list:
            #if not sub_path_name == "sub_path_1":
            #    continue
            sub_path = sub_path_list[sub_path_name]
            if sub_path == []:
                # The critical path may legitimately be empty.
                continue
            total_matched_sub_path = match_one_sub_path_with_one_sequence(sub_path ,block_file_list)
            # Superseded inline matcher, kept for reference. #{{{
            """
            sub_path_start_block = sub_path[0]
            sub_path_finish_block = sub_path[len(sub_path) - 1]
            sub_path_index = 0
            sub_path_forward_range = 4
            matched_sub_path = []
            total_matched_sub_path = []
            for block_list_num in range(0, len(block_file_list)):
                block = block_file_list[block_list_num]
                if not sub_path_index == 0 and block == sub_path_start_block:
                    total_matched_sub_path.append(matched_sub_path)
                    matched_sub_path = [sub_path_start_block]
                    continue
                for i in range (0, sub_path_forward_range):
                    if sub_path_index + i >= len(sub_path):
                        break
                    if block == sub_path[sub_path_index + i]:
                        #match
                        matched_sub_path.append(block)
                        sub_path_index += i + 1
                        if block == sub_path_finish_block:
                            total_matched_sub_path.append(matched_sub_path)
                            sub_path_index = 0
                            matched_sub_path = []
                        break
            """
            #}}}
            error_count, not_fully_match_count, success_count = matched_sub_path_summary(sub_path, total_matched_sub_path)
            # Superseded inline summary, kept for reference. #{{{
            """
            error_count = 0
            not_fully_match_count = 0
            success_count = 0
            for matched_sub_path in total_matched_sub_path:
                if not len(matched_sub_path) == len(sub_path):
                    cover_ratio = float(len(matched_sub_path)) / float(len(sub_path))
                    if cover_ratio > 0.95:
                        not_fully_match_count += 1
                        #total_block_report_list[sub_path_name] += 1
                    else:
                        error_count += 1
                else:
                    success_count += 1
            """
            #}}}
            total_block_report_list[sub_path_name] += success_count
            if error_count > 0:
                report_fl.write(" error: " + sub_path_name + " error count: " + str(error_count) + "\n")
            if not_fully_match_count > 0:
                report_fl.write(" warning: " + sub_path_name + " not fully match count: " + str(not_fully_match_count) + "\n")
    report_fl.write("====================================\n")
    # A sub path should occur at least as often as its support count.
    for sub_path_name in total_block_report_list:
        real_happened_time = total_block_report_list[sub_path_name]
        support_time = sub_path_support_time_list[sub_path_name]
        if not real_happened_time >= support_time:
            #print "warning! " + sub_path_name + " " + str(real_happened_time) + " " + str(support_time)
            report_fl.write("warning! " + sub_path_name + " " + str(real_happened_time) + " " + str(support_time) + "\n")
    report_fl.close()
def block_happen_node_summary (folder_name = "block_report"): #{{{
    """Summarize which blocks occur on which nodes, and write the
    widely-covered ('care') nodes/blocks to
    sequence_tmp_report/analyze_block_node.txt plus the full
    block -> nodes mapping to sequence_tmp_report/block_belong_node.txt.
    """
    # Only daily folders within [analyze_lower_band, analyze_upper_band).
    analyze_lower_band = 0
    analyze_upper_band = 1
    sub_folder_list = global_APIs.get_folder_file_list (folder_name)
    total_node_block_list = {}   # node -> [blocks seen on it]
    total_block_node_list = {}   # block -> [nodes it appeared on]
    folder_num = -1
    for sub_folder in sub_folder_list:
        folder_num += 1
        if folder_num < analyze_lower_band:
            continue
        if folder_num >= analyze_upper_band:
            break
        if sub_folder == "block_report/node_stack.txt":
            # Bookkeeping file, not a daily sub folder.
            continue
        sub_folder_file_list = global_APIs.get_folder_file_list (sub_folder)
        node_file_list = one_sub_daily_folder_report_node_block_summary (sub_folder)
        for node_name in node_file_list:
            if not node_name in total_node_block_list:
                total_node_block_list[node_name] = []
            block_report_file = node_file_list[node_name]["block"]
            fl = open(block_report_file, "r")
            for line in fl.readlines():
                line = line.split()
                block_name = line[0]
                if not block_name in total_block_node_list:
                    total_block_node_list[block_name] = []
                if not block_name in total_node_block_list[node_name]:
                    total_node_block_list[node_name].append(block_name)
                if not node_name in total_block_node_list[block_name]:
                    total_block_node_list[block_name].append(node_name)
                # NOTE(review): unconditional break — only the FIRST line of
                # each node's block report is examined.  Confirm intent.
                break
    block_cover_node_num_list = {}
    for block_name in total_block_node_list:
        #print block_name
        #print len(total_block_node_list[block_name])
        block_cover_node_num_list[block_name] = len(total_block_node_list[block_name])
    care_threshold = 18
    care_block_list = []
    care_node_list = []
    for block_name in total_block_node_list:
        # NOTE(review): literal 18 duplicates care_threshold but with '=='
        # instead of '>=' — confirm whether both should use care_threshold.
        if len(total_block_node_list[block_name]) == 18 :
            for node in total_block_node_list[block_name]:
                if not node in care_node_list:
                    care_node_list.append(node)
        if len(total_block_node_list[block_name]) >= care_threshold:
            if not block_name in care_block_list:
                care_block_list.append(block_name)
    # Output line 1: care nodes; line 2: care blocks touching a care node;
    # line 3: all care blocks.
    fl = open("sequence_tmp_report/analyze_block_node.txt", "w")
    tmp = ""
    for node_name in care_node_list:
        tmp += node_name
        tmp += " "
    fl.write(tmp)
    fl.write("\n")
    tmp = ""
    for block_name in care_block_list:
        add = 0
        for node_name in total_block_node_list[block_name]:
            if node_name in care_node_list:
                add = 1
                break
        if add == 1:
            tmp += block_name
            tmp += " "
    fl.write(tmp)
    fl.write("\n")
    tmp = ""
    for block_name in care_block_list:
        tmp += block_name
        tmp += " "
    fl.write(tmp)
    fl.close()
    fl = open("sequence_tmp_report/block_belong_node.txt", "w")
    for block in total_block_node_list:
        fl.write("main_block " + block + "\n")
        tmp = ""
        for node in total_block_node_list[block]:
            tmp += node + " "
        fl.write(tmp + "\n")
    fl.close()
def _count_first_tokens(report_file_name, happen_count):
    #Tally the first whitespace-separated token of every non-blank line of
    #report_file_name into the happen_count dict (event name -> occurrences).
    #Returns the number of lines tallied. Blank lines are skipped (the
    #previous inline version crashed on line[0] for an empty line).
    tallied = 0
    fl = open(report_file_name, "r")
    try:
        #iterate the file lazily instead of materializing readlines()
        for line in fl:
            fields = line.split()
            if not fields:
                continue
            event_name = fields[0]
            happen_count[event_name] = happen_count.get(event_name, 0) + 1
            tallied += 1
    finally:
        fl.close()
    return tallied


def _write_happen_count_report(report_name, happen_count, total_label, total_count):
    #Write "<event>: <count>" for every event seen more than once, then the
    #grand-total line, three blank lines, and finally the events that were
    #seen exactly once (one per line) - same layout as the original report.
    fl = open(report_name, "w")
    try:
        happen_only_once = []
        for event in happen_count:
            if happen_count[event] == 1:
                happen_only_once.append(event)
                continue
            fl.write(event + ": " + str(happen_count[event]) + "\n")
        fl.write(total_label + ": " + str(total_count) + "\n")
        fl.write("\n")
        fl.write("\n")
        fl.write("\n")
        for event in happen_only_once:
            fl.write(event + "\n")
    finally:
        fl.close()


def block_report_folder_total_event_analyze( folder_name="block_report", block_extract_report_folder_name="block_extract_report"): #{{{
    """Aggregate every node's block/single-line reports into two summaries.

    Walks all daily sub-folders of folder_name, counts how often each block
    event and each single-line event occurs across all node report files,
    and writes:

      <block_extract_report_folder_name>/total_event_report.txt
      <block_extract_report_folder_name>/single_line_report.txt
    """
    total_event_report_name = block_extract_report_folder_name + "/total_event_report.txt"
    total_single_line_report_name = block_extract_report_folder_name + "/single_line_report.txt"
    total_event_happen_count = {}    # block event name -> occurrence count
    total_event_count = 0
    single_line_happen_count = {}    # single-line event name -> occurrence count
    total_single_line_count = 0
    for sub_folder in global_APIs.get_folder_file_list(folder_name):
        #node_stack.txt is a plain file living beside the daily sub-folders
        if sub_folder == "block_report/node_stack.txt":
            continue
        node_file_list = sequence_pattern_mining.one_sub_daily_folder_report_node_block_summary(
            sub_folder)
        for node_name in node_file_list:
            total_event_count += _count_first_tokens(
                node_file_list[node_name]["block"], total_event_happen_count)
            total_single_line_count += _count_first_tokens(
                node_file_list[node_name]["single"], single_line_happen_count)
    _write_happen_count_report(total_event_report_name, total_event_happen_count,
                               "total_event_count", total_event_count)
    _write_happen_count_report(total_single_line_report_name, single_line_happen_count,
                               "total_single_line_count", total_single_line_count)
def affected_message_pattern_history_analyze(folder_name):
    #this is testing cutoff
    """Replay the per-day affect records under folder_name, track each
    message pattern's merge/dismerge history, and grow cutoff_message_list
    with patterns that have been stable (no block change) long enough.

    Also prints per-interval statistics every interval_control_num days.
    NOTE(review): reconstructed from a whitespace-mangled source; several
    statement nestings below are inferred - verify against VCS history.
    """
    sub_folder_list = global_APIs.get_folder_file_list(folder_name)
    folder_num = -1
    prefix = -1        # database prefix of the current daily folder
    last_prefix = -1   # prefix of the previous processed folder
    #message -> {"first_seen", "last_need_update", "last_change"} folder nums
    each_message_record_dict = {}
    cutoff_message_list = []
    each_interval_record = {}
    #a message must happen at least this often before cutoff is considered
    cut_off_happen_time_threshold = 100
    #folders of stability required before a message is cut off
    cut_off_folder_num_threshold = 10
    daily_num = 2            # each folder covers this many days
    interval_num = 0
    interval_control_num = 10  # report statistics every this many days
    total_day_num = 0
    this_interval_need_update_list = []
    this_interval_affected_list = []
    this_interval_new_found_num = 0
    this_interval_new_found_update_num = 0
    for sub_folder in sub_folder_list:
        folder_num += 1
        prefix = folder_num
        file_mode = global_APIs.get_file_mode(sub_folder)
        #empty mode means this entry is not a processable folder/file
        if file_mode == "":
            continue
        if folder_num < multi_file_folder.folder_control_lower_band:
            last_prefix = folder_num
            continue
        if folder_num >= multi_file_folder.folder_control_upper_band:
            break
        #affect record: [new_found, need_update, left_new_found, affected]
        affect_record = database_opt.read_this_round_affect_message_list(
            sub_folder, prefix)
        new_found_single_line_pattern = affect_record[0]
        need_update_single_line_pattern_list = affect_record[1]
        left_new_found_single_line_pattern_list = affect_record[2]
        affected_message_list = affect_record[3]
        #accumulate de-duplicated per-interval statistics
        for need_update_pattern in need_update_single_line_pattern_list:
            if not need_update_pattern in this_interval_need_update_list:
                this_interval_need_update_list.append(need_update_pattern)
        for affected_message in affected_message_list:
            if not affected_message in this_interval_affected_list:
                this_interval_affected_list.append(affected_message)
        this_interval_new_found_num += len(new_found_single_line_pattern)
        #new patterns that immediately caused an update = found - left over
        this_interval_new_found_update_num += len(
            new_found_single_line_pattern) - len(
                left_new_found_single_line_pattern_list)
        total_day_num = (folder_num + 1) * daily_num
        if total_day_num % interval_control_num == 0:
            #interval boundary: emit [found, found&updated, need_update,
            #affected] and reset the interval accumulators
            tmp = []
            tmp.append(this_interval_new_found_num)
            tmp.append(this_interval_new_found_update_num)
            tmp.append(len(this_interval_need_update_list))
            tmp.append(len(this_interval_affected_list))
            print total_day_num
            print tmp
            each_interval_record[interval_num] = tmp
            this_interval_need_update_list = []
            this_interval_affected_list = []
            this_interval_new_found_num = 0
            this_interval_new_found_update_num = 0
            interval_num += 1
            #NOTE(review): this continue skips the per-message history
            #bookkeeping below for boundary folders - confirm intentional
            continue
        #first sighting of each brand-new pattern
        for message in new_found_single_line_pattern:
            tmp = {}
            tmp["first_seen"] = folder_num
            tmp["last_need_update"] = -1
            tmp["last_change"] = -1
            each_message_record_dict[message] = tmp
        for message in need_update_single_line_pattern_list:
            each_message_record_dict[message]["last_need_update"] = folder_num
        for message in affected_message_list:
            #NOTE(review): these four reads are loop-invariant per sub_folder
            #and could be hoisted; left in place to preserve behavior
            previous_block_pattern_list = database_opt.read_block_pattern_list(
                sub_folder, last_prefix)
            previous_single_line_pattern_db = database_opt.read_single_line_pattern_db(
                sub_folder, last_prefix)
            this_block_pattern_list = database_opt.read_block_pattern_list(
                sub_folder, prefix)
            this_single_line_pattern_db = database_opt.read_single_line_pattern_db(
                sub_folder, prefix)
            #unknown before, or previously unattached: counts as a change
            if not message in previous_single_line_pattern_db or previous_single_line_pattern_db[
                    message]["belong_block"] == "":
                each_message_record_dict[message]["last_change"] = folder_num
                continue
            previous_belong_block = previous_single_line_pattern_db[message][
                "belong_block"]
            previous_belong_block_pattern = previous_block_pattern_list[
                previous_belong_block]
            this_belong_block = this_single_line_pattern_db[message][
                "belong_block"]
            if this_belong_block == "":
                #previous belong to a block, now disconnected
                if message in cutoff_message_list:
                    #a supposedly-stable (cut off) message changed again
                    print "cutoff message update " + message + " " + str(
                        each_message_record_dict[message]
                        ["last_change"]) + " " + str(folder_num)
                each_message_record_dict[message]["last_change"] = folder_num
                continue
            this_belong_block_pattern = this_block_pattern_list[
                this_belong_block]
            #now this message have a belong block
            same_result = judge_two_block_pattern_list_same(
                previous_belong_block_pattern, this_belong_block_pattern)
            if same_result == 0:
                #belong-block pattern changed between the two days
                if message in cutoff_message_list:
                    print "cutoff message update " + message + " " + str(
                        each_message_record_dict[message]
                        ["last_change"]) + " " + str(folder_num)
                each_message_record_dict[message]["last_change"] = folder_num
                continue
        #here make cut off decision
        this_happen_matrix = database_opt.read_happen_matrix(
            sub_folder, prefix, "total")
        #happen_matrix is for count each message's happen time
        for message in each_message_record_dict:
            if message in cutoff_message_list:
                continue
            message_happen_time = this_happen_matrix[message]["happen_time"]
            #too rare to judge - skip
            if message_happen_time < cut_off_happen_time_threshold:
                continue
            last_change_folder_num = each_message_record_dict[message][
                "last_change"]
            if last_change_folder_num == -1:
                #continue
                #this means this message never merged or dismerged with any other messages
                first_seen_folder_num = each_message_record_dict[message][
                    "first_seen"]
                if folder_num - first_seen_folder_num > cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
            else:
                #NOTE(review): else-branch placement inferred from the
                #mangled source - stable since its last change long enough
                interval_to_last_change = folder_num - last_change_folder_num
                if interval_to_last_change >= cut_off_folder_num_threshold:
                    cutoff_message_list.append(message)
        #final test
        #for message in cutoff_message_list:
        #    last_change_folder_num = each_message_record_dict[message]["last_change"]
        #    interval_to_last_change = folder_num - last_change_folder_num
        #    if interval_to_last_change < cut_off_folder_num_threshold:
        #        print "error " + message + " " + str(folder_num)
        last_prefix = folder_num