def check_sum3(candidate_file, reference_file):
    """Compare two M3 summary files entry by entry.

    Both files are loaded, checked for matching entry counts, sorted,
    and then compared pairwise. The first mismatch is printed.

    Returns True when both files contain the same set of summaries,
    False on a load failure, a size mismatch, or an entry mismatch.
    """
    candidate = m3summary.m3summary_list()
    reference = m3summary.m3summary_list()
    ret = False
    if candidate.load_file(candidate_file) and reference.load_file(reference_file):
        nb_candidates = len(candidate.summary_list)
        nb_references = len(reference.summary_list)
        if nb_candidates != nb_references:
            print("Found " + str(nb_candidates) + " entries in " + candidate_file +
                  ", expected " + str(nb_references))
        else:
            # Sort both sides so the pairwise comparison does not depend
            # on the order in which entries appear in the files.
            candidate.Sort()
            reference.Sort()
            ret = True
            for i in range(nb_candidates):
                if candidate.summary_list[i] != reference.summary_list[i]:
                    print("Found:\n" + candidate.summary_list[i].to_string() +
                          "\nin " + candidate_file + "\nExpected: " +
                          reference.summary_list[i].to_string())
                    ret = False
                    break
    return ret
def add_m3_summary_file(self, file_name):
    """Load an M3 summary file and append its outlier hours to self.outlier_list.

    An hourly summary is an outlier when its query count exceeds a
    threshold of (day average + 3 * stdev), floored at 100000.0 and at
    twice the day average. Each outlier records the reference statistics,
    the excess ratios, and the dominant traffic category.

    Returns the status of m3s.load_file(file_name).
    """
    m3s = m3summary.m3summary_list()
    ret = m3s.load_file(file_name)
    if ret:
        m3s.compute_daytime_stats()
        # Threshold: mean + 3 sigma, but never below 100000 queries
        # and never below twice the daily average. (Equivalent to the
        # original pair of sequential "raise the floor" tests.)
        limit = max(m3s.day_time_average + 3 * m3s.day_time_stdev,
                    100000.0,
                    2 * m3s.day_time_average)
        for summary in m3s.summary_list:
            if summary.nb_queries > limit:
                outlier = m3_outlier()
                outlier.address_id = summary.address_id
                outlier.cc = summary.cc
                outlier.city = summary.city
                outlier.date = summary.date
                outlier.hour = summary.hour
                outlier.duration = summary.duration
                outlier.nb_queries = summary.nb_queries
                outlier.nb_nx_domains = summary.nb_nx_domains
                outlier.ref_average = m3s.day_time_average
                outlier.ref_stdev = m3s.day_time_stdev
                outlier.ref_q3 = m3s.day_time_q3
                outlier.ref_iqd = m3s.day_time_iqd
                # Guard every ratio against a zero denominator.
                if outlier.ref_average > 0:
                    outlier.query_ratio = \
                        (outlier.nb_queries - outlier.ref_average) / outlier.ref_average
                if outlier.ref_stdev > 0:
                    outlier.stdev_ratio = \
                        (outlier.nb_queries - outlier.ref_average) / outlier.ref_stdev
                if outlier.ref_iqd > 0:
                    outlier.iqd_ratio = \
                        (outlier.nb_queries - outlier.ref_q3) / outlier.ref_iqd
                outlier.cat_v = [summary.nb_useful, summary.nb_useless,
                                 summary.dga, summary.jumbo, summary.nb_nx_others]
                # Pick the category with the largest count. Ties (and the
                # all-zero case) keep the earliest index, matching the
                # original scan order; "useful" is the index-0 default.
                outlier.excess_cat_index = 0
                outlier.excess_cat_name = "useful"
                v_max = 0
                for i, v in enumerate(outlier.cat_v):
                    if v > v_max:
                        outlier.excess_cat_index = i
                        outlier.excess_cat_name = m3_outlier_cat[i]
                        v_max = v
                self.outlier_list.append(outlier)
                # New entries invalidate any previous sort.
                self.is_sorted = False
    return ret
# coding=utf-8
#
# Deduplicate an M3 summary file: sort the entries, write each unique
# entry once to the output file, and report how many duplicates were
# dropped.
#
# Usage: <script> sum_m3_file.csv dedup.csv

import codecs
import sys
import m3summary

if len(sys.argv) < 3:
    print("usage: " + sys.argv[0] + " sum_m3_file.csv dedup.csv")
    ret = -1
else:
    ret = 0

if ret == 0:
    msl = m3summary.m3summary_list()
    # load_file() returns a truthy value on success (the other scripts
    # in this project test it with "if ...:" / "if not ...:"). The
    # original "if ret == 0:" test here was inverted and ran the dedup
    # only when the load had failed.
    if not msl.load_file(sys.argv[1]):
        ret = -1

if ret == 0:
    msl.summary_list.sort()
    sum_file = codecs.open(sys.argv[2], "w", "UTF-8")
    sum_file.write(m3summary.summary_title_line() + "\n")
    i = 0
    nb_dups = 0
    while i < len(msl.summary_list):
        # The list is sorted, so duplicates are adjacent: write an entry
        # only when it differs from its predecessor.
        if i == 0 or msl.summary_list[i] != msl.summary_list[i - 1]:
            sum_file.write(msl.summary_list[i].to_string() + "\n")
        else:
            nb_dups += 1
        i += 1
    sum_file.close()
    # NOTE(review): the source was truncated mid-statement here; the
    # final report was completed to print the duplicate count -- confirm
    # against the original file.
    print("Out of " + str(len(msl.summary_list)) + " found " +
          str(nb_dups) + " duplicates.")
#!/usr/bin/python
# coding=utf-8
#
# Load an M3 summary file, compute its day-time statistics, and save
# them in the evaluation format.
#
# argv[1] = name of the summary file to read.
# argv[2] = name of the evaluation file to write.

import codecs
import sys
import m3name
import captures
import m3summary
import os
from os.path import isfile, join

# Guard against missing arguments, consistent with the project's other
# command-line scripts.
if len(sys.argv) < 3:
    print("usage: " + sys.argv[0] + " sum_m3_file.csv eval_file.csv")
    sys.exit(-1)

m3s = m3summary.m3summary_list()
# load_file() returns a truthy value on success; bail out on failure.
# sys.exit() is used instead of the bare exit() builtin, which is only
# provided by the interactive "site" module.
if not m3s.load_file(sys.argv[1]):
    sys.exit(-1)
m3s.compute_daytime_stats()
m3s.save_for_evaluation(sys.argv[2])