def split_logs_to_two_and_write(traces):
    """Split ``traces`` into two halves by time and write each half to a log file.

    ``traces`` is handed unchanged to ``split_trace_in_half_by_time``, which
    returns the two halves. Every trace in each half is reduced to the list
    of its events' ``label`` attributes, and the two resulting lists are
    written with ``LogWriter.write_log`` to
    ``../../data/bear/first_half_traces.log`` and
    ``../../data/bear/last_half_traces.log`` respectively.

    :param traces: the traces to split; the events inside each trace must
        expose a ``label`` attribute.
    """
    earlier_part, later_part = split_trace_in_half_by_time(traces)

    # Keep only the event labels; that is all that gets written out.
    earlier_labels = [[event.label for event in trace] for trace in earlier_part]
    later_labels = [[event.label for event in trace] for trace in later_part]

    # Function-scope import: the writer is only needed for the two calls below.
    from log_writer import LogWriter

    LogWriter.write_log(earlier_labels,
                        '../../data/bear/first_half_traces.log')
    LogWriter.write_log(later_labels,
                        '../../data/bear/last_half_traces.log')
def split_traces_by_months_and_write(traces):
    """Group traces by the month of their first event and write one log per month.

    Each trace is bucketed under the ``(year, month)`` pair read from
    ``trace[0].time`` and stored as the list of its events' ``label``
    attributes. For every bucket a summary line is printed and the bucket is
    written with ``LogWriter.write_log`` to
    ``../../data/bear/<year>_<month>.log``.

    :param traces: iterable of indexable traces. ``trace[0]`` is accessed
        unconditionally, so every trace must contain at least one event;
        events must expose ``time`` (with ``year`` and ``month``) on the
        first event and ``label`` on all of them.
    """
    labels_by_period = {}
    for trace in traces:
        started = trace[0].time
        period = (started.year, started.month)
        # setdefault creates the bucket the first time a period is seen.
        bucket = labels_by_period.setdefault(period, [])
        bucket.append([event.label for event in trace])

    # Function-scope import: the writer is only needed for the loop below.
    from log_writer import LogWriter

    for period, period_traces in labels_by_period.items():
        year, month = period
        print('month/year', period, 'had', len(period_traces), 'traces')
        LogWriter.write_log(
            period_traces,
            '../../data/bear/' + str(year) + '_' + str(month) + '.log')
def split_traces_by_quarters_and_write(traces, excluded_year=2011):
    """Group traces by calendar quarter and write one log file per quarter.

    The quarter is chosen from the month of the trace's first event
    (``trace[0].time.month``): months up to 3 go to ``Q1``, up to 6 to
    ``Q2``, up to 9 to ``Q3`` and up to 12 to ``Q4``. Only the month is
    used, so traces from different years share the same quarter bucket.
    Each trace is stored as the list of its events' ``label`` attributes.

    For every quarter (including empty ones) a summary line is printed and
    the bucket is written with ``LogWriter.write_log`` to
    ``../../data/bear/<quarter>.log``.

    :param traces: iterable of indexable traces. ``trace[0]`` is accessed
        unconditionally, so every trace must contain at least one event.
    :param excluded_year: traces whose first event falls in this year are
        skipped. Defaults to 2011, the value that used to be hard-coded;
        pass ``None`` to keep every trace.
    """
    # (quarter name, last month belonging to it), checked in order.
    quarter_last_months = (('Q1', 3), ('Q2', 6), ('Q3', 9), ('Q4', 12))
    quarters = {name: [] for name, _ in quarter_last_months}

    for trace in traces:
        started = trace[0].time
        if started.year == excluded_year:
            continue
        trace_labels = [ev.label for ev in trace]
        for name, last_month in quarter_last_months:
            if started.month <= last_month:
                quarters[name].append(trace_labels)
                break

    # Function-scope import: the writer is only needed for the loop below.
    from log_writer import LogWriter

    for name, quarter_traces in quarters.items():
        # The label used to read 'month/year', copied from the per-month
        # variant of this function; the key printed here is a quarter.
        print('quarter', name, 'had', len(quarter_traces), 'traces')
        LogWriter.write_log(quarter_traces,
                            '../../data/bear/' + str(name) + '.log')
def produce_logs():
    """Generate six synthetic logs from two ``ProtocolModel`` instances.

    Two models are built up front with identical arguments
    (``MODELS_PATH``, ``0``, ``assign_transtion_probs=True``). Instances
    0-2 draw their log from the first model and instances 3-5 from the
    second. For every instance the function:

    1. prints a progress line,
    2. produces a log of 100000 traces with
       ``LogGenerator.produce_log_from_model``,
    3. asks the model to write its transition probabilities to
       ``LOGS_OUTPUT_PATH``,
    4. writes the log to ``LOGS_OUTPUT_PATH + 'l<instance_id>.log'``.
    """
    instances_to_produce = 6
    traces_per_log = 100000

    first_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)
    second_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)

    for instance_id in range(instances_to_produce):
        print('processing instance:', instance_id, MODELS_PATH)

        # The first three instances share one model, the rest the other.
        if instance_id < 3:
            source_model = first_model
        else:
            source_model = second_model

        generated_log = LogGenerator.produce_log_from_model(
            source_model.graph,
            transition_probability_attribute=TRANSITION_PROBABILITY_ATTRIBUTE,
            traces2produce=traces_per_log)

        # Persist the probabilities the log was generated from.
        source_model.write_transitions_probabilities(LOGS_OUTPUT_PATH)

        log_file = LOGS_OUTPUT_PATH + 'l' + str(instance_id) + ".log"
        LogWriter.write_log(generated_log, log_file)
def bear_based_experiments(write_graphs=False):
    """Mine k-Tails models from the desktop and mobile BEAR traces.

    Steps:

    1. Parse ``../../data/bear/findyourhouse_long.log`` with
       ``BearLogParser`` and split the traces with ``get_desktop_traces``
       and ``get_mobile_traces``.
    2. Rename events through ``abstract_events`` so that all
       ``sales_page, page_N`` / ``renting_page, page_N`` variants collapse
       to ``sales_page`` / ``renting_page`` and ``contacts_requested``
       becomes ``contact_requested``.
    3. Write the two label-only logs to
       ``../../data/bear/filtered_logs/desktop.log`` and ``.../mobile.log``.
    4. For each ``k`` in 1..4 run ``kTailsRunner`` on both logs, once with
       default settings and once with ``graph_simplification=1``, and print
       the four node counts in the order: desktop, desktop simplified,
       mobile, mobile simplified.

    :param write_graphs: when true, additionally write the two
        default-settings graphs for every ``k`` and print a completion
        line. This code used to sit behind an unconditional ``continue``
        and therefore never ran; the default ``False`` keeps that
        behaviour.
    """
    LOG_SUFFIX = '.log'
    LOG_PATH = '../../data/bear/findyourhouse_long.log'
    LOG_OUT_PATH = '../../data/bear/filtered_logs/'
    GRAPH_OUTPUT = "../../data/bear_models/bear_models"
    ks = [1, 2, 3, 4]

    log_parser = BearLogParser(LOG_PATH)
    traces = log_parser.process_log(True)

    log1_filename = 'desktop'
    log2_filename = 'mobile'
    log1_traces = log_parser.get_desktop_traces(traces)
    log2_traces = log_parser.get_mobile_traces(traces)

    # Collapse the per-page event names into one event per page kind.
    new_name_mapping = {
        'sales_page, page_1': 'sales_page',
        'sales_page, page_2': 'sales_page',
        'sales_page, page_3': 'sales_page',
        'sales_page, page_4': 'sales_page',
        'sales_page, page_5': 'sales_page',
        'sales_page, page_6': 'sales_page',
        'sales_page, page_7': 'sales_page',
        'sales_page, page_8': 'sales_page',
        'sales_page, page_9': 'sales_page',
        'renting_page, page_1': 'renting_page',
        'renting_page, page_2': 'renting_page',
        'contacts_requested': 'contact_requested',
    }
    filter_traces_log1 = log_parser.abstract_events(new_name_mapping,
                                                    log1_traces)
    filter_traces_log2 = log_parser.abstract_events(new_name_mapping,
                                                    log2_traces)
    log1_traces = log_parser.get_traces_as_lists_of_event_labels(
        filter_traces_log1)
    log2_traces = log_parser.get_traces_as_lists_of_event_labels(
        filter_traces_log2)

    # Function-scope import: the writer is only needed for the two calls below.
    from log_writer import LogWriter
    LogWriter.write_log(log1_traces,
                        LOG_OUT_PATH + log1_filename + LOG_SUFFIX)
    LogWriter.write_log(log2_traces,
                        LOG_OUT_PATH + log2_filename + LOG_SUFFIX)

    # kTailsRunner is fed tuples rather than lists.
    log1_traces = [tuple(tr) for tr in log1_traces]
    log2_traces = [tuple(tr) for tr in log2_traces]

    for k in ks:
        log1_runner = kTailsRunner(log1_traces, k)
        log2_runner = kTailsRunner(log2_traces, k)
        # The original code names the graph_simplification=1 runs "past".
        log1_runner_past = kTailsRunner(log1_traces, k)
        log2_runner_past = kTailsRunner(log2_traces, k)

        log1_runner.run_ktails(add_dummy_init=False,
                               add_dummy_terminal=False)
        log2_runner.run_ktails(add_dummy_init=False,
                               add_dummy_terminal=False)
        log1_runner_past.run_ktails(add_dummy_init=False,
                                    add_dummy_terminal=False,
                                    graph_simplification=1)
        log2_runner_past.run_ktails(add_dummy_init=False,
                                    add_dummy_terminal=False,
                                    graph_simplification=1)

        g1 = log1_runner.get_graph()
        g2 = log2_runner.get_graph()
        g1_past = log1_runner_past.get_graph()
        g2_past = log2_runner_past.get_graph()
        print(len(g1.nodes()), len(g1_past.nodes()),
              len(g2.nodes()), len(g2_past.nodes()))

        if not write_graphs:
            continue

        # NOTE(review): DOT_SUFFIX is not defined in this function; it is
        # presumably a module-level constant - confirm before enabling
        # write_graphs.
        log1_runner.write2file(
            GRAPH_OUTPUT + log1_filename + '_k' + str(k) + DOT_SUFFIX)
        log2_runner.write2file(
            GRAPH_OUTPUT + log2_filename + '_k' + str(k) + DOT_SUFFIX)
        print("done running with k=", k)