Esempio n. 1
0
def split_logs_to_two_and_write(traces):
    """Split ``traces`` into two halves and write each half to its own log file.

    The split is delegated to ``split_trace_in_half_by_time``. Every trace of
    each half is reduced to the list of its events' ``label`` attributes, and
    the two resulting lists are written with ``LogWriter.write_log`` to
    ``first_half_traces.log`` and ``last_half_traces.log`` under
    ``../../data/bear/``.

    :param traces: passed unchanged to ``split_trace_in_half_by_time``; the
        traces it returns must be iterables of objects exposing ``label``.
    """
    earlier, later = split_trace_in_half_by_time(traces)

    # Keep only the event labels; the writer receives plain lists of labels.
    earlier_labels = [[event.label for event in trace] for trace in earlier]
    later_labels = [[event.label for event in trace] for trace in later]

    from log_writer import LogWriter
    LogWriter.write_log(earlier_labels,
                        '../../data/bear/first_half_traces.log')
    LogWriter.write_log(later_labels,
                        '../../data/bear/last_half_traces.log')
Esempio n. 2
0
def split_traces_by_months_and_write(traces):
    """Group traces by the (year, month) of their first event and write one log per group.

    Each trace is bucketed under ``(trace[0].time.year, trace[0].time.month)``
    and stored as the list of its events' ``label`` attributes. For every
    bucket a summary line is printed and the bucket is written with
    ``LogWriter.write_log`` to ``../../data/bear/<year>_<month>.log``.

    :param traces: iterable of non-empty traces; the first event of each trace
        must expose ``time.year`` / ``time.month`` and every event a ``label``.
    """
    by_month = {}
    for trace in traces:
        first_event_time = trace[0].time
        bucket_key = (first_event_time.year, first_event_time.month)
        by_month.setdefault(bucket_key, []).append(
            [event.label for event in trace])

    from log_writer import LogWriter
    for bucket_key, labelled_traces in by_month.items():
        year, month = bucket_key
        print('month/year', bucket_key, 'had', len(labelled_traces), 'traces')
        target = '../../data/bear/' + str(year) + '_' + str(month) + '.log'
        LogWriter.write_log(labelled_traces, target)
Esempio n. 3
0
def split_traces_by_quarters_and_write(traces):
    """Group traces by the calendar quarter of their first event and write one log per quarter.

    Traces whose first event is dated in the year 2011 are skipped. Every
    other trace is stored, as the list of its events' ``label`` attributes,
    under ``'Q1'``..``'Q4'`` according to the month of its first event. All
    four quarters are always reported and written (possibly empty) to
    ``../../data/bear/<quarter>.log`` via ``LogWriter.write_log``.

    :param traces: iterable of non-empty traces; the first event of each trace
        must expose ``time.year`` / ``time.month`` and every event a ``label``.
    """
    # (last month of the quarter, quarter name), checked in ascending order.
    quarter_limits = ((3, 'Q1'), (6, 'Q2'), (9, 'Q3'), (12, 'Q4'))
    quarters = {name: [] for _, name in quarter_limits}

    for trace in traces:
        first_event_time = trace[0].time
        if first_event_time.year == 2011:
            continue
        labels = [event.label for event in trace]
        month = first_event_time.month
        # The first limit that is not exceeded decides the quarter; a month
        # above 12 matches nothing and the trace is silently dropped.
        for last_month, name in quarter_limits:
            if month <= last_month:
                quarters[name].append(labels)
                break

    from log_writer import LogWriter
    for name, labelled_traces in quarters.items():
        # NOTE(review): the 'month/year' label is kept as-is although the
        # value printed next to it is a quarter name.
        print('month/year', name, 'had', len(labelled_traces), 'traces')
        LogWriter.write_log(labelled_traces,
                            '../../data/bear/' + str(name) + '.log')
Esempio n. 4
0
def produce_logs():
    """Generate six synthetic logs from two protocol models and write them to disk.

    Two ``ProtocolModel`` objects are created up front. Instances 0-2 draw
    their log from the first model and instances 3-5 from the second. For each
    instance a log of 100000 traces is produced with
    ``LogGenerator.produce_log_from_model``, the model's transition
    probabilities are written to ``LOGS_OUTPUT_PATH``, and the log itself is
    written to ``LOGS_OUTPUT_PATH + 'l<instance_id>.log'``.
    """
    instance_count = 6
    traces_per_log = 100000

    # NOTE(review): both models are built from instance 0 of MODELS_PATH; they
    # are separate objects, each constructed with assign_transtion_probs=True.
    # Confirm that the second model is not meant to use another instance id.
    first_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)
    second_model = ProtocolModel(MODELS_PATH, 0, assign_transtion_probs=True)
    models = (first_model, second_model)

    for instance_id in range(instance_count):
        print('processing instance:', instance_id, MODELS_PATH)

        # The first three instances share the first model, the rest the second.
        model_generator = models[0] if instance_id < 3 else models[1]

        # Sample the log from the chosen model's graph.
        log = LogGenerator.produce_log_from_model(
            model_generator.graph,
            transition_probability_attribute=TRANSITION_PROBABILITY_ATTRIBUTE,
            traces2produce=traces_per_log)

        # Persist the transition probabilities of the model that was used.
        model_generator.write_transitions_probabilities(LOGS_OUTPUT_PATH)

        # Persist the generated log as l<instance_id>.log.
        log_path = LOGS_OUTPUT_PATH + 'l' + str(instance_id) + ".log"
        LogWriter.write_log(log, log_path)
0
def bear_based_experiments():
    """Compare k-Tails model sizes for the desktop and mobile traces of one log.

    Steps, in order:

    1. Parse ``../../data/bear/findyourhouse_long.log`` with ``BearLogParser``
       and split the traces into a desktop and a mobile set.
    2. Rename events through ``abstract_events`` (numbered sales/renting pages
       collapse to one label each, ``contacts_requested`` becomes
       ``contact_requested``).
    3. Convert both sets to lists of event labels and write them to
       ``../../data/bear/filtered_logs/desktop.log`` and ``mobile.log``.
    4. For every ``k`` in ``[1, 2, 3, 4]`` run k-Tails twice per set (once
       with default options, once with ``graph_simplification=1``) and print
       the four node counts.

    NOTE(review): the loop body ends with an unconditional ``continue``, so
    the code after it, which would write the models to file, never runs.
    """
    log_suffix = '.log'
    # NOTE(review): unused; the unreachable tail of the loop refers to
    # DOT_SUFFIX instead, which is not defined inside this function.
    MODEL_SUFFIX = '_model.dot'
    source_log = '../../data/bear/findyourhouse_long.log'
    filtered_dir = '../../data/bear/filtered_logs/'
    GRAPH_OUTPUT = "../../data/bear_models/bear_models"
    # Earlier runs used other k values (11; 20/40/80; 1/2/3/4/6/8/10).
    k_values = [1, 2, 3, 4]

    log_parser = BearLogParser(source_log)
    traces = log_parser.process_log(True)

    # The two logs under comparison. An earlier variant split by browser
    # ("Mozilla/4.0" vs "Mozilla/5.0") through get_traces_of_browser.
    log1_filename = 'desktop'
    log2_filename = 'mobile'
    desktop_traces = log_parser.get_desktop_traces(traces)
    mobile_traces = log_parser.get_mobile_traces(traces)

    # Event renaming table, built in the same key order as a literal would be:
    # sales pages 1-9, renting pages 1-2, then the contact event. An earlier
    # variant filtered events through log_parser.filter_events instead.
    new_name_mapping = {}
    for page in range(1, 10):
        new_name_mapping['sales_page, page_' + str(page)] = 'sales_page'
    for page in (1, 2):
        new_name_mapping['renting_page, page_' + str(page)] = 'renting_page'
    new_name_mapping['contacts_requested'] = 'contact_requested'

    abstracted_desktop = log_parser.abstract_events(new_name_mapping,
                                                    desktop_traces)
    abstracted_mobile = log_parser.abstract_events(new_name_mapping,
                                                   mobile_traces)

    desktop_labels = log_parser.get_traces_as_lists_of_event_labels(
        abstracted_desktop)
    mobile_labels = log_parser.get_traces_as_lists_of_event_labels(
        abstracted_mobile)

    # The label lists are written out before being converted to tuples.
    from log_writer import LogWriter
    LogWriter.write_log(desktop_labels,
                        filtered_dir + log1_filename + log_suffix)
    LogWriter.write_log(mobile_labels,
                        filtered_dir + log2_filename + log_suffix)

    # kTailsRunner is given each trace as a tuple.
    desktop_labels = [tuple(trace) for trace in desktop_labels]
    mobile_labels = [tuple(trace) for trace in mobile_labels]

    plain_options = dict(add_dummy_init=False, add_dummy_terminal=False)

    for k in k_values:
        # Construction, run and graph retrieval each keep the order
        # desktop, mobile, desktop-simplified, mobile-simplified.
        desktop_runner = kTailsRunner(desktop_labels, k)
        mobile_runner = kTailsRunner(mobile_labels, k)
        desktop_runner_past = kTailsRunner(desktop_labels, k)
        mobile_runner_past = kTailsRunner(mobile_labels, k)

        desktop_runner.run_ktails(**plain_options)
        mobile_runner.run_ktails(**plain_options)
        desktop_runner_past.run_ktails(graph_simplification=1,
                                       **plain_options)
        mobile_runner_past.run_ktails(graph_simplification=1,
                                      **plain_options)

        desktop_graph = desktop_runner.get_graph()
        mobile_graph = mobile_runner.get_graph()
        desktop_graph_past = desktop_runner_past.get_graph()
        mobile_graph_past = mobile_runner_past.get_graph()

        # Node counts: desktop, desktop simplified, mobile, mobile simplified.
        print(len(desktop_graph.nodes()), len(desktop_graph_past.nodes()),
              len(mobile_graph.nodes()), len(mobile_graph_past.nodes()))

        # NOTE(review): everything below this `continue` is unreachable; only
        # the node counts above are produced. Remove the `continue` (and make
        # sure DOT_SUFFIX is defined) to write the models again.
        continue

        filtering_str = ""
        low_probability_filter = None  ##  0.05
        # Disabled graph filters used to go here: a low-probability transition
        # filter and a "simple" filter (threshold 20) from graph_filtering,
        # each appending its own tag to filtering_str.

        desktop_runner.write2file(GRAPH_OUTPUT + log1_filename +
                                  filtering_str + '_k' + str(k) + DOT_SUFFIX)
        mobile_runner.write2file(GRAPH_OUTPUT + log2_filename +
                                 filtering_str + '_k' + str(k) + DOT_SUFFIX)
        print("done running with k=", k)