Esempio n. 1
0
def my_version_report(fdict_exhaustive, data_graph, pattern, monitoring_marks,
                      output_path, detailed_result_path, monitoring_reports,
                      exhaustive_approach_results_path, Plist, nr,
                      pattern_file_name, write):
    """Run the "furer" monitoring report for one experiment.

    When ``write`` is truthy, first pads ``fdict_exhaustive`` with zero
    counts for every combination that was never observed (via
    ``smplr.complete_combinations_1``) and stores the number of unobserved
    combinations in the sampling globals, then delegates the actual
    reporting to ``report.report_monitoring_my_version``.

    :param fdict_exhaustive: combination -> embedding-count dict from the
        exhaustive approach.
    :param write: when falsy, skip the combination padding and pass
        ``nr_non_observed_combinations=None`` to the reporter.
    """
    approaches.globals_sampling.report = "furer"
    nr_non_observed_combinations = None
    if write:
        size_fdict = len(fdict_exhaustive)
        # Total number of embeddings seen by the exhaustive approach.
        # NOTE(review): this total is currently unused below — kept only for
        # parity with the original code; confirm before removing entirely.
        num_embeddings = sum(fdict_exhaustive.values())
        # Side effect: adds zeros to all not-present combinations; returns
        # the total number of possible combinations.
        nr_possible_combinations = smplr.complete_combinations_1(
            fdict_exhaustive, data_graph, pattern, Plist)
        nr_non_observed_combinations = nr_possible_combinations - size_fdict
        approaches.globals_sampling.nr_non_observed_combinations = nr_non_observed_combinations
    report.report_monitoring_my_version(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_results_path, data_graph,
        pattern, Plist, nr, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations, write)
Esempio n. 2
0
def report_monitoring_my_version_online(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_result_file, data_graph,
        pattern, Plist, repetitions, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations):
    """Compute per-snapshot divergence statistics for one sampling run.

    For every time mark, the sampled distribution (from the run's monitoring
    report) is compared against the exhaustive one with KLD, Bhattacharyya
    and Hellinger distances, and per-snapshot aggregates are collected.

    Returns a dict of parallel lists (one entry per processed snapshot):
    averages and sample stds of the three divergences, observed-node stats,
    iteration counts, average embedding estimates and their stdevs.

    NOTE(review): ``monitoring_reports`` is assumed to be a per-snapshot
    sequence of report objects exposing ``nr_iterations``,
    ``sum_nr_embeddings``, ``current_fdict`` etc. — confirm against callers.
    """
    # Bucket of monitoring reports per time snapshot.
    reports_per_snapshot = {}

    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []

    # Length of each monitoring interval (difference of consecutive marks).
    begin = 0
    for time_mark in monitoring_marks:
        duration.append(time_mark - begin)
        begin = time_mark

    # Some runs finish earlier than others, hence the IndexError guard: stop
    # accumulating as soon as this run has no report for a snapshot.
    for i in range(len(monitoring_marks)):
        if monitoring_marks[i] not in reports_per_snapshot:
            reports_per_snapshot[monitoring_marks[i]] = []
        try:
            snapshot_report = monitoring_reports[i]
            reports_per_snapshot[monitoring_marks[i]].append(snapshot_report)
            nr_iterations.append(snapshot_report.nr_iterations)
            sum_of_embeddings.append(snapshot_report.sum_nr_embeddings)
            sum_of_squares.append(snapshot_report.sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(snapshot_report.sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                snapshot_report.sum_of_the_extra_square_embeddings)
        except IndexError:
            break

    # Nodes observed at the previous snapshot, per repetition.
    snapshot_inits = [0] * repetitions

    counter_duration = 0
    counter = 0

    average_klds = []
    average_bhattas = []
    average_hellingers = []
    std_klds = []
    std_bhattas = []
    std_hellingers = []
    avg_nodes_observed = []
    nr_nodes_per_time_interval_per_runs = []
    number_of_sampling_iterations = []
    average_of_embeddings = []
    stdevs = []

    for time_snapshot in monitoring_marks:
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        # os.path.join with a single argument is the argument itself; kept as
        # the directory that holds the per-snapshot result files.
        snapshot_directory_path = detailed_result_path
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)

        snapshot_reports = reports_per_snapshot[time_snapshot]

        fdicts_Furer = []
        for f in snapshot_reports:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)

        # Nothing recorded for this snapshot: skip it (counters untouched,
        # matching the behavior of the offline reporter).
        if not snapshot_reports:
            continue

        for i in range(len(snapshot_reports)):
            approaches.globals_sampling.nr_iterations = nr_iterations[i]
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(observed_nodes[i] -
                                                          snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            pde, trash_list, default_key = \
                smplr.make_pd_general_kickout_default_my_version(
                    fdict_exhaustive)
            if len(pde) < 1:
                break
            # Side effect: pads fdict_Furer with zero-count combinations.
            smplr.complete_combinations_1(fdict_Furer, data_graph, pattern,
                                          Plist)
            pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                fdict_Furer)
            ptable_exhaustive = smplr.transform_to_ptable(pde)
            ptable_furer = smplr.transform_to_ptable(pdf)
            false_furer_results_KLD.append(
                su.avg_kld(ptable_exhaustive, ptable_furer))
            false_furer_results_bhatta.append(
                su.avg_bhatta(ptable_exhaustive, ptable_furer))
            false_furer_results_hellinger.append(
                su.avg_hellinger(ptable_exhaustive, ptable_furer))

        average_klds.append(numpy.mean(false_furer_results_KLD))
        std_klds.append(numpy.std(false_furer_results_KLD, ddof=1))
        average_bhattas.append(numpy.mean(false_furer_results_bhatta))
        std_bhattas.append(numpy.std(false_furer_results_bhatta, ddof=1))
        average_hellingers.append(numpy.mean(false_furer_results_hellinger))
        std_hellingers.append(numpy.std(false_furer_results_hellinger, ddof=1))
        avg_nodes_observed.append(numpy.mean(observed_nodes))
        number_of_sampling_iterations.append(nr_iterations[counter])
        nr_nodes_per_time_interval_per_runs.append(
            float(numpy.mean(observed_nodes_difference_per_snapshot) /
                  duration[counter_duration]))

        # When the extra (root-node) counters are both zero, fall back to the
        # plain embedding counters; otherwise use the root-node counters.
        if (sum_of_squares_root_node_emb[counter] == 0
                and sum_of_root_node_emb[counter] == 0):
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[
                counter]
            a = Decimal(sum_of_squares[counter]) - (
                Decimal(math.pow(sum_of_embeddings[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[
                counter]
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                Decimal(math.pow(sum_of_root_node_emb[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
        average_of_embeddings.append(nr_embeddings_temp)
        # Guard against a slightly negative variance from rounding before
        # taking the square root (consistent with the offline reporter).
        if a > 0:
            stdeviation = math.sqrt(
                a / Decimal(float(nr_iterations[counter] - 1)))
        else:
            stdeviation = 0
        stdevs.append(stdeviation)
        counter += 1
        counter_duration += 1
    return {
        "average_klds": average_klds,
        "average_bhattas": average_bhattas,
        "average_hellingers": average_hellingers,
        "std_klds": std_klds,
        "std_bhattas": std_bhattas,
        "std_hellingers": std_hellingers,
        "avg_nodes_observed": avg_nodes_observed,
        "nr_nodes_per_time_interval_per_runs":
        nr_nodes_per_time_interval_per_runs,
        "number_of_sampling_iterations": number_of_sampling_iterations,
        "average_of_embeddings": average_of_embeddings,
        "stdevs": stdevs
    }
Esempio n. 3
0
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    """Write one 'res_time_<mark>.info' file per monitoring time snapshot.

    For every time mark the monitoring reports of all runs are bucketed,
    and (when ``write`` is True) the sampled distribution of each run is
    compared against the exhaustive one with KLD / Bhattacharyya /
    Hellinger distances via ``su``.  Aggregated statistics plus sampling
    info (iteration counts, average embeddings, stdev of embeddings) are
    written into ``detailed_result_path``.

    NOTE(review): ``monitoring_reports`` is assumed to map a run key to a
    per-snapshot list of report objects exposing ``nr_iterations``,
    ``sum_nr_embeddings``, ``current_fdict``, etc. — confirm with callers.
    Uses ``xrange`` and integer division, i.e. Python 2 semantics.
    """
    # CREATE DIRECTORY THAT WILL CONTAIN RESULTS FOR EACH TIME INSTANCE
    # Maps time snapshot -> list of report objects (shadows the builtin).
    dict = {}
    duration = []
    begin = 0
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    embeddings_estimate = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []

    # Length of each monitoring interval (difference of consecutive marks).
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    # the problem might be that some runs finished earlier, and some later.
    # Hence the IndexError guard: stop a run's accumulation at its last
    # available snapshot.
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if not (monitoring_marks[i] in dict.keys()):
                dict[monitoring_marks[i]] = []
            try:
                dict[monitoring_marks[i]].append(
                    monitoring_reports[key_iter][i])
                nr_iterations.append(
                    monitoring_reports[key_iter][i].nr_iterations)
                sum_of_embeddings.append(
                    monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_squares.append(monitoring_reports[key_iter]
                                      [i].sum_of_the_square_embeddings)
                embeddings_estimate.append(
                    monitoring_reports[key_iter][i].embeddings_estimate)
                sum_of_root_node_emb.append(
                    monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(
                    monitoring_reports[key_iter]
                    [i].sum_of_the_extra_square_embeddings)
            except IndexError:
                break
    # Nodes observed at the previous snapshot, one slot per repetition.
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)

    counter_duration = 0
    counter = 0

    for time_snapshot in monitoring_marks:
        approaches.globals_sampling.current_time_snapshot = time_snapshot
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        # os.path.join with one argument is the path itself.
        snapshot_directory_path = os.path.join(detailed_result_path, )
        if not (os.path.exists(snapshot_directory_path)):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        if write == True:
            fdict_furer_temp = dict[time_snapshot]
            fdicts_Furer = []

            for f in fdict_furer_temp:
                fdicts_Furer.append(f.current_fdict)
                observed_nodes.append(f.number_of_observed_nodes)

            # Nothing recorded for this snapshot: skip it entirely
            # (no result file is written, counters are not advanced).
            if len(fdict_furer_temp) == 0:
                continue

            for i in range(len(fdict_furer_temp)):
                approaches.globals_sampling.nr_iterations = nr_iterations[i]
                fdict_limited = fdicts_Furer[i]
                fdict_Furer = fdicts_Furer[i]
                #    fdict_Furer[k]=fdict_Furer[k]*experiments.globals.nr_iterations
                observed_nodes_difference_per_snapshot.append(
                    observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]
                # Exhaustive probability distribution (reference).
                [pde, trash, default_key
                 ] = smplr.make_pd_general_kickout_default_my_version(
                     fdict_exhaustive)
                if len(pde) < 1:
                    break
                # Side effect: pads fdict_Furer with zero-count combinations.
                nr_possible_combinations = smplr.complete_combinations_1(
                    fdict_Furer, data_graph, pattern, Plist)
                # Sampled probability distribution for this run.
                pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                    fdict_Furer)
                furer_results_KLD.append(
                    su.avg_kld(smplr.transform_to_ptable(pde),
                               smplr.transform_to_ptable(pdf)))
                furer_results_bhatta.append(
                    su.avg_bhatta(smplr.transform_to_ptable(pde),
                                  smplr.transform_to_ptable(pdf)))
                furer_results_hellinger.append(
                    su.avg_hellinger(smplr.transform_to_ptable(pde),
                                     smplr.transform_to_ptable(pdf)))

        # Write the per-snapshot summary file.
        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " +
                         str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average average KLD on furer: " +
                         str(numpy.mean(furer_results_KLD)) + " with SSTD: " +
                         str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on furer: " +
                         str(numpy.mean(furer_results_bhatta)) +
                         " with SSTD: " +
                         str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on furer: " +
                         str(numpy.mean(furer_results_hellinger)) +
                         " with SSTD: " +
                         str(numpy.std(furer_results_hellinger, ddof=1)) +
                         "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) +
                         "\n")
        resultfile.write('furer_results_bhatta : ' +
                         str(furer_results_bhatta) + "\n")
        resultfile.write('furer_results_hellinger : ' +
                         str(furer_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed : ' +
                         str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run :' + str(
            (numpy.mean(observed_nodes_difference_per_snapshot) /
             duration[counter_duration])) + "\n")
        resultfile.write(
            'avg difference of nodes observed from previous snapshot :' +
            str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write(
            "------------------------------------ Sampling info ------------------------------\n"
        )
        resultfile.write('number of sampling iterations : ' +
                         str(nr_iterations[counter]) + "\n")
        # When the extra (root-node) counters are both zero, fall back to the
        # plain embedding counters; otherwise use the root-node counters.
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[
                counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[
                counter]
            # embeddings_estimate[counter]
        resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) +
                         "\n")
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            # we do the old standard deviation
            a = Decimal(sum_of_squares[counter]) - (
                Decimal(math.pow(sum_of_embeddings[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
            stdeviation = math.sqrt(
                a / Decimal(float((nr_iterations[counter] - 1))))
        else:
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                Decimal(math.pow(sum_of_root_node_emb[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
            # Guard against a slightly negative variance from rounding.
            if a > 0:
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
            else:
                stdeviation = 0
        resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) +
                         "\n")
        resultfile.close()
        counter += 1
        counter_duration += 1
Esempio n. 4
0
def my_version_report_online(fdict_exhaustive, data_graph, pattern,
                             monitoring_marks, output_path,
                             detailed_result_path, monitoring_reports,
                             exhaustive_approach_results_path, Plist_base, nr,
                             pattern_file_name):
    """Aggregate per-run online reports and write one file per snapshot.

    First runs ``report.report_monitoring_my_version_online`` once per run in
    ``monitoring_reports``, then, for every time mark, averages the per-run
    statistics and writes them to ``res_time_<mark>.info`` in
    ``detailed_result_path``.
    """
    approaches.globals_sampling.report = "furer"
    size_fdict = len(fdict_exhaustive)
    # Total embeddings found by the exhaustive approach.
    # NOTE(review): currently unused below — kept for parity with the
    # original code; confirm before removing.
    num_embeddings = sum(fdict_exhaustive.values())
    results = {}
    for run_key in monitoring_reports.keys():
        approaches.globals_sampling.nr_non_observed_combinations = 0
        # Side effect: adds zeros to all not-present combinations; returns
        # the total number of possible combinations.
        nr_possible_combinations = smplr.complete_combinations_1(
            fdict_exhaustive, data_graph, pattern, Plist_base)
        nr_non_observed_combinations = nr_possible_combinations - size_fdict
        approaches.globals_sampling.nr_non_observed_combinations = nr_non_observed_combinations
        # BUG FIX: results used to be keyed by an unrelated 1-based counter
        # while being read back below with the monitoring_reports keys; key
        # by the run key so the aggregation lookups match.
        results[run_key] = report.report_monitoring_my_version_online(
            monitoring_marks, output_path, detailed_result_path,
            monitoring_reports[run_key], exhaustive_approach_results_path,
            data_graph, pattern, Plist_base, nr, pattern_file_name,
            fdict_exhaustive, nr_non_observed_combinations)

    for i in range(len(monitoring_marks)):
        avg_klds = []
        average_bhattas = []
        average_hellingers = []
        # NOTE(review): the std_* and per-interval/iteration accumulators are
        # collected but never written to the file — kept for parity.
        std_klds = []
        std_bhattas = []
        std_hellingers = []
        avg_nodes_observed = []
        nr_nodes_per_time_interval_per_runs = []
        number_of_sampling_iterations = []
        average_of_embeddings = []
        stdevs = []
        with open(
                os.path.join(detailed_result_path,
                             'res_time_' + str(monitoring_marks[i]) + ".info"),
                'w') as resultfile:
            resultfile.write('False Furer with different orderings\n')
            # Collect the i-th snapshot statistics of every run.
            for k in monitoring_reports.keys():
                avg_klds.append(results[k]['average_klds'][i])
                average_bhattas.append(results[k]['average_bhattas'][i])
                average_hellingers.append(results[k]['average_hellingers'][i])
                std_klds.append(results[k]['std_klds'][i])
                std_bhattas.append(results[k]['std_bhattas'][i])
                std_hellingers.append(results[k]['std_hellingers'][i])
                avg_nodes_observed.append(results[k]['avg_nodes_observed'][i])
                nr_nodes_per_time_interval_per_runs.append(
                    results[k]['nr_nodes_per_time_interval_per_runs'][i])
                number_of_sampling_iterations.append(
                    results[k]['number_of_sampling_iterations'][i])
                average_of_embeddings.append(
                    float(results[k]['average_of_embeddings'][i]))
                stdevs.append(results[k]['stdevs'][i])
            resultfile.write("average KLD on false furer: " +
                             str(numpy.mean(avg_klds)) + " with SSTD: " +
                             str(numpy.std(avg_klds, ddof=1)) + "\n")
            resultfile.write("average bhatta on false furer: " +
                             str(numpy.mean(average_bhattas)) +
                             " with SSTD: " +
                             str(numpy.std(average_bhattas, ddof=1)) + "\n")
            resultfile.write("average hellinger on false furer: " +
                             str(numpy.mean(average_hellingers)) +
                             " with SSTD: " +
                             str(numpy.std(average_hellingers, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('avg #nodes observed :' +
                             str(numpy.mean(avg_nodes_observed)) + "\n")
            resultfile.write('average of embeddings : ' +
                             str(numpy.mean(average_of_embeddings)) + "\n")
            # nanmean: skip runs whose stdev could not be computed (NaN).
            resultfile.write('stdeviation of # embeddings: ' +
                             str(numpy.nanmean(stdevs)) + "\n")