Code example #1
0
def martin_version_report(fdict_exhaustive, data_graph, pattern,
                          monitoring_marks, output_path, detailed_result_path,
                          monitoring_reports, exhaustive_approach_results_path,
                          Plist, nr, pattern_file_name):
    print "LEN FDICT_EXHAUSTIVE (BEFORE): ", len(fdict_exhaustive)
    size_fdict = len(fdict_exhaustive)
    num_embeddings = 0
    for k in fdict_exhaustive.keys():
        num_embeddings = num_embeddings + fdict_exhaustive[k]
    start_time = time.time()
    nr_possible_combinations = smplr.complete_combinations(
        fdict_exhaustive, data_graph, pattern,
        Plist)  # add zeros to all not present combinations
    smplr.smooth(fdict_exhaustive, fdict_exhaustive)
    report.report_monitoring(monitoring_marks, output_path,
                             detailed_result_path, monitoring_reports,
                             exhaustive_approach_results_path, data_graph,
                             pattern, Plist, nr, pattern_file_name,
                             fdict_exhaustive)  #print monitoring_reports
    print "ELAPSED TIME: ", time.time() - start_time
Code example #2
0
def report(output_path,detailed_result_path,fudicts,plot_result_dict,all_furer_times,exhaustive_approach_result_file,data_graph,pattern,Plist,NLIMIT_values,repetitions,pattern_file_name,fdict_exhaustive,iteration_counter_n_limit,n_limit_embeddings):    
    if (len(fudicts)==0):
        with open(os.path.join(output_path,'no_results.info'), 'wb') as file:
            file.write("No results for random - empty fudicts!")
    

    pickout = open(os.path.join(output_path,'fudicts.pickle'), 'wb')
    pickle.dump(fudicts, pickout)
    pickout.close()
    
    pickout = open(os.path.join(output_path,'all_furer_times.pickle'), 'wb')
    pickle.dump(all_furer_times, pickout)
    pickout.close()
    
    picklename = os.path.join(exhaustive_approach_result_file,"fdict_exhaustive_%s.pickle" % pattern_file_name)
    pickin = open(picklename, 'rb')
    fdict_exhaustive = pickle.load(pickin)
    
    smplr.complete_combinations(fdict_exhaustive, data_graph,  pattern,  Plist)      # add zeros to all not present combinations
    smplr.smooth(fdict_exhaustive,  fdict_exhaustive)     # Laplace smoothing also for the exhaustive
    
    for nli in range(len(NLIMIT_values)):
        print "ITERATION COUNTER FOR THIS LIMIT: ",iteration_counter_n_limit[nli]
        print "REPORTING LIMIT: ",NLIMIT_values[nli]
        plot_result_dict[NLIMIT_values[nli]] = {}
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        
        for i in range(repetitions):
            furer_times.append(all_furer_times[i][nli])
            
            fdict_limited = fudicts[i][nli]
            smplr.smooth(fdict_limited,  fdict_exhaustive)    # smoothing to avoid zeros
            fdict_Furer = fudicts[i][nli]   
            
            [pde,  trash_list,  default_key] = smplr.make_pd_general_kickout_default(fdict_exhaustive,  trash_factor=0.01)     # we remove rows where frequencies do not reach 1%            
            
            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            
            emb=n_limit_embeddings[nli]
            
            [pdl,  tl,  dk] = smplr.make_pd_general_kickout_default_limited(fdict_limited,  trash_list,  default_key)
            [pdf ,  tl,  dk]= smplr.make_pd_general_kickout_default_limited(fdict_Furer,  trash_list,  default_key)
            print "Appending results ..."
            furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
        plot_result_dict[NLIMIT_values[nli]]["furer_KLD"] = (numpy.mean(furer_results_KLD),  numpy.std(furer_results_KLD,  ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_BHT"] = (numpy.mean(furer_results_bhatta),  numpy.std(furer_results_bhatta,  ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_HEL"] = (numpy.mean(furer_results_hellinger),  numpy.std(furer_results_hellinger,  ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_times"] = (numpy.mean(furer_times),  numpy.std(furer_times,  ddof=1))

        result_file_name = detailed_result_path+"/"+"res_" + pattern_file_name + pattern_file_name+"."+str(repetitions) +"x"+str(NLIMIT_values[nli])+".result"
        resultfile = open(result_file_name,  'w')
        resultfile.write('Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +" and pattern: "+pattern_file_name+"\n")
        resultfile.write("NLIMIT: " + str(NLIMIT_values[nli]) +"\n")
        resultfile.write("repetitions: " + str(repetitions) +"\n")
        resultfile.write(" " +"\n")
        resultfile.write("average average KLD on randomnode: " + str(numpy.mean(furer_results_KLD))  + " with SSTD: " + str(numpy.std(furer_results_KLD,  ddof=1)) +"\n")
        resultfile.write("average average bhatta on randomnode: " + str(numpy.mean(furer_results_bhatta))  + " with SSTD: " + str(numpy.std(furer_results_bhatta,  ddof=1)) +"\n")
        resultfile.write("average average hellinger on randomnode: " + str(numpy.mean(furer_results_hellinger))  + " with SSTD: " + str(numpy.std(furer_results_hellinger,  ddof=1)) +"\n")
        resultfile.write(" " +"\n")
        resultfile.write("Random node took per run on average: " +str(numpy.mean(furer_times)) + " seconds." +"\n")
        resultfile.write('-----DETAILED RESULTS-----' +"\n")
        resultfile.write('randnode_results_KLD :' + str(furer_results_KLD) +"\n")
        resultfile.write('randnode_results_bhatta :' + str(furer_results_bhatta) +"\n")
        resultfile.write('randnode_results_hellinger :' + str(furer_results_hellinger) +"\n")
        resultfile.write('randnode_times :' + str(furer_times) +"\n")
        resultfile.write('Nr embeddings for limit: '+str(emb))
        resultfile.close()
Code example #3
0
def report_monitoring(monitoring_marks, output_path, detailed_result_path,
                      monitoring_reports, exhaustive_approach_result_file,
                      data_graph, pattern, Plist, repetitions,
                      pattern_file_name):
    """Write one 'res_time_<t>.info' result file per monitoring snapshot.

    Loads the pickled exhaustive frequency dictionary, completes and
    Laplace-smooths it, then for every time mark in monitoring_marks
    compares the per-run "false Furer" snapshots collected in
    monitoring_reports against the exhaustive distribution (average KLD,
    Bhattacharyya and Hellinger) and appends per-snapshot sampling
    statistics (iterations, embedding counts, standard deviation).

    Assumptions (to confirm against callers):
      - monitoring_reports maps a run id to a list of snapshot objects, one
        per mark, exposing nr_iterations, sum_nr_embeddings,
        sum_of_the_square_embeddings, sum_nr_extra_embeddings,
        sum_of_the_extra_square_embeddings, current_fdict and
        number_of_observed_nodes.
      - monitoring_marks is an increasing sequence of time offsets.
    """
    #CREATE DIRECTORY THAT WILL CONTAINS RESULTS FOR EACH TIME INSTANCE

    picklename = os.path.join(exhaustive_approach_result_file,
                              "fdict_exhaustive_%s.pickle" % pattern_file_name)
    # NOTE(review): this pickle handle is never closed.
    pickin = open(picklename, 'rb')
    fdict_exhaustive = pickle.load(pickin)

    smplr.complete_combinations(
        fdict_exhaustive, data_graph, pattern,
        Plist)  # add zeros to all not present combinations
    smplr.smooth(fdict_exhaustive,
                 fdict_exhaustive)  # Laplace smoothing also for the exhaustive

    # Maps each monitoring mark to the list of snapshot reports (one entry
    # per run that reached it).
    # NOTE(review): shadows the builtin 'dict' for the rest of the function.
    dict = {}

    duration = []  # length of each monitoring interval
    nr_iterations = []  # flat, per (mark, run) snapshot, in visiting order
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0

    # Interval durations between consecutive monitoring marks.
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    #the problem might be that some runs finished earlier, and some later.
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if not (monitoring_marks[i] in dict.keys()):
                dict[monitoring_marks[i]] = []
            try:
                dict[monitoring_marks[i]].append(
                    monitoring_reports[key_iter][i])
                nr_iterations.append(
                    monitoring_reports[key_iter][i].nr_iterations)
                sum_of_embeddings.append(
                    monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_squares.append(monitoring_reports[key_iter]
                                      [i].sum_of_the_square_embeddings)
                sum_of_root_node_emb.append(
                    monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(
                    monitoring_reports[key_iter]
                    [i].sum_of_the_extra_square_embeddings)

            except IndexError:
                # A run that finished early has fewer snapshots than marks;
                # stop scanning runs for this mark.
                break

    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    # Per-run node count seen at the previous snapshot (for deltas).
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)

    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        # NOTE(review): single-argument join -- this is detailed_result_path
        # itself; presumably a per-snapshot subdirectory was once intended.
        snapshot_directory_path = os.path.join(detailed_result_path, )
        if not (os.path.exists(snapshot_directory_path)):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        fdict_furer_temp = dict[time_snapshot]

        fdicts_Furer = []

        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)

        # No run produced data for this mark; note that counter and
        # counter_duration are intentionally not advanced here.
        if len(fdict_furer_temp) == 0:
            continue

        for i in range(len(fdict_furer_temp)):
            # fdict_limited and fdict_Furer alias the same dictionary.
            fdict_limited = fdicts_Furer[i]
            smplr.smooth(fdict_limited,
                         fdict_exhaustive)  # smoothing to avoid zeros
            fdict_Furer = fdicts_Furer[i]

            observed_nodes_difference_per_snapshot.append(observed_nodes[i] -
                                                          snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]

            # we remove rows where frequencies do not reach 1%
            [pde, trash_list, default_key
             ] = smplr.make_pd_general_kickout_default(fdict_exhaustive,
                                                       trash_factor=0.01)

            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break

            #[pdl,  tl,  dk] = smplr.make_pd_general_kickout_default_limited(fdict_limited,  trash_list,  default_key)
            [pdf, tl, dk] = smplr.make_pd_general_kickout_default_limited(
                fdict_Furer, trash_list, default_key)
            false_furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            false_furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            false_furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))

        print "Writing to: ", snapshot_directory_file
        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('False Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " +
                         str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average KLD on false furer: " +
                         str(numpy.mean(false_furer_results_KLD)) +
                         " with SSTD: " +
                         str(numpy.std(false_furer_results_KLD, ddof=1)) +
                         "\n")
        resultfile.write("average bhatta on false furer: " +
                         str(numpy.mean(false_furer_results_bhatta)) +
                         " with SSTD: " +
                         str(numpy.std(false_furer_results_bhatta, ddof=1)) +
                         "\n")
        resultfile.write(
            "average hellinger on false furer: " +
            str(numpy.mean(false_furer_results_hellinger)) + " with SSTD: " +
            str(numpy.std(false_furer_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('false_results_KLD :' + str(false_furer_results_KLD) +
                         "\n")
        resultfile.write('false_results_bhatta :' +
                         str(false_furer_results_bhatta) + "\n")
        resultfile.write('false_results_hellinger :' +
                         str(false_furer_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed :' +
                         str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run:' + str(
            (numpy.mean(observed_nodes_difference_per_snapshot) /
             duration[counter_duration])) + "\n")
        resultfile.write(
            'avg difference of nodes observed from previous snapshot :' +
            str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write(
            "------------------------------------ Sampling info ------------------------------\n"
        )
        resultfile.write('number of sampling iterations : ' +
                         str(nr_iterations[counter]) + "\n")
        resultfile.write('average of embeddings : ' +
                         str(sum_of_embeddings[counter] /
                             nr_iterations[counter]) + "\n")
        #resultfile.write('average of embeddings w.r.t sampling iterations:' + str(sum_of_embeddings[counter]/float(nr_iterations[counter]))+"\n")
        # When no root-node (extra) embedding stats were collected, fall back
        # to the plain embedding sums.
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[
                counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[
                counter]

        print "Writing to file: ", nr_embeddings_temp
        resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) +
                         "\n")
        # Sample standard deviation via sum / sum-of-squares, computed in
        # Decimal to avoid float overflow on large embedding counts.
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            #we do the old standard deviation
            a = Decimal(sum_of_squares[counter]) - (
                Decimal(math.pow(sum_of_embeddings[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
            stdeviation = math.sqrt(
                a / Decimal(float((nr_iterations[counter] - 1))))
        else:
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                Decimal(math.pow(sum_of_root_node_emb[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
            stdeviation = math.sqrt(
                a / Decimal(float((nr_iterations[counter] - 1))))
        print "old stdev: ", stdeviation
        resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) +
                         "\n")
        resultfile.close()
        counter += 1
        counter_duration += 1
Code example #4
0
File: getStatistics.py  Project: irmaR/graph_sampling
def getStatistics_furer(fudicts, fdict_exhaustive, pattern, data_graph,
                        target_indices, targe_ids, head_node, target_nodes,
                        detailed_result_path, pattern_file_name):
    """Compare Furer dictionaries with the exhaustive one, marginalised onto
    the target positions.

    Both the exhaustive frequency dictionary and the sampled ones are first
    projected onto the key positions listed in target_indices (summing the
    counts of keys that collapse together), then the projected exhaustive
    dictionary is completed and smoothed and compared against the projected
    Furer dictionary via average KLD / Bhattacharyya / Hellinger.

    NOTE(review): head_node, target_nodes and furer_times are unused in the
    visible body.
    """
    #CREATE DIRECTORY THAT WILL CONTAIN RESULTS FOR EACH TIME INSTANCE
    furer_results_KLD = []
    furer_results_bhatta = []
    furer_results_hellinger = []
    furer_times = []
    observed_nodes = []
    observed_nodes_difference_per_snapshot = []

    snapshot_directory_path = os.path.join(detailed_result_path)
    if not (os.path.exists(snapshot_directory_path)):
        os.mkdir(snapshot_directory_path)
    snapshot_directory_file = os.path.join(snapshot_directory_path,
                                           'statistics.info')

    #filter out exhaustive dictionary
    filtered_f_dict_exhaustive = {}
    for key in fdict_exhaustive.keys():
        new_key = ()
        for target in target_indices:
            # NOTE(review): (key[target - 1]) is NOT a one-tuple -- '+=' only
            # works here if key[target - 1] is itself a tuple/sequence;
            # otherwise '(key[target - 1],)' was probably intended. Confirm
            # the key element type before changing.
            new_key += (key[target - 1])
        if not new_key in filtered_f_dict_exhaustive.keys():
            filtered_f_dict_exhaustive[new_key] = 0
        if new_key in filtered_f_dict_exhaustive.keys():
            filtered_f_dict_exhaustive[new_key] += fdict_exhaustive[key]

    #filter out furer dictionaries
    filtered_fudicts = []
    fudict_monitors = fudicts[0]
    # NOTE(review): 'dict' here shadows the builtin within this loop.
    for dict in fudict_monitors:
        temp_fudict = {}
        for key in dict.keys():
            new_key = ()
            for target in target_indices:
                # Same one-tuple caveat as above.
                new_key += (key[target - 1])
            if not new_key in temp_fudict.keys():
                temp_fudict[new_key] = 0
            if new_key in temp_fudict.keys():
                temp_fudict[new_key] += dict[key]
        filtered_fudicts.append(temp_fudict)

    smplr.complete_combinations(
        filtered_f_dict_exhaustive, data_graph, pattern,
        targe_ids)  # add zeros to all not present combinations
    smplr.smooth(filtered_f_dict_exhaustive, filtered_f_dict_exhaustive)

    # NOTE(review): only the dictionary at index 1 (the second monitor
    # snapshot) is evaluated -- verify this hard-coded index is intentional.
    # fdict_limited and fdict_Furer alias the same dictionary.
    fdict_limited = filtered_fudicts[1]
    smplr.smooth(fdict_limited, filtered_f_dict_exhaustive)
    fdict_Furer = filtered_fudicts[1]
    # we remove rows where frequencies do not reach 1%
    [pde, trash_list, default_key
     ] = smplr.make_pd_general_kickout_default(filtered_f_dict_exhaustive,
                                               trash_factor=0.01)
    [pdl, tl, dk
     ] = smplr.make_pd_general_kickout_default_limited(fdict_limited,
                                                       trash_list, default_key)
    [pdf, tl, dk
     ] = smplr.make_pd_general_kickout_default_limited(fdict_Furer, trash_list,
                                                       default_key)

    furer_results_KLD.append(
        su.avg_kld(smplr.transform_to_ptable(pde),
                   smplr.transform_to_ptable(pdf)))
    furer_results_bhatta.append(
        su.avg_bhatta(smplr.transform_to_ptable(pde),
                      smplr.transform_to_ptable(pdf)))
    furer_results_hellinger.append(
        su.avg_hellinger(smplr.transform_to_ptable(pde),
                         smplr.transform_to_ptable(pdf)))