def report_monitoring(monitoring_marks, output_path, detailed_result_path,
                      monitoring_reports, exhaustive_approach_result_file,
                      data_graph, pattern, Plist, repetitions,
                      pattern_file_name):
    # Create the directory that will contain results for each time instance.
    picklename = os.path.join(exhaustive_approach_result_file,
                              "fdict_exhaustive_%s.pickle" % pattern_file_name)
    with open(picklename, 'rb') as pickin:
        fdict_exhaustive = pickle.load(pickin)
    # smplr.complete_combinations(fdict_exhaustive, data_graph, pattern, Plist)  # add zeros to all absent combinations
    # smplr.smooth(fdict_exhaustive, fdict_exhaustive)  # Laplace smoothing also for the exhaustive dict

    snapshot_dict = {}  # renamed from `dict`, which shadowed the builtin
    duration = []
    nr_iterations = []
    sum_number_of_embeddings = []
    sum_of_embeddings_vers1 = []
    sum_of_squares_vers1 = []
    sum_of_the_square_embeddings = []
    sum_of_embeddings_random_old = []
    sum_of_square_emb_random_old = []
    nr_root_nodes = []

    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    # Some runs may have finished earlier than others, so reports can be
    # shorter than the list of monitoring marks; IndexError ends the scan.
    for i in range(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if monitoring_marks[i] not in snapshot_dict:
                snapshot_dict[monitoring_marks[i]] = []
            try:
                report = monitoring_reports[key_iter][i]
                snapshot_dict[monitoring_marks[i]].append(report)
                nr_iterations.append(report.nr_iterations)
                sum_number_of_embeddings.append(report.sum_nr_embeddings)
                sum_of_the_square_embeddings.append(
                    report.sum_of_the_square_embeddings)
                nr_root_nodes.append(report.nr_root_nodes)
                try:
                    sum_of_embeddings_random_old.append(
                        report.sum_number_of_embeddings_random)
                    sum_of_square_emb_random_old.append(
                        report.sum_of_the_square_embeddings_random)
                except AttributeError:  # older report objects lack these fields
                    continue
                try:
                    sum_of_embeddings_vers1.append(report.sum_nr_embeddings_aux)
                    sum_of_squares_vers1.append(
                        report.sum_of_the_square_embeddings_aux)
                except AttributeError:  # ditto for the aux accumulators
                    continue
            except IndexError:
                break

    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    interval = 0
    for time_snapshot in monitoring_marks:
        interval += 1
        randnode_results_KLD = []
        randnode_results_bhatta = []
        randnode_results_hellinger = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')
        fdict_furer_temp = snapshot_dict[time_snapshot]
        fdicts_Furer = []
        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)
        if len(fdict_furer_temp) == 0:
            continue
        for i in range(len(fdict_furer_temp)):
            fdict_limited = fdicts_Furer[i]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            observed_nodes_difference_per_snapshot.append(
                observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default_old(
                fdict_exhaustive, trash_factor=0.01)
            if len(pde) < 1:
                break
            [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited_old(
                fdict_limited, trash_list, default_key)
            randnode_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdl)))
            randnode_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdl)))
            randnode_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdl)))

        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('Random\n')
        # NOTE: the graph label reuses pattern_file_name, as in the original script.
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " +
                         str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average KLD on random: " +
                         str(numpy.mean(randnode_results_KLD)) +
                         " with SSTD: " +
                         str(numpy.std(randnode_results_KLD, ddof=1)) + "\n")
        resultfile.write("average bhatta on random: " +
                         str(numpy.mean(randnode_results_bhatta)) +
                         " with SSTD: " +
                         str(numpy.std(randnode_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average hellinger on random: " +
                         str(numpy.mean(randnode_results_hellinger)) +
                         " with SSTD: " +
                         str(numpy.std(randnode_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('random_results_KLD :' + str(randnode_results_KLD) + "\n")
        resultfile.write('random_results_bhatta :' + str(randnode_results_bhatta) + "\n")
        resultfile.write('random_results_hellinger :' + str(randnode_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed :' + str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run:' + str(
            numpy.mean(observed_nodes_difference_per_snapshot) /
            duration[counter_duration]) + "\n")
        resultfile.write(
            'avg difference of nodes observed from previous snapshot :' +
            str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write(
            "------------------------------------ Sampling info ------------------------------\n")
        resultfile.write('number of sampling iterations :' +
                         str(nr_iterations[counter]) + "\n")
        avg = (float(Decimal(sum_number_of_embeddings[counter])) /
               nr_iterations[counter]) * approaches.globals_sampling.nr_root_nodes
        old = False
        if avg < 0:  # a negative value marks the old report format
            avg = float(sum_of_embeddings_random_old[counter]) / nr_iterations[counter]
            old = True
        sum1 = Decimal(sum_of_the_square_embeddings[counter])
        sum2 = Decimal(sum_number_of_embeddings[counter])
        # Sample variance from the running sums: (Σx² - (Σx)²/n) / (n - 1).
        # Decimal ** 2 keeps the square exact where math.pow would lose precision.
        var = sum1 - (sum2 ** 2) / nr_iterations[counter]
        stdev2 = math.sqrt(var / (nr_iterations[counter] - 1))
        stdev = Decimal(stdev2) * Decimal(math.sqrt(nr_iterations[counter]))
        if old:  # old report format: use the dedicated accumulators instead
            variance = sum_of_square_emb_random_old[counter] / nr_iterations[counter]
            stdev2 = math.sqrt(variance / (nr_iterations[counter] - 1))
        resultfile.write('average of embeddings w.r.t sampling iterations:' +
                         str(avg) + "\n")
        resultfile.write('stdeviation of # embeddings:' + str(stdev2) + "\n")
        resultfile.write('2 stdeviation of # embeddings:' + str(stdev) + "\n")
        counter += 1
        resultfile.close()
        counter_duration += 1
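
# The snapshot report above recovers the mean and standard deviation of the
# embedding counts from two running sums only (Σx and Σx²), via the identity
# Var = (Σx² - (Σx)²/n) / (n - 1). A minimal, self-contained sketch of that
# computation follows; `running_stats` is a hypothetical helper shown purely
# for illustration, not part of this module.
def running_stats(values):
    """Mean and sample stdev from running sums, as the samplers keep them."""
    n = len(values)
    s1 = float(sum(values))                  # Σx, accumulated during sampling
    s2 = float(sum(v * v for v in values))   # Σx², accumulated alongside
    mean = s1 / n
    var = (s2 - s1 ** 2 / n) / (n - 1)       # equals numpy.std(values, ddof=1)**2
    return mean, math.sqrt(var)

# e.g. running_stats([2, 4, 4, 4, 5, 5, 7, 9]) -> (5.0, ~2.138), agreeing with
# numpy.mean / numpy.std(..., ddof=1).
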
def report(rndicts, all_randnode_times, NLIMIT_values, plot_result_dict,
           repetitions, detailed_result_path, output_path,
           exhaustive_approach_result_file, pattern_file_name,
           nr_embeddings_n_limits):
    if len(rndicts) == 0:
        # Text file, so open in text mode ('w'), not 'wb'.
        with open(os.path.join(output_path, 'no_results.info'), 'w') as file:
            file.write("No results for random - empty rndicts!")
    pickout = open(os.path.join(output_path, 'rndicts.pickle'), 'wb')
    pickle.dump(rndicts, pickout)
    pickout.close()
    pickout = open(os.path.join(output_path, 'all_randnode_times.pickle'), 'wb')
    pickle.dump(all_randnode_times, pickout)
    pickout.close()
    picklename = os.path.join(exhaustive_approach_result_file,
                              "fdict_exhaustive_%s.pickle" % pattern_file_name)
    with open(picklename, 'rb') as pickin:
        fdict_exhaustive = pickle.load(pickin)
    for nli in range(len(NLIMIT_values)):
        plot_result_dict[NLIMIT_values[nli]] = {}
        randnode_results_KLD = []
        randnode_results_bhatta = []
        randnode_results_hellinger = []
        randnode_times = []
        for i in range(repetitions):
            emb = nr_embeddings_n_limits[nli]
            randnode_times.append(all_randnode_times[i][nli])
            fdict_limited = rndicts[i][nli]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            # Drop rows whose frequencies do not reach 1% of the total.
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(
                fdict_exhaustive, trash_factor=0.01)
            if len(pde) < 1:
                break
            # The limited dicts reuse the trash list and default key computed
            # on the exhaustive dict, via the dedicated *_limited variant.
            [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited(
                fdict_limited, trash_list, default_key)
            randnode_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdl)))
            randnode_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdl)))
            randnode_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdl)))
        plot_result_dict[NLIMIT_values[nli]]["randomnode_KLD"] = (
            numpy.mean(randnode_results_KLD),
            numpy.std(randnode_results_KLD, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["randomnode_BHT"] = (
            numpy.mean(randnode_results_bhatta),
            numpy.std(randnode_results_bhatta, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["randomnode_HEL"] = (
            numpy.mean(randnode_results_hellinger),
            numpy.std(randnode_results_hellinger, ddof=1))
        # Store the run times as well, so they can be plotted later.
        plot_result_dict[NLIMIT_values[nli]]["randomnode_times"] = (
            numpy.mean(randnode_times), numpy.std(randnode_times, ddof=1))
        result_file_name = os.path.join(
            detailed_result_path,
            "ultimex_ICDM_" + pattern_file_name + pattern_file_name + "." +
            str(repetitions) + "x" + str(NLIMIT_values[nli]) + ".result")
        resultfile = open(result_file_name, 'w')
        resultfile.write('Random Vertex\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("NLIMIT: " + str(NLIMIT_values[nli]) + "\n")
        resultfile.write("repetitions: " + str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average average KLD on randomnode: " +
                         str(numpy.mean(randnode_results_KLD)) +
                         " with SSTD: " +
                         str(numpy.std(randnode_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on randomnode: " +
                         str(numpy.mean(randnode_results_bhatta)) +
                         " with SSTD: " +
                         str(numpy.std(randnode_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on randomnode: " +
                         str(numpy.mean(randnode_results_hellinger)) +
                         " with SSTD: " +
                         str(numpy.std(randnode_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("Random node took per run on average: " +
                         str(numpy.mean(randnode_times)) + " seconds." + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('randnode_results_KLD :' + str(randnode_results_KLD) + "\n")
        resultfile.write('randnode_results_bhatta :' + str(randnode_results_bhatta) + "\n")
        resultfile.write('randnode_results_hellinger :' + str(randnode_results_hellinger) + "\n")
        resultfile.write('randnode_times :' + str(randnode_times) + "\n")
        resultfile.write('Nr embeddings for limit: ' + str(emb))
        resultfile.close()
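
# report() fills plot_result_dict[NLIMIT] with (mean, SSTD) tuples per metric.
# Below is a short sketch of how a downstream plotting or tabulation step might
# read them; the numbers are made-up placeholders, only the dictionary shape
# mirrors what report() builds. The helper name is hypothetical.
def _print_plot_results_example():
    example_plot_result_dict = {
        100: {"randomnode_KLD": (0.12, 0.03), "randomnode_times": (1.8, 0.2)},
        500: {"randomnode_KLD": (0.07, 0.02), "randomnode_times": (8.9, 1.1)},
    }
    for nlimit in sorted(example_plot_result_dict):
        kld_mean, kld_sstd = example_plot_result_dict[nlimit]["randomnode_KLD"]
        t_mean, t_sstd = example_plot_result_dict[nlimit]["randomnode_times"]
        print("NLIMIT=%d: KLD %.3f +/- %.3f in %.1fs +/- %.1fs"
              % (nlimit, kld_mean, kld_sstd, t_mean, t_sstd))
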
def report(output_path, detailed_result_path, fudicts, plot_result_dict,
           all_furer_times, exhaustive_approach_result_file, data_graph,
           pattern, Plist, NLIMIT_values, repetitions, pattern_file_name,
           fdict_exhaustive, iteration_counter_n_limit, n_limit_embeddings):
    if len(fudicts) == 0:
        # Text file, so open in text mode ('w'), not 'wb'.
        with open(os.path.join(output_path, 'no_results.info'), 'w') as file:
            file.write("No results for Furer - empty fudicts!")
    pickout = open(os.path.join(output_path, 'fudicts.pickle'), 'wb')
    pickle.dump(fudicts, pickout)
    pickout.close()
    pickout = open(os.path.join(output_path, 'all_furer_times.pickle'), 'wb')
    pickle.dump(all_furer_times, pickout)
    pickout.close()
    # NOTE: this overrides the fdict_exhaustive argument, as in the original flow.
    picklename = os.path.join(exhaustive_approach_result_file,
                              "fdict_exhaustive_%s.pickle" % pattern_file_name)
    with open(picklename, 'rb') as pickin:
        fdict_exhaustive = pickle.load(pickin)
    # Add zeros for all combinations that never occurred, then Laplace-smooth
    # the exhaustive dictionary as well.
    smplr.complete_combinations(fdict_exhaustive, data_graph, pattern, Plist)
    smplr.smooth(fdict_exhaustive, fdict_exhaustive)
    for nli in range(len(NLIMIT_values)):
        plot_result_dict[NLIMIT_values[nli]] = {}
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        for i in range(repetitions):
            furer_times.append(all_furer_times[i][nli])
            fdict_limited = fudicts[i][nli]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            fdict_Furer = fudicts[i][nli]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(
                fdict_exhaustive, trash_factor=0.01)
            if len(pde) < 1:
                break
            emb = n_limit_embeddings[nli]
            [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited(
                fdict_limited, trash_list, default_key)
            [pdf, tl, dk] = smplr.make_pd_general_kickout_default_limited(
                fdict_Furer, trash_list, default_key)
            furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))
        plot_result_dict[NLIMIT_values[nli]]["furer_KLD"] = (
            numpy.mean(furer_results_KLD),
            numpy.std(furer_results_KLD, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_BHT"] = (
            numpy.mean(furer_results_bhatta),
            numpy.std(furer_results_bhatta, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_HEL"] = (
            numpy.mean(furer_results_hellinger),
            numpy.std(furer_results_hellinger, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_times"] = (
            numpy.mean(furer_times), numpy.std(furer_times, ddof=1))
        result_file_name = os.path.join(
            detailed_result_path,
            "res_" + pattern_file_name + pattern_file_name + "." +
            str(repetitions) + "x" + str(NLIMIT_values[nli]) + ".result")
        resultfile = open(result_file_name, 'w')
        resultfile.write('Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("NLIMIT: " + str(NLIMIT_values[nli]) + "\n")
        resultfile.write("repetitions: " + str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average average KLD on furer: " +
                         str(numpy.mean(furer_results_KLD)) + " with SSTD: " +
                         str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on furer: " +
                         str(numpy.mean(furer_results_bhatta)) + " with SSTD: " +
                         str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on furer: " +
                         str(numpy.mean(furer_results_hellinger)) + " with SSTD: " +
                         str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("Furer took per run on average: " +
                         str(numpy.mean(furer_times)) + " seconds." + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('furer_results_KLD :' + str(furer_results_KLD) + "\n")
        resultfile.write('furer_results_bhatta :' + str(furer_results_bhatta) + "\n")
        resultfile.write('furer_results_hellinger :' + str(furer_results_hellinger) + "\n")
        resultfile.write('furer_times :' + str(furer_times) + "\n")
        resultfile.write('Nr embeddings for limit: ' + str(emb))
        resultfile.close()
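
# All three report functions compare an exhaustive distribution (pde) against
# a sampled one (pdl/pdf) using KL divergence, Bhattacharyya distance and
# Hellinger distance; the smoothing above exists precisely so KLD never sees a
# zero in q. The textbook formulas for two aligned discrete distributions are
# sketched below. The actual su.avg_kld / su.avg_bhatta / su.avg_hellinger in
# this codebase additionally average over the rows of a probability table, so
# these plain versions are illustrative only.
def kld(p, q):
    # D_KL(p || q) = Σ p_i * log(p_i / q_i); q is assumed smoothed (no zeros).
    return sum(pi * math.log(pi / qi) for pi, qi in zip(p, q) if pi > 0)


def bhattacharyya(p, q):
    # D_B(p, q) = -ln Σ sqrt(p_i * q_i)
    return -math.log(sum(math.sqrt(pi * qi) for pi, qi in zip(p, q)))


def hellinger(p, q):
    # H(p, q) = (1/sqrt(2)) * sqrt(Σ (sqrt(p_i) - sqrt(q_i))²)
    return math.sqrt(sum((math.sqrt(pi) - math.sqrt(qi)) ** 2
                         for pi, qi in zip(p, q))) / math.sqrt(2.0)
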
def report_monitoring(monitoring_marks, output_path, detailed_result_path,
                      monitoring_reports, exhaustive_approach_result_file,
                      data_graph, pattern, Plist, repetitions,
                      pattern_file_name, fdict_exhaustive):
    # Create the directory that will contain results for each time instance.
    snapshot_dict = {}  # renamed from `dict`, which shadowed the builtin
    duration = []
    begin = 0
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    embeddings_estimate = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # Some runs may have finished earlier than others, so reports can be
    # shorter than the list of monitoring marks; IndexError ends the scan.
    for i in range(len(monitoring_marks)):  # range, not the Python-2 xrange
        for key_iter in monitoring_reports.keys():
            if monitoring_marks[i] not in snapshot_dict:
                snapshot_dict[monitoring_marks[i]] = []
            try:
                report = monitoring_reports[key_iter][i]
                snapshot_dict[monitoring_marks[i]].append(report)
                nr_iterations.append(report.nr_iterations)
                sum_of_embeddings.append(report.sum_nr_embeddings)
                sum_of_squares.append(report.sum_of_the_square_embeddings)
                embeddings_estimate.append(report.embeddings_estimate)
                sum_of_root_node_emb.append(report.sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(
                    report.sum_of_the_extra_square_embeddings)
            except IndexError:
                break
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        if counter == 1:  # NOTE: only the first time snapshot is reported
            break
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')
        fdict_furer_temp = snapshot_dict[time_snapshot]
        fdicts_Furer = []
        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)
        if len(fdict_furer_temp) == 0:
            continue
        for i in range(len(fdict_furer_temp)):
            fdict_limited = fdicts_Furer[i]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(
                observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(
                fdict_exhaustive, trash_factor=0.01)
            # print "Exhaustive dict: ", pde
            if len(pde) < 1:
                break
            [pdf, tl, dk] = smplr.make_pd_general_kickout_default_limited(
                fdict_Furer, trash_list, default_key)
            furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))
        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " +
                         str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average average KLD on furer: " +
                         str(numpy.mean(furer_results_KLD)) + " with SSTD: " +
                         str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on furer: " +
                         str(numpy.mean(furer_results_bhatta)) + " with SSTD: " +
                         str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on furer: " +
                         str(numpy.mean(furer_results_hellinger)) + " with SSTD: " +
                         str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) + "\n")
        resultfile.write('furer_results_bhatta : ' + str(furer_results_bhatta) + "\n")
        resultfile.write('furer_results_hellinger : ' + str(furer_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run :' + str(
            numpy.mean(observed_nodes_difference_per_snapshot) /
            duration[counter_duration]) + "\n")
        resultfile.write(
            'avg difference of nodes observed from previous snapshot :' +
            str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write(
            "------------------------------------ Sampling info ------------------------------\n")
        resultfile.write('number of sampling iterations : ' +
                         str(nr_iterations[counter]) + "\n")
        # If the root-node accumulators were never filled, fall back to the
        # plain embedding sums.
        if (sum_of_squares_root_node_emb[counter] == 0 and
                sum_of_root_node_emb[counter] == 0):
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[counter]
            # embeddings_estimate[counter]
        resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) + "\n")
        if (sum_of_squares_root_node_emb[counter] == 0 and
                sum_of_root_node_emb[counter] == 0):
            # Old-style standard deviation from the plain running sums;
            # Decimal ** 2 keeps the square exact where math.pow would overflow.
            a = Decimal(sum_of_squares[counter]) - (
                Decimal(sum_of_embeddings[counter]) ** 2 /
                Decimal(nr_iterations[counter]))
            stdeviation = math.sqrt(a / Decimal(nr_iterations[counter] - 1))
        else:
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                Decimal(sum_of_root_node_emb[counter]) ** 2 /
                Decimal(nr_iterations[counter]))
            stdeviation = math.sqrt(a / Decimal(nr_iterations[counter] - 1))
        resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) + "\n")
        resultfile.close()
        counter += 1
        counter_duration += 1
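
# report_monitoring() above chooses between two accumulator pairs when
# computing the standard deviation: the plain embedding sums, or the
# root-node ("extra") sums when those were populated. A hypothetical helper
# capturing just that branch, for clarity; the code above does not call it.
def embeddings_stdev(n_iter, sum_emb, sum_sq, root_sum_emb, root_sum_sq):
    # Fall back to the plain sums when the root-node accumulators are empty.
    if root_sum_emb == 0 and root_sum_sq == 0:
        s1, s2 = Decimal(sum_emb), Decimal(sum_sq)
    else:
        s1, s2 = Decimal(root_sum_emb), Decimal(root_sum_sq)
    a = s2 - (s1 ** 2) / Decimal(n_iter)        # (n - 1) * sample variance
    return math.sqrt(a / Decimal(n_iter - 1))   # sample standard deviation
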
def report_monitoring_my_version_online(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_result_file, data_graph,
        pattern, Plist, repetitions, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations):
    snapshot_dict = {}  # renamed from `dict`, which shadowed the builtin
    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # Some runs may have finished earlier than others. Unlike the functions
    # above, monitoring_reports is a flat list here, indexed per monitoring
    # mark (no key_iter loop).
    for i in range(len(monitoring_marks)):
        if monitoring_marks[i] not in snapshot_dict:
            snapshot_dict[monitoring_marks[i]] = []
        try:
            report = monitoring_reports[i]
            snapshot_dict[monitoring_marks[i]].append(report)
            nr_iterations.append(report.nr_iterations)
            sum_of_embeddings.append(report.sum_nr_embeddings)
            sum_of_squares.append(report.sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(report.sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                report.sum_of_the_extra_square_embeddings)
        except IndexError:
            break
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    average_klds = []
    average_bhattas = []
    average_hellingers = []
    std_klds = []
    std_bhattas = []
    std_hellingers = []
    avg_nodes_observed = []
    nr_nodes_per_time_interval_per_runs = []
    number_of_sampling_iterations = []
    average_of_embeddings = []
    stdevs = []
    for time_snapshot in monitoring_marks:
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []  # kept from the original; currently unused
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')
        fdict_furer_temp = snapshot_dict[time_snapshot]
        fdicts_Furer = []
        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)
        if len(fdict_furer_temp) == 0:
            continue
        for i in range(len(fdict_furer_temp)):
            approaches.globals_sampling.nr_iterations = nr_iterations[i]
            fdict_limited = fdicts_Furer[i]
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(
                observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            [pde, trash_list, default_key] = \
                smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
            if len(pde) < 1:
                break
            nr_possible_combinations = smplr.complete_combinations_1(
                fdict_Furer, data_graph, pattern, Plist)
            pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                fdict_Furer)
            # print smplr.transform_to_ptable(pde)
            # print smplr.transform_to_ptable(pdf)
            false_furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            false_furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            false_furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))
        average_klds.append(numpy.mean(false_furer_results_KLD))
        std_klds.append(numpy.std(false_furer_results_KLD, ddof=1))
        average_bhattas.append(numpy.mean(false_furer_results_bhatta))
        std_bhattas.append(numpy.std(false_furer_results_bhatta, ddof=1))
        average_hellingers.append(numpy.mean(false_furer_results_hellinger))
        std_hellingers.append(numpy.std(false_furer_results_hellinger, ddof=1))
        avg_nodes_observed.append(numpy.mean(observed_nodes))
        number_of_sampling_iterations.append(nr_iterations[counter])
        nr_nodes_per_time_interval_per_runs.append(
            float(numpy.mean(observed_nodes_difference_per_snapshot) /
                  duration[counter_duration]))
        # If the root-node accumulators were never filled, fall back to the
        # plain embedding sums.
        if (sum_of_squares_root_node_emb[counter] == 0 and
                sum_of_root_node_emb[counter] == 0):
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[counter]
        average_of_embeddings.append(nr_embeddings_temp)
        stdeviation = numpy.nan
        if (sum_of_squares_root_node_emb[counter] == 0 and
                sum_of_root_node_emb[counter] == 0):
            # Old-style standard deviation from the plain running sums;
            # Decimal ** 2 keeps the square exact where math.pow would overflow.
            a = Decimal(sum_of_squares[counter]) - (
                Decimal(sum_of_embeddings[counter]) ** 2 /
                Decimal(nr_iterations[counter]))
            stdeviation = math.sqrt(a / Decimal(nr_iterations[counter] - 1))
        else:
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                Decimal(sum_of_root_node_emb[counter]) ** 2 /
                Decimal(nr_iterations[counter]))
            stdeviation = math.sqrt(a / Decimal(nr_iterations[counter] - 1))
        stdevs.append(stdeviation)
        counter += 1
        counter_duration += 1
    return {
        "average_klds": average_klds,
        "average_bhattas": average_bhattas,
        "average_hellingers": average_hellingers,
        "std_klds": std_klds,
        "std_bhattas": std_bhattas,
        "std_hellingers": std_hellingers,
        "avg_nodes_observed": avg_nodes_observed,
        "nr_nodes_per_time_interval_per_runs": nr_nodes_per_time_interval_per_runs,
        "number_of_sampling_iterations": number_of_sampling_iterations,
        "average_of_embeddings": average_of_embeddings,
        "stdevs": stdevs,
    }
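
# A minimal sketch of consuming the summary dictionary returned by
# report_monitoring_my_version_online(); the numbers below are made-up
# placeholders and the helper name is hypothetical, only the keys match the
# return value above.
def _print_online_monitoring_example():
    stats = {
        "average_klds": [0.31, 0.18, 0.11],
        "std_klds": [0.05, 0.03, 0.02],
    }
    marks = [60, 120, 180]  # hypothetical monitoring marks, in seconds
    for t, kld_mean, kld_std in zip(marks, stats["average_klds"],
                                    stats["std_klds"]):
        print("t=%4ds  KLD=%.2f (SSTD %.2f)" % (t, kld_mean, kld_std))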