# Monitoring / reporting utilities for the Fuerer-style and random-node
# sampling experiments.  Python 2 module.
import math
import os
import time
from decimal import Decimal

import numpy

# project-local modules (import paths may differ in the actual repository)
import experiments.globals
import report
import smplr
import su


def my_version_report(fdict_exhaustive, data_graph, pattern, monitoring_marks,
                      output_path, detailed_result_path, monitoring_reports,
                      exhaustive_approach_results_path, Plist, nr,
                      pattern_file_name, write):
    experiments.globals.report = "furer"
    nr_non_observed_combinations = None
    start_time = time.time()
    if write:
        size_fdict = len(fdict_exhaustive)
        num_embeddings = 0
        for k in fdict_exhaustive.keys():
            # raw counts; BEWARE: an earlier version added +1 per key (Laplace smoothing)
            num_embeddings = num_embeddings + fdict_exhaustive[k]
        print "NR EMBEDDINGS BEFORE COMPLETING: ", num_embeddings
        # add zero counts for all combinations that were never observed
        nr_possible_combinations = smplr.complete_combinations_1(
            fdict_exhaustive, data_graph, pattern, Plist)
        nr_non_observed_combinations = nr_possible_combinations - size_fdict
        experiments.globals.nr_non_observed_combinations = nr_non_observed_combinations
    report.report_monitoring_my_version(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_results_path, data_graph,
        pattern, Plist, nr, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations, write)
    print "ELAPSED TIME: ", time.time() - start_time
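# The bookkeeping above hinges on one identity: after complete_combinations_1
# pads the frequency dict with zero counts, the number of never-observed
# combinations is (all possible combinations) - (observed ones).  A minimal,
# self-contained sketch of that padding step; `possible_keys` is a
# hypothetical stand-in for the combination enumeration done inside smplr.
def _complete_with_zeros_sketch(fdict, possible_keys):
    """Pad fdict with zero counts; return how many keys are possible."""
    for key in possible_keys:
        if key not in fdict:
            fdict[key] = 0  # never observed -> zero count
    return len(possible_keys)

# e.g. 5 possible label combinations, 2 observed:
#   fdict = {('a',): 3, ('b',): 1}
#   _complete_with_zeros_sketch(fdict, [('a',), ('b',), ('c',), ('d',), ('e',)])
# -> returns 5; 5 - 2 = 3 non-observed combinations, matching the logic above.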
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    # create the directory that will contain results for each time instance
    snapshot_dict = {}
    duration = []
    begin = 0
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    embeddings_estimate = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # some runs may have finished earlier than others, hence the IndexError guard
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if monitoring_marks[i] not in snapshot_dict:
                snapshot_dict[monitoring_marks[i]] = []
            try:
                snapshot_dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                sum_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_squares.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                embeddings_estimate.append(monitoring_reports[key_iter][i].embeddings_estimate)
                sum_of_root_node_emb.append(monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(monitoring_reports[key_iter][i].sum_of_the_extra_square_embeddings)
            except IndexError:
                print "Run", key_iter, "has no report for snapshot", monitoring_marks[i]
                break
    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        experiments.globals.current_time_snapshot = time_snapshot
        print "Processed ", counter, " out of: ", len(monitoring_marks)
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path, 'res_time_' + str(time_snapshot) + '.info')
        if write:
            fdict_furer_temp = snapshot_dict[time_snapshot]
            fdicts_Furer = []
            for f in fdict_furer_temp:
                fdicts_Furer.append(f.current_fdict)
                observed_nodes.append(f.number_of_observed_nodes)
            if len(fdict_furer_temp) == 0:
                continue
            for i in range(len(fdict_furer_temp)):
                experiments.globals.nr_iterations = nr_iterations[i]
                fdict_limited = fdicts_Furer[i]
                fdict_Furer = fdicts_Furer[i]
                observed_nodes_difference_per_snapshot.append(observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]
                print "Making pde Exhaustive"
                [pde, trash, default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                print "Made pde Exhaustive"
                print "Len PDE: ", len(pde)
                if len(pde) < 1:
                    print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                    break
                print "Completing combinations for fdict Furer"
                nr_possible_combinations = smplr.complete_combinations_1(fdict_Furer, data_graph, pattern, Plist)
                print "Completed combinations for fdict Furer"
                print "Nr poss combos: ", nr_possible_combinations
                pdf = smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
                print "Made pdf for Furer"
                print "Len pdf: ", len(pdf)
                furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            print "Writing to: ", snapshot_directory_file
            resultfile = open(snapshot_directory_file, 'w')
            resultfile.write('Furer\n')
            # NOTE: the graph label reuses pattern_file_name; no graph file name is available here
            resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
            resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            print "KLD: ", str(numpy.mean(furer_results_KLD))
            resultfile.write("average KLD on furer: " + str(numpy.mean(furer_results_KLD)) + " with SSTD: " + str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
            resultfile.write("average bhatta on furer: " + str(numpy.mean(furer_results_bhatta)) + " with SSTD: " + str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
            resultfile.write("average hellinger on furer: " + str(numpy.mean(furer_results_hellinger)) + " with SSTD: " + str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) + "\n")
            resultfile.write('furer_results_bhatta : ' + str(furer_results_bhatta) + "\n")
            resultfile.write('furer_results_hellinger : ' + str(furer_results_hellinger) + "\n")
            resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
            resultfile.write('# nodes per time interval per run : ' + str(numpy.mean(observed_nodes_difference_per_snapshot) / duration[counter_duration]) + "\n")
            resultfile.write('avg difference of nodes observed from previous snapshot : ' + str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
            resultfile.write("------------------------------------ Sampling info ------------------------------\n")
            resultfile.write('number of sampling iterations : ' + str(nr_iterations[counter]) + "\n")
            if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
                nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[counter]
            else:
                nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[counter]
            print "Writing to file: ", nr_embeddings_temp
            resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) + "\n")
            if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
                # the old standard deviation, from raw embedding counts
                print "Old stdev"
                print sum_of_squares[counter]
                print sum_of_embeddings[counter]
                a = Decimal(sum_of_squares[counter]) - (Decimal(math.pow(sum_of_embeddings[counter], 2)) / Decimal(float(nr_iterations[counter])))
                if a > 0:  # guard against a slightly negative value from rounding
                    stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
                else:
                    stdeviation = 0
            else:
                a = Decimal(sum_of_squares_root_node_emb[counter]) - (Decimal(math.pow(sum_of_root_node_emb[counter], 2)) / Decimal(float(nr_iterations[counter])))
                if a > 0:
                    stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
                else:
                    stdeviation = 0
            print "old stdev: ", stdeviation
            resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) + "\n")
            resultfile.close()
        counter += 1
        counter_duration += 1
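# The two stdev branches above are the same textbook identity applied to
# different accumulators: Var = (sum(x^2) - (sum(x))^2 / n) / (n - 1).
# A minimal, self-contained sketch (not part of the original module) that
# those branches could delegate to; Decimal keeps the large sums exact.
def _sample_stdev_sketch(sum_x, sum_sq, n):
    """Sample standard deviation from running sums; 0 if variance underflows."""
    if n < 2:
        return 0
    a = Decimal(sum_sq) - (Decimal(math.pow(sum_x, 2)) / Decimal(float(n)))
    if a > 0:
        return math.sqrt(a / Decimal(float(n - 1)))
    return 0  # rounding can push the difference slightly negative

# e.g. samples 2, 4, 6: sum_x = 12, sum_sq = 56, n = 3
# -> a = 56 - 144/3 = 8; stdev = sqrt(8/2) = 2.0, matching numpy.std(..., ddof=1)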
# Variant of report_monitoring_my_version for the "false Furer" experiment.
# NOTE: it carries the same name as the function above; if both live in one
# module, this later definition shadows the earlier one.
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    # create the directory that will contain results for each time instance
    snapshot_dict = {}
    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # some runs may have finished earlier than others, hence the IndexError guard
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if monitoring_marks[i] not in snapshot_dict:
                snapshot_dict[monitoring_marks[i]] = []
            try:
                snapshot_dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                sum_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_squares.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                sum_of_root_node_emb.append(monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(monitoring_reports[key_iter][i].sum_of_the_extra_square_embeddings)
            except IndexError:
                break
    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        print "Processing ", counter, "out of: ", len(monitoring_marks)
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path, 'res_time_' + str(time_snapshot) + '.info')
        if write:
            fdict_furer_temp = snapshot_dict[time_snapshot]
            fdicts_Furer = []
            for f in fdict_furer_temp:
                fdicts_Furer.append(f.current_fdict)
                observed_nodes.append(f.number_of_observed_nodes)
            if len(fdict_furer_temp) == 0:
                continue
            for i in range(len(fdict_furer_temp)):
                experiments.globals.nr_iterations = nr_iterations[i]
                fdict_limited = fdicts_Furer[i]
                fdict_Furer = fdicts_Furer[i]
                observed_nodes_difference_per_snapshot.append(observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]
                [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                if len(pde) < 1:
                    print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                    break
                nr_possible_combinations = smplr.complete_combinations_1(fdict_Furer, data_graph, pattern, Plist)
                pdf = smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
                false_furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                false_furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                false_furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            print "Writing to: ", snapshot_directory_file
            resultfile = open(snapshot_directory_file, 'w')
            resultfile.write('False Furer\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
            resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write("average KLD on false furer: " + str(numpy.mean(false_furer_results_KLD)) + " with SSTD: " + str(numpy.std(false_furer_results_KLD, ddof=1)) + "\n")
            resultfile.write("average bhatta on false furer: " + str(numpy.mean(false_furer_results_bhatta)) + " with SSTD: " + str(numpy.std(false_furer_results_bhatta, ddof=1)) + "\n")
            resultfile.write("average hellinger on false furer: " + str(numpy.mean(false_furer_results_hellinger)) + " with SSTD: " + str(numpy.std(false_furer_results_hellinger, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('false_results_KLD : ' + str(false_furer_results_KLD) + "\n")
            resultfile.write('false_results_bhatta : ' + str(false_furer_results_bhatta) + "\n")
            resultfile.write('false_results_hellinger : ' + str(false_furer_results_hellinger) + "\n")
            resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
            resultfile.write('# nodes per time interval per run : ' + str(numpy.mean(observed_nodes_difference_per_snapshot) / duration[counter_duration]) + "\n")
            resultfile.write('avg difference of nodes observed from previous snapshot : ' + str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
            resultfile.write("------------------------------------ Sampling info ------------------------------\n")
            resultfile.write('number of sampling iterations : ' + str(nr_iterations[counter]) + "\n")
            if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
                nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[counter]
            else:
                nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[counter]
            print "Writing to file: ", nr_embeddings_temp
            resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) + "\n")
            if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
                # the old standard deviation, from raw embedding counts
                a = Decimal(sum_of_squares[counter]) - (Decimal(math.pow(sum_of_embeddings[counter], 2)) / Decimal(float(nr_iterations[counter])))
                if a > 0:
                    stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
                else:
                    stdeviation = 0
            else:
                a = Decimal(sum_of_squares_root_node_emb[counter]) - (Decimal(math.pow(sum_of_root_node_emb[counter], 2)) / Decimal(float(nr_iterations[counter])))
                if a > 0:
                    stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
                else:
                    stdeviation = 0
            print "old stdev: ", stdeviation
            resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) + "\n")
            resultfile.close()
        counter += 1
        counter_duration += 1
def report_monitoring_my_version_online(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_result_file, data_graph,
        pattern, Plist, repetitions, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations):
    snapshot_dict = {}
    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # some runs may have finished earlier than others, hence the IndexError
    # guard; here monitoring_reports is a single run's list, not a dict of runs
    for i in xrange(len(monitoring_marks)):
        if monitoring_marks[i] not in snapshot_dict:
            snapshot_dict[monitoring_marks[i]] = []
        try:
            snapshot_dict[monitoring_marks[i]].append(monitoring_reports[i])
            nr_iterations.append(monitoring_reports[i].nr_iterations)
            sum_of_embeddings.append(monitoring_reports[i].sum_nr_embeddings)
            sum_of_squares.append(monitoring_reports[i].sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(monitoring_reports[i].sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(monitoring_reports[i].sum_of_the_extra_square_embeddings)
        except IndexError:
            break
    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    average_klds = []
    average_bhattas = []
    average_hellingers = []
    std_klds = []
    std_bhattas = []
    std_hellingers = []
    avg_nodes_observed = []
    nr_nodes_per_time_interval_per_runs = []
    number_of_sampling_iterations = []
    average_of_embeddings = []
    stdevs = []
    for time_snapshot in monitoring_marks:
        print "Processing ", counter, "out of: ", len(monitoring_marks)
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path, 'res_time_' + str(time_snapshot) + '.info')
        fdict_furer_temp = snapshot_dict[time_snapshot]
        fdicts_Furer = []
        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)
        if len(fdict_furer_temp) == 0:
            continue
        for i in range(len(fdict_furer_temp)):
            experiments.globals.nr_iterations = nr_iterations[i]
            fdict_limited = fdicts_Furer[i]
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            nr_possible_combinations = smplr.complete_combinations_1(fdict_Furer, data_graph, pattern, Plist)
            pdf = smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
            false_furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            false_furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            false_furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
        average_klds.append(numpy.mean(false_furer_results_KLD))
        std_klds.append(numpy.std(false_furer_results_KLD, ddof=1))
        average_bhattas.append(numpy.mean(false_furer_results_bhatta))
        std_bhattas.append(numpy.std(false_furer_results_bhatta, ddof=1))
        average_hellingers.append(numpy.mean(false_furer_results_hellinger))
        std_hellingers.append(numpy.std(false_furer_results_hellinger, ddof=1))
        avg_nodes_observed.append(numpy.mean(observed_nodes))
        number_of_sampling_iterations.append(nr_iterations[counter])
        nr_nodes_per_time_interval_per_runs.append(float(numpy.mean(observed_nodes_difference_per_snapshot) / duration[counter_duration]))
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[counter]
        average_of_embeddings.append(nr_embeddings_temp)
        stdeviation = numpy.nan
        try:
            if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
                # the old standard deviation, from raw embedding counts
                a = Decimal(sum_of_squares[counter]) - (Decimal(math.pow(sum_of_embeddings[counter], 2)) / Decimal(float(nr_iterations[counter])))
            else:
                a = Decimal(sum_of_squares_root_node_emb[counter]) - (Decimal(math.pow(sum_of_root_node_emb[counter], 2)) / Decimal(float(nr_iterations[counter])))
            stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
        except Exception:
            print "stdev computation not successful; keeping NaN"
        stdevs.append(stdeviation)
        counter += 1
        counter_duration += 1
    return {
        "average_klds": average_klds,
        "average_bhattas": average_bhattas,
        "average_hellingers": average_hellingers,
        "std_klds": std_klds,
        "std_bhattas": std_bhattas,
        "std_hellingers": std_hellingers,
        "avg_nodes_observed": avg_nodes_observed,
        "nr_nodes_per_time_interval_per_runs": nr_nodes_per_time_interval_per_runs,
        "number_of_sampling_iterations": number_of_sampling_iterations,
        "average_of_embeddings": average_of_embeddings,
        "stdevs": stdevs,
    }
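# The online report returns parallel lists, one entry per monitoring mark,
# which the caller below averages across repeated runs.  Shape sketch
# (values are illustrative):
#
#   res = report_monitoring_my_version_online(...)
#   res["average_klds"][i]   -> mean KLD at monitoring_marks[i]
#   res["std_klds"][i]       -> its sample std (ddof=1)
#   res["stdevs"][i]         -> stdev of the embedding count estimate
#
# so averaging run results r1, r2 at snapshot i is simply, e.g.:
#   numpy.mean([r1["average_klds"][i], r2["average_klds"][i]])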
def my_version_report_online(fdict_exhaustive, data_graph, pattern,
                             monitoring_marks, output_path,
                             detailed_result_path, monitoring_reports,
                             exhaustive_approach_results_path, Plist_base, nr,
                             pattern_file_name):
    print "LEN FDICT_EXHAUSTIVE (BEFORE): ", len(fdict_exhaustive)
    experiments.globals.report = "furer"
    size_fdict = len(fdict_exhaustive)
    num_embeddings = 0
    for k in fdict_exhaustive.keys():
        # raw counts; BEWARE: an earlier version added +1 per key (Laplace smoothing)
        num_embeddings = num_embeddings + fdict_exhaustive[k]
    start_time = time.time()
    results = {}
    counter = 1
    for i in monitoring_reports.keys():
        experiments.globals.nr_non_observed_combinations = 0
        # add zero counts for all combinations that were never observed
        nr_possible_combinations = smplr.complete_combinations_1(
            fdict_exhaustive, data_graph, pattern, Plist_base)
        nr_non_observed_combinations = nr_possible_combinations - size_fdict
        experiments.globals.nr_non_observed_combinations = nr_non_observed_combinations
        results[counter] = report.report_monitoring_my_version_online(
            monitoring_marks, output_path, detailed_result_path,
            monitoring_reports[i], exhaustive_approach_results_path,
            data_graph, pattern, Plist_base, nr, pattern_file_name,
            fdict_exhaustive, nr_non_observed_combinations)
        counter += 1
    print "Calculating averages"
    for i in xrange(len(monitoring_marks)):
        avg_klds = []
        average_bhattas = []
        average_hellingers = []
        std_klds = []
        std_bhattas = []
        std_hellingers = []
        avg_nodes_observed = []
        nr_nodes_per_time_interval_per_runs = []
        number_of_sampling_iterations = []
        average_of_embeddings = []
        stdevs = []
        with open(os.path.join(detailed_result_path,
                               'res_time_' + str(monitoring_marks[i]) + ".info"),
                  'w') as resultfile:
            resultfile.write('False Furer with different orderings\n')
            # results is keyed by the running counter (1..n), not by the keys
            # of monitoring_reports, so iterate over results itself
            for k in results.keys():
                avg_klds.append(results[k]['average_klds'][i])
                average_bhattas.append(results[k]['average_bhattas'][i])
                average_hellingers.append(results[k]['average_hellingers'][i])
                std_klds.append(results[k]['std_klds'][i])
                std_bhattas.append(results[k]['std_bhattas'][i])
                std_hellingers.append(results[k]['std_hellingers'][i])
                avg_nodes_observed.append(results[k]['avg_nodes_observed'][i])
                nr_nodes_per_time_interval_per_runs.append(results[k]['nr_nodes_per_time_interval_per_runs'][i])
                number_of_sampling_iterations.append(results[k]['number_of_sampling_iterations'][i])
                average_of_embeddings.append(float(results[k]['average_of_embeddings'][i]))
                stdevs.append(results[k]['stdevs'][i])
            resultfile.write("average KLD on false furer: " + str(numpy.mean(avg_klds)) + " with SSTD: " + str(numpy.std(avg_klds, ddof=1)) + "\n")
            resultfile.write("average bhatta on false furer: " + str(numpy.mean(average_bhattas)) + " with SSTD: " + str(numpy.std(average_bhattas, ddof=1)) + "\n")
            resultfile.write("average hellinger on false furer: " + str(numpy.mean(average_hellingers)) + " with SSTD: " + str(numpy.std(average_hellingers, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('avg #nodes observed : ' + str(numpy.mean(avg_nodes_observed)) + "\n")
            resultfile.write('average of embeddings : ' + str(numpy.mean(average_of_embeddings)) + "\n")
            resultfile.write('stdeviation of # embeddings: ' + str(numpy.nanmean(stdevs)) + "\n")
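# su.avg_kld / su.avg_bhatta / su.avg_hellinger compare the exhaustive and
# sampled probability tables; their internals are not part of this module.
# For reference, a minimal sketch of the three distances on two flat discrete
# distributions, assuming the standard definitions (the per-conditional-table
# averaging that su presumably performs is omitted):
def _distances_sketch(p, q):
    """KLD, Bhattacharyya distance and Hellinger distance of two
    equal-length discrete distributions (lists summing to 1)."""
    kld = sum(pi * math.log(pi / qi) for pi, qi in zip(p, q) if pi > 0)
    bc = sum(math.sqrt(pi * qi) for pi, qi in zip(p, q))  # Bhattacharyya coefficient
    bhatta = -math.log(bc)
    hellinger = math.sqrt(max(0.0, 1.0 - bc))
    return kld, bhatta, hellinger

# e.g. _distances_sketch([0.5, 0.5], [0.5, 0.5]) -> (0.0, -0.0, 0.0):
# identical distributions are at distance zero under all three measures.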
# Variant of report_monitoring_my_version for the random-node experiment.
# NOTE: same name again; keep only one definition per module, or rename.
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    # create the directory that will contain results for each time instance
    snapshot_dict = {}
    duration = []
    nr_iterations = []
    sum_number_of_embeddings = []
    sum_of_embeddings_vers1 = []
    sum_of_embeddings_random_old = []
    sum_of_square_emb_random_old = []
    sum_of_squares_vers1 = []
    sum_of_the_square_embeddings = []
    nr_root_nodes = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # some runs may have finished earlier than others, hence the IndexError guard
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if monitoring_marks[i] not in snapshot_dict:
                snapshot_dict[monitoring_marks[i]] = []
            try:
                snapshot_dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                sum_number_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_the_square_embeddings.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                nr_root_nodes.append(monitoring_reports[key_iter][i].nr_root_nodes)
                try:
                    # fields only present in reports produced by the old data structures
                    sum_of_embeddings_random_old.append(monitoring_reports[key_iter][i].sum_number_of_embeddings_random)
                    sum_of_square_emb_random_old.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_random)
                except AttributeError:
                    continue
                try:
                    sum_of_embeddings_vers1.append(monitoring_reports[key_iter][i].sum_nr_embeddings_aux)
                    sum_of_squares_vers1.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_aux)
                except AttributeError:
                    continue
            except IndexError:
                break
    print "Sum of embeddings random old: ", len(sum_of_embeddings_random_old)
    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings_vers1: ", sum_of_embeddings_vers1
    print "sum_of_squares_vers1: ", sum_of_squares_vers1
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    interval = 0
    acc_nr_emb = 0
    acc_nr_emb_minus_average = 0
    nr_emb_per_interval = []
    for time_snapshot in monitoring_marks:
        experiments.globals.current_time_snapshot = time_snapshot
        print "TIME SNAPSHOT: ", time_snapshot
        interval += 1
        randnode_results_KLD = []
        randnode_results_bhatta = []
        randnode_results_hellinger = []
        furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path, 'res_time_' + str(time_snapshot) + '.info')
        if write:
            fdict_furer_temp = snapshot_dict[time_snapshot]
            fdicts_Furer = []
            for f in fdict_furer_temp:
                fdicts_Furer.append(f.current_fdict)
                observed_nodes.append(f.number_of_observed_nodes)
            if len(fdict_furer_temp) == 0:
                continue
            for i in range(len(fdict_furer_temp)):
                experiments.globals.nr_iterations = nr_iterations[i]
                fdict_limited = fdicts_Furer[i]
                fdict_Furer = fdicts_Furer[i]
                observed_nodes_difference_per_snapshot.append(observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]
                [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                if len(pde) < 1:
                    print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                    break
                nr_possible_combinations = smplr.complete_combinations_1(fdict_Furer, data_graph, pattern, Plist)
                pdl = smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
                randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
                randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
                randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            print randnode_results_KLD
            print "Writing to: ", snapshot_directory_file
            resultfile = open(snapshot_directory_file, 'w')
            resultfile.write('Random\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
            resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write("average KLD on random: " + str(numpy.mean(randnode_results_KLD)) + " with SSTD: " + str(numpy.std(randnode_results_KLD, ddof=1)) + "\n")
            print "KLD: ", str(numpy.mean(randnode_results_KLD))
            resultfile.write("average bhatta on random: " + str(numpy.mean(randnode_results_bhatta)) + " with SSTD: " + str(numpy.std(randnode_results_bhatta, ddof=1)) + "\n")
            resultfile.write("average hellinger on random: " + str(numpy.mean(randnode_results_hellinger)) + " with SSTD: " + str(numpy.std(randnode_results_hellinger, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('random_results_KLD : ' + str(randnode_results_KLD) + "\n")
            resultfile.write('random_results_bhatta : ' + str(randnode_results_bhatta) + "\n")
            resultfile.write('random_results_hellinger : ' + str(randnode_results_hellinger) + "\n")
            resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
            resultfile.write('# nodes per time interval per run : ' + str(numpy.mean(observed_nodes_difference_per_snapshot) / duration[counter_duration]) + "\n")
            resultfile.write('avg difference of nodes observed from previous snapshot : ' + str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
            resultfile.write("------------------------------------ Sampling info ------------------------------\n")
            resultfile.write('number of sampling iterations : ' + str(nr_iterations[counter]) + "\n")
            nr_iter = nr_iterations[counter]
            if nr_iter == 0 or nr_iter == 1:
                nr_iter = 2  # avoid division by zero in the (n - 1) denominators
            # per-iteration average, scaled up by the number of root nodes
            avg = (float(Decimal(sum_number_of_embeddings[counter])) / nr_iter) * experiments.globals.nr_root_nodes
            old = False
            stdev2 = 0
            if avg < 0:
                # a negative sum marks reports produced by the old data structures
                print "Handling old data structures"
                avg = float(sum_of_embeddings_random_old[counter]) / nr_iter
                old = True
            sum1 = Decimal(sum_of_the_square_embeddings[counter])
            sum2 = Decimal(sum_number_of_embeddings[counter])
            var = Decimal(sum1) - (Decimal(math.pow(Decimal(sum2), 2)) / nr_iter)
            print "Variance: ", var
            if var > 0:
                stdev2 = math.sqrt(var / (nr_iter - 1))
            stdev = Decimal(stdev2) * Decimal(math.sqrt(nr_iter))
            if old:
                print "Handling old data structures"
                variance = sum_of_square_emb_random_old[counter] / nr_iter
                stdev2 = math.sqrt(variance / (nr_iter - 1))
            print "STDEV: ", stdev
            print "STDEV 2: ", stdev2
            print "Nr embeddings: ", avg
            resultfile.write('average of embeddings w.r.t sampling iterations: ' + str(avg) + "\n")
            resultfile.write('stdeviation of # embeddings: ' + str(stdev2) + "\n")
            resultfile.write('2 stdeviation of # embeddings: ' + str(stdev) + "\n")
            resultfile.close()
        counter += 1
        counter_duration += 1
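# Worked example of the scale-up estimator above (illustrative numbers only,
# and assuming nr_root_nodes counts the root nodes each sampling iteration
# draws from): the mean embedding count per iteration, multiplied by the
# number of root nodes, estimates the total number of embeddings; stdev2 is
# the per-iteration sample stdev and stdev rescales it by sqrt(nr_iter), as
# the code above does.
def _scaled_estimate_sketch(sum_emb, sum_sq_emb, nr_iter, nr_root_nodes):
    avg = (float(sum_emb) / nr_iter) * nr_root_nodes
    var = (float(sum_sq_emb) - float(sum_emb) ** 2 / nr_iter) / (nr_iter - 1)
    stdev2 = math.sqrt(var) if var > 0 else 0.0
    stdev = stdev2 * math.sqrt(nr_iter)
    return avg, stdev2, stdev

# e.g. _scaled_estimate_sketch(30, 110, 10, 100)
# -> avg = 300.0; var = (110 - 90)/9 ~= 2.22; stdev2 ~= 1.49; stdev ~= 4.71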