Esempio n. 1
0
def report_monitoring(monitoring_marks, output_path, detailed_result_path,
                      monitoring_reports, exhaustive_approach_result_file,
                      data_graph, pattern, Plist, repetitions,
                      pattern_file_name, fdict_exhaustive):
    """Write a ``res_time_<mark>.info`` report for a monitored sampling run.

    For a monitoring mark, the snapshot reports of all runs are collected,
    each run's ``current_fdict`` is smoothed and compared against
    ``fdict_exhaustive`` (``su.avg_kld``, ``su.avg_bhatta``,
    ``su.avg_hellinger``), and the scores plus sampling statistics are
    written to a text file inside ``detailed_result_path``.

    Args:
        monitoring_marks: sequence of time marks. The interval length of a
            mark is its difference to the previous mark (the first one is
            measured from 0).
        output_path: not used by this function.
        detailed_result_path: directory that receives the report files; it
            is created with ``os.mkdir`` when it does not exist.
        monitoring_reports: mapping of run key -> sequence of snapshot
            reports, positionally aligned with ``monitoring_marks``. Every
            report must provide ``nr_iterations``, ``sum_nr_embeddings``,
            ``sum_of_the_square_embeddings``, ``sum_nr_extra_embeddings``,
            ``sum_of_the_extra_square_embeddings``, ``current_fdict`` and
            ``number_of_observed_nodes``.
        exhaustive_approach_result_file: not used by this function.
        data_graph: not used by this function.
        pattern: not used by this function.
        Plist: not used by this function.
        repetitions: number of runs; sizes the per-run baseline of observed
            nodes and is written into the report.
        pattern_file_name: string written into the report header.
        fdict_exhaustive: exhaustive frequency dictionary handed to the
            ``smplr`` helpers.

    Returns:
        None. The result is the file written to disk.

    NOTE(review): the loop stops as soon as ``counter`` reaches 1, so only
    the first non-empty snapshot is reported. This looks like a debugging
    leftover but is kept because callers may rely on it.
    NOTE(review): ``nr_iterations`` and the sum lists hold one entry per run
    per mark, yet they are indexed with the snapshot counter. The two only
    line up when there is a single run -- confirm the intended indexing.
    """
    # Length of each monitoring interval; the first one is measured from 0.
    duration = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    reports_by_mark = {}
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []

    # Some runs may have finished earlier than others and therefore hold
    # fewer snapshots; the first run lacking snapshot i ends the scan of
    # that mark.
    for i, mark in enumerate(monitoring_marks):
        for key_iter in monitoring_reports.keys():
            mark_reports = reports_by_mark.setdefault(mark, [])
            try:
                run_report = monitoring_reports[key_iter][i]
            except IndexError:
                break
            mark_reports.append(run_report)
            nr_iterations.append(run_report.nr_iterations)
            sum_of_embeddings.append(run_report.sum_nr_embeddings)
            sum_of_squares.append(run_report.sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(run_report.sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                run_report.sum_of_the_extra_square_embeddings)

    print("NR ITERATIONS:  %s" % (nr_iterations,))
    print("sum_of_embeddings:  %s" % (sum_of_embeddings,))
    print("sum_of_squares:  %s" % (sum_of_squares,))

    # Number of nodes every run had observed at the previous snapshot.
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        if counter == 1:
            break
        print("Processed  %s  out of:  %s" % (counter, len(monitoring_marks)))
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        observed_nodes_difference_per_snapshot = []

        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        mark_reports = reports_by_mark[time_snapshot]
        fdicts_Furer = [rep.current_fdict for rep in mark_reports]
        observed_nodes = [rep.number_of_observed_nodes for rep in mark_reports]

        if len(mark_reports) == 0:
            # Counters are deliberately left untouched (original behaviour).
            continue

        for i, fdict_Furer in enumerate(fdicts_Furer):
            smplr.smooth(fdict_Furer, fdict_exhaustive)  # smoothing to avoid zeros
            observed_nodes_difference_per_snapshot.append(
                observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            pde, trash_list, default_key = smplr.make_pd_general_kickout_default(
                fdict_exhaustive, trash_factor=0.01)
            if len(pde) < 1:
                print("WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING.")
                break
            pdf, _tl, _dk = smplr.make_pd_general_kickout_default_limited(
                fdict_Furer, trash_list, default_key)
            furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))

        print("Writing to:  %s" % (snapshot_directory_file,))
        # The context manager closes the file even if a statistic below raises.
        with open(snapshot_directory_file, 'w') as resultfile:
            resultfile.write('Furer\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
            resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            kld_mean = numpy.mean(furer_results_KLD)
            print("KLD:  %s" % (str(kld_mean),))
            resultfile.write("average average KLD on furer: " + str(kld_mean) + " with SSTD: " + str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
            resultfile.write("average average bhatta on furer: " + str(numpy.mean(furer_results_bhatta)) + " with SSTD: " + str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
            resultfile.write("average average hellinger on furer: " + str(numpy.mean(furer_results_hellinger)) + " with SSTD: " + str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) + "\n")
            resultfile.write('furer_results_bhatta : ' + str(furer_results_bhatta) + "\n")
            resultfile.write('furer_results_hellinger : ' + str(furer_results_hellinger) + "\n")
            resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
            mean_node_difference = numpy.mean(observed_nodes_difference_per_snapshot)
            resultfile.write('# nodes per time interval per run :' + str(mean_node_difference / duration[counter_duration]) + "\n")
            resultfile.write('avg difference of nodes observed from previous snapshot :' + str(mean_node_difference) + "\n")
            resultfile.write("------------------------------------ Sampling info ------------------------------\n")
            iterations = nr_iterations[counter]
            resultfile.write('number of sampling iterations : ' + str(iterations) + "\n")

            # Without root-node ("extra") statistics fall back to the plain
            # embedding sums ("old" estimator).
            old_estimator = (sum_of_squares_root_node_emb[counter] == 0
                             and sum_of_root_node_emb[counter] == 0)
            if old_estimator:
                total = sum_of_embeddings[counter]
                total_of_squares = sum_of_squares[counter]
            else:
                total = sum_of_root_node_emb[counter]
                total_of_squares = sum_of_squares_root_node_emb[counter]

            # NOTE(review): under Python 2 this is integer division when both
            # operands are ints -- confirm that truncation is intended.
            nr_embeddings_temp = total / iterations
            print("Writing to file:  %s" % (nr_embeddings_temp,))
            resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) + "\n")

            if old_estimator:
                # we do the old standard deviation
                print("Old stdev")
                print(total_of_squares)
                print(total)
            else:
                print("here")
            # Sum of squared deviations: sum(x^2) - (sum(x))^2 / n.
            a = Decimal(total_of_squares) - (Decimal(math.pow(total, 2)) / Decimal(float(iterations)))
            if old_estimator:
                print(a)
            # Rounding can make the term slightly negative and a single
            # iteration leaves no degrees of freedom; report 0 in both cases
            # instead of raising from sqrt / division.
            if a > 0 and iterations > 1:
                stdeviation = math.sqrt(a / Decimal(float(iterations - 1)))
            else:
                stdeviation = 0
            print("old stdev:  %s" % (stdeviation,))
            resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) + "\n")
        counter += 1
        counter_duration += 1
Esempio n. 2
0
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    """Write one ``res_time_<mark>.info`` report per monitoring mark.

    For every mark the snapshot reports of all runs are collected. When
    ``write`` is ``True`` each run's ``current_fdict`` is completed with
    ``smplr.complete_combinations_1`` and compared against
    ``fdict_exhaustive`` (``su.avg_kld``, ``su.avg_bhatta``,
    ``su.avg_hellinger``). The scores and the sampling statistics are
    written to a text file inside ``detailed_result_path``.

    Args:
        monitoring_marks: sequence of time marks. The interval length of a
            mark is its difference to the previous mark (the first one is
            measured from 0).
        output_path: not used by this function.
        detailed_result_path: directory that receives the report files; it
            is created with ``os.mkdir`` when it does not exist.
        monitoring_reports: mapping of run key -> sequence of snapshot
            reports, positionally aligned with ``monitoring_marks``. Every
            report must provide ``nr_iterations``, ``sum_nr_embeddings``,
            ``sum_of_the_square_embeddings``, ``sum_nr_extra_embeddings``,
            ``sum_of_the_extra_square_embeddings``, ``current_fdict`` and
            ``number_of_observed_nodes``.
        exhaustive_approach_result_file: not used by this function.
        data_graph, pattern, Plist: forwarded to
            ``smplr.complete_combinations_1``.
        repetitions: number of runs; sizes the per-run baseline of observed
            nodes and is written into the report.
        pattern_file_name: string written into the report header.
        fdict_exhaustive: exhaustive frequency dictionary handed to the
            ``smplr`` helpers.
        nr_non_observed_combinations: not used by this function.
        write: the distance scores are only computed when this equals
            ``True``; the report file is written either way.

    Returns:
        None. Results are the files written to disk. As a side effect
        ``experiments.globals.current_time_snapshot`` and
        ``experiments.globals.nr_iterations`` are updated.

    NOTE(review): ``nr_iterations`` and the sum lists hold one entry per run
    per mark, yet they are indexed with the snapshot counter (and, for
    ``experiments.globals.nr_iterations``, with the run index). This only
    lines up when there is a single run -- confirm the intended indexing.
    """
    # Length of each monitoring interval; the first one is measured from 0.
    duration = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    reports_by_mark = {}
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []

    # Some runs may have finished earlier than others and therefore hold
    # fewer snapshots; the first run lacking snapshot i ends the scan of
    # that mark.
    for i, mark in enumerate(monitoring_marks):
        for key_iter in monitoring_reports.keys():
            mark_reports = reports_by_mark.setdefault(mark, [])
            try:
                run_report = monitoring_reports[key_iter][i]
            except IndexError:
                print("Something wrong")
                break
            mark_reports.append(run_report)
            nr_iterations.append(run_report.nr_iterations)
            sum_of_embeddings.append(run_report.sum_nr_embeddings)
            sum_of_squares.append(run_report.sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(run_report.sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                run_report.sum_of_the_extra_square_embeddings)

    print("NR ITERATIONS:  %s" % (nr_iterations,))
    print("sum_of_embeddings:  %s" % (sum_of_embeddings,))
    print("sum_of_squares:  %s" % (sum_of_squares,))

    # Number of nodes every run had observed at the previous snapshot.
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0

    for time_snapshot in monitoring_marks:
        experiments.globals.current_time_snapshot = time_snapshot
        print("Processed  %s  out of:  %s" % (counter, len(monitoring_marks)))
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        if write == True:
            mark_reports = reports_by_mark[time_snapshot]
            fdicts_Furer = [rep.current_fdict for rep in mark_reports]
            observed_nodes = [rep.number_of_observed_nodes
                              for rep in mark_reports]

            if len(mark_reports) == 0:
                # Counters are deliberately left untouched (original behaviour).
                continue

            for i, fdict_Furer in enumerate(fdicts_Furer):
                experiments.globals.nr_iterations = nr_iterations[i]
                observed_nodes_difference_per_snapshot.append(
                    observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]
                print("Making pde Exhaustive")
                pde, _trash, _default_key = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                print("Made pde Exhaustive")
                print("Len PDE:  %s" % (len(pde),))
                if len(pde) < 1:
                    print("WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING.")
                    break
                print("Completing combinations for fdict Furer")
                nr_possible_combinations = smplr.complete_combinations_1(
                    fdict_Furer, data_graph, pattern, Plist)
                print("Completed combinations for fdict Furer")
                print("Nr poss combos:  %s" % (nr_possible_combinations,))
                pdf = smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
                print("Made pdf for Furer")
                print("Len pdf:  %s" % (len(pdf),))
                print("Furer ...")
                print("EXHAUSTIVE:")
                furer_results_KLD.append(
                    su.avg_kld(smplr.transform_to_ptable(pde),
                               smplr.transform_to_ptable(pdf)))
                print("HALO?")
                furer_results_bhatta.append(
                    su.avg_bhatta(smplr.transform_to_ptable(pde),
                                  smplr.transform_to_ptable(pdf)))
                furer_results_hellinger.append(
                    su.avg_hellinger(smplr.transform_to_ptable(pde),
                                     smplr.transform_to_ptable(pdf)))

        print("Writing to:  %s" % (snapshot_directory_file,))
        # The context manager closes the file even if a statistic below raises.
        with open(snapshot_directory_file, 'w') as resultfile:
            resultfile.write('Furer\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
            resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            kld_mean = numpy.mean(furer_results_KLD)
            print("KLD:  %s" % (str(kld_mean),))
            resultfile.write("average average KLD on furer: " + str(kld_mean) + " with SSTD: " + str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
            resultfile.write("average average bhatta on furer: " + str(numpy.mean(furer_results_bhatta)) + " with SSTD: " + str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
            resultfile.write("average average hellinger on furer: " + str(numpy.mean(furer_results_hellinger)) + " with SSTD: " + str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) + "\n")
            resultfile.write('furer_results_bhatta : ' + str(furer_results_bhatta) + "\n")
            resultfile.write('furer_results_hellinger : ' + str(furer_results_hellinger) + "\n")
            resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
            mean_node_difference = numpy.mean(observed_nodes_difference_per_snapshot)
            resultfile.write('# nodes per time interval per run :' + str(mean_node_difference / duration[counter_duration]) + "\n")
            resultfile.write('avg difference of nodes observed from previous snapshot :' + str(mean_node_difference) + "\n")
            resultfile.write("------------------------------------ Sampling info ------------------------------\n")
            iterations = nr_iterations[counter]
            resultfile.write('number of sampling iterations : ' + str(iterations) + "\n")

            # Without root-node ("extra") statistics fall back to the plain
            # embedding sums ("old" estimator).
            old_estimator = (sum_of_squares_root_node_emb[counter] == 0
                             and sum_of_root_node_emb[counter] == 0)
            if old_estimator:
                total = sum_of_embeddings[counter]
                total_of_squares = sum_of_squares[counter]
            else:
                total = sum_of_root_node_emb[counter]
                total_of_squares = sum_of_squares_root_node_emb[counter]

            # NOTE(review): under Python 2 this is integer division when both
            # operands are ints -- confirm that truncation is intended.
            nr_embeddings_temp = total / iterations
            print("Writing to file:  %s" % (nr_embeddings_temp,))
            resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) + "\n")

            if old_estimator:
                # we do the old standard deviation
                print("Old stdev")
                print(total_of_squares)
                print(total)
            # Sum of squared deviations: sum(x^2) - (sum(x))^2 / n.
            a = Decimal(total_of_squares) - (Decimal(math.pow(total, 2)) / Decimal(float(iterations)))
            # Rounding can make the term slightly negative and a single
            # iteration leaves no degrees of freedom; report 0 in both cases
            # (for either estimator) instead of raising.
            if a > 0 and iterations > 1:
                stdeviation = math.sqrt(a / Decimal(float(iterations - 1)))
            else:
                stdeviation = 0
            print("old stdev:  %s" % (stdeviation,))
            resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) + "\n")
        counter += 1
        counter_duration += 1
Esempio n. 3
0
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    """Write one ``res_time_<mark>.info`` "False Furer" report per mark.

    For every monitoring mark the snapshot reports of all runs are
    collected. When ``write`` is ``True`` each run's ``current_fdict`` is
    completed with ``smplr.complete_combinations_1`` and compared against
    ``fdict_exhaustive`` (``su.avg_kld``, ``su.avg_bhatta``,
    ``su.avg_hellinger``). The scores and the sampling statistics are
    written to a text file inside ``detailed_result_path``.

    Args:
        monitoring_marks: sequence of time marks. The interval length of a
            mark is its difference to the previous mark (the first one is
            measured from 0).
        output_path: not used by this function.
        detailed_result_path: directory that receives the report files; it
            is created with ``os.mkdir`` when it does not exist.
        monitoring_reports: mapping of run key -> sequence of snapshot
            reports, positionally aligned with ``monitoring_marks``. Every
            report must provide ``nr_iterations``, ``sum_nr_embeddings``,
            ``sum_of_the_square_embeddings``, ``sum_nr_extra_embeddings``,
            ``sum_of_the_extra_square_embeddings``, ``current_fdict`` and
            ``number_of_observed_nodes``.
        exhaustive_approach_result_file: not used by this function.
        data_graph, pattern, Plist: forwarded to
            ``smplr.complete_combinations_1``.
        repetitions: number of runs; sizes the per-run baseline of observed
            nodes and is written into the report.
        pattern_file_name: string written into the report header.
        fdict_exhaustive: exhaustive frequency dictionary handed to the
            ``smplr`` helpers.
        nr_non_observed_combinations: not used by this function.
        write: the distance scores are only computed when this equals
            ``True``; the report file is written either way.

    Returns:
        None. Results are the files written to disk. When ``write`` is
        ``True``, ``experiments.globals.nr_iterations`` is updated as a
        side effect.

    NOTE(review): ``nr_iterations`` and the sum lists hold one entry per run
    per mark, yet they are indexed with the snapshot counter (and, for
    ``experiments.globals.nr_iterations``, with the run index). This only
    lines up when there is a single run -- confirm the intended indexing.
    """
    # Length of each monitoring interval; the first one is measured from 0.
    duration = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    reports_by_mark = {}
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []

    # Some runs may have finished earlier than others and therefore hold
    # fewer snapshots; the first run lacking snapshot i ends the scan of
    # that mark.
    for i, mark in enumerate(monitoring_marks):
        for key_iter in monitoring_reports.keys():
            mark_reports = reports_by_mark.setdefault(mark, [])
            try:
                run_report = monitoring_reports[key_iter][i]
            except IndexError:
                break
            mark_reports.append(run_report)
            nr_iterations.append(run_report.nr_iterations)
            sum_of_embeddings.append(run_report.sum_nr_embeddings)
            sum_of_squares.append(run_report.sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(run_report.sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                run_report.sum_of_the_extra_square_embeddings)

    print("NR ITERATIONS:  %s" % (nr_iterations,))
    print("sum_of_embeddings:  %s" % (sum_of_embeddings,))
    print("sum_of_squares:  %s" % (sum_of_squares,))

    # Number of nodes every run had observed at the previous snapshot.
    snapshot_inits = [0] * repetitions
    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        print("Processing  %s out of:  %s" % (counter, len(monitoring_marks)))
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        if write == True:
            mark_reports = reports_by_mark[time_snapshot]
            fdicts_Furer = [rep.current_fdict for rep in mark_reports]
            observed_nodes = [rep.number_of_observed_nodes
                              for rep in mark_reports]

            if len(mark_reports) == 0:
                # Counters are deliberately left untouched (original behaviour).
                continue

            for i, fdict_Furer in enumerate(fdicts_Furer):
                experiments.globals.nr_iterations = nr_iterations[i]
                observed_nodes_difference_per_snapshot.append(
                    observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]

                pde, _trash_list, _default_key = (
                    smplr.make_pd_general_kickout_default_my_version(
                        fdict_exhaustive))
                if len(pde) < 1:
                    print("WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING.")
                    break
                # Called for its effect on fdict_Furer; the returned count is
                # not needed here.
                smplr.complete_combinations_1(
                    fdict_Furer, data_graph, pattern, Plist)
                pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                    fdict_Furer)
                false_furer_results_KLD.append(
                    su.avg_kld(smplr.transform_to_ptable(pde),
                               smplr.transform_to_ptable(pdf)))
                false_furer_results_bhatta.append(
                    su.avg_bhatta(smplr.transform_to_ptable(pde),
                                  smplr.transform_to_ptable(pdf)))
                false_furer_results_hellinger.append(
                    su.avg_hellinger(smplr.transform_to_ptable(pde),
                                     smplr.transform_to_ptable(pdf)))

        print("Writing to:  %s" % (snapshot_directory_file,))
        # The context manager closes the file even if a statistic below raises.
        with open(snapshot_directory_file, 'w') as resultfile:
            resultfile.write('False Furer\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) +
                             " and pattern: " + pattern_file_name + "\n")
            resultfile.write("repetitions (for this time snapshot): " +
                             str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write("average KLD on false furer: " +
                             str(numpy.mean(false_furer_results_KLD)) +
                             " with SSTD: " +
                             str(numpy.std(false_furer_results_KLD, ddof=1)) +
                             "\n")
            resultfile.write("average bhatta on false furer: " +
                             str(numpy.mean(false_furer_results_bhatta)) +
                             " with SSTD: " +
                             str(numpy.std(false_furer_results_bhatta, ddof=1)) +
                             "\n")
            resultfile.write("average hellinger on false furer: " +
                             str(numpy.mean(false_furer_results_hellinger)) +
                             " with SSTD: " +
                             str(numpy.std(false_furer_results_hellinger, ddof=1)) +
                             "\n")
            resultfile.write(" " + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('false_results_KLD :' +
                             str(false_furer_results_KLD) + "\n")
            resultfile.write('false_results_bhatta :' +
                             str(false_furer_results_bhatta) + "\n")
            resultfile.write('false_results_hellinger :' +
                             str(false_furer_results_hellinger) + "\n")
            resultfile.write('avg #nodes observed :' +
                             str(numpy.mean(observed_nodes)) + "\n")
            mean_node_difference = numpy.mean(
                observed_nodes_difference_per_snapshot)
            resultfile.write('# nodes per time interval per run:' +
                             str(mean_node_difference /
                                 duration[counter_duration]) + "\n")
            resultfile.write(
                'avg difference of nodes observed from previous snapshot :' +
                str(mean_node_difference) + "\n")
            resultfile.write(
                "------------------------------------ Sampling info ------------------------------\n"
            )
            iterations = nr_iterations[counter]
            resultfile.write('number of sampling iterations : ' +
                             str(iterations) + "\n")

            # Without root-node ("extra") statistics fall back to the plain
            # embedding sums ("old" estimator).
            if (sum_of_squares_root_node_emb[counter] == 0
                    and sum_of_root_node_emb[counter] == 0):
                total = sum_of_embeddings[counter]
                total_of_squares = sum_of_squares[counter]
            else:
                total = sum_of_root_node_emb[counter]
                total_of_squares = sum_of_squares_root_node_emb[counter]

            # NOTE(review): under Python 2 this is integer division when both
            # operands are ints -- confirm that truncation is intended.
            nr_embeddings_temp = total / iterations
            print("Writing to file:  %s" % (nr_embeddings_temp,))
            resultfile.write('average of embeddings : ' +
                             str(nr_embeddings_temp) + "\n")

            # Sum of squared deviations: sum(x^2) - (sum(x))^2 / n.
            a = Decimal(total_of_squares) - (
                Decimal(math.pow(total, 2)) / Decimal(float(iterations)))
            # Rounding can make the term slightly negative and a single
            # iteration leaves no degrees of freedom; report 0 in both cases.
            if a > 0 and iterations > 1:
                stdeviation = math.sqrt(a / Decimal(float(iterations - 1)))
            else:
                stdeviation = 0
            print("old stdev:  %s" % (stdeviation,))
            resultfile.write('stdeviation of # embeddings: ' +
                             str(stdeviation) + "\n")
        counter += 1
        counter_duration += 1
Esempio n. 4
0
def report(output_path, detailed_result_path, fudicts, plot_result_dict,
           all_furer_times, exhaustive_approach_result_file, data_graph,
           pattern, Plist, NLIMIT_values, repetitions, pattern_file_name,
           fdict_exhaustive, iteration_counter_n_limit, n_limit_embeddings):
    """Persist the sampling results and write one ``.result`` file per limit.

    The raw ``fudicts`` and ``all_furer_times`` are pickled into
    ``output_path``. The exhaustive frequency dictionary is loaded from
    ``fdict_exhaustive_<pattern_file_name>.pickle``, completed and smoothed
    with ``smplr``. For every entry of ``NLIMIT_values`` each repetition is
    then compared against it (``su.avg_kld``, ``su.avg_bhatta``,
    ``su.avg_hellinger``); the scores are stored in ``plot_result_dict`` and
    written to a text file in ``detailed_result_path``.

    Args:
        output_path: directory for ``fudicts.pickle``,
            ``all_furer_times.pickle`` and, when ``fudicts`` is empty,
            ``no_results.info``.
        detailed_result_path: directory for the per-limit ``.result`` files.
        fudicts: indexed as ``fudicts[repetition][limit_index]``; the
            sampled frequency dictionaries.
        plot_result_dict: dict that is filled in place, keyed by limit
            value, with ``furer_KLD``, ``furer_BHT``, ``furer_HEL`` and
            ``furer_times`` (each a ``(mean, sample std)`` tuple).
        all_furer_times: indexed as ``all_furer_times[repetition][limit_index]``.
        exhaustive_approach_result_file: directory containing the pickled
            exhaustive dictionary.
        data_graph, pattern, Plist: forwarded to
            ``smplr.complete_combinations``.
        NLIMIT_values: sequence of limit values to report on.
        repetitions: number of repetitions to evaluate per limit.
        pattern_file_name: used in file names and report headers.
        fdict_exhaustive: ignored; it is replaced by the dictionary loaded
            from the pickle file.
        iteration_counter_n_limit: per-limit iteration counters (printed only).
        n_limit_embeddings: per-limit embedding counts written to the report.

    Returns:
        None.

    NOTE(review): when ``fudicts`` is empty only a marker file is written
    and processing continues, as in the original code; the repetition loop
    will then fail on ``fudicts[i]`` -- confirm whether an early return is
    wanted.
    """
    if len(fudicts) == 0:
        with open(os.path.join(output_path, 'no_results.info'), 'w') as info_file:
            info_file.write("No results for random - empty fudicts!")

    with open(os.path.join(output_path, 'fudicts.pickle'), 'wb') as pickout:
        pickle.dump(fudicts, pickout)

    with open(os.path.join(output_path, 'all_furer_times.pickle'), 'wb') as pickout:
        pickle.dump(all_furer_times, pickout)

    picklename = os.path.join(exhaustive_approach_result_file,
                              "fdict_exhaustive_%s.pickle" % pattern_file_name)
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # trusted result files.
    with open(picklename, 'rb') as pickin:
        fdict_exhaustive = pickle.load(pickin)

    smplr.complete_combinations(fdict_exhaustive, data_graph, pattern, Plist)  # add zeros to all not present combinations
    smplr.smooth(fdict_exhaustive, fdict_exhaustive)  # Laplace smoothing also for the exhaustive

    for nli, nlimit in enumerate(NLIMIT_values):
        print("ITERATION COUNTER FOR THIS LIMIT:  %s" % (iteration_counter_n_limit[nli],))
        print("REPORTING LIMIT:  %s" % (nlimit,))
        limit_results = {}
        plot_result_dict[nlimit] = limit_results
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        # Looked up before the repetition loop so the value is defined (and
        # belongs to this limit) even when the loop stops early or is empty.
        emb = n_limit_embeddings[nli]

        for i in range(repetitions):
            furer_times.append(all_furer_times[i][nli])

            fdict_Furer = fudicts[i][nli]
            smplr.smooth(fdict_Furer, fdict_exhaustive)  # smoothing to avoid zeros

            # we remove rows where frequencies do not reach 1%
            pde, trash_list, default_key = smplr.make_pd_general_kickout_default(
                fdict_exhaustive, trash_factor=0.01)

            if len(pde) < 1:
                print("WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING.")
                break

            # NOTE(review): the original invokes this helper twice on the
            # same dictionary and discards the first result; both calls are
            # kept in case the helper has side effects -- confirm and drop.
            smplr.make_pd_general_kickout_default_limited(
                fdict_Furer, trash_list, default_key)
            pdf, _tl, _dk = smplr.make_pd_general_kickout_default_limited(
                fdict_Furer, trash_list, default_key)
            print("Appending results ...")
            furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))

        # (mean, sample standard deviation) per metric, computed once and
        # shared by the plot dictionary and the text report.
        kld_stats = (numpy.mean(furer_results_KLD),
                     numpy.std(furer_results_KLD, ddof=1))
        bhatta_stats = (numpy.mean(furer_results_bhatta),
                        numpy.std(furer_results_bhatta, ddof=1))
        hellinger_stats = (numpy.mean(furer_results_hellinger),
                           numpy.std(furer_results_hellinger, ddof=1))
        time_stats = (numpy.mean(furer_times),
                      numpy.std(furer_times, ddof=1))
        limit_results["furer_KLD"] = kld_stats
        limit_results["furer_BHT"] = bhatta_stats
        limit_results["furer_HEL"] = hellinger_stats
        limit_results["furer_times"] = time_stats

        result_file_name = detailed_result_path + "/" + "res_" + pattern_file_name + pattern_file_name + "." + str(repetitions) + "x" + str(nlimit) + ".result"
        # The context manager closes the file even if a write below raises.
        with open(result_file_name, 'w') as resultfile:
            resultfile.write('Furer\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
            resultfile.write("NLIMIT: " + str(nlimit) + "\n")
            resultfile.write("repetitions: " + str(repetitions) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write("average average KLD on randomnode: " + str(kld_stats[0]) + " with SSTD: " + str(kld_stats[1]) + "\n")
            resultfile.write("average average bhatta on randomnode: " + str(bhatta_stats[0]) + " with SSTD: " + str(bhatta_stats[1]) + "\n")
            resultfile.write("average average hellinger on randomnode: " + str(hellinger_stats[0]) + " with SSTD: " + str(hellinger_stats[1]) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write("Random node took per run on average: " + str(time_stats[0]) + " seconds." + "\n")
            resultfile.write('-----DETAILED RESULTS-----' + "\n")
            resultfile.write('randnode_results_KLD :' + str(furer_results_KLD) + "\n")
            resultfile.write('randnode_results_bhatta :' + str(furer_results_bhatta) + "\n")
            resultfile.write('randnode_results_hellinger :' + str(furer_results_hellinger) + "\n")
            resultfile.write('randnode_times :' + str(furer_times) + "\n")
            resultfile.write('Nr embeddings for limit: ' + str(emb))
Esempio n. 5
0
def report_monitoring_my_version_online(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_result_file, data_graph,
        pattern, Plist, repetitions, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations):
    dict = {}

    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0

    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    #the problem might be that some runs finished earlier, and some later.

    for i in xrange(len(monitoring_marks)):
        print i
        #for key_iter in monitoring_reports.keys():
        if not (monitoring_marks[i] in dict.keys()):
            dict[monitoring_marks[i]] = []
        try:
            dict[monitoring_marks[i]].append(monitoring_reports[i])
            nr_iterations.append(monitoring_reports[i].nr_iterations)
            sum_of_embeddings.append(monitoring_reports[i].sum_nr_embeddings)
            sum_of_squares.append(
                monitoring_reports[i].sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(
                monitoring_reports[i].sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                monitoring_reports[i].sum_of_the_extra_square_embeddings)

        except IndexError:
            break

    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)

    counter_duration = 0
    counter = 0

    average_klds = []
    average_bhattas = []
    average_hellingers = []
    std_klds = []
    std_bhattas = []
    std_hellingers = []
    avg_nodes_observed = []
    nr_nodes_per_time_interval_per_runs = []
    number_of_sampling_iterations = []
    average_of_embeddings = []
    stdevs = []

    for time_snapshot in monitoring_marks:
        print "Processing ", counter, "out of: ", len(monitoring_marks)
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        snapshot_directory_path = os.path.join(detailed_result_path, )
        if not (os.path.exists(snapshot_directory_path)):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        fdict_furer_temp = dict[time_snapshot]

        fdicts_Furer = []

        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)

        if len(fdict_furer_temp) == 0:
            continue

        for i in range(len(fdict_furer_temp)):
            experiments.globals.nr_iterations = nr_iterations[i]
            fdict_limited = fdicts_Furer[i]
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(observed_nodes[i] -
                                                          snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]

            [pde, trash_list,
             default_key] = smplr.make_pd_general_kickout_default_my_version(
                 fdict_exhaustive)

            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            print fdict_Furer
            nr_possible_combinations = smplr.complete_combinations_1(
                fdict_Furer, data_graph, pattern, Plist)
            pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                fdict_Furer)
            #print smplr.transform_to_ptable(pde)
            #print smplr.transform_to_ptable(pdf)
            false_furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            false_furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            false_furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))

        average_klds.append(numpy.mean(false_furer_results_KLD))
        std_klds.append(numpy.std(false_furer_results_KLD, ddof=1))
        average_bhattas.append(numpy.mean(false_furer_results_bhatta))
        std_bhattas.append(numpy.std(false_furer_results_bhatta, ddof=1))
        average_hellingers.append(numpy.mean(false_furer_results_hellinger))
        std_hellingers.append(numpy.std(false_furer_results_hellinger, ddof=1))
        avg_nodes_observed.append(numpy.mean(observed_nodes))
        number_of_sampling_iterations.append(nr_iterations[counter])
        nr_nodes_per_time_interval_per_runs.append(
            float((numpy.mean(observed_nodes_difference_per_snapshot) /
                   duration[counter_duration])))
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[
                counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[
                counter]
        average_of_embeddings.append(nr_embeddings_temp)
        stdeviation = numpy.nan
        try:
            if sum_of_squares_root_node_emb[
                    counter] == 0 and sum_of_root_node_emb[counter] == 0:
                #we do the old standard deviation
                a = Decimal(sum_of_squares[counter]) - (
                    Decimal(math.pow(sum_of_embeddings[counter], 2)) /
                    Decimal(float(nr_iterations[counter])))
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
            else:
                a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                    Decimal(math.pow(sum_of_root_node_emb[counter], 2)) /
                    Decimal(float(nr_iterations[counter])))
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
        except:
            print "not successful"

        stdevs.append(stdeviation)

        counter += 1
        counter_duration += 1
    return {
        "average_klds": average_klds,
        "average_bhattas": average_bhattas,
        "average_hellingers": average_hellingers,
        "std_klds": std_klds,
        "std_bhattas": std_bhattas,
        "std_hellingers": std_hellingers,
        "avg_nodes_observed": avg_nodes_observed,
        "nr_nodes_per_time_interval_per_runs":
        nr_nodes_per_time_interval_per_runs,
        "number_of_sampling_iterations": number_of_sampling_iterations,
        "average_of_embeddings": average_of_embeddings,
        "stdevs": stdevs
    }
Esempio n. 6
0
def getStatistics_furer(fudicts, fdict_exhaustive, pattern, data_graph,
                        target_indices, targe_ids, head_node, target_nodes,
                        detailed_result_path, pattern_file_name):
    """Compare a sampled frequency dictionary with the exhaustive one.

    Both the exhaustive dictionary and every dictionary in ``fudicts[0]`` are
    first collapsed onto the key positions listed in ``target_indices``
    (1-based), summing the counts of keys that collapse to the same value.
    The collapsed exhaustive dictionary is completed and smoothed through
    ``smplr``; the collapsed dictionary at index 1 is smoothed against it,
    both are turned into probability distributions, and the average KLD,
    Bhattacharyya and Hellinger scores are computed with ``su``.

    ``detailed_result_path`` is created when it does not exist yet.
    ``head_node``, ``target_nodes`` and ``pattern_file_name`` are not used.
    The scores are only collected in local lists; as written, the function
    returns nothing.
    """

    def restrict_to_targets(counts):
        # Collapse `counts` onto the target positions, adding up colliding keys.
        collapsed = {}
        for full_key in counts.keys():
            short_key = ()
            for position in target_indices:
                short_key += (full_key[position - 1])
            collapsed[short_key] = collapsed.get(short_key, 0) + counts[full_key]
        return collapsed

    kld_scores = []
    bhatta_scores = []
    hellinger_scores = []

    results_dir = os.path.join(detailed_result_path)
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)

    # Collapse the exhaustive dictionary, then every monitored dictionary.
    reference_counts = restrict_to_targets(fdict_exhaustive)
    collapsed_monitors = [
        restrict_to_targets(monitor) for monitor in fudicts[0]
    ]

    # Add zeros for all combinations that are not present, then smooth the
    # reference against itself.
    smplr.complete_combinations(reference_counts, data_graph, pattern,
                                targe_ids)
    smplr.smooth(reference_counts, reference_counts)

    sampled_counts = collapsed_monitors[1]
    smplr.smooth(sampled_counts, reference_counts)

    reference_pd, trash_list, default_key = (
        smplr.make_pd_general_kickout_default(reference_counts,
                                              trash_factor=0.01))
    # The limited distribution is built twice from the same dictionary; only
    # the result of the second call is used below.
    _unused_pd, _tl, _dk = smplr.make_pd_general_kickout_default_limited(
        sampled_counts, trash_list, default_key)
    sampled_pd, _tl, _dk = smplr.make_pd_general_kickout_default_limited(
        sampled_counts, trash_list, default_key)

    for scores, metric_name in ((kld_scores, "avg_kld"),
                                (bhatta_scores, "avg_bhatta"),
                                (hellinger_scores, "avg_hellinger")):
        metric = getattr(su, metric_name)
        scores.append(
            metric(smplr.transform_to_ptable(reference_pd),
                   smplr.transform_to_ptable(sampled_pd)))
Esempio n. 7
0
def report_monitoring_my_version(monitoring_marks,output_path,detailed_result_path,monitoring_reports,exhaustive_approach_result_file,data_graph,pattern,Plist,repetitions,pattern_file_name,fdict_exhaustive,nr_non_observed_combinations,write):
      #CREATE DIRECTORY THAT WILL CONTAINS RESULTS FOR EACH TIME INSTANCE
      dict={}
      duration=[]
      nr_iterations=[]
      sum_number_of_embeddings=[]
      sum_of_embeddings_vers1=[]
      sum_of_embeddings_random_old=[]
      sum_of_square_emb_random_old=[]
      
      sum_of_squares_vers1=[]
      sum_of_the_square_embeddings=[]
      nr_root_nodes=[]
      begin=0
      
      for time_int in monitoring_marks:
          duration.append(time_int-begin)
          begin=time_int
      
      #if not type(monitoring_reports)==dict:
      #  print "monitoring reports to dictionary"
      #  tmp=monitoring_reports
      #  monitoring_reports={}
      #  monitoring_reports["1"]=tmp
      #  print monitoring_reports
      #the problem might be that some runs finished earlier, and some later.
      for i in xrange(len(monitoring_marks)):
          for key_iter in monitoring_reports.keys():
              if not(monitoring_marks[i] in dict.keys()):
                  dict[monitoring_marks[i]]=[]
              try:
                  dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                  nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                  sum_number_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                  sum_of_the_square_embeddings.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                  nr_root_nodes.append(monitoring_reports[key_iter][i].nr_root_nodes)
                  try:
                      sum_of_embeddings_random_old.append(monitoring_reports[key_iter][i].sum_number_of_embeddings_random)
                      sum_of_square_emb_random_old.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_random)
                  except:
                      continue  
                  try:
                      sum_of_embeddings_vers1.append(monitoring_reports[key_iter][i].sum_nr_embeddings_aux)
                      sum_of_squares_vers1.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_aux)
                  except:
                      continue
                  
              except IndexError:
                  break
      print "Sum of embeddings random old: ",len(sum_of_embeddings_random_old)
      print "NR ITERATIONS: ",nr_iterations
      print "sum_of_embeddings: ",sum_of_embeddings_vers1
      print "sum_of_squares: ",sum_of_squares_vers1
      snapshot_inits=[]
      for i in range(repetitions):
          snapshot_inits.append(0)
      
      counter_duration=0
      counter=0
      interval=0
      acc_nr_emb=0
      acc_nr_emb_minus_average=0
      nr_emb_per_interval=[]
      
      for time_snapshot in monitoring_marks:
          experiments.globals.current_time_snapshot=time_snapshot
          print "TIME SNAPSHOT: ",time_snapshot
          interval+=1
          randnode_results_KLD = []
          randnode_results_bhatta = []
          randnode_results_hellinger = []          
          furer_times = []
          observed_nodes=[]
          observed_nodes_difference_per_snapshot=[]
          snapshot_directory_path=os.path.join(detailed_result_path,)
          if not(os.path.exists(snapshot_directory_path)):
              os.mkdir(snapshot_directory_path)
          snapshot_directory_file=os.path.join(snapshot_directory_path,'res_time_'+str(time_snapshot)+'.info')
          
          if write==True:
              fdict_furer_temp=dict[time_snapshot]
              fdicts_Furer=[]
              for f in fdict_furer_temp:
                  fdicts_Furer.append(f.current_fdict)
                  observed_nodes.append(f.number_of_observed_nodes)
              
              if len(fdict_furer_temp)==0:
                  continue
              
              for i in range(len(fdict_furer_temp)):
                  experiments.globals.nr_iterations=nr_iterations[i] 
                  fdict_limited = fdicts_Furer[i]
                  fdict_Furer=fdicts_Furer[i]
                  observed_nodes_difference_per_snapshot.append(observed_nodes[i]-snapshot_inits[i])
                  snapshot_inits[i]=observed_nodes[i]
                  
                  [pde,  trash_list,default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                  if len(pde) < 1:
                      print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                      break  
                 
                  #if time_snapshot==15:
                  #    for ke in pde.keys():
                  #        print ke,pde[ke]
                  
                  nr_possible_combinations=smplr.complete_combinations_1(fdict_Furer, data_graph,  pattern,  Plist)
                  
                  pdl= smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
    #               print "EXHAUSTIVE:"
    #               for k in pde.keys():
    #                     print "KEY: ",k
    #                     for e in pde[k]:
    #                         print e
    #               print "SMPL:"
    #               for k in pdl.keys():
    #                     print "KEY: ",k
    #                     for e in pdl[k]:
    #                         print e
                  #print "EXHAUSTIVE ..."
                  #with open('random_exhaustive.csv','w') as f:
                  #   for k in pde.keys():
                  #      print "KEY: ",k
                  #      f.write(str(k)+";")
                  #      for e in pde[k]:
                  #          f.write(str(e)+";")
                  #      f.write("\n")
                  #print "RANDOM ..."
                  #with open('random_random.csv','w') as f:
                  #   for k in pdl.keys():
                  #      f.write(str(k)+";")
                  #      for e in pdl[k]:
                  #          f.write(str(e)+";")
                  #      f.write("\n")
                  randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
                  randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
                  randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
          
          print randnode_results_KLD
          print "Writing to: ",snapshot_directory_file
          resultfile = open(snapshot_directory_file,  'w')
          resultfile.write('Random\n')
          resultfile.write("experiment on graph: " + str(pattern_file_name) +" and pattern: "+pattern_file_name+"\n")
          resultfile.write("repetitions (for this time snapshot): " + str(repetitions) +"\n")
          resultfile.write(" " +"\n")
          resultfile.write("average KLD on random: " + str(numpy.mean(randnode_results_KLD))  + " with SSTD: " + str(numpy.std(randnode_results_KLD,  ddof=1)) +"\n")
          print "KLD: ",str(numpy.mean(randnode_results_KLD))
          resultfile.write("average bhatta on random: " + str(numpy.mean(randnode_results_bhatta))  + " with SSTD: " + str(numpy.std(randnode_results_bhatta,  ddof=1)) +"\n")
          resultfile.write("average hellinger on random: " + str(numpy.mean(randnode_results_hellinger))  + " with SSTD: " + str(numpy.std(randnode_results_hellinger,  ddof=1)) +"\n")
          resultfile.write(" " +"\n")
          resultfile.write('-----DETAILED RESULTS-----' +"\n")
          resultfile.write('random_results_KLD :' + str(randnode_results_KLD) +"\n")
          resultfile.write('random_results_bhatta :' + str(randnode_results_bhatta) +"\n")
          resultfile.write('random_results_hellinger :' + str(randnode_results_hellinger) +"\n")
          resultfile.write('avg #nodes observed :' + str(numpy.mean(observed_nodes)) +"\n")
          resultfile.write('# nodes per time interval per run:' + str((numpy.mean(observed_nodes_difference_per_snapshot)/duration[counter_duration])) +"\n")
          resultfile.write('avg difference of nodes observed from previous snapshot :' + str(numpy.mean(observed_nodes_difference_per_snapshot)) +"\n")          
          resultfile.write("------------------------------------ Sampling info ------------------------------\n")
          resultfile.write('number of sampling iterations :' + str(nr_iterations[counter])+"\n")    
          nr_iter=nr_iterations[counter]
          if nr_iter==0 or nr_iter==1:
              nr_iter=2
          avg=(float(Decimal(sum_number_of_embeddings[counter]))/nr_iter)*(experiments.globals.nr_root_nodes)
          print "HALOOO:", Decimal(sum_number_of_embeddings[counter])
          old=False
          stdev2=0
          if avg<0:
              print "Handling old data structures"
              #this means we handle the old version
              avg=float(sum_of_embeddings_random_old[counter])/nr_iter
              old=True
          sum1=Decimal(sum_of_the_square_embeddings[counter])
          sum2=Decimal(sum_number_of_embeddings[counter])
          var=Decimal(sum1)-(Decimal(math.pow(Decimal(sum2),2))/nr_iter)
          print "Variance: ",var
          if var>0:
            stdev2=math.sqrt(var/(nr_iter-1))
       
          stdev=Decimal(stdev2)*Decimal(math.sqrt(nr_iter)) 
          if old:
              print "Handling old data structures"
              #this means we handle the old version
              variance=sum_of_square_emb_random_old[counter]/(nr_iter)
              stdev2=math.sqrt(variance/(nr_iter-1))
          print "STDEV: ",stdev
          print "STDEV 2: ",stdev2
          print "Nr embeddingS: ",avg
          resultfile.write('average of embeddings w.r.t sampling iterations:' +str(avg) +"\n") 
          resultfile.write('stdeviation of # embeddings:' + str(stdev2)+"\n")  
          resultfile.write('2 stdeviation of # embeddings:' + str(stdev)+"\n")  
          counter+=1  
          resultfile.close()
          counter_duration+=1
Esempio n. 8
0
def report_monitoring(monitoring_marks,output_path,detailed_result_path,monitoring_reports,exhaustive_approach_result_file,data_graph,pattern,Plist,repetitions,pattern_file_name):
      #CREATE DIRECTORY THAT WILL CONTAINS RESULTS FOR EACH TIME INSTANCE
      picklename = os.path.join(exhaustive_approach_result_file,"fdict_exhaustive_%s.pickle" % pattern_file_name)
      pickin = open(picklename, 'rb')
      fdict_exhaustive = pickle.load(pickin)
      #smplr.complete_combinations(fdict_exhaustive, data_graph,  pattern,  Plist)      # add zeros to all not present combinations
      #smplr.smooth(fdict_exhaustive,  fdict_exhaustive)     # Laplace smoothing also for the exhaustive
      dict={}
      duration=[]
      nr_iterations=[]
      sum_number_of_embeddings=[]
      sum_of_embeddings_vers1=[]
      sum_of_squares_vers1=[]
      sum_of_the_square_embeddings=[]
      sum_of_embeddings_random_old=[]
      sum_of_square_emb_random_old=[]
      nr_root_nodes=[]
      begin=0
      
      for time_int in monitoring_marks:
          duration.append(time_int-begin)
          begin=time_int
       
      #the problem might be that some runs finished earlier, and some later.
      for i in xrange(len(monitoring_marks)):
          for key_iter in monitoring_reports.keys():
              if not(monitoring_marks[i] in dict.keys()):
                  dict[monitoring_marks[i]]=[]
              try:
                  dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                  nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                  sum_number_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                  sum_of_the_square_embeddings.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                  nr_root_nodes.append(monitoring_reports[key_iter][i].nr_root_nodes)
                  try:
                      sum_of_embeddings_random_old.append(monitoring_reports[key_iter][i].sum_number_of_embeddings_random)
                      sum_of_square_emb_random_old.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_random)
                  except:
                      continue  
                  try:
                      sum_of_embeddings_vers1.append(monitoring_reports[key_iter][i].sum_nr_embeddings_aux)
                      sum_of_squares_vers1.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_aux)
                  except:
                      continue
                       
              except IndexError:
                  break
      
      print "NR ITERATIONS: ",nr_iterations
      print "sum_of_embeddings: ",sum_of_embeddings_vers1
      print "sum_of_squares: ",sum_of_squares_vers1
      snapshot_inits=[]
      for i in range(repetitions):
          snapshot_inits.append(0)
      
      counter_duration=0
      counter=0
      interval=0
      acc_nr_emb=0
      acc_nr_emb_minus_average=0
      nr_emb_per_interval=[]
      
      for time_snapshot in monitoring_marks:
          interval+=1
          randnode_results_KLD = []
          randnode_results_bhatta = []
          randnode_results_hellinger = []          
          furer_times = []
          observed_nodes=[]
          observed_nodes_difference_per_snapshot=[]
          snapshot_directory_path=os.path.join(detailed_result_path,)
          if not(os.path.exists(snapshot_directory_path)):
              os.mkdir(snapshot_directory_path)
          snapshot_directory_file=os.path.join(snapshot_directory_path,'res_time_'+str(time_snapshot)+'.info')
          fdict_furer_temp=dict[time_snapshot]
          fdicts_Furer=[]
          for f in fdict_furer_temp:
              fdicts_Furer.append(f.current_fdict)
              observed_nodes.append(f.number_of_observed_nodes)
          
          if len(fdict_furer_temp)==0:
              continue
          
          for i in range(len(fdict_furer_temp)):
              fdict_limited = fdicts_Furer[i]
              smplr.smooth(fdict_limited,  fdict_exhaustive)    # smoothing to avoid zeros
              fdict_Furer=fdicts_Furer[i]
              observed_nodes_difference_per_snapshot.append(observed_nodes[i]-snapshot_inits[i])
              snapshot_inits[i]=observed_nodes[i]
              [pde,  trash_list,  default_key] = smplr.make_pd_general_kickout_default_old(fdict_exhaustive,  trash_factor=0.01)
              if len(pde) < 1:
                  print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                  break   
              print "random vertex counts: "
              #for k in fdict_limited.keys():
              #    print k,fdict_limited[k]
              [pdl,  tl,  dk] = smplr.make_pd_general_kickout_default_limited_old(fdict_limited,  trash_list,  default_key)
              randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
              randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
              randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
          
          
          print "Writing to: ",snapshot_directory_file
          resultfile = open(snapshot_directory_file,  'w')
          resultfile.write('Random\n')
          resultfile.write("experiment on graph: " + str(pattern_file_name) +" and pattern: "+pattern_file_name+"\n")
          resultfile.write("repetitions (for this time snapshot): " + str(repetitions) +"\n")
          resultfile.write(" " +"\n")
          resultfile.write("average KLD on random: " + str(numpy.mean(randnode_results_KLD))  + " with SSTD: " + str(numpy.std(randnode_results_KLD,  ddof=1)) +"\n")
          print "KLD: ",str(numpy.mean(randnode_results_KLD))
          resultfile.write("average bhatta on random: " + str(numpy.mean(randnode_results_bhatta))  + " with SSTD: " + str(numpy.std(randnode_results_bhatta,  ddof=1)) +"\n")
          resultfile.write("average hellinger on random: " + str(numpy.mean(randnode_results_hellinger))  + " with SSTD: " + str(numpy.std(randnode_results_hellinger,  ddof=1)) +"\n")
          resultfile.write(" " +"\n")
          resultfile.write('-----DETAILED RESULTS-----' +"\n")
          resultfile.write('random_results_KLD :' + str(randnode_results_KLD) +"\n")
          resultfile.write('random_results_bhatta :' + str(randnode_results_bhatta) +"\n")
          resultfile.write('random_results_hellinger :' + str(randnode_results_hellinger) +"\n")
          resultfile.write('avg #nodes observed :' + str(numpy.mean(observed_nodes)) +"\n")
          resultfile.write('# nodes per time interval per run:' + str((numpy.mean(observed_nodes_difference_per_snapshot)/duration[counter_duration])) +"\n")
          resultfile.write('avg difference of nodes observed from previous snapshot :' + str(numpy.mean(observed_nodes_difference_per_snapshot)) +"\n")          
          resultfile.write("------------------------------------ Sampling info ------------------------------\n")
          resultfile.write('number of sampling iterations :' + str(nr_iterations[counter])+"\n")    
          
          avg=(float(Decimal(sum_number_of_embeddings[counter]))/nr_iterations[counter])*(experiments.globals.nr_root_nodes)
          print "HALOOO:", Decimal(sum_number_of_embeddings[counter])
          old=False

          if avg<0:
              print "Handling old data structures"
              #this means we handle the old version
              avg=float(sum_of_embeddings_random_old[counter])/nr_iterations[counter]
              old=True
              
          #nr_emb_per_interval.append(avg)
          #print "Nr emb per interval: ",nr_emb_per_interval
          #resultfile.write('average of embeddings w.r.t sampling iterations:' + str(avg)+"\n") 
          #print "Avg old method: ",avg
          #acc_nr_emb+=Decimal(avg)
          #print "Interval: ",interval," Accnr emg: ",acc_nr_emb
          #avg_emb_Jan=acc_nr_emb/(interval)
          #acc_nr_emb_minus_average=0
          #for i in xrange(0,interval):
          #    acc_nr_emb_minus_average+=math.pow((Decimal(nr_emb_per_interval[i])-Decimal(avg_emb_Jan)),2)
          #std_Jan=math.sqrt(acc_nr_emb_minus_average)/(interval)
          #print "STDEV Jan: ",std_Jan
          #print "Average Nr emb: Jan Method: ",avg_emb_Jan
          
          sum1=Decimal(sum_of_the_square_embeddings[counter])
          sum2=Decimal(sum_number_of_embeddings[counter])
          
          #variance=(Decimal(sum1)-Decimal(sum2))/(nr_iterations[counter]-1)
          print "sum 1: ",sum1
          print "sum 2: ",sum2
          #print "Variance: ",variance
          #stdev1=math.sqrt(variance)
          
          var=Decimal(sum1)-(Decimal(math.pow(Decimal(sum2),2))/nr_iterations[counter])
          stdev2=math.sqrt(var/(nr_iterations[counter]-1))
          
          stdev=Decimal(stdev2)*Decimal(math.sqrt(nr_iterations[counter])) 
          if old:
              print "Handling old data structures"
              #this means we handle the old version
              variance=sum_of_square_emb_random_old[counter]/(nr_iterations[counter])
              stdev2=math.sqrt(variance/(nr_iterations[counter]-1))
          print "STDEV: ",stdev
          print "STDEV 2: ",stdev2
          print "Nr embeddingS: ",avg
          resultfile.write('average of embeddings w.r.t sampling iterations:' +str(avg) +"\n") 
          resultfile.write('stdeviation of # embeddings:' + str(stdev2)+"\n")  
          resultfile.write('2 stdeviation of # embeddings:' + str(stdev)+"\n")  
          #resultfile.write('overal nr. of embeddings:' + str(sum_of_embeddings[counter])+"\n") 
          #a=sum_of_squares[counter]-((math.pow(sum_of_embeddings[counter], 2)/nr_iterations[counter]))
          #stdeviation=math.sqrt(a/(nr_iterations[counter]-1))
          #a1=sum_of_squares_vers1[counter]-((math.pow(sum_of_embeddings_vers1[counter], 2)/nr_iterations[counter]))
          #stdeviation1=math.sqrt(a1/(nr_iterations[counter]-1))
          #resultfile.write('stdeviation of # embeddings:' + str(stdeviation)+"\n")  
          
          counter+=1  

          resultfile.close()
          counter_duration+=1
Esempio n. 9
0
def report(rndicts,all_randnode_times,NLIMIT_values,plot_result_dict,repetitions,detailed_result_path,output_path,exhaustive_approach_result_file,pattern_file_name,nr_embeddings_n_limits):
    """Compare random-vertex sampling runs with the exhaustive result and write reports.

    For every limit in ``NLIMIT_values`` the per-repetition frequency
    dictionaries are compared with the exhaustive frequency dictionary via
    ``su.avg_kld``, ``su.avg_bhatta`` and ``su.avg_hellinger``. The mean and
    sample standard deviation (``ddof=1``) of each measure and of the run
    times are stored in ``plot_result_dict`` and written to a ``.result``
    text file.

    Parameters:
        rndicts: indexed as ``rndicts[repetition][limit_index]``; each entry
            is passed to ``smplr.smooth`` together with the exhaustive
            dictionary. Also dumped to ``rndicts.pickle``.
        all_randnode_times: indexed like ``rndicts``; run time per
            repetition and limit. Also dumped to ``all_randnode_times.pickle``.
        NLIMIT_values: sequence of limits; one result file is written per entry.
        plot_result_dict: dict updated in place. ``plot_result_dict[limit]``
            is replaced by a dict with the keys ``randomnode_KLD``,
            ``randomnode_BHT``, ``randomnode_HEL`` and ``randomnode_times``,
            each mapping to a ``(mean, sample std)`` tuple.
        repetitions: number of repetitions to read for every limit.
        detailed_result_path: directory receiving the ``.result`` files.
        output_path: directory receiving the pickles and, when ``rndicts``
            is empty, the ``no_results.info`` marker.
        exhaustive_approach_result_file: directory (despite the name) that
            holds ``fdict_exhaustive_<pattern_file_name>.pickle``.
        pattern_file_name: pattern name used in file names and report text.
        nr_embeddings_n_limits: indexed by limit index; the value is written
            as the last line of the corresponding result file.

    Returns:
        None. Results are delivered through ``plot_result_dict`` and files.
    """
    if len(rndicts) == 0:
        # The marker is plain text, so open it in text mode. Processing
        # deliberately continues afterwards, as it always did.
        with open(os.path.join(output_path, 'no_results.info'), 'w') as marker:
            marker.write("No results for random - empty rndicts!")

    # Context managers guarantee the handles are released even on errors.
    with open(os.path.join(output_path, 'rndicts.pickle'), 'wb') as pickout:
        pickle.dump(rndicts, pickout)

    with open(os.path.join(output_path, 'all_randnode_times.pickle'), 'wb') as pickout:
        pickle.dump(all_randnode_times, pickout)

    picklename = os.path.join(exhaustive_approach_result_file, "fdict_exhaustive_%s.pickle" % pattern_file_name)
    # NOTE(review): pickle.load can execute arbitrary code; this path must
    # only ever point at result files produced by trusted runs.
    with open(picklename, 'rb') as pickin:
        fdict_exhaustive = pickle.load(pickin)

    for nli, nlimit in enumerate(NLIMIT_values):
        plot_result_dict[nlimit] = {}
        randnode_results_KLD = []
        randnode_results_bhatta = []
        randnode_results_hellinger = []
        randnode_times = []

        # Looked up once per limit so it is defined even when there are no
        # repetitions (it used to be bound only inside the loop below).
        emb = nr_embeddings_n_limits[nli]

        for i in range(repetitions):
            randnode_times.append(all_randnode_times[i][nli])
            fdict_limited = rndicts[i][nli]
            smplr.smooth(fdict_limited, fdict_exhaustive)    # smoothing to avoid zeros
            # we remove rows where frequencies do not reach 1%
            pde, trash_list, default_key = smplr.make_pd_general_kickout_default(fdict_exhaustive, trash_factor=0.01)
            if len(pde) < 1:
                # Single-argument parenthesised print: same output on
                # Python 2, and still parses on Python 3.
                print("WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING.")
                break
            # The limited dictionary is reduced with the trash list and
            # default key obtained from the exhaustive one.
            pdl, tl, dk = smplr.make_pd_general_kickout_default_limited(fdict_limited, trash_list, default_key)
            randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))

        # (mean, sample standard deviation) pairs, computed once and reused
        # for both the plot dictionary and the text report.
        kld_stats = (numpy.mean(randnode_results_KLD), numpy.std(randnode_results_KLD, ddof=1))
        bhatta_stats = (numpy.mean(randnode_results_bhatta), numpy.std(randnode_results_bhatta, ddof=1))
        hellinger_stats = (numpy.mean(randnode_results_hellinger), numpy.std(randnode_results_hellinger, ddof=1))
        # added to store and plot the times
        times_stats = (numpy.mean(randnode_times), numpy.std(randnode_times, ddof=1))

        plot_result_dict[nlimit]["randomnode_KLD"] = kld_stats
        plot_result_dict[nlimit]["randomnode_BHT"] = bhatta_stats
        plot_result_dict[nlimit]["randomnode_HEL"] = hellinger_stats
        plot_result_dict[nlimit]["randomnode_times"] = times_stats

        # NOTE(review): pattern_file_name appears twice in the file name and
        # stands in for the graph name in the header line below. That looks
        # like a copy-paste slip, but it is kept so existing output names and
        # contents do not change.
        result_file_name = detailed_result_path+"/"+"ultimex_ICDM_" + pattern_file_name + pattern_file_name+"."+str(repetitions) +"x"+str(nlimit)+".result"
        # Two spaces reproduce the separator the old two-item print emitted.
        print("RESULT FILE NAME:  %s" % result_file_name)
        with open(result_file_name, 'w') as resultfile:
            resultfile.write('Random Vertex\n')
            resultfile.write("experiment on graph: " + str(pattern_file_name) +" and pattern: "+pattern_file_name+"\n")
            resultfile.write("NLIMIT: " + str(nlimit) +"\n")
            resultfile.write("repetitions: " + str(repetitions) +"\n")
            resultfile.write(" " +"\n")
            resultfile.write("average average KLD on randomnode: " + str(kld_stats[0])  + " with SSTD: " + str(kld_stats[1]) +"\n")
            resultfile.write("average average bhatta on randomnode: " + str(bhatta_stats[0])  + " with SSTD: " + str(bhatta_stats[1]) +"\n")
            resultfile.write("average average hellinger on randomnode: " + str(hellinger_stats[0])  + " with SSTD: " + str(hellinger_stats[1]) +"\n")
            resultfile.write(" " +"\n")
            resultfile.write("Random node took per run on average: " +str(times_stats[0]) + " seconds." +"\n")
            resultfile.write('-----DETAILED RESULTS-----' +"\n")
            resultfile.write('randnode_results_KLD :' + str(randnode_results_KLD) +"\n")
            resultfile.write('randnode_results_bhatta :' + str(randnode_results_bhatta) +"\n")
            resultfile.write('randnode_results_hellinger :' + str(randnode_results_hellinger) +"\n")
            resultfile.write('randnode_times :' + str(randnode_times) +"\n")
            resultfile.write('Nr embeddings for limit: '+str(emb))