def analyze_clustering_coefficient_distribution(G):
    """Plot the distribution of clustering coefficients of G's nodes.

    Draws a histogram of the per-node clustering coefficients (as
    returned by get_cluster_coefficients) and prints their mean.

    Parameters:
        G: the graph to analyse; passed to get_cluster_coefficients.
    """
    n = 50  # number of histogram bins
    cluster_coef = get_cluster_coefficients(G)

    # Plot the histogram of the coefficients.
    title = "Distribution of nodes according to their clustering coefficient"
    xlabel = "Clustering coefficient"
    ylabel = ""
    charts.histogram(cluster_coef, title, xlabel, ylabel, n)

    mean = np.mean(cluster_coef)
    print("Mean cluster coefficient: %.3f" % mean)
def analyze_parser():
    """Compare article parsing times with parse-and-store processing times.

    Reads the two measurement files (one integer per line, milliseconds),
    plots the distribution of each series, prints summary statistics, and
    applies Student's paired t-test to the two series.
    """
    # Load the raw measurements; `with` guarantees the files are closed.
    with open("./Parsing/parsingTimes_WB.txt") as f:
        parsingTimes = np.asarray([int(line.strip()) for line in f])
    with open("./Parsing/processingTimes_WB.txt") as f:
        processingTimes = np.asarray([int(line.strip()) for line in f])

    n = 500  # number of histogram bins

    # Distribution of parsing times.
    title = "Distribution of parsing times"
    xlabel = "Parsing time (ms)"
    ylabel = ""
    plt.figure()
    charts.histogram(parsingTimes, title, xlabel, ylabel, n)
    mean = np.mean(parsingTimes)
    var = np.var(parsingTimes, ddof=1)  # ddof=1 -> unbiased sample variance
    print("Mean parsing time: %.5f" % mean)
    print("min parsing time: %.5f" % min(parsingTimes))
    print("max parsing time: %.5f" % max(parsingTimes))
    print("Variance of parsing time: %.3f" % var)

    # Distribution of processing (parse + add to database) times.
    title = "Distribution of processing times"
    xlabel = "Processing time (ms)"
    ylabel = ""
    plt.figure()
    charts.histogram(processingTimes, title, xlabel, ylabel, n)
    mean = np.mean(processingTimes)
    var = np.var(processingTimes, ddof=1)
    print("Mean processing time: %.9f" % mean)
    print("min processing time: %.5f" % min(processingTimes))
    print("max processing time: %.5f" % max(processingTimes))
    print("Variance of processing time: %.3f" % var)

    # Paired t-test: both series are measurements over the same articles.
    t_times, p_times = sp.ttest_rel(parsingTimes, processingTimes)
    print("t: %.2f" % t_times)
    print("p-value: %.20f" % p_times)
    print("%d" % len(parsingTimes))
def tet_per_transitions_histogram(table, transitions):
    """Build a histogram chart of transition execution times, one data
    series per transition.

    Returns None when the table lacks a required column, otherwise a
    ("TETs (grouped by transitions)", chart) pair.
    """
    needed = ("Event", "Duration", "ID")
    if any(column not in table.header for column in needed):
        return

    equals = lambda a, b: a == b
    base_filters = [("Event", equals, 'T')]

    names = []
    values = []
    for transition in transitions:
        names.append(transition.get_name_or_id())
        durations = table.select(
            ["Duration"], base_filters + [("ID", equals, transition.id)])
        if len(durations) == 0:
            durations = [0]  # histogram input must be non-empty
        values.append(durations)

    chart = charts.histogram(
        names, values,
        "Histogram of transition execution times grouped by transitions",
        "Duration [ms]", "Count")
    return ("TETs (grouped by transitions)", chart)
def tet_per_processes_and_transitions_histogram(table, processes, transitions):
    """Build a histogram of transition execution times with one data
    series per (transition, process) pair.

    Returns None if the table is missing any required column, otherwise
    a ("Transition execution times (TETs)", chart) tuple.
    """
    for needed in ("Event", "Process", "Duration", "ID"):
        if needed not in table.header:
            return

    same = lambda x, y: x == y
    event_filter = [("Event", same, 'T')]

    names, values = [], []
    for transition in transitions:
        by_transition = event_filter + [("ID", same, transition.id)]
        for process in processes:
            names.append("{0}`{1}".format(transition.get_name_or_id(), process))
            tets = table.select(
                ["Duration"], by_transition + [("Process", same, process)])
            if len(tets) == 0:
                # data for a histogram chart must not be empty
                tets = [0]
            values.append(tets)

    return ("Transition execution times (TETs)",
            charts.histogram(names, values,
                             "Histogram of transition execution times",
                             "Duration [ms]", "Count"))
def tet_per_processes_and_transitions_histogram(table, processes, transitions):
    """Build a histogram of transition execution times grouped by
    (transition, process) pairs; returns a (label, chart) tuple, or
    None when a required column is absent from the table.

    NOTE(review): this is an exact duplicate of an identically named
    function defined earlier in this file; being later, this definition
    is the one that takes effect.
    """
    required_columns = ["Event", "Process", "Duration", "ID"]
    if not set(required_columns).issubset(table.header):
        return

    eq = lambda lhs, rhs: lhs == rhs

    labels = []
    series = []
    for t in transitions:
        transition_filters = [("Event", eq, 'T'), ("ID", eq, t.id)]
        for proc in processes:
            labels.append("{0}`{1}".format(t.get_name_or_id(), proc))
            data = table.select(["Duration"],
                                transition_filters + [("Process", eq, proc)])
            if len(data) == 0:
                data = [0]  # a histogram series must not be empty
            series.append(data)

    chart = charts.histogram(labels, series,
                             "Histogram of transition execution times",
                             "Duration [ms]", "Count")
    return ("Transition execution times (TETs)", chart)
def analyze_degree_distribution(G):
    """Plot the in-degree distribution of the digraph G.

    Draws a log-scaled histogram of the in-degrees and prints the mean
    and the sample variance of the degrees.

    Parameters:
        G: a digraph exposing in_degree() — assumes it returns a dict-like
           mapping with .values(); TODO confirm against the graph library
           version in use.
    """
    n = 500  # number of histogram bins
    degrees = G.in_degree().values()

    title = "Distribution of nodes according to their degree"
    xlabel = "Degree of the node"
    ylabel = ""
    # Degree distributions are heavy-tailed, hence the log-scaled y axis.
    charts.histogram(degrees, title, xlabel, ylabel, n, yLog=True)

    mean = np.mean(degrees)
    var = np.var(degrees, ddof=1)  # ddof=1 -> unbiased sample variance
    print("Mean degree: %.3f" % mean)
    print("Variance of degree: %.3f" % var)
def _processes_histogram(self):
    """Build the per-process histogram chart from the precomputed
    tracelog statistics."""
    stats = self.tracelog.statistics
    return charts.histogram(
        stats["proc_hist_names"],
        stats["proc_hist_values"],
        "Histograms of transition execution times",
        "Time",
        "Density")
def tet_per_transitions_histogram(table, transitions):
    """Return ("TETs (grouped by transitions)", chart): a histogram of
    transition execution times with one series per transition, or None
    if any required column is missing from the table.

    NOTE(review): an identical function with this name appears earlier
    in the file; this later definition overrides it.
    """
    header = table.header
    for column in ("Event", "Duration", "ID"):
        if column not in header:
            return

    match = lambda x, y: x == y
    event_is_t = [("Event", match, 'T')]

    labels, data = [], []
    for transition in transitions:
        labels.append(transition.get_name_or_id())
        selected = table.select(["Duration"],
                                event_is_t + [("ID", match, transition.id)])
        # The chart cannot be drawn from an empty series.
        data.append(selected if len(selected) > 0 else [0])

    return ("TETs (grouped by transitions)",
            charts.histogram(
                labels, data,
                "Histogram of transition execution times grouped by transitions",
                "Duration [ms]", "Count"))
def tet_per_processes_histogram(table, processes):
    """Build a histogram of transition execution times grouped by
    process; returns a ("TETs (grouped by processes)", chart) pair, or
    None when the table lacks a required column."""
    if not all(c in table.header for c in ["Event", "Process", "Duration"]):
        return

    eq = lambda a, b: a == b

    names = []
    values = []
    for process in processes:
        names.append("Process {0}".format(process))
        durations = table.select(
            ["Duration"],
            [("Event", eq, 'T'), ("Process", eq, process)])
        if len(durations) == 0:
            durations = [0]  # histogram data must not be empty
        values.append(durations)

    return ("TETs (grouped by processes)",
            charts.histogram(
                names, values,
                "Histogram of transition execution times grouped by processes",
                "Duration [ms]", "Count"))
def analyze_IRSystems():
    """Compare the Title Based IR system with the Citation Based IR system.

    For each system, reads the Reciprocal Rank (RR) and execution-time
    measurements from the IRtesting files, plots their distributions, and
    prints the mean and variance of each.  Student's paired t-test is then
    applied to compare the two systems on both measures.
    """
    n = 30  # number of histogram bins

    # ---------------- TITLE BASED IR ----------------
    # Reciprocal ranks.
    with open(".\IRtesting\IRTitleBased_RR.txt") as f:
        reciprocalRank_IRTitleBased = np.asarray(
            [float(line.strip()) for line in f])
    title = "Reciprocal Rank values distribution obtained with the TitleBased IR"
    xlabel = "Reciprocal Rank"
    ylabel = ""
    charts.histogram(reciprocalRank_IRTitleBased, title, xlabel, ylabel, n)
    MRR_IR1 = np.mean(reciprocalRank_IRTitleBased)
    # ddof=1 -> unbiased sample variance.  (The original passed 1 as the
    # *axis* positional argument, which fails on a 1-D array.)
    varRR_IR1 = np.var(reciprocalRank_IRTitleBased, ddof=1)

    # Execution times.
    with open(".\IRtesting\IRTitleBased_MSEC.txt") as f:
        exeTimes_IRTitleBased = np.asarray(
            [float(line.strip()) for line in f])
    title = "Execution times distribution obtained with the TitleBased IR"
    xlabel = "Execution time (msec)"
    ylabel = ""
    charts.histogram(exeTimes_IRTitleBased, title, xlabel, ylabel, n)
    meanTime_IR1 = np.mean(exeTimes_IRTitleBased)
    varTime_IR1 = np.var(exeTimes_IRTitleBased, ddof=1)

    # ---------------- CITATION BASED IR ----------------
    # Reciprocal ranks.
    with open(".\IRtesting\IRCitationBased_RR.txt") as f:
        reciprocalRank_IRCitationBased = np.asarray(
            [float(line.strip()) for line in f])
    title = "Reciprocal Rank values distribution obtained with the CitationBased IR"
    xlabel = "Reciprocal Rank"
    ylabel = ""
    charts.histogram(reciprocalRank_IRCitationBased, title, xlabel, ylabel, n)
    MRR_IR2 = np.mean(reciprocalRank_IRCitationBased)
    varRR_IR2 = np.var(reciprocalRank_IRCitationBased, ddof=1)

    # Execution times.
    with open(".\IRtesting\IRCitationBased_MSEC.txt") as f:
        exeTimes_IRCitationBased = np.asarray(
            [float(line.strip()) for line in f])
    title = "Execution times distribution obtained with the CitationBased IR"
    xlabel = "Execution time (msec)"
    ylabel = ""
    charts.histogram(exeTimes_IRCitationBased, title, xlabel, ylabel, n)
    meanTime_IR2 = np.mean(exeTimes_IRCitationBased)
    varTime_IR2 = np.var(exeTimes_IRCitationBased, ddof=1)

    # ---------------- STUDENT'S PAIRED T-TEST ----------------
    # Compare reciprocal ranks: Title Based vs Citation Based.  (ttest_rel
    # returns the statistic and the p-value; it does not take output
    # parameters, and the original compared the citation series with itself.)
    t_RR, p_RR = sp.ttest_rel(reciprocalRank_IRTitleBased,
                              reciprocalRank_IRCitationBased)
    print("-----------Reciprocal ranks analysis-------------")
    print("MRR (IR Title Based): %.2f" % MRR_IR1)
    print("Variance of RR (IR Title Based): %.2f" % varRR_IR1)
    print("MRR (IR Citation Based): %.2f" % MRR_IR2)
    print("Variance of RR (IR Citation Based): %.2f" % varRR_IR2)
    print("t: %.2f" % t_RR)
    print("p-value: %.2f" % p_RR)
    print("")

    # Compare execution times: Title Based vs Citation Based.
    t_times, p_times = sp.ttest_rel(exeTimes_IRTitleBased,
                                    exeTimes_IRCitationBased)
    print("-----------Execution time analysis-------------")
    print("Mean execution time (IR Title Based): %.2f" % meanTime_IR1)
    print("Variance of execution time (IR Title Based): %.2f" % varTime_IR1)
    print("Mean execution time (IR Citation Based): %.2f" % meanTime_IR2)
    print("Variance of execution time (IR Citation Based): %.2f" % varTime_IR2)
    print("t: %.2f" % t_times)
    print("p-value: %.2f" % p_times)
    print("")
md=4, lg=2), ], no_gutters=True) ]) return body #write code for main page here to pull data - currently data not stored anywhere so we cannot pull it, need to create it in the script..... could store locally in a chahe #querying local data #need to figure out how to save local data --- maybe need to use mysql?? #df = pd.read_csv('Python_Main/assets/df.csv') #df2 = pd.read_csv('Python_Main/assets/df2.csv') df = dg.generate_data(1000, 5) df2 = dg.create_control_data(df) fig = control_chart(df, df2) fig2 = histogram(df) ###make these dynamic by grabbing HREF name and putting it into the title, also use the HREF name to pull correct data from generated data set.... def sixty_one(): layout = html.Div([nav, header, display_body(fig)]) return layout def sixty_two(): layout = html.Div([nav, header, display_body(fig)]) return layout