def getStatValues(documents):
    """Collect classifier scores from all documents and summarize them.

    Gathers trigger, unmerging and argument scores from every event and
    relation, then returns a dict with the mean and standard deviation of
    each non-empty score population.
    """
    statValues = {}
    triggerValues = []
    unmergingValues = []
    argValues = []
    for doc in documents:
        for event in doc.events:
            triggerValues.extend(sorted(event.trigger.triggerScoreDict.values()))
            # unmerging scores are optional on a trigger
            if hasattr(event.trigger, "unmergingScoreDict"):
                unmergingValues.extend(sorted(event.trigger.unmergingScoreDict.values()))
            for argScoreDict in event.argScoreDicts:
                argValues.extend(sorted(argScoreDict.values()))
        for relation in doc.relations:
            for argScoreDict in relation.argScoreDicts:
                argValues.extend(sorted(argScoreDict.values()))
    if triggerValues:
        statValues["trigger-stdev"] = stats.lstdev(triggerValues)
        statValues["trigger-mean"] = stats.lmean(triggerValues)
    if unmergingValues:
        statValues["unmerging-stdev"] = stats.lstdev(unmergingValues)
        statValues["unmerging-mean"] = stats.lmean(unmergingValues)
    statValues["arg-stdev"] = stats.lstdev(argValues)
    statValues["arg-mean"] = stats.lmean(argValues)
    return statValues
def get_modules(self, cutoff=.05):
    """Recursively collect significant child modules.

    A child is kept when its value beats both of its own minima and the
    cutoff; otherwise the search descends into that child.
    """
    modules = []
    for child in self:
        threshold = min(child.lo_min, child.hi_min, cutoff)
        if child.val < threshold:
            if self.datatype == "continuous":
                # direction of the effect: which side has the lower mean
                if lmean(child.a) < lmean(child.b):
                    child.desc = "lo"
                else:
                    child.desc = "hi"
            else:
                child.desc = "enriched"
            modules.append(child)
        else:
            modules.extend(child.get_modules(cutoff=cutoff))
    return modules
def get_modules(self, cutoff=.05):
    """Walk the tree and return significant modules.

    Nodes whose value does not beat min(lo_min, hi_min, cutoff) are
    recursed into instead of collected.
    """
    collected = []
    for node in self:
        if not node.val < min(node.lo_min, node.hi_min, cutoff):
            collected += node.get_modules(cutoff=cutoff)
            continue
        # significant node: label it, then keep it
        if self.datatype == "continuous":
            node.desc = "lo" if lmean(node.a) < lmean(node.b) else "hi"
        else:
            node.desc = "enriched"
        collected.append(node)
    return collected
def eu_std_razlika_vzorcnih_arit_sred(var, year):
    """Test statistic for the difference of sample means ('old' vs 'new' EU).

    Uses the pooled standard deviation from eu_skupni_std_odklon.
    """
    new_sample = seznam_vzorec(var, year, 'new')[0]
    mean_new = stats.lmean(new_sample)
    n_new = len(new_sample)
    old_sample = seznam_vzorec(var, year, 'old')[0]
    mean_old = stats.lmean(old_sample)
    n_old = len(old_sample)
    pooled_sd = eu_skupni_std_odklon(var, year)[0]
    # (1/n_old + 1/n_new), written as a single fraction
    factor = float((n_old + n_new) / (float(n_old) * float(n_new)))
    return (mean_old - mean_new) / (pooled_sd * math.sqrt(factor))
def _get_id_stats(glyphs, k=None):
    """Return (count, mean, stdev, median) of pairwise glyph distances.

    With fewer than three glyphs the distance statistics are not
    meaningful, so placeholder 1.0 values are returned.
    """
    import stats
    if len(glyphs) < 3:
        return (len(glyphs), 1.0, 1.0, 1.0)
    if k is None:
        k = kNN()
    distances = k.unique_distances(glyphs)
    return (len(glyphs),
            stats.lmean(distances),
            stats.lstdev(distances),
            stats.lmedian(distances))
def vzorcna_varianca(var, year):
    """Sample variance and standard deviation of the sample for var/year."""
    sample = seznam_vzorec(var, year)[0]
    dof = len(sample) - 1  # n - 1 degrees of freedom
    center = stats.lmean(sample)
    ssq = sum((value - center) * (value - center) for value in sample)
    variance = ssq / float(dof)
    return variance, math.sqrt(variance)
def log_normal_distribution(self):
    """Return the z-scored natural logarithms of self.sequence."""
    # natural log of every element
    logs = [math.log(number, math.e) for number in self.sequence]
    center = stats.lmean(logs)
    spread = stats.stdev(logs)
    # standardize: subtract the mean, divide by the standard deviation
    return [(value - center) / spread for value in logs]
def eu_vzorcna_varianca(var, year, eu):
    """Sample standard deviation and variance for the given EU group.

    Returns (stdev, variance) — note the order is reversed relative to
    vzorcna_varianca.
    """
    sample = seznam_vzorec(var, year, eu)[0]
    center = stats.lmean(sample)
    n = len(sample)
    ssq = sum((value - center) * (value - center) for value in sample)
    s2 = ssq / (n - 1)
    return math.sqrt(s2), s2
def log_normal_distribution(self):
    """Standardize the natural logs of self.sequence (z-scores)."""
    logged = [math.log(x, math.e) for x in self.sequence]
    center = stats.lmean(logged)
    spread = stats.stdev(logged)
    # rewrite each entry in place as (value - mean) / stdev
    for idx in range(len(logged)):
        logged[idx] = (logged[idx] - center) / spread
    return logged
def zaupanje_aritmeticna(var, year):
    """Two-sided confidence interval for the sample mean (t distribution).

    Uses probability 0.10 with n-1 degrees of freedom; returns the
    (lower, upper) interval bounds.
    """
    sample = seznam_vzorec(var, year)[0]
    n = len(sample)
    center = stats.lmean(sample)
    probability = 0.10
    # t quantile, rounded to two decimals as in the original tables
    t_value = round(statistics.tinv(probability, n - 1), 2)
    std = math.sqrt(vzorcna_varianca(var, year)[0])
    half_width = t_value * (std / math.sqrt(n))
    return center - half_width, center + half_width
def get_avg_box_width():
    """Bounding-box width statistics of contours in the test image.

    Returns (mean, lmean, stdev, lstdev) over contour widths strictly
    between 10 and 100 pixels.
    """
    filename = './image/test_bi3.jpg'
    image = cvLoadImage(filename, CV_8UC1)
    storage = cvCreateMemStorage(0)
    input_image = cvCloneImage(image)
    output_image = cvCreateImage(cvGetSize(input_image), 8, 3)
    cvCvtColor(input_image, output_image, CV_GRAY2BGR)
    count, contours = cvFindContours(input_image, storage, sizeof_CvContour,
                                     CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE,
                                     cvPoint(0, 0))
    box_widths = []
    for contour in contours.hrange():
        width = cvBoundingRect(contour, 0).width
        # keep only plausible character-sized boxes
        if 10 < width < 100:
            box_widths.append(width)
    return (mean(box_widths), lmean(box_widths),
            stdev(box_widths), lstdev(box_widths))
def get_avg_box_width():
    """Compute mean/stdev of contour bounding-box widths in the test image."""
    image_path = './image/test_bi3.jpg'
    gray = cvLoadImage(image_path, CV_8UC1)
    mem = cvCreateMemStorage(0)
    work = cvCloneImage(gray)
    color = cvCreateImage(cvGetSize(work), 8, 3)
    cvCvtColor(work, color, CV_GRAY2BGR)
    n_found, found = cvFindContours(work, mem, sizeof_CvContour,
                                    CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE,
                                    cvPoint(0, 0))
    widths = []
    for c in found.hrange():
        w = cvBoundingRect(c, 0).width
        if 100 > w > 10:  # discard tiny noise and oversized boxes
            widths.append(w)
    avg = mean(widths)
    lavg = lmean(widths)
    sd = stdev(widths)
    lsd = lstdev(widths)
    return (avg, lavg, sd, lsd)
def eu_arit_sred(var, year, eu):
    """Arithmetic mean of the sample for the given variable, year, EU group."""
    sample, freq = seznam_vzorec(var, year, eu)
    return stats.lmean(sample)
def vzorcna_aritmeticna(var, year, eu=None):
    """Arithmetic mean of the sample values for var/year (optional EU group)."""
    sample = seznam_vzorec(var, year, eu)[0]
    return stats.lmean(sample)
def nal_32b(var1, year1, var2, year2):
    """Correlation and regression analysis between two variables.

    Returns (mean1, mean2, r, t, (a1, b1), (a2, b2), R, o): the two sample
    means, Pearson's r (rounded to 4 decimals), the t statistic for r, both
    regression lines as (intercept, slope) pairs, R squared, and the
    standard error of the estimate.
    NOTE(review): Python 2 code (uses dict.has_key).
    """
    v1 = seznam_vzorec(var1, year1)[1]
    v2 = seznam(var2, year2)[1]
    # entries missing from the first dict must be removed from the second
    for i in v2.copy():
        if not v1.has_key(i):
            del(v2[i])
    # keep only keys present in both dicts (prunes in both directions;
    # partly repeats the loop above)
    temp = dict(v1)
    for i in temp:
        try:
            v2[i]
        except KeyError:
            v1.__delitem__(i)
    temp = dict(v2)
    for i in temp:
        try:
            v1[i]
        except KeyError:
            v2.__delitem__(i)
    n = len(v2)
    # collect paired values for keys 'c1'..'c28', skipping missing ones
    sez1 = []
    for i in range(1,29):
        try:
            sez1.append(v1['c'+str(i)])
        except KeyError:
            pass
    sez2 = []
    for i in range(1,29):
        try:
            sez2.append(v2['c'+str(i)])
        except KeyError:
            pass
    as1 = stats.lmean(sez1)
    as2 = stats.lmean(sez2)
    # c: sum of cross products; d, e: sums of squared deviations
    c = 0
    d = 0
    e = 0
    for i in range(0, n):
        c = c + (sez1[i] - as1)*(sez2[i] - as2)
        d = d + (sez1[i] - as1)**2
        e = e + (sez2[i] - as2)**2
    # Pearson correlation coefficient, rounded to four decimals
    rxy = round(c / math.sqrt(d*e),4)
    # t statistic for testing the significance of r (n-2 dof)
    texp = (rxy*math.sqrt(n-2)) / math.sqrt(1 - rxy**2)
    s = math.sqrt( d / (n - 1))
    # regression slopes in each direction
    mm1 = c / d
    mm2 = c / e
    X = as2
    Y = as1
    # regression lines as (intercept, slope)
    regr1a = Y + mm2*((-1)*X)
    regr1b = mm2
    regr2a = X + mm1*((-1)*Y)
    regr2b = mm1
    R = rxy**2  # coefficient of determination
    o = s * math.sqrt(1 - R)  # standard error of the estimate
    return as1, as2, rxy, texp, (regr1a, regr1b), (regr2a, regr2b), R, o
def aritmeticna(var, year):
    """Arithmetic mean of the full data series for var/year."""
    data = seznam(var, year)
    return stats.lmean(data[0])
# NOTE(review): fragment of a score-sampling loop (Python 2) — the
# enclosing loop header is outside this chunk; `continue`, `failed`, `s`,
# `scores` and `options` come from the surrounding code. Indentation of
# the trailing averaging section is reconstructed — TODO confirm against
# the full file.
score = 0
data = s.read(4)  # one 32-bit score record
if len(data) != 4:
    print "test failed: invalid score bytes"
    failed += 1
    if failed >= options.failures:
        print "ERROR: maximum read failures reached"
        sys.exit(1)
    else:
        continue  # skip this sample, try the next one
# assemble the 32-bit integer from the four bytes (little-endian)
for value, shift in zip(data, [0, 8, 16, 24]):
    score += ord(value) << shift
print "read score: %8d (0x%08X)" % (score, score)
scores.append(score)
mean = stats.lmean(scores)
print "average score: %.2f" % mean
out = open(options.output, "w")
print >> out, mean
if options.samples > 1 and mean != 0:
    stdev = stats.lstdev(scores)
    rating = mean / stdev  # higher mean / lower variance => better rating
    print "standard deviation: %.2f" % stdev
    print "rating: %.6f" % rating
def run(self, num_trials):
    """ Runs the given number of trials.

    If num_trials is 1, runs a single trial and graphs the result.
    Otherwise, graphs averaged results over all trials: response time
    vs. utilization, one curve per probes_ratio value, rendered via a
    generated gnuplot script.
    """
    if num_trials == 1:
        self.run_single(-1)
        self.graph_single()
        return
    for trial in range(num_trials):
        print "********Running Trial %s**********" % trial
        self.run_single(trial)
    # Emit the gnuplot script that plots the aggregated results.
    filename = "plot_%s.gp" % self.file_prefix
    gnuplot_file = open(filename, 'w')
    gnuplot_file.write("set terminal postscript color 'Helvetica' 14\n")
    #gnuplot_file.write("set size .5, .5\n")
    gnuplot_file.write("set output 'graphs/%s.ps'\n" % self.file_prefix)
    gnuplot_file.write("set xlabel 'Utilization'\n")
    gnuplot_file.write("set ylabel 'Response Time (ms)'\n")
    gnuplot_file.write("set yrange [0:700]\n")
    gnuplot_file.write("set grid ytics\n")
    #gnuplot_file.write("set xtics 0.25\n")
    extra = ""
    gnuplot_file.write("set title 'Effect of Load Probing on Response "
                       "Time%s'\n" % extra)
    #gnuplot_file.write("set key font 'Helvetica,10' left width -5"
    #                   "title 'Probes:Tasks' samplen 2\n")
    gnuplot_file.write("set key left\n")
    gnuplot_file.write("plot ")
    for i, probes_ratio in enumerate(self.probes_ratio_values):
        # Aggregate results and write to a file.
        # Map of utilization to response times for that utilization.
        results = {}
        for trial in range(num_trials):
            results_filename = ("raw_results/%s_response_time" %
                                self.get_prefix(trial, probes_ratio))
            results_file = open(results_filename, "r")
            index = 0
            for line in results_file:
                values = line.split("\t")
                if values[0] == "n":
                    # header row — skip it
                    continue
                # Use median response time.
                normalized_response_time = float(values[6])
                if self.remove_delay:
                    # presumably values[9] is a per-hop network delay and a
                    # task incurs three of them — TODO confirm with writer
                    normalized_response_time -= 3*float(values[9])
                utilization = float(values[2])
                if utilization not in results:
                    results[utilization] = []
                results[utilization].append(normalized_response_time)
        # Write mean and stdev per utilization for this probes_ratio.
        agg_output_filename = ("raw_results/agg_%s_%f" %
                               (self.file_prefix, probes_ratio))
        agg_output_file = open(agg_output_filename, "w")
        agg_output_file.write("Utilization\tResponseTime\tStdDev\n")
        for utilization in sorted(results.keys()):
            avg_response_time = stats.lmean(results[utilization])
            std_dev = stats.lstdev(results[utilization])
            agg_output_file.write("%f\t%f\t%f\n" %
                                  (utilization, avg_response_time, std_dev))
        # Plot aggregated results.
        if i > 0:
            gnuplot_file.write(', \\\n')
        title = "Probes/Tasks = %s" % probes_ratio
        if probes_ratio == -1:
            title = "Ideal"
        gnuplot_file.write(("'%s' using 1:2 title '%s' lc %d lw 4 with l,"
                            "\\\n") % (agg_output_filename, title, i))
        gnuplot_file.write(("'%s' using 1:2:3 notitle lt %d lw 4 with "
                            "errorbars") % (agg_output_filename, i))
    subprocess.call(["gnuplot", filename])
def run(self, num_trials):
    """Run num_trials trials and plot averaged normalized response time
    against utilization, one curve per (network_delay, probes_ratio) pair,
    via a generated gnuplot script.
    """
    for trial in range(num_trials):
        self.run_single(trial)
    filename = "plot_final_network_delay.gp"
    gnuplot_file = open(filename, 'w')
    gnuplot_file.write("set terminal postscript color\n")
    gnuplot_file.write("set size 0.5,0.5\n")
    gnuplot_file.write("set output 'graphs/final_network_delay.ps'\n")
    gnuplot_file.write("set xlabel 'Utilization'\n")
    gnuplot_file.write("set ylabel 'Normalized Response Time (ms)'\n")
    gnuplot_file.write("set yrange [100:500]\n")
    gnuplot_file.write("set grid ytics\n")
    gnuplot_file.write("set xtics 0.25\n")
    #gnuplot_file.write("set title 'Effect of Network Delay on Response "
    #                   "Time'\n")
    gnuplot_file.write("set key font 'Helvetica,10' left samplen 2 invert "
                       "\n")
    gnuplot_file.write("plot ")
    for i, (network_delay, probes_ratio) in enumerate(
            zip(self.delay_values, self.probes_ratio_values)):
        # Aggregate results and write to a file.
        # Map of utilization to response times for that utilization.
        results = {}
        for trial in range(num_trials):
            results_filename = ("raw_results/%s_response_time" %
                                self.get_prefix(trial, network_delay,
                                                probes_ratio))
            results_file = open(results_filename, "r")
            index = 0
            for line in results_file:
                values = line.split("\t")
                if values[0] == "n":
                    # header row — skip it
                    continue
                # subtract 3x the delay column — presumably three network
                # hops per task; TODO confirm column semantics
                normalized_response_time = (float(values[3]) -
                                            3*float(values[6]))
                utilization = float(values[2])
                if utilization not in results:
                    results[utilization] = []
                results[utilization].append(normalized_response_time)
        agg_output_filename = ("raw_results/agg_final_delay_%d_%s" %
                               (network_delay, probes_ratio))
        agg_output_file = open(agg_output_filename, "w")
        agg_output_file.write("Utilization\tResponseTime\tStdDev\n")
        for utilization in sorted(results.keys()):
            avg_response_time = stats.lmean(results[utilization])
            std_dev = stats.lstdev(results[utilization])
            agg_output_file.write("%f\t%f\t%f\n" %
                                  (utilization, avg_response_time, std_dev))
        # Plot aggregated results.
        if i > 0:
            gnuplot_file.write(', \\\n')
        title = "%dms" % network_delay
        if probes_ratio == 1.0:
            title = "Random"
        if probes_ratio == -1:
            title = "Ideal"
        gnuplot_file.write(("'%s' using 1:2 title '%s' lt %d lw 4 with l"
                            ",\\\n") % (agg_output_filename, title, i))
        gnuplot_file.write(("'%s' using 1:2:3 notitle lt %d lw 4 with "
                            "errorbars") % (agg_output_filename, i))
    subprocess.call(["gnuplot", filename])
def calculate_ellipse_data(xdata, ydata):
    """Fit an ellipse to the point cloud.

    Center is the per-axis mean; radii are the per-axis standard
    deviations. Returns ((xcenter, ycenter), (xradius, yradius)).
    """
    center = (stats.lmean(xdata), stats.lmean(ydata))
    radii = (stats.lstdev(xdata), stats.lstdev(ydata))
    return center, radii
def mean(self):
    """Mean of the datapoints, rounded to four decimal places via formatting."""
    formatted = '%1.4f' % stats.lmean(self.datapoints)
    return float(formatted)
def run(self, num_trials):
    """Run num_trials trials and plot averaged normalized response time
    against utilization for each (network_delay, probes_ratio) pair via a
    generated gnuplot script.
    """
    for trial in range(num_trials):
        self.run_single(trial)
    filename = "plot_final_network_delay.gp"
    gnuplot_file = open(filename, 'w')
    gnuplot_file.write("set terminal postscript color\n")
    gnuplot_file.write("set size 0.5,0.5\n")
    gnuplot_file.write("set output 'graphs/final_network_delay.ps'\n")
    gnuplot_file.write("set xlabel 'Utilization'\n")
    gnuplot_file.write("set ylabel 'Normalized Response Time (ms)'\n")
    gnuplot_file.write("set yrange [100:500]\n")
    gnuplot_file.write("set grid ytics\n")
    gnuplot_file.write("set xtics 0.25\n")
    #gnuplot_file.write("set title 'Effect of Network Delay on Response "
    #                   "Time'\n")
    gnuplot_file.write("set key font 'Helvetica,10' left samplen 2 invert "
                       "\n")
    gnuplot_file.write("plot ")
    for i, (network_delay, probes_ratio) in enumerate(
            zip(self.delay_values, self.probes_ratio_values)):
        # Aggregate results and write to a file.
        # Map of utilization to response times for that utilization.
        results = {}
        for trial in range(num_trials):
            results_filename = (
                "raw_results/%s_response_time" %
                self.get_prefix(trial, network_delay, probes_ratio))
            results_file = open(results_filename, "r")
            index = 0
            for line in results_file:
                values = line.split("\t")
                if values[0] == "n":
                    # header row — skip it
                    continue
                # subtract 3x the delay column — presumably three network
                # hops per task; TODO confirm column semantics
                normalized_response_time = (float(values[3]) -
                                            3 * float(values[6]))
                utilization = float(values[2])
                if utilization not in results:
                    results[utilization] = []
                results[utilization].append(normalized_response_time)
        agg_output_filename = ("raw_results/agg_final_delay_%d_%s" %
                               (network_delay, probes_ratio))
        agg_output_file = open(agg_output_filename, "w")
        agg_output_file.write("Utilization\tResponseTime\tStdDev\n")
        for utilization in sorted(results.keys()):
            avg_response_time = stats.lmean(results[utilization])
            std_dev = stats.lstdev(results[utilization])
            agg_output_file.write(
                "%f\t%f\t%f\n" %
                (utilization, avg_response_time, std_dev))
        # Plot aggregated results.
        if i > 0:
            gnuplot_file.write(', \\\n')
        title = "%dms" % network_delay
        if probes_ratio == 1.0:
            title = "Random"
        if probes_ratio == -1:
            title = "Ideal"
        gnuplot_file.write(("'%s' using 1:2 title '%s' lt %d lw 4 with l"
                            ",\\\n") % (agg_output_filename, title, i))
        gnuplot_file.write(("'%s' using 1:2:3 notitle lt %d lw 4 with "
                            "errorbars") % (agg_output_filename, i))
    subprocess.call(["gnuplot", filename])
def run(self, num_trials):
    """ Runs the given number of trials.

    If num_trials is 1, runs a single trial and graphs the result.
    Otherwise, graphs averaged results over all trials: response time
    vs. utilization, one curve per probes_ratio value, rendered via a
    generated gnuplot script.
    """
    if num_trials == 1:
        self.run_single(-1)
        self.graph_single()
        return
    for trial in range(num_trials):
        print "********Running Trial %s**********" % trial
        self.run_single(trial)
    # Emit the gnuplot script that plots the aggregated results.
    filename = "plot_%s.gp" % self.file_prefix
    gnuplot_file = open(filename, 'w')
    gnuplot_file.write("set terminal postscript color 'Helvetica' 14\n")
    #gnuplot_file.write("set size .5, .5\n")
    gnuplot_file.write("set output 'graphs/%s.ps'\n" % self.file_prefix)
    gnuplot_file.write("set xlabel 'Utilization'\n")
    gnuplot_file.write("set ylabel 'Response Time (ms)'\n")
    gnuplot_file.write("set yrange [0:700]\n")
    gnuplot_file.write("set grid ytics\n")
    #gnuplot_file.write("set xtics 0.25\n")
    extra = ""
    gnuplot_file.write("set title 'Effect of Load Probing on Response "
                       "Time%s'\n" % extra)
    #gnuplot_file.write("set key font 'Helvetica,10' left width -5"
    #                   "title 'Probes:Tasks' samplen 2\n")
    gnuplot_file.write("set key left\n")
    gnuplot_file.write("plot ")
    for i, probes_ratio in enumerate(self.probes_ratio_values):
        # Aggregate results and write to a file.
        # Map of utilization to response times for that utilization.
        results = {}
        for trial in range(num_trials):
            results_filename = ("raw_results/%s_response_time" %
                                self.get_prefix(trial, probes_ratio))
            results_file = open(results_filename, "r")
            index = 0
            for line in results_file:
                values = line.split("\t")
                if values[0] == "n":
                    # header row — skip it
                    continue
                # Use median response time.
                normalized_response_time = float(values[6])
                if self.remove_delay:
                    # presumably values[9] is a per-hop network delay and a
                    # task incurs three of them — TODO confirm with writer
                    normalized_response_time -= 3 * float(values[9])
                utilization = float(values[2])
                if utilization not in results:
                    results[utilization] = []
                results[utilization].append(normalized_response_time)
        # Write mean and stdev per utilization for this probes_ratio.
        agg_output_filename = ("raw_results/agg_%s_%f" %
                               (self.file_prefix, probes_ratio))
        agg_output_file = open(agg_output_filename, "w")
        agg_output_file.write("Utilization\tResponseTime\tStdDev\n")
        for utilization in sorted(results.keys()):
            avg_response_time = stats.lmean(results[utilization])
            std_dev = stats.lstdev(results[utilization])
            agg_output_file.write(
                "%f\t%f\t%f\n" %
                (utilization, avg_response_time, std_dev))
        # Plot aggregated results.
        if i > 0:
            gnuplot_file.write(', \\\n')
        title = "Probes/Tasks = %s" % probes_ratio
        if probes_ratio == -1:
            title = "Ideal"
        gnuplot_file.write(("'%s' using 1:2 title '%s' lc %d lw 4 with l,"
                            "\\\n") % (agg_output_filename, title, i))
        gnuplot_file.write(("'%s' using 1:2:3 notitle lt %d lw 4 with "
                            "errorbars") % (agg_output_filename, i))
    subprocess.call(["gnuplot", filename])