Beispiel #1
0
def getStatValues(documents):
    """Summarize the score values found in ``documents``.

    Three groups of values are collected:

    * "trigger": every value of ``event.trigger.triggerScoreDict``
    * "unmerging": every value of ``event.trigger.unmergingScoreDict``,
      for triggers that have that attribute
    * "arg": every value of each dict in ``argScoreDicts`` of all events
      and all relations

    Returns a dict with "<group>-stdev" and "<group>-mean" entries, computed
    with ``stats.lstdev`` and ``stats.lmean``.  A group for which no value
    was collected is left out of the result.
    """
    triggerValues = []
    unmergingValues = []
    argValues = []
    for doc in documents:
        for event in doc.events:
            triggerValues.extend(
                sorted(event.trigger.triggerScoreDict.values()))
            if hasattr(event.trigger, "unmergingScoreDict"):
                unmergingValues.extend(
                    sorted(event.trigger.unmergingScoreDict.values()))
            for argScoreDict in event.argScoreDicts:
                argValues.extend(sorted(argScoreDict.values()))
        for relation in doc.relations:
            for argScoreDict in relation.argScoreDicts:
                argValues.extend(sorted(argScoreDict.values()))

    statValues = {}
    groups = (("trigger", triggerValues),
              ("unmerging", unmergingValues),
              ("arg", argValues))
    for name, values in groups:
        # Every group is guarded the same way: statistics of an empty list
        # are undefined, so the keys are simply omitted.
        if values:
            statValues[name + "-stdev"] = stats.lstdev(values)
            statValues[name + "-mean"] = stats.lmean(values)
    return statValues
Beispiel #2
0
def getStatValues(documents):
    """Compute mean and standard deviation of the scores in ``documents``.

    Values are gathered per group: "trigger" from
    ``event.trigger.triggerScoreDict``, "unmerging" from
    ``event.trigger.unmergingScoreDict`` (only for triggers carrying that
    attribute) and "arg" from every dict in the ``argScoreDicts`` of events
    and relations.

    Returns a dict mapping "<group>-stdev" / "<group>-mean" to the results
    of ``stats.lstdev`` / ``stats.lmean``.  Groups without any value do not
    appear in the dict.
    """
    collected = {"trigger": [], "unmerging": [], "arg": []}
    for doc in documents:
        for event in doc.events:
            trigger = event.trigger
            collected["trigger"].extend(
                sorted(trigger.triggerScoreDict.values()))
            if hasattr(trigger, "unmergingScoreDict"):
                collected["unmerging"].extend(
                    sorted(trigger.unmergingScoreDict.values()))
            for argScoreDict in event.argScoreDicts:
                collected["arg"].extend(sorted(argScoreDict.values()))
        for relation in doc.relations:
            for argScoreDict in relation.argScoreDicts:
                collected["arg"].extend(sorted(argScoreDict.values()))

    statValues = {}
    for name in ("trigger", "unmerging", "arg"):
        values = collected[name]
        # Skip empty groups: their statistics are undefined.
        if not values:
            continue
        statValues[name + "-stdev"] = stats.lstdev(values)
        statValues[name + "-mean"] = stats.lmean(values)
    return statValues
Beispiel #3
0
 def get_modules(self, cutoff=.05):
     """Collect the entries of this container that pass the cutoff.

     An entry passes when its ``val`` is below its ``lo_min``, its
     ``hi_min`` and ``cutoff``.  A passing entry gets a ``desc`` label
     ("lo" or "hi" from comparing the means of its ``a`` and ``b`` when
     ``self.datatype`` is "continuous", otherwise "enriched") and is added
     to the result.  For every other entry, whatever its own
     ``get_modules`` returns is added instead.
     """
     selected = []
     for entry in self:
         passes = entry.val < min(entry.lo_min, entry.hi_min, cutoff)
         if not passes:
             # The entry itself does not pass: take what its own search
             # yields.
             selected.extend(entry.get_modules(cutoff=cutoff))
             continue
         if self.datatype == "continuous":
             if lmean(entry.a) < lmean(entry.b):
                 entry.desc = "lo"
             else:
                 entry.desc = "hi"
         else:
             entry.desc = "enriched"
         selected.append(entry)
     return selected
Beispiel #4
0
 def get_modules(self, cutoff=.05):
     """Return the passing entries found in this container.

     Iterates over ``self``.  An entry whose ``val`` is smaller than the
     minimum of its ``lo_min``, its ``hi_min`` and ``cutoff`` is labelled
     via ``desc`` and returned; any other entry contributes the result of
     its own ``get_modules(cutoff=cutoff)`` call.

     The label is "enriched" unless ``self.datatype`` is "continuous", in
     which case it is "lo" when the mean of ``a`` is below the mean of
     ``b`` and "hi" otherwise.
     """
     result = []
     continuous = "continuous"
     for item in self:
         if item.val < min(item.lo_min, item.hi_min, cutoff):
             if self.datatype != continuous:
                 label = "enriched"
             elif lmean(item.a) < lmean(item.b):
                 label = "lo"
             else:
                 label = "hi"
             item.desc = label
             result.append(item)
         else:
             result.extend(item.get_modules(cutoff=cutoff))
     return result
Beispiel #5
0
def eu_std_razlika_vzorcnih_arit_sred(var, year):
    """Return the standardized difference of the 'old' and 'new' sample means.

    The samples come from ``seznam_vzorec(var, year, 'new')`` and
    ``seznam_vzorec(var, year, 'old')``.  The result is

        (mean_old - mean_new) / (s * sqrt((n_old + n_new) / (n_old * n_new)))

    where ``s`` is the first element returned by
    ``eu_skupni_std_odklon(var, year)``.
    """
    new_sample, _ = seznam_vzorec(var, year, 'new')
    mean_new = stats.lmean(new_sample)
    size_new = len(new_sample)

    old_sample, _ = seznam_vzorec(var, year, 'old')
    mean_old = stats.lmean(old_sample)
    size_old = len(old_sample)

    # presumably the pooled standard deviation - confirm in
    # eu_skupni_std_odklon
    deviation = eu_skupni_std_odklon(var, year)[0]

    size_factor = (size_old + size_new) / (float(size_old) * float(size_new))
    return (mean_old - mean_new) / (deviation * math.sqrt(size_factor))
Beispiel #6
0
def _get_id_stats(glyphs, k=None):
    """Return ``(count, mean, stdev, median)`` of the distances in ``glyphs``.

    The distances come from ``k.unique_distances(glyphs)``; a fresh
    ``kNN()`` is used when ``k`` is None.  With fewer than three glyphs no
    distances are computed and ``(count, 1.0, 1.0, 1.0)`` is returned.
    """
    import stats
    if len(glyphs) >= 3:
        classifier = kNN() if k is None else k
        distances = classifier.unique_distances(glyphs)
        return (len(glyphs),
                stats.lmean(distances),
                stats.lstdev(distances),
                stats.lmedian(distances))
    return (len(glyphs), 1.0, 1.0, 1.0)
Beispiel #7
0
def _get_id_stats(glyphs, k=None):
    """Compute distance statistics for a collection of glyphs.

    Returns a 4-tuple ``(len(glyphs), mean, stdev, median)`` over
    ``k.unique_distances(glyphs)``.  When ``k`` is None a new ``kNN()`` is
    created.  Collections with fewer than three glyphs get the fixed
    statistics ``1.0, 1.0, 1.0``.
    """
    import stats
    too_few = len(glyphs) < 3
    if too_few:
        return (len(glyphs), 1.0, 1.0, 1.0)
    if k is None:
        k = kNN()
    dists = k.unique_distances(glyphs)
    summary = (stats.lmean(dists), stats.lstdev(dists), stats.lmedian(dists))
    return (len(glyphs),) + summary
Beispiel #8
0
def vzorcna_varianca(var, year):
    """Return ``(variance, standard deviation)`` of a sample.

    The sample is the first element of ``seznam_vzorec(var, year)``.  The
    variance is the sum of squared deviations from the mean divided by
    ``len(sample) - 1``; the standard deviation is its square root.
    """
    sample, _ = seznam_vzorec(var, year)
    divisor = len(sample) - 1
    mean = stats.lmean(sample)

    squared_sum = 0
    for value in sample:
        deviation = value - mean
        squared_sum += deviation * deviation

    variance = squared_sum / float(divisor)
    return variance, math.sqrt(variance)
Beispiel #9
0
	def log_normal_distribution(self):
		"""Return the standardized natural logarithms of ``self.sequence``.

		Every value is log-transformed; the mean of the logs
		(``stats.lmean``) is then subtracted from each log and the result
		divided by their standard deviation (``stats.stdev``).

		Raises ValueError (from ``math.log``) when a value is not positive.
		"""
		# Indentation uses tabs only: mixing tabs and spaces inside one
		# block is rejected by Python 3 (TabError).
		log_sequence = [math.log(number, math.e) for number in self.sequence]
		mean = stats.lmean(log_sequence)
		stdev = stats.stdev(log_sequence)
		return [(value - mean) / stdev for value in log_sequence]
Beispiel #10
0
def eu_vzorcna_varianca(var, year, eu):
    """Return ``(standard deviation, variance)`` of a sample.

    The sample is the first element of ``seznam_vzorec(var, year, eu)``.
    The variance is the sum of squared deviations from the mean divided by
    ``len(sample) - 1``.  Note the order of the result: the standard
    deviation comes first.
    """
    sample, _ = seznam_vzorec(var, year, eu)
    mean = stats.lmean(sample)
    size = len(sample)

    squared_sum = 0
    for value in sample:
        squared_sum += (value - mean) * (value - mean)

    variance = squared_sum / (size - 1)
    return math.sqrt(variance), variance
Beispiel #11
0
 def log_normal_distribution(self):
     """Return the standardized natural logarithms of ``self.sequence``.

     The values are log-transformed, then each log has the mean of the
     logs subtracted and is divided by their standard deviation.
     """
     logs = [math.log(item, math.e) for item in self.sequence]
     center = stats.lmean(logs)
     spread = stats.stdev(logs)
     # Standardize in place, position by position.
     for position in range(len(self.sequence)):
         logs[position] = (logs[position] - center) / spread
     return logs
Beispiel #12
0
def zaupanje_aritmeticna(var, year):
    """Return the ``(lower, upper)`` bounds of an interval around the mean.

    The sample is the first element of ``seznam_vzorec(var, year)``.  The
    half-width of the interval is ``t * std / sqrt(n)`` where ``t`` is
    ``statistics.tinv(0.10, n - 1)`` rounded to two decimals and ``std`` is
    the square root of the variance from ``vzorcna_varianca(var, year)``.
    """
    sample, _ = seznam_vzorec(var, year)
    size = len(sample)
    center = stats.lmean(sample)

    # presumably a two-sided 90% confidence level - confirm against the
    # convention used by statistics.tinv
    probability = 0.10
    t_value = round(statistics.tinv(probability, size - 1), 2)

    deviation = math.sqrt(vzorcna_varianca(var, year)[0])
    half_width = t_value * (deviation / math.sqrt(size))

    return center - half_width, center + half_width
def get_avg_box_width():
    """Summarize contour bounding-box widths of a fixed test image.

    Loads './image/test_bi3.jpg', finds its contours and keeps the widths
    of the bounding boxes that are greater than 10 and smaller than 100.

    Returns ``(mean, lmean, stdev, lstdev)`` of those widths, each computed
    by the function of the same name.
    """
    source = cvLoadImage('./image/test_bi3.jpg', CV_8UC1)
    mem_storage = cvCreateMemStorage(0)
    working = cvCloneImage(source)
    # A 3-channel, 8-bit version of the image is produced here but is not
    # used for the statistics below.
    colour = cvCreateImage(cvGetSize(working), 8, 3)
    cvCvtColor(working, colour, CV_GRAY2BGR)
    found, contours = cvFindContours(working, mem_storage, sizeof_CvContour,
                                     CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE,
                                     cvPoint(0, 0))

    widths = []
    for contour in contours.hrange():
        width = cvBoundingRect(contour, 0).width
        if 10 < width < 100:
            widths.append(width)

    return (mean(widths), lmean(widths), stdev(widths), lstdev(widths))
Beispiel #14
0
def get_avg_box_width():
    """Return width statistics for contour bounding boxes of a test image.

    The image './image/test_bi3.jpg' is loaded and its contours are
    extracted.  Only bounding boxes wider than 10 and narrower than 100 are
    considered.

    Returns the tuple ``(mean, lmean, stdev, lstdev)`` of the kept widths,
    each value produced by the like-named function.
    """
    path = './image/test_bi3.jpg'
    loaded = cvLoadImage(path, CV_8UC1)
    storage = cvCreateMemStorage(0)
    grey = cvCloneImage(loaded)
    # The BGR conversion is kept although its result is not used below.
    bgr = cvCreateImage(cvGetSize(grey), 8, 3)
    cvCvtColor(grey, bgr, CV_GRAY2BGR)
    count, contours = cvFindContours(
        grey, storage, sizeof_CvContour,
        CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE, cvPoint(0, 0))

    all_widths = (cvBoundingRect(contour, 0).width
                  for contour in contours.hrange())
    kept = [width for width in all_widths if 100 > width > 10]

    stats_tuple = (mean(kept), lmean(kept), stdev(kept), lstdev(kept))
    return stats_tuple
Beispiel #15
0
def eu_arit_sred(var, year, eu):
    """Return the arithmetic mean of the sample for ``var``, ``year``, ``eu``.

    The sample is the first of the two values returned by
    ``seznam_vzorec(var, year, eu)``.
    """
    sample, _ = seznam_vzorec(var, year, eu)
    return stats.lmean(sample)
Beispiel #16
0
def vzorcna_aritmeticna(var, year, eu=None):
    """Return the arithmetic mean of a sample.

    The sample is element 0 of the result of
    ``seznam_vzorec(var, year, eu)``.
    """
    sample = seznam_vzorec(var, year, eu)[0]
    return stats.lmean(sample)
Beispiel #17
0
def nal_32b(var1, year1, var2, year2):
    """Correlate two variables over the entries they have in common.

    Element 1 of ``seznam_vzorec(var1, year1)`` and of
    ``seznam(var2, year2)`` is used as a mapping.  Both mappings are pruned
    IN PLACE to their shared keys, and the values stored under the keys
    'c1' .. 'c28' are paired up in that order.

    Returns the tuple
    ``(as1, as2, rxy, texp, (regr1a, regr1b), (regr2a, regr2b), R, o)``:

    * ``as1``, ``as2`` - means of the first and second series
    * ``rxy`` - correlation coefficient, rounded to 4 decimals
    * ``texp`` - ``rxy * sqrt(n - 2) / sqrt(1 - rxy**2)``
    * ``(regr1a, regr1b)`` - intercept and slope of the line predicting
      series 1 from series 2
    * ``(regr2a, regr2b)`` - intercept and slope of the line predicting
      series 2 from series 1
    * ``R`` - ``rxy**2``
    * ``o`` - ``s * sqrt(1 - R)`` with ``s = sqrt(d / (n - 1))``, ``d``
      being the sum of squared deviations of series 1

    ``n`` is the number of shared keys.
    """
    v1 = seznam_vzorec(var1, year1)[1]
    v2 = seznam(var2, year2)[1]

    # Keep only the keys present in both mappings.  Iterate over snapshots
    # because the mappings are modified while being pruned.
    for key in list(v2):
        if key not in v1:
            del v2[key]
    for key in list(v1):
        if key not in v2:
            del v1[key]
    n = len(v2)

    labels = ['c' + str(i) for i in range(1, 29)]
    sez1 = [v1[label] for label in labels if label in v1]
    sez2 = [v2[label] for label in labels if label in v2]

    as1 = stats.lmean(sez1)
    as2 = stats.lmean(sez2)

    c = 0  # sum of cross products of deviations
    d = 0  # sum of squared deviations, series 1
    e = 0  # sum of squared deviations, series 2
    # NOTE(review): n counts every shared key; a shared key outside
    # 'c1'..'c28' makes n exceed the series length and raises IndexError.
    for i in range(n):
        dev1 = sez1[i] - as1
        dev2 = sez2[i] - as2
        c = c + dev1 * dev2
        d = d + dev1 ** 2
        e = e + dev2 ** 2

    rxy = round(c / math.sqrt(d * e), 4)

    texp = (rxy * math.sqrt(n - 2)) / math.sqrt(1 - rxy ** 2)

    s = math.sqrt(d / (n - 1))
    mm1 = c / d
    mm2 = c / e

    regr1a = as1 + mm2 * ((-1) * as2)
    regr1b = mm2

    regr2a = as2 + mm1 * ((-1) * as1)
    regr2b = mm1

    R = rxy ** 2

    o = s * math.sqrt(1 - R)

    return as1, as2, rxy, texp, (regr1a, regr1b), (regr2a, regr2b), R, o
Beispiel #18
0
def aritmeticna(var, year):
    """Return the arithmetic mean of element 0 of ``seznam(var, year)``."""
    values = seznam(var, year)[0]
    return stats.lmean(values)
Beispiel #19
0
	# Body of a loop whose header is outside this excerpt: reads one score
	# per iteration from `s`.
	score = 0
	data = s.read(4)
	# A score is expected to arrive as exactly 4 bytes.
	if len(data) != 4:
		print "test failed: invalid score bytes"
		failed += 1
		# Give up once options.failures bad reads have accumulated;
		# otherwise move on to the next iteration.
		if failed >= options.failures:
			print "ERROR: maximum read failures reached"
			sys.exit(1)
		else:
			continue

	# Assemble the 4 bytes into an integer, least-significant byte first
	# (little-endian).
	for value, shift in zip(data, [0, 8, 16, 24]):
		score += ord(value) << shift
	
	print "read score: %8d (0x%08X)" % (score, score)
	scores.append(score)

# Report the mean of all collected scores and write it to options.output.
mean = stats.lmean(scores)
print "average score: %.2f" % mean

# NOTE(review): `out` is not closed in the visible code - confirm it is
# closed (or the script exits) further down.
out = open(options.output, "w")
print >> out, mean

# rating = mean / stdev.
# NOTE(review): the guard tests mean != 0 while the divisor is stdev; equal
# scores would presumably give stdev == 0 and raise ZeroDivisionError -
# confirm.
if options.samples > 1 and mean != 0:
	stdev = stats.lstdev(scores)
	rating = mean / stdev

	print "standard deviation: %.2f" % stdev
	print "rating: %.6f" % rating

Beispiel #20
0
    def run(self, num_trials):
        """ Runs the given number of trials.

        If num_trials is 1, runs a single trial and graphs the result.
        Otherwise, graphs averaged results over all trials:

        * For every value in self.probes_ratio_values, the per-trial files
          "raw_results/<prefix>_response_time" are read.  They are
          tab-separated; rows whose first field is "n" are skipped
          (presumably the header row).  Field 2 is the utilization and
          field 6 the response time; when self.remove_delay is set, three
          times field 9 is subtracted from the response time.
        * Mean and standard deviation of the response times per
          utilization are written to
          "raw_results/agg_<file_prefix>_<probes_ratio>".
        * A gnuplot script "plot_<file_prefix>.gp" that plots those files
          is written and then run with gnuplot.

        Every file is closed before gnuplot is started, so gnuplot always
        sees the complete script and data.
        """
        if num_trials == 1:
            self.run_single(-1)
            self.graph_single()
            return

        for trial in range(num_trials):
            print("********Running Trial %s**********" % trial)
            self.run_single(trial)

        filename = "plot_%s.gp" % self.file_prefix
        with open(filename, 'w') as gnuplot_file:
            gnuplot_file.write(
                "set terminal postscript color 'Helvetica' 14\n")
            #gnuplot_file.write("set size .5, .5\n")
            gnuplot_file.write(
                "set output 'graphs/%s.ps'\n" % self.file_prefix)
            gnuplot_file.write("set xlabel 'Utilization'\n")
            gnuplot_file.write("set ylabel 'Response Time (ms)'\n")
            gnuplot_file.write("set yrange [0:700]\n")
            gnuplot_file.write("set grid ytics\n")
            #gnuplot_file.write("set xtics 0.25\n")
            extra = ""
            gnuplot_file.write("set title 'Effect of Load Probing on "
                               "Response Time%s'\n" % extra)
            #gnuplot_file.write("set key font 'Helvetica,10' left width -5"
            #                   "title 'Probes:Tasks' samplen 2\n")
            gnuplot_file.write("set key left\n")
            gnuplot_file.write("plot ")

            for i, probes_ratio in enumerate(self.probes_ratio_values):
                # Aggregate results and write to a file.
                # Map of utilization to response times for that utilization.
                results = {}
                for trial in range(num_trials):
                    results_filename = ("raw_results/%s_response_time" %
                                        self.get_prefix(trial, probes_ratio))
                    with open(results_filename, "r") as results_file:
                        for line in results_file:
                            values = line.split("\t")
                            if values[0] == "n":
                                continue
                            # Use median response time.
                            normalized_response_time = float(values[6])
                            if self.remove_delay:
                                normalized_response_time -= (
                                    3 * float(values[9]))
                            utilization = float(values[2])
                            results.setdefault(utilization, []).append(
                                normalized_response_time)

                agg_output_filename = ("raw_results/agg_%s_%f" %
                                       (self.file_prefix, probes_ratio))
                with open(agg_output_filename, "w") as agg_output_file:
                    agg_output_file.write(
                        "Utilization\tResponseTime\tStdDev\n")
                    for utilization in sorted(results.keys()):
                        avg_response_time = stats.lmean(results[utilization])
                        std_dev = stats.lstdev(results[utilization])
                        agg_output_file.write(
                            "%f\t%f\t%f\n" %
                            (utilization, avg_response_time, std_dev))

                # Plot aggregated results.
                if i > 0:
                    gnuplot_file.write(', \\\n')
                title = "Probes/Tasks = %s" % probes_ratio
                if probes_ratio == -1:
                    title = "Ideal"
                gnuplot_file.write(
                    ("'%s' using 1:2 title '%s' lc %d lw 4 with l,"
                     "\\\n") % (agg_output_filename, title, i))
                gnuplot_file.write(
                    ("'%s' using 1:2:3 notitle lt %d lw 4 with "
                     "errorbars") % (agg_output_filename, i))

        subprocess.call(["gnuplot", filename])
 def run(self, num_trials):
     """Run ``num_trials`` trials and plot the aggregated response times.

     After running every trial through ``self.run_single``, each pair from
     ``zip(self.delay_values, self.probes_ratio_values)`` is processed:

     * The per-trial files "raw_results/<prefix>_response_time" are read.
       They are tab-separated; rows whose first field is "n" are skipped
       (presumably the header row).  Field 2 is the utilization; the
       normalized response time is field 3 minus three times field 6.
     * Mean and standard deviation per utilization are written to
       "raw_results/agg_final_delay_<network_delay>_<probes_ratio>".

     A gnuplot script "plot_final_network_delay.gp" plotting those files
     is written and then run with gnuplot.  Every file is closed before
     gnuplot is started, so gnuplot always sees the complete script and
     data.
     """
     for trial in range(num_trials):
         self.run_single(trial)

     filename = "plot_final_network_delay.gp"
     with open(filename, 'w') as gnuplot_file:
         gnuplot_file.write("set terminal postscript color\n")
         gnuplot_file.write("set size 0.5,0.5\n")
         gnuplot_file.write("set output 'graphs/final_network_delay.ps'\n")
         gnuplot_file.write("set xlabel 'Utilization'\n")
         gnuplot_file.write("set ylabel 'Normalized Response Time (ms)'\n")
         gnuplot_file.write("set yrange [100:500]\n")
         gnuplot_file.write("set grid ytics\n")
         gnuplot_file.write("set xtics 0.25\n")
         #gnuplot_file.write("set title 'Effect of Network Delay on Response "
         #                   "Time'\n")
         gnuplot_file.write("set key font 'Helvetica,10' left samplen 2 "
                            "invert \n")
         gnuplot_file.write("plot ")

         for i, (network_delay, probes_ratio) in enumerate(
                 zip(self.delay_values, self.probes_ratio_values)):
             # Aggregate results and write to a file.
             # Map of utilization to response times for that utilization.
             results = {}
             for trial in range(num_trials):
                 results_filename = ("raw_results/%s_response_time" %
                                     self.get_prefix(trial, network_delay,
                                                     probes_ratio))
                 with open(results_filename, "r") as results_file:
                     for line in results_file:
                         values = line.split("\t")
                         if values[0] == "n":
                             continue
                         normalized_response_time = (float(values[3]) -
                                                     3*float(values[6]))
                         utilization = float(values[2])
                         results.setdefault(utilization, []).append(
                             normalized_response_time)

             agg_output_filename = ("raw_results/agg_final_delay_%d_%s" %
                                    (network_delay, probes_ratio))
             with open(agg_output_filename, "w") as agg_output_file:
                 agg_output_file.write(
                     "Utilization\tResponseTime\tStdDev\n")
                 for utilization in sorted(results.keys()):
                     avg_response_time = stats.lmean(results[utilization])
                     std_dev = stats.lstdev(results[utilization])
                     agg_output_file.write(
                         "%f\t%f\t%f\n" %
                         (utilization, avg_response_time, std_dev))

             # Plot aggregated results.
             if i > 0:
                 gnuplot_file.write(', \\\n')
             title = "%dms" % network_delay
             if probes_ratio == 1.0:
                 title = "Random"
             if probes_ratio == -1:
                 title = "Ideal"
             gnuplot_file.write(
                 ("'%s' using 1:2 title '%s' lt %d lw 4 with l"
                  ",\\\n") % (agg_output_filename, title, i))
             gnuplot_file.write(
                 ("'%s' using 1:2:3 notitle lt %d lw 4 with "
                  "errorbars") % (agg_output_filename, i))

     subprocess.call(["gnuplot", filename])
Beispiel #22
0
def calculate_ellipse_data(xdata, ydata):
    """Return ``((xcenter, ycenter), (xradius, yradius))`` for two series.

    The center coordinates are the means of ``xdata`` and ``ydata``
    (``stats.lmean``); the radii are their standard deviations
    (``stats.lstdev``).
    """
    x_mid, x_spread = stats.lmean(xdata), stats.lstdev(xdata)
    y_mid, y_spread = stats.lmean(ydata), stats.lstdev(ydata)
    center = (x_mid, y_mid)
    radii = (x_spread, y_spread)
    return center, radii
 def mean(self):
     """Return the mean of ``self.datapoints`` rounded to four decimals."""
     # Round by formatting to four decimal places and parsing back.
     formatted = '%1.4f' % stats.lmean(self.datapoints)
     return float(formatted)
    def run(self, num_trials):
        """Run ``num_trials`` trials and plot the aggregated response times.

        Every trial is first run through ``self.run_single``.  Then, for
        each pair from ``zip(self.delay_values, self.probes_ratio_values)``:

        * The per-trial files "raw_results/<prefix>_response_time" are
          read.  They are tab-separated; rows whose first field is "n" are
          skipped (presumably the header row).  Field 2 is the utilization;
          the normalized response time is field 3 minus three times
          field 6.
        * Mean and standard deviation per utilization are written to
          "raw_results/agg_final_delay_<network_delay>_<probes_ratio>".

        Finally the gnuplot script "plot_final_network_delay.gp", which
        plots those files, is run with gnuplot.  Every file is closed
        before gnuplot is started, so gnuplot always sees the complete
        script and data.
        """
        for trial in range(num_trials):
            self.run_single(trial)

        filename = "plot_final_network_delay.gp"
        with open(filename, 'w') as gnuplot_file:
            gnuplot_file.write("set terminal postscript color\n")
            gnuplot_file.write("set size 0.5,0.5\n")
            gnuplot_file.write(
                "set output 'graphs/final_network_delay.ps'\n")
            gnuplot_file.write("set xlabel 'Utilization'\n")
            gnuplot_file.write(
                "set ylabel 'Normalized Response Time (ms)'\n")
            gnuplot_file.write("set yrange [100:500]\n")
            gnuplot_file.write("set grid ytics\n")
            gnuplot_file.write("set xtics 0.25\n")
            #gnuplot_file.write("set title 'Effect of Network Delay on "
            #                   "Response Time'\n")
            gnuplot_file.write("set key font 'Helvetica,10' left samplen 2 "
                               "invert \n")
            gnuplot_file.write("plot ")

            for i, (network_delay, probes_ratio) in enumerate(
                    zip(self.delay_values, self.probes_ratio_values)):
                # Aggregate results and write to a file.
                # Map of utilization to response times for that utilization.
                results = {}
                for trial in range(num_trials):
                    results_filename = (
                        "raw_results/%s_response_time" %
                        self.get_prefix(trial, network_delay, probes_ratio))
                    with open(results_filename, "r") as results_file:
                        for line in results_file:
                            values = line.split("\t")
                            if values[0] == "n":
                                continue
                            normalized_response_time = (
                                float(values[3]) - 3 * float(values[6]))
                            utilization = float(values[2])
                            results.setdefault(utilization, []).append(
                                normalized_response_time)

                agg_output_filename = (
                    "raw_results/agg_final_delay_%d_%s" %
                    (network_delay, probes_ratio))
                with open(agg_output_filename, "w") as agg_output_file:
                    agg_output_file.write(
                        "Utilization\tResponseTime\tStdDev\n")
                    for utilization in sorted(results.keys()):
                        avg_response_time = stats.lmean(results[utilization])
                        std_dev = stats.lstdev(results[utilization])
                        agg_output_file.write(
                            "%f\t%f\t%f\n" %
                            (utilization, avg_response_time, std_dev))

                # Plot aggregated results.
                if i > 0:
                    gnuplot_file.write(', \\\n')
                title = "%dms" % network_delay
                if probes_ratio == 1.0:
                    title = "Random"
                if probes_ratio == -1:
                    title = "Ideal"
                gnuplot_file.write(
                    ("'%s' using 1:2 title '%s' lt %d lw 4 with l"
                     ",\\\n") % (agg_output_filename, title, i))
                gnuplot_file.write(
                    ("'%s' using 1:2:3 notitle lt %d lw 4 with "
                     "errorbars") % (agg_output_filename, i))

        subprocess.call(["gnuplot", filename])
Beispiel #25
0
    def run(self, num_trials):
        """ Runs the given number of trials.

        If num_trials is 1, runs a single trial and graphs the result.
        Otherwise, graphs averaged results over all trials:

        * For every value in self.probes_ratio_values, the per-trial files
          "raw_results/<prefix>_response_time" are read.  They are
          tab-separated; rows whose first field is "n" are skipped
          (presumably the header row).  Field 2 is the utilization and
          field 6 the response time; when self.remove_delay is set, three
          times field 9 is subtracted from the response time.
        * Mean and standard deviation of the response times per
          utilization are written to
          "raw_results/agg_<file_prefix>_<probes_ratio>".
        * A gnuplot script "plot_<file_prefix>.gp" that plots those files
          is written and then run with gnuplot.

        Every file is closed before gnuplot is started, so gnuplot always
        sees the complete script and data.
        """
        if num_trials == 1:
            self.run_single(-1)
            self.graph_single()
            return

        for trial in range(num_trials):
            print("********Running Trial %s**********" % trial)
            self.run_single(trial)

        filename = "plot_%s.gp" % self.file_prefix
        with open(filename, 'w') as gnuplot_file:
            gnuplot_file.write(
                "set terminal postscript color 'Helvetica' 14\n")
            #gnuplot_file.write("set size .5, .5\n")
            gnuplot_file.write(
                "set output 'graphs/%s.ps'\n" % self.file_prefix)
            gnuplot_file.write("set xlabel 'Utilization'\n")
            gnuplot_file.write("set ylabel 'Response Time (ms)'\n")
            gnuplot_file.write("set yrange [0:700]\n")
            gnuplot_file.write("set grid ytics\n")
            #gnuplot_file.write("set xtics 0.25\n")
            extra = ""
            gnuplot_file.write("set title 'Effect of Load Probing on "
                               "Response Time%s'\n" % extra)
            #gnuplot_file.write("set key font 'Helvetica,10' left width -5"
            #                   "title 'Probes:Tasks' samplen 2\n")
            gnuplot_file.write("set key left\n")
            gnuplot_file.write("plot ")

            for i, probes_ratio in enumerate(self.probes_ratio_values):
                # Aggregate results and write to a file.
                # Map of utilization to response times for that utilization.
                results = {}
                for trial in range(num_trials):
                    results_filename = ("raw_results/%s_response_time" %
                                        self.get_prefix(trial, probes_ratio))
                    with open(results_filename, "r") as results_file:
                        for line in results_file:
                            values = line.split("\t")
                            if values[0] == "n":
                                continue
                            # Use median response time.
                            normalized_response_time = float(values[6])
                            if self.remove_delay:
                                normalized_response_time -= (
                                    3 * float(values[9]))
                            utilization = float(values[2])
                            results.setdefault(utilization, []).append(
                                normalized_response_time)

                agg_output_filename = ("raw_results/agg_%s_%f" %
                                       (self.file_prefix, probes_ratio))
                with open(agg_output_filename, "w") as agg_output_file:
                    agg_output_file.write(
                        "Utilization\tResponseTime\tStdDev\n")
                    for utilization in sorted(results.keys()):
                        avg_response_time = stats.lmean(results[utilization])
                        std_dev = stats.lstdev(results[utilization])
                        agg_output_file.write(
                            "%f\t%f\t%f\n" %
                            (utilization, avg_response_time, std_dev))

                # Plot aggregated results.
                if i > 0:
                    gnuplot_file.write(', \\\n')
                title = "Probes/Tasks = %s" % probes_ratio
                if probes_ratio == -1:
                    title = "Ideal"
                gnuplot_file.write(
                    ("'%s' using 1:2 title '%s' lc %d lw 4 with l,"
                     "\\\n") % (agg_output_filename, title, i))
                gnuplot_file.write(
                    ("'%s' using 1:2:3 notitle lt %d lw 4 with "
                     "errorbars") % (agg_output_filename, i))

        subprocess.call(["gnuplot", filename])