예제 #1
0
def print_stats(datums):
    print 'Mean:', stats.tmean(datums)
    print 'Median:', stats.cmedian(datums)
    print 'Std Dev:', stats.tstd(datums)
    print 'Variation:', stats.variation(datums)
    print 'Kurtosis:', stats.kurtosis(datums, fisher=False)
    print 'Skewness:', stats.skew(datums)
예제 #2
0
def plot_histogram(histogram, html_writer, title='', max_pathway_length=8, xmin=None, xlim=20, error_bars=True, min_to_show=20, legend_loc='upper left'):
    fig = pylab.figure()

    pylab.hold(True)

    reps = 1000
    
    y_offset = 0
    offset_step = 0.007
    colors = {1:'r', 2:'orange', 3:'green', 4:'cyan', 5:'blue', 'Rest':'violet', 'Not first':'k--', 'No known regulation':'grey', 'Activated':'green', 'Inhibited':'r', 'Mixed regulation':'blue'}
    for key, value in histogram.iteritems():
        if len(value) >= min_to_show:
            m = stats.cmedian(value)
            
            sample_std = None
            
            if error_bars:
                sample_vals = []
                i = 0
                while i < reps:
                    samples = []
                    while len(samples) < len(value):
                        samples.append(random.choice(value))
                    sample_vals.append(pylab.median(samples))
                    i += 1
                
                sample_std = pylab.std(sample_vals)
                        
            plotting.cdf(value, label='%s (med=%.1f, N=%d)' % \
                (key, m, len(value)),
                style=colors.get(key, 'grey'), std=sample_std, y_offset=y_offset)
            y_offset += offset_step
            

    xmin = -1 * xlim if xmin == None else xmin
    pylab.xlim(xmin, xlim)
    pylab.xlabel('Irreversability')
    #pylab.xlabel('deltaG')
    pylab.ylabel('Cumulative distribution')
    legendfont = matplotlib.font_manager.FontProperties(size=11)
    pylab.legend(loc=legend_loc, prop=legendfont)
    pylab.title(title)
    pylab.hold(False)
    
    if 'Not first' in histogram:
        print '%s, first vs. non-first ranksum test: ' % title + '(%f, %f)' % stats.ranksums(histogram[1], histogram['Not first'])
    
    if 'Inhibited' in histogram:
        print '%s, inhibited vs. non-regulated ranksum test: ' % title + '(%f, %f)' % stats.ranksums(histogram['Inhibited'], histogram['No known regulation'])
         
    
    #for k1, h1 in histogram.iteritems():
    #    for k2, h2 in histogram.iteritems():
    #        print k1, k2, stats.ranksums(h1, h2)
    
    return fig
예제 #3
0
    def freq_correlate(self):
        xs = []  # human score
        ys = []  # left freq
        zs = []  # right freq

        for key in self.collocdict.keys():
            xs.append(self.collocdict[key])
            ys.append(self.freqdict.get(key, 0))
            parts = key.split(":")
            rel = parts[1]
            invrel = self.parameters["inversefeatures"][rel]
            inverted = parts[2] + ":" + invrel + ":" + parts[0]
            zs.append(self.freqdict.get(inverted, 0))

        print xs
        print ys
        print zs
        xarray = np.array(xs)
        yarray = np.array(ys)
        zarray = np.array(zs)
        leftcorr = stats.spearmanr(xarray, yarray)
        rightcorr = stats.spearmanr(xarray, zarray)
        print "Correlation with left frequency ", leftcorr
        print "Correlation with right frequency ", rightcorr
        if self.parameters["wn_wiki"]:
            self.freqthresh = stats.cmedian(yarray)
            print "Median left frequency ", self.freqthresh
        else:
            self.freqthresh = stats.cmedian(zarray)
            print "Median right frequency ", self.freqthresh

        if self.parameters["diff"]:
            self.chunkthresh = [
                stats.cmedian(xarray),
                np.max(xarray),
            ]  # for chunking the input into len(self.chunkthresh) chunks  - needs work for generalisation to more than 2 chunks
        else:
            self.chunkthresh = [np.max(xarray)]
예제 #4
0
	def fit(self, X, y):
		"""Compute a per-business bias factor from useful-vote counts.

		The ``X`` and ``y`` arguments are not read; the training reviews are
		taken from ``self.data.training_reviews``. Reviews are grouped by
		``business_id`` and, for each business, the ratio median / mean of
		the ``votes['useful']`` values is stored in
		``self.business_winner_bias``. A bias of 1 is stored when the mean
		is zero (or the group is empty).

		Returns ``self``.
		"""
		self.business_winner_bias = bias_by_business = {}

		# Group the useful-vote counts of all training reviews by business.
		votes_by_business = defaultdict(list)
		for entry in self.data.training_reviews.values():
			bucket = votes_by_business[entry['business_id']]
			bucket.append(entry['votes']['useful'])

		for business, votes in votes_by_business.iteritems():
			centre = cmedian(votes)
			average = tmean(votes)
			if len(votes) == 0 or average == 0:
				# Neutral factor when the ratio is undefined.
				ratio = 1
			else:
				ratio = centre / average
			bias_by_business[business] = ratio

		return self
예제 #5
0
 def test_basic(self):
     """Check ``stats.cmedian`` against reference values on a fixed sample.

     The sample is evaluated with an explicit second positional argument
     of 5 and of 3, and once with the default.
     """
     data = [1, 2, 3, 1, 5, 3, 6, 4, 3, 2, 4, 3, 5, 2.0]
     # (extra positional arguments, expected median)
     expectations = (
         ((5,), 3.2916666666666665),
         ((3,), 3.083333333333333),
         ((), 3.0020020020020022),
     )
     for extra_args, expected in expectations:
         assert_almost_equal(stats.cmedian(data, *extra_args), expected)
예제 #6
0
#!/usr/bin/env python

import os
from numpy import mean, std
from scipy.stats import cmedian 

DIR = "./times"

# Reads every "nokcache.<chunk size>" file in DIR together with its
# "kcache.<chunk size>" sibling and prints one CSV row per chunk size.
# Column order, per the original header line kept below:
#print "Chunk Size, LFS Mean, LFS StdDev, LFS Median, ext4 Mean, ext4 StdDev, ext4 Median, LFS (kcache) Mean, LFS (kcache) StdDev, LFS (kcache) Median"

# Filter before sorting so that unrelated directory entries never reach the
# numeric sort key (which would raise on names without a ".<int>" suffix).
files = [x for x in os.listdir(DIR) if x.startswith("nokcache")]
files.sort(key=lambda x: int(x.split(".")[1]))
for filename in files:
	fn = os.path.join(DIR, filename)
	# Dropping the leading "no" turns "nokcache.N" into "kcache.N".
	fn2 = os.path.join(DIR, filename[2:])

	# Each line holds space-separated floats; the piece after the final
	# newline is dropped. Files are closed as soon as they are read.
	with open(fn) as handle:
		a = [[float(v) for v in line.split(" ")] for line in handle.read().split("\n")[:-1]]
	with open(fn2) as handle:
		b = [[float(v) for v in line.split(" ")] for line in handle.read().split("\n")[:-1]]

	# filename[9:] is the text after the 9-character "nokcache." prefix.
	size = int(filename[9:])
	# Floor division keeps the "<n>K" label an integer on Python 2 and 3.
	chunk = filename[9:] if size < 1024 else str(size // 1024) + "K"
	print(','.join(map(str, [chunk, mean(a[0]), std(a[0]), cmedian(a[0]), mean(a[1]), std(a[1]), cmedian(a[1]), mean(b[0]), std(b[0]), cmedian(b[0])])))
    def execute(cls, choices, galaxyFn=None, username=""):
        """Is called when execute-button is pushed by web-user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
        If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn.
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by web-user in each options box.
        """
        print "Executing..."
        genome = choices[0]
        infile = choices[1]
        windowSize = int(choices[2])
        normquantile = float(choices[3])
        percentile = float(choices[4])

        inFn = ExternalTrackManager.extractFnFromGalaxyTN(infile.split(":"))
        data = open(inFn, "r").read()
        fetVals, addr = cls.preProcessPvalues(data, 2)
        stddevs, addr = cls.preProcessPvalues(data, 3)
        output = open(galaxyFn, "w")
        # Tuva changed sorted elms to FALSE
        output.write(
            "##gtrack version: 1.0\n"
            + "##track type: segments\n"
            + "##uninterrupted data lines: true\n"
            + "##sorted elements: false\n"
            + "##no overlapping elements: true\n"
            + "###seqid\tstart\tend\n"
        )

        # Calculate limit for FET:
        m = stats.cmedian(fetVals)
        upperquant = stats.scoreatpercentile(stddevs, percentile)
        qnorm = stats.norm.ppf(normquantile)
        limit = m + qnorm * upperquant
        print "Windows found", sum(fetVals >= limit)
        print "percentile", percentile, "normquantile", normquantile
        print "mean", m, "upperquant", upperquant, "qnorm", qnorm
        print "Limit", limit
        addrs = numpy.array(addr)
        filteredaddrs = addrs[fetVals >= limit]

        print GenomeInfo.getChrList(genome)

        curchrom = ""
        start = ""
        end = sys.maxint
        prevAddr = -1000000.0
        for addr in filteredaddrs:
            addrList = addr.split("\t")
            if addrList[0] != curchrom or int(addrList[1]) - windowSize > prevAddr:
                if curchrom != "":
                    newend = prevAddr + windowSize if prevAddr + windowSize < end else end
                    output.write(start + "\t" + str(newend) + "\n")
                start = addr
                curchrom = addrList[0]
                end = int(GenomeInfo.getChrLen(genome, curchrom)) - 1

            prevAddr = int(addr.split("\t")[1])

        newend = prevAddr + windowSize if prevAddr + windowSize < end else end
        output.write(start + "\t" + str(newend) + "\n")
        output.close()
예제 #8
0
 def test_basic(self):
     """Compare ``stats.cmedian`` with known results for one data set.

     Three calls are checked: second positional argument 5, second
     positional argument 3, and no second argument.
     """
     data = [1,2,3,1,5,3,6,4,3,2,4,3,5,2.0]
     # Maps the extra positional arguments of each call to its expected value.
     cases = [
         ((5,), 3.2916666666666665),
         ((3,), 3.083333333333333),
         ((), 3.0020020020020022),
     ]
     for args, wanted in cases:
         actual = stats.cmedian(data, *args)
         assert_almost_equal(actual, wanted)