Example #1
0
	def __init__(self,filename):
		"""Load *filename* and precompute derived representations of its data.

		filename -- path handed to ``self.toArray``; presumably a delimited
		data file, but the format is defined by ``toArray`` (not visible
		here) -- TODO confirm.
		"""
		# Raw parsed table from the input file.
		self.array=self.toArray(filename)
		# Numeric-only view of the table plus two maps relating the numeric
		# columns back to their original column positions/names.
		self.numericArray,self.numericColumnIndicesMap,self.numericColumnIndexMap=self.toNumericArray()
		# Clusters start empty; filled in later by other methods (not shown).
		self.clusters=[]
		# Enumeration of the string-valued columns, built by calcEnumStrs.
		self.enum=self.calcEnumStrs()
		#prj7:
		#confidence interval for each species
		# NOTE(review): stats.ci is given the whole object here, unlike the
		# stats.ci(list, z) call pattern seen elsewhere -- verify signature.
		self.cidata=stats.ci(self)
Example #2
0
def summarizer(sp_tree, sp_quartet_tree, outfile):
    """Attach per-node concordance summary statistics to *sp_tree* and emit them.

    For every non-root node, the list of per-quartet values (``data["qln"]``
    for tips; the ``sp_quartet_tree`` entry keyed by ``data["q"]`` for
    internal nodes) is summarized into mean/median/min/max and a 95%
    confidence interval, all stored back into ``node.data``.  One annotated
    newick string per statistic is then written to ``outfile + ".tre"``
    (with a verbose per-node TSV alongside) or printed to stdout when
    *outfile* is falsy.

    Parameters
    ----------
    sp_tree : tree node object exposing ``iternodes``, ``children``,
        ``label``, ``data``, ``get_newick_repr``, ``get_newick_otherlen``
    sp_quartet_tree : mapping from a node's ``data["q"]`` key to a list of
        concordance values
    outfile : str or falsy -- base name for the two output files, or
        None/"" to print the trees instead

    Returns
    -------
    None; results are side effects on ``sp_tree`` and the output streams.

    Fixes vs. original: output files are now closed (previously leaked),
    locals no longer shadow the builtins ``min``/``max``, and ``print`` uses
    the parenthesized form (identical output, Py2/Py3 compatible).
    """
    v_out = None
    outf = None
    if outfile:
        v_out = open(outfile + ".verbose.tsv", "w")
        outf = open(outfile + ".tre", "w")
    try:
        # Data stored on species tree: visit every node except the root.
        for node in sp_tree.iternodes():
            if node == sp_tree:
                continue
            # No children means a tip; tips carry their values in data["qln"],
            # internal nodes index into sp_quartet_tree via data["q"].
            if len(node.children) == 0:
                holder = node.data["qln"]
                if v_out:
                    # Convert all floats to strings for printing.
                    v_out.write(node.label + "\t"
                                + "\t".join(map(str, holder)) + "\n")
            else:
                holder = sp_quartet_tree[node.data["q"]]
                if v_out:
                    v_out.write(node.get_newick_repr(False) + "\t"
                                + "\t".join(map(str, holder)) + "\n")

            # Number of values that were concordant and met the cutoff.
            node.data["concord"] = len(holder)
            # Make sure something has actually been concordant and met the
            # cutoff; otherwise every statistic defaults to 0.0.
            if len(holder) == 0:
                mean_val = median_val = 0.0
                min_val = max_val = 0.0
                ci_low = ci_high = 0.0
            else:
                mean_val = stats.mean(holder)
                median_val = stats.median(holder)
                min_val = stats.min(holder)
                max_val = stats.max(holder)
                # Account for the fact you need two values for CI's.
                if len(holder) > 1:
                    # array and z-value (95% is 1.96)
                    ci_low, ci_high = stats.ci(holder, 1.96)
                else:
                    ci_low = ci_high = 0.0

            node.data["mean"] = mean_val
            node.data["median"] = median_val
            node.data["min"] = min_val
            node.data["max"] = max_val
            node.data["cih"] = ci_high
            node.data["cil"] = ci_low

        # Emit one annotated newick tree per summary statistic.
        for stat_name in ("mean", "median", "min", "max",
                          "cil", "cih", "concord"):
            if outf:
                outf.write(sp_tree.get_newick_otherlen(stat_name) + ";\n")
            else:
                print(sp_tree.get_newick_otherlen(stat_name) + ";")
    finally:
        # Always release the file handles (the original leaked them).
        if v_out:
            v_out.close()
        if outf:
            outf.close()
Example #3
0
  '1-sigma': erf(1/sqrt(2)),
  '95%': 0.95,
  }

# Build the list of sample sizes to test: log-spaced integers over
# 10..10^3 always, with optional mid (10^3..10^5) and high (10^5..10^7)
# ranges appended when requested.
z = [int(ki) for ki in logspace(1,3,low_points)]
if mid_points: z += [int(ki) for ki in logspace(3,5,mid_points)]
if high_points: z += [int(ki) for ki in logspace(5,7,high_points)]
z = list(sorted(set(z))) # Make z unique

# One subplot per (interval, distribution) pair, laid out ni rows x nj cols.
ni,nj = len(intervals),len(dists)
for pi,interval_name  in enumerate(sorted(intervals.keys())):
    for pj,dist_name in enumerate(sorted(dists.keys())):
        print "processing",dist_name,interval_name
        s,rng = intervals[interval_name], dists[dist_name]
        #print [min(1000,work//ki+1) for ki in z]
        # For each sample size ki, draw min(maxn, work//ki+1) replicate
        # samples from rng and compute the interval 's' with and without
        # bias correction; work//ki keeps total effort roughly constant.
        unbiased = [array([ci(rng(ki),s,unbiased=True)
                           for _ in range(min(maxn,work//ki+1))])
                    for ki in z]
        biased = [array([ci(rng(ki),s,unbiased=False)
                         for _ in range(min(maxn,work//ki+1))])
                  for ki in z]
        pylab.subplot(ni,nj,pi*nj+pj+1)
        #print data
        # Plot mean CI width vs sample size (error bars = std of widths),
        # once for the biased and once for the unbiased estimates.
        for data in biased, unbiased:
            # diff(...,axis=1)[:,0] collapses each (lo, hi) pair to a width.
            delta = [diff(di,axis=1)[:,0] for di in data]
            w = [mean(di) for di in delta]
            dw = [std(di) for di in delta]
        #print z,w,dw
        #print interval_name, dist_name, ni, nj, pi, pj, pj*ni+pi
            pylab.semilogx(z,w,'o')
            pylab.errorbar(z,w,dw)