Beispiel #1
0
def doF(studyName):
    """Summarise and plot the per-case FST results in the current directory.

    Scans the working directory for ``Fstat<case>-<chro>.txt`` files and,
    for every distinct case number (processed in ascending order):

    * archives the raw files through ``doZip`` into ``f<case>.zip``;
    * writes ``FST-summary-<case>`` holding the mean, minimum, the 0.005 /
      0.025 / 0.5 / 0.975 / 0.995 order statistics and the maximum of all
      FST values of the case;
    * writes ``FST-window-<case>`` with one tab-separated row per sliding
      window (chromosome, window midpoint, maximum FST, number of values
      above 0.1) and then passes the same rows to ``sortWrite``;
    * saves the genome-wide plot as ``Fst-<case>.png``.

    Args:
        studyName: Name handed to ``study.Study``; the study is configured
            via ``configStudy()`` before any file is processed.

    Raises:
        ZeroDivisionError: If a case contains no usable FST value at all.
    """
    myStudy = study.Study(studyName)
    myStudy.configStudy()

    # The same value is used for both window arguments of doSlidingWindow
    # and plot.plot_percentile.
    window = 200000

    # The case number sits between the "Fstat" prefix and the first "-".
    fCases = {
        int(fName[5:fName.find("-")])
        for fName in os.listdir(".")
        if fName.startswith("Fstat") and fName.endswith(".txt")
    }

    for fCase in sorted(fCases):
        doZip(None, "f%d.zip" % fCase, "Fstat%d-*.txt" % fCase)

        # Collect positions/values per chromosome plus all values pooled.
        vals = []
        posChro = {}
        valsChro = {}
        for chro in karyo.groups:
            posChro[chro] = []
            valsChro[chro] = []
            try:
                f = open("Fstat%d-%s.txt" % (fCase, chro))
            except IOError:
                continue  # Chro is missing, OK (for sex chros)
            with f:
                f.readline()  # first line is discarded (presumably a header)
                for l in f:
                    toks = l.rstrip().split("\t")
                    if toks[2] == "None":
                        continue
                    # Position is the third "/"-separated part of column 0.
                    pos = int(toks[0].split("/")[2])
                    val = float(toks[2])  # FST
                    posChro[chro].append(pos)
                    valsChro[chro].append(val)
                    vals.append(val)

        # Order statistics are read straight from the sorted pooled values.
        vals.sort()
        with open("FST-summary-%d" % fCase, "w") as w:
            w.write("mean\tmin\t0.005\t0.025\t0.5\t0.975\t0.995\tmax\n")
            n = len(vals)
            myStats = (
                sum(vals) / n,
                min(vals),
                vals[int(n * 0.005)],
                vals[int(n * 0.025)],
                vals[n // 2],
                vals[int(n * 0.975)],
                vals[int(n * 0.995)],
                max(vals))
            w.write("%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n" % myStats)

        fsts = []
        fig = plt.figure(figsize=(16, 9))
        ggp = plot.GridGenomePlot(karyo.genome, 4, fig=fig)
        with open("FST-window-%d" % fCase, "w") as w:
            for chro in karyo.groups:
                xsnp = posChro[chro]
                ysnp = valsChro[chro]
                ysAll, buckPos = doSlidingWindow(xsnp, ysnp, window, window)
                for bounds, lst in zip(buckPos, ysAll):
                    mid = (bounds[0] + bounds[1]) // 2
                    if len(lst) == 0:
                        w.write("%s\t%d\tNone\t\n" % (chro, mid))
                        fsts.append((chro, mid, None, None))
                        continue
                    maxFst = max(lst)
                    # NOTE(review): this is a raw count; the earlier
                    # (commented-out) version divided by len(lst) to get a
                    # fraction - confirm which one is intended.
                    above01 = float(len([x for x in lst if x > 0.1]))
                    # Rows are newline-terminated (was "\f", a form feed,
                    # which glued all non-empty rows onto one line).
                    w.write("%s\t%d\t%f\t%s\n" % (
                            chro, mid, maxFst, above01))
                    fsts.append((chro, mid, maxFst, above01))

                data = list(zip(xsnp, ysnp))
                ax = ggp.features[chro]
                plot.plot_percentile(ax, data, window, window,
                                     funcs=[(max, '.'),
                                            (numpy.mean, '#0099FF')])
                ax.set_ylim(0, 1)

        ggp.fig.savefig("Fst-%d.png" % fCase)
        # Release the figure; otherwise one figure per case stays alive in
        # pyplot's registry for the whole run.
        plt.close(fig)
        sortWrite("FST-window-%s" % fCase, fsts)
Beispiel #2
0
    print(freq, means[freq], stds[freq], file=STAT)
    #print freq, means[freq], stds[freq]
STAT.close()

# chroCalc[k] maps a window midpoint on chromosome k to the tuple
# (number of values, fraction with |iHS| > iHSThr, mean |iHS|, bin).
chroCalc = {}
# myBins collects the per-window fractions, grouped by bin
# (bin = number of values in the window // 20).
myBins = {}
for k in range(1, maxChro + 1):
    # NOTE(review): w is not closed anywhere in this section - confirm it
    # is closed further down, or close it once the loop below has finished.
    w = open("%d.iHS" % (k,), "w")
    pos, val = [], []
    # Standardise the unstandardised scores of chromosome k and write one
    # "snp <TAB> pos <TAB> iHS" row per result.
    with open("%d.uiHS" % (k,)) as f:
        for res in standartizeIHS(f, means, stds):
            pos.append(res["pos"])
            val.append(res["iHS"])
            w.write("%s\t%d\t%f\n" % (res["snp"], res["pos"], res["iHS"]))
    buckets, buckPos = doSlidingWindow(pos, val, slide, size)
    posResult = {}
    chroCalc[k] = posResult
    for i in range(len(buckPos)):
        myMin, myMax = buckPos[i]
        vals = buckets[i]
        above = [x for x in vals if abs(x) > iHSThr]
        absVals = [abs(x) for x in vals]
        if len(vals) > 0:
            # Floor division: with "/" Python 3 yields a float here, so
            # nearly every window size would end up in a bin of its own.
            bin = len(vals) // 20
            iHSCut = float(len(above)) / len(vals)
            myBins.setdefault(bin, []).append(iHSCut)
            # Not used within this section.
            xpEHHCut = max(absVals)
            # Integer midpoint as key (assumes integer window bounds -
            # TODO confirm against doSlidingWindow).
            posResult[(myMin + myMax) // 2] = (
                len(vals), iHSCut, sum(absVals) / len(vals), bin)