def _listFstatCases(fileNames):
    """Return the sorted, de-duplicated case numbers found in *fileNames*.

    A case file is named ``Fstat<case>-<suffix>.txt``; the case number is the
    text between the ``Fstat`` prefix and the first ``-``.  Names that match
    the prefix/extension but have no ``-`` or no integer case number are
    skipped instead of aborting the whole run.
    """
    prefix = "Fstat"
    cases = set()
    for fName in fileNames:
        if not (fName.startswith(prefix) and fName.endswith(".txt")):
            continue
        caseStr, sep, _ = fName[len(prefix):].partition("-")
        if not sep:
            # No "-" separator: not a per-chromosome Fstat file.
            continue
        try:
            cases.add(int(caseStr))
        except ValueError:
            # e.g. "Fstat-foo.txt" or "Fstatabc-1.txt": no usable case number.
            continue
    return sorted(cases)


def _readFstatCase(fCase):
    """Load the per-chromosome ``Fstat<fCase>-<chro>.txt`` files.

    Returns ``(vals, posChro, valsChro)``: *vals* is the sorted list of every
    value read (third tab-separated column), while *posChro* / *valsChro* map
    each entry of ``karyo.groups`` to its positions and values in file order.
    Chromosomes whose file cannot be opened keep empty lists.
    """
    vals = []
    posChro = {}
    valsChro = {}
    for chro in karyo.groups:
        posChro[chro] = []
        valsChro[chro] = []
        try:
            f = open("Fstat%d-%s.txt" % (fCase, chro))
        except IOError:
            continue  # Chro is missing, OK (for sex chros)
        with f:
            f.readline()  # first line is discarded (presumably a header)
            for line in f:
                toks = line.rstrip().split("\t")
                if toks[2] == "None":
                    continue
                # Position is the third "/"-separated field of column one.
                pos = int(toks[0].split("/")[2])
                val = float(toks[2])  # FST
                posChro[chro].append(pos)
                valsChro[chro].append(val)
                vals.append(val)
    vals.sort()
    return vals, posChro, valsChro


def _writeFstSummary(fCase, vals):
    """Write mean, extremes and selected quantiles of *vals* to
    ``FST-summary-<fCase>``.

    *vals* must be sorted ascending.  As before, an empty *vals* raises
    ZeroDivisionError after the header line has been written.
    """
    with open("FST-summary-%d" % fCase, "w") as w:
        w.write("mean\tmin\t0.005\t0.025\t0.5\t0.975\t0.995\tmax\n")
        n = len(vals)
        myStats = (
            sum(vals) / n,
            min(vals),
            vals[int(n * 0.005)],
            vals[int(n * 0.025)],
            vals[n // 2],
            vals[int(n * 0.975)],
            vals[int(n * 0.995)],
            max(vals))
        w.write("%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n" % myStats)


def _windowAndPlotFst(fCase, posChro, valsChro, winSize=200000):
    """Write the windowed table and the genome plot for one case.

    For every chromosome in ``karyo.groups`` the values are bucketed with
    ``doSlidingWindow(..., winSize, winSize)``.  Each bucket yields one row
    (chromosome, window midpoint, max value, count of values > 0.1) in
    ``FST-window-<fCase>``; empty buckets are recorded as ``None``.  The
    same rows are then handed to ``sortWrite`` under the same file name, and
    the plot is saved as ``Fst-<fCase>.png``.
    """
    fsts = []
    fig = plt.figure(figsize=(16, 9))
    ggp = plot.GridGenomePlot(karyo.genome, 4, fig=fig)
    with open("FST-window-%d" % fCase, "w") as w:
        for chro in karyo.groups:
            xsnp = posChro[chro]
            ysnp = valsChro[chro]
            ysAll, buckPos = doSlidingWindow(xsnp, ysnp, winSize, winSize)
            mids = [(start + end) // 2 for start, end in buckPos]
            for mid, lst in zip(mids, ysAll):
                if len(lst) == 0:
                    w.write("%s\t%d\tNone\t\n" % (chro, mid))
                    fsts.append((chro, mid, None, None))
                    continue
                top = max(lst)
                # Number (not fraction) of values in the window above 0.1.
                above01 = float(len([x for x in lst if x > 0.1]))
                # Rows end with a newline, like the empty-window rows
                # (the previous "\f" emitted a form feed instead).
                w.write("%s\t%d\t%f\t%s\n" % (chro, mid, top, above01))
                fsts.append((chro, mid, top, above01))
            data = list(zip(xsnp, ysnp))
            ax = ggp.features[chro]
            plot.plot_percentile(ax, data, winSize, winSize,
                                 funcs=[(max, '.'), (numpy.mean, '#0099FF')])
            ax.set_ylim(0, 1)
    ggp.fig.savefig("Fst-%d.png" % fCase)
    # Release the figure so figures do not pile up across cases
    # (assumes plt is matplotlib.pyplot, which retains figures until closed).
    plt.close(fig)
    sortWrite("FST-window-%s" % fCase, fsts)


def doF(studyName):
    """Summarise and plot every ``Fstat<case>-*.txt`` case in the current
    directory.

    The study named *studyName* is instantiated and configured first.  Then,
    for each case number found (in ascending order):

    * ``doZip(None, "f<case>.zip", "Fstat<case>-*.txt")`` is called
      (presumably archiving the raw files -- confirm against doZip);
    * the per-chromosome files are read;
    * ``FST-summary-<case>`` receives the mean/min/quantiles/max;
    * ``FST-window-<case>`` and ``Fst-<case>.png`` receive the 200 kb window
      statistics and plot.

    Raises ZeroDivisionError if a case has no non-``None`` values at all.
    """
    myStudy = study.Study(studyName)
    myStudy.configStudy()
    for fCase in _listFstatCases(os.listdir(".")):
        doZip(None, "f%d.zip" % fCase, "Fstat%d-*.txt" % fCase)
        vals, posChro, valsChro = _readFstatCase(fCase)
        _writeFstSummary(fCase, vals)
        _windowAndPlotFst(fCase, posChro, valsChro)
print(freq, means[freq], stds[freq], file=STAT) #print freq, means[freq], stds[freq] STAT.close() chroCalc={} myBins = {} for k in range(1, maxChro+1): w=open("%d.iHS" %(k,), "w") pos, val = [], [] f=open("%d.uiHS" %(k,)) for res in standartizeIHS(f, means, stds): pos.append(res["pos"]) val.append(res["iHS"]) #print res["freq"], "Y", res["iHS"], "X", res["uiHS"], means[res["freq"]],stds[res["freq"]] w.write("%s\t%d\t%f\n" % (res["snp"], res["pos"], res["iHS"])) f.close() buckets, buckPos = doSlidingWindow(pos, val, slide, size) posResult = {} chroCalc[k] = posResult for i in range(len(buckPos)): myMin, myMax = buckPos[i] vals = buckets[i] above = [x for x in vals if abs(x)>iHSThr] absVals = [abs(x) for x in vals] #print vals if len(vals)>0: bin = len(vals)/20 iHSCut = float(len(above))/len(vals) myBins.setdefault(bin,[]).append(iHSCut) xpEHHCut =max(absVals) posResult[(myMin+myMax)/2] = len(vals), iHSCut, sum(absVals)/len(vals), bin #print len(vals), iHSCut, sum(absVals)/len(vals), bin