def TestCase(cutoff=23, num=2, filename="../GWAS/NoFam_0.05", k=10, sz=450):
    """Pick SNPs just below a score cutoff in one cohort and print CIs from the other.

    The data returned by ``loadData`` is unpacked into two ``[y, genotypes]``
    pairs.  The first pair is used to rank SNPs and find the first candidate
    whose score drops below ``cutoff``; the second pair is used to compute
    confidence intervals for the selected SNPs via ``CI``.

    Parameters:
        cutoff: threshold on the scaled score ``(len(y1) - k - 1) * score``.
        num: how many consecutive candidates, starting at the first one below
            the cutoff, are reported.  The original code always reported two
            regardless of this argument; the default of 2 reproduces that.
        filename: dataset prefix forwarded to ``loadData``.
        k: value forwarded to ``MU_STRAT`` / ``calcMU`` and used in the
            ``n - k - 1`` scaling factor.
        sz: forwarded to ``loadData`` unchanged.

    Raises:
        ValueError: if none of the candidates scores below the cutoff.
    """
    [[y1, sFil1], [y2, sFil2]] = loadData(filename=filename, useRand=False, sz=sz)
    MU1 = MU_STRAT(sFil1, k)
    MU2 = MU_STRAT(sFil2, k)
    scale_second = len(y2) - k - 1
    scale_first = len(y1) - k - 1

    print("Exact 1!")
    MU1.calcMU(k, exact=True)
    print("Exact 2!")
    MU2.calcMU(k, exact=True)

    print("Get top in set 1")
    # presumably the 20 top-ranked SNPs with noise disabled (eps=-1) -- TODO confirm against pt()
    top1 = pt(y1, MU1, 20, -1, algor="noise")
    print("Get Scores")
    scores = wt(y1, MU1, -1, snps=top1)

    print("Get " + str(num) + " SNPs directly below " + str(cutoff))
    # Scores are compared unscaled, so the cutoff is divided by the scaling factor.
    threshold = cutoff / float(scale_first)
    start = next(
        (idx for idx in range(len(top1)) if scores[idx] < threshold), None)
    if start is None:
        raise ValueError(
            "no candidate SNP scores below the cutoff " + str(cutoff))

    # Slicing (rather than indexing start + 1) keeps this safe when fewer than
    # `num` candidates remain after the first match.
    chosen = top1[start:start + num]
    for offset, snp in enumerate(chosen):
        if offset:
            print("")  # blank line between consecutive SNP reports
        print(snp)
        print(float(scale_first * scores[start + offset]))

    CIlst = CI(y2, MU2, .95, 2.0, chosen)
    Clst = [[scale_second * bound for bound in interval] for interval in CIlst]
    print(Clst)
def RunGWAS(filename, k=5, eps=2.0):
    """Run an exploratory two-cohort analysis and print/save the results.

    The data returned by ``loadData`` is unpacked into two ``[y, genotypes]``
    pairs.  Top SNPs are selected with ``pt``, scored with ``wt`` (both with
    the budget ``eps`` and with ``-1``), and confidence intervals for two of
    the selected SNPs are computed with ``CI``.  All scores from the second
    cohort are written one per line to ``saveRes.txt`` in the working
    directory.  Nothing is returned.

    Parameters:
        filename: dataset prefix forwarded to ``loadData``.
        k: value forwarded to ``MU_STRAT`` / ``calcMU`` and used in the
            ``n - k - 1`` scaling factor.
        eps: budget forwarded to the two ``wt`` calls that use it.
    """
    sz = 450
    [[y1, sFil1], [y2, sFil2]] = loadData(filename=filename, useRand=False, sz=sz)
    MU1 = MU_STRAT(sFil1, k)
    MU2 = MU_STRAT(sFil2, k)
    print("EXACT1!")
    MU1.calcMU(k, exact=True)
    print("Exact 2!")
    MU2.calcMU(k, exact=True)

    # The order of the pt/wt calls below is kept as in the original in case
    # they draw from a shared random stream.
    res = pt(y1, MU1, 1, -1, algor="noise")
    resL = pt(y1, MU1, 20, -1, algor="noise")
    print(resL)
    top_second = pt(y2, MU2, 1, -1, algor="noise")
    print(res)
    print(top_second)

    resT = wt(y2, MU2, eps, snps=resL[5:7])
    orig_scores = wt(y1, MU1, eps, snps=res)
    all_first = wt(y1, MU1, -1, snps=[])
    all_second = wt(y2, MU2, -1, snps=[])

    n = len(y2)
    n2 = len(y1)
    print(n2)
    print(n)
    val = [resT[0] * (n - k - 1), resT[1] * (n - k - 1)]
    print("In Val:")
    print(val)
    print("In Orig")
    print(orig_scores[0] * (n2 - k - 1))
    print(np.median(all_first) * (n2 - k - 1))

    top10_first = sorted(all_first, reverse=True)[:10]
    print([(n2 - k - 1) * s for s in top10_first])

    # `with` guarantees the handle is closed even if a write fails.
    with open("saveRes.txt", "w") as fil:
        for r in all_second:
            fil.write(str(r) + "\n")

    top20_second = sorted(all_second, reverse=True)[:20]
    print([(n - k - 1) * s for s in top20_second])

    # NOTE(review): the fourth CI argument is hard-coded to 2.0 rather than
    # taken from `eps` -- confirm whether that is intended.
    CIlst = CI(y2, MU2, .95, 2.0, resL[5:6])
    CIlst2 = CI(y2, MU2, .95, 2.0, resL[6:7])
    CIlst = [[(n - k - 1) * bound for bound in s] for s in CIlst]
    # The original computed this scaled list (as `CIlst22`) but then printed
    # the unscaled one; print the scaled values so both intervals match.
    CIlst2 = [[(n - k - 1) * bound for bound in s] for s in CIlst2]
    print(CIlst)
    print(CIlst2)
def plotTop(mret, eps, filename, savename=""):
    """Compare three SNP-selection algorithms over ten budgets and save the result.

    For each budget ``eps * 1 .. eps * 10`` the ``"neighbor"``, ``"score"``
    and ``"noise"`` variants of ``pt`` are each run 20 times, and the mean of
    ``inter(tru, gs)`` against the reference selection ``tru`` (``pt`` called
    with ``-1``) is accumulated.  The averages are written to ``savename`` as
    one space-separated row per series.

    Parameters:
        mret: forwarded to ``pt`` as its third argument
            (presumably the number of SNPs to return -- TODO confirm).
        eps: base budget; the ten tested values are its multiples 1..10.
        filename: dataset prefix forwarded to ``ld.getData``; also echoed in
            the output header.
        savename: output path.  When empty, defaults to
            ``OutputDir/res_top_<eps>_<mret>.txt``.
    """
    if not savename:
        savename = "OutputDir/res_top_" + str(eps) + "_" + str(mret) + ".txt"
    epsilons = [eps * i for i in range(1, 11)]

    print("load Data")
    [y, BED] = ld.getData(filename)
    print("calc MU!")
    MU = MU_STRAT(BED, 5)
    # The original sorted the absolute values of this product but never used
    # them; the call itself is kept in case prod() has side effects -- TODO
    # confirm and drop.
    MU.prod(y)
    print("get True!")
    tru = pt(y, MU, mret, -1, algor="noise")

    reps = 20
    neighs = [0.0] * len(epsilons)
    score = [0.0] * len(epsilons)
    noise = [0.0] * len(epsilons)
    for i, e in enumerate(epsilons):
        print(e)
        for j in range(reps):
            print(j)
            # Call order (neighbor, score, noise) is kept as in the original.
            gs = pt(y, MU, mret, e, algor="neighbor", reuse=True)
            neighs[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e, algor="score")
            score[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e, algor="noise")
            noise[i] += inter(tru, gs) / float(reps)

    series = (
        ("Epsilon", epsilons),
        ("Noise", noise),
        ("Score", score),
        ("Neighbor", neighs),
    )
    # `with` guarantees the handle is closed even if a write fails.
    with open(savename, "w") as fil:
        fil.write("Testing Top SNPs with DP, " + filename)
        for label, values in series:
            fil.write("\n" + label + ":")
            for value in values:
                fil.write(" " + str(value))
def plotTop(mret, eps, filename, savename=""):
    """Compare three SNP-selection algorithms over ten budgets and save the result.

    For each budget ``eps * 1 .. eps * 10`` the ``"neighbor"``, ``"score"``
    and ``"noise"`` variants of ``pt`` are each run 20 times, and the mean of
    ``inter(tru, gs)`` against the reference selection ``tru`` (``pt`` called
    with ``-1``) is accumulated.  The averages are written to ``savename`` as
    one space-separated row per series.

    Parameters:
        mret: forwarded to ``pt`` as its third argument
            (presumably the number of SNPs to return -- TODO confirm).
        eps: base budget; the ten tested values are its multiples 1..10.
        filename: dataset prefix forwarded to ``ld.getData``; also echoed in
            the output header.
        savename: output path.  When empty, defaults to
            ``OutputDir/res_top_<eps>_<mret>.txt``.
    """
    if not savename:
        savename = "OutputDir/res_top_" + str(eps) + "_" + str(mret) + ".txt"
    epsilons = [eps * i for i in range(1, 11)]

    print("load Data")
    [y, BED] = ld.getData(filename)
    print("calc MU!")
    MU = MU_STRAT(BED, 5)
    # The original sorted the absolute values of this product but never used
    # them; the call itself is kept in case prod() has side effects -- TODO
    # confirm and drop.
    MU.prod(y)
    print("get True!")
    tru = pt(y, MU, mret, -1, algor="noise")

    reps = 20
    neighs = [0.0] * len(epsilons)
    score = [0.0] * len(epsilons)
    noise = [0.0] * len(epsilons)
    for i, e in enumerate(epsilons):
        print(e)
        for j in range(reps):
            print(j)
            # Call order (neighbor, score, noise) is kept as in the original.
            gs = pt(y, MU, mret, e, algor="neighbor", reuse=True)
            neighs[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e, algor="score")
            score[i] += inter(tru, gs) / float(reps)
            gs = pt(y, MU, mret, e, algor="noise")
            noise[i] += inter(tru, gs) / float(reps)

    series = (
        ("Epsilon", epsilons),
        ("Noise", noise),
        ("Score", score),
        ("Neighbor", neighs),
    )
    # `with` guarantees the handle is closed even if a write fails.
    with open(savename, "w") as fil:
        fil.write("Testing Top SNPs with DP, " + filename)
        for label, values in series:
            fil.write("\n" + label + ":")
            for value in values:
                fil.write(" " + str(value))
def plotWald(eps, filename, savename="", k=5):
    """Write error quartiles of ``wt`` scores for a list of budgets.

    A reference score vector is computed with ``wt(..., -1, forFigs=False)``.
    For each budget, ``wt`` is called again with ``forFigs=True`` and the
    absolute differences to the reference, scaled by ``len(y) - k - 1``, are
    sorted.  One line ``"<eps> <q25> <median> <q75>"`` per budget is written
    to ``savename``.

    Parameters:
        eps: sequence of budgets.  As in the original, at most the first ten
            entries are evaluated; unlike the original, a shorter sequence no
            longer raises IndexError.
        filename: dataset prefix forwarded to ``ld.getData``.
        savename: output path.  When empty, defaults to
            ``OutputDir/res_wald_<eps[0]>_<k>.txt``.
        k: value forwarded to ``MU_STRAT`` and used in the scaling factor.
    """
    if not savename:
        savename = "OutputDir/res_wald_" + str(eps[0]) + "_" + str(k) + ".txt"

    print("load Data")
    [y, BED] = ld.getData(filename)
    print("calc MU!")
    MU = MU_STRAT(BED, k)
    print("get True!")
    tru = wt(y, MU, -1, snps=[], forFigs=False)
    scale = float(len(y) - k - 1)

    # `with` guarantees the handle is closed even if a write or wt() fails.
    with open(savename, "w") as fil:
        for e in eps[:10]:
            print(e)
            res = wt(y, MU, e, snps=[], forFigs=True)
            # Separate names avoid clobbering the loop variable, which the
            # original's inner comprehension did under Python 2.
            err = sorted(scale * abs(r - t) for r, t in zip(res, tru))
            m = len(err)
            down = err[int(.25 * m)]
            med = err[int(.5 * m)]
            up = err[int(.75 * m)]
            print(med)
            fil.write(str(e) + " " + str(down) + " " + str(med) + " " + str(up) + "\n")
def TestMU_STRAT():
    """Run printed sanity checks on ``MU_STRAT`` and stop at the first failure.

    The checks, in order:
      1. construction from a 900 x 1000 slice of the ``TestCases/pop`` data,
         and that the result is a ``MU_Mem`` instance;
      2. ``Uk`` exists, has shape (900, 5) and ``Uk.T . Uk`` is the identity
         to within 1e-4;
      3. with ``X`` set to a diagonal matrix, the first ``k`` columns of
         ``Uk`` have no off-diagonal entries above 1e-4 for k = 0..9;
      4. ``normY`` agrees with a direct computation on the same diagonal
         ``X`` for k = 1..9;
      5. for random centred ``X``, the rows of ``MU`` sum to zero, are
         orthogonal to ``Uk``, have maximum squared norm 1, and reproduce the
         first value returned by ``normY``.

    Progress and error messages are printed; the function always returns
    ``None``.
    """
    print("Test MU_Strat!")
    testfile1 = "TestCases/pop"
    [y, sFil] = getData(testfile1)
    sFil = sFil[:900, :1000]
    y = y[:900]

    # The original also built the object once *before* this try, so a failing
    # constructor escaped the handler; construct only inside the guard.
    try:
        mm = MU_STRAT(sFil, 5)
    except Exception:
        print("Error creating MU_STRAT!")
        return
    print("Created MU_STRAT!")

    if not isinstance(mm, MU_Mem):
        print("Is not a MU_Mem!")
        return
    print("Is a MU_Mem")

    print("Check Uk")
    try:
        Uk = mm.Uk
    except Exception:
        print("Uk not generated!")
        return
    print("Uk is generated!")

    shape = np.shape(Uk)
    # Either dimension being wrong is an error (the original used `and`,
    # which only fired when both were wrong).
    if shape[0] != 900 or shape[1] != 5:
        print("Error in dimensions of Uk!")
        return
    print("Dimensions correct!")

    gram = np.dot(Uk.T, Uk)
    for i in range(5):
        for j in range(5):
            if i != j and abs(gram[i][j]) > .0001:
                print("Error in Uk!")
                print([i, j])
                return
            elif i == j and abs(gram[i][j] - 1) > .0001:
                print("Error in Uk!")
                print(i)
                return
    print("Uk seems to conists of k unit, orthogonal vectors!")

    mm.X = np.diag([10.0 - i for i in range(0, 10)])
    for k in range(0, 10):
        mm.calcMU(k)
        Uk = mm.Uk
        for i in range(10):
            for j in range(k):
                if i != j and abs(Uk[i][j]) > .0001:
                    print("Uk fails on diag!")
                    return
    print("Passes on diag!")

    print("Check normY")
    mm.X = np.diag([10.0 - i for i in range(0, 10)])
    for k in range(1, 10):
        mm.calcMU(k)
        for _ in range(20):
            y = [rand.uniform(0, 1) for _unused in range(10)]
            [bot, val] = mm.normY(y)
            mn = sum(y) / float(len(y))
            # Expected value: squared norm of the centred vector with its
            # first k entries dropped.
            tail = [v - mn for v in y][k:]
            val = sum([v ** 2 for v in tail])
            if abs(bot ** 2 - val) > .001:
                print("Error in normY!")
                print(mn)
                print(bot ** 2)
                return
    print("normY seems ok")

    print("Finally do some sanity checks on MU")
    for _ in range(20):
        rows = [[rand.uniform(-1, 1) for _c in range(50)] for _r in range(100)]
        means = [sum(row) / 50.0 for row in rows]
        rows = [[v - means[r] for v in rows[r]] for r in range(100)]
        X = np.asarray(rows).T
        mm.X = X
        mm.calcMU(k)  # k is still 9, left over from the loop above
        MU = mm.MU
        Uk = mm.Uk
        if max([abs(sum(mu)) for mu in MU]) > .001:
            print("Error in calculating MU!")
            return
        dt = np.dot(MU, Uk)
        # Compare magnitudes so that large negative entries are caught too.
        if np.max(np.abs(dt)) > .001:
            print("Error in calculating MU!")
            return
        if abs(max([sum([v ** 2 for v in mu]) for mu in MU]) - 1) > .001:
            print("Error in calculating MU!")
            return
        for i in range(100):
            x = [X[j][i] for j in range(50)]
            mu = MU[i]
            val = np.dot(x, mu)
            [bot, some] = mm.normY(x)
            if abs(bot - val) > .001:
                print("Error in MU!")
                print(bot)
                print(val)
                return
    print("MU seems ok!")
    print("So MU_STRAT seems ok!!")
def TestMU_STRAT():
    """Run printed sanity checks on ``MU_STRAT`` and stop at the first failure.

    The checks, in order:
      1. construction from a 900 x 1000 slice of the ``TestCases/pop`` data,
         and that the result is a ``MU_Mem`` instance;
      2. ``Uk`` exists, has shape (900, 5) and ``Uk.T . Uk`` is the identity
         to within 1e-4;
      3. with ``X`` set to a diagonal matrix, the first ``k`` columns of
         ``Uk`` have no off-diagonal entries above 1e-4 for k = 0..9;
      4. ``normY`` agrees with a direct computation on the same diagonal
         ``X`` for k = 1..9;
      5. for random centred ``X``, the rows of ``MU`` sum to zero, are
         orthogonal to ``Uk``, have maximum squared norm 1, and reproduce the
         first value returned by ``normY``.

    Progress and error messages are printed; the function always returns
    ``None``.
    """
    print("Test MU_Strat!")
    testfile1 = "TestCases/pop"
    [y, sFil] = getData(testfile1)
    sFil = sFil[:900, :1000]
    y = y[:900]

    # The original also built the object once *before* this try, so a failing
    # constructor escaped the handler; construct only inside the guard.
    try:
        mm = MU_STRAT(sFil, 5)
    except Exception:
        print("Error creating MU_STRAT!")
        return
    print("Created MU_STRAT!")

    if not isinstance(mm, MU_Mem):
        print("Is not a MU_Mem!")
        return
    print("Is a MU_Mem")

    print("Check Uk")
    try:
        Uk = mm.Uk
    except Exception:
        print("Uk not generated!")
        return
    print("Uk is generated!")

    shape = np.shape(Uk)
    # Either dimension being wrong is an error (the original used `and`,
    # which only fired when both were wrong).
    if shape[0] != 900 or shape[1] != 5:
        print("Error in dimensions of Uk!")
        return
    print("Dimensions correct!")

    gram = np.dot(Uk.T, Uk)
    for i in range(5):
        for j in range(5):
            if i != j and abs(gram[i][j]) > .0001:
                print("Error in Uk!")
                print([i, j])
                return
            elif i == j and abs(gram[i][j] - 1) > .0001:
                print("Error in Uk!")
                print(i)
                return
    print("Uk seems to conists of k unit, orthogonal vectors!")

    mm.X = np.diag([10.0 - i for i in range(0, 10)])
    for k in range(0, 10):
        mm.calcMU(k)
        Uk = mm.Uk
        for i in range(10):
            for j in range(k):
                if i != j and abs(Uk[i][j]) > .0001:
                    print("Uk fails on diag!")
                    return
    print("Passes on diag!")

    print("Check normY")
    mm.X = np.diag([10.0 - i for i in range(0, 10)])
    for k in range(1, 10):
        mm.calcMU(k)
        for _ in range(20):
            y = [rand.uniform(0, 1) for _unused in range(10)]
            [bot, val] = mm.normY(y)
            mn = sum(y) / float(len(y))
            # Expected value: squared norm of the centred vector with its
            # first k entries dropped.
            tail = [v - mn for v in y][k:]
            val = sum([v ** 2 for v in tail])
            if abs(bot ** 2 - val) > .001:
                print("Error in normY!")
                print(mn)
                print(bot ** 2)
                return
    print("normY seems ok")

    print("Finally do some sanity checks on MU")
    for _ in range(20):
        rows = [[rand.uniform(-1, 1) for _c in range(50)] for _r in range(100)]
        means = [sum(row) / 50.0 for row in rows]
        rows = [[v - means[r] for v in rows[r]] for r in range(100)]
        X = np.asarray(rows).T
        mm.X = X
        mm.calcMU(k)  # k is still 9, left over from the loop above
        MU = mm.MU
        Uk = mm.Uk
        if max([abs(sum(mu)) for mu in MU]) > .001:
            print("Error in calculating MU!")
            return
        dt = np.dot(MU, Uk)
        # Compare magnitudes so that large negative entries are caught too.
        if np.max(np.abs(dt)) > .001:
            print("Error in calculating MU!")
            return
        if abs(max([sum([v ** 2 for v in mu]) for mu in MU]) - 1) > .001:
            print("Error in calculating MU!")
            return
        for i in range(100):
            x = [X[j][i] for j in range(50)]
            mu = MU[i]
            val = np.dot(x, mu)
            [bot, some] = mm.normY(x)
            if abs(bot - val) > .001:
                print("Error in MU!")
                print(bot)
                print(val)
                return
    print("MU seems ok!")
    print("So MU_STRAT seems ok!!")