def profileMu():
    """Plot an LD profile of the loaded data and show a sorted LD heat map.

    Steps, in order:
      1. Load the data matrix with ``loadData()``.
      2. Compute the ``DPrime`` LD table via ``est.Estimate.LD`` and plot,
         per row, the fraction of entries greater than 0.99.
      3. Build a per-column statistics table from ``mh.sort_mutations`` and
         drop its ``'haf'`` column.
      4. Concatenate LD table, the sum of the positive LD entries and the
         statistics side by side, sort the rows by columns ``0`` and ``'p'``
         (descending) and draw the reordered LD part with ``plt.imshow``.

    Returns:
        None. The function only draws on the current pyplot state.

    NOTE(review): the original source had lost its line breaks; statement
    boundaries were reconstructed from syntax -- please confirm.
    """
    matrix = loadData()

    dprime = est.Estimate.LD(matrix, measure='DPrime')
    near_complete_ld = dprime > 0.99
    near_complete_ld.mean(1).plot()

    # One Series per key returned by sort_mutations; after the transpose every
    # key becomes a column and the rows are relabelled with matrix.columns.
    stat_rows = [
        pd.Series(values, name=key)
        for key, values in mh.sort_mutations(matrix.values).items()
    ]
    stats = pd.DataFrame(stat_rows).T
    stats.index = matrix.columns
    stats = stats.loc[:, stats.columns != 'haf']

    positive_ld_total = dprime[dprime > 0].sum()
    table = pd.concat([dprime, positive_ld_total, stats], axis=1)

    # Column 0 is presumably the unnamed positive-LD total added by concat
    # -- TODO confirm against the labels produced by est.Estimate.LD.
    table = table.sort_values([0, 'p'], ascending=False)

    # Reorder the columns to follow the sorted row order, then append the
    # last eight columns of the concatenated table.
    column_order = np.append(table.index.values, table.columns[-8:])
    table = table[column_order]

    plt.imshow(table.loc[:, table.index])
def PCANumDistinct(M):
    """Draw a 3-D scatter of the columns of ``M`` and highlight one site.

    Each column of ``M`` is placed at:
      * x -- its coordinate on the first component of a two-component PCA
        fitted on ``M.T``;
      * y -- the column sum of the squared ``Rho`` LD table returned by
        ``est.Estimate.LD``;
      * z -- the ``f`` entry produced by ``mh.sort_mutations(M.values)``.

    The column at positional index ``M.siteUnderSelection`` is drawn a second
    time in red.

    Args:
        M: table exposing ``.T``, ``.values``, ``.iloc`` and a
            ``siteUnderSelection`` attribute (presumably a pandas DataFrame
            subclass or an annotated DataFrame -- TODO confirm).

    Returns:
        None. Draws on pyplot figure number 1.

    NOTE(review): the original source had lost its line breaks; statement
    boundaries were reconstructed from syntax -- please confirm.
    """
    site = M.siteUnderSelection

    pca_scores = decomposition.PCA(n_components=2).fit(M.T).transform(M.T)

    stat_rows = [
        pd.Series(values, name=key)
        for key, values in mh.sort_mutations(M.values).items()
    ]
    stats = pd.DataFrame(stat_rows).T

    squared_ld_sum = (est.Estimate.LD(M, measure='Rho') ** 2).sum().values
    f_values = stats.f.values

    # Colour vector: zero everywhere, one at the site under selection.
    is_selected = M.iloc[0, :] * 0
    is_selected.iloc[site] = 1

    fig = plt.figure(1, figsize=(8, 6))
    ax = Axes3D(fig, elev=-150, azim=110)
    ax.scatter(pca_scores[:, 0], squared_ld_sum, f_values,
               c=is_selected, cmap=plt.cm.Paired)
    ax.scatter(pca_scores[site, 0], squared_ld_sum[site], f_values[site],
               c='r', cmap=plt.cm.Paired)

    # The y and z axes used to be labelled "2nd eigenvector" and
    # "3rd eigenvector", but only the x axis carries a PCA coordinate; the
    # labels now describe what is actually plotted.
    ax.set_xlabel("1st eigenvector")
    ax.set_ylabel("sum of squared LD (Rho)")
    ax.set_zlabel("f")
def LDA():
    """Fit a 2-topic LDA on 20 loaded data sets and save scatter plots.

    Stage 1 (20 iterations): load a data set with ``loadData()``, fit
    ``LatentDirichletAllocation`` on ``M.values``, scatter topic 0 against
    topic 1 for every column of ``M``, mark the column labelled
    ``M.posUnderSelection`` in red and save the figure as
    ``utl.home + 'lda2/<i>.png'``. The title shows the rank (descending) of
    that column in ``df.p``, where ``df`` comes from ``mh.sort_mutations``.

    Stage 2: using the objects left over from the LAST stage-1 iteration,
    save one 2x2 figure per topic as ``utl.home + 'lda2/<i>.<j>.<k>.png'``
    (three 2-D panels plus one 3-D panel).

    Returns:
        None. The only outputs are the PNG files.

    NOTE(review): the original source had lost its line breaks and
    indentation. The nesting used here (stage 2 after, not inside, the
    20-iteration loop) is a reconstruction -- please confirm.
    NOTE(review): ``n_topics`` is kept as written; newer scikit-learn
    releases name this argument ``n_components`` -- confirm against the
    pinned version.
    """
    # This import used to sit in the middle of the function, AFTER the first
    # ``utl.home`` access. A function-scope import makes ``utl`` a local name
    # for the whole body, so the earlier access raised UnboundLocalError.
    import popgen.Util as utl

    numTopics = 2
    # The original switched interactive mode off as a side effect of
    # ``ax=plt.ioff()``; do it explicitly and pass a real axes object instead.
    plt.ioff()

    for i in range(20):
        M = loadData()
        df = pd.DataFrame([
            pd.Series(v, name=k)
            for k, v in mh.sort_mutations(M.values).items()
        ]).T
        df.index = M.columns

        lda = LatentDirichletAllocation(
            n_topics=numTopics, max_iter=100, learning_method='online',
            learning_offset=50., random_state=0)
        lda.fit(M.values)

        # One row per column of M, one column per topic.
        topics = pd.DataFrame(lda.components_, columns=M.columns).T
        X = topics.values
        selected = topics[topics.index == M.posUnderSelection]
        rankTitle = 'Ali Rank={}'.format(
            df.p.rank(ascending=False).loc[M.posUnderSelection])

        # One 8x8 figure per data set, closed after saving so that figures do
        # not accumulate. The original created the 8x8 figure once, never
        # drew on it, and saved default-size figures instead.
        fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
        topics.plot.scatter(ax=ax, x=0, y=1)
        selected.plot.scatter(ax=ax, x=0, y=1, color='r')
        ax.set_title(rankTitle)
        fig.savefig(utl.home + 'lda2/{}.png'.format(i))
        plt.close(fig)

    # Stage 2 deliberately reuses M, df, topics, X, selected and rankTitle
    # from the last iteration above.
    for i in range(numTopics):
        # The nested loops over j and k were disabled in the original, which
        # leaves all three plotted components equal to i.
        j = k = i
        com = [i, j, k]
        fig = plt.figure(figsize=(8, 6))

        # (subplot slot, x topic, y topic, whether the panel gets the title)
        panels = [
            (1, com[0], com[1], True),
            (2, com[0], com[2], False),
            (3, com[1], com[2], True),
        ]
        for slot, xTopic, yTopic, titled in panels:
            ax = fig.add_subplot(2, 2, slot)
            topics.plot.scatter(ax=ax, x=xTopic, y=yTopic)
            selected.plot.scatter(ax=ax, x=xTopic, y=yTopic, color='r')
            if titled:
                ax.set_title(rankTitle)
            # Set after plotting so these labels override the ones pandas
            # writes.
            ax.set_xlabel('topic {}'.format(xTopic))
            ax.set_ylabel('topic {}'.format(yTopic))

        ax = fig.add_subplot(2, 2, 4, projection='3d', elev=-150, azim=110)
        ax.scatter(X[:, com[0]], X[:, com[1]], X[:, com[2]],
                   cmap=plt.cm.Paired)
        ax.scatter(X[M.siteUnderSelection, com[0]],
                   X[M.siteUnderSelection, com[1]],
                   X[M.siteUnderSelection, com[2]],
                   c='r', s=100, cmap=plt.cm.Paired)
        ax.set_xlabel("Topic {}".format(com[0]))
        ax.set_ylabel("Topic {}".format(com[1]))
        ax.set_zlabel("Topic {}".format(com[2]))

        fig.savefig(utl.home + 'lda2/{}.{}.{}.png'.format(i, j, k))
        plt.close(fig)


# NOTE(review): this definition repeats, statement for statement, the
# ``profileMu`` defined earlier in the file; being the later one, it is the
# definition Python keeps. Consider deleting one of the two copies.
def profileMu():
    """Plot an LD profile of the loaded data and show a sorted LD heat map.

    Plots, per row of the ``DPrime`` LD table, the fraction of entries
    greater than 0.99; then concatenates the LD table, the sum of its
    positive entries and the ``mh.sort_mutations`` statistics (without
    ``'haf'``), sorts by columns ``0`` and ``'p'`` (descending) and draws the
    reordered LD part with ``plt.imshow``.

    Returns:
        None.
    """
    M = loadData()
    ld = est.Estimate.LD(M, measure='DPrime')
    (ld > 0.99).mean(1).plot()

    df = pd.DataFrame([
        pd.Series(v, name=k)
        for k, v in mh.sort_mutations(M.values).items()
    ]).T
    df.index = M.columns
    df = df.loc[:, df.columns != 'haf']

    dff = pd.concat([ld, (ld[ld > 0]).sum(), df], axis=1)
    dff = dff.sort_values([0, 'p'], ascending=False)
    # Columns follow the sorted row order, then the last eight columns of the
    # concatenated table.
    dff = dff[np.append(dff.index.values, dff.columns[-8:])]
    plt.imshow(dff.loc[:, dff.index])
def runOne():
    """Run one prior-times-likelihood update and report rank and AUC.

    Loads a data set, derives the normalised score ``xali`` from ``exp(df.p)``
    (``df`` comes from ``mh.sort_mutations``), builds similarity tables
    ``Dy`` (row cosine similarity of ``M``) and ``Dx`` (squared ``Rho`` LD),
    then updates the column scores ``x`` and the row scores ``y`` once each
    as a normalised ``prior * likelihood`` product.

    Returns:
        A 3-tuple: the ``min``-method descending rank of label ``50000`` in
        ``xali``, the same rank in the updated ``x``, and the first AUC
        reported by ``evl.ROCforAllMethods`` for ``y`` against ``Y``.

    NOTE(review): the original source had lost its line breaks; statement
    and comment boundaries were reconstructed from syntax -- please confirm.
    NOTE(review): as reconstructed, this function cannot run to completion:
      * ``y`` and ``x0`` are read before any assignment (their
        initialisation only survives in the disabled lines kept below);
      * ``plt.imshow()`` is called without an argument;
      * ``str(iter)`` refers to the builtin because the loop that bound
        ``iter`` is disabled;
      * ``exp``, ``plot`` and ``ploty`` are bare names that must come from
        elsewhere in the module.
    The statement sequence is preserved unchanged so that the owner can
    decide how to repair it.
    """
    maxIter = 10  # only referenced by the disabled loop header below

    M = loadData()
    stat_rows = [
        pd.Series(values, name=key)
        for key, values in mh.sort_mutations(M.values).items()
    ]
    df = pd.DataFrame(stat_rows).T

    xali = np.exp(df.p)
    xali.index = M.columns
    xali /= xali.sum()
    # Written so the output matches the Python 2 ``print a, b`` statement
    # while remaining valid syntax on Python 3.
    print('%s %s' % ('ali Rank:', xali.rank(ascending=False).loc[50000]))

    # Euclidean norms of the rows (as a one-column frame) and of the columns.
    normMy = pd.DataFrame(M.apply(lambda row: np.linalg.norm(row), axis=1))
    normMx = pd.DataFrame(M.apply(lambda col: np.linalg.norm(col), axis=0))[0]

    Dy = M.dot(M.T) / normMy.dot(normMy.T)
    Dx = est.Estimate.LD(M, measure='Rho').applymap(lambda r: r ** 2)
    plt.imshow(Dy)

    Y = M.iloc[:, M.siteUnderSelection]

    # Disabled initialisations found in the original (kept as the only hint
    # for how ``y`` and ``x0`` were meant to start):
    # y=M.iloc[:,M.siteUnderSelection].copy(True);y.name='y';x=xali.copy(True);x0=x.copy(True);y=y-y+1
    # y=pd.Series(-np.linalg.svd(M)[0][:,0]);y=np.exp(y);y/=y.max()-y.min();y-=y.min()
    x = xali.copy(True)
    # for iter in range(maxIter):

    # De-duplicate identical columns, then identical rows.
    N = M.T.drop_duplicates(keep='last').T
    O = N.drop_duplicates(keep='last')
    ld = est.Estimate.LD(O, measure='Rho').abs()

    weighted = pd.Series(np.exp(-(df.N / df.f).values), index=M.columns)
    a = pd.concat(
        [weighted * ld.sum(), xali, M.mean(), ld.sum(), O.mean()],
        axis=1,
    ).dropna()

    # The next three results are discarded; the expressions are kept because
    # evaluating them can still raise.
    a * pd.Series(abs(np.linalg.svd(O)[2][:, 0]), index=a.index).shape
    a.rank(ascending=False).loc[50000]
    a.sort_values(0)
    plt.imshow()  # NOTE(review): no image argument is passed here

    a = pd.concat(
        [
            pd.Series(exp(-(df.N / df.f).values), index=M.columns),
            xali,
            M.mean(),
            Dx.sum(),
        ],
        axis=1,
    )
    a.rank(ascending=False).loc[50000]
    y.loc[O.index].dot(O).rank(ascending=False).loc[50000]

    # --- update of the column scores x ---
    squared_ld = est.Estimate.LD(M, measure='Rho').applymap(lambda r: r ** 2)
    first_u_row = np.linalg.svd(squared_ld.values)[0][0]
    p = x * pd.Series(first_u_row, index=M.columns).abs()

    above_median = (y > y.quantile(0.5)).astype(int)
    l = above_median.dot(M) / normMx
    l /= l.sum()

    post = p * l
    post /= post.sum()
    x = post

    plt.subplot(2, 2, 1)
    plot(x0, 'x0')
    plt.subplot(2, 2, 2)
    plot(p, 'prior')
    plt.subplot(2, 2, 3)
    plot(l, 'likelihood')
    plt.subplot(2, 2, 4)
    plot(x, 'posterior')
    plt.suptitle(str(iter))

    # --- update of the row scores y ---
    p = Dy.dot(y)
    p /= p.sum()
    l = M.dot(x.dot(Dx))
    l /= l.sum()
    post = p * l
    post /= post.sum()
    y = post
    ploty(y)

    print('%s %s %s %s' % (
        'AUC',
        evl.ROCforAllMethods(pd.concat([y, Y], axis=1), showPlot=False, doplot=False)[0].AUC[0],
        'Rank',
        x.rank(method='min', ascending=False).loc[50000],
    ))

    plt.figure()
    plt.subplot(3, 1, 1)
    plot(M.sum())
    plt.subplot(3, 1, 2)
    plot(x)
    plt.subplot(3, 1, 3)
    ploty(y)

    return (
        (xali.rank(method='min', ascending=False)).loc[50000],
        (x.rank(method='min', ascending=False)).loc[50000],
        evl.ROCforAllMethods(pd.concat([y, Y], axis=1), showPlot=False, doplot=False)[0].AUC[0],
    )