Ejemplo n.º 1
0
def profileMu():
    """Plot an LD profile of the data set returned by ``loadData()``.

    Steps, in order:

    1. Compute the D' LD matrix with ``est.Estimate.LD``.
    2. Line-plot, for every row, the fraction of entries above 0.99.
    3. Build a per-site table from ``mh.sort_mutations`` (without its
       ``'haf'`` column) and join it, together with the column sums of the
       positive D' entries, onto the LD matrix.
    4. Sort the rows by column ``0`` and then ``'p'``, both descending, and
       reorder the columns to follow the sorted row labels (the last eight
       columns are kept at the end).
    5. Draw the reordered square LD part with ``plt.imshow``.

    Returns nothing; the only outputs are the two plots.
    """
    haplotypes = loadData()

    dprime = est.Estimate.LD(haplotypes, measure='DPrime')
    near_complete = dprime > 0.99
    near_complete.mean(1).plot()

    # One Series per statistic returned by sort_mutations; transposing puts
    # the statistics in columns and the sites in rows.
    stat_rows = [
        pd.Series(values, name=label)
        for label, values in mh.sort_mutations(haplotypes.values).items()
    ]
    site_stats = pd.DataFrame(stat_rows).T
    site_stats.index = haplotypes.columns
    site_stats = site_stats.loc[:, site_stats.columns != 'haf']

    positive_ld_total = dprime[dprime > 0].sum()
    combined = pd.concat([dprime, positive_ld_total, site_stats], axis=1)
    combined = combined.sort_values([0, 'p'], ascending=False)

    # Put the LD columns in the same order as the sorted rows.
    column_order = np.append(combined.index.values, combined.columns[-8:])
    combined = combined[column_order]
    plt.imshow(combined.loc[:, combined.index])
Ejemplo n.º 2
0
def PCANumDistinct(M):
    """Draw a 3-D scatter of three per-site statistics of ``M``.

    Every column (site) of ``M`` becomes one point with coordinates

    * x -- its score on the first PCA component fitted on ``M.T``,
    * y -- the column sum of the squared Rho LD matrix,
    * z -- the ``f`` column of the table built from ``mh.sort_mutations``.

    The site(s) at ``M.siteUnderSelection`` are drawn a second time in red.

    Parameters
    ----------
    M : DataFrame-like
        Must support ``.T``, ``.values``, ``.iloc`` and carry a
        ``siteUnderSelection`` attribute holding positional column index(es).

    Returns
    -------
    None
        The plot is drawn on matplotlib figure number 1.
    """
    X = decomposition.PCA(n_components=2).fit(M.T).transform(M.T)
    df = pd.DataFrame(
        [pd.Series(v, name=k) for k, v in mh.sort_mutations(M.values).items()]
    ).T
    X2 = (est.Estimate.LD(M, measure='Rho') ** 2).sum().values
    # Alternative z statistic that was tried previously: df.N / df.f
    X3 = df.f.values

    # 0/1 indicator per site, used only to colour the points.
    Y = M.iloc[0, :] * 0
    Y.iloc[M.siteUnderSelection] = 1

    selected = M.siteUnderSelection
    fig = plt.figure(1, figsize=(8, 6))
    ax = Axes3D(fig, elev=-150, azim=110)
    ax.scatter(X[:, 0], X2, X3, c=Y, cmap=plt.cm.Paired)
    # A colormap has no effect next to a literal colour, so none is passed.
    ax.scatter(X[selected, 0], X2[selected], X3[selected], c='r')

    # Only the x axis is a PCA score; the y and z labels name the quantity
    # actually plotted on them.
    ax.set_xlabel("1st eigenvector")
    ax.set_ylabel("sum of squared LD (Rho)")
    ax.set_zlabel("f (mh.sort_mutations)")
Ejemplo n.º 3
0
def LDA():
    """Fit a 2-topic LDA model on freshly loaded data and save scatter plots.

    Phase 1 repeats 20 times: load a data set with ``loadData()``, fit
    ``LatentDirichletAllocation`` on ``M.values``, scatter topic 0 against
    topic 1 for every site (the site labelled ``M.posUnderSelection`` in
    red) and save the figure as ``utl.home + 'lda2/<i>.png'``.

    Phase 2 uses the data set and model of the last repetition: for every
    topic index it draws three 2-D panels plus one 3-D panel and saves them
    as ``utl.home + 'lda2/<i>.<j>.<k>.png'``.

    Returns nothing; the results are the PNG files written to disk.
    """
    # Imports are placed ahead of every statement of the body: an import
    # inside a function makes its name local to the whole function, so
    # ``utl`` has to be bound before the first ``utl.home`` lookup.
    from matplotlib.backends.backend_pdf import PdfPages
    import popgen.Util as utl

    plt.figure(figsize=(8, 8), dpi=100)
    for i in range(20):
        M = loadData()
        df = pd.DataFrame(
            [pd.Series(v, name=k) for k, v in mh.sort_mutations(M.values).items()]
        ).T
        df.index = M.columns

        numTopics = 2
        # NOTE(review): newer scikit-learn releases name this parameter
        # ``n_components`` -- confirm against the installed version.
        lda = LatentDirichletAllocation(
            n_topics=numTopics,
            max_iter=100,
            learning_method='online',
            learning_offset=50.,
            random_state=0,
        )
        lda.fit(M.values)
        # One row per site, one column per topic.
        topics = pd.DataFrame(list(lda.components_), columns=M.columns).T
        X = topics.values

        # Turn interactive mode off as a statement of its own; its return
        # value is not an Axes and must not be handed to ``ax=``.
        plt.ioff()
        topics.plot.scatter(x=0, y=1)
        topics[topics.index == M.posUnderSelection].plot.scatter(
            ax=plt.gca(), x=0, y=1, color='r')
        plt.title('Ali Rank={}'.format(
            df.p.rank(ascending=False).loc[M.posUnderSelection]))
        plt.savefig(utl.home + 'lda2/{}.png'.format(i))

    plt.ion()
    plt.ioff()

    for i in range(numTopics):
        # j and k are pinned to i (the nested loops over them are disabled),
        # so every panel currently plots topic i against itself.
        j = i
        k = i
        com = [i, j, k]
        fig = plt.figure(figsize=(8, 6))
        rank_title = 'Ali Rank={}'.format(
            df.p.rank(ascending=False).loc[M.posUnderSelection])

        # (subplot slot, x topic, y topic, whether the panel gets a title)
        panels = [
            (1, com[0], com[1], True),
            (2, com[0], com[2], False),
            (3, com[1], com[2], True),
        ]
        for slot, x_topic, y_topic, titled in panels:
            ax = fig.add_subplot(2, 2, slot)
            topics.plot.scatter(ax=ax, x=x_topic, y=y_topic)
            topics[topics.index == M.posUnderSelection].plot.scatter(
                ax=plt.gca(), x=x_topic, y=y_topic, color='r')
            if titled:
                plt.title(rank_title)
            plt.xlabel('topic {}'.format(x_topic))
            plt.ylabel('topic {}'.format(y_topic))

        ax = fig.add_subplot(2, 2, 4, projection='3d', elev=-150, azim=110)
        ax.scatter(X[:, com[0]], X[:, com[1]], X[:, com[2]], cmap=plt.cm.Paired)
        ax.scatter(X[M.siteUnderSelection, com[0]],
                   X[M.siteUnderSelection, com[1]],
                   X[M.siteUnderSelection, com[2]],
                   c='r', s=100, cmap=plt.cm.Paired)
        ax.set_xlabel("Topic {}".format(com[0]))
        ax.set_ylabel("Topic {}".format(com[1]))
        ax.set_zlabel("Topic {}".format(com[2]))

        plt.savefig(utl.home + 'lda2/{}.{}.{}.png'.format(i, j, k))
        plt.close(fig)



def profileMu():
    """Plot an LD profile of the data set returned by ``loadData()``.

    Steps, in order:

    1. Compute the D' LD matrix with ``est.Estimate.LD``.
    2. Line-plot, for every row, the fraction of entries above 0.99.
    3. Build a per-site table from ``mh.sort_mutations`` (without its
       ``'haf'`` column) and join it, together with the column sums of the
       positive D' entries, onto the LD matrix.
    4. Sort the rows by column ``0`` and then ``'p'``, both descending, and
       reorder the columns to follow the sorted row labels (the last eight
       columns are kept at the end).
    5. Draw the reordered square LD part with ``plt.imshow``.

    Returns nothing; the only outputs are the two plots.
    """
    haplotypes = loadData()

    dprime = est.Estimate.LD(haplotypes, measure='DPrime')
    near_complete = dprime > 0.99
    near_complete.mean(1).plot()

    # One Series per statistic returned by sort_mutations; transposing puts
    # the statistics in columns and the sites in rows.
    stat_rows = [
        pd.Series(values, name=label)
        for label, values in mh.sort_mutations(haplotypes.values).items()
    ]
    site_stats = pd.DataFrame(stat_rows).T
    site_stats.index = haplotypes.columns
    site_stats = site_stats.loc[:, site_stats.columns != 'haf']

    positive_ld_total = dprime[dprime > 0].sum()
    combined = pd.concat([dprime, positive_ld_total, site_stats], axis=1)
    combined = combined.sort_values([0, 'p'], ascending=False)

    # Put the LD columns in the same order as the sorted rows.
    column_order = np.append(combined.index.values, combined.columns[-8:])
    combined = combined[column_order]
    plt.imshow(combined.loc[:, combined.index])
Ejemplo n.º 4
0
def runOne():
    """Exploratory scoring of sites (``x``) and rows (``y``) of one data set.

    Loads a matrix with ``loadData()``, derives a baseline per-site weight
    vector ``xali`` from the ``p`` column of ``mh.sort_mutations``, then
    combines it with SVD- and LD-based quantities into the site scores ``x``
    and the row scores ``y``. Several figures are drawn and two status lines
    are printed along the way (Python 2 ``print`` statements).

    Returns a 3-tuple:
        * rank (method='min', descending) of label 50000 in ``xali``,
        * rank (method='min', descending) of label 50000 in ``x``,
        * ``AUC[0]`` of the first result of ``evl.ROCforAllMethods`` applied
          to ``y`` concatenated with ``Y``.

    NOTE(review): as written this body does not look runnable end to end --
    see the review notes on ``plt.imshow()``, ``exp``, ``x0`` and ``iter``
    below. The label 50000 is hard-coded throughout.
    """
    # Not read anywhere in the live code; the only use is in the
    # commented-out iteration loop further down.
    maxIter=10
    # Build the per-site table, turn exp(df.p) into weights that sum to 1
    # (indexed by M's column labels) and print the descending rank of the
    # site labelled 50000.
    M=loadData();df=pd.DataFrame([pd.Series(v,name=k) for k,v in mh.sort_mutations(M.values).items()]).T;xali=np.exp(df.p);xali.index=M.columns;xali/=xali.sum();print 'ali Rank:',xali.rank(ascending=False).loc[50000]
    # D=(M.shape[1]-(M.dot(M.T)+(1-M).dot((1-M).T))).applymap(lambda x: x**2);Dy=D.apply(lambda x:x/x.sum(),axis=1)
    # normMy: Euclidean norm of every row (one-column DataFrame);
    # normMx: Euclidean norm of every column (Series).
    normMy=pd.DataFrame(M.apply(lambda x: np.linalg.norm(x),axis=1));normMx=pd.DataFrame(M.apply(lambda x: np.linalg.norm(x),axis=0))[0]
    # Row-by-row dot products divided by the outer product of the row norms,
    # i.e. the cosine similarity between rows of M.
    Dy=M.dot(M.T)/normMy.dot(normMy.T);#Dy=D.apply(lambda x:x/x.sum(),axis=1)
    # Element-wise square of the Rho LD matrix.
    Dx=est.Estimate.LD(M,measure='Rho').applymap(lambda x: x**2);
    plt.imshow(Dy)
    # Column(s) of M at the positional index M.siteUnderSelection; used as
    # the second column of the frame handed to evl.ROCforAllMethods.
    Y=M.iloc[:,M.siteUnderSelection]
    # y=M.iloc[:,M.siteUnderSelection].copy(True);y.name='y';x=xali.copy(True);x0=x.copy(True);y=y-y+1
    # # MM=M.copy(True)
    # M=MM.copy(True);ld=(est.Estimate.LD(M,measure='Rho').applymap(lambda x: (x**2,0)[x<0.9]));Dx=M.T.dot(M);Dy=M.dot(M.T);Dx=Dx.apply(lambda x:x/x.sum(),axis=1);Dy=Dy.apply(lambda x:x/x.sum(),axis=1);
    #
    # Initial row scores: exp of the negated first left singular vector of M,
    # divided by its range and then shifted so the minimum is 0. The AUC of
    # these scores is printed.
    y=pd.Series(-np.linalg.svd(M)[0][:,0]);y=np.exp(y);y/=y.max()-y.min();y-=y.min();print 'AUC',evl.ROCforAllMethods( pd.concat([y,Y],axis=1),showPlot=False,doplot=False)[0].AUC[0]
    # Initial site scores start as a copy of the baseline weights.
    x=xali.copy(True)
        # ;x/=x.sum();
    # for iter in range(maxIter):
        # print 'iter',iter,50*'*'
    # N: M without duplicated columns; O: N without duplicated rows
    # (the last occurrence is kept in both cases).
    N=M.T.drop_duplicates(keep='last').T
    O=N.drop_duplicates(keep='last')
    ld=est.Estimate.LD(O,measure='Rho').abs()
    # Side-by-side table of five per-site quantities; rows containing any
    # NaN after the concat are dropped.
    a=pd.concat([pd.Series(np.exp(-(df.N/df.f).values),index=M.columns)*ld.sum(),xali,M.mean(),ld.sum(),O.mean()],axis=1).dropna()
    # NOTE(review): `.shape` binds to the Series, so this multiplies `a` by a
    # shape tuple, and the result is discarded -- looks like leftover
    # interactive exploration.
    a*pd.Series(abs(np.linalg.svd(O)[2][:,0]),index=a.index).shape
    # The next two expressions are evaluated and discarded.
    a.rank(ascending=False).loc[50000]
    a.sort_values(0)
    # NOTE(review): imshow is called without any image data; matplotlib's
    # imshow takes the data as a required argument, so this line is expected
    # to raise TypeError -- confirm which matrix was meant.
    plt.imshow()

    # NOTE(review): bare `exp` here, whereas the same expression above uses
    # `np.exp`; this only works if `exp` is in scope from elsewhere -- confirm.
    a=pd.concat([pd.Series(exp(-(df.N/df.f).values),index=M.columns),xali,M.mean(),Dx.sum()],axis=1)
    # The next two expressions are evaluated and discarded.
    a.rank(ascending=False).loc[50000]
    y.loc[O.index].dot(O).rank(ascending=False).loc[50000]
    # Site update: p = current x times the absolute first row of U from the
    # SVD of the squared Rho LD matrix.
    p=x*pd.Series((np.linalg.svd(est.Estimate.LD(M,measure='Rho').applymap(lambda x: x**2).values)[0][0]),index=M.columns).abs()
    # l = per-site sum over the rows whose y is above the median, divided by
    # the column norms and normalised to sum 1; x becomes the normalised
    # product p*l.
    l=((y>y.quantile(0.5)).astype(int)).dot(M)/normMx;l/=l.sum();          post=p*l;post/=post.sum();x=post
    # NOTE(review): `x0` is never assigned in the live code (its only
    # assignment is in a commented-out line above), `plot` is not defined in
    # this block, and `iter` is not bound locally because the loop is
    # commented out, so str(iter) refers to whatever `iter` means at module
    # scope -- confirm.
    plt.subplot(2,2,1);plot(x0,'x0');plt.subplot(2,2,2);plot(p,'prior');plt.subplot(2,2,3);plot(l,'likelihood');plt.subplot(2,2,4);plot(x,'posterior');plt.suptitle(str(iter))
    # M=M.apply(lambda  xx: xx*x,axis=1)
    # Dx=M.T.dot(M).apply(lambda  xx: xx*x,axis=1);Dy=M.dot(M.T).apply(lambda  xx: xx*y);Dx=Dx.apply(lambda x:x/x.sum(),axis=1);Dy=Dy.apply(lambda x:x/x.sum(),axis=1);
    # Row update: p = Dy.dot(y) and l = M.dot(x.dot(Dx)), each normalised to
    # sum 1; y becomes their normalised product.
    p=Dy.dot(y);p/=p.sum();     l=M.dot(x.dot(Dx));l/=l.sum();         post=p*l;post/=post.sum();y=post
    # `ploty` is not defined in this block; presumably a plotting helper
    # defined elsewhere -- verify.
    ploty(y)
    # M=M.apply(lambda x: x*y);M/=M.max().max()
    # Dx=M.T.dot(M).apply(lambda  xx: xx*x,axis=1);Dy=M.dot(M.T).apply(lambda  xx: xx*y);Dx=Dx.apply(lambda x:x/x.sum(),axis=1);Dy=Dy.apply(lambda x:x/x.sum(),axis=1);


    # Report the AUC of the updated y and the rank of label 50000 in x.
    print 'AUC',evl.ROCforAllMethods( pd.concat([y,Y],axis=1),showPlot=False,doplot=False)[0].AUC[0],'Rank',x.rank(method='min',ascending=False).loc[50000]
    # New figure with three stacked panels: column sums of M, x, and y.
    plt.figure();plt.subplot(3,1,1);plot(M.sum());plt.subplot(3,1,2);plot(x);plt.subplot(3,1,3);ploty(y)


    # (baseline rank of label 50000, updated rank of label 50000, final AUC)
    return (xali.rank(method='min',ascending=False)).loc[50000],(x.rank(method='min',ascending=False)).loc[50000],evl.ROCforAllMethods( pd.concat([y,Y],axis=1),showPlot=False,doplot=False)[0].AUC[0]