Ejemplo n.º 1
0
# Exploratory script: per-population counts from a scanned frequency table,
# normalised by population size and by the 'ALL' column, followed by two
# Manhattan plots shaded around a gene position.
#
# NOTE(review): `gene` and `ppath` are not assigned anywhere in this section.
# The loop header that used to bind `gene` is disabled:
#     for gene in ['EDAR','LCT']:
# Both names must already be bound by the surrounding session before this
# runs; otherwise the first use below raises NameError.
# NOTE(review): `reload` is a builtin only on Python 2 -- confirm interpreter.
reload(kutl)

# Amount subtracted from `start` and added to `end` of the shaded interval.
padding = 500000

# Frequency table; re-indexed by (CHROM, POS) after renaming '#CHROM' and
# discarding the ID index level.
a = pd.read_pickle('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Human/WNG_1000GP_Phase3/EDAR.dfreq.pkl')
a = a.reset_index()
a = a.rename(columns={'#CHROM': 'CHROM'})
a = a.set_index(['CHROM', 'POS', 'ID'])
a.index = a.index.droplevel(2)

# Two-column headerless CSV: column 0 becomes the index, column 1 the values.
# An extra 'ALL' entry holds the sum of all values.
pops = pd.read_csv('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Human/WNG_1000GP_Phase3/counts.csv', header=None).set_index(0)[1]

# For every column of `a`, scan with a statistic that counts the entries
# lying strictly between 0 and 1.
# presumably scanGenome evaluates `f` per window of `winSize` -- verify
# against utl.scanGenome.
m = a.apply(lambda col: utl.scanGenome(col, f=lambda x: x[(x > 0) & (x < 1)].size, winSize=100000))
pops['ALL'] = pops.sum()

# Divide each column by its matching `pops` entry, drop every column that
# contains a NaN, then express each column relative to the 'ALL' column.
mm = (m / pops).dropna(axis=1)
mm = mm.div(mm['ALL'], axis=0)

# Build the shaded region: collapse it to the single position `pos`, convert
# it with xmap_bed, then widen it by `padding` on each side.
pos, shade = kutl.getPosShade(gene, kutl.getNpop(gene)[-1])
shade.start = int(pos)
shade.end = int(pos)
# presumably maps coordinates from build 38 to build 19 -- verify against
# utl.BED.xmap_bed.
shade, unmap = utl.BED.xmap_bed(shade.reset_index(), 38, 19)
shade.start = shade.start.astype(int) - padding
shade.end = shade.end.astype(int) + padding

# Per-gene frame: reverse the three column levels, keep the slice whose
# (new) level 2 equals 100, and drop rows containing NaN.
z = (
    pd.read_pickle(ppath + '{}.df'.format(gene))
    .reorder_levels([2, 1, 0], axis=1)
    .xs(100, level=2, axis=1)
    .dropna()
)
# Disabled alternatives kept from the original:
# z=kutl.getStats(z)
#     .apply(kutl.normalize);
# z=z['Fst']*z['SFSel']

# 1 - (group / all) for the 'Pi' column, for the 'case' and 'control' groups.
x = (1 - z['case'] / z['all'])['Pi'].dropna().astype(float)
y = (1 - z['control'] / z['all'])['Pi'].dropna().astype(float)
xx = x - y
m = z.xs('m', axis=1, level=1)  # NOTE: rebinds `m`, replacing the scan result above
# x[x>1]=None
# x=x-x.mean()

# `axis` is passed by keyword: pandas >= 2.0 rejects a positional axis in concat.
pplt.Manhattan(pd.concat([x, m], axis=1), shade=shade.set_index('CHROM'))
plt.suptitle(gene)
# NOTE(review): this call passes `shade` without set_index('CHROM'), unlike
# the call above -- confirm which form pplt.Manhattan expects.
pplt.Manhattan(mm, shade=shade, ticksize=6)
plt.suptitle(gene)