# Exploratory script: per-population window counts computed from
# EDAR.dfreq.pkl, followed by Manhattan plots of a Pi-based case/control
# contrast for `gene`.
# NOTE(review): `gene`, `ppath`, `pd`, `plt`, `pplt`, `utl` and `kutl` are not
# defined in this block; they must already exist in the running session.
reload(kutl)

padding = 500000  # subtracted from shade.start / added to shade.end below

# Load the frequency table and index it by (CHROM, POS); the ID level is
# dropped right after the index is built.
a = pd.read_pickle('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Human/WNG_1000GP_Phase3/EDAR.dfreq.pkl')
a = a.reset_index()
a = a.rename(columns={'#CHROM': 'CHROM'})
a = a.set_index(['CHROM', 'POS', 'ID'])
a.index = a.index.droplevel(2)

# Column 1 of counts.csv keyed by column 0.
# presumably population label -> sample count; verify against the file
pops = pd.read_csv('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Human/WNG_1000GP_Phase3/counts.csv', header=None).set_index(0)[1]

# For every column of `a`, scan with winSize=100000 and count the entries
# that lie strictly between 0 and 1 in each window.
m = a.apply(lambda xx: utl.scanGenome(xx, f=lambda x: x[(x > 0) & (x < 1)].size, winSize=100000))

# Add a pooled 'ALL' entry, scale each column of `m` by its matching entry in
# `pops`, drop columns containing NaN, then express every column relative to
# the 'ALL' column.
pops['ALL'] = pops.sum()
mm = (m / pops).T.dropna().T
mm = mm.apply(lambda x: x / mm['ALL'])

# NOTE(review): the loop header below is disabled, so `gene` has to be set
# before this point -- confirm the intended gene.
# for gene in ['EDAR','LCT']:
pos, shade = kutl.getPosShade(gene, kutl.getNpop(gene)[-1])
shade.start = int(pos)
shade.end = int(pos)
shade, unmap = utl.BED.xmap_bed(shade.reset_index(), 38, 19)
shade.start = shade.start.astype(int) - padding
shade.end = shade.end.astype(int) + padding

# `axis` is passed by keyword: the positional form is deprecated/removed in
# newer pandas releases while the keyword form works everywhere.
z = pd.read_pickle(ppath + '{}.df'.format(gene)).reorder_levels([2, 1, 0], axis=1).xs(100, level=2, axis=1).dropna()
# z=kutl.getStats(z)
# .apply(kutl.normalize);
# z=z['Fst']*z['SFSel']

# 1 - (group / all), restricted to the 'Pi' entry, for cases and controls.
x = (1 - z['case'] / z['all'])['Pi'].dropna().astype(float)
y = (1 - z['control'] / z['all'])['Pi'].dropna().astype(float)
xx = x - y
m = z.xs('m', axis=1, level=1)
# x[x>1]=None
# x=x-x.mean()

pplt.Manhattan(pd.concat([x, m], axis=1), shade=shade.set_index('CHROM'))
plt.suptitle(gene)
pplt.Manhattan(mm, shade=shade, ticksize=6)
plt.suptitle(gene)
# numpy is imported explicitly so this header does not rely on `np` having
# been defined earlier in the session.
import numpy as np
import pandas as pd
import seaborn as sns
import pylab as plt
import matplotlib as mpl
import os
import popgen.Util as utl
import popgen.Estimate as est
import popgen.Kyrgys.Utils as kutl

# Console display settings for numpy arrays and pandas frames.
np.set_printoptions(linewidth=200, precision=5, suppress=True)
pd.options.display.max_rows = 20
pd.options.display.expand_frame_repr = False

home = os.path.expanduser('~') + '/'

# kutl.scan1000('LCT')
# kutl.scan1000('EDAR')

# Print the shell command that extracts the first two VCF columns into a
# .pos file. The single-argument parenthesised form prints the same text
# under Python 2 and is also valid Python 3 syntax.
print('cd {} && grep -v "#" Kyrgyz_merged_all34_NoChr_filter1_info.vcf | cut -f1,2 > Kyrgyz_merged_all34_NoChr_filter1_info.vcf.pos'.format(kutl.path + 'data/'))
reload(utl)
# utl.createAnnotation(kutl.path+'data/Kyrgyz_merged_all34_NoChr_filter1_info.vcf',db='hg38')
kutl.createMap()


def createCADD(max_rows=10000):
    """Merge CADD rows with the position map, chromosome by chromosome.

    Reads ``data/CADD.hg19.tsv`` and ``data/map.df`` below ``kutl.path``.

    Shell commands recorded by the original author (presumably how the
    input files were produced -- confirm)::

        less 1000G_phase3_inclAnno.tsv.gz | cut -f1,2 > coord.hg19.tsv
        bedtools intersect -sorted -a Kyrgyz.hg19.tsv -wb -b ../CADD/1000G_phase3_inclAnno.tsv > CADD.hg19.tsv

    Args:
        max_rows: Number of leading CADD rows to process. The default of
            10000 reproduces the slice that used to be hard-coded; pass
            ``None`` to process every row.

    Returns:
        The result of the per-chromosome groupby/apply merge. Earlier
        versions computed this value but discarded it and returned ``None``.
    """
    data_dir = kutl.path + 'data/'

    # Drop the first three columns, name columns 3 and 4 CHROM/POS, sort and
    # index by chromosome.
    cad = pd.read_csv(data_dir + 'CADD.hg19.tsv', sep='\t', header=None)
    cad = cad.iloc[:, 3:].rename(columns={3: 'CHROM', 4: 'POS'})
    cad = cad.sort_values(['CHROM', 'POS']).set_index('CHROM')

    # Column 38 of the map, with column 19 appended to the index.
    # presumably 19/38 are hg19/hg38 positions; verify against kutl.createMap
    coor = pd.read_pickle(data_dir + 'map.df').dropna().apply(lambda col: col.astype(int))
    coor = coor.set_index(19, append=True)[38].rename('POShg38')

    def _merge_chrom(grp):
        # The group key is converted to str to look up the matching part of
        # `coor`; rows are joined where map column 19 equals the CADD POS.
        chrom_map = coor.loc[str(grp.name)].sort_index().reset_index()
        return pd.merge(chrom_map, grp, left_on=19, right_on='POS').iloc[:, 2:]

    return cad.iloc[:max_rows].groupby(level=0).apply(_merge_chrom)