Beispiel #1
0
# Re-import the project helper module so recent edits to it take effect
# in the running session.
reload(kutl)

# Number of positions added on each side of the focal position when the
# shaded interval is widened further down in this script.
padding = 500000

# Load the pickled table, rename the VCF-style '#CHROM' column, and index
# by (CHROM, POS); ID is put in the index and then dropped as level 2.
a = pd.read_pickle('/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Human/WNG_1000GP_Phase3/EDAR.dfreq.pkl')
a = (
    a.reset_index()
    .rename(columns={'#CHROM': 'CHROM'})
    .set_index(['CHROM', 'POS', 'ID'])
)
a.index = a.index.droplevel(2)

# First column of the table, kept under the name `xx`.
xx = a.iloc[:, 0]

# Headerless two-column CSV: column 0 becomes the index, column 1 the values.
pops = pd.read_csv(
    '/media/arya/d4565cf2-d44a-4b67-bf97-226a486c01681/Data/Human/WNG_1000GP_Phase3/counts.csv',
    header=None,
).set_index(0)[1]

# Interactive inspection only: neither result is stored.
pops.apply(np.log)
pops.shape

# For every column, scan in windows (winSize=100000) and count the entries
# strictly between 0 and 1 in each window.
m = a.apply(
    lambda column: utl.scanGenome(
        column,
        f=lambda w: w[(w > 0) & (w < 1)].size,
        winSize=100000,
    )
)

# Add a grand total so the 'ALL' column can be scaled like the others.
pops['ALL'] = pops.sum()

# Scale the window counts by `pops`, then drop every column containing NaN
# (transpose / dropna / transpose).
mm = (m / pops).T.dropna().T

# Express each column relative to the 'ALL' column.
mm = mm.apply(lambda col: col / mm['ALL'])
# Per-gene section: locate the focal position, build a padded shading
# interval, load the per-gene statistics table and draw two Manhattan plots.
# NOTE(review): the loop header below is commented out, so `gene` is never
# assigned in the visible code; `gene`, `ppath` and `pplt` must already exist
# in the session for the following lines to run -- confirm which gene(s)
# this is meant to cover.
# for gene in ['EDAR','LCT']:
# Project helpers return the focal position and a shading table for the gene
# (their exact return types are not visible here).
pos,shade=kutl.getPosShade(gene,kutl.getNpop(gene)[-1])
# Collapse the shaded interval onto the single focal position.
shade.start=int(pos);shade.end=int(pos)
# Map the interval between coordinate systems (arguments 38 and 19;
# presumably genome builds hg38 -> hg19 -- TODO confirm). `unmap` is not
# used in the lines below.
shade,unmap=utl.BED.xmap_bed(shade.reset_index(),38,19)
# Widen the interval by `padding` on both sides.
shade.start=shade.start.astype(int)-padding;shade.end=shade.end.astype(int)+padding
# Load the gene's pickled table, reverse the order of its three column
# levels, keep the cross-section where the (new) level 2 equals 100, and drop
# rows containing NaN.
z=pd.read_pickle(ppath+'{}.df'.format(gene)).reorder_levels([2,1,0],1).xs(100,level=2,axis=1).dropna()
# Alternative statistics the author tried (left disabled):
# z=kutl.getStats(z)
    # .apply(kutl.normalize);
# z=z['Fst']*z['SFSel']
# 'Pi' column of 1 - case/all and of 1 - control/all, as floats without NaN.
x=(1-z['case']/z['all'] )['Pi'].dropna().astype(float)
y=(1-z['control']/z['all'] )['Pi'].dropna().astype(float)
# Case-minus-control difference; overwrites the earlier `xx` and is not
# passed to either plot below.
xx=x-y
# Cross-section 'm' on column level 1; overwrites the earlier `m`.
m=z.xs('m',axis=1,level=1)
# Optional clean-up steps the author left disabled:
# x[x>1]=None
# x=x-x.mean()
# First figure: `x` and `m` side by side, shaded around the focal position.
pplt.Manhattan(pd.concat([x,m],1),shade=shade.set_index('CHROM'));plt.suptitle(gene)
# Second figure: the normalised window counts `mm` computed earlier.
# NOTE(review): `shade` is passed without set_index('CHROM') here, unlike the
# call above -- confirm this is intentional.
pplt.Manhattan(mm,shade=shade,ticksize=6);plt.suptitle(gene)
Beispiel #2
0
# Session setup for the Kyrgyz analysis helpers (Python 2 script):
# imports, console display options, and one-off data preparation calls.
import os

# numpy is needed by np.set_printoptions below but was never imported in
# this snippet, which raised NameError on the first statement.
import numpy as np
import pandas as pd
import seaborn as sns
import pylab as plt
import matplotlib as mpl

import popgen.Util as utl
import popgen.Estimate as est
import popgen.Kyrgys.Utils as kutl

# Console display options for numpy arrays and pandas frames.
np.set_printoptions(linewidth=200, precision=5, suppress=True)
pd.options.display.max_rows = 20
pd.options.display.expand_frame_repr = False

# User's home directory with a trailing slash.
home = os.path.expanduser('~') + '/'

# Optional scans, left disabled by the author:
# kutl.scan1000('LCT')
# kutl.scan1000('EDAR')

# Show the shell command that extracts the first two columns of the VCF
# (comment lines removed) into a .pos file. The command is only printed, not
# executed. A single parenthesised argument prints identically under
# Python 2 and also parses under Python 3.
print('cd {} && grep -v "#" Kyrgyz_merged_all34_NoChr_filter1_info.vcf  | cut -f1,2 > Kyrgyz_merged_all34_NoChr_filter1_info.vcf.pos'.format(kutl.path+'data/'))

# Pick up any edits made to the utility module during this session
# (`reload` is the Python 2 builtin).
reload(utl)
# utl.createAnnotation(kutl.path+'data/Kyrgyz_merged_all34_NoChr_filter1_info.vcf',db='hg38')
kutl.createMap()

def createCADD():
    """Match rows of the CADD table to the project's position map.

    Shell commands recorded by the original author (presumably used to
    prepare the input files -- TODO confirm):

        less 1000G_phase3_inclAnno.tsv.gz | cut -f1,2  > coord.hg19.tsv

    Reads ``data/CADD.hg19.tsv`` and ``data/map.df`` under ``kutl.path``.
    In the statements visible here, the merged result is neither stored nor
    returned.
    """
    # Second recorded shell command, kept as a bare string (no runtime effect).
    ' bedtools intersect -sorted -a Kyrgyz.hg19.tsv -wb -b ../CADD/1000G_phase3_inclAnno.tsv > CADD.hg19.tsv '
    # Headerless tab-separated table: drop the first three columns, name
    # columns 3 and 4 CHROM and POS, sort by them and index by CHROM.
    cad=pd.read_csv(kutl.path+'data/CADD.hg19.tsv',sep='\t',header=None).iloc[:,3:].rename(columns={3:'CHROM',4:'POS'}).sort_values(['CHROM','POS']).set_index('CHROM')
    # Position map: drop incomplete rows, cast every column to int, append
    # column 19 to the index and keep column 38 as a Series named 'POShg38'
    # (19 / 38 presumably denote hg19 / hg38 coordinates -- TODO confirm).
    coor=pd.read_pickle(kutl.path+'data/map.df').dropna().apply(lambda x: x.astype(int)).set_index(19,append=True)[38].rename('POShg38')
    # Per-column count of missing values in the map; the result is discarded
    # (useful only when run interactively).
    pd.read_pickle(kutl.path+'data/map.df').isnull().sum()
    # For the first 10000 CADD rows only, grouped by CHROM: merge each group
    # with that chromosome's map entries (looked up by the group's name as a
    # string) on map column 19 == POS, then drop the first two merged columns.
    # NOTE(review): the result is not assigned or returned, and the 10000-row
    # limit looks like a debugging restriction -- confirm.
    cad.iloc[:10000].groupby(level=0).apply(lambda x: pd.merge(coor.loc[str(x.name)].sort_index().reset_index(),x,left_on=19,right_on='POS').iloc[:,2:] )