# NOTE(review): `np` is not imported anywhere in the visible part of this file;
# it is assumed to be bound (e.g. `import numpy as np`) before this point -- confirm.
np.set_printoptions(linewidth=200, precision=5, suppress=True)

import pandas as pd
pd.options.display.max_rows = 20
pd.options.display.expand_frame_repr = False

import seaborn as sns
import pylab as plt
import matplotlib as mpl

import os
home = os.path.expanduser('~') + '/'

import popgen.Util as utl
import popgen.Estimate as est
import popgen.Kyrgys.Utils as kutl

# kutl.scan1000('LCT')
# kutl.scan1000('EDAR')

# Print (do not execute) the shell command that extracts the first two
# tab-separated fields of every non-header VCF line into a ".pos" file.
# Written as a single parenthesised expression so that it behaves the same
# under the Python 2 print statement and the Python 3 print function.
print((
    'cd {} && grep -v "#" Kyrgyz_merged_all34_NoChr_filter1_info.vcf'
    ' | cut -f1,2 > Kyrgyz_merged_all34_NoChr_filter1_info.vcf.pos'
).format(kutl.path + 'data/'))

# `reload` is the Python 2 builtin; it re-imports popgen.Util in a live session.
reload(utl)

# utl.createAnnotation(kutl.path+'data/Kyrgyz_merged_all34_NoChr_filter1_info.vcf',db='hg38')
kutl.createMap()


def createCADD():
    """Join the CADD table with the coordinate map stored in ``map.df``.

    Shell commands recorded here by the original author (presumably the steps
    that produce the input files -- confirm)::

         less 1000G_phase3_inclAnno.tsv.gz | cut -f1,2 > coord.hg19.tsv
         bedtools intersect -sorted -a Kyrgyz.hg19.tsv -wb -b ../CADD/1000G_phase3_inclAnno.tsv > CADD.hg19.tsv

    Steps performed:

    1. Read ``data/CADD.hg19.tsv`` (tab-separated, no header) from
       ``kutl.path``, drop its first three columns, rename columns 3 and 4 to
       ``CHROM`` and ``POS``, sort by both and index by ``CHROM``.
    2. Read ``data/map.df``, drop rows with missing values, cast every column
       to int, append column ``19`` to the index and keep column ``38`` as a
       Series named ``POShg38`` (presumably hg19 -> hg38 positions -- confirm).
    3. Count the missing values per column of ``map.df`` (result discarded).
    4. For the first 10000 rows of the CADD table, grouped by ``CHROM``, merge
       each group with the map entries of that chromosome on
       ``19 == POS`` and drop the first two columns of the merge result.

    Returns:
        None. The merged frame is computed but neither stored nor returned.
    """
    data_dir = kutl.path + 'data/'

    # --- CADD annotations, indexed by chromosome -------------------------
    raw_cadd = pd.read_csv(data_dir + 'CADD.hg19.tsv', sep='\t', header=None)
    cadd_table = raw_cadd.iloc[:, 3:].rename(columns={3: 'CHROM', 4: 'POS'})
    cadd_table = cadd_table.sort_values(['CHROM', 'POS']).set_index('CHROM')

    # --- coordinate map: column 38 keyed by (existing index, column 19) ---
    def as_int(column):
        return column.astype(int)

    complete_map = pd.read_pickle(data_dir + 'map.df').dropna().apply(as_int)
    hg38_positions = complete_map.set_index(19, append=True)[38].rename('POShg38')

    # Missing-value count per column; the value is not used (exploratory check).
    pd.read_pickle(data_dir + 'map.df').isnull().sum()

    def attach_hg38(chrom_rows):
        # `chrom_rows.name` is the group key (the CHROM value); the map is
        # looked up by its string form.
        chrom_map = hg38_positions.loc[str(chrom_rows.name)]
        chrom_map = chrom_map.sort_index().reset_index()
        joined = pd.merge(chrom_map, chrom_rows, left_on=19, right_on='POS')
        return joined.iloc[:, 2:]

    # Only the first 10000 rows are processed; the result is discarded.
    cadd_table.iloc[:10000].groupby(level=0).apply(attach_hg38)