def scanSFS():
    """Plot windowed scores, then the change in SFSelect between two scans.

    Steps, in order:
      1. Load the scores, summarise them per window with ``utl.scanGenome``
         (mean absolute score and SNP count) and plot the result with the
         windows above the 0.99 quantile highlighted.
      2. Run ``scanOne`` with the SFSelect estimator on ``rutl.getNut(0)``
         and ``rutl.getNut(59)`` (presumably generations 0 and 59, going by
         the ``'SFS(59)-SFS(0)'`` column label -- confirm against ``rutl``).
      3. Draw a Manhattan plot of the difference next to both scans and the
         windowed score, and save it as ``new/sfs-clear.pdf`` under
         ``utl.paperPath``.

    Takes no arguments and returns nothing; output is the saved figure(s).
    """
    scores = rutl.loadScores()
    field = comale
    window_stats = {
        field: lambda x: x.abs().mean(),
        'Num. of SNPs': lambda x: x.size,
    }
    df = sort(utl.scanGenome(scores.abs(), window_stats))[[field, 'Num. of SNPs']]
    plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')

    nu0 = rutl.getNut(0)
    nut = rutl.getNut(59)
    reload(rutl)

    # n is hard-coded; a data-derived value was used at some point and is
    # left here, disabled, for reference:
    # n= int(pd.read_pickle(utl.outpath + 'real/CD.F59.df').loc[:,pd.IndexSlice[:,0,'D']].mean().mean())
    n = 100
    # The same estimator is applied to both scans.
    SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=n)
    sf0 = scanOne(nu0, SFSelect, 'SFSelect.Base', 'SFSelect.Base')
    sft = scanOne(nut, SFSelect, 'SFSelect.Final', 'SFSelect.Final')

    # Column 0 holds the difference; the remaining columns are the two scans
    # and the first column of the windowed score table.
    difference = (sft.iloc[:, 0] - sf0.iloc[:, 0]).rename('SFS(59)-SFS(0)')
    sfr = pd.concat(
        [difference, sf0.iloc[:, 0], sft.iloc[:, 0], df.iloc[:, 0]],
        axis=1)

    # Outliers are selected before the negative differences are blanked out.
    outlier = sfr[sfr.iloc[:, 0] > sfr.iloc[:, 0].quantile(0.99)]
    negative_rows = (sfr.iloc[:, 0] < 0).values
    sfr.loc[negative_rows, sfr.columns[0]] = None

    fig = plt.figure(figsize=(7, 4.5), dpi=300)
    pplt.Manhattan(data=sfr, Outliers=outlier, fig=fig, markerSize=2, ticksize=8,
                   sortedAlready=True)
    for ax in fig.get_axes():
        pplt.setSize(ax, 5)
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('sfs-clear'))
def saveTopKSNPs():
    """Write the top-scoring SNPs, joined with their annotation, to CSV.

    The scores are combined with the SNP IDs, and only entries strictly above
    the 0.9999 quantile are kept. Those are inner-joined with
    ``rutl.getNut(0)`` and joined with four annotation columns, then
    de-duplicated and sorted by ``'Hstatistic'`` in descending order. Rows
    whose ``'Annotation_Impact'`` equals ``'LOW'`` are dropped before the
    table is written to ``real/top_1e-4_quantile_SNPs.csv`` under
    ``utl.outpath``.

    Takes no arguments and returns nothing.
    """
    scores = rutl.loadScores()
    annotation_columns = "Annotation Annotation_Impact Gene_Name Gene_ID".split()
    ann = loadANN()[annotation_columns]

    # Attach the SNP IDs as an extra index level; column 0 is the score.
    scored_ids = pd.concat([scores, rutl.loadSNPIDs()], axis=1)
    scored_ids = scored_ids.set_index('ID', append=True)
    hstat = scored_ids[0].rename('Hstatistic')

    above_cutoff = hstat[hstat > hstat.quantile(0.9999)]
    top = above_cutoff.reset_index('ID').join(rutl.getNut(0), how='inner')

    top = top.join(ann)
    top = top.drop_duplicates()
    top = top.sort_values('Hstatistic', ascending=False)

    keep = top['Annotation_Impact'] != 'LOW'
    top[keep].to_csv(utl.outpath + 'real/top_1e-4_quantile_SNPs.csv')
def scanSFSSNPbased():
    """Scan absolute scores with three window sizes and plot candidates.

    Steps, in order:
      1. Load the scores (``skipHetChroms=True``) and reload the helper
         modules.
      2. Average the absolute scores with ``utl.scanGenomeSNP`` for
         ``winSize`` 200, 500 and 1000 (``step=100``), and multiply the three
         tracks into a ``'comb'`` column.
      3. Save a Manhattan plot of all tracks (``new/SNPbased.pdf``) and draw
         the combined track with ``pplt.Genome``.
      4. Mark ``localOutliers`` of the combined track
         (``new/SNPbased.candidates.pdf``).
      5. For each of the cutoff columns 0.95, 0.99 and 0.999 returned by
         ``FDR``, mark the windows whose combined value exceeds the cutoff
         (``new/SNPbased.fdr<level>.pdf``).

    NOTE(review): the source formatting of this function was lost. Steps 4-5
    are assumed to belong to this function because they read its locals
    ``df`` and ``scores`` -- confirm against the original layout.

    Takes no arguments and returns nothing; output is the saved figures.
    """

    def _finish_and_save(figure, tag):
        # Shared tail of every Manhattan plot: resize axes, pad, write PDF.
        for ax in figure.get_axes():
            pplt.setSize(ax, 5)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format(tag))

    scores = rutl.loadScores(skipHetChroms=True)
    reload(rutl)
    reload(pplt)
    reload(utl)

    # The next values are not used further in this function (genes and chroms
    # only appear in a disabled pplt.Genome call). They are still built so
    # that the calls they make keep happening.
    g = ga.loadGeneCoordinates().set_index('name')
    genes = g.loc[['Ace', 'Cyp6g1', 'CHKov1']].reset_index().set_index('CHROM')
    shade = scores.sort_values().reset_index().iloc[-2:].rename(columns={'POS': 'start'})
    shade['end'] = shade.start + 100
    cand = pd.concat(
        [scores,
         scores.rank(ascending=False).rename('rank'),
         rutl.getNut(0, skipHetChroms=True)],
        axis=1).sort_values('rank')
    chroms = ['2L', '2R', '3L', '3R']
    reload(utl)

    # (winSize, skipFromFirst) per track; each track is named by its winSize.
    window_settings = [(200, 900), (500, 750), (1000, 500)]
    df = pd.concat(
        [utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=size, step=100,
                           skipFromFirst=skip).rename(size)
         for size, skip in window_settings],
        axis=1)
    df['comb'] = df[200] * df[500] * df[1000]

    fig = plt.figure(figsize=(7, 4.5), dpi=300)
    pplt.Manhattan(data=sort(df.rename(columns={'comb': '200*500*1000'})), fig=fig,
                   markerSize=2, ticksize=8, sortedAlready=True)
    _finish_and_save(fig, 'SNPbased')

    pplt.Genome(df.comb)
    plt.tight_layout(pad=0.1)

    # Disabled driver calls kept from the original text:
    # analyzie(); scanSFS(); outlier(); scanSFSSNPbased()

    a = df.comb
    o = localOutliers(a, q=0.9)
    fig = plt.figure(figsize=(7, 1.5), dpi=300)
    pplt.Manhattan(data=a, Outliers=pd.DataFrame(o), fig=fig, markerSize=2, ticksize=8,
                   sortedAlready=True)
    _finish_and_save(fig, 'SNPbased.candidates')

    # Absolute score plus a running 0-based counter 'i' within each level-0
    # group of the index.
    abs_scores = scores.rename('scores').abs()
    position_in_group = scores.groupby(level=0).apply(
        lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)).rename('i')
    Scores = pd.concat([abs_scores, position_in_group], axis=1)

    cutoff = FDR(o, Scores)
    a = pd.concat([df, cutoff[cutoff.sum(1) > 0]], axis=1).dropna()
    for fdr in [0.95, 0.99, 0.999]:
        o = a[a.comb > a[fdr]]
        fig = plt.figure(figsize=(7, 1.5), dpi=300)
        pplt.Manhattan(data=df.comb, Outliers=pd.DataFrame(o), fig=fig, markerSize=2,
                       ticksize=8, sortedAlready=True)
        _finish_and_save(fig, 'SNPbased.fdr{}'.format(fdr))