# Exploratory scratch statements, laid out one per line in their original order.
# NOTE(review): `s`, `x`, `scores`, `chroms` and `cdAll` are not bound anywhere
# in this span -- presumably they come from earlier in the file or from an
# interactive session; confirm before running this as a script.

pplt.Manhattan(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome2tail(s))))
(x.s * (x.alt - x.null)).hist(bins=100)

D = cdAll.xs('D', axis=1, level='READ')
d = D.median(1).rename('d')


def f(x):
    """Return ``x.alt - x.null``."""
    return x.alt - x.null


# NOTE(review): `x2p` is only (re)bound just below this call, so this line
# relies on a binding made earlier -- confirm.
pplt.Manhattan(utl.scanGenome(x2p(f(x))))


def x2p(X2):
    """Return minus the natural log of ``1 - chi2.cdf(X2, 1)``, indexed like X2."""
    upper_tail = pd.Series(1 - sc.stats.chi2.cdf(X2, 1), index=X2.index)
    return -upper_tail.apply(np.log)


# The pickle is read separately for each operand, exactly as before.
y = (
    f(pd.read_pickle('/home/arya/out/real/HMM/h5.000000E-01.df')[0.5]).loc[chroms].rename('y')
    * pd.read_pickle('/home/arya/out/real/HMM/h5.000000E-01.df')[0.5].s
).dropna()
y.sort_values()

# `y` is rebound here; everything below uses this second definition.
y = utl.zpvalgenome(
    pd.read_pickle('/home/arya/out/real/HMM/h5.000000E-01.df')[0.5].s.loc[chroms]
)
i = utl.getEuChromatin(y.sort_values()).index[-20]

pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(y.abs())))
pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(s)))

scan = pd.concat(
    [
        utl.scanGenome(utl.zpvalgenome(s)).rename('win'),
        utl.scanGenomeSNP(utl.zpvalgenome(s)).rename('snp'),
    ],
    axis=1,
)
pplt.Manhattan(scan)

pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome(s))))
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(scores.abs())))

reload(utl)
pplt.GenomeChromosomewise(utl.scanGenomeSNP(utl.zpvalgenome2tail(s)))
scores.sort_values()
# Window statistic: sum of the values at or above the window's median.
pplt.GenomeChromosomewise(
    utl.scanGenomeSNP(scores.abs(), lambda x: x[x >= x.quantile(0.5)].sum())
)

# Values and their ranks side by side, ordered by the ('val', 's') column.
df = pd.concat([scores, s], axis=1)
df = pd.concat([df, df.rank()], axis=1, keys=['val', 'rank']).sort_values(('val', 's'))
dfy = pd.concat([df, y], axis=1).dropna()
dfy.sort_values(0)

# Inspect the last row of the sorted table.
i = df.index[-1]
cdi = cdAll.loc[i]
print(cdi.unstack('REP'))
pplt.plotSiteReal(cdi)
cdiun = cdi.unstack('REP')
CD, E = dta.precomputeCDandEmissionsFor(pd.DataFrame(cdi).T)
def scanSFSSNPbased():
    """Scan absolute scores with SNP-count windows and save Manhattan plots.

    Loads scores with ``rutl.loadScores(skipHetChroms=True)``, averages their
    absolute values through ``utl.scanGenomeSNP`` at ``winSize`` 200, 500 and
    1000 (``step=100``), and stores the product of the three tracks in a
    ``comb`` column.  PDFs are written under ``utl.paperPath + 'new/'``:
    ``SNPbased.pdf``, ``SNPbased.candidates.pdf`` and one
    ``SNPbased.fdr<level>.pdf`` for each level in 0.95, 0.99 and 0.999.

    Returns nothing; the function is run for its plotting side effects.

    NOTE(review): the original indentation was lost.  Everything after the
    commented-out driver calls is assumed to belong to this function because
    it uses the local ``df`` and ``scores`` -- confirm.
    """

    def _finish(figure, tag):
        # Call pplt.setSize(..., 5) on every axis of `figure`, adjust the
        # bottom margin of the current figure, then save under `tag`.
        for axis in figure.get_axes():
            pplt.setSize(axis, 5)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format(tag))

    # Keyword arguments shared by every pplt.Manhattan call below.
    manhattan_style = dict(markerSize=2, ticksize=8, sortedAlready=True)

    scores = rutl.loadScores(skipHetChroms=True)
    # Disabled experiment, kept for reference:
    # field = comale;
    # df = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))[
    #     [field, 'Num. of SNPs']]
    # plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')
    reload(rutl)
    reload(pplt)
    reload(utl)
    # Disabled experiment, kept for reference:
    # SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=100)
    # sfs0 = utl.scanGenomeSNP(rutl.getNut(0, skipHetChroms=True), SFSelect)
    # sfst = utl.scanGenomeSNP(rutl.getNut(59, skipHetChroms=True), SFSelect).rename(59); sfs=(sfst-sfs0); sfs[sfs<0]=None

    # `genes`, `shade`, `cand` and `chroms` are only referenced by the
    # disabled code; they are still computed so the same loader calls run.
    gene_table = ga.loadGeneCoordinates().set_index('name')
    genes = gene_table.loc[['Ace', 'Cyp6g1', 'CHKov1']].reset_index().set_index('CHROM')
    shade = scores.sort_values().reset_index().iloc[-2:].rename(columns={'POS': 'start'})
    shade['end'] = shade.start + 100
    cand = pd.concat(
        [
            scores,
            scores.rank(ascending=False).rename('rank'),
            rutl.getNut(0, skipHetChroms=True),
        ],
        axis=1,
    ).sort_values('rank')
    chroms = ['2L', '2R', '3L', '3R']
    reload(utl)
    # reload(pplt);pplt.Genome(sfs.loc[chroms],genes=genes);plt.tight_layout(pad=0.1)

    # One mean-|score| track per (winSize, skipFromFirst) pair; each column is
    # named after its window size.
    window_tracks = []
    for win_size, skip in ((200, 900), (500, 750), (1000, 500)):
        track = utl.scanGenomeSNP(
            scores.abs(), lambda x: x.mean(),
            winSize=win_size, step=100, skipFromFirst=skip,
        )
        window_tracks.append(track.rename(win_size))
    df = pd.concat(window_tracks, axis=1)
    df['comb'] = df[200] * df[500] * df[1000]

    # All tracks plus their product in one figure.
    fig = plt.figure(figsize=(7, 4.5), dpi=300)
    pplt.Manhattan(
        data=sort(df.rename(columns={'comb': '200*500*1000'})),
        fig=fig,
        **manhattan_style
    )
    _finish(fig, 'SNPbased')
    pplt.Genome(df.comb)
    plt.tight_layout(pad=0.1)

    # analyzie()
    # scanSFS()
    # outlier()
    # scanSFSSNPbased()

    # Combined track with the localOutliers(q=0.9) candidates marked.
    combined = df.comb
    outliers = localOutliers(combined, q=0.9)
    fig = plt.figure(figsize=(7, 1.5), dpi=300)
    pplt.Manhattan(
        data=combined, Outliers=pd.DataFrame(outliers), fig=fig, **manhattan_style
    )
    _finish(fig, 'SNPbased.candidates')

    # |score| next to each SNP's 0-based position within its level-0 group.
    Scores = pd.concat(
        [
            scores.rename('scores').abs(),
            scores.groupby(level=0).apply(
                lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)
            ).rename('i'),
        ],
        axis=1,
    )
    cutoff = FDR(outliers, Scores)
    with_cutoffs = pd.concat([df, cutoff[cutoff.sum(1) > 0]], axis=1).dropna()

    # One figure per level: rows whose `comb` exceeds that level's cutoff column.
    for fdr in [0.95, 0.99, 0.999]:
        passing = with_cutoffs[with_cutoffs.comb > with_cutoffs[fdr]]
        fig = plt.figure(figsize=(7, 1.5), dpi=300)
        pplt.Manhattan(
            data=df.comb, Outliers=pd.DataFrame(passing), fig=fig, **manhattan_style
        )
        _finish(fig, 'SNPbased.fdr{}'.format(fdr))