Ejemplo n.º 1
0
pplt.Manhattan(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome2tail(s))))
(x.s*(x.alt-x.null)).hist(bins=100)
D=cdAll.xs('D',axis=1,level='READ')
d=D.median(1).rename('d')
f=lambda x:(x.alt-x.null)
pplt.Manhattan(utl.scanGenome(x2p(f(x))))
x2p=lambda X2: -pd.Series(1 - sc.stats.chi2.cdf(X2, 1),index=X2.index).apply(np.log)
y=(f(pd.read_pickle('/home/arya/out/real/HMM/h5.000000E-01.df')[0.5]).loc[chroms].rename('y')*pd.read_pickle('/home/arya/out/real/HMM/h5.000000E-01.df')[0.5].s).dropna()
y.sort_values()
y=utl.zpvalgenome(pd.read_pickle('/home/arya/out/real/HMM/h5.000000E-01.df')[0.5].s.loc[chroms])
i=utl.getEuChromatin(y.sort_values()).index[-20]

pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(y.abs())))
pplt.GenomeChromosomewise(utl.scanGenome(utl.zpvalgenome(s)))

scan=pd.concat([utl.scanGenome(utl.zpvalgenome(s)).rename('win'),utl.scanGenomeSNP(utl.zpvalgenome(s)).rename('snp')],1)
pplt.Manhattan(scan)
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(utl.zpvalgenome(s))))
pplt.GenomeChromosomewise(utl.zpvalgenome(utl.scanGenome(scores.abs())))
reload(utl)
pplt.GenomeChromosomewise(utl.scanGenomeSNP(utl.zpvalgenome2tail(s)))
scores.sort_values()
pplt.GenomeChromosomewise(utl.scanGenomeSNP(scores.abs(),lambda x: x[x>=x.quantile(0.5)].sum()))
df=pd.concat([scores,s],1);df=pd.concat([df,df.rank()],1,keys=['val','rank']).sort_values(('val','s'))
dfy=pd.concat([df,y],1).dropna()
dfy.sort_values(0)

i=df.index[-1];
cdi=cdAll.loc[i];print cdi.unstack('REP');pplt.plotSiteReal(cdi)
cdiun=cdi.unstack('REP')
CD,E=dta.precomputeCDandEmissionsFor(pd.DataFrame(cdi).T)
Ejemplo n.º 2
0
def scanSFSSNPbased():
    scores = rutl.loadScores(skipHetChroms=True)
    # field = comale;
    # df = sort(utl.scanGenome(scores.abs(), {field: lambda x: x.abs().mean(), 'Num. of SNPs': lambda x: x.size}))[
    #     [field, 'Num. of SNPs']]
    # plotOne(df, df[df[field] > df[field].quantile(0.99)], fname='all')
    reload(rutl)
    reload(pplt)
    reload(utl)
    # SFSelect = lambda x: est.Estimate.getEstimate(x=x, method='SFSelect', n=100)
    # sfs0 = utl.scanGenomeSNP(rutl.getNut(0, skipHetChroms=True), SFSelect)
    # sfst = utl.scanGenomeSNP(rutl.getNut(59, skipHetChroms=True), SFSelect).rename(59);     sfs=(sfst-sfs0);    sfs[sfs<0]=None
    g = ga.loadGeneCoordinates().set_index('name')
    genes = g.loc[['Ace', 'Cyp6g1', 'CHKov1']].reset_index().set_index('CHROM')

    shade = scores.sort_values().reset_index().iloc[-2:].rename(columns={'POS': 'start'});
    shade['end'] = shade.start + 100
    cand = pd.concat([scores, scores.rank(ascending=False).rename('rank'), rutl.getNut(0, skipHetChroms=True)],
                     axis=1).sort_values('rank')
    chroms = ['2L', '2R', '3L', '3R']
    reload(utl)

    # reload(pplt);pplt.Genome(sfs.loc[chroms],genes=genes);plt.tight_layout(pad=0.1)
    df = pd.concat(
            [utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=200, step=100, skipFromFirst=900).rename(200),
             utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=500, step=100, skipFromFirst=750).rename(500),
             utl.scanGenomeSNP(scores.abs(), lambda x: x.mean(), winSize=1000, step=100, skipFromFirst=500).rename(
                 1000)], axis=1)
    df['comb'] = df[200] * df[500] * df[1000]

    fig = plt.figure(figsize=(7, 4.5), dpi=300);
    pplt.Manhattan(data=sort(df.rename(columns={'comb': '200*500*1000'})), fig=fig, markerSize=2, ticksize=8,
                   sortedAlready=True);
    [pplt.setSize(ax, 5) for ax in fig.get_axes()];
    plt.gcf().subplots_adjust(bottom=0.15);
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('SNPbased'))
    pplt.Genome(df.comb);
    plt.tight_layout(pad=0.1)

    # analyzie()
    # scanSFS()
    # outlier()
    # scanSFSSNPbased()
    a = df.comb
    o = localOutliers(a, q=0.9);
    fig = plt.figure(figsize=(7, 1.5), dpi=300);
    pplt.Manhattan(data=a, Outliers=pd.DataFrame(o), fig=fig, markerSize=2, ticksize=8, sortedAlready=True);
    [pplt.setSize(ax, 5) for ax in fig.get_axes()];
    plt.gcf().subplots_adjust(bottom=0.15);
    plt.savefig(utl.paperPath + 'new/{}.pdf'.format('SNPbased.candidates'))

    Scores = pd.concat([scores.rename('scores').abs(), scores.groupby(level=0).apply(
        lambda x: pd.Series(range(x.size), index=x.loc[x.name].index)).rename('i')], axis=1)
    cutoff = FDR(o, Scores);

    a = pd.concat([df, cutoff[cutoff.sum(1) > 0]], axis=1).dropna();
    for fdr in [0.95, 0.99, 0.999]:
        o = a[a.comb > a[fdr]]
        fig = plt.figure(figsize=(7, 1.5), dpi=300);
        pplt.Manhattan(data=df.comb, Outliers=pd.DataFrame(o), fig=fig, markerSize=2, ticksize=8, sortedAlready=True);
        [pplt.setSize(ax, 5) for ax in fig.get_axes()];
        plt.gcf().subplots_adjust(bottom=0.15);
        plt.savefig(utl.paperPath + 'new/{}.pdf'.format('SNPbased.fdr{}'.format(fdr)))