예제 #1
0
def qc_cumAvg(
        X,
        axis=0,
        silent=1,
        axs=None,
        nMax=int(1E6),
        ybin=None,
):
    '''Calculate the running mean and standard error along one axis.

    The sample axis ``axis`` is moved to the front and every other axis is
    flattened, giving a 2-D array of shape ``(L, K)``.  For each prefix
    ``X[:n]`` (n = 1..L) the mean, the population standard deviation divided
    by ``sqrt(n)`` (standard error) and ``|SE / mean|`` are computed.

    Parameters
    ----------
    X : array-like
        Input data; converted with ``np.array``.
    axis : int
        Axis of ``X`` that indexes the samples to accumulate over.
    silent : bool-like
        When falsy, a 2-D QC plot of sample size versus ``|SE / mean|`` is
        drawn through ``pyvis.qc_2var``.
    axs : list or None
        Axes passed to ``pyvis.qc_2var``.  When plotting and ``axs`` is None,
        a new 1x3 figure is created and ``[None, None, first_axis]`` is used.
    nMax : int
        Maximum number of flattened columns kept; when exceeded, ``nMax``
        column indices are drawn at random with ``np.random.randint``
        (i.e. with replacement).
    ybin : array-like or None
        Bin edges for the y axis of the plot; derived from
        ``pyutil.span(CV, 99.)`` when None.

    Returns
    -------
    ((M, SE, CV, Lx), axs)
        ``M``, ``SE``, ``CV`` and ``Lx`` all have shape ``(L, K)``; ``Lx``
        holds the sample size (1..L) of each row.  ``axs`` is the value
        passed in, or the list created for plotting.
    '''
    X = np.array(X)
    # Bring the sample axis to the front, then flatten the remaining axes so
    # that every column is an independent series of L samples.
    X = np.moveaxis(X, axis, 0)
    X = np.reshape(X, (len(X), -1))
    if len(X.T) > nMax:
        ind = np.random.randint(0, len(X.T), nMax)
        X = X[:, ind]

    L = len(X)
    Lx = (1 + np.arange(L))[:, None]

    # The sample axis is always 0 at this point, whatever ``axis`` was on
    # entry; accumulating along ``axis`` here would use the wrong dimension.
    Ex = np.cumsum(X, axis=0) / Lx
    Ex2 = np.cumsum(np.square(X), axis=0) / Lx

    M = Ex
    # E[x^2] - E[x]^2 can come out marginally negative through floating-point
    # cancellation, which would turn the square root into NaN.
    VAR = np.maximum(Ex2 - np.square(Ex), 0)
    SD = np.sqrt(VAR)
    SE = SD / np.sqrt(Lx)
    CV = SE / M
    CV = abs(CV)
    Lx = np.broadcast_to(Lx, SE.shape)
    if not silent:
        if axs is None:
            fig, axs = plt.subplots(1, 3, figsize=[14, 4])
            axs = [None, None, axs[0]]
        X, Y = Lx, CV
        ybin = np.linspace(*pyutil.span(Y, 99.),
                           num=80) if ybin is None else ybin
        # One bin per integer sample size, centred on the integer.
        xbin = np.arange(0.5, L + 1, 1)
        pyvis.qc_2var(
            X,
            Y,
            xbin=xbin,
            ybin=ybin,
            ylab=r'$\left| {StdErr} / {Mean} \right |$',
            xlab='sample size',
            axs=axs,
        )
    return (M, SE, CV, Lx), axs
예제 #2
0
def plotModel(m, X):
    """Plot the cluster assignment of ``X`` and a sample drawn from the model.

    Prints the model weights in descending order, draws a 2-D QC plot of the
    first two columns of ``X`` coloured by ``m.predict(X)``, then draws a
    second plot of ``len(X)`` points sampled from ``m.x_post``.

    Parameters
    ----------
    m : fitted model
        Must expose ``predict``, ``weights_`` and ``x_post.sample(n).eval()``.
    X : array
        Data whose first two columns (``X.T[0]``, ``X.T[1]``) are plotted.
    """
    clu = m.predict(X)
    # Function-call form so the block is valid on Python 3 as well as 2.
    print(sorted(m.weights_)[::-1])
    pyvis.qc_2var(X.T[0], X.T[1], clu=clu)
    # NOTE(review): second predict call kept from the original; presumably it
    # refreshes model state before sampling -- confirm whether it is needed.
    _ = m.predict(X)
    Y = m.x_post.sample(len(X)).eval()
    pyvis.qc_2var(
        Y.T[0],
        Y.T[1],
    )
예제 #3
0
def qc_Avg(
        C,
        silent=1,
        axis=1,
        **kwargs):
    """Compute mean, standard deviation and their ratio along ``axis``.

    Parameters
    ----------
    C : array-like
        Input data; converted with ``np.array``.
    silent : bool-like
        When falsy, a mean-versus-std QC plot is drawn with
        ``pyvis.qc_2var``.
    axis : int
        Axis along which the statistics are taken.
    **kwargs
        Forwarded to ``pyvis.qc_2var`` when plotting.

    Returns
    -------
    ((MEAN, STD, CV), axs)
        ``CV`` is ``STD / MEAN``.  ``axs`` is the return value of
        ``pyvis.qc_2var`` when plotting, otherwise an empty list.
    """
    C = np.array(C)
    MEAN = C.mean(axis=axis)
    STD = C.std(axis=axis)
    CV = STD / MEAN
    # The histogram-bin computation that used to sit here was never consumed
    # and made the function raise on empty input, so it has been dropped.
    if silent:
        return (MEAN, STD, CV), []
    axs = pyvis.qc_2var(MEAN, STD, xlab='$E(X)$', ylab='$Std(X)$', **kwargs)
    return (MEAN, STD, CV), axs
예제 #4
0
def qc_PCA(df, xi=0, yi=1, xlab=None, ylab=None, **kwargs):
    """Fit a PCA on ``df`` and scatter two of its components.

    ``xi`` and ``yi`` select the rows of ``res.trans_data.T`` used for the
    x and y coordinates.  Axis labels default to ``'PC<index>'``.  Extra
    keyword arguments are forwarded to ``pyvis.qc_2var``.

    Returns ``(res, axs)``: the object returned by ``smod.fit_PCA`` and the
    return value of ``pyvis.qc_2var``.
    """
    fitted = smod.fit_PCA(df)
    components = fitted.trans_data.T
    xs, ys = components[[xi, yi]]
    xlab = 'PC%d' % xi if xlab is None else xlab
    ylab = 'PC%d' % yi if ylab is None else ylab
    axes = pyvis.qc_2var(xs, ys, xlab=xlab, ylab=ylab, **kwargs)
    return fitted, axes
# Script fragment: `sel`, `df`, `bwFiles`, `DATA_ACC_LIST`, `colGroupMean`,
# `OUTPUT_BED_FILE` and `OUTPUT_CSV_FILE` are defined outside this view.

# Report the sum of `sel` (presumably a boolean row mask, i.e. the number of
# selected rows -- TODO confirm).
print(pyext.np.sum(sel))
# Write the selected rows to a temporary tab-separated file without header.
# NOTE(review): assuming `df` is a pandas DataFrame, `to_csv` with a path
# returns None, so `df` is None after this line; it is re-read from
# 'temp.bed' further down.
df = df.loc[sel].to_csv('temp.bed', sep='\t', header=None)

# Bare expression with no effect in a script (presumably notebook display
# residue).
[bwFiles]
# Extract bigwig signal for the regions in temp.bed.
res = synotil.dio.extract_bigwig_multiple(bedFile='temp.bed',
                                          outIndex=DATA_ACC_LIST,
                                          bwFiles=bwFiles,
                                          radius=300,
                                          stepSize=10)

# Collapse to one value per column group, then apply pyext.log2p1.
tab = colGroupMean(res)
tab = tab.apply(pyext.log2p1)
# Differences between two pairs of columns, used as x and y coordinates.
xs = (tab['189CS10']) - (tab['189CS11'])
ys = tab['192CS17'] - tab['192CS18']
# Flag entries whose two differences sum to more than 2.
clu = (xs + ys) > 2
pyvis.qc_2var(xs, ys, nMax=-1, xlim=[-2, 4], ylim=[-2, 4], clu=clu)

# Reload the regions written above, indexed by 'acc' (the column is kept).
df = pyext.readData('temp.bed', columns=pyext.columns.bed,
                    guess_index=0).set_index('acc', drop=0)

# OUTPUT_FILE = 'OUTPUT/0918-elf3target.bed'
pyext.dir__real(dirname="OUTPUT")

# Bare expression with no effect in a script.
[OUTPUT_BED_FILE]
# Save the flagged rows as tab-separated text without header or index.
df.loc[clu].to_csv(OUTPUT_BED_FILE, sep='\t', header=None, index=0)

# Bare expression with no effect in a script.
[OUTPUT_CSV_FILE]
# Same rows again, comma-separated.
df.loc[clu].to_csv(OUTPUT_CSV_FILE, sep=',', header=None, index=0)

#### making gene list
FNAME = OUTPUT_BED_FILE
예제 #6
0
파일: jobs.py 프로젝트: shouldsee/synotil
def job__chipTargPaired(
    bwCurr=None,
    bwMeta=None,
    control=None,
    treatment=None,
    xlab=None,
    ylab=None,
    name=None,
    NCORE=2,
    params__peakBW=None,
    CUTOFF_FC=3.0,
    CUTOFF_CHIPDIFF=0.7,
    innerRadius=100,
):
    '''Select peaks whose signal differs between a pair of tracks.

    Steps performed:

    1. Resolve the pair ``(xlab, ylab)`` from ``control``/``treatment``, the
       explicit labels, or the index of ``bwCurr``.  When labels are given
       and ``bwCurr`` is None, it is taken as ``bwMeta.reindex([xlab, ylab])``.
    2. Scatter ``FC`` against its ``pyutil.dist2ppf`` transform for every
       file in ``bwCurr.npkFile``.
    3. Filter each peak file with ``FC > CUTOFF_FC``, concatenate the results
       with ``cat`` into ``'<index joined by ->-combined.bed'`` and pass that
       through ``sdio.npk_expandSummit(radius=1)``.
    4. Run ``sjob.figs__peakBW`` on the combined peaks and
       ``bwCurr.RPKMFile``.
    5. Keep peaks satisfying
       ``val_<ylab> - val_<xlab> > CUTOFF_CHIPDIFF`` on the padded average
       table, and plot ``xlab`` versus ``ylab`` with the selection marked.

    Side effects (all relative to the working directory): writes the
    combined bed file, ``<sanitised query>.csv``,
    ``<peakBase>-<sanitised query>.bed``, a link ``output/<name>.bed`` and
    ``FILE.json``; runs shell commands through ``pyutil.shellexec``.

    Parameters
    ----------
    bwCurr : DataFrame-like or None
        Track metadata; the attributes ``index``, ``header``, ``npkFile`` and
        ``RPKMFile`` are read.
    bwMeta : DataFrame-like or None
        Full metadata, used only to derive ``bwCurr`` when it is None.
    control, treatment : label or None
        When both are given they override ``xlab`` and ``ylab``.
    xlab, ylab : label or None
        Labels of the two tracks to compare.
    name : str or None
        Job name; defaults to ``'<xlab>-<ylab>'``.
    NCORE : int
        Put into the default ``params__peakBW``.
    params__peakBW : dict or None
        Keyword arguments for ``sjob.figs__peakBW``.  The caller's dict is
        not modified; ``innerRadius`` always overrides its entry of the
        same name.
    CUTOFF_FC : float
        Threshold used in the ``FC>...`` peak filter.
    CUTOFF_CHIPDIFF : float
        Threshold used in the pairwise difference query.
    innerRadius : int
        Forwarded to ``sjob.figs__peakBW`` through ``params__peakBW``.

    Returns
    -------
    (figs, clu)
        ``figs`` is an ordered dict of figures; ``clu`` is the padded average
        table with an added boolean column ``clu`` marking selected peaks.
    '''
    figs = pyutil.collections.OrderedDict()

    if control is not None and treatment is not None:
        xlab, ylab = control, treatment
    if xlab is None or ylab is None:
        xlab, ylab = bwCurr.index
    elif bwCurr is None:
        bwCurr = bwMeta.reindex([xlab, ylab])

    if params__peakBW is None:
        params__peakBW = dict(
            outerRadius=500,
            innerRadius=innerRadius,
            NCORE=NCORE,
            outIndex=bwCurr.header,
        )
    else:
        # Work on a copy so the caller's dict is not mutated below.
        params__peakBW = dict(params__peakBW)
    params__peakBW['innerRadius'] = innerRadius

    if name is None:
        name = '{xlab}-{ylab}'.format(**locals())

    dfs = map(
        sdio.extract_peak,
        bwCurr.npkFile,
    )

    # Diagnostic scatter of fold change against its dist2ppf transform.
    fig, ax = plt.subplots(1, 1, figsize=[7, 7])
    for df in dfs:
        df['per_FC'] = pyutil.dist2ppf(df.FC)
        df.plot.scatter('per_FC', 'FC', ax=ax)

    fnames = [
        pyutil.queryCopy(infile=fname,
                         query='FC>%.3f' % CUTOFF_FC,
                         reader=sdio.extract_peak,
                         inplace=False) for fname in bwCurr.npkFile
    ]

    # NOTE: `peakFlat` and `ofname` are consumed through locals() below.
    peakFlat = ' '.join(fnames)
    ofname = '%s-combined.bed' % ('-'.join(bwCurr.index))
    pyutil.shellexec('cat {peakFlat}>{ofname}'.format(**locals()))
    ofname = sdio.npk_expandSummit(fname=ofname, radius=1)

    pyutil.lineCount(ofname)
    peakFileOrig = peakFile = ofname

    res = sjob.figs__peakBW(peakFile=peakFile,
                            bwFiles=bwCurr.RPKMFile,
                            name=name,
                            **params__peakBW)
    figs.update(res[0])

    bwTrack, bwAvg = res[1]
    # Rename columns from track header to the index labels of bwCurr.
    bwAvg.columns = bwAvg.columns.map(
        pyutil.df2mapper(bwCurr, 'header', 'index').get)

    xs, ys = bwAvg[[xlab, ylab]].values.T

    clu = pd.DataFrame(pyutil.df__pad(bwAvg))
    query = ' val_{ylab} - val_{xlab} > {CUTOFF_CHIPDIFF} '.format(**locals())
    qsans = pyutil.sanitise_query(query)
    # `peakIndex` must stay a local: the eval below refers to it by name.
    peakIndex = clu.query(query).index
    clu['clu'] = clu.eval('index in @peakIndex')

    stats = sdio.extract_peak(peakFile).set_index('acc', drop=0)
    # Left-hand side of the query, i.e. the difference itself.
    stats['CHIPDIFF'] = clu.eval(query.split('>')[0])

    pyvis.qc_2var(xs, ys, clu=clu.clu, xlab=xlab, ylab=ylab)
    figs['scatterPlot__%s' % name] = plt.gcf()
    cluFile = ofname = qsans + '.csv'
    clu.to_csv(ofname)
    print(ofname, pyutil.lineCount(ofname))
    peakBase = pyutil.getBname(peakFile)
    ofname = '{peakBase}-{qsans}.bed'.format(**locals())
    peakFile = pyutil.to_tsv(stats.reindex(peakIndex), ofname)
    pyutil.shellexec('mkdir -p output/')
    pyutil.file__link(ofname, 'output/%s.bed' % name, force=True)

    # The three keys below are looked up by name in locals(); do not rename
    # `cluFile`, `peakFile` or `peakFileOrig`.
    pyutil.fileDict__save(d=locals(),
                          keys=['cluFile', 'peakFile', 'peakFileOrig'],
                          fname='FILE.json')
    return figs, clu
예제 #7
0
# In[ ]:

# Fit a 2-D GMM_VIMAP model (other settings left at their defaults) to 500
# uniformly random points and plot it.
m = mym.GMM_VIMAP(D=2)

X = np.random.random(size=(500, 2))
m.fit(X)
plotModel(m, X)
# Keep a reference to this first model under a separate name.
m_diag = m

# In[ ]:

# Replace X with toy data generated with K=3 and plot its first two columns.
X = toyData(K=3)
# print X.shape
pyvis.qc_2var(
    X.T[0],
    X.T[1],
)

# In[ ]:

# Fit a GMM_VIMAP with K=3 to the toy data and plot it.
m = mym.GMM_VIMAP(D=2, K=3, name='testB')
m.fit(X)
plotModel(m, X)

# In[ ]:

# Same data with the GMMLRP_VIMAP variant; `mi` only feeds the model name.
mi = 5
m = mym.GMMLRP_VIMAP(name='t%d' % mi, D=2, K=3).init_model()
m.fit(X=X)
plotModel(m, X)
예제 #8
0
        ax = axs[i]
        plt.sca(ax)
        prof.plot(xticks=range(len(tdf.columns)), rot='vertical')

        i += 1
        ax = axs[i]
        plt.sca(ax)

        per_score = pd.Series(per_score, tdf.index)
        clu = per_score > 0.95

        ##### adding diagnostic plots
        xs, ys = tdf.summary.MSQ, score
        pyvis.qc_2var(xs,
                      ys,
                      axs=[None, ax, None, None],
                      clu=clu,
                      nMax=len(clu))
        pyvis.add_text(xs, ys, keyDF.BioName, ax=ax)

        figs['qc_TempReponse'] = fig
        stats['tempResponsive'] = clu
        scores['tempResponsive'] = per_score
        ###-------------------------------------------
        ##############################################

        ##############################################
        ###-------------------------------------------
        _ = '''
        For each gene derive its PIF7-knockout responsiveness by
        calculating at its dot-product similarity with a set of