Example #1
def _func(
    self,
    key,
    signature_datasets,
    signature_profile,
    signature_CUTOFF,
    pyvis,
    #           WORKDIR,
):
    vdf = signature_datasets
    # project each row of the dataset onto the signature profile -> one scalar score per row
    signature_score = vdf.dot(signature_profile)
    # convert the scores into empirical percentile ranks (the 'percentage' axis below)
    ppf = pyext.dist2ppf(signature_score)
    silent = 1  # set to 0 to show the diagnostic scatter plot
    if not silent:
        # diagnostic scatter of score vs. percentile, zoomed in on the top of the distribution
        fig, ax = plt.subplots(1, 1)
        ax.scatter(ppf, signature_score)
        ax.set_ylabel('signature_score')
        ax.set_xlabel('percentage')
        ax.set_xlim(0.95, 1.01)
        ax.grid(1)
        pyvis.abline(x0=signature_CUTOFF)  # vertical line marking the cutoff

    # e.g. signature_CUTOFF = 0.99 keeps roughly the top 1% of scores
    _targets = vdf.index[ppf > signature_CUTOFF]
    # write the selected identifiers to a temporary file named after `key`
    pyext.printlines(_targets, pyext.f('_temp-{key}-.it'))
    return _targets
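
For readers without the author's pyext/pyvis helpers, the core of this example (score each row against a profile, rank the scores as percentiles, keep everything above a cutoff) can be sketched with plain pandas. This is a minimal, hypothetical stand-in: it assumes pyext.dist2ppf is an empirical percentile transform, which Series.rank(pct=True) approximates, and the name select_top_by_signature is invented for illustration.

import pandas as pd

def select_top_by_signature(datasets, profile, cutoff=0.99):
    # Hypothetical stand-in for the example above, without the pyext/pyvis helpers.
    # datasets: DataFrame (rows = items, columns = features); profile: Series over the same features.
    score = datasets.dot(profile)           # one scalar score per row
    ppf = score.rank(pct=True)              # empirical percentile rank of each score, in (0, 1]
    targets = datasets.index[ppf > cutoff]  # e.g. cutoff=0.99 keeps roughly the top 1%
    return score, targets
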
Example #2
def qc_libsize(dfc0, silent=1, ax=None, n=20):
    '''
    Correct the library-size deviation using lowly varying (low-SD) genes.
    '''
    if not isinstance(dfc0, scount.countMatrix):
        dfc0 = scount.countMatrix(dfc0.copy())
    dfc0.qc_Avg()

    def getLoss(per, debug=0, ax=None, estimator=np.median):
        # Select "lowly varying" genes whose SD percentile is below `per`, estimate a
        # per-sample offset from them, subtract it, and measure the gene-wise SD that remains.
        vdf = dfc0.copy()
        index = vdf.summary.query('per_SD < %s' % per).index
        vals = vdf.reindex(index).values
        const = estimator(vals, axis=0)[None]  # per-sample offset, e.g. the median over the selected genes
        vdf = vdf.setDF(vdf.values - const)

        sd = vdf.qc_Avg().summary['SD']
        lossA, lossB = sd.median(), sd.mean()
        if debug == 1:
            if ax is None:
                ax = plt.gca()
            vv = vals.T[-4]  # one sample, picked for a quick visual check
            pyvis.histoLine(vv, 30)
            ax.plot(vv.mean(), 0.05, 'x')
            ax.plot(np.median(vv), 0.07, 'x')
            return vdf
        if debug == 2:
            return const, vdf
        return lossA, lossB

    xs = np.linspace(0, 1, n + 1)[1:]  # candidate SD-percentile thresholds
    res = np.array([getLoss(x) for x in xs])  # (median SD, mean SD) for each threshold

    xmin = xs[np.argmin(res.T[0])]  # threshold minimising the median residual SD

    if not silent:
        if ax is None:
            ax = plt.gca()

        ax.plot(xs, res.T[0])  # median SD vs. threshold
        ax.twinx().plot(xs, res.T[1], 'go')  # mean SD on a secondary axis
        ax.set_title(res.min(axis=0))
        ax.grid(1)
        pyvis.abline(x0=xmin)  # mark the chosen threshold

    const, vdf = getLoss(xmin, debug=2)  # recompute at the best threshold: per-sample offset + corrected matrix
    return const, vdf
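
The same threshold scan can be written without the scount.countMatrix helpers. The sketch below is hypothetical: it assumes the 'per_SD' summary column is the percentile rank of each gene's standard deviation, that the input is a genes x samples DataFrame, and the name qc_libsize_sketch is invented for illustration.

import numpy as np

def qc_libsize_sketch(df, n=20, estimator=np.median):
    # Hypothetical re-statement of the loop above without the scount helpers.
    # df: log-expression DataFrame, rows = genes, columns = samples.
    sd_rank = df.std(axis=1).rank(pct=True)        # stand-in for the 'per_SD' summary column

    def loss(per):
        low_var = df[sd_rank < per]                # "lowly varying" genes below the SD percentile
        const = estimator(low_var.values, axis=0)  # per-sample offset estimated from those genes
        resid_sd = (df - const).std(axis=1)        # gene-wise SD after removing the offset
        return resid_sd.median(), resid_sd.mean()

    xs = np.linspace(0, 1, n + 1)[1:]
    res = np.array([loss(x) for x in xs])
    best = xs[np.argmin(res.T[0])]                 # threshold minimising the median residual SD
    const = estimator(df[sd_rank < best].values, axis=0)
    return const, df - const
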
Example #3
    def worker(args):
        i, r = args  # (setting index, random seed)
        nIter = 100
        alias = 'i-%d_r-%d' % (i, r)

        mdl0 = pyjob.job__cluster__mixtureVMF__incr(
            normalizeSample=0,  #### set to 1 to normalize the vector length
            tdf=tdf,
            meanNorm=1,  ##### perform X = X - E(X), i.e. mean-centre the data
            weighted=True,
            init_method='random',
            nIter=nIter,
            start=0.2,  #### temperature range for the annealing schedule
            end=0.7,
            #         betas = betas, #### alternatively, pass a callable for temperature
            randomState=r,
            alias='mdl_' + alias,  #### filename of cache produced
            verbose=2,
            K=60,
        )

        ##### produce diagnostic plot
        YCUT = entropy_cutoff = 2.5  # entropy cutoff used below for cluster assignment
        XCUT = step = 30  # training step at which the fitted model is taken

        axs = pycbk.qc__vmf__speed(
            mdl0,
            #                                XCUT=step,YCUT=entropy_cutoff  ### not working yet
        )
        fig = plt.gcf()
        ax = fig.axes[0]
        pyvis.abline(y0=YCUT, k=0, ax=ax)  # horizontal line at the entropy cutoff
        pyvis.abline(x0=XCUT, k=0, ax=ax)  # vertical line at the chosen step
        figs['diagnostic-plot'] = fig

        #### use the model recorded at `step` to predict cluster membership
        mdls = mdl0.callback.mdls  #### a model is recorded at every step
        mdl = mdls[step][-1]  #### the last model recorded at that step
        clu = mdl.predictClu(tdf, entropy_cutoff=entropy_cutoff)
        clu.to_csv('cluster.csv')  ### save the cluster assignment

        # heatmap with rows ordered by cluster assignment
        pyvis.heatmap(tdf.reindex(clu.sort_values('clu').index),
                      figsize=[14, 7])
        figs['clustered-heatmap'] = plt.gcf()
        return (alias, fig)
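
A worker like this is typically mapped over a grid of (setting, seed) pairs. The driver below is only a sketch: it assumes worker, tdf and the figs dict exist in the enclosing scope (as in the snippet above), and the grid sizes are made up for illustration.

import itertools

n_settings, n_seeds = 3, 2  # assumed grid sizes, for illustration only
pairs = itertools.product(range(n_settings), range(n_seeds))
results = dict(map(worker, pairs))  # worker returns (alias, fig), so this maps alias -> diagnostic figure
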