Python examples of `pymir.pd_utils.select_dfrows`

Example #1

0

Show file

def changepoint_predictions(dsc_outdir, methods, order = 0, sfix = 1, dsc_iter = 1):
    """
    Gather the simulated data and the per-method fits/predictions of one
    changepoint DSC replicate.

    The DSC database is expected at ``<dsc_outdir>/<basename of dsc_outdir>.db``.
    It is queried for rows with ``changepoint.basis_k == order`` and
    ``changepoint.sfix == sfix``, and the result is narrowed to the rows
    whose ``DSC`` column equals ``dsc_iter``.

    Parameters
    ----------
    dsc_outdir : path of the DSC output directory.
    methods    : iterable of ``fit_cpt`` method names to collect.
    order      : value matched against ``changepoint.basis_k``.
    sfix       : value matched against ``changepoint.sfix``.
    dsc_iter   : value matched against the ``DSC`` column.

    Returns
    -------
    tuple ``(X, y, Xtest, ytest, beta, se, ypred, b0pred, b1pred)``.
    The first six items are read from the simulation file; the last three
    are dicts keyed by method name holding ``pred['yest']``,
    ``fit['intercept']`` and ``fit['beta_est']`` respectively.

    Raises
    ------
    AssertionError
        If a method does not match exactly one row, or if a method row
        points to a different simulation file than the first selected row.
    """
    dbname     = os.path.basename(os.path.normpath(dsc_outdir)) + ".db"
    dbpath     = os.path.join(dsc_outdir, dbname)
    targets    = ["changepoint", "changepoint.basis_k", "changepoint.sfix", "fit_cpt", "predict_linear"]
    conditions = [f"changepoint.basis_k == {order}", f"changepoint.sfix == {sfix}"]
    groups     = ["fit:"]
    qp         = dscQP(dbpath, targets, conditions, groups)
    outdf      = pd_utils.select_dfrows(qp.output_table, [f"$(DSC) == {dsc_iter}"])

    ypred      = dict()
    b0pred     = dict()
    b1pred     = dict()

    # Every method must have been run on this same simulation file.
    simpath0   = os.path.join(dsc_outdir, outdf.loc[outdf.index[0], 'changepoint.output.file'])

    for method in methods:
        dfrow    = pd_utils.select_dfrows(outdf, [f"$(fit_cpt) == {method}"])
        nrows    = dfrow.index.shape[0]
        # The check fails for zero rows as well as for several rows, so the
        # message reports the actual count instead of assuming "more than one".
        assert (nrows == 1), \
            f"Error! Expected exactly one row for method {method}, but {nrows} rows are selected."
        idx      = dfrow.index[0]
        fitpath  = os.path.join(dsc_outdir, dfrow.loc[idx, 'fit_cpt.output.file'])
        predpath = os.path.join(dsc_outdir, dfrow.loc[idx, 'predict_linear.output.file'])
        simpath  = os.path.join(dsc_outdir, dfrow.loc[idx, 'changepoint.output.file'])
        assert (simpath == simpath0), "Error! Different simulation file"
        pred     = flex_read(predpath)
        fit      = flex_read(fitpath)
        ypred[method]  = pred['yest']
        b1pred[method] = fit['beta_est']
        b0pred[method] = fit['intercept']

    data = flex_read(simpath0)
    return (data['X'], data['y'], data['Xtest'], data['ytest'],
            data['beta'], data['se'], ypred, b0pred, b1pred)

Example #2

0

Show file

def emvamp_mse_hist(dsc_outdir, method, dim, sfrac, pve, rho):
    """
    Compute the test-set prediction error along the stored coefficient
    history of every matching DSC run.

    The DSC database is expected at ``<dsc_outdir>/<basename of dsc_outdir>.db``.
    Rows are selected by the simulation settings (``sfrac``, ``dims``,
    ``pve``, ``rho``) and by the ``fit`` column being equal to ``method``.

    Parameters
    ----------
    dsc_outdir : path of the DSC output directory.
    method     : value matched against the ``fit`` column.
    dim        : pair whose two entries are formatted as ``'(dim[0],dim[1])'``
                 and matched against ``simulate.dims``.
    sfrac, pve, rho : values matched against ``simulate.sfrac``,
                 ``simulate.pve`` and ``simulate.rho``.

    Returns
    -------
    list of 1-D float arrays, one per selected row. Entry ``it`` of an array
    is ``RMSE / se`` on the test data for ``resdict['model'][it]``, where the
    first element of each coefficient vector is added as an offset and the
    remaining elements multiply the columns of ``Xtest``.
    """
    target     = ["simulate", "fit"]
    conditions = [f"simulate.sfrac == {sfrac}",
                  f"simulate.dims == '({dim[0]},{dim[1]})'",
                  f"simulate.pve == {pve}",
                  f"simulate.rho == {rho}"
                 ]
    groups     = None
    dbpath     = os.path.join(dsc_outdir, os.path.basename(os.path.normpath(dsc_outdir)) + ".db")
    allscores  = list()
    qp         = dscQP(dbpath, target, conditions, groups)
    outdf      = pd_utils.select_dfrows(qp.output_table, [f"$(fit) == {method}"])
    for idx in outdf.index.to_numpy():
        fitpath    = os.path.join(dsc_outdir, outdf.loc[idx, 'fit.output.file'])
        simpath    = os.path.join(dsc_outdir, outdf.loc[idx, 'simulate.output.file'])
        resdict    = flex_read(fitpath)
        datadict   = flex_read(simpath)
        bhat_hist  = resdict['model']
        Xtest      = datadict['Xtest']
        se         = datadict['se']
        # Flatten the targets once. Residuals are taken between two 1-D
        # vectors so that a 1-D prediction cannot silently broadcast against
        # a column vector into an (n, n) matrix and corrupt the RMSE.
        ytest      = np.ravel(datadict['ytest'])
        niter      = len(bhat_hist)
        scores     = np.zeros(niter)
        # Indexed access is kept on purpose: the container type returned by
        # flex_read is not visible here.
        for it in range(niter):
            bhati  = bhat_hist[it]
            ypred  = np.ravel(np.dot(Xtest, bhati[1:]) + bhati[0])
            rmse   = np.sqrt(np.mean((ytest - ypred)**2))
            scores[it] = rmse / se
        allscores.append(scores)
    return allscores

Example #3

0

Show file

File: methodplots.py Project: stephenslab/eb-linreg-dsc

def single_plot_computational_time(ax, data, colname, whichmethods, pve, rho,
                                   dims, sfrac):
    """
    Draw one horizontal boxplot per method for the values in ``data[colname]``,
    with a translucent bar behind each box that extends to the mean.

    Rows of ``data`` are selected per method by matching ``fit``,
    ``simulate.pve``, ``simulate.rho`` and ``simulate.sfrac``. Methods are
    stacked along the y-axis at positions 1, 2, ... in the order given by
    ``whichmethods`` and labelled with the label from
    ``methodprops.plot_metainfo()``. ``dims`` is accepted but not used.
    Returns None.
    """
    xscale = 'log10'
    labels = []

    for ypos, method in enumerate(whichmethods, start=1):
        # Rows belonging to this method under the requested simulation setting
        selection = pd_utils.select_dfrows(data, [
            f"$(fit) == {method}",
            f"$(simulate.pve) == {pve}",
            f"$(simulate.rho) == {rho}",
            f"$(simulate.sfrac) == {sfrac}",
        ])

        # One colour drives every element of the box
        meta = methodprops.plot_metainfo()[method]
        color = meta.color
        box_style = {'linewidth': 0, 'color': color, 'facecolor': color}
        median_style = {'linewidth': 0, 'color': color}
        whisker_style = {'color': color}
        flier_style = {
            'marker': 'o',
            'markerfacecolor': color,
            'markersize': 4,
            'markeredgewidth': 0,
            'markeredgecolor': color,
        }

        # Box for the scaled values
        values = selection[colname].to_numpy()
        scaled = mpl_utils.scale_array(values, xscale)
        ax.boxplot(scaled,
                   positions=[ypos],
                   showfliers=True,
                   showcaps=False,
                   widths=0.6,
                   vert=False,
                   patch_artist=True,
                   notch=False,
                   boxprops=box_style,
                   medianprops=median_style,
                   whiskerprops=whisker_style,
                   flierprops=flier_style)
        labels.append(meta.label)

        # Faint bar from the scaled value of 0.1 up to the scaled mean
        bar_start = mpl_utils.scale_array(0.1, xscale)
        bar_length = mpl_utils.scale_array(np.mean(values), xscale) - bar_start
        ax.barh(ypos,
                bar_length,
                left=bar_start,
                align='center',
                color=color,
                linewidth=0,
                height=0.6,
                alpha=0.2)

    # Axis cosmetics
    ax.tick_params(labelcolor="#333333", left=False)
    ax.set_yticklabels(labels, rotation=0)
    mpl_utils.set_soft_xlim(ax, 0.09, 40, scale=xscale)
    mpl_utils.set_xticks(ax, scale=xscale, kmin=3, kmax=4, spacing='log10')
    mpl_utils.decorate_axes(ax, hide=["top", "right", "left"], ticklimits=True)

Example #4

0

Show file

File: methodplots.py Project: stephenslab/eb-linreg-dsc

def single_plot_score_methods(ax,
                              resdf,
                              colname,
                              methods,
                              pve,
                              rho,
                              dims,
                              sfracs,
                              use_median=False):
    """
    Plot, for each method, a summary of ``resdf[colname]`` against the number
    of non-zero coefficients.

    The x-value for each entry of ``sfracs`` is ``max(1, int(sfrac * dims[1]))``.
    For every (method, sfrac) pair the rows matching ``fit``, ``simulate.pve``,
    ``simulate.rho`` and ``simulate.sfrac`` are selected, NaN entries are
    dropped, and the remaining values are reduced with the median when
    ``use_median`` is true and with the mean otherwise. Line and marker
    styling come from ``methodprops.plot_metainfo()``. Returns None.
    """
    xscale = 'log10'
    yscale = 'log10'
    nonzero_counts = [max(1, int(frac * dims[1])) for frac in sfracs]
    summarize = np.median if use_median else np.mean

    for method in methods:
        base_conditions = [
            f"$(fit) == {method}",
            f"$(simulate.pve) == {pve}",
            f"$(simulate.rho) == {rho}",
        ]

        # One summary value per sparsity level, ignoring NaN scores
        summary = []
        for sfrac in sfracs:
            selected = pd_utils.select_dfrows(
                resdf, base_conditions + [f"$(simulate.sfrac) == {sfrac}"])
            values = selected[colname].to_numpy()
            summary.append(summarize(values[~np.isnan(values)]))

        # Line for this method
        style = methodprops.plot_metainfo()[method]
        ax.plot(mpl_utils.scale_array(nonzero_counts, xscale),
                mpl_utils.scale_array(summary, yscale),
                label=style.label,
                color=style.color,
                lw=style.linewidth / 2,
                ls=style.linestyle,
                marker=style.marker,
                ms=style.size / 1.2,
                mec=style.color,
                mfc=style.facecolor,
                mew=style.linewidth,
                zorder=style.zorder)

    # Axis limits, ticks and spines
    mpl_utils.set_soft_ylim(ax, 1.0, 1.2, scale=yscale)
    mpl_utils.set_xticks(ax, scale=xscale, tickmarks=nonzero_counts)
    mpl_utils.set_yticks(ax, scale=yscale, kmin=3, kmax=4, forceticks=[1.0])
    mpl_utils.decorate_axes(ax, hide=["top", "right"], ticklimits=True)

Example #5

0

Show file

File: convergence_plots.py Project: stephenslab/eb-linreg-dsc

def create_single_method_score_distribution_plot(data, method, dim_list,
                                                 rho_list, pve_list, sfracs,
                                                 colname):
    """
    Show the distribution of ``data[colname]`` for a single method as a grid
    of scatter panels.

    One grid row is drawn per entry of ``dim_list`` and one panel per
    (rho, pve) combination, with rho as the outer loop. Inside a panel the
    non-NaN scores of every sparsity level in ``sfracs`` are scattered at
    x = ``max(1, int(sfrac * dim[1]))``, and the number of NaN scores is
    written at height ``nan_pos`` below each level. The figure is displayed
    with ``plt.show()``; nothing is returned.

    The grid is fixed at 2 rows x 4 columns and the axis-label and title
    anchors are hard-coded, so the inputs must fill that grid: two entries
    in ``dim_list`` and at least three (rho, pve) panels within 4 columns.
    """
    ncol = 4
    nrow = 2
    wspace = 0.2
    hspace = 1.5
    aspect = 0.7
    xscale = 'log10'
    yscale = 'log10'
    nan_pos = -1.5
    figw = 12

    # Only the figure height is needed from the layout helper.
    figh, _blockh, _axh, _axw = get_dimensions(1,
                                               nrow,
                                               ncol,
                                               wspace,
                                               hspace,
                                               0,
                                               aspectratio=aspect)
    fig = plt.figure(figsize=(figw, figh))
    gs = gridspec.GridSpec(nrow, ncol)
    gs.update(wspace=wspace, hspace=hspace)

    pm = methodprops.plot_metainfo()[method]
    figtitle = f"{pm.label}"
    xlab_offset = -wspace / 2 if ncol % 2 == 0 else 0.5
    ylab_offset = (1 + hspace / 2) if nrow % 2 == 0 else 0.5

    # Panels of one rho are laid out side by side, so the column stride is
    # the number of pve values rather than a fixed 2.
    npve = len(pve_list)

    axrow = list()
    for i, dim in enumerate(dim_list):
        xvals = [max(1, int(x * dim[1])) for x in sfracs]
        axlist = list()
        resdf = pd_utils.select_dfrows(
            data, [f"$(simulate.dims) == '({dim[0]},{dim[1]})'"])
        for j, rho in enumerate(rho_list):
            for k, pve in enumerate(pve_list):
                colnum = j * npve + k
                if not axlist:
                    # First panel of the row carries the sample-size label
                    ax = fig.add_subplot(gs[i, colnum])
                    ax.text(0,
                            1.3,
                            f"n = {dim[0]}",
                            va='bottom',
                            ha='left',
                            transform=ax.transAxes)
                else:
                    # Remaining panels share the y-axis of the first one
                    ax = fig.add_subplot(gs[i, colnum], sharey=axlist[0])
                ax.text(0.5,
                        1.05,
                        f"pve = {pve:g}, " + r"$\rho$ = {:g}".format(rho),
                        va='bottom',
                        ha='center',
                        transform=ax.transAxes)

                # Main plot
                mconditions = [f"$(fit) == {method}"]
                mconditions += [f"$(simulate.rho) == {rho}"]
                mconditions += [f"$(simulate.pve) == {pve}"]
                for s, sfrac in enumerate(sfracs):
                    scondition = [f"$(simulate.sfrac) == {sfrac}"]
                    dfselect = pd_utils.select_dfrows(resdf,
                                                      mconditions + scondition)
                    scores = dfselect[colname].to_numpy()
                    num_nan = np.sum(np.isnan(scores))
                    xpos = mpl_utils.scale_list([xvals[s]], scale=xscale)
                    yvals = mpl_utils.scale_array(scores[~np.isnan(scores)],
                                                  scale=yscale)
                    # xpos is presumably a one-element list, so the product
                    # repeats the x position once per score -- TODO confirm
                    ax.scatter(xpos * len(yvals), yvals, alpha=0.5)
                    ax.text(xpos[0],
                            nan_pos,
                            f"{num_nan}",
                            ha='center',
                            va='bottom')

                # Tick marks and axes decoration
                # NOTE(review): set_xticks runs both before and after
                # decorate_axes; presumably decorate_axes touches the ticks.
                # Confirm before removing either call.
                mpl_utils.set_xticks(ax,
                                     scale=xscale,
                                     tickmarks=xvals,
                                     rotation=90)
                ax.tick_params(labelcolor="#333333", left=False)
                if axlist:
                    ax.tick_params(labelleft=False)
                mpl_utils.decorate_axes(ax,
                                        hide=["left", "right", "top"],
                                        ticklimits=True,
                                        pads=[34, 10])
                mpl_utils.set_xticks(ax,
                                     scale=xscale,
                                     tickmarks=xvals,
                                     rotation=90)
                # Make the top spine visible again after decorate_axes
                ax.spines["top"].set_visible(True)
                ax.grid(which='major', axis='y', ls='dotted')
                axlist.append(ax)

        # The following indices are hard-coded for the 2 x 4 grid.
        axlist[2].set_xlabel(r"Number of non-zero coefficients (s)",
                             x=xlab_offset)
        mpl_utils.set_yticks(axlist[0], scale=yscale, spacing='log10')
        axlist[0].text(0, nan_pos, 'nan', ha='right', va='bottom')
        axrow.append(axlist)

    axrow[1][0].set_ylabel(r"Prediction Error (RMSE / $\sigma$)",
                           y=ylab_offset)
    axrow[0][2].set_title(figtitle, x=xlab_offset, pad=40)

    plt.show()