def changepoint_predictions(dsc_outdir, methods, order=0, sfix=1, dsc_iter=1):
    """
    Gather the simulated data and per-method fit results of one DSC replicate.

    The DSC database is looked up at ``<dsc_outdir>/<basename of dsc_outdir>.db``
    and queried for the ``changepoint``, ``fit_cpt`` and ``predict_linear``
    modules, restricted to ``changepoint.basis_k == order`` and
    ``changepoint.sfix == sfix``. Only rows whose ``DSC`` column equals
    ``dsc_iter`` are kept.

    Parameters
    ----------
    dsc_outdir : str
        DSC output directory; output file names in the query table are
        joined onto this path.
    methods : iterable
        Values of the ``fit_cpt`` column to collect results for.
    order, sfix, dsc_iter
        Query filters described above.

    Returns
    -------
    tuple
        ``(X, y, Xtest, ytest, beta, se, ypred, b0pred, b1pred)``. The first
        six items are read from the simulation file. The last three are dicts
        keyed by method, holding ``pred['yest']``, ``fit['intercept']`` and
        ``fit['beta_est']`` respectively.

    Raises
    ------
    AssertionError
        If a method does not select exactly one row, or if a method points
        to a simulation file different from that of the first row.
        NOTE(review): the row-count message says "More than one row" even
        when no row matches.
    """
    db_name = os.path.basename(os.path.normpath(dsc_outdir)) + ".db"
    dbpath = os.path.join(dsc_outdir, db_name)

    targets = [
        "changepoint",
        "changepoint.basis_k",
        "changepoint.sfix",
        "fit_cpt",
        "predict_linear",
    ]
    conditions = [
        f"changepoint.basis_k == {order}",
        f"changepoint.sfix == {sfix}",
    ]
    groups = ["fit:"]

    query = dscQP(dbpath, targets, conditions, groups)
    outdf = pd_utils.select_dfrows(query.output_table, [f"$(DSC) == {dsc_iter}"])

    def output_path(frame, label, column):
        # File names in the query table are relative to the DSC directory.
        return os.path.join(dsc_outdir, frame.loc[label, column])

    ypred, b0pred, b1pred = {}, {}, {}

    # Every method is expected to share the simulation of the first row.
    simpath0 = output_path(outdf, outdf.index[0], 'changepoint.output.file')

    for method in methods:
        dfrow = pd_utils.select_dfrows(outdf, [f"$(fit_cpt) == {method}"])
        assert (dfrow.index.shape[0] == 1), "Error! More than one row is selected."
        label = dfrow.index[0]

        fitpath = output_path(dfrow, label, 'fit_cpt.output.file')
        predpath = output_path(dfrow, label, 'predict_linear.output.file')
        simpath = output_path(dfrow, label, 'changepoint.output.file')
        assert (simpath == simpath0), "Error! Different simulation file"

        pred = flex_read(predpath)
        fit = flex_read(fitpath)
        ypred[method] = pred['yest']
        b1pred[method] = fit['beta_est']
        b0pred[method] = fit['intercept']

    data = flex_read(simpath0)
    X, y, beta, Xtest, ytest, se = (
        data[key] for key in ('X', 'y', 'beta', 'Xtest', 'ytest', 'se')
    )
    return X, y, Xtest, ytest, beta, se, ypred, b0pred, b1pred
def emvamp_mse_hist(dsc_outdir, method, dim, sfrac, pve, rho):
    """
    Compute the per-iteration scaled test RMSE of a fit method.

    The DSC database at ``<dsc_outdir>/<basename of dsc_outdir>.db`` is
    queried for the ``simulate`` and ``fit`` modules with the given
    ``sfrac``, ``dims`` (formatted as ``'(dim[0],dim[1])'``), ``pve`` and
    ``rho``. Rows whose ``fit`` column equals ``method`` are then processed
    one by one.

    For each row, the fit output entry ``'model'`` is treated as a sequence
    of coefficient vectors whose first element is the intercept and whose
    remaining elements are the regression coefficients. Each vector is used
    to predict ``ytest`` from ``Xtest``, and the resulting RMSE is divided
    by ``se`` from the simulation file.

    Parameters
    ----------
    dsc_outdir : str
        DSC output directory.
    method
        Value of the ``fit`` column to select.
    dim : sequence
        Two-element dimension specification used in the query.
    sfrac, pve, rho
        Simulation settings used in the query.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per selected row; element ``it`` is ``RMSE / se`` for
        the ``it``-th stored coefficient vector.
    """
    target = ["simulate", "fit"]
    conditions = [
        f"simulate.sfrac == {sfrac}",
        f"simulate.dims == '({dim[0]},{dim[1]})'",
        f"simulate.pve == {pve}",
        f"simulate.rho == {rho}",
    ]
    groups = None
    dbpath = os.path.join(
        dsc_outdir, os.path.basename(os.path.normpath(dsc_outdir)) + ".db")

    qp = dscQP(dbpath, target, conditions, groups)
    outdf = pd_utils.select_dfrows(qp.output_table, [f"$(fit) == {method}"])

    allscores = list()
    for idx in outdf.index.to_numpy():
        fitpath = os.path.join(dsc_outdir, outdf.loc[idx, 'fit.output.file'])
        simpath = os.path.join(dsc_outdir, outdf.loc[idx, 'simulate.output.file'])
        resdict = flex_read(fitpath)
        datadict = flex_read(simpath)

        bhat_hist = resdict['model']
        Xtest = datadict['Xtest']
        ytest = datadict['ytest']
        se = datadict['se']

        niter = len(bhat_hist)
        scores = np.zeros(niter)
        n, _ = Xtest.shape
        # The response as a column does not change between iterations.
        ytest_col = ytest.reshape(n, 1)

        for it in range(niter):
            bhati = bhat_hist[it]
            ypred = np.dot(Xtest, bhati[1:]) + bhati[0]
            # A 1-D coefficient vector yields a 1-D prediction of shape (n,).
            # Subtracting that from an (n, 1) column would broadcast to an
            # (n, n) matrix and silently give a wrong RMSE, so promote it
            # to a column first. 2-D predictions are left untouched.
            if np.ndim(ypred) == 1:
                ypred = np.reshape(ypred, (n, 1))
            rmse = np.sqrt(np.mean((ytest_col - ypred) ** 2))
            scores[it] = rmse / se
        allscores.append(scores)
    return allscores
def single_plot_computational_time(ax, data, colname, whichmethods, pve, rho, dims, sfrac):
    """
    Draw horizontal boxplots of ``colname`` for each method on ``ax``.

    For every method in ``whichmethods`` the rows of ``data`` matching the
    method and the given ``pve``, ``rho`` and ``sfrac`` are selected. Their
    ``colname`` values are shown on a log10-scaled x axis as a filled
    boxplot, on top of a translucent bar running from 0.1 to the mean value.
    Methods are stacked at y positions 1, 2, ... in input order and labelled
    with the label from ``methodprops.plot_metainfo()``.

    ``dims`` is accepted but not used by this function.
    """
    xscale = 'log10'
    thickness = 0.6

    # Filters that do not depend on the method.
    simulation_filters = [
        f"$(simulate.pve) == {pve}",
        f"$(simulate.rho) == {rho}",
        f"$(simulate.sfrac) == {sfrac}",
    ]

    ylabels = []
    for ypos, method in enumerate(whichmethods, start=1):
        # Rows belonging to this method
        selected = pd_utils.select_dfrows(
            data, [f"$(fit) == {method}"] + simulation_filters)

        # All box elements are painted in the method colour
        pm = methodprops.plot_metainfo()[method]
        colour = pm.color
        box_style = {
            "boxprops": {"linewidth": 0, "color": colour, "facecolor": colour},
            "medianprops": {"linewidth": 0, "color": colour},
            "whiskerprops": {"color": colour},
            "flierprops": {
                "marker": 'o',
                "markerfacecolor": colour,
                "markersize": 4,
                "markeredgewidth": 0,
                "markeredgecolor": colour,
            },
        }

        # Boxplot of the scaled values
        times = selected[colname].to_numpy()
        scaled_times = mpl_utils.scale_array(times, xscale)
        ax.boxplot(scaled_times, positions=[ypos],
                   showfliers=True, showcaps=False,
                   widths=thickness, vert=False,
                   patch_artist=True, notch=False,
                   **box_style)
        ylabels.append(pm.label)

        # Faint bar from the left edge up to the mean value
        bar_start = mpl_utils.scale_array(0.1, xscale)
        bar_length = mpl_utils.scale_array(np.mean(times), xscale) - bar_start
        ax.barh(ypos, bar_length, left=bar_start, align='center',
                color=colour, linewidth=0, height=thickness, alpha=0.2)

    ax.tick_params(labelcolor="#333333", left=False)
    ax.set_yticklabels(ylabels, rotation=0)
    mpl_utils.set_soft_xlim(ax, 0.09, 40, scale=xscale)
    mpl_utils.set_xticks(ax, scale=xscale, kmin=3, kmax=4, spacing='log10')
    mpl_utils.decorate_axes(ax, hide=["top", "right", "left"], ticklimits=True)
def single_plot_score_methods(ax, resdf, colname, methods, pve, rho, dims, sfracs, use_median=False):
    """
    Plot an aggregated score against sparsity for several methods on ``ax``.

    The x values are ``max(1, int(sfrac * dims[1]))`` for each entry of
    ``sfracs``. For every method and every ``sfrac``, the rows of ``resdf``
    matching the method, ``pve``, ``rho`` and ``sfrac`` are selected and the
    non-NaN values of ``colname`` are reduced with the mean, or with the
    median when ``use_median`` is true. Both axes use the log10 scale and
    each line is styled from ``methodprops.plot_metainfo()``.
    """
    xscale = 'log10'
    yscale = 'log10'
    xvals = [max(1, int(frac * dims[1])) for frac in sfracs]
    aggregate = np.median if use_median else np.mean

    for method in methods:
        base_filters = [
            f"$(fit) == {method}",
            f"$(simulate.pve) == {pve}",
            f"$(simulate.rho) == {rho}",
        ]

        # One aggregated value per sparsity level, ignoring NaN entries
        summary = []
        for sfrac in sfracs:
            rows = pd_utils.select_dfrows(
                resdf, base_filters + [f"$(simulate.sfrac) == {sfrac}"])
            values = rows[colname].to_numpy()
            summary.append(aggregate(values[~np.isnan(values)]))

        # Line for this method
        pm = methodprops.plot_metainfo()[method]
        xx = mpl_utils.scale_array(xvals, xscale)
        yy = mpl_utils.scale_array(summary, yscale)
        ax.plot(xx, yy, label=pm.label,
                color=pm.color, lw=pm.linewidth / 2, ls=pm.linestyle,
                marker=pm.marker, ms=pm.size / 1.2,
                mec=pm.color, mfc=pm.facecolor, mew=pm.linewidth,
                zorder=pm.zorder)

    mpl_utils.set_soft_ylim(ax, 1.0, 1.2, scale=yscale)
    mpl_utils.set_xticks(ax, scale=xscale, tickmarks=xvals)
    mpl_utils.set_yticks(ax, scale=yscale, kmin=3, kmax=4, forceticks=[1.0])
    mpl_utils.decorate_axes(ax, hide=["top", "right"], ticklimits=True)
def create_single_method_score_distribution_plot(data, method, dim_list, rho_list, pve_list, sfracs, colname):
    """
    Show the distribution of ``colname`` for one method on a 2 x 4 grid.

    Each entry of ``dim_list`` fills one grid row. Within a row, the panel
    for ``rho_list[j]`` and ``pve_list[k]`` is placed in column ``j * 2 + k``.
    In every panel the non-NaN scores for each ``sfrac`` are scattered at
    ``max(1, int(sfrac * dim[1]))`` (log10 scaled on both axes), and the
    number of NaN scores is written below at y = -1.5. Panels in a row share
    the y axis of the first panel of that row.

    The layout is hard-coded: labels are attached to ``axrow[1][0]``,
    ``axrow[0][2]`` and to the third panel of every row, so at least two
    rows with at least three panels each are required.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    ncol = 4
    nrow = 2
    wspace = 0.2
    hspace = 1.5
    aspect = 0.7
    xscale = 'log10'
    yscale = 'log10'
    nan_pos = -1.5
    figw = 12

    figh, _, _, _ = get_dimensions(1, nrow, ncol, wspace, hspace, 0, aspectratio=aspect)
    fig = plt.figure(figsize=(figw, figh))
    gs = gridspec.GridSpec(nrow, ncol)
    gs.update(wspace=wspace, hspace=hspace)

    pm = methodprops.plot_metainfo()[method]
    figtitle = f"{pm.label}"
    xlab_offset = -wspace / 2 if ncol % 2 == 0 else 0.5
    ylab_offset = (1 + hspace / 2) if nrow % 2 == 0 else 0.5

    axrow = []
    for rownum, dim in enumerate(dim_list):
        xvals = [max(1, int(frac * dim[1])) for frac in sfracs]
        resdf = pd_utils.select_dfrows(
            data, [f"$(simulate.dims) == '({dim[0]},{dim[1]})'"])

        axlist = []
        for j, rho in enumerate(rho_list):
            for k, pve in enumerate(pve_list):
                colnum = j * 2 + k
                is_first = not axlist

                if is_first:
                    # The first panel of a row also carries the sample size
                    ax = fig.add_subplot(gs[rownum, colnum])
                    ax.text(0, 1.3, f"n = {dim[0]}", va='bottom', ha='left',
                            transform=ax.transAxes)
                else:
                    ax = fig.add_subplot(gs[rownum, colnum], sharey=axlist[0])
                ax.text(0.5, 1.05,
                        f"pve = {pve:g}, " + r"$\rho$ = {:g}".format(rho),
                        va='bottom', ha='center', transform=ax.transAxes)

                # Scatter the scores of every sparsity level
                panel_filters = [
                    f"$(fit) == {method}",
                    f"$(simulate.rho) == {rho}",
                    f"$(simulate.pve) == {pve}",
                ]
                for s, sfrac in enumerate(sfracs):
                    rows = pd_utils.select_dfrows(
                        resdf, panel_filters + [f"$(simulate.sfrac) == {sfrac}"])
                    scores = rows[colname].to_numpy()
                    missing = np.isnan(scores)
                    num_nan = np.sum(missing)
                    xpos = mpl_utils.scale_list([xvals[s]], scale=xscale)
                    yvals = mpl_utils.scale_array(scores[~missing], scale=yscale)
                    # Repeat the x position once per plotted score
                    ax.scatter(xpos * len(yvals), yvals, alpha=0.5)
                    ax.text(xpos[0], nan_pos, f"{num_nan}", ha='center', va='bottom')

                # Ticks and frame
                mpl_utils.set_xticks(ax, scale=xscale, tickmarks=xvals, rotation=90)
                ax.tick_params(labelcolor="#333333", left=False)
                if not is_first:
                    ax.tick_params(labelleft=False)
                mpl_utils.decorate_axes(ax, hide=["left", "right", "top"],
                                        ticklimits=True, pads=[34, 10])
                mpl_utils.set_xticks(ax, scale=xscale, tickmarks=xvals, rotation=90)
                for side, spine in ax.spines.items():
                    if side != "top":
                        continue
                    spine.set_visible(True)
                ax.grid(which='major', axis='y', ls='dotted')

                axlist.append(ax)

        # The panel indices below are hard-coded
        axlist[2].set_xlabel(r"Number of non-zero coefficients (s)", x=xlab_offset)
        mpl_utils.set_yticks(axlist[0], scale=yscale, spacing='log10')
        axlist[0].text(0, nan_pos, 'nan', ha='right', va='bottom')
        axrow.append(axlist)

    axrow[1][0].set_ylabel(r"Prediction Error (RMSE / $\sigma$)", y=ylab_offset)
    axrow[0][2].set_title(figtitle, x=xlab_offset, pad=40)
    plt.show()