def _report_features(features0, features1, features, NC):
    """Dump the sampled features to disk and scatter-plot the first two covariates.

    This is a debugging aid: it returns immediately unless the effective
    level of the ``"output"`` logger is ``DEBUG`` or lower.

    Args:
        features0: Table indexable by ``[["cov0", "cov1"]]`` and ``["group"]``;
            plotted with the legend label ``$i_u=0$``.
        features1: Same layout as ``features0``; plotted with the legend
            label ``$i_u=1$``.
        features: Object exposing ``to_csv``; written as a tab-separated file
            to ``/tmp/sample_data_features.tsv``.
        NC: Plotting is skipped when this is below 2 (presumably the number
            of covariates -- confirm against the caller).

    Returns:
        None. Side effects: resets the pyplot state, writes the TSV file and,
        when ``NC >= 2``, saves ``/tmp/sample_data_features.png``.
    """
    output_level = logging.getLogger("output").getEffectiveLevel()
    if output_level > logging.DEBUG:
        return

    plots.pyplot_reset()
    logging.debug(
        "[sample_data] Storing features to /tmp/sample_data_features.tsv")
    features.to_csv(
        "/tmp/sample_data_features.tsv",
        sep="\t",
        header=True,
        index=False,
    )

    # The 2-D scatter below needs both "cov0" and "cov1".
    if NC < 2:
        return

    labelled_subsets = (
        (features0, "$i_u=0$"),
        (features1, "$i_u=1$"),
    )
    for subset, legend_label in labelled_subsets:
        covariates = np.asarray(subset[["cov0", "cov1"]])
        group = subset["group"]
        # One (1 - group, group) pair per row.
        membership = np.asarray(list(zip(1 - group, group)))
        _plot_data_2d(covariates, membership, label=legend_label)

    pyplot.xlabel("covariate 0")
    pyplot.ylabel("covariate 1")
    pyplot.grid(True)
    pyplot.legend()
    plots.savefig("/tmp/sample_data_features.png")
data["St"], lower, upper = theoretic_pvalues_rejections( ldf, ratios, "SHfH0_pval", badge_time) data["Se"], lower, upper = empirical_pvalues_rejections( ldf, ratios, "SHfH0_LLR", badge_time) data["Be"], lower, upper = empirical_pvalues_rejections( ldf, ratios, "BHfH0_LLR", badge_time) plots.pyplot_reset() plot_data( data, ylabel=r"$H_0$ rejection probability", #level=0.05, level_label="5\%", columns=["St", "Se", "Be"], labels=["basic theoretic", "basic bootstrap", "robust bootrsap"]) pyplot.legend(fontsize=20, loc=4) plots.savefig("%s_l%g_rejections.pdf" % (args.input, l0)) ##################################################################### ############################################################################################## print("Empirical vs theoretic p-values") for l0, ldf in df.groupby("l0"): ##################################################################### data = pd.DataFrame({"ratio": ratios}) data["St"], lower, upper = theoretic_pvalues(ldf, ratios, "SHfH0_pval", badge_time) data["Se"], lower, upper = empirical_pvalues(ldf, ratios, "SHfH0_LLR", badge_time) data["Be"], lower, upper = empirical_pvalues(ldf, ratios, "BHfH0_LLR", badge_time)
data = pd.DataFrame({"ratio": ratios}) data["St"], lower, upper = theoretic_pvalues_rejections(ldf, ratios, "SHfH0_pval", badge_time) data["Se"], lower, upper = empirical_pvalues_rejections(ldf, ratios, "SHfH0_LLR", badge_time) data["Be"], lower, upper = empirical_pvalues_rejections(ldf, ratios, "BHfH0_LLR", badge_time) plots.pyplot_reset() pyplot.ylim((0, 1.0)) plot_data(data, ylabel=r"$H_0$ rejection probability", #level=0.05, level_label="5\%", columns=["St", "Se", "Be"], labels=["basic theoretic", "basic bootstrap", "robust bootrsap"], leg_loc=2) pyplot.ylim((0, 1.0)) pyplot.gcf().subplots_adjust(bottom=0.17, left=0.18) plots.savefig("%s_l%g_rejections_survival.pdf" % (args.input, l0)) ##################################################################### ############################################################################################## print("Empirical vs theoretic p-values") for l0, ldf in df.groupby("l0"): ##################################################################### data = pd.DataFrame({"ratio": ratios}) data["St"], lower, upper = theoretic_pvalues(ldf, ratios, "SHfH0_pval", badge_time) data["Se"], lower, upper = empirical_pvalues(ldf, ratios, "SHfH0_LLR", badge_time) data["Be"], lower, upper = empirical_pvalues(ldf, ratios, "BHfH0_LLR", badge_time) plots.pyplot_reset() pyplot.ylim((0, 1.0)) plot_data(data, ylabel=r"$p$-value", level=0.05,
lw=3, color=COLORS[i % len(COLORS)]) #pyplot.plot(data[TIMECOL], means+stds, color=p[-1].get_color(), lw=1) pyplot.ylabel("average intensity (days)", fontsize=25) plots.pyplot_parse_params2(xmin=params.get("xmin", None), xmax=params.get("xmax", None), ymax=params.pop("ymax", None)) pyplot.grid(True) _plot_badges(args) xmin, xmax = pyplot.xlim() plot_legend(fontsize=20, loc=(1 if args.badges[0] < (xmin + xmax) + 0.5 else 2)) _set_time_axis(params) pyplot.tick_params(axis='both', which='major', labelsize=22) pyplot.gcf().subplots_adjust(bottom=0.17, left=0.22) plots.savefig(args.output + "_fitting.pdf") ######################################################################### print("=================================") print("LLR-values over time") plots.pyplot_reset() #_plot_badges2(args) #transform = lambda c: list(c.apply(lambda v: numpy.exp(v))) transform = lambda c: list(c) x, y = _smoothing(data[TIMECOL], transform(data["SHfH0_LLR"]), args, params) pyplot.plot(list(x), y, label="basic", lw=3, ls="-", color=COLORS[1]) #pyplot.plot(x[5::15], y[5::15], marker="o", markeredgecolor="none", markersize=5, color=COLORS[1], lw=0)
# Render one AUC heat map (saved as a PDF) for every combination of
# trend, F and value column.

# Font/TeX settings applied around every plot_matrix call.
_FONT_RC = {
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'text.usetex': True,
}

for trend, tdf in df.groupby("trend"):
    for F, sdf in tdf.groupby("F"):
        for col in VALUE_COLS:
            logging.info("PROCESSING: %s" % col)

            # Columns whose name contains "TR" are drawn in blue, all
            # others in red.
            if "TR" in col:
                cmap = "Blues"
            else:
                cmap = "Reds"

            # Mean of `col` for each (ishift, covmshift) cell.
            m, rl, cl = extract_matrix(
                sdf,
                column=col,
                aggregate=np.mean,
                row_column="ishift",
                col_column="covmshift",
            )

            matplotlib.rcParams.update({'font.size': 24})
            matplotlib.rcParams.update(_FONT_RC)

            plot_matrix(
                m, rl, cl,
                xlabel=r"covariates discrepancy, $\Delta_x$",
                ylabel=r"badge effect, $\Delta_\lambda$",
                clabel="AUC",
                cmin=0.5,
                cmax=1.0,
                cmap=cmap,
            )

            # Settings are re-applied before saving, as in the original;
            # presumably plot_matrix may reset rcParams -- TODO confirm.
            matplotlib.rcParams.update(_FONT_RC)

            suffix = "_t%s_F%s_%s.pdf" % (trend, F, col)
            plots.savefig(output + suffix)