def get_avg_gain_for_intrpt_exp(exp_file, conf_tholds):
    """Gives the average gain throughout the sequence updates of an interruption experiment
    for given confidence thresholds.

    Args:
        exp_file (str): full path to the interruption experiment result file
        conf_tholds (list): of floats for confidence thresholds that the average gains are
            to be calculated for. If any given threshold was not used in the interruption
            experiment, it is ignored with a warning message.

    Notes:
        Average gain is calculated for the last kNN member.

    Returns:
        pd.DataFrame: indices are the `conf_tholds` and the only column is the average gain

    """
    # Load interruption experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpIntrptOutput
    data = exp_intrpt_output.data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    df = data.loc[data["knni"] == data["knni"].max()][["confthold", "gain"]]  # Use last kNN member only
    conf_tholds_in_df = df["confthold"].unique()
    # Warn about and drop the thresholds that do not occur in the experiment results.
    # (Popping from the list while enumerating it would skip items and mutate the caller's list.)
    for thold in [t for t in conf_tholds if t not in conf_tholds_in_df]:
        print("Threshold argument {} is ignored, "
              "it does not exist in the experiment results {}.".format(thold, exp_file))
    conf_tholds = [t for t in conf_tholds if t in conf_tholds_in_df]
    avg_gains = df.loc[df["confthold"].isin(conf_tholds)].groupby("confthold").mean()
    return avg_gains
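# A minimal usage sketch for `get_avg_gain_for_intrpt_exp` (the result file path and the
# thresholds below are hypothetical; any pickle produced by an interruption experiment run
# of this project would do):
#   avg_gains = get_avg_gain_for_intrpt_exp("/path/to/results/INT_MyDataset_....pk",
#                                           conf_tholds=[.98, .95, .9])
#   print(avg_gains)  # -> DataFrame indexed by 'confthold' with the average 'gain' column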
def gen_intrpt_output_f_path(dataset, pdp_file, tw_width, tw_step, k, conf_tholds, z,
                             test_size, cls_rank_iterator, suffix=""):
    """Returns full path of the output file for the interruption experiment results"""
    dataset_name = os.path.splitext(os.path.basename(dataset))[0]  # Base file name w/o extension
    pdp_output = common.load_obj(pdp_file)  # type: pdp.PDPOutput
    calc_step = pdp_output.settings.calc_step_arg
    q_step = pdp_output.settings.q_step
    pdp_dataset = exp_common.get_setting(pdp_output.settings.experiment, "dataset")
    pdp_dataset = common.file_name_wo_ext(pdp_dataset)
    rank_iter_tag = cls_rank_iterator.abbrv
    z_tag = int(z) if int(z) == z else z
    conf_thold_tag = "[{}]".format("_".join([str(ct) for ct in conf_tholds])) if conf_tholds is not None else ""
    out_file = os.path.join(
        common.APP.FOLDER.RESULT,
        "INT_{d}_w_{w}_s_{s}_k_{k}_r_{r}_PDP_{dp}_c_{c}_q_{q}__ct_{ct}_z_{z}_t_{t}{x}{e}".format(
            d=dataset_name, w=tw_width, s=tw_step, k=k, r=rank_iter_tag, dp=pdp_dataset,
            c=calc_step, q=q_step, ct=conf_thold_tag, z=z_tag, t=str(test_size),
            x=suffix, e=common.APP.FILE_EXT.PICKLE))
    return out_file
def get_avg_gain_for_classify_exp(exp_file, conf_tholds, wsoln=False, lblwsoln="w/Soln"):
    """Gives the average gain throughout the sequence updates in a classification experiment
    for given confidence thresholds and optionally upon stopping w/ exact solution.

    Args:
        exp_file (str): full path to the classification experiment result file
        conf_tholds (list): of floats for confidence thresholds that the average gains are
            to be calculated for. If any given threshold was not used in the classification
            experiment, it is ignored with a warning message.
        wsoln (bool): If True, gains for 'stop_w_soln=1' rows are also added.
        lblwsoln (str): Label of the column for the interruption with exact solution

    Notes:
        Average gain is calculated for the last kNN member.

    Returns:
        pd.DataFrame: indices are the `conf_tholds` and `w/soln` if requested, and the only
            column is the average gain

    """
    # Load classification experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpClassifierOutput
    data = exp_intrpt_output.data.gain_data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    df = data.loc[(data["stopwsoln"] == 0) & (data["knni"] == data["knni"].max())][[
        "confthold", "gain"]]  # Use intrpt w/conf and last kNN member only
    conf_tholds_in_df = df["confthold"].unique()
    # Warn about and drop the thresholds that do not occur in the experiment results.
    # (Popping from the list while enumerating it would skip the item after every removed one.)
    for thold in [t for t in conf_tholds if t not in conf_tholds_in_df]:
        print("Threshold argument {} is ignored, "
              "it does not exist in the experiment results {}.".format(thold, exp_file))
    conf_tholds = [t for t in conf_tholds if t in conf_tholds_in_df]
    avg_gains = df.loc[df["confthold"].isin(conf_tholds)].groupby("confthold").mean()
    if wsoln:
        # Add the avg gain for the experiment w/ stop_w_soln=True.
        # It doesn't matter if an interruption actually occurred w/ an exact solution or not,
        # i.e. the 'intrptwsoln' column is ignored.
        s_avg_w_soln = data.loc[data["stopwsoln"] == 1][["gain"]].mean()
        s_avg_w_soln.name = lblwsoln
        avg_gains = avg_gains.append(s_avg_w_soln)
    return avg_gains
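# Usage sketch for the classification variant (hypothetical path; `wsoln=True` appends an
# extra row labeled by `lblwsoln` holding the average gain upon stopping w/ exact solution):
#   avg_gains = get_avg_gain_for_classify_exp("/path/to/results/CLS_MyDataset_....pk",
#                                             conf_tholds=[.98, .95], wsoln=True)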
def get_avg_effcy_for_intrpt_exp(exp_file, knn_i=None):
    """Gives the average confidence efficiency throughout the sequence updates of an
    interruption experiment.

    Args:
        exp_file (str): full path to the interruption experiment result file
        knn_i (int): Zero-based index of the kNN member; if None, the average performance
            is calculated for *all* kNNs.

    Returns:
        (float, float): (avg confidence efficiency, std dev)

    """
    # Load interruption experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpIntrptOutput
    data = exp_intrpt_output.data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    if knn_i is not None:
        data = data.loc[data["knni"] == knn_i]  # Filter by the given ki
    return data["effcyq"].mean(), data["effcyq"].std()  # μ & σ over all updates and stop calcs
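# Usage sketch (hypothetical path and k): compare the efficiency averaged over all kNN
# members vs the last member only:
#   mu_all, sigma_all = get_avg_effcy_for_intrpt_exp("/path/to/results/INT_....pk")
#   mu_last, sigma_last = get_avg_effcy_for_intrpt_exp("/path/to/results/INT_....pk",
#                                                      knn_i=8)  # e.g. last NN when k=9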
def get_setting(out_file, attr):
    """Returns the given setting attribute of an output object.

    Args:
        out_file (str): Full path to the output file
        attr (str): name of the setting attribute

    Raises:
        AttributeError: If the given attribute is not within the settings

    Returns:
        Any: the value of the setting.

    """
    output = common.load_obj(out_file)  # type: Output
    try:
        return getattr(output.settings, attr)
    except AttributeError:
        raise AttributeError("'{}' is not found in the settings.".format(attr))
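# Usage sketch (hypothetical path; 'tw_width' is one of the settings used elsewhere in
# this project, any other settings attribute works the same way):
#   tw_width = get_setting("/path/to/results/INT_MyDataset_....pk", "tw_width")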
def get_avg_hit_for_classify_exp(exp_file, conf_tholds, wsoln=False, lblwsoln="w/Soln"):
    """Gives the average solution hit throughout the updates of a classification experiment
    for given confidence thresholds.

    Args:
        exp_file (str): full path to the classification experiment result file
        conf_tholds (list): list of floats for confidence thresholds that the average hits
            are to be calculated for. If any given threshold was not used in the
            classification experiment, it is ignored with a warning message.
        wsoln (bool): If True, hits for 'stop_w_soln=1' rows are also added.
        lblwsoln (str): Label of the column for the interruption with exact solution

    Returns:
        pandas.DataFrame: indices are the `conf_tholds` and the only column is the average hit

    """
    # Load classification experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpClassifierOutput
    data = exp_intrpt_output.data.hit_data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    df = data.loc[data["stopwsoln"] == 0][["confthold", "hit"]]  # Use intrpt w/conf and all kNN members
    conf_tholds_in_df = df["confthold"].unique()
    # Warn about and drop the thresholds that do not occur in the experiment results
    for thold in [t for t in conf_tholds if t not in conf_tholds_in_df]:
        print("Threshold argument {} is ignored, "
              "it does not exist in the experiment results {}.".format(thold, exp_file))
    conf_tholds = [t for t in conf_tholds if t in conf_tholds_in_df]
    avg_hits = df.loc[df["confthold"].isin(conf_tholds)].groupby("confthold").mean()
    if wsoln:
        # Add the avg hit for the experiment w/ stop_w_soln=True and intrptwsoln=True.
        # An interruption has to have actually occurred w/ an exact solution to be included in hits.
        s_avg_w_soln = data.loc[(data["stopwsoln"] == 1) & (data["intrptwsoln"] == 1)][["hit"]].mean()
        s_avg_w_soln.name = lblwsoln
        avg_hits = avg_hits.append(s_avg_w_soln)
    return avg_hits
def get_avg_gain_for_exp(exp_file):
    """Gives the average gain throughout the sequence updates in an insights experiment."""
    # Load insights experiment
    exp_insights_output = common.load_obj(exp_file)  # type: ExpInsightsOutput
    exp_gains = exp_insights_output.data.gain
    return np.mean([u[1] for u in exp_gains])  # u = (update, gain)
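# Usage sketch (hypothetical path): `data.gain` holds (update, gain) pairs, so the return
# value is the mean gain over all sequence updates:
#   print(get_avg_gain_for_exp("/path/to/results/INS_MyDataset_....pk"))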
def insights_multiple(experiments, file_format="pdf", total=True, actual=True, all_k=False,
                      marker_size=0, all_ticks=True, with_title=True, signature=True):
    """Plots the total and actual calculations made to find kNNs for multiple experiments
    on the same figure.

    Args:
        experiments (list): List of full paths to the `run.run_insights` experiment(s)
            result file(s).
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
        total (bool): If True, plots the total number of calculations made for the ki's.
        actual (bool): If True, plots the actual number of calculations made for the ki's.
        all_k (bool): If False, plots all ki's in the first experiment and then plots only
            the calcs for the kth NN for the experiments with index >= 1; otherwise, plots
            all ki's for all experiments. If there is only one experiment to plot, this
            argument is ignored.
        marker_size (float): size of the marker in scatter plot
        all_ticks (bool): If True, all x-ticks are displayed.
        with_title (bool): If True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.

    Returns:
        None

    """
    LINE_STYLE = {0: "-", 1: "--", 2: "-.", 3: ":"}
    COLORS_ = sns.color_palette()
    LW = 1
    experiments_k = []  # k's of the experiment for the output file name
    from matplotlib.ticker import MaxNLocator
    # Create a figure
    plt.figure(num=1, figsize=(12, 10))
    title_ = "Anytime Lazy KNN Search\nTotal vs Actual # of Similarity Assessments\n\n"
    sns.set_style("whitegrid")
    for exp_id, experiment in enumerate(experiments):
        result = common.load_obj(experiment)
        fn_wo_ext = common.file_name_wo_ext(experiment)
        # Read experiment data
        data = result.data
        if total:
            k_calcs_total = data.knn_total_cumsum
            k = len(k_calcs_total[0]) - 1  # k of kNN
        if actual:
            k_calcs_actual = data.knn_actual_cumsum
            if not total:
                k = len(k_calcs_actual[0]) - 1  # k of kNN
        experiments_k.append(str(k))
        for i in range(k):
            # If all_k is False, plot k_calcs of all ki's of the 1st experiment
            # but only the 0th and kth of the other experiments.
            if all_k or exp_id == 0 or i == (k - 1) or i == 0:
                if total:
                    # Total calculation number for each kNN
                    dd = pd.DataFrame(np.array(k_calcs_total, dtype=int)[:, [0, i + 1]].tolist())
                    dd.columns = ["Update", "Comps"]
                    data = dd
                    label_str = "kNN[{}] total".format(i) if len(experiments) == 1 \
                        else "kNN[{}] total (Exp #{})".format(i, exp_id)
                    g = sns.regplot(x='Update', y='Comps', data=data, scatter=True,
                                    fit_reg=True, scatter_kws={"s": marker_size},
                                    order=3, ci=None,  # ci=100,
                                    color=COLORS_[i], truncate=True,
                                    line_kws={"linestyle": LINE_STYLE[exp_id % len(LINE_STYLE)],
                                              "label": label_str,
                                              "lw": LW})
                    plt.legend(frameon=True, loc="best", fontsize="large")
                if actual:
                    # Actual calculation number for each kNN
                    dd = pd.DataFrame(np.array(k_calcs_actual, dtype=int)[:, [0, i + 1]].tolist())
                    dd.columns = ["Update", "Comps"]
                    data = dd
                    label_str = "kNN[{}] actual".format(i) if len(experiments) == 1 \
                        else "kNN[{}] actual (Exp #{})".format(i, exp_id)
                    g = sns.regplot(x='Update', y='Comps', data=data, scatter=True,
                                    fit_reg=True, scatter_kws={"s": marker_size},
                                    order=3, ci=None,  # ci=100,
                                    marker="x", color=COLORS_[i], truncate=True,
                                    # Dash the 'actual' line; the modulo keeps the index within LINE_STYLE
                                    line_kws={"linestyle": LINE_STYLE[(exp_id + 1) % len(LINE_STYLE)],
                                              "label": label_str,
                                              "lw": LW})
                    plt.legend(frameon=True, loc="best", fontsize="large")
        # Update the title
        title_ = "{}{}Exp #{}: {}".format(title_, "\n" if exp_id > 0 else "", exp_id, fn_wo_ext)
    # g.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax = plt.gca()
    if all_ticks:
        x_ticks_ = dd["Update"].unique()
        ax.set_xticks(x_ticks_)
    # if not all_ticks:
    #     for ind, label in enumerate(g.get_xticklabels()):
    #         if ind % (len(x_ticks_) % 10) == 0:  # only 10% of the ticks is shown
    #             label.set_visible(True)
    #         else:
    #             label.set_visible(False)
    ax.grid(True, linestyle="dashed", linewidth=0.4)
    plt.xlabel("Problem index", fontsize="x-large")
    plt.ylabel("Cumulative # of sim calculations for the i^th NN", fontsize="x-large")
    # plt.rcParams['xtick.labelsize'] = "x-large"
    # plt.rcParams['ytick.labelsize'] = "x-large"
    ax.tick_params(axis='both', which='major', labelsize="large")
    if signature:
        plt.figtext(0.99, 0.01, 'rendered by \'insights_multiple\'.',
                    horizontalalignment='right', alpha=0.5, size="small")
    save_fn = 'CALCS_{}_{}{}{}-{}{}'.format(
        fn_wo_ext, "+".join(experiments_k),
        "-TOTAL" if total else "", "-ACTUAL" if actual else "",
        "ALL_K" if all_k or len(experiments) == 1 else "SELECT_K",
        "-MARKERS" if marker_size else "")
    # Update the title of the plot window
    plt.gcf().canvas.set_window_title(save_fn)
    if with_title:
        plt.title(title_ + "\n")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Total vs Actual Calcs figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
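# Usage sketch (hypothetical paths): overlay two insights experiments; with the default
# all_k=False, all kNN[i] curves are drawn for the first experiment but only kNN[0] and
# kNN[k-1] for the second:
#   insights_multiple(["/path/to/results/INS_A.pk", "/path/to/results/INS_B.pk"],
#                     file_format="pdf", total=True, actual=True, marker_size=2)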
def quality_map(experiment, file_format=None, ki=None, urange=None, ustep=1, colored=True,
                fill=False, with_title=True, signature=True, q_full_scale=True,
                start_calc=1, cull=None):
    """Plots the log scale quality map for a given kNN[ki] of all sample test sequences.

    Plots all (calc, sim) points between each major tick interval.

    Args:
        experiment (str): insights experiment results full file path
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg. If None, the plot is
            displayed and not saved.
        ki (int): zero-based index of the NN in the kNN list; if None, all kNNs are plotted.
        urange (tuple): range of updates to plot given as (start, end) where both are
            inclusive; if given as (start, ), the max update is used for the end;
            if None, all updates are plotted.
        ustep (int): a different color is chosen in the color palette for every `ustep`
            number of updates; irrelevant for colored=False.
        colored (bool): If True, each update is plotted in a different color; otherwise,
            all points are plotted black.
        fill (bool): If True, propagates the quality for the intermediate calc values;
            if False, plots only the given points provided as a sparse list.
        with_title (bool): If True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.
        q_full_scale (bool): If True, the quality (i.e. y) axis starts from 0.0; otherwise,
            the minimum quality is used.
        start_calc (int): The start value for the calculations (i.e. x) axis.
        cull (float): The percentage (.0, 1.] to cull the data points to be plotted

    Returns:
        None

    Note:
        Start: 20190918, End: 20191023

    """
    # Create a figure
    plt.figure(num=1)  # , figsize=(10, 8))
    # Read experiment data
    result = common.load_obj(experiment)
    knn_calc_sim_history = result.data.knn_calc_sim_history
    k = len(knn_calc_sim_history[0]) - 1  # k of kNN
    max_update = max([test[0] for test in knn_calc_sim_history])
    if urange is None:
        urange = (1, max_update)
    elif len(urange) == 1 or urange[1] > max_update:
        urange = (urange[0], max_update)
    max_X = 0
    # Fill in plot data
    CALCS = np.array([])
    QUALITY = np.array([])
    UPDATE = np.array([])
    knn_calc_sim_history = sorted(knn_calc_sim_history, key=lambda point: point[0],
                                  reverse=False)  # sort by updates
    for test_history in knn_calc_sim_history:
        update = test_history[0]
        if urange[0] <= update <= urange[1]:
            for nn_ind, nn_i_history in enumerate(test_history[1:]):
                if ki is None or nn_ind == ki:
                    points = pdp.quality(nn_i_history)
                    X, Y = helper.to_arr(points, fill=fill)
                    # Eliminate (0, 0.0) entries
                    if X[0] == 0:
                        X = X[1:]
                    if Y[0] == 0:
                        Y = Y[1:]
                    if max(X) > max_X:
                        max_X = max(X)
                    # Eliminate the entries < start_calc
                    X = X[X >= start_calc]
                    Y = Y[-len(X):]
                    CALCS = np.concatenate((CALCS, X))
                    QUALITY = np.concatenate((QUALITY, Y))
                    # UPDATE = np.concatenate((UPDATE, np.full((len(X),), math.ceil(update / ustep), dtype=int)))
                    UPDATE = np.concatenate((UPDATE, np.full((len(X),), update, dtype=int)))
    if cull:
        CALCS, ind_removed = helper.cull_arr(CALCS, pct=cull)
        QUALITY, _ = helper.cull_arr(QUALITY, ind=ind_removed)
        UPDATE, _ = helper.cull_arr(UPDATE, ind=ind_removed)
    if colored:
        # Color palette
        cmap = "autumn"  # "tab20"  # "Blues_r"
        cmap_size = math.ceil(max_update / ustep)
        my_palette = plt.cm.get_cmap(cmap, cmap_size)
        _ = plt.scatter(CALCS, QUALITY, marker=".", s=1, c=UPDATE, cmap=my_palette,
                        vmin=1, vmax=max_update, alpha=1.)
        cbar = plt.colorbar(orientation="vertical")
        cbar.set_label("updates")
    else:
        _ = plt.scatter(CALCS, QUALITY, marker=".", s=1, c="black")
    ax = plt.gca()
    ax.yaxis.set_major_locator(plt.MultipleLocator(.1))
    ax.yaxis.set_minor_locator(plt.MultipleLocator(.05))
    plt.grid(True, which="major", linestyle="-", linewidth=1, color="lightgrey")
    plt.grid(True, which="minor", linestyle=":", linewidth=1, color="lightgrey")
    xticks_ = plt_common.get_ticks_log_scale(max_X, start=start_calc)
    y_min = 0.0 if q_full_scale else math.floor(np.nanmin(QUALITY[start_calc - 1:]) * 10) / 10
    yticks_ = np.arange(y_min, 1.01, .1)
    plt.rcParams['axes.axisbelow'] = True
    plt.xscale('log')
    plt.xticks(xticks_)
    plt.xlim(left=start_calc, right=max(xticks_))
    plt.yticks(yticks_)
    if signature:
        plt_common.sign_plot(plt, quality_map.__name__)
    fn_wo_ext = common.file_name_wo_ext(experiment)
    lbl_ki = str(ki if ki is not None else [0, k - 1])  # zero-based
    lbl_update = str(list(urange) if urange[0] != urange[1] else urange[0])
    if with_title:
        title_ = "Quality Map\n"
        title_ = "{}Exp: {}\n".format(title_, fn_wo_ext)
        title_ = "{}ki:{}, update:{}".format(title_, lbl_ki, lbl_update)
        if colored:
            title_ = "{}, color step:{}".format(title_, ustep)
        if cull:
            title_ = "{}, cull:{:.0%}".format(title_, cull)
        plt.title(title_ + "\n\n")
    save_fn = "QUALITY_MAP_{}_ki_{}_u_{}{}{}{}{}{}".format(
        fn_wo_ext, lbl_ki, lbl_update,
        "_s_{}".format(ustep) if colored else "",
        "_f" if fill else "",
        "_t" if with_title else "",
        "_c_{:.2f}".format(cull) if cull else "",
        "_z" if not q_full_scale else "")
    # Axis labels
    matplotlib.rcParams['text.usetex'] = True  # Allow LaTeX in text
    ax.set_xlabel(r"$\#$ of similarity calculations ($c$)")
    ax.set_ylabel(r"quality ($\mathcal{Q}_c$)")
    # Tight layout
    plt.tight_layout()
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Quality Map figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
    return None
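# Usage sketch (hypothetical path and k): a color-coded quality map of the last NN for
# updates 1..10, culling 50% of the points to keep the figure light:
#   quality_map("/path/to/results/INS_MyDataset_....pk", file_format="png",
#               ki=8, urange=(1, 10), cull=.5)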
def gains_multiple(experiments, file_format="pdf", marker_size=1., color_ind=None,
                   with_title=True, signature=True):
    """Plots the gains for a list of insights experiments.

    Args:
        experiments (list): List of full paths to the `run.run_insights` experiment(s)
            result file(s).
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
        marker_size (float): size of the marker in scatter plot
        color_ind (int): Index in the `sns.color_palette()` for plotting a single experiment
        with_title (bool): If True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.

    Returns:
        None

    """
    COLORS_ = sns.color_palette()
    title_ = "Gain in similarity calcs compared to Brute Search"
    save_fn = ""
    sns.set(style='whitegrid')  # , font_scale=.9)
    plt.figure(num=1, figsize=(8, 6))
    for exp_id, experiment in enumerate(experiments):
        result = common.load_obj(experiment)
        fn_wo_ext = common.file_name_wo_ext(experiment)
        # title_ = "{}\nExp #{}: {}".format(title_, exp_id, fn)
        dd = pd.DataFrame(result.data.gain)
        dd.columns = ['Update', '% Gain']
        g = sns.regplot(x='Update', y='% Gain', data=dd, scatter=True, fit_reg=True,
                        scatter_kws={"s": marker_size}, order=3, ci=None,
                        line_kws={"label": "#{}: {}".format(exp_id, fn_wo_ext)},
                        # Color hack for presentations; `is None` so that palette index 0 is honored too
                        color=COLORS_[exp_id] if len(experiments) > 1 or color_ind is None
                        else COLORS_[color_ind],
                        truncate=True)
        plt.ylim(np.min(dd['% Gain']), 100.)  # np.max(dd['% Gain'])
        g.set(ylabel="Gain (%)")
        plt.xlim(np.min(dd['Update']), np.max(dd['Update']))
        g.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
        # plt.xticks(range(np.min(dd['Update']), np.max(dd['Update']) + 1))
        plt.gca().grid(True, linestyle="dashed", linewidth=0.4)
        plt.xlabel('Problem index')
        if not save_fn:
            save_fn = 'GAINS_{}'.format(fn_wo_ext)
    save_fn = "{}{}{}".format(
        save_fn,
        "_and_{}_more".format(len(experiments) - 1) if len(experiments) > 1 else "",
        "_MARKERS" if marker_size else "")
    if signature:
        plt_common.sign_plot(plt, gains_multiple.__name__)
    plt.legend(title="Experiments", frameon=True, loc="best", fontsize="small")
    if with_title:
        plt.title(title_ + "\n")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Gains figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
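# Usage sketch (hypothetical path): a single-experiment gain plot, forcing the third
# palette color via `color_ind` (only honored when exactly one experiment is plotted):
#   gains_multiple(["/path/to/results/INS_MyDataset_....pk"], file_format=None, color_ind=2)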
def pdp(pdp_file, file_format=None, update=1, ki=0, decimals=3, to_latex=False, start_q=0.0):
    """Plots or exports as LaTeX a PDP table for a given update and ki.

    Only the top n rows, where the n^th row is the first row with conf=1.0, are plotted
    or exported.

    Args:
        pdp_file (str): pickle file for the performance distribution profile
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg. If None, the plot is
            displayed and not saved.
        update (int): which update to plot.
        ki (int): zero-based index of the NN in the kNN list to plot.
        decimals (int): number of decimal digits for rounding probability and confidence values.
        to_latex (bool): If True, the PDP 2D excerpt is saved into a .tex file in the pdp
            folder and the figure is neither displayed nor saved.
        start_q (float): start column for the quality to be used to plot/export the PDP

    Returns:
        None

    """
    # File info
    # fn = basename(pdp_file)
    fn_wo_ext = common.file_name_wo_ext(pdp_file)
    save_fn = "PDP_FIG_{}_ki_{}_u_{}".format(fn_wo_ext, ki, update)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Read experiment data
    pdp_output = common.load_obj(pdp_file)
    pdp_all = pdp_output.data
    calc_step = pdp_output.settings.calc_step
    q_step = pdp_output.settings.q_step
    pdp = pdp_all[update - 1][ki]
    # pdp = pdp * 100
    # Filter PDP in order not to plot redundant rows with conf = 1.0
    pdp_rows_conf_1 = np.where(pdp[:, -1] == 1.0)[0]  # rows with conf=1.0
    if pdp_rows_conf_1.size > 0:  # If calc_step is very coarse, there may be no rows with conf=1.0
        top_n = pdp_rows_conf_1[0] + 1  # top n rows where the n^th row is the first row with conf=1.0
    else:
        top_n = pdp.shape[0]
    pdp = pdp[:top_n]
    nrows = pdp.shape[0]
    rows = ["{:d}".format(i * calc_step) for i in range(1, nrows + 1)]
    ncols = pdp.shape[1]
    decimals_q_step = len(str(q_step).split('.')[1])
    cols = ["{0:.{1}f}".format(i * q_step, decimals_q_step) for i in range(1, ncols + 1)]
    # Calculate the weighted mean of the probability distributions of quality (i.e. confidence)
    # and the std deviation for each row (i.e. calc range)
    q_ind_array = np.array([round(i * q_step, decimals) for i in range(1, ncols + 1)])
    conf_n_std_dev = np.apply_along_axis(
        lambda a: (np.average(q_ind_array, weights=a),  # conf
                   helper.weighted_std(q_ind_array, a)),  # std_dev
        axis=1, arr=pdp)
    # Add the conf and std_dev columns to the original pdp
    pdp = np.column_stack((pdp, conf_n_std_dev))
    cols = cols + ["Confidence", "\u03C3"]
    pdp = pdp.round(decimals)
    if start_q:
        start_col_ind = math.ceil(start_q / q_step) - 1
        pdp = pdp[:, start_col_ind:]  # Show only the quality columns >= start_q
        ncols = ncols - start_col_ind
        cols = cols[start_col_ind:]
    save_fn = "{}{}".format(save_fn, "_sq_{}".format(cols[0]) if start_q else "")
    cell_colors = plt.cm.Oranges(pdp)
    ########################
    # PDP to LaTeX
    if to_latex:
        # Make a table for the top n rows where the n^th row is the first row with conf=1.0
        pdp = pd.DataFrame(data=pdp, index=rows, columns=cols)
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        pdp.to_latex(buf=save_fpath, index=True,
                     float_format=lambda x: "{0:.{1}f}".format(x, decimals) if x != 0. else "")
        print("PDP saved as LaTeX table into '{}'.".format(save_fpath))
        return None
    ########################
    # Clear 0.0 cells
    pdp = pdp.astype("U{}".format(2 + decimals))  # len("0.") = 2
    pdp[pdp == "0.0"] = ""
    # Create a figure
    hcell, wcell = 0.3, .8
    hpad, wpad = 0, 0
    fig = plt.figure(figsize=(ncols * wcell + wpad, (nrows * hcell + hpad)))
    # fig = plt.figure()
    ax2 = fig.add_subplot(111)
    ax2.axis('off')
    # Add a table at the bottom of the axes
    table = ax2.table(cellText=pdp, rowLabels=rows, rowLoc='right',
                      rowColours=plt.cm.BuPu(np.linspace(0, 0.5, len(rows))),
                      colColours=plt.cm.YlGn(np.linspace(0, 0.5, len(cols))),
                      cellColours=cell_colors, colLabels=cols, loc='center')
    # Set "confidence" column header's color.
    c = table.get_celld()[(0, len(cols) - 1)]
    c.set_facecolor("w")
    title_ = "Performance Distribution Profile\n"
    title_ = "{}PDP: {}\n".format(title_, fn_wo_ext)
    title_ = "{}ki: {}, update: {}\n".format(title_, ki, update)
    # plt.subplots_adjust(left=0.2, top=0.8)
    plt.title(title_)
    plt.tight_layout()
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=150, bbox_inches="tight")
        print("PDP figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Exploit Candidates experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing Exploit Candidates experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--rtrim", nargs="*", type=str,
                        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    # Create output dataframe
    df_gains_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load 'exploit candidates' experiment output
        exp_exploit_output = common.load_obj(exp)  # type: exploit.ExpExploitOutput
        # Get the dataset name
        dataset_name = common.file_name_wo_ext(exp_exploit_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
        # Get the average gains in experiments
        exp_exploit_data = exp_exploit_output.data
        exp_exploit_data = exp_exploit_data.loc[exp_exploit_data["update"] != 0]  # Filter the initial problem
        df_avg_gains = exp_exploit_data[["gain", "iterator"]].groupby("iterator").mean()
        # Avg gain dict
        dict_avg_gains = {LBL_DATASET: dataset_name,
                          LBL_FWIDTH: run_common.time_window_width_str(exp_exploit_output.settings.tw_width),
                          LBL_FSTEP: exp_exploit_output.settings.tw_step}
        # avg_gain_keys = [str(c) if c is not None else "-" for c in df_avg_gains.index.tolist()]
        avg_gain_keys = df_avg_gains.index.tolist()
        avg_gain_values = df_avg_gains["gain"].values
        # Add the results to the output dataframe
        dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
        df_gains_output = df_gains_output.append(dict_avg_gains, ignore_index=True)
    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        # Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        # df_gains_output = df_gains_output.round(dec_digits)
        save_fn = "gain_exploit_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath, float_format=float_formatter,
                                 escape=False, multirow=True, index=True)
        print("Avg Gain for TopDown vs ExploitCandidates Rank Iterations saved as LaTeX table into '{}'."
              .format(save_fpath))
    else:
        print("No average gain results could be calculated.")
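# Example invocation (the module path and folder below are hypothetical; the script
# summarizes every result file in the folder into one LaTeX gains table):
#   $ python -m alk.run.gain_exploit -p ~/alk/results/exploit --rtrim _TRAIN _TEST -d 2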
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Insights experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing insights experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--rtrim", nargs="*", type=str,
                        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x
    int_formatter = lambda x: '{:,}'.format(x)
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_UPDATES = "Updates"
    LBL_CB_SIZE = "\u007CCB\u007C"
    LBL_GAIN = "Gain"
    # Create output dataframe
    df_gains_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP,
                                            LBL_UPDATES, LBL_CB_SIZE, LBL_GAIN])
    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load insights experiment
        exp_insights_output = common.load_obj(exp)  # type: insights.ExpInsightsOutput
        dataset_name = common.file_name_wo_ext(exp_insights_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
        # Get the average gain for the insights experiment
        avg_gain = insights.get_avg_gain_for_exp(exp)
        n_updates = np.max([u[0] for u in exp_insights_output.data.gain])
        # Avg gain dict
        dict_avg_gains = {LBL_DATASET: dataset_name,
                          LBL_FWIDTH: time_window_width_str(exp_insights_output.settings.tw_width),
                          LBL_FSTEP: exp_insights_output.settings.tw_step,
                          LBL_UPDATES: n_updates + 1,
                          LBL_CB_SIZE: exp_insights_output.settings.cb_size,
                          LBL_GAIN: avg_gain}
        # Add the results to the output dataframe
        df_gains_output = df_gains_output.append(dict_avg_gains, ignore_index=True)
    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        # Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        # df_gains_output = df_gains_output.round(dec_digits)
        save_fn = "gain_insights_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 formatters={LBL_UPDATES: int_formatter,
                                             LBL_CB_SIZE: int_formatter},
                                 float_format=float_formatter, escape=False,
                                 multirow=True, index=True)
        print("Avg Gain results saved as LaTeX table into '{}'.".format(save_fpath))
    else:
        print("No average gain results could be calculated.")
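# Example invocation (hypothetical module path and folder):
#   $ python -m alk.run.gain_insights -p ~/alk/results/insights --rtrim _TRAIN _TEST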
def efficiency(experiments, file_format="png", y_to_use="effcyq", rtrim=None, outliers=False,
               with_title=True, signature=True, palette="tab20", maximized=True, aspect=None):
    """Plots efficiency (f.k.a. confidence performance) for given interruption experiments.

    Args:
        experiments (List[str]): list of experiment file paths
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg. If None, the plot is
            displayed and not saved.
        y_to_use (str): One of the ['abspcterr', 'abserr', 'effcysim', 'effcyq'] fields in
            the experiment result dataframe, standing for the absolute percentage error,
            absolute error, efficiency (using sim) and efficiency (using quality) of
            confidence, respectively.
        rtrim (list): Remove given strings at the end of dataset names; e.g. ['_TRAIN', '_TEST']
        outliers (bool): If True, outliers are plotted.
        with_title (bool): If True, show the generated plot title.
        signature (bool): If True, write the name of this function.
        palette (str): A matplotlib colormap, e.g. 'tab20', 'Purples_d'.
        maximized (bool): If True, maximize the plot to full screen.
        aspect (float): Desired aspect ratio (i.e. height/width) of the figure. If not None,
            the width is re-adjusted for this ratio while the height remains the same.

    Returns:
        None

    Raises:
        ValueError: i) If the dataset names for the experiments are different;
            ii) If `y_to_use` is not in the valid options.

    """
    Y_OPTIONS = {"abspcterr": "absolute percentage error (%)",
                 "abserr": "absolute error",
                 "effcysim": r"efficiency ($\eta$)",  # using sim
                 "effcyq": r"efficiency ($\eta$)"}  # using quality
    if y_to_use not in Y_OPTIONS.keys():
        raise ValueError("Non-valid value for y_to_use argument. Should be in {}".format(
            list(Y_OPTIONS.keys())))
    df_output = None
    # Variables for output file name
    dataset_name = None
    w_list = []
    step_list = []
    z_list = []  # z=-nstd in the new efficiency definition
    # Populate summary dictionary
    for exp in experiments:
        # Read experiment data
        result = common.load_obj(exp)  # type: intrpt.ExpIntrptOutput
        # Update output DataFrame
        data = result.data
        data["setting"] = "$w$:{}, $step$:{}, $z$:{}".format(result.settings.tw_width,
                                                             result.settings.tw_step,
                                                             helper.is_float_int(result.settings.z))
        df_output = pd.concat([df_output, data])
        # Update variables for output file name
        if dataset_name is None:
            dataset_name = common.file_name_wo_ext(result.settings.dataset)
            if rtrim:
                for tag in rtrim:  # Trim end tags
                    dataset_name = re.sub("{}$".format(tag), "", dataset_name)
        elif result.settings.dataset.find(dataset_name) == -1:
            # dataset_name = "Misc_{}".format(time.strftime("%Y%m%d"))
            raise ValueError("Plotting different datasets not allowed yet: {}, {}.".format(
                dataset_name, result.settings.dataset))
        w_list.append(result.settings.tw_width)
        step_list.append(result.settings.tw_step)
        z_list.append(helper.is_float_int(result.settings.z))
    # Plot
    plt.figure()
    ax = plt.gca()
    # Show grid
    ax.grid(True, linestyle="dashed", linewidth=.5)
    # Grouped boxplot
    sns.boxplot(x="confthold", y=y_to_use, hue="setting", data=df_output, palette=palette,
                linewidth=.5, showfliers=outliers, fliersize=.2)
    if y_to_use in ["effcysim", "effcyq"]:
        # Draw a horizontal line for y=1 (efficiency approx. 1)
        ax.axhline(1, linestyle="--", color="black", linewidth=1.)
    # Axis labels
    matplotlib.rcParams['text.usetex'] = True  # Allow LaTeX in text
    ax.set_xlabel(r"confidence thresholds ($\mu\!+\!z\sigma$) for interruption")
    ax.set_ylabel("{}".format(Y_OPTIONS[y_to_use]))
    # Plot title
    if with_title:
        plt.title("{} of Confidence in Interruption Tests\nfor ${}$".format(
            Y_OPTIONS[y_to_use].capitalize(), dataset_name.replace("_", r"\_")))
    # Add the signature
    if signature:
        plt_common.sign_plot(plt, efficiency.__name__)
    # Maximize plot to full screen
    if maximized:
        plt.legend(fontsize="medium")
        manager = plt.get_current_fig_manager()
        backend_ = matplotlib.get_backend()
        if backend_.upper() == "TKAGG":
            manager.resize(*manager.window.maxsize())
        elif backend_.upper().startswith("QT"):
            manager.window.showMaximized()
        elif backend_.find("interagg") != -1:  # Hack for PyCharm SciView
            pass
        else:  # Try your chance
            manager.resize(*manager.window.maxsize())
    else:
        plt.legend(fontsize="small")
    # File name/Window title (unique setting values, in sorted order)
    w_list = [str(_) for _ in sorted(set(w_list))]
    step_list = [str(_) for _ in sorted(set(step_list))]
    z_list = [str(_) for _ in sorted(set(z_list))]
    save_fn = "EFF_{}(x{})_w_[{}]_s_[{}]_z_[{}]_{}{}{}{}".format(
        dataset_name, len(experiments), "_".join(w_list), "_".join(step_list),
        "_".join(z_list), y_to_use,
        "_a_{}".format(aspect) if aspect else "",
        "_o" if outliers else "",
        "_t" if with_title else "")
    # Aspect ratio
    if aspect:
        # figw, figh = plt.rcParams["figure.figsize"]
        # plt.rcParams["figure.figsize"] = [figw / aspect, figh]
        fig = plt.gcf()
        figw, figh = fig.get_size_inches()
        fig.set_size_inches(figh / aspect, figh, forward=True)
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Confidence efficiency figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
    return None
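# Usage sketch (hypothetical paths): compare the quality-based efficiency of two window
# settings for the same dataset; mixing different datasets raises ValueError:
#   efficiency(["/path/to/results/INT_MyDataset_w_40_s_10_....pk",
#               "/path/to/results/INT_MyDataset_w_40_s_40_....pk"],
#              file_format=None, y_to_use="effcyq", outliers=False)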
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Classification experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing classification experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c", "--confthold", nargs="+", type=float, default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z", "--z", type=float, default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--knni", type=int,
                        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
                             " 'None' to calculate for all kNNs."
                             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument("--wsoln", choices=[0, 1], type=int, default=0,
                        help="1 to display hits upon interruption w/ exact solution")
    parser.add_argument("--rtrim", nargs="*", type=str,
                        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits)
    float_formatter_hit = lambda x: "{0:.{1}f}".format(x * 100, dec_digits) if isinstance(x, (int, float)) else x
    wsoln = args.wsoln
    wsoln_tag = "_ws_{}".format(wsoln) if wsoln else ""
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_STOP_W_SOLN = "w\u2215Soln"
    # Create output dataframe.
    # Exclude conf=1.00 for hits; it makes no sense for the uninterrupted case.
    df_hits_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                ([LBL_STOP_W_SOLN] if wsoln else []) +
                [gain_intrpt_classify.conf_col_label(c, float_formatter, arg_z)
                 for c in conf_thold if c != 1])
    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load classification experiment
        exp_output = common.load_obj(exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(helper.is_float_int(arg_z),
                                                            helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average hits in the classification experiment
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
            # Avg hit dict
            df_avg_hits = alk.exp.classify.get_avg_hit_for_classify_exp(exp, conf_thold, wsoln,
                                                                        lblwsoln=LBL_STOP_W_SOLN)
            dict_avg_hits = {LBL_DATASET: dataset_name,
                             LBL_FWIDTH: run_common.time_window_width_str(exp_output.settings.tw_width),
                             LBL_FSTEP: exp_output.settings.tw_step}
            avg_hits_keys = [gain_intrpt_classify.conf_col_label(c, float_formatter, exp_z)
                             if isinstance(c, float) else c
                             for c in df_avg_hits.index.tolist()]
            avg_hits_values = df_avg_hits["hit"].values
            dict_avg_hits.update(dict(zip(avg_hits_keys, avg_hits_values)))
            # Add the results to the output dataframe
            df_hits_output = df_hits_output.append(dict_avg_hits, ignore_index=True)
    # Export the df_hits to LaTeX
    if len(df_hits_output) > 0:
        # Create a multiindex for a sorted (and prettier) output
        df_hits_output = df_hits_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_hits_output = df_hits_output.sort_index()
        save_fn_hit = "soln_hit_(x{})_[{}]_sd_{}_ki_{}{}".format(
            len(df_hits_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z), knn_i if knn_i is not None else "All", wsoln_tag)
        save_fpath_hit = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn_hit))
        df_hits_output.to_latex(buf=save_fpath_hit, float_format=float_formatter_hit,
                                escape=False, multirow=True, index=True)
        print("Avg Solution Hit % saved as LaTeX table into '{}'.".format(save_fpath_hit))
    else:
        print("No average solution hit results could be calculated.")
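# Example invocation (hypothetical module path; only experiments whose 'z' setting matches
# the -z argument are summarized):
#   $ python -m alk.run.soln_hit_classify -p ~/alk/results/classify -c 1. .98 .95 -z -1 --wsoln 1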
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Interruption/classification experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c", "--confthold", nargs="+", type=float, default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z", "--z", type=float, default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--knni", type=int,
                        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
                             " 'None' to calculate for all kNNs."
                             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument("--clsfy", choices=[0, 1], type=int, default=0,
                        help="0 for interruption experiments;"
                             " 1 for classification experiments to display gains also upon interruption w/ exact solution.")
    parser.add_argument("--rtrim", nargs="*", type=str,
                        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits)
    clsfy = args.clsfy
    exp_tag = "{}".format("classify" if clsfy else "intrpt")
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_CONF_PERF = "Effcy"
    LBL_CONF_PERF_STD = "\u03C3"
    LBL_STOP_W_SOLN = "w\u2215Soln"
    # Create output dataframe
    df_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                                     ([LBL_STOP_W_SOLN] if clsfy else []) +
                                     [conf_col_label(c, float_formatter, arg_z) for c in conf_thold] +
                                     [LBL_CONF_PERF, LBL_CONF_PERF_STD])
    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load interruption/classification experiment
        exp_output = common.load_obj(exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(helper.is_float_int(arg_z),
                                                            helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average gains in the interruption/classification experiment.
            # Average gain is calculated for the last kNN member for confthold experiments and
            # for stopwsoln=1 for the interruption w/ exact solution experiments (if wsoln=True).
            if not clsfy:
                df_avg_gains = intrpt.get_avg_gain_for_intrpt_exp(exp, conf_thold)
            else:
                df_avg_gains = classify.get_avg_gain_for_classify_exp(exp, conf_thold, wsoln=True,
                                                                      lblwsoln=LBL_STOP_W_SOLN)
            # Add the results to the output dataframe
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
            dict_avg_gains = {LBL_DATASET: dataset_name,
                              LBL_FWIDTH: run_common.time_window_width_str(exp_output.settings.tw_width),
                              LBL_FSTEP: exp_output.settings.tw_step}
            avg_gain_keys = [conf_col_label(c, float_formatter, exp_z) if isinstance(c, float) else c
                             for c in df_avg_gains.index.tolist()]
            avg_gain_values = df_avg_gains["gain"].values
            dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
            # Add average efficiency and its std deviation columns too
            if not clsfy:
                avg_conf_perf, avg_conf_perf_std = intrpt.get_avg_effcy_for_intrpt_exp(exp, knn_i=knn_i)
            else:
                avg_conf_perf, avg_conf_perf_std = classify.get_avg_effcy_for_classify_exp(exp, knn_i=knn_i)
            dict_avg_gains.update({LBL_CONF_PERF: avg_conf_perf,
                                   LBL_CONF_PERF_STD: avg_conf_perf_std})
            df_output = df_output.append(dict_avg_gains, ignore_index=True)
    # Export the df to LaTeX
    if len(df_output) > 0:
        # Swap wsoln and 1.0 columns
        if clsfy:
            unint_col = conf_col_label(1., float_formatter, arg_z)
            gain_cols = df_output.columns.tolist()
            if unint_col in gain_cols:
                unint_col_idx = gain_cols.index(unint_col)
                wsoln_col_idx = gain_cols.index(LBL_STOP_W_SOLN)
                gain_cols[unint_col_idx], gain_cols[wsoln_col_idx] = \
                    gain_cols[wsoln_col_idx], gain_cols[unint_col_idx]
                df_output = df_output[gain_cols]
        # Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        # df_output = df_output.round(dec_digits)
        save_fn = "gain_{}_(x{})_[{}]_sd_{}_ki_{}".format(
            exp_tag, len(df_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z), knn_i if knn_i is not None else "All")
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_output.to_latex(buf=save_fpath, float_format=float_formatter, escape=False,
                           multirow=True, index=True)
        print_msg_header = "Avg Gain for Interruptions at Confidence Thresholds{}".format(
            " and with Exact Solutions" if clsfy else "")
        print("{} saved as LaTeX table into '{}'.".format(print_msg_header, save_fpath))
    else:
        print("No average gain results could be calculated.")
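# Example invocation (hypothetical module path; pass --clsfy 1 for classification
# experiments to add the 'w/Soln' gain column):
#   $ python -m alk.run.gain_intrpt_classify -p ~/alk/results/intrpt -c 1. .98 .95 .9 -z -1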