Example #1
def gen_intrpt_output_f_path(dataset, pdp_file, tw_width, tw_step, k, conf_tholds, z, test_size, cls_rank_iterator, suffix=""):
    """Returns full path of the output file for the interruption experiment results"""
    dataset_name = os.path.splitext(os.path.basename(dataset))[0]  # Base file name w/o extension
    pdp_output = common.load_obj(pdp_file)  # type: pdp.PDPOutput
    calc_step = pdp_output.settings.calc_step_arg
    q_step = pdp_output.settings.q_step
    pdp_dataset = exp_common.get_setting(pdp_output.settings.experiment, "dataset")
    pdp_dataset = common.file_name_wo_ext(pdp_dataset)
    rank_iter_tag = cls_rank_iterator.abbrv
    z_tag = int(z) if int(z) == z else z
    conf_thold_tag = "[{}]".format("_".join([str(ct) for ct in conf_tholds])) if conf_tholds is not None else ""
    out_file = os.path.join(common.APP.FOLDER.RESULT,
                            "INT_{d}_w_{w}_s_{s}_k_{k}_r_{r}_PDP_{dp}_c_{c}_q_{q}__ct_{ct}_z_{z}_t_{t}{x}{e}".format(
                                d=dataset_name, w=tw_width, s=tw_step, k=k, r=rank_iter_tag, dp=pdp_dataset, c=calc_step, q=q_step,
                                ct=conf_thold_tag, z=z_tag, t=str(test_size), x=suffix, e=common.APP.FILE_EXT.PICKLE))
    return out_file
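A minimal usage sketch, assuming the surrounding modules are imported; the file paths and the `TopDownIterator` name below are hypothetical placeholders, and the iterator is only assumed to expose an `abbrv` attribute as the function requires:

# Hypothetical usage; paths and TopDownIterator are placeholders, not actual project names.
out_path = gen_intrpt_output_f_path(
    dataset="/data/GunPoint_TEST.arff",    # hypothetical dataset file
    pdp_file="/results/pdp_gunpoint.pk",   # hypothetical PDP output pickle
    tw_width=40, tw_step=10, k=9,
    conf_tholds=[.98, .95, .9], z=-1., test_size=0.1,
    cls_rank_iterator=TopDownIterator)     # assumed: a class with an `abbrv` attribute
print(out_path)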
Example #2
def cb_sequences(dataset, file_format="pdf", width=0, step=1, seqs=[0], upd_ind=None,
                 full=True, with_title=True, signature=True, **kwargs):
    """Plots given sequences of a given time series CB.

    Args:
        dataset (str): The sequence dataset file to be converted to CB.
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            If None, the plot is displayed instead of being saved.
        width (int): if > 0, width of the moving time window; otherwise, the expanding-window approach is applied.
        step (int): number of steps (in terms of data points in TS) taken by the window at each update.
                This can also be seen as the number of data points changed in each update.
        seqs (list or int): if a `list`, the items are the indexes of the sequences in the CB to be plotted;
            if an `int`, that many randomly selected sequences are plotted.
        upd_ind (int): the update (i.e. the upd_ind^th problem profile) of the sequences to be plotted;
            if None, the *last* update of the sequences is plotted.
        full (bool): if True, plots the full sequence in a faded shade of its color under the given `upd_ind` profile.
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.
        **kwargs: passed on to the `matplotlib.pyplot.plot` function.
    Returns:
        list : indices of the plotted sequences.

    """
    COLORS_ = plt.rcParams['axes.prop_cycle'].by_key()['color']
    cb = ts.gen_cb(dataset=dataset, tw_width=width, tw_step=step)
    max_updates = max([len(seq.data) for seq in cb.sequences()])  # Data points
    max_profiles = max([seq.n_profiles() for seq in cb.sequences()])  # Cases, max_updates=max_profiles if step=1
    Y_max, Y_min = ts.get_max_min(dataset)
    X = list(range(max_updates))
    xticks_ = list(np.arange(0, max_updates, step=math.ceil(max_updates / 10)))
    if max_updates - 1 not in xticks_:
        xticks_.append(max_updates - 1)
    if upd_ind is None:
        upd_ind = max_profiles - 1

    if not isinstance(seqs, list):
        seq_ind = random.sample(range(len(cb.sequences())), seqs)
    else:
        seq_ind = seqs
    for ix, ind in enumerate(seq_ind):
        case_features = cb[ind].profile(idx=upd_ind)
        start_, end_ = cb[ind]._get_range_profile(upd_ind, len(cb[ind].data), width, step)
        pad_left = start_
        pad_right = max_updates - end_
        Y = [None] * pad_left + list(case_features) + [None] * pad_right  # pad both sides to align with the full X range
        if full and upd_ind < max_updates:
            full_seq = cb[ind].data
            plt.plot(X, list(full_seq), label=None, color=COLORS_[2 + ix], alpha=0.2)  # Hack not to repeat gain plot colors
        plt.plot(X, Y, color=COLORS_[2 + ix], label="Seq {}".format(ind), alpha=0.8 if ix > 0 else 1, **kwargs)
    plt.xticks(xticks_)
    plt.xlim(left=min(xticks_))
    plt.ylim(Y_min, Y_max)
    # Show both gridlines
    ax = plt.gca()
    ax.grid(True, linestyle=":", linewidth=.5)
    plt.ylabel("value", fontsize="large")
    plt.xlabel("data point", fontsize="large")
    # plt.margins(x=0)  # ! This removes the None Y values from the plot, which we do NOT want.
    plt.legend(fontsize="medium", ncol=2)
    fn_wo_ext = common.file_name_wo_ext(dataset)
    if with_title:
        title_ = "Case Base Sequences\n"
        title_ = "{}CB: {}\n".format(title_, fn_wo_ext)
        title_ = "{}Time-window width:{}, step:{},  Update:{}".format(title_, run_common.time_window_width_str(width), step, upd_ind)
        plt.title(title_ + "\n")
    if signature:
        plt_common.sign_plot(plt, cb_sequences.__name__)
    plt.tight_layout()
    save_fn = "CB_SEQUENCES_{}_w_{}_s_{}_seqs_{}_u_{}{}".format(fn_wo_ext,
                                                                width,
                                                                step,
                                                                str(seqs),
                                                                upd_ind,
                                                                "_t" if with_title else "")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=150, bbox_inches="tight")
        print("CB sequences figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)  # canvas.set_window_title was removed in matplotlib 3.6
        plt.show()
    plt.close()
    return seq_ind
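A minimal usage sketch, assuming the dataset path (a hypothetical placeholder) points to a time-series file readable by `ts.gen_cb`:

# Hypothetical usage; the dataset path is a placeholder.
plotted_ids = cb_sequences("/data/GunPoint_TRAIN.arff",  # hypothetical path
                           file_format=None,  # display instead of saving
                           width=40, step=10,
                           seqs=3)            # plot 3 randomly selected sequences
print("Plotted sequences:", plotted_ids)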
Example #3
def quality_map(experiment,
                file_format=None,
                ki=None,
                urange=None,
                ustep=1,
                colored=True,
                fill=False,
                with_title=True,
                signature=True,
                q_full_scale=True,
                start_calc=1,
                cull=None):
    """Plots the log scale quality map for a given kNN[j] of all sample test sequences.

    Plots all or ppi (calc, sim) points between each major tick interval

    Args:
        experiment (str): insights experiment results full file path
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        ki (int): zero-based index of the NN in kNN list; if None, all kNNs are plotted.
        urange (tuple): range of updates to plot given as (start, end) where both are inclusive;
            if given as (start, ), max update is used for end;
            if None, all updates are plotted.
        ustep (int): a different color is chosen in the color palette for every `ustep` number of updates;
            irrelevant for colored=False.
        colored (bool): if True, each update is plotted in a different color; otherwise, all are plotted black.
        fill (bool): if True, propagates the quality for the intermediate calc values;
            if False, plots only the given points provided as sparse list.
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.
        q_full_scale (bool): if True, quality (i.e. y) axis starts from 0.0; otherwise minimum quality is used.
        start_calc (int): The start value for the calculations (i.e. x) axis.
        cull (float): The fraction in (0.0, 1.0] of the data points to cull before plotting.

    Returns:
        None

    Note:
        Start: 20190918, End:20191023
    """
    # Create a figure
    plt.figure(num=1)  # , figsize=(10, 8))
    # Read experiment data
    result = common.load_obj(experiment)
    knn_calc_sim_history = result.data.knn_calc_sim_history
    k = len(knn_calc_sim_history[0]) - 1  # k of kNN
    max_update = max([test[0] for test in knn_calc_sim_history])
    if urange is None:
        urange = (1, max_update)
    elif len(urange) == 1 or urange[1] > max_update:
        urange = (urange[0], max_update)
    max_X = 0
    # Fill in plot data
    CALCS = np.array([])
    QUALITY = np.array([])
    UPDATE = np.array([])
    knn_calc_sim_history = sorted(knn_calc_sim_history,
                                  key=lambda point: point[0],
                                  reverse=False)  # sort by updates
    for test_history in knn_calc_sim_history:
        # print(test_history)
        update = test_history[0]
        if urange[0] <= update <= urange[1]:
            # print("update: ", update)
            for nn_ind, nn_i_history in enumerate(test_history[1:]):
                if ki is None or nn_ind == ki:
                    points = pdp.quality(nn_i_history)
                    # print(points)
                    X, Y = helper.to_arr(points, fill=fill)
                    # Eliminate the (0, 0.0) entry, if present (drop X and Y together to keep them aligned)
                    if X[0] == 0:
                        X = X[1:]
                        Y = Y[1:]
                    if max(X) > max_X:
                        max_X = max(X)
                    # Eliminate the entries < start_calc
                    X = X[X >= start_calc]
                    Y = Y[len(Y) - len(X):]  # len(Y) - len(X) avoids the Y[-0:] pitfall when X is empty
                    CALCS = np.concatenate((CALCS, X))
                    QUALITY = np.concatenate((QUALITY, Y))
                    # UPDATE = np.concatenate((UPDATE, np.full((len(X),), math.ceil(update / ustep), dtype=int)))
                    UPDATE = np.concatenate(
                        (UPDATE, np.full((len(X), ), update, dtype=int)))  # np.int was removed in NumPy 1.24
    if cull:
        CALCS, ind_removed = helper.cull_arr(CALCS, pct=cull)
        QUALITY, _ = helper.cull_arr(QUALITY, ind=ind_removed)
        UPDATE, _ = helper.cull_arr(UPDATE, ind=ind_removed)
    if colored:
        # Color palette
        cmap = "autumn"  # "autumn"  "tab20"  # "Blues_r"
        cmap_size = math.ceil(max_update / ustep)
        my_palette = plt.cm.get_cmap(cmap, cmap_size)
        _ = plt.scatter(CALCS,
                        QUALITY,
                        marker=".",
                        s=1,
                        c=UPDATE,
                        cmap=my_palette,
                        vmin=1,
                        vmax=max_update,
                        alpha=1.)
        cbar = plt.colorbar(orientation="vertical")
        cbar.set_label("updates")
    else:
        _ = plt.scatter(CALCS, QUALITY, marker=".", s=1, c="black")
    ax = plt.gca()
    ax.yaxis.set_major_locator(plt.MultipleLocator(.1))
    ax.yaxis.set_minor_locator(plt.MultipleLocator(.05))
    plt.grid(True,
             which="major",
             linestyle="-",
             linewidth=1,
             color="lightgrey")
    plt.grid(True,
             which="minor",
             linestyle=":",
             linewidth=1,
             color="lightgrey")
    xticks_ = plt_common.get_ticks_log_scale(max_X, start=start_calc)
    y_min = 0.0 if q_full_scale else math.floor(
        np.nanmin(QUALITY[start_calc - 1:]) * 10) / 10
    yticks_ = np.arange(y_min, 1.01, .1)
    plt.rcParams['axes.axisbelow'] = True
    plt.xscale('log')
    plt.xticks(xticks_)
    plt.xlim(left=start_calc, right=max(xticks_))
    plt.yticks(yticks_)
    if signature:
        plt_common.sign_plot(plt, quality_map.__name__)
    fn_wo_ext = common.file_name_wo_ext(experiment)
    lbl_ki = str(ki if ki is not None else [0, k - 1])  # zero-based
    lbl_update = str(list(urange) if urange[0] != urange[1] else urange[0])
    if with_title:
        title_ = "Quality Map\n"
        title_ = "{}Exp: {}\n".format(title_, fn_wo_ext)
        title_ = "{}ki:{}, update:{}".format(title_, lbl_ki, lbl_update)
        if colored:
            title_ = "{}, color step:{}".format(title_, ustep)
        if cull:
            title_ = "{}, cull:{:.0%}".format(title_, cull)
        plt.title(title_ + "\n\n")
    save_fn = "QUALITY_MAP_{}_ki_{}_u_{}{}{}{}{}{}".format(
        fn_wo_ext, lbl_ki, lbl_update,
        "_s_{}".format(ustep) if colored else "", "_f" if fill else "",
        "_t" if with_title else "", "_c_{:.2f}".format(cull) if cull else "",
        "_z" if not q_full_scale else "")
    # axis labels
    matplotlib.rcParams['text.usetex'] = True  # Allow LaTeX in text
    ax.set_xlabel("$\#$ of similarity calculations ($c$)")
    ax.set_ylabel(r"quality ($\mathcal{Q}_c$)")
    # Tight layout
    plt.tight_layout()
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Quality Map figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)
        plt.show()
    plt.close()

    return None
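A minimal usage sketch, assuming an insights experiment result pickle produced beforehand; the result path is a hypothetical placeholder:

# Hypothetical usage; the result path is a placeholder.
quality_map("/results/INS_GunPoint_w_40_s_10.pk",  # hypothetical insights result file
            ki=8,             # only the 9th NN (zero-based); None would plot all kNNs
            urange=(1, 50),   # updates 1..50, both inclusive
            fill=True,        # propagate quality between sparse calc points
            cull=.5)          # plot only half of the points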
Example #4
def insights_multiple(experiments,
                      file_format="pdf",
                      total=True,
                      actual=True,
                      all_k=False,
                      marker_size=0,
                      all_ticks=True,
                      with_title=True,
                      signature=True):
    """Plots the total and actual calculations made to find kNNs for multiple experiments on the same figure.

    Args:
        experiments (list): List of full paths to the `run.run_insights` experiment(s) result file(s).
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
        total (bool): If True, plots the total number of calculations made for the ki's.
        actual (bool): If True, plots the actual number of calculations made for the ki's.
        all_k (bool): If False, plots all ki's in the first experiment and then plots
            only the calcs for the kth NN for the experiments with index>=1;
            otherwise, plots all ki's for all experiments. If there is only one
            experiment to plot, this argument is ignored.
        marker_size (float): size of the marker in scatter plot
        all_ticks (bool): If True, all x-ticks are displayed.
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.

    Returns:
        None

    """
    LINE_STYLE = {0: "-", 1: "--", 2: "-.", 3: ":"}
    COLORS_ = sns.color_palette()
    LW = 1
    experiments_k = []  # k's of the experiment for the output file name
    from matplotlib.ticker import MaxNLocator
    # Create a figure
    plt.figure(num=1, figsize=(12, 10))
    title_ = "Anytime Lazy KNN Search\nTotal vs Actual # of Similarity Assessments\n\n"
    sns.set_style("whitegrid")
    for exp_id, experiment in enumerate(experiments):
        result = common.load_obj(experiment)
        fn_wo_ext = common.file_name_wo_ext(experiment)
        # Read experiment data
        data = result.data
        if total:
            k_calcs_total = data.knn_total_cumsum
            k = len(k_calcs_total[0]) - 1  # k of kNN
        if actual:
            k_calcs_actual = data.knn_actual_cumsum
            if not total:
                k = len(k_calcs_actual[0]) - 1  # k of kNN
        experiments_k.append(str(k))
        for i in range(k):
            # If all_k is False, plot k_calcs of all ki's of the 1st experiment but only the 0^th and k'th of the other experiments.
            if all_k or exp_id == 0 or i == (k - 1) or i == 0:
                if total:
                    # Total calculation number for each kNN
                    dd = pd.DataFrame(
                        np.array(k_calcs_total,
                                 dtype=int)[:, [0, i + 1]].tolist())
                    dd.columns = ["Update", "Comps"]
                    data = dd
                    label_str = "kNN[{}] total".format(i) if len(
                        experiments
                    ) == 1 else "kNN[{}] total (Exp #{})".format(i, exp_id)
                    g = sns.regplot(
                        x='Update',
                        y='Comps',
                        data=data,
                        scatter=True,
                        fit_reg=True,
                        scatter_kws={"s": marker_size},
                        order=3,
                        ci=None,  # ci=100,
                        color=COLORS_[i],
                        truncate=True,
                        line_kws={
                            "linestyle": LINE_STYLE[exp_id % len(LINE_STYLE)],
                            "label": label_str,
                            "lw": LW
                        })
                    plt.legend(frameon=True, loc="best", fontsize="large")
                if actual:
                    # Actual calculation number for each kNN
                    dd = pd.DataFrame(
                        np.array(k_calcs_actual,
                                 dtype=int)[:, [0, i + 1]].tolist())
                    dd.columns = ["Update", "Comps"]
                    data = dd
                    label_str = "kNN[{}] actual".format(i) if len(
                        experiments
                    ) == 1 else "kNN[{}] actual (Exp #{})".format(i, exp_id)
                    g = sns.regplot(
                        x='Update',
                        y='Comps',
                        data=data,
                        scatter=True,
                        fit_reg=True,
                        scatter_kws={"s": marker_size},
                        order=3,
                        ci=None,  # ci=100,
                        marker="x",
                        color=COLORS_[i],
                        truncate=True,
                        line_kws={
                            "linestyle":
                            LINE_STYLE[(exp_id % len(LINE_STYLE)) +
                                       1],  # Actual dashed
                            "label": label_str,
                            "lw": LW
                        })
                    plt.legend(frameon=True, loc="best", fontsize="large")
        # Update the title
        title_ = "{}{}Exp #{}: {}".format(title_, "\n" if exp_id > 0 else "",
                                          exp_id, fn_wo_ext)

    #g.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax = plt.gca()
    if all_ticks:
        x_ticks_ = dd["Update"].unique()
        ax.set_xticks(x_ticks_)
    # if not all_ticks:
    #     for ind, label in enumerate(g.get_xticklabels()):
    #         if ind % (len(x_ticks_) % 10) == 0:  # only 10% of the ticks is shown
    #             label.set_visible(True)
    #         else:
    #             label.set_visible(False)
    ax.grid(True, linestyle="dashed", linewidth=0.4)
    plt.xlabel("Problem index", fontsize="x-large")
    plt.ylabel("Cumulative # of sim calculations for the i^th NN",
               fontsize="x-large")
    #plt.rcParams['xtick.labelsize'] = "x-large"
    #plt.rcParams['ytick.labelsize'] = "x-large"
    ax.tick_params(axis='both', which='major', labelsize="large")
    if signature:
        plt.figtext(0.99,
                    0.01,
                    'rendered by \'insights_multiple\'.',
                    horizontalalignment='right',
                    alpha=0.5,
                    size="small")
    save_fn = 'CALCS_{}_{}{}{}-{}{}'.format(
        fn_wo_ext, "+".join(experiments_k), "-TOTAL" if total else "",
        "-ACTUAL" if actual else "",
        "ALL_K" if all_k or len(experiments) == 1 else "SELECT_K",
        "-MARKERS" if marker_size else "")
    # Update the title of the plot window
    plt.gcf().canvas.manager.set_window_title(save_fn)
    if with_title:
        plt.title(title_ + "\n")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print(
            "Total vs Actual Calcs figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)
        plt.show()
    plt.close()
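A minimal usage sketch comparing two insights experiments; both result paths are hypothetical placeholders:

# Hypothetical usage; result paths are placeholders.
insights_multiple(["/results/INS_GunPoint_k_9.pk",
                   "/results/INS_GunPoint_k_5.pk"],
                  file_format="pdf",
                  all_k=False)  # full detail for Exp #0; only the 0th and kth NN for the rest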
Example #5
def pdp(pdp_file,
        file_format=None,
        update=1,
        ki=0,
        decimals=3,
        to_latex=False,
        start_q=0.0):
    """Plots or exports as latex a PDP table for a given update and ki.

    Only top n rows where the n^th row is the first row with conf=1.0 are plotted or exported.

    Args:
        pdp_file (str): pickle file for the performance distribution profile
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        update (int): which update to plot.
        ki (int): zero-based index of the NN in kNN list to plot.
        decimals (int): number of decimal digits for rounding probability and confidence values.
        to_latex (bool): if True, PDP 2D excerpt is saved into a .tex file in the pdp folder and the figure is not
            displayed or saved.
        start_q (float): start column for the quality to be used to plot/export the PDP

    Returns:
        None

    """
    # File info
    # fn = basename(pdp_file)
    fn_wo_ext = common.file_name_wo_ext(pdp_file)
    save_fn = "PDP_FIG_{}_ki_{}_u_{}".format(fn_wo_ext, ki, update)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Read experiment data
    pdp_output = common.load_obj(pdp_file)
    pdp_all = pdp_output.data
    calc_step = pdp_output.settings.calc_step
    q_step = pdp_output.settings.q_step
    pdp = pdp_all[update - 1][ki]
    # pdp = pdp * 100
    # Filter PDP in order not to plot redundant rows with conf = 1.0
    pdp_rows_conf_1 = np.where(pdp[:, -1] == 1.0)[0]  # rows with conf=1.0
    if pdp_rows_conf_1.size > 0:  # If the calc_step is very coarse, then there may be no rows with conf = 1
        top_n = pdp_rows_conf_1[
            0] + 1  # top n rows where the n^th row is the first row with conf=1.0
    else:
        top_n = pdp.shape[0]
    pdp = pdp[:top_n]
    nrows = pdp.shape[0]
    rows = ["{:d}".format(i * calc_step) for i in range(1, nrows + 1)]
    ncols = pdp.shape[1]
    decimals_q_step = len(str(q_step).split('.')[1])
    cols = [
        "{0:.{1}f}".format(i * q_step, decimals_q_step)
        for i in range(1, ncols + 1)
    ]
    # calculate the weighted mean of probability distributions of quality (i.e. confidence) and std deviation for each row (i.e. calc range)
    q_ind_array = np.array(
        [round(i * q_step, decimals) for i in range(1, ncols + 1)])
    conf_n_std_dev = np.apply_along_axis(
        lambda a: (
            np.average(q_ind_array, weights=a),  # conf
            helper.weighted_std(q_ind_array, a)),  # std_dev
        axis=1,
        arr=pdp)

    # Add the conf and std_dev columns to the original pdp
    pdp = np.column_stack((pdp, conf_n_std_dev))
    cols = cols + ["Confidence", "\u03C3"]
    pdp = pdp.round(decimals)
    if start_q:
        start_col_ind = math.ceil(start_q / q_step) - 1
        pdp = pdp[:, start_col_ind:]  # Show only the quality columns >= start_q
        ncols = ncols - start_col_ind
        cols = cols[start_col_ind:]
        save_fn = "{}_sq_{}".format(save_fn, cols[0])
    cell_colors = plt.cm.Oranges(pdp)
    ########################
    # PDP to LaTeX
    if to_latex:
        # Make a table for the top n rows where the n^th row is the first row with conf=1.0
        pdp = pd.DataFrame(data=pdp, index=rows, columns=cols)
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        pdp.to_latex(buf=save_fpath,
                     index=True,
                     float_format=lambda x: "{0:.{1}f}".format(x, decimals)
                     if x != 0. else "")
        print("PDP saved as LaTeX table into '{}'.".format(save_fpath))
        return None
    ########################
    # Clear 0.0 cells
    pdp = pdp.astype("U{}".format(2 + decimals))  # len("0.") = 2
    pdp[pdp == "0.0"] = ""
    # Create a figure
    hcell, wcell = 0.3, .8
    hpad, wpad = 0, 0
    fig = plt.figure(figsize=(ncols * wcell + wpad, (nrows * hcell + hpad)))
    # fig = plt.figure()
    ax2 = fig.add_subplot(111)
    ax2.axis('off')
    # Add a table at the bottom of the axes
    table = ax2.table(cellText=pdp,
                      rowLabels=rows,
                      rowLoc='right',
                      rowColours=plt.cm.BuPu(np.linspace(0, 0.5, len(rows))),
                      colColours=plt.cm.YlGn(np.linspace(0, 0.5, len(cols))),
                      cellColours=cell_colors,
                      colLabels=cols,
                      loc='center')
    # Set "confidence" column header's color.
    c = table.get_celld()[(0, len(cols) - 1)]
    c.set_facecolor("w")
    title_ = "Performance Distribution Profile\n"
    title_ = "{}PDP: {}\n".format(title_, fn_wo_ext)
    title_ = "{}ki: {}, update: {}\n".format(title_, ki, update)
    # plt.subplots_adjust(left=0.2, top=0.8)
    plt.title(title_)
    plt.tight_layout()
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=150, bbox_inches="tight")
        print("PDP figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)
        plt.show()
    plt.close()
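A minimal usage sketch exporting a PDP excerpt as a LaTeX table; the pickle path is a hypothetical placeholder:

# Hypothetical usage; the PDP pickle path is a placeholder.
pdp("/results/pdp_gunpoint.pk",
    update=5, ki=0,
    to_latex=True,  # export a .tex table instead of rendering the figure
    start_q=.5)     # keep only quality columns >= 0.5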
Example #6
def gains_multiple(experiments,
                   file_format="pdf",
                   marker_size=1.,
                   color_ind=None,
                   with_title=True,
                   signature=True):
    """Plots the gains for a list of insights experiments.

    Args:
        experiments (list): List of full paths to the `run.run_insights` experiment(s) result file(s).
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
        marker_size (float): size of the marker in scatter plot
        color_ind (int): Index in the `sns.color_palette()` for plotting single experiment
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.

    Returns:
        None

    """
    COLORS_ = sns.color_palette()
    title_ = "Gain in similarity calcs compared to Brute Search"
    save_fn = ""
    sns.set(style='whitegrid')  # , font_scale=.9)
    plt.figure(num=1, figsize=(8, 6))
    for exp_id, experiment in enumerate(experiments):
        result = common.load_obj(experiment)
        fn_wo_ext = common.file_name_wo_ext(experiment)
        # title_ = "{}\nExp #{}: {}".format(title_, exp_id, fn)
        dd = pd.DataFrame(result.data.gain)
        dd.columns = ['Update', '% Gain']

        g = sns.regplot(
            x='Update',
            y='% Gain',
            data=dd,
            scatter=True,
            fit_reg=True,
            scatter_kws={"s": marker_size},
            order=3,
            ci=None,
            line_kws={"label": "#{}: {}".format(exp_id, fn_wo_ext)},
            color=COLORS_[exp_id] if len(experiments) > 1 or color_ind is None
            else COLORS_[color_ind],  # explicit `is None` check so palette index 0 is usable; color override for presentations
            truncate=True)
        plt.ylim(np.min(dd['% Gain']), 100.)  # np.max(dd['% Gain']))
        g.set(ylabel="Gain (%)")
        plt.xlim(np.min(dd['Update']), np.max(dd['Update']))
        g.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
        # plt.xticks(range(np.min(dd['Update']), np.max(dd['Update']) + 1))
        plt.gca().grid(True, linestyle="dashed", linewidth=0.4)
        plt.xlabel('Problem index')
        if not save_fn:
            save_fn = 'GAINS_{}'.format(fn_wo_ext)
    save_fn = "{}{}{}".format(
        save_fn, "_and_{}_more".format(len(experiments) -
                                       1) if len(experiments) > 1 else "",
        "_MARKERS" if marker_size else "")
    if signature:
        plt_common.sign_plot(plt, gains_multiple.__name__)
    plt.legend(title="Experiments", frameon=True, loc="best", fontsize="small")
    if with_title:
        plt.title(title_ + "\n")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Gains figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)
        plt.show()
    plt.close()
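A minimal usage sketch; the experiment result path is a hypothetical placeholder:

# Hypothetical usage; the result path is a placeholder.
gains_multiple(["/results/INS_GunPoint_w_40_s_10.pk"],
               file_format=None,  # display interactively
               color_ind=3)       # palette index used when plotting a single experiment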
Example #7
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments",
        nargs="*",
        type=str,
        help="Exploit Candidates experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help=
        "Full path of the folder containing Exploit Candidates experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help=
        "Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(
        x, dec_digits) if isinstance(x, (int, float)) else x

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"

    # Create output dataframe
    df_gains_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])

    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load 'exploit candidates' experiment output
        exp_exploit_output = common.load_obj(
            exp)  # type: exploit.ExpExploitOutput
        # Get the dataset name
        dataset_name = common.file_name_wo_ext(
            exp_exploit_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average gains in experiments
        exp_exploit_data = exp_exploit_output.data
        exp_exploit_data = exp_exploit_data.loc[
            exp_exploit_data["update"] != 0]  # Filter the initial problem
        df_avg_gains = exp_exploit_data[["gain", "iterator"
                                         ]].groupby("iterator").mean()
        # Avg gain dict
        dict_avg_gains = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            run_common.time_window_width_str(
                exp_exploit_output.settings.tw_width),
            LBL_FSTEP:
            exp_exploit_output.settings.tw_step
        }
        # avg_gain_keys = [str(c) if c is not None else "-" for c in df_avg_gains.index.tolist()]
        avg_gain_keys = df_avg_gains.index.tolist()
        avg_gain_values = df_avg_gains["gain"].values
        # Add the results to the output dataframe
        dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
        df_gains_output = pd.concat(
            [df_gains_output, pd.DataFrame([dict_avg_gains])],
            ignore_index=True)  # DataFrame.append was removed in pandas 2.0

    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        # df_gains_output = df_gains_output.round(dec_digits)
        save_fn = "gain_exploit_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print(
            "Avg Gain for TopDown vs ExploitCandidates Rank Iterations saved as LaTeX table into '{}'."
            .format(save_fpath))
    else:
        print("No average gain results could be calculated.")
Example #8
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments",
                        nargs="*",
                        type=str,
                        help="Interruption experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help=
        "Full path of the folder containing interruption experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help=
        "Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")

    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(
        x, dec_digits) if isinstance(x, (int, float)) else x
    int_formatter = lambda x: '{:,}'.format(x)

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_UPDATES = "Updates"
    LBL_CB_SIZE = "\u007CCB\u007C"
    LBL_GAIN = "Gain"

    # Create output dataframe
    df_gains_output = pd.DataFrame(columns=[
        LBL_DATASET, LBL_FWIDTH, LBL_FSTEP, LBL_UPDATES, LBL_CB_SIZE, LBL_GAIN
    ])

    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load insights experiment
        exp_insights_output = common.load_obj(
            exp)  # type: insights.ExpInsightsOutput
        dataset_name = common.file_name_wo_ext(
            exp_insights_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        # Add the results to the output dataframe
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average gain for the insights experiment
        avg_gain = insights.get_avg_gain_for_exp(exp)
        n_updates = np.max([u[0] for u in exp_insights_output.data.gain])
        # Avg gain dict
        dict_avg_gains = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            time_window_width_str(exp_insights_output.settings.tw_width),
            LBL_FSTEP:
            exp_insights_output.settings.tw_step,
            LBL_UPDATES:
            n_updates + 1,
            LBL_CB_SIZE:
            exp_insights_output.settings.cb_size,
            LBL_GAIN:
            avg_gain
        }
        # Add the results to the output dataframe
        df_gains_output = pd.concat(
            [df_gains_output, pd.DataFrame([dict_avg_gains])],
            ignore_index=True)  # DataFrame.append was removed in pandas 2.0

    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        # df_gains_output = df_gains_output.round(dec_digits)
        save_fn = "gain_insights_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 formatters={
                                     LBL_UPDATES: int_formatter,
                                     LBL_CB_SIZE: int_formatter
                                 },
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print("Avg Gain results saved as LaTeX table into '{}'.".format(
            save_fpath))
    else:
        print("No average gain results could be calculated.")
Example #9
def efficiency(experiments,
               file_format="png",
               y_to_use="effcyq",
               rtrim=None,
               outliers=False,
               with_title=True,
               signature=True,
               palette="tab20",
               maximized=True,
               aspect=None):
    """Plots efficiency (f.k.a. confidence performance) for given interruption experiments.

    Args:
        experiments (List[str]): list of experiment file paths
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        y_to_use (str): ['abspcterr', 'abserr', 'effcysim', 'effcyq'] fields in experiment result dataframe
            standing for absolute percentage error, absolute error, efficiency (using sim) and efficiency (using quality)
            of confidence.
        rtrim (list): Remove given strings at the end of dataset names; e.g. ['_TRAIN', '_TEST']
        outliers (bool): If True, outliers are plotted.
        with_title (bool): If True, show generated plot title.
        signature (bool): If True, write the name of this function.
        palette (str): A matplotlib colormap, e.g. 'tab20', 'Purples_d'.
        maximized (bool): If True, maximize plot to full screen.
        aspect (float): Desired aspect ratio (i.e. height/width) of the figure.
            If not None, the width is re-adjusted for this ratio while the height remains the same.

    Returns:
        None

    Raises:
        ValueError:
            i) If dataset names for the experiments are different;
            ii) If y_to_use not in valid options.

    """

    Y_OPTIONS = {
        "abspcterr": "absolute percentage error (%)",
        "abserr": "absolute error",
        "effcysim": r"efficiency ($\eta$)",  # using sim
        "effcyq": r"efficiency ($\eta$)"  # using quality
    }
    if y_to_use not in Y_OPTIONS.keys():
        raise ValueError(
            "Non-valid value for y_to_use argument. Should be in {}".format(
                list(Y_OPTIONS.keys())))
    df_output = None
    # Variables for output file name
    dataset_name = None
    w_list = []
    step_list = []
    z_list = []  # z=-nstd in the new efficiency definition
    # Populate summary dictionary
    for exp in experiments:
        # Read experiment data
        result = common.load_obj(exp)  # type: intrpt.ExpIntrptOutput
        # Update output DataFrame
        data = result.data
        data["setting"] = "$w$:{}, $step$:{}, $z$:{}".format(
            result.settings.tw_width, result.settings.tw_step,
            helper.is_float_int(result.settings.z))
        df_output = pd.concat([df_output, data])
        # Update variables for output file name
        if dataset_name is None:
            dataset_name = common.file_name_wo_ext(result.settings.dataset)
            if rtrim:
                for tag in rtrim:
                    # Trim end tags
                    dataset_name = re.sub("{}$".format(tag), "", dataset_name)
        elif result.settings.dataset.find(dataset_name) == -1:
            # dataset_name = "Misc_{}".format(time.strftime("%Y%m%d"))
            raise ValueError(
                "Plotting different datasets not allowed yet: {}, {}.".format(
                    dataset_name, result.settings.dataset))
        w_list.append(result.settings.tw_width)
        step_list.append(result.settings.tw_step)
        z_list.append(helper.is_float_int(result.settings.z))

    # Plot
    plt.figure()
    ax = plt.gca()
    # Show grid
    ax.grid(True, linestyle="dashed", linewidth=.5)
    # Grouped boxplot
    sns.boxplot(x="confthold",
                y=y_to_use,
                hue="setting",
                data=df_output,
                palette=palette,
                linewidth=.5,
                showfliers=outliers,
                fliersize=.2)
    if y_to_use in ["effcysim", "effcyq"]:
        # Draw a horizontal line for y=1  (efficiency approx= 1)
        ax.axhline(1, linestyle="--", color="black", linewidth=1.)
    # axis labels
    matplotlib.rcParams['text.usetex'] = True  # Allow LaTeX in text
    ax.set_xlabel("confidence thresholds ($\mu\!+\!z\sigma$) for interruption")
    ax.set_ylabel("{}".format(Y_OPTIONS[y_to_use]))
    # plot title
    if with_title:
        plt.title("{} of Confidence in Interruption Tests\nfor ${}$".format(
            Y_OPTIONS[y_to_use].capitalize(), dataset_name.replace("_", "\_")))
    # Add the signature
    if signature:
        plt_common.sign_plot(plt, efficiency.__name__)
    # Maximize plot to full screen
    if maximized:
        plt.legend(fontsize="medium")
        manager = plt.get_current_fig_manager()
        backend_ = matplotlib.get_backend()
        if backend_.upper == "TKAGG":
            manager.resize(*manager.window.maxsize())
        elif backend_.upper().startswith("QT"):
            manager.window.showMaximized()
        elif backend_.find("interagg") != -1:  # Hack for PyCharm SciView
            pass
        else:  # Try your chance
            manager.resize(*manager.window.maxsize())
    else:
        plt.legend(fontsize="small")
    # File name/Window title
    w_list = [str(_) for _ in sorted(set(w_list))]  # sorted(set(...)): a set would discard the sort order
    step_list = [str(_) for _ in sorted(set(step_list))]
    z_list = [str(_) for _ in sorted(set(z_list))]
    save_fn = "EFF_{}(x{})_w_[{}]_s_[{}]_z_[{}]_{}{}{}{}".format(
        dataset_name, len(experiments), "_".join(w_list), "_".join(step_list),
        "_".join(z_list), y_to_use, "_a_{}".format(aspect) if aspect else "",
        "_o" if outliers else "", "_t" if with_title else "")
    # Aspect ratio
    if aspect:
        # figw, figh = plt.rcParams["figure.figsize"]
        # plt.rcParams["figure.figsize"] = [figw / aspect, figh]
        fig = plt.gcf()
        figw, figh = fig.get_size_inches()
        fig.set_size_inches(figh / aspect, figh, forward=True)
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print(
            "Confidence efficiency figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)
        plt.show()
    plt.close()

    return None
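A minimal usage sketch with two interruption experiments on the same dataset; both result paths are hypothetical placeholders:

# Hypothetical usage; result paths are placeholders.
efficiency(["/results/INT_GunPoint_z_-1.pk",
            "/results/INT_GunPoint_z_-2.pk"],
           y_to_use="effcyq",
           rtrim=["_TRAIN", "_TEST"],
           aspect=.75)  # width re-adjusted for height/width = 0.75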
Example #10
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments",
                        nargs="*",
                        type=str,
                        help="Classification experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help=
        "Full path of the folder containing classification experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c",
                        "--confthold",
                        nargs="+",
                        type=float,
                        default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z",
                        "--z",
                        type=float,
                        default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--knni",
        type=int,
        help=
        "Zero-based index of the kNN for which the average 'confidence performance' is calculated."
        " 'None' to calculate for all kNNs."
        " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument(
        "--wsoln",
        choices=[0, 1],
        type=int,
        default=0,
        help="1 to display hits upon interruption w/ exact solution")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help=
        "Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits)
    float_formatter_hit = lambda x: "{0:.{1}f}".format(
        x * 100, dec_digits) if isinstance(x, (int, float)) else x
    wsoln = args.wsoln
    wsoln_tag = "_ws_{}".format(wsoln) if wsoln else ""

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_STOP_W_SOLN = "w\u2215Soln"

    # Create output dataframe
    df_hits_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
        ([LBL_STOP_W_SOLN] if wsoln else []) + [
            gain_intrpt_classify.conf_col_label(c, float_formatter, arg_z)
            for c in conf_thold if c != 1
        ])  # Exclude conf=1.00 for hits; it makes no sense for uninterrupted runs

    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load classification experiment
        exp_output = common.load_obj(
            exp
        )  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print(
                "Ignored. The 'z' command line argument ({}) is different from "
                "the experiment 'z' setting ({}).".format(
                    helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average hits in the classification experiment
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                     rtrim,
                                                     latex_it=True)
            # Avg hit dict
            df_avg_hits = alk.exp.classify.get_avg_hit_for_classify_exp(
                exp, conf_thold, wsoln, lblwsoln=LBL_STOP_W_SOLN)
            dict_avg_hits = {
                LBL_DATASET:
                dataset_name,
                LBL_FWIDTH:
                run_common.time_window_width_str(exp_output.settings.tw_width),
                LBL_FSTEP:
                exp_output.settings.tw_step
            }
            avg_hits_keys = [
                gain_intrpt_classify.conf_col_label(c, float_formatter, exp_z)
                if isinstance(c, float) else c
                for c in df_avg_hits.index.tolist()
            ]
            avg_hits_values = df_avg_hits["hit"].values
            dict_avg_hits.update(dict(zip(avg_hits_keys, avg_hits_values)))
            # Add the results to the output dataframe
            df_hits_output = pd.concat(
                [df_hits_output, pd.DataFrame([dict_avg_hits])],
                ignore_index=True)  # DataFrame.append was removed in pandas 2.0

    # Export the df_hits to LaTeX
    if len(df_hits_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_hits_output = df_hits_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_hits_output = df_hits_output.sort_index()
        save_fn_hit = "soln_hit_(x{})_[{}]_sd_{}_ki_{}{}".format(
            len(df_hits_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z), knn_i if knn_i is not None else "All",
            wsoln_tag)
        save_fpath_hit = os.path.join(common.APP.FOLDER.FIGURE,
                                      "{}.tex".format(save_fn_hit))
        df_hits_output.to_latex(buf=save_fpath_hit,
                                float_format=float_formatter_hit,
                                escape=False,
                                multirow=True,
                                index=True)
        print("Avg Solution Hit %s saved as LaTeX table into '{}'.".format(
            save_fpath_hit))
    else:
        print("No average solution hit results could be calculated.")
Example #11
def main(argv=None):
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Interruption/classification experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c", "--confthold", nargs="+", type=float, default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z", "--z", type=float, default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--knni", type=int,
                        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
                             " 'None' to calculate for all kNNs."
                             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument("--clsfy", choices=[0, 1], type=int, default=0,
                        help="0 for interruption experiments;"
                             " 1 for classification experiments to display gains also upon interruption w/ exact solution.")
    parser.add_argument("--rtrim", nargs="*", type=str,
                       help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits)
    clsfy = args.clsfy
    exp_tag = "{}".format("classify" if clsfy else "intrpt")

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_CONF_PERF = "Effcy"
    LBL_CONF_PERF_STD = "\u03C3"
    LBL_STOP_W_SOLN = "w\u2215Soln"

    # Create output dataframe
    df_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                                     ([LBL_STOP_W_SOLN] if clsfy else []) +
                                     [conf_col_label(c, float_formatter, arg_z) for c in conf_thold] +
                                     [LBL_CONF_PERF, LBL_CONF_PERF_STD])
    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load interruption/classification experiment
        exp_output = common.load_obj(exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average gains in the interruption/classification experiment
            # Average gain is calculated for the last kNN member for confthold experiments and
            # for stopwsoln=1 for the interruption w/ exact solution experiments (if wsoln=True)
            if not clsfy:
                df_avg_gains = intrpt.get_avg_gain_for_intrpt_exp(exp, conf_thold)
            else:
                df_avg_gains = classify.get_avg_gain_for_classify_exp(exp, conf_thold, wsoln=True, lblwsoln=LBL_STOP_W_SOLN)
            # Add the results to the output dataframe
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
            dict_avg_gains = {LBL_DATASET: dataset_name,
                              LBL_FWIDTH: run_common.time_window_width_str(exp_output.settings.tw_width),
                              LBL_FSTEP: exp_output.settings.tw_step}
            avg_gain_keys = [conf_col_label(c, float_formatter, exp_z) if isinstance(c, float) else c for c in df_avg_gains.index.tolist()]
            avg_gain_values = df_avg_gains["gain"].values
            dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
            # Add average efficiency and its std deviation columns too
            if not clsfy:
                avg_conf_perf, avg_conf_perf_std = intrpt.get_avg_effcy_for_intrpt_exp(exp, knn_i=knn_i)
            else:
                avg_conf_perf, avg_conf_perf_std = classify.get_avg_effcy_for_classify_exp(exp, knn_i=knn_i)
            dict_avg_gains.update({LBL_CONF_PERF: avg_conf_perf, LBL_CONF_PERF_STD: avg_conf_perf_std})
            df_output = pd.concat([df_output, pd.DataFrame([dict_avg_gains])],
                                  ignore_index=True)  # DataFrame.append was removed in pandas 2.0

    # Export the df to LaTeX
    if len(df_output) > 0:
        # Swap wsoln and 1.0 columns
        if clsfy:
            unint_col = conf_col_label(1., float_formatter, arg_z)
            gain_cols = df_output.columns.tolist()
            if unint_col in gain_cols:
                unint_col_idx = gain_cols.index(unint_col)
                wsoln_col_idx = gain_cols.index(LBL_STOP_W_SOLN)
                gain_cols[unint_col_idx], gain_cols[wsoln_col_idx] = gain_cols[wsoln_col_idx], gain_cols[unint_col_idx]
                df_output = df_output[gain_cols]
        # Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        # df_output = df_output.round(dec_digits)
        save_fn = "gain_{}_(x{})_[{}]_sd_{}_ki_{}".format(exp_tag,
                                                          len(df_output),
                                                          "_".join([str(c) for c in conf_thold]),
                                                          helper.is_float_int(arg_z),
                                                          knn_i if knn_i is not None else "All")
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_output.to_latex(buf=save_fpath, float_format=float_formatter, escape=False, multirow=True, index=True)
        print_msg_header = "Avg Gain for Interruptions at Confidence Thresholds{}".format(" and with Exact Solutions" if clsfy else "")
        print("{} saved as LaTeX table into '{}'.".format(print_msg_header, save_fpath))
    else:
        print("No average gain results could be calculated.")