コード例 #1
0
ファイル: intrpt.py プロジェクト: IIIA-ML/alk
def get_avg_gain_for_intrpt_exp(exp_file, conf_tholds):
    """Gives the average gain throughout the sequence updates of an interruption experiment for given confidence thresholds.

    Args:
        exp_file (str): full path to the interruption experiment result file
        conf_tholds (list): of floats for confidence thresholds that the average gains are to be calculated for.
            If any given threshold was not used in the interruption experiment, it is ignored and a notice
            is printed. The list itself is not modified.

    Notes:
        Average gain is calculated for the last kNN member.

    Returns:
        pd.DataFrame: indices are the `conf_tholds` and the only column is the average gain

    """
    # Load interruption experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpIntrptOutput
    data = exp_intrpt_output.data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    df = data.loc[data["knni"] == data["knni"].max()][["confthold", "gain"]]  # Use last kNN member only
    conf_tholds_in_df = df["confthold"].unique()
    # Build a new list instead of popping from the list being iterated:
    # popping by index during enumeration skips the element that follows each
    # removal and would also alter the caller's list.
    valid_tholds = []
    for thold in conf_tholds:
        if thold in conf_tholds_in_df:
            valid_tholds.append(thold)
        else:
            print("Threshold argument {} is ignored, "
                  "it does not exist in the experiment results {}.".format(thold, exp_file))
    avg_gains = df.loc[df["confthold"].isin(valid_tholds)].groupby("confthold").mean()
    return avg_gains
コード例 #2
0
ファイル: intrpt.py プロジェクト: IIIA-ML/alk
def gen_intrpt_output_f_path(dataset, pdp_file, tw_width, tw_step, k, conf_tholds, z, test_size, cls_rank_iterator, suffix=""):
    """Builds the full path of the result file of an interruption experiment.

    The file name encodes the dataset name, the time-window width/step, k,
    the rank-iterator abbreviation, the PDP settings (its dataset, calc step
    and quality step), the confidence thresholds, z, the test size and an
    optional suffix, followed by the pickle file extension. The file is
    placed under the application's result folder.
    """
    # Dataset file name without directory and extension
    dataset_name, _ = os.path.splitext(os.path.basename(dataset))
    # Settings of the PDP that the experiment relies on
    pdp_output = common.load_obj(pdp_file)  # type: pdp.PDPOutput
    calc_step = pdp_output.settings.calc_step_arg
    q_step = pdp_output.settings.q_step
    pdp_dataset = common.file_name_wo_ext(
        exp_common.get_setting(pdp_output.settings.experiment, "dataset"))
    rank_iter_tag = cls_rank_iterator.abbrv
    # Show integral z values without a trailing ".0"
    z_tag = z
    if int(z) == z:
        z_tag = int(z)
    # Thresholds are joined as "[t1_t2_...]"; nothing is added when not given
    if conf_tholds is None:
        conf_thold_tag = ""
    else:
        conf_thold_tag = "[{}]".format("_".join(str(ct) for ct in conf_tholds))
    file_name = "INT_{d}_w_{w}_s_{s}_k_{k}_r_{r}_PDP_{dp}_c_{c}_q_{q}__ct_{ct}_z_{z}_t_{t}{x}{e}".format(
        d=dataset_name,
        w=tw_width,
        s=tw_step,
        k=k,
        r=rank_iter_tag,
        dp=pdp_dataset,
        c=calc_step,
        q=q_step,
        ct=conf_thold_tag,
        z=z_tag,
        t=str(test_size),
        x=suffix,
        e=common.APP.FILE_EXT.PICKLE)
    return os.path.join(common.APP.FOLDER.RESULT, file_name)
コード例 #3
0
def get_avg_gain_for_classify_exp(exp_file,
                                  conf_tholds,
                                  wsoln=False,
                                  lblwsoln="w/Soln"):
    """Gives the average gain throughout the sequence updates in a classification experiment
    for given confidence thresholds and optionally upon stopping w/ exact solution.

    Args:
        exp_file (str): full path to the classification experiment result file
        conf_tholds (list): of floats for confidence thresholds that the average gains are to be calculated for.
            If any given threshold was not used in the classification experiment, it is ignored and a notice
            is printed. The list itself is not modified.
        wsoln (bool): If True, gains for 'stop_w_soln=1' rows are also added.
        lblwsoln (str): Label of the row for the interruption with exact solution

    Notes:
        Average gain is calculated for the last kNN member.

    Returns:
        pd.DataFrame: indices are the `conf_tholds` and `lblwsoln` if requested, and the only column is the average gain

    """
    # Load classification experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpClassifierOutput
    data = exp_intrpt_output.data.gain_data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    df = data.loc[(data["stopwsoln"] == 0)
                  & (data["knni"] == data["knni"].max())][[
                      "confthold", "gain"
                  ]]  # Use intrpt w/conf and last kNN member only
    conf_tholds_in_df = df["confthold"].unique()
    # Build a new list instead of popping from the list being iterated:
    # popping by index during enumeration skips the element that follows each
    # removal, so some unknown thresholds would never be reported.
    valid_tholds = []
    for thold in conf_tholds:
        if thold in conf_tholds_in_df:
            valid_tholds.append(thold)
        else:
            print("Threshold argument {} is ignored, "
                  "it does not exist in the experiment results {}.".format(
                      thold, exp_file))
    avg_gains = df.loc[df["confthold"].isin(valid_tholds)].groupby(
        "confthold").mean()
    if wsoln:
        import pandas as pd  # local import: needed only for the concat below

        # Add the avg gain for the experiment w/ stop_w_soln=True
        # Doesn't matter if an interruption has actually occurred w/ exact soln or not,
        # i.e. the 'intrptwsoln' column is ignored
        s_avg_w_soln = data.loc[data["stopwsoln"] == 1][["gain"]].mean()
        s_avg_w_soln.name = lblwsoln
        # `DataFrame.append` was removed in pandas 2.0; append the series as a
        # single row labelled `lblwsoln` via concat, keeping the index name.
        row_w_soln = s_avg_w_soln.to_frame().T.rename_axis(
            avg_gains.index.name)
        avg_gains = pd.concat([avg_gains, row_w_soln])
    return avg_gains
コード例 #4
0
ファイル: intrpt.py プロジェクト: IIIA-ML/alk
def get_avg_effcy_for_intrpt_exp(exp_file, knn_i=None):
    """Computes the mean and standard deviation of the confidence efficiency of an interruption experiment.

    Rows of the initial problem (update 0) are excluded.

    Args:
        exp_file (str): full path to the interruption experiment result file
        knn_i (int): Zero-based index of the kNN member to restrict the rows to;
            if None, rows of *all* kNN members are used.
    Returns:
        (float, float). (mean, std dev) of the "effcyq" column

    """
    results = common.load_obj(exp_file).data  # data of an ExpIntrptOutput
    # Drop the initial problem
    rows = results.loc[results["update"] != 0]
    # Optionally keep a single kNN member
    if knn_i is not None:
        rows = rows.loc[rows["knni"] == knn_i]
    effcy = rows["effcyq"]
    return effcy.mean(), effcy.std()
コード例 #5
0
def get_setting(out_file, attr):
    """Reads one setting from the output object stored in a file.

    Args:
        out_file (str): Full path to the output file
        attr (str): name of the setting attribute

    Raises:
        AttributeError: If the attribute cannot be read from the settings

    Returns:
        Any: the value of the setting.

    """
    loaded = common.load_obj(out_file)  # type: Output
    try:
        value = getattr(loaded.settings, attr)
    except AttributeError:
        # Re-raise with a message that names the requested setting
        message = "'{}' is not found in the settings.".format(attr)
        raise AttributeError(message)
    else:
        return value
コード例 #6
0
def get_avg_hit_for_classify_exp(exp_file,
                                 conf_tholds,
                                 wsoln=False,
                                 lblwsoln="w/Soln"):
    """Gives the average hit throughout updates of a classification experiment for given confidence thresholds.

    Args:
        exp_file (str): full path to classification experiment result file
        conf_tholds (list): list of floats for confidence thresholds that the average hits are to be calculated for.
            If any given threshold was not used in the classification experiment, it is ignored and a notice
            is printed. The list itself is not modified.
        wsoln (bool): If True, hits for rows with 'stopwsoln=1' and 'intrptwsoln=1' are also added.
        lblwsoln (str): Label of the row for the interruption with exact solution

    Returns:
        pandas.DataFrame: indices are the `conf_tholds` (and `lblwsoln` if requested)
            and the only column is the average hit

    """
    # Load classification experiment
    exp_intrpt_output = common.load_obj(exp_file)  # type: ExpClassifierOutput
    data = exp_intrpt_output.data.hit_data
    data = data.loc[data["update"] != 0]  # Filter the initial problem
    df = data.loc[data["stopwsoln"] == 0][[
        "confthold", "hit"
    ]]  # Use intrpt w/conf and all kNN members
    conf_tholds_in_df = df["confthold"].unique()
    # Build a new list instead of popping from the list being iterated:
    # popping by index during enumeration skips the element that follows each
    # removal, so some unknown thresholds would never be reported.
    valid_tholds = []
    for thold in conf_tholds:
        if thold in conf_tholds_in_df:
            valid_tholds.append(thold)
        else:
            print("Threshold argument {} is ignored, "
                  "it does not exist in the experiment results {}.".format(
                      thold, exp_file))
    avg_hits = df.loc[df["confthold"].isin(valid_tholds)].groupby(
        "confthold").mean()
    if wsoln:
        import pandas as pd  # local import: needed only for the concat below

        # Add the avg hit for the experiment w/ stop_w_soln=True and intrptwsoln=True
        # An interruption has to have actually occurred w/ exact soln to be included in hits
        s_avg_w_soln = data.loc[(data["stopwsoln"] == 1)
                                & (data["intrptwsoln"] == 1)][["hit"]].mean()
        s_avg_w_soln.name = lblwsoln
        # `DataFrame.append` was removed in pandas 2.0; append the series as a
        # single row labelled `lblwsoln` via concat, keeping the index name.
        row_w_soln = s_avg_w_soln.to_frame().T.rename_axis(
            avg_hits.index.name)
        avg_hits = pd.concat([avg_hits, row_w_soln])
    return avg_hits
コード例 #7
0
def get_avg_gain_for_exp(exp_file):
    """Returns the mean of the gain values recorded in an insights experiment.

    Each entry of the experiment's `data.gain` is indexed at position 1 to
    obtain its gain value.
    """
    gain_records = common.load_obj(exp_file).data.gain  # from an ExpInsightsOutput
    gain_values = []
    for record in gain_records:
        gain_values.append(record[1])
    return np.mean(gain_values)
コード例 #8
0
def insights_multiple(experiments,
                      file_format="pdf",
                      total=True,
                      actual=True,
                      all_k=False,
                      marker_size=0,
                      all_ticks=True,
                      with_title=True,
                      signature=True):
    """Plots the total and actual calculations made to find kNNs for multiple experiments on the same figure.

    Args:
        experiments (list): List of full paths to the `run.run_insights` experiment(s) result file(s).
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            If empty or None, the figure is displayed instead of being saved.
        total (bool): If True, plots the total number of calculations made for the ki's.
        actual (bool): If True, plots the actual number of calculations made for the ki's.
        all_k (bool): If False, plots all ki's in the first experiment and then plots
            only the calcs for the first and the last NN for the experiments with index>=1;
            otherwise, plots all ki's for all experiments. If there is only one
            experiment to plot, this argument is ignored.
        marker_size (float): size of the marker in scatter plot
        all_ticks (bool): If True, all x-ticks are displayed.
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.

    Raises:
        ValueError: If `experiments` is empty or both `total` and `actual` are False.

    Returns:
        None

    """
    # Fail early with a clear message; otherwise `k` and `dd` below would be unbound.
    if not experiments:
        raise ValueError("At least one experiment result file is required.")
    if not (total or actual):
        raise ValueError("At least one of 'total' and 'actual' must be True.")
    LINE_STYLE = {0: "-", 1: "--", 2: "-.", 3: ":"}
    COLORS_ = sns.color_palette()
    LW = 1
    experiments_k = []  # k's of the experiment for the output file name
    from matplotlib.ticker import MaxNLocator
    # Create a figure
    plt.figure(num=1, figsize=(12, 10))
    title_ = "Anytime Lazy KNN Search\nTotal vs Actual # of Similarity Assessments\n\n"
    sns.set_style("whitegrid")
    for exp_id, experiment in enumerate(experiments):
        result = common.load_obj(experiment)
        fn_wo_ext = common.file_name_wo_ext(experiment)
        # Read experiment data
        data = result.data
        if total:
            k_calcs_total = data.knn_total_cumsum
            k = len(k_calcs_total[0]) - 1  # k of kNN
        if actual:
            k_calcs_actual = data.knn_actual_cumsum
            if not total:
                k = len(k_calcs_actual[0]) - 1  # k of kNN
        experiments_k.append(str(k))
        for i in range(k):
            # If all_k is False, plot k_calcs of all ki's of the 1st experiment but only the 0^th and k'th of the other experiments.
            if all_k or exp_id == 0 or i == (k - 1) or i == 0:
                # Wrap around the palette so that a large k cannot overflow it
                color_ = COLORS_[i % len(COLORS_)]
                if total:
                    # Total calculation number for each kNN
                    dd = pd.DataFrame(
                        np.array(k_calcs_total,
                                 dtype=int)[:, [0, i + 1]].tolist())
                    dd.columns = ["Update", "Comps"]
                    label_str = "kNN[{}] total".format(i) if len(
                        experiments
                    ) == 1 else "kNN[{}] total (Exp #{})".format(i, exp_id)
                    g = sns.regplot(
                        x='Update',
                        y='Comps',
                        data=dd,
                        scatter=True,
                        fit_reg=True,
                        scatter_kws={"s": marker_size},
                        order=3,
                        ci=None,  # ci=100,
                        color=color_,
                        truncate=True,
                        line_kws={
                            "linestyle": LINE_STYLE[exp_id % len(LINE_STYLE)],
                            "label": label_str,
                            "lw": LW
                        })
                    plt.legend(frameon=True, loc="best", fontsize="large")
                if actual:
                    # Actual calculation number for each kNN
                    dd = pd.DataFrame(
                        np.array(k_calcs_actual,
                                 dtype=int)[:, [0, i + 1]].tolist())
                    dd.columns = ["Update", "Comps"]
                    label_str = "kNN[{}] actual".format(i) if len(
                        experiments
                    ) == 1 else "kNN[{}] actual (Exp #{})".format(i, exp_id)
                    g = sns.regplot(
                        x='Update',
                        y='Comps',
                        data=dd,
                        scatter=True,
                        fit_reg=True,
                        scatter_kws={"s": marker_size},
                        order=3,
                        ci=None,  # ci=100,
                        marker="x",
                        color=color_,
                        truncate=True,
                        line_kws={
                            # "Actual" uses the style following the one used for "total".
                            # The modulo is applied after the +1 so that the
                            # index always stays within LINE_STYLE's keys.
                            "linestyle":
                            LINE_STYLE[(exp_id + 1) % len(LINE_STYLE)],
                            "label": label_str,
                            "lw": LW
                        })
                    plt.legend(frameon=True, loc="best", fontsize="large")
        # Update the title
        title_ = "{}{}Exp #{}: {}".format(title_, "\n" if exp_id > 0 else "",
                                          exp_id, fn_wo_ext)

    #g.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax = plt.gca()
    if all_ticks:
        # Ticks are taken from the last plotted data frame
        x_ticks_ = dd["Update"].unique()
        ax.set_xticks(x_ticks_)
    # if not all_ticks:
    #     for ind, label in enumerate(g.get_xticklabels()):
    #         if ind % (len(x_ticks_) % 10) == 0:  # only 10% of the ticks is shown
    #             label.set_visible(True)
    #         else:
    #             label.set_visible(False)
    ax.grid(True, linestyle="dashed", linewidth=0.4)
    plt.xlabel("Problem index", fontsize="x-large")
    plt.ylabel("Cumulative # of sim calculations for the i^th NN",
               fontsize="x-large")
    #plt.rcParams['xtick.labelsize'] = "x-large"
    #plt.rcParams['ytick.labelsize'] = "x-large"
    ax.tick_params(axis='both', which='major', labelsize="large")
    if signature:
        plt.figtext(0.99,
                    0.01,
                    'rendered by \'insights_multiple\'.',
                    horizontalalignment='right',
                    alpha=0.5,
                    size="small")
    # The file name is based on the name of the last experiment
    save_fn = 'CALCS_{}_{}{}{}-{}{}'.format(
        fn_wo_ext, "+".join(experiments_k), "-TOTAL" if total else "",
        "-ACTUAL" if actual else "",
        "ALL_K" if all_k or len(experiments) == 1 else "SELECT_K",
        "-MARKERS" if marker_size else "")
    # Update the title of the plot window
    # NOTE(review): `canvas.set_window_title` is deprecated in recent Matplotlib
    # releases -- confirm against the pinned Matplotlib version.
    plt.gcf().canvas.set_window_title(save_fn)
    if with_title:
        plt.title(title_ + "\n")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print(
            "Total vs Actual Calcs figure saved into '{}'.".format(save_fpath))
    else:
        # The window title has already been set above
        plt.show()
    plt.close()
コード例 #9
0
def quality_map(experiment,
                file_format=None,
                ki=None,
                urange=None,
                ustep=1,
                colored=True,
                fill=False,
                with_title=True,
                signature=True,
                q_full_scale=True,
                start_calc=1,
                cull=None):
    """Plots the log scale quality map for a given kNN[j] of all sample test sequences.

    Plots all or ppi (calc, sim) points between each major tick interval

    Args:
        experiment (str): insights experiment results full file path
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        ki (int): zero-based index of the NN in kNN list, if None all kNNs are plotted.
        ustep (int): a different color is chosen in the color palette for every `ustep` number of updates;
            irrelevant for colored=False.
        urange (tuple): range of updates to plot given as (start, end) where both is inclusive;
            if given as (start, ), max update is used for end;
            if None, all updates are plotted.
        colored (bool): If True, each update is plotted in a different color; otherwise all plotted black,
        fill (bool): if True, propagates the quality for the intermediate calc values;
            if False, plots only the given points provided as sparse list.
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.
        q_full_scale (bool): if True, quality (i.e. y) axis starts from 0.0; otherwise minimum quality is used.
        start_calc (int): The start value for the calculations (i.e. x) axis.
        cull (float): The percentage (.0, 1.] to cull the data points to be plotted

    Returns:
        None

    Note:
        Start: 20190918, End:20191023
    """
    # Create a figure
    plt.figure(num=1)  # , figsize=(10, 8))
    # Read experiment data
    result = common.load_obj(experiment)
    knn_calc_sim_history = result.data.knn_calc_sim_history
    k = len(knn_calc_sim_history[0]) - 1  # k of kNN
    max_update = max([test[0] for test in knn_calc_sim_history])
    if urange is None:
        urange = (1, max_update)
    elif len(urange) == 1 or urange[1] > max_update:
        urange = (urange[0], max_update)
    max_X = 0
    # Fill in plot data
    CALCS = np.array([])
    QUALITY = np.array([])
    UPDATE = np.array([])
    knn_calc_sim_history = sorted(knn_calc_sim_history,
                                  key=lambda point: point[0],
                                  reverse=False)  # sort by updates
    for test_history in knn_calc_sim_history:
        # print(test_history)
        update = test_history[0]
        if urange[0] <= update <= urange[1]:
            # print("update: ", update)
            for nn_ind, nn_i_history in enumerate(test_history[1:]):
                if ki is None or nn_ind == ki:
                    points = pdp.quality(nn_i_history)
                    # print(points)
                    X, Y = helper.to_arr(points, fill=fill)
                    # Eliminate (0, 0.0) entries
                    if X[0] == 0:
                        X = X[1:]
                    if Y[0] == 0:
                        Y = Y[1:]
                    if max(X) > max_X:
                        max_X = max(X)
                    # Eliminate the entries < start_calc
                    X = X[X >= start_calc]
                    # Keep the trailing qualities matching the kept calcs.
                    # `Y[-0:]` would return the whole array, so the empty case
                    # is handled explicitly to keep X and Y the same length.
                    Y = Y[-len(X):] if len(X) else Y[:0]
                    CALCS = np.concatenate((CALCS, X))
                    QUALITY = np.concatenate((QUALITY, Y))
                    # UPDATE = np.concatenate((UPDATE, np.full((len(X),), math.ceil(update / ustep), dtype=int)))
                    # The builtin `int` replaces the `np.int` alias, which has
                    # been removed from NumPy.
                    UPDATE = np.concatenate(
                        (UPDATE, np.full((len(X), ), update, dtype=int)))
    if cull:
        CALCS, ind_removed = helper.cull_arr(CALCS, pct=cull)
        QUALITY, _ = helper.cull_arr(QUALITY, ind=ind_removed)
        UPDATE, _ = helper.cull_arr(UPDATE, ind=ind_removed)
    if colored:
        # Color palette
        cmap = "autumn"  # "autumn"  "tab20"  # "Blues_r"
        cmap_size = math.ceil(max_update / ustep)
        my_palette = plt.cm.get_cmap(cmap, cmap_size)
        _ = plt.scatter(CALCS,
                        QUALITY,
                        marker=".",
                        s=1,
                        c=UPDATE,
                        cmap=my_palette,
                        vmin=1,
                        vmax=max_update,
                        alpha=1.)
        cbar = plt.colorbar(orientation="vertical")
        cbar.set_label("updates")
    else:
        _ = plt.scatter(CALCS, QUALITY, marker=".", s=1, c="black")
    ax = plt.gca()
    ax.yaxis.set_major_locator(plt.MultipleLocator(.1))
    ax.yaxis.set_minor_locator(plt.MultipleLocator(.05))
    plt.grid(True,
             which="major",
             linestyle="-",
             linewidth=1,
             color="lightgrey")
    plt.grid(True,
             which="minor",
             linestyle=":",
             linewidth=1,
             color="lightgrey")
    xticks_ = plt_common.get_ticks_log_scale(max_X, start=start_calc)
    y_min = 0.0 if q_full_scale else math.floor(
        np.nanmin(QUALITY[start_calc - 1:]) * 10) / 10
    yticks_ = np.arange(y_min, 1.01, .1)
    plt.rcParams['axes.axisbelow'] = True
    plt.xscale('log')
    plt.xticks(xticks_)
    plt.xlim(left=start_calc, right=max(xticks_))
    plt.yticks(yticks_)
    if signature:
        plt_common.sign_plot(plt, quality_map.__name__)
    fn_wo_ext = common.file_name_wo_ext(experiment)
    lbl_ki = str(ki if ki is not None else list([0, k - 1]))  # zero-based
    lbl_update = str(list(urange) if urange[0] != urange[1] else urange[0])
    if with_title:
        title_ = "Quality Map\n"
        title_ = "{}Exp: {}\n".format(title_, fn_wo_ext)
        title_ = "{}ki:{}, update:{}".format(title_, lbl_ki, lbl_update)
        if colored:
            title_ = "{}, color step:{}".format(title_, ustep)
        if cull:
            title_ = "{}, cull:{:.0%}".format(title_, cull)
        plt.title(title_ + "\n\n")
    save_fn = "QUALITY_MAP_{}_ki_{}_u_{}{}{}{}{}{}".format(
        fn_wo_ext, lbl_ki, lbl_update,
        "_s_{}".format(ustep) if colored else "", "_f" if fill else "",
        "_t" if with_title else "", "_c_{:.2f}".format(cull) if cull else "",
        "_z" if not q_full_scale else "")
    # axis labels
    matplotlib.rcParams['text.usetex'] = True  # Allow LaTeX in text
    # Raw string: same text as before without relying on the invalid "\#" escape
    ax.set_xlabel(r"$\#$ of similarity calculations ($c$)")
    ax.set_ylabel(r"quality ($\mathcal{Q}_c$)")
    # Tight layout
    plt.tight_layout()
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Quality Map figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()

    return None
コード例 #10
0
def gains_multiple(experiments,
                   file_format="pdf",
                   marker_size=1.,
                   color_ind=None,
                   with_title=True,
                   signature=True):
    """Plots the gains for a list of insights experiments.

    Args:
        experiments (list): List of full paths to the `run.run_insights` experiment(s) result file(s).
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            If empty or None, the figure is displayed instead of being saved.
        marker_size (float): size of the marker in scatter plot
        color_ind (int): Index in the `sns.color_palette()` for plotting single experiment
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.

    Returns:
        None

    """
    COLORS_ = sns.color_palette()
    title_ = "Gain in similarity calcs compared to Brute Search"
    save_fn = ""
    sns.set(style='whitegrid')  # , font_scale=.9)
    plt.figure(num=1, figsize=(8, 6))
    # Axis limits are accumulated over all experiments so that the range of a
    # later experiment does not clip the data of the earlier ones.
    y_min = x_min = x_max = None
    for exp_id, experiment in enumerate(experiments):
        result = common.load_obj(experiment)
        fn_wo_ext = common.file_name_wo_ext(experiment)
        # title_ = "{}\nExp #{}: {}".format(title_, exp_id, fn)
        dd = pd.DataFrame(result.data.gain)
        dd.columns = ['Update', '% Gain']

        g = sns.regplot(
            x='Update',
            y='% Gain',
            data=dd,
            scatter=True,
            fit_reg=True,
            scatter_kws={"s": marker_size},
            order=3,
            ci=None,
            line_kws={"label": "#{}: {}".format(exp_id, fn_wo_ext)},
            # Wrap around the palette so that many experiments cannot overflow it
            color=COLORS_[exp_id % len(COLORS_)]
            if len(experiments) > 1 or not color_ind else
            COLORS_[color_ind],  # color hack for presentations
            truncate=True)
        exp_y_min = np.min(dd['% Gain'])
        exp_x_min = np.min(dd['Update'])
        exp_x_max = np.max(dd['Update'])
        y_min = exp_y_min if y_min is None else min(y_min, exp_y_min)
        x_min = exp_x_min if x_min is None else min(x_min, exp_x_min)
        x_max = exp_x_max if x_max is None else max(x_max, exp_x_max)
        plt.ylim(y_min, 100.)  # np.max(dd['% Gain']))
        g.set(ylabel="Gain (%)")
        plt.xlim(x_min, x_max)
        g.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
        # plt.xticks(range(np.min(dd['Update']), np.max(dd['Update']) + 1))
        plt.gca().grid(True, linestyle="dashed", linewidth=0.4)
        plt.xlabel('Problem index')
        if not save_fn:
            # The file name is based on the first experiment
            save_fn = 'GAINS_{}'.format(fn_wo_ext)
    save_fn = "{}{}{}".format(
        save_fn, "_and_{}_more".format(len(experiments) -
                                       1) if len(experiments) > 1 else "",
        "_MARKERS" if marker_size else "")
    if signature:
        plt_common.sign_plot(plt, gains_multiple.__name__)
    plt.legend(title="Experiments", frameon=True, loc="best", fontsize="small")
    if with_title:
        plt.title(title_ + "\n")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print("Gains figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
コード例 #11
0
def pdp(pdp_file,
        file_format=None,
        update=1,
        ki=0,
        decimals=3,
        to_latex=False,
        start_q=0.0):
    """Plots or exports as latex a PDP table for a given update and ki.

    Only top n rows where the n^th row is the first row with conf=1.0 are plotted or exported.

    Args:
        pdp_file (str): pickle file for the performance distribution profile
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        update (int): which update to plot.
        ki (int): zero-based index of the NN in kNN list to plot.
        decimals (int): number of decimal digits for rounding probability and confidence values;
        to_latex (bool): if True, PDP 2D excerpt is saved into a .tex file in the figure folder and the figure is not
            displayed or saved.
        start_q (float): start column for the quality to be used to plot/export the PDP

    Returns:
        None

    """
    from decimal import Decimal  # local import: used only for the label precision

    # File info
    fn_wo_ext = common.file_name_wo_ext(pdp_file)
    save_fn = "PDP_FIG_{}_ki_{}_u_{}".format(fn_wo_ext, ki, update)
    # Read experiment data
    pdp_output = common.load_obj(pdp_file)
    pdp_all = pdp_output.data
    calc_step = pdp_output.settings.calc_step
    q_step = pdp_output.settings.q_step
    pdp_2d = pdp_all[update - 1][ki]
    # Filter PDP in order not to plot redundant rows with conf = 1.0
    rows_conf_1 = np.where(pdp_2d[:, -1] == 1.0)[0]  # rows whose last column is 1.0
    if rows_conf_1.size > 0:  # If the calc_step is very coarse, then there may be no rows with conf = 1
        top_n = rows_conf_1[0] + 1  # top n rows where the n^th row is the first row with conf=1.0
    else:
        top_n = pdp_2d.shape[0]
    pdp_2d = pdp_2d[:top_n]
    nrows = pdp_2d.shape[0]
    rows = ["{:d}".format(i * calc_step) for i in range(1, nrows + 1)]
    ncols = pdp_2d.shape[1]
    # Number of decimal digits of q_step for the column labels. Taken from the
    # decimal exponent so that integral or exponent-formatted values (e.g.
    # 1 or 1e-05) do not fail as splitting the string on '.' would.
    decimals_q_step = max(0, -Decimal(str(q_step)).as_tuple().exponent)
    cols = [
        "{0:.{1}f}".format(i * q_step, decimals_q_step)
        for i in range(1, ncols + 1)
    ]
    # calculate the weighted mean of probability distributions of quality (i.e. confidence) and std deviation for each row (i.e. calc range)
    q_ind_array = np.array(
        [round(i * q_step, decimals) for i in range(1, ncols + 1)])
    conf_n_std_dev = np.apply_along_axis(
        lambda a: (
            np.average(q_ind_array, weights=a),  # conf
            helper.weighted_std(q_ind_array, a)),  # std_dev
        axis=1,
        arr=pdp_2d)

    # Add the conf and std_dev columns to the original pdp
    pdp_2d = np.column_stack((pdp_2d, conf_n_std_dev))
    cols = cols + ["Confidence", "\u03C3"]
    pdp_2d = pdp_2d.round(decimals)
    if start_q:
        start_col_ind = math.ceil(start_q / q_step) - 1
        # Show only the quality columns >= start_q
        pdp_2d = pdp_2d[:, start_col_ind:]
        ncols = ncols - start_col_ind
        cols = cols[start_col_ind:]
        save_fn = "{}{}".format(save_fn, "_sq_{}".format(cols[0]))
    ########################
    # PDP to LaTeX
    if to_latex:
        # Make a table for the top n rows where the n^th row is the first row with conf=1.0
        pdp_df = pd.DataFrame(data=pdp_2d, index=rows, columns=cols)
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        pdp_df.to_latex(buf=save_fpath,
                        index=True,
                        float_format=lambda x: "{0:.{1}f}".format(x, decimals)
                        if x != 0. else "")
        print("PDP saved as LaTeX table into '{}'.".format(save_fpath))
        return None
    ########################
    # Cell colors are derived from the numeric values, hence computed before
    # the cells are converted to text; only needed when a figure is drawn.
    cell_colors = plt.cm.Oranges(pdp_2d)
    # Clear 0.0 cells
    pdp_text = pdp_2d.astype("U{}".format(2 + decimals))  # len("0.") = 2
    pdp_text[pdp_text == "0.0"] = ""
    # Create a figure
    hcell, wcell = 0.3, .8
    hpad, wpad = 0, 0
    fig = plt.figure(figsize=(ncols * wcell + wpad, (nrows * hcell + hpad)))
    ax2 = fig.add_subplot(111)
    ax2.axis('off')
    # Add a table at the center of the axes
    table = ax2.table(cellText=pdp_text,
                      rowLabels=rows,
                      rowLoc='right',
                      rowColours=plt.cm.BuPu(np.linspace(0, 0.5, len(rows))),
                      colColours=plt.cm.YlGn(np.linspace(0, 0.5, len(cols))),
                      cellColours=cell_colors,
                      colLabels=cols,
                      loc='center')
    # Set the last column header's color.
    # NOTE(review): with the column order above, the last header is the std dev
    # column, not "Confidence" -- confirm which one is intended.
    c = table.get_celld()[(0, len(cols) - 1)]
    c.set_facecolor("w")
    title_ = "Performance Distribution Profile\n"
    title_ = "{}PDP: {}\n".format(title_, fn_wo_ext)
    title_ = "{}ki: {}, update: {}\n".format(title_, ki, update)
    # plt.subplots_adjust(left=0.2, top=0.8)
    plt.title(title_)
    plt.tight_layout()
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=150, bbox_inches="tight")
        print("PDP figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.set_window_title(save_fn)
        plt.show()
    plt.close()
コード例 #12
0
ファイル: gain_exploit.py プロジェクト: IIIA-ML/alk
def main(argv=None):
    """Export the average gains of 'exploit candidates' experiments as a LaTeX table.

    For every experiment result file, the mean of the "gain" column is computed per
    "iterator" value (rows with ``update == 0``, i.e. the initial problem, are
    excluded). Each experiment becomes one table row identified by the dataset name,
    the time-window width and the time-window step.

    Args:
        argv (list): command line arguments. If None, argparse falls back to
            `sys.argv[1:]`.

    Returns:
        None

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments",
        nargs="*",
        type=str,
        help="Exploit Candidates experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help="Full path of the folder containing Exploit Candidates experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments' and 'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim

    def float_formatter(x):
        # Non-numeric cells are passed through untouched
        if isinstance(x, (int, float)):
            return "{0:.{1}f}".format(x, dec_digits)
        return x

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    index_cols = [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP]

    # One dict per experiment; the dataframe is built once after the loop because
    # `DataFrame.append` is no longer available in pandas >= 2.0.
    rows = []

    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load 'exploit candidates' experiment output
        exp_exploit_output = common.load_obj(
            exp)  # type: exploit.ExpExploitOutput
        # Get the dataset name
        dataset_name = common.file_name_wo_ext(
            exp_exploit_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average gains in experiments
        exp_exploit_data = exp_exploit_output.data
        exp_exploit_data = exp_exploit_data.loc[
            exp_exploit_data["update"] != 0]  # Filter the initial problem
        df_avg_gains = exp_exploit_data[["gain", "iterator"
                                         ]].groupby("iterator").mean()
        # Avg gain dict
        dict_avg_gains = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            run_common.time_window_width_str(
                exp_exploit_output.settings.tw_width),
            LBL_FSTEP:
            exp_exploit_output.settings.tw_step
        }
        # One extra column per iterator, holding its average gain
        avg_gain_keys = df_avg_gains.index.tolist()
        avg_gain_values = df_avg_gains["gain"].values
        dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
        rows.append(dict_avg_gains)

    # Export the df to LaTeX
    if rows:
        df_gains_output = pd.DataFrame(rows)
        # Identifying columns first, then the gain columns in order of appearance
        gain_cols = [c for c in df_gains_output.columns if c not in index_cols]
        df_gains_output = df_gains_output.reindex(columns=index_cols + gain_cols)
        #  Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(index_cols)
        df_gains_output = df_gains_output.sort_index()
        save_fn = "gain_exploit_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print(
            "Avg Gain for TopDown vs ExploitCandidates Rank Iterations saved as LaTeX table into '{}'."
            .format(save_fpath))
    else:
        print("No average gain results could be calculated.")
コード例 #13
0
ファイル: gain_insights.py プロジェクト: IIIA-ML/alk
def main(argv=None):
    """Export the average gain of insights experiments as a LaTeX table.

    Each experiment result file becomes one table row holding the dataset name,
    time-window width and step, the number of updates, the case base size and the
    average gain as returned by `insights.get_avg_gain_for_exp`.

    Args:
        argv (list): command line arguments. If None, argparse falls back to
            `sys.argv[1:]`.

    Returns:
        None

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments",
                        nargs="*",
                        type=str,
                        help="Interruption experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help="Full path of the folder containing interruption experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")

    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments' and 'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim

    def float_formatter(x):
        # Non-numeric cells are passed through untouched
        if isinstance(x, (int, float)):
            return "{0:.{1}f}".format(x, dec_digits)
        return x

    def int_formatter(x):
        # Thousands separator for the count columns
        return '{:,}'.format(x)

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_UPDATES = "Updates"
    LBL_CB_SIZE = "\u007CCB\u007C"
    LBL_GAIN = "Gain"
    output_cols = [
        LBL_DATASET, LBL_FWIDTH, LBL_FSTEP, LBL_UPDATES, LBL_CB_SIZE, LBL_GAIN
    ]

    # One dict per experiment; the dataframe is built once after the loop because
    # `DataFrame.append` is no longer available in pandas >= 2.0.
    rows = []

    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load insights experiment
        exp_insights_output = common.load_obj(
            exp)  # type: insights.ExpInsightsOutput
        dataset_name = common.file_name_wo_ext(
            exp_insights_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average gain for the insights experiment
        avg_gain = insights.get_avg_gain_for_exp(exp)
        # NOTE(review): presumably the first item of every gain entry is the zero-based
        # update index, hence the `+ 1` below - confirm against ExpInsightsOutput.
        n_updates = np.max([u[0] for u in exp_insights_output.data.gain])
        # Avg gain dict
        dict_avg_gains = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            time_window_width_str(exp_insights_output.settings.tw_width),
            LBL_FSTEP:
            exp_insights_output.settings.tw_step,
            LBL_UPDATES:
            n_updates + 1,
            LBL_CB_SIZE:
            exp_insights_output.settings.cb_size,
            LBL_GAIN:
            avg_gain
        }
        rows.append(dict_avg_gains)

    # Export the df to LaTeX
    if rows:
        df_gains_output = pd.DataFrame(rows).reindex(columns=output_cols)
        #  Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        save_fn = "gain_insights_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 formatters={
                                     LBL_UPDATES: int_formatter,
                                     LBL_CB_SIZE: int_formatter
                                 },
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print("Avg Gain results saved as LaTeX table into '{}'.".format(
            save_fpath))
    else:
        print("No average gain results could be calculated.")
コード例 #14
0
ファイル: plt_intrpt.py プロジェクト: IIIA-ML/alk
def efficiency(experiments,
               file_format="png",
               y_to_use="effcyq",
               rtrim=None,
               outliers=False,
               with_title=True,
               signature=True,
               palette="tab20",
               maximized=True,
               aspect=None):
    """Plots efficiency (f.k.a. confidence performance) for given interruption experiments.

    The result dataframes of all experiments are concatenated and drawn as a grouped
    boxplot: confidence thresholds on the x axis, `y_to_use` on the y axis and one hue per
    experiment setting (time-window width, step and z).

    Args:
        experiments (List[str]): list of experiment file paths
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        y_to_use (str): ['abspcterr', 'abserr', 'effcysim', 'effcyq'] fields in experiment result dataframe
            standing for absolute percentage error, absolute error, efficiency (using sim) and
            efficiency (using quality) of confidence.
        rtrim (list): Remove given strings at the end of dataset names; e.g. ['_TRAIN', '_TEST']
        outliers (bool): If True, outliers are plotted.
        with_title (bool): If True, show generated plot title.
        signature (bool): If True, write the name of this function.
        palette (str): A matplotlib colormap. e.g. 'tab20', 'Purples_d'
        maximized (bool): If True, maximize plot to full screen.
        aspect (float): Desired aspect ratio (i.e. height/width) of the figure.
            If not None, the width is re-adjusted for this ratio while the height remains the same.

    Returns:
        None

    Raises:
        ValueError:
            i) If dataset names for the experiments are different;
            ii) If y_to_use not in valid options;
            iii) If no experiment is given.

    """

    # Raw strings: the labels carry LaTeX commands, not Python escape sequences
    Y_OPTIONS = {
        "abspcterr": "absolute percentage error (%)",
        "abserr": "absolute error",
        "effcysim": r"efficiency ($\eta$)",  # using sim
        "effcyq": r"efficiency ($\eta$)",  # using quality
    }
    if y_to_use not in Y_OPTIONS:
        raise ValueError(
            "Non-valid value for y_to_use argument. Should be in {}".format(
                list(Y_OPTIONS.keys())))
    if not experiments:
        # Fail early with a clear message instead of crashing later while plotting
        raise ValueError("At least one experiment is required to plot the efficiency.")
    df_output = None
    # Variables for output file name
    dataset_name = None
    w_list = []
    step_list = []
    z_list = []  # z=-nstd in the new efficiency definition
    # Populate summary dictionary
    for exp in experiments:
        # Read experiment data
        result = common.load_obj(exp)  # type: intrpt.ExpIntrptOutput
        # Update output DataFrame; "setting" is used as the hue of the boxplot
        data = result.data
        data["setting"] = "$w$:{}, $step$:{}, $z$:{}".format(
            result.settings.tw_width, result.settings.tw_step,
            helper.is_float_int(result.settings.z))
        df_output = pd.concat([df_output, data])
        # Update variables for output file name
        if dataset_name is None:
            dataset_name = common.file_name_wo_ext(result.settings.dataset)
            if rtrim:
                for tag in rtrim:
                    # Trim end tags
                    dataset_name = re.sub("{}$".format(tag), "", dataset_name)
        elif result.settings.dataset.find(dataset_name) == -1:
            raise ValueError(
                "Plotting different datasets not allowed yet: {}, {}.".format(
                    dataset_name, result.settings.dataset))
        w_list.append(result.settings.tw_width)
        step_list.append(result.settings.tw_step)
        z_list.append(helper.is_float_int(result.settings.z))

    # Plot
    plt.figure()
    ax = plt.gca()
    # Show grid
    ax.grid(True, linestyle="dashed", linewidth=.5)
    # Grouped boxplot
    sns.boxplot(x="confthold",
                y=y_to_use,
                hue="setting",
                data=df_output,
                palette=palette,
                linewidth=.5,
                showfliers=outliers,
                fliersize=.2)
    if y_to_use in ["effcysim", "effcyq"]:
        # Draw a horizontal line for y=1  (efficiency approx= 1)
        ax.axhline(1, linestyle="--", color="black", linewidth=1.)
    # axis labels
    matplotlib.rcParams['text.usetex'] = True  # Allow LaTeX in text
    ax.set_xlabel(r"confidence thresholds ($\mu\!+\!z\sigma$) for interruption")
    ax.set_ylabel("{}".format(Y_OPTIONS[y_to_use]))
    # plot title
    if with_title:
        plt.title("{} of Confidence in Interruption Tests\nfor ${}$".format(
            Y_OPTIONS[y_to_use].capitalize(), dataset_name.replace("_", r"\_")))
    # Add the signature
    if signature:
        plt_common.sign_plot(plt, efficiency.__name__)
    # Maximize plot to full screen
    if maximized:
        plt.legend(fontsize="medium")
        manager = plt.get_current_fig_manager()
        backend_ = matplotlib.get_backend()
        if backend_.upper() == "TKAGG":
            manager.resize(*manager.window.maxsize())
        elif backend_.upper().startswith("QT"):
            manager.window.showMaximized()
        elif backend_.find("interagg") != -1:  # Hack for PyCharm SciView
            pass
        else:  # Try your chance
            manager.resize(*manager.window.maxsize())
    else:
        plt.legend(fontsize="small")
    # File name/Window title; de-duplicate first, then sort for a stable file name
    w_list = [str(_) for _ in sorted(set(w_list))]
    step_list = [str(_) for _ in sorted(set(step_list))]
    z_list = [str(_) for _ in sorted(set(z_list))]
    save_fn = "EFF_{}(x{})_w_[{}]_s_[{}]_z_[{}]_{}{}{}{}".format(
        dataset_name, len(experiments), "_".join(w_list), "_".join(step_list),
        "_".join(z_list), y_to_use, "_a_{}".format(aspect) if aspect else "",
        "_o" if outliers else "", "_t" if with_title else "")
    # Aspect ratio: keep the height, derive the width from it
    if aspect:
        fig = plt.gcf()
        _, figh = fig.get_size_inches()
        fig.set_size_inches(figh / aspect, figh, forward=True)
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=300, bbox_inches="tight")
        print(
            "Confidence efficiency figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window through the figure manager;
        # `canvas.set_window_title` is no longer available in recent Matplotlib.
        fig_manager = plt.gcf().canvas.manager
        if fig_manager is not None:
            fig_manager.set_window_title(save_fn)
        plt.show()
    plt.close()

    return None
コード例 #15
0
ファイル: hit_classify.py プロジェクト: IIIA-ML/alk
def main(argv=None):
    """Export the average solution hits of classification experiments as a LaTeX table.

    Experiments whose 'z' setting differs from the `-z` argument are reported and
    skipped. For the others, the average hits returned by
    `alk.exp.classify.get_avg_hit_for_classify_exp` are written as one table row per
    experiment, identified by dataset name, time-window width and step. Hit values are
    rendered as percentages (multiplied by 100).

    Args:
        argv (list): command line arguments. If None, argparse falls back to
            `sys.argv[1:]`.

    Returns:
        None

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments",
                        nargs="*",
                        type=str,
                        help="Classification experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help="Full path of the folder containing classification experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c",
                        "--confthold",
                        nargs="+",
                        type=float,
                        default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z",
                        "--z",
                        type=float,
                        default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--knni",
        type=int,
        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
        " 'None' to calculate for all kNNs."
        " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument(
        "--wsoln",
        choices=[0, 1],
        type=int,
        default=0,
        help="1 to display hits upon interruption w/ exact solution")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments' and 'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni  # Only used to tag the output file name in this script
    rtrim = args.rtrim

    def float_formatter(x):
        return "{0:.{1}f}".format(x, dec_digits)

    def float_formatter_hit(x):
        # Hits are shown as percentages; non-numeric cells are passed through untouched
        if isinstance(x, (int, float)):
            return "{0:.{1}f}".format(x * 100, dec_digits)
        return x

    wsoln = args.wsoln
    wsoln_tag = "_ws_{}".format(wsoln) if wsoln else ""

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_STOP_W_SOLN = "w\u2215Soln"

    # Columns of the output dataframe
    output_cols = (
        [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
        ([LBL_STOP_W_SOLN] if wsoln else []) + [
            gain_intrpt_classify.conf_col_label(c, float_formatter, arg_z)
            for c in conf_thold if c != 1
        ])  # Exclude conf=1.00 for hits, makes no sense for uninterruption

    # One dict per experiment; the dataframe is built once after the loop because
    # `DataFrame.append` is no longer available in pandas >= 2.0.
    rows = []

    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load classification experiment
        exp_output = common.load_obj(
            exp
        )  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print(
                "Ignored. The 'z' command line argument ({}) is different from "
                "the experiment 'z' setting ({}).".format(
                    helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
            continue
        dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average hits in the classification experiment
        df_avg_hits = alk.exp.classify.get_avg_hit_for_classify_exp(
            exp, conf_thold, wsoln, lblwsoln=LBL_STOP_W_SOLN)
        # Avg hit dict
        dict_avg_hits = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            run_common.time_window_width_str(exp_output.settings.tw_width),
            LBL_FSTEP:
            exp_output.settings.tw_step
        }
        # Float index entries are confidence thresholds and get a column label;
        # anything else is used as the column name as is.
        avg_hits_keys = [
            gain_intrpt_classify.conf_col_label(c, float_formatter, exp_z)
            if isinstance(c, float) else c
            for c in df_avg_hits.index.tolist()
        ]
        avg_hits_values = df_avg_hits["hit"].values
        dict_avg_hits.update(dict(zip(avg_hits_keys, avg_hits_values)))
        rows.append(dict_avg_hits)

    # Export the df_hits to LaTeX
    if rows:
        df_hits_output = pd.DataFrame(rows)
        # Declared columns first (empty ones stay as NaN), then any column that only
        # showed up in the results, in order of appearance.
        extra_cols = [c for c in df_hits_output.columns if c not in output_cols]
        df_hits_output = df_hits_output.reindex(columns=output_cols + extra_cols)
        #  Create a multiindex for a sorted (and prettier) output
        df_hits_output = df_hits_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_hits_output = df_hits_output.sort_index()
        save_fn_hit = "soln_hit_(x{})_[{}]_sd_{}_ki_{}{}".format(
            len(df_hits_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z), knn_i if knn_i is not None else "All",
            wsoln_tag)
        save_fpath_hit = os.path.join(common.APP.FOLDER.FIGURE,
                                      "{}.tex".format(save_fn_hit))
        df_hits_output.to_latex(buf=save_fpath_hit,
                                float_format=float_formatter_hit,
                                escape=False,
                                multirow=True,
                                index=True)
        print("Avg Solution Hit %s saved as LaTeX table into '{}'.".format(
            save_fpath_hit))
    else:
        print("No average solution hit results could be calculated.")
コード例 #16
0
def main(argv=None):
    """Export the average gains of interruption/classification experiments as a LaTeX table.

    Experiments whose 'z' setting differs from the `-z` argument are reported and
    skipped. For the others, one table row is written per experiment with the average
    gain per confidence threshold (plus the gain upon interruption with exact solution
    when `--clsfy 1`), followed by the average efficiency and its standard deviation.

    Args:
        argv (list): command line arguments. If None, argparse falls back to
            `sys.argv[1:]`.

    Returns:
        None

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Interruption/classification experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c", "--confthold", nargs="+", type=float, default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z", "--z", type=float, default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--knni", type=int,
                        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
                             " 'None' to calculate for all kNNs."
                             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument("--clsfy", choices=[0, 1], type=int, default=0,
                        help="0 for interruption experiments;"
                             " 1 for classification experiments to display gains also upon interruption w/ exact solution.")
    parser.add_argument("--rtrim", nargs="*", type=str,
                        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments' and 'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim

    def float_formatter(x):
        return "{0:.{1}f}".format(x, dec_digits)

    clsfy = args.clsfy
    exp_tag = "{}".format("classify" if clsfy else "intrpt")

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_CONF_PERF = "Effcy"
    LBL_CONF_PERF_STD = "\u03C3"
    LBL_STOP_W_SOLN = "w\u2215Soln"

    # Columns of the output dataframe
    output_cols = ([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                   ([LBL_STOP_W_SOLN] if clsfy else []) +
                   [conf_col_label(c, float_formatter, arg_z) for c in conf_thold] +
                   [LBL_CONF_PERF, LBL_CONF_PERF_STD])

    # One dict per experiment; the dataframe is built once after the loop because
    # `DataFrame.append` is no longer available in pandas >= 2.0.
    rows = []

    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load interruption/classification experiment
        exp_output = common.load_obj(exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
            continue
        dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        # Get the average gains in the interruption/classification experiment
        # Average gain is calculated for the last kNN member for confthold experiments and
        # for stopwsoln=1 for the interruption w/ exact solution experiments (if wsoln=True)
        # A copy of the thresholds is passed: `intrpt.get_avg_gain_for_intrpt_exp` removes the
        # thresholds missing in an experiment from the list it receives, which would otherwise
        # drop them for the following experiments and from the output file name as well.
        # NOTE(review): the classify counterpart is not visible here; it gets a copy for symmetry.
        if not clsfy:
            df_avg_gains = intrpt.get_avg_gain_for_intrpt_exp(exp, list(conf_thold))
        else:
            df_avg_gains = classify.get_avg_gain_for_classify_exp(exp, list(conf_thold), wsoln=True,
                                                                  lblwsoln=LBL_STOP_W_SOLN)
        # Add the results to the output
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
        dict_avg_gains = {LBL_DATASET: dataset_name,
                          LBL_FWIDTH: run_common.time_window_width_str(exp_output.settings.tw_width),
                          LBL_FSTEP: exp_output.settings.tw_step}
        # Float index entries are confidence thresholds and get a column label;
        # anything else is used as the column name as is.
        avg_gain_keys = [conf_col_label(c, float_formatter, exp_z) if isinstance(c, float) else c
                         for c in df_avg_gains.index.tolist()]
        avg_gain_values = df_avg_gains["gain"].values
        dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
        # Add average efficiency and its std deviation columns too
        if not clsfy:
            avg_conf_perf, avg_conf_perf_std = intrpt.get_avg_effcy_for_intrpt_exp(exp, knn_i=knn_i)
        else:
            avg_conf_perf, avg_conf_perf_std = classify.get_avg_effcy_for_classify_exp(exp, knn_i=knn_i)
        dict_avg_gains.update({LBL_CONF_PERF: avg_conf_perf, LBL_CONF_PERF_STD: avg_conf_perf_std})
        rows.append(dict_avg_gains)

    # Export the df to LaTeX
    if rows:
        df_output = pd.DataFrame(rows)
        # Declared columns first (empty ones stay as NaN), then any column that only
        # showed up in the results, in order of appearance.
        extra_cols = [c for c in df_output.columns if c not in output_cols]
        df_output = df_output.reindex(columns=output_cols + extra_cols)
        # Swap wsoln and 1.0 columns
        if clsfy:
            unint_col = conf_col_label(1., float_formatter, arg_z)
            gain_cols = df_output.columns.tolist()
            if unint_col in gain_cols:
                unint_col_idx = gain_cols.index(unint_col)
                wsoln_col_idx = gain_cols.index(LBL_STOP_W_SOLN)
                gain_cols[unint_col_idx], gain_cols[wsoln_col_idx] = gain_cols[wsoln_col_idx], gain_cols[unint_col_idx]
                df_output = df_output[gain_cols]
        # Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        save_fn = "gain_{}_(x{})_[{}]_sd_{}_ki_{}".format(exp_tag,
                                                          len(df_output),
                                                          "_".join([str(c) for c in conf_thold]),
                                                          helper.is_float_int(arg_z),
                                                          knn_i if knn_i is not None else "All")
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_output.to_latex(buf=save_fpath, float_format=float_formatter, escape=False, multirow=True, index=True)
        print_msg_header = "Avg Gain for Interruptions at Confidence Thresholds{}".format(" and with Exact Solutions" if clsfy else "")
        print("{} saved as LaTeX table into '{}'.".format(print_msg_header, save_fpath))
    else:
        print("No average gain results could be calculated.")