Example #1
0
def multi_ale_plot_1d(
    model,
    X_train,
    columns,
    fig_name,
    xlabel=None,
    ylabel=None,
    title=None,
    n_jobs=8,
    verbose=False,
    figure_saver=None,
):
    """Plot the first-order ALE curves of several features on one shared axis.

    For every feature in `columns`, `first_order_ale_quant` is called with
    `model.predict`, `X_train` and 20 bins, yielding a `(quantiles, ale)` pair.
    The per-feature quantiles are averaged element-wise into a common grid
    (with the first/last entries replaced by the overall minimum/maximum), and
    each ALE curve is drawn against its quantiles interpolated onto the index
    axis of that common grid.

    Args:
        model: Object exposing `predict`; its `n_jobs` attribute is
            overwritten with `n_jobs` as a side effect.
        X_train: Training data handed to `first_order_ale_quant`.
        columns: Iterable of feature names to plot. Must be non-empty.
        fig_name: Name passed to `figure_saver.save_figure`.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        title: Figure-level title (`fig.suptitle`).
        n_jobs: Number of jobs for the joblib "threading" backend and `model`.
        verbose: If true, show a tqdm progress bar while computing the ALEs.
        figure_saver: Optional object with a `save_figure(fig, name,
            sub_directory=...)` method; when given, the figure is saved in the
            "multi_ale" sub-directory.

    Raises:
        ValueError: If `columns` is empty, or (from `np.vstack`) if the
            features do not all yield the same number of quantiles.
    """
    # `columns` is consumed twice below (ALE computation, then plotting), so a
    # one-shot iterable would silently produce an empty plot. Materialise it.
    columns = list(columns)
    if not columns:
        # Fail before a figure is created; otherwise np.vstack would raise a
        # less helpful ValueError and leave an empty figure behind.
        raise ValueError("`columns` must contain at least one feature.")

    fig, ax = plt.subplots(
        figsize=(7.5, 4.5))  # Make sure plot is plotted onto a new figure.
    model.n_jobs = n_jobs
    with parallel_backend("threading", n_jobs=n_jobs):
        quantile_list = []
        ale_list = []
        for feature in tqdm(columns,
                            desc="Calculating feature ALEs",
                            disable=not verbose):
            quantiles, ale = first_order_ale_quant(model.predict,
                                                   X_train,
                                                   feature,
                                                   bins=20)
            quantile_list.append(quantiles)
            ale_list.append(ale)

    # Construct quantiles from the individual quantiles, minimising the amount
    # of interpolation. np.vstack requires equally sized quantile arrays.
    combined_quantiles = np.vstack(
        [feature_quantiles[None] for feature_quantiles in quantile_list])

    final_quantiles = np.mean(combined_quantiles, axis=0)
    # Account for extrema.
    final_quantiles[0] = np.min(combined_quantiles)
    final_quantiles[-1] = np.max(combined_quantiles)

    # Evenly spaced tick positions, one per entry of the combined grid. Derived
    # from `final_quantiles` rather than from a leftover loop variable.
    mod_quantiles = np.arange(len(final_quantiles))
    for feature, quantiles, ale in zip(columns, quantile_list, ale_list):
        # Interpolate each of the quantiles relative to the accumulated final
        # quantiles.
        ax.plot(
            np.interp(quantiles, final_quantiles, mod_quantiles),
            ale,
            marker="o",
            ms=3,
            label=feature,
        )

    ax.legend(loc="best")
    ax.set_xticks(mod_quantiles)
    ax.set_xticklabels(_sci_format(final_quantiles, scilim=0.6))
    ax.xaxis.set_tick_params(rotation=45)
    ax.grid(alpha=0.4, linestyle="--")

    fig.suptitle(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if figure_saver is not None:
        figure_saver.save_figure(fig, fig_name, sub_directory="multi_ale")
Example #2
0
def save_pdp_plot_1d(model,
                     X_train,
                     column,
                     n_jobs,
                     CACHE_DIR,
                     figure_saver=None):
    """Create two 1D partial dependence plots for `column`, caching the data.

    The `pdp.pdp_isolate` result is cached as a pickle at
    `<CACHE_DIR>/pdp_data/<column>`. If that file exists it is loaded instead
    of recomputing; otherwise the result is computed (setting `model.n_jobs`
    as a side effect) and written to the cache.

    Two figures are produced: one via `pdp.pdp_plot` including centred ICE
    lines, and one plain line plot of the PDP values relative to the first
    grid point.

    Args:
        model: Model passed to `pdp.pdp_isolate`.
        X_train: Dataset passed to `pdp.pdp_isolate`; its `columns` attribute
            supplies the model feature names.
        column: Name of the feature to analyse (also the cache file name).
        n_jobs: Number of jobs for the joblib "threading" backend and `model`.
        CACHE_DIR: Directory under which the "pdp_data" cache lives.
        figure_saver: Optional object with a `save_figure(fig, name,
            sub_directory=...)` method; when given, the figures are saved to
            the "pdp" and "pdp_no_ice" sub-directories respectively.

    Returns:
        `((fig_ice, fig_no_ice), pdp_isolate_out, data_file)`.
    """
    cache_path = os.path.join(CACHE_DIR, "pdp_data", column)

    if os.path.isfile(cache_path):
        # NOTE(review): pickle.load executes arbitrary code from the file, so
        # CACHE_DIR must only ever contain files written by this function.
        with open(cache_path, "rb") as cache_file:
            column, isolated = pickle.load(cache_file)
    else:
        model.n_jobs = n_jobs
        with parallel_backend("threading", n_jobs=n_jobs):
            isolated = pdp.pdp_isolate(
                model=model,
                dataset=X_train,
                model_features=X_train.columns,
                feature=column,
                num_grid_points=20,
            )
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        with open(cache_path, "wb") as cache_file:
            pickle.dump((column, isolated),
                        cache_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

    # Figure 1: PDP together with the individual (ICE) lines.
    fig_ice, ice_axes = pdp.pdp_plot(
        isolated,
        column,
        plot_lines=True,
        center=True,
        frac_to_plot=1000,
        x_quantile=True,
        figsize=(7, 5),
    )
    ice_axes["pdp_ax"].xaxis.set_tick_params(rotation=45)
    if figure_saver is not None:
        figure_saver.save_figure(fig_ice, column, sub_directory="pdp")

    # Figure 2: PDP only, shifted so that the first grid point sits at zero.
    fig_no_ice, pdp_ax = plt.subplots(figsize=(7.5, 4.5))
    pdp_values = isolated.pdp
    n_points = len(pdp_values)
    pdp_ax.plot(pdp_values - pdp_values[0], marker="o")
    pdp_ax.set_xticks(range(n_points))
    pdp_ax.set_xticklabels(
        _sci_format(isolated.feature_grids, scilim=0.6),
        rotation=45,
    )
    pdp_ax.set_xlabel(f"{column}")
    pdp_ax.set_title(f"PDP of feature '{column}'\nBins: {n_points}")
    pdp_ax.grid(alpha=0.4, linestyle="--")
    if figure_saver is not None:
        figure_saver.save_figure(fig_no_ice,
                                 column,
                                 sub_directory="pdp_no_ice")

    return (fig_ice, fig_no_ice), isolated, cache_path
Example #3
0
def save_ale_2d_and_get_importance(
    model,
    train_set,
    features,
    n_jobs=8,
    include_first_order=False,
    figure_saver=None,
    plot_samples=True,
    figsize=None,
):
    """Plot the 2D ALE of `features` and return the range of the ALE values.

    `ale_plot` is called with 20 bins on a freshly created figure. When
    `plot_samples` is true, a second panel showing the per-cell sample counts
    (symmetric-log colour scale) is drawn next to the ALE panel.

    Args:
        model: Model passed to `ale_plot`; its `n_jobs` attribute is
            overwritten with `n_jobs` as a side effect.
        train_set: Training data passed to `ale_plot`.
        features: Sequence of feature names; joined with "__" to form the
            saved figure's name.
        n_jobs: Number of jobs for the joblib "threading" backend, `model`
            and `ale_plot`.
        include_first_order: Forwarded to `ale_plot`; also selects the
            "2d_ale_first_order" instead of the "2d_ale" output sub-directory.
        figure_saver: Optional object with a `save_figure(fig, name,
            sub_directory=...)` method.
        plot_samples: Whether to add the sample-count panel.
        figsize: Figure size; defaults to (10, 4.5) with the samples panel
            and (7.5, 4.5) without it.

    Returns:
        `np.ptp(ale)`, the peak-to-peak range of the ALE values returned by
        `ale_plot`.
    """
    model.n_jobs = n_jobs

    # All layout decisions hinge on whether the samples panel is present.
    if plot_samples:
        n_panels = 2
        default_figsize = (10, 4.5)
        grid_options = {"width_ratios": [1.7, 1]}
        use_constrained_layout = True
        colorbar_pad = 0.02
    else:
        n_panels = 1
        default_figsize = (7.5, 4.5)
        grid_options = None
        use_constrained_layout = False
        colorbar_pad = 0.09

    if figsize is None:
        figsize = default_figsize

    # Make sure plot is plotted onto a new figure.
    fig, ax = plt.subplots(
        1,
        n_panels,
        figsize=figsize,
        gridspec_kw=grid_options,
        constrained_layout=use_constrained_layout,
    )
    # With two panels `ax` is indexable; with a single panel it is the Axes.
    ale_ax = ax[0] if plot_samples else ax

    colorbar_options = dict(
        format="%.0e",
        pad=colorbar_pad,
        aspect=32,
        shrink=0.85,
        ax=ale_ax,
    )
    with parallel_backend("threading", n_jobs=n_jobs):
        fig, axes, (quantiles_list, ale, samples) = ale_plot(
            model,
            train_set,
            features,
            bins=20,
            fig=fig,
            ax=ale_ax,
            plot_quantiles=True,
            quantile_axis=True,
            plot_kwargs={"colorbar_kwargs": colorbar_options},
            return_data=True,
            n_jobs=n_jobs,
            include_first_order=include_first_order,
        )

    axes["ale"].xaxis.set_tick_params(rotation=45)
    axes["quantiles_x"].xaxis.set_tick_params(rotation=45)

    if plot_samples:
        # Sample counts per cell, drawn on an index grid labelled with the
        # actual quantile values.
        samples_ax = ax[1]
        samples_ax.set_title("Samples")
        index_grids = []
        for axis_name, axis_quantiles in zip("xy", quantiles_list):
            positions = np.arange(len(axis_quantiles))
            index_grids.append(positions)
            samples_ax.set(**{f"{axis_name}ticks": positions})
            samples_ax.set(**{
                f"{axis_name}ticklabels":
                _sci_format(axis_quantiles, scilim=0.6)
            })
        samples_img = samples_ax.pcolormesh(*index_grids,
                                            samples.T,
                                            norm=SymLogNorm(linthresh=1))
        fig.colorbar(samples_img, ax=ax, shrink=0.6, pad=0.01)
        samples_ax.xaxis.set_tick_params(rotation=90)
        samples_ax.set_aspect("equal")
        fig.set_constrained_layout_pads(w_pad=0.000,
                                        h_pad=0.000,
                                        hspace=0.0,
                                        wspace=0.015)

    if figure_saver is not None:
        if include_first_order:
            target_dir = "2d_ale_first_order"
        else:
            target_dir = "2d_ale"
        figure_saver.save_figure(
            fig,
            "__".join(features),
            sub_directory=target_dir,
        )

    # NOTE: the range is taken over all cells; no minimum-sample filtering
    # is applied.
    return np.ptp(ale)