def multi_ale_plot_1d(
    model,
    X_train,
    columns,
    fig_name,
    xlabel=None,
    ylabel=None,
    title=None,
    n_jobs=8,
    verbose=False,
    figure_saver=None,
):
    """Draw the first-order ALE curves of several features on one shared axis.

    For every entry of ``columns``, ``first_order_ale_quant`` is called with
    ``model.predict``, ``X_train`` and ``bins=20``. The resulting quantiles are
    averaged across features to build a common x axis; each curve is then
    positioned on that axis by interpolating its own quantiles onto the
    integer positions of the averaged quantiles.

    Args:
        model: Object exposing ``predict``. Its ``n_jobs`` attribute is
            overwritten with ``n_jobs``.
        X_train: Training data forwarded to ``first_order_ale_quant``.
        columns: Features to plot; also used as legend labels.
        fig_name: Name handed to ``figure_saver.save_figure``.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        title: Figure suptitle.
        n_jobs: Number of jobs for the ``threading`` parallel backend.
        verbose: If true, show a progress bar while the ALEs are computed.
        figure_saver: Optional object with a ``save_figure`` method. When
            given, the figure is saved into the ``multi_ale`` sub-directory.

    Returns:
        None. The plot is produced (and optionally saved) as a side effect.
    """
    # A dedicated figure guarantees nothing is drawn onto a pre-existing one.
    fig, ax = plt.subplots(figsize=(7.5, 4.5))

    model.n_jobs = n_jobs

    per_feature_quantiles = []
    per_feature_ales = []
    with parallel_backend("threading", n_jobs=n_jobs):
        progress = tqdm(
            columns, desc="Calculating feature ALEs", disable=not verbose
        )
        for name in progress:
            feature_quantiles, feature_ale = first_order_ale_quant(
                model.predict, X_train, name, bins=20
            )
            per_feature_quantiles.append(feature_quantiles)
            per_feature_ales.append(feature_ale)

    # Build one common set of quantiles out of the per-feature ones (one row
    # per feature), which keeps the amount of interpolation needed small.
    stacked = np.vstack([q[None] for q in per_feature_quantiles])
    shared_quantiles = np.mean(stacked, axis=0)

    # The end points must cover the extrema of every individual feature.
    shared_quantiles[0] = np.min(stacked)
    shared_quantiles[-1] = np.max(stacked)

    # Integer x positions, one per shared quantile.
    tick_positions = np.arange(len(shared_quantiles))

    for name, feature_quantiles, feature_ale in zip(
        columns, per_feature_quantiles, per_feature_ales
    ):
        # Express this feature's quantiles in terms of the shared positions.
        x_positions = np.interp(
            feature_quantiles, shared_quantiles, tick_positions
        )
        ax.plot(x_positions, feature_ale, marker="o", ms=3, label=name)

    ax.legend(loc="best")

    ax.set_xticks(tick_positions)
    ax.set_xticklabels(_sci_format(shared_quantiles, scilim=0.6))
    ax.xaxis.set_tick_params(rotation=45)

    ax.grid(alpha=0.4, linestyle="--")

    fig.suptitle(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if figure_saver is None:
        return
    figure_saver.save_figure(fig, fig_name, sub_directory="multi_ale")
def save_pdp_plot_1d(model, X_train, column, n_jobs, CACHE_DIR, figure_saver=None):
    """Plot the 1D partial dependence of ``column``, caching the PDP data.

    The output of ``pdp.pdp_isolate`` is pickled (together with the column
    name) to ``<CACHE_DIR>/pdp_data/<column>``. If that file already exists it
    is loaded instead of recomputing, and the column name stored in the file
    replaces the ``column`` argument for everything that follows.

    Two figures are created: one through ``pdp.pdp_plot`` with
    ``plot_lines=True`` ("with ICEs"), and a plain line plot of the PDP values
    relative to the first value ("without ICEs").

    Args:
        model: Model handed to ``pdp.pdp_isolate``. Its ``n_jobs`` attribute is
            overwritten with ``n_jobs`` when the data has to be computed.
        X_train: Dataset; its ``columns`` are used as the model features.
        column: Name of the feature to analyse (also the cache file name).
        n_jobs: Number of jobs for the ``threading`` parallel backend.
        CACHE_DIR: Root directory of the on-disk cache.
        figure_saver: Optional object with a ``save_figure`` method. When
            given, the figures are saved into the ``pdp`` and ``pdp_no_ice``
            sub-directories respectively.

    Returns:
        A tuple ``((fig_ice, fig_no_ice), pdp_isolate_out, data_file)``.
    """
    data_file = os.path.join(CACHE_DIR, "pdp_data", column)

    if os.path.isfile(data_file):
        # NOTE: unpickling can execute arbitrary code, so CACHE_DIR has to be a
        # trusted location.
        with open(data_file, "rb") as cache:
            column, pdp_isolate_out = pickle.load(cache)
    else:
        model.n_jobs = n_jobs
        with parallel_backend("threading", n_jobs=n_jobs):
            pdp_isolate_out = pdp.pdp_isolate(
                model=model,
                dataset=X_train,
                model_features=X_train.columns,
                feature=column,
                num_grid_points=20,
            )

        os.makedirs(os.path.dirname(data_file), exist_ok=True)
        with open(data_file, "wb") as cache:
            pickle.dump(
                (column, pdp_isolate_out),
                cache,
                protocol=pickle.HIGHEST_PROTOCOL,
            )

    saving = figure_saver is not None

    # Figure 1: PDP including the individual (ICE) lines.
    fig_ice, axes_ice = pdp.pdp_plot(
        pdp_isolate_out,
        column,
        plot_lines=True,
        center=True,
        frac_to_plot=1000,
        x_quantile=True,
        figsize=(7, 5),
    )
    axes_ice["pdp_ax"].xaxis.set_tick_params(rotation=45)
    if saving:
        figure_saver.save_figure(fig_ice, column, sub_directory="pdp")

    # Figure 2: PDP only, shifted so that the first bin sits at zero.
    fig_no_ice, plain_ax = plt.subplots(figsize=(7.5, 4.5))

    pdp_values = pdp_isolate_out.pdp
    n_bins = len(pdp_values)
    plain_ax.plot(pdp_values - pdp_values[0], marker="o")

    tick_labels = _sci_format(pdp_isolate_out.feature_grids, scilim=0.6)
    plain_ax.set_xticks(range(n_bins))
    plain_ax.set_xticklabels(tick_labels, rotation=45)

    plain_ax.set_xlabel(f"{column}")
    plain_ax.set_title(f"PDP of feature '{column}'\nBins: {n_bins}")
    plain_ax.grid(alpha=0.4, linestyle="--")
    if saving:
        figure_saver.save_figure(fig_no_ice, column, sub_directory="pdp_no_ice")

    figures = (fig_ice, fig_no_ice)
    return figures, pdp_isolate_out, data_file
def save_ale_2d_and_get_importance(
    model,
    train_set,
    features,
    n_jobs=8,
    include_first_order=False,
    figure_saver=None,
    plot_samples=True,
    figsize=None,
):
    """Create a 2D ALE plot for ``features`` and return the ALE value range.

    The ALE itself is drawn by ``ale_plot`` (with ``bins=20``) onto a freshly
    created figure. When ``plot_samples`` is true, a second panel titled
    "Samples" shows the ``samples`` array returned by ``ale_plot`` on a
    symmetric-log colour scale, using the bin indices as coordinates and the
    formatted quantile values as tick labels.

    Args:
        model: Model forwarded to ``ale_plot``. Its ``n_jobs`` attribute is
            overwritten with ``n_jobs``.
        train_set: Training data forwarded to ``ale_plot``.
        features: Features forwarded to ``ale_plot``; joined with ``"__"`` to
            form the saved figure's name.
        n_jobs: Number of jobs, used both for the ``threading`` parallel
            backend and as the ``n_jobs`` argument of ``ale_plot``.
        include_first_order: Forwarded to ``ale_plot``; also selects the
            sub-directory the figure is saved to.
        figure_saver: Optional object with a ``save_figure`` method.
        plot_samples: Whether to add the samples panel next to the ALE plot.
        figsize: Figure size. Defaults to ``(10, 4.5)`` with the samples panel
            and to ``(7.5, 4.5)`` without it.

    Returns:
        ``np.ptp(ale)``, the peak-to-peak range of the ALE data returned by
        ``ale_plot``.
    """
    model.n_jobs = n_jobs

    # Everything that depends on the presence of the samples panel is resolved
    # here in one place. A new figure is always created, so the plot never
    # ends up on a pre-existing one.
    if plot_samples:
        if figsize is None:
            figsize = (10, 4.5)
        fig, ax = plt.subplots(
            1,
            2,
            figsize=figsize,
            gridspec_kw={"width_ratios": [1.7, 1]},
            constrained_layout=True,
        )
        ale_ax = ax[0]
        colorbar_pad = 0.02
    else:
        if figsize is None:
            figsize = (7.5, 4.5)
        fig, ax = plt.subplots(
            1,
            1,
            figsize=figsize,
            gridspec_kw=None,
            constrained_layout=False,
        )
        ale_ax = ax
        colorbar_pad = 0.09

    colorbar_kwargs = {
        "format": "%.0e",
        "pad": colorbar_pad,
        "aspect": 32,
        "shrink": 0.85,
        "ax": ale_ax,
    }

    with parallel_backend("threading", n_jobs=n_jobs):
        fig, axes, ale_data = ale_plot(
            model,
            train_set,
            features,
            bins=20,
            fig=fig,
            ax=ale_ax,
            plot_quantiles=True,
            quantile_axis=True,
            plot_kwargs={"colorbar_kwargs": colorbar_kwargs},
            return_data=True,
            n_jobs=n_jobs,
            include_first_order=include_first_order,
        )
    quantiles_list, ale, samples = ale_data

    for ax_key in ("ale", "quantiles_x"):
        axes[ax_key].xaxis.set_tick_params(rotation=45)

    if plot_samples:
        samples_ax = ax[1]
        samples_ax.set_title("Samples")

        # Bins are drawn at their integer indices and labelled with the
        # corresponding quantile values.
        index_grids = []
        for axis, quantiles in zip(("x", "y"), quantiles_list):
            inds = np.arange(len(quantiles))
            index_grids.append(inds)
            # Two separate calls so that the tick positions are always in
            # place before their labels are assigned.
            samples_ax.set(**{f"{axis}ticks": inds})
            samples_ax.set(
                **{f"{axis}ticklabels": _sci_format(quantiles, scilim=0.6)}
            )

        samples_img = samples_ax.pcolormesh(
            *index_grids, samples.T, norm=SymLogNorm(linthresh=1)
        )
        fig.colorbar(samples_img, ax=ax, shrink=0.6, pad=0.01)

        samples_ax.xaxis.set_tick_params(rotation=90)
        samples_ax.set_aspect("equal")

        fig.set_constrained_layout_pads(
            w_pad=0.000, h_pad=0.000, hspace=0.0, wspace=0.015
        )

    if figure_saver is not None:
        if include_first_order:
            sub_directory = "2d_ale_first_order"
        else:
            sub_directory = "2d_ale"
        figure_saver.save_figure(
            fig, "__".join(features), sub_directory=sub_directory
        )

    # The importance is the full peak-to-peak range of the ALE; no bins are
    # excluded based on their sample counts.
    return np.ptp(ale)