def plot_ale_plot(model, X, X_unscaled=None, features=None, **kwargs):
    """Draw an ALE plot for one or two features.

    With one entry in `features` a 1D ALE plot is produced; with two entries
    a 2D ALE plot is produced. If `X_unscaled` is given, the axis tick labels
    are mapped back to the original (unstandardised) feature scale using the
    per-column mean and standard deviation of `X_unscaled`.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes the plot was drawn on.
    """
    fig, ax = plt.subplots()
    ale.ale_plot(model, X, features, ax=ax, **kwargs)

    if X_unscaled is not None:
        def make_unscaler(column):
            # Build a FuncFormatter callback that undoes standard scaling
            # (tick value * std + mean) for the given column.
            mu = X_unscaled[column].mean()
            sigma = X_unscaled[column].std()

            def fmt(value, pos):
                return '%.0f' % (value * sigma + mu)

            return fmt

        if len(features) == 2:
            ax.yaxis.set_major_formatter(
                mticker.FuncFormatter(make_unscaler(features[1])))
        ax.xaxis.set_major_formatter(
            mticker.FuncFormatter(make_unscaler(features[0])))

    return fig, ax
def save_ale_plot_1d_with_ptp(
    model,
    X_train,
    column,
    n_jobs=8,
    monte_carlo_rep=1000,
    monte_carlo_ratio=100,
    verbose=False,
    monte_carlo=True,
    center=False,
    figure_saver=None,
):
    """Plot a 1D ALE curve for `column`, optionally save it, and return its span.

    The model is evaluated under a joblib "threading" backend with `n_jobs`
    workers. If `figure_saver` is given, the figure is saved into the "ale"
    (or "ale_non_mc") sub-directory under the column's name.

    Returns
    -------
    If `monte_carlo` is True: ``(ptp_main, ptp_mc)`` — the peak-to-peak range
    of the main ALE curve and of all Monte-Carlo replica curves pooled
    together. Otherwise just ``ptp_main``.
    """
    model.n_jobs = n_jobs
    with parallel_backend("threading", n_jobs=n_jobs):
        # Make sure plot is plotted onto a new figure.
        fig, ax = plt.subplots(figsize=(7.5, 4.5))
        out = ale_plot(
            model,
            X_train,
            column,
            bins=20,
            monte_carlo=monte_carlo,
            monte_carlo_rep=monte_carlo_rep,
            monte_carlo_ratio=monte_carlo_ratio,
            plot_quantiles=True,
            quantile_axis=True,
            rugplot_lim=0,
            scilim=0.6,
            return_data=True,
            return_mc_data=True,
            verbose=verbose,
            center=center,
        )
    if monte_carlo:
        fig, axes, data, mc_data = out
    else:
        fig, axes, data = out

    for ax_key in ("ale", "quantiles_x"):
        axes[ax_key].xaxis.set_tick_params(rotation=45)

    sub_dir = "ale" if monte_carlo else "ale_non_mc"
    if figure_saver is not None:
        figure_saver.save_figure(fig, column, sub_directory=sub_dir)

    if not monte_carlo:
        return np.ptp(data[1])

    # Pool all Monte-Carlo ALE curves into one flat array. A single
    # concatenate avoids the quadratic copying of np.append in a loop
    # (np.append reallocates and flattens on every iteration).
    if mc_data:
        mc_ales = np.concatenate([np.ravel(mc_ale) for _mc_q, mc_ale in mc_data])
    else:
        mc_ales = np.array([])
    return np.ptp(data[1]), np.ptp(mc_ales)
def plot_ale_feature_paired(estimators, titles, feature_name, features, figsize=(6, 6)):
    """Produce one ALE plot per estimator, titled from `titles`.

    `titles[i]` labels the plot for `estimators[i]`; `features` is the data
    passed to `ale.ale_plot` and `feature_name` selects the feature(s) to
    plot. Returns the list of plot objects in estimator order.
    """
    from alepython import ale

    # Index titles explicitly (rather than zip) so a too-short `titles`
    # raises IndexError, exactly as the original loop did.
    return [
        ale.ale_plot(est, features, feature_name, figsize=figsize, title=titles[idx])
        for idx, est in enumerate(estimators)
    ]
def plot_ale_feature_per_split(configs, feature_name, figsize=(4, 6), n_splits=3):
    """Draw one ALE plot of `feature_name` per cross-validation split.

    For each split index, the training data is loaded via `load_split` and
    the fitted estimator is taken from ``configs[split_index]['estimator']``.

    Parameters
    ----------
    configs : indexable of dicts, each holding an 'estimator' entry per split.
    feature_name : feature(s) passed through to `ale.ale_plot`.
    figsize : figure size forwarded to `ale.ale_plot`.
    n_splits : number of splits to plot (default 3, matching the previous
        hard-coded behaviour).

    Returns
    -------
    list of the plot objects, one per split.
    """
    from alepython import ale

    plots = []
    for split_index in range(n_splits):
        # load_split returns (train_values, train_labels, val_values,
        # val_labels); only the training values are needed here.
        train_values, _train_labels, _val_values, _val_labels = load_split(split_index)
        estimator = configs[split_index]['estimator']
        plots.append(
            ale.ale_plot(
                estimator,
                train_values,
                feature_name,
                figsize=figsize,
                title=f'Split {split_index + 1}',
            )
        )
    return plots
def save_ale_2d_and_get_importance(
    model,
    train_set,
    features,
    n_jobs=8,
    include_first_order=False,
    figure_saver=None,
    plot_samples=True,
    figsize=None,
):
    """Plot a 2D ALE surface for a feature pair and return its peak-to-peak range.

    Parameters
    ----------
    model : estimator passed to `ale_plot`; its `n_jobs` attribute is set here.
    train_set : training data for the ALE computation.
    features : pair of feature names; also used to build the saved file name.
    n_jobs : worker count for both the threading backend and `ale_plot`.
    include_first_order : forwarded to `ale_plot`; also selects the save
        sub-directory ("2d_ale_first_order" vs "2d_ale").
    figure_saver : if not None, the figure is saved via its `save_figure`.
    plot_samples : when True, a second panel shows the per-cell sample counts.
    figsize : figure size; defaults depend on `plot_samples`.

    Returns
    -------
    float : np.ptp of the ALE values, used as a feature-pair importance proxy.
    """
    model.n_jobs = n_jobs
    # Wider default figure when the samples panel is included.
    if figsize is None:
        if plot_samples:
            figsize = (10, 4.5)
        else:
            figsize = (7.5, 4.5)
    fig, ax = plt.subplots(
        1,
        2 if plot_samples else 1,
        figsize=figsize,
        gridspec_kw={"width_ratios": [1.7, 1]} if plot_samples else None,
        constrained_layout=True if plot_samples else False,
    )  # Make sure plot is plotted onto a new figure.
    with parallel_backend("threading", n_jobs=n_jobs):
        # NOTE(review): the local `ale` (the ALE value grid) shadows the
        # module-level `ale` name used elsewhere in this file — intentional
        # here but easy to misread.
        fig, axes, (quantiles_list, ale, samples) = ale_plot(
            model,
            train_set,
            features,
            bins=20,
            fig=fig,
            ax=ax[0] if plot_samples else ax,
            plot_quantiles=True,
            quantile_axis=True,
            plot_kwargs={
                "colorbar_kwargs": dict(
                    format="%.0e",
                    pad=0.02 if plot_samples else 0.09,
                    aspect=32,
                    shrink=0.85,
                    ax=ax[0] if plot_samples else ax,
                )
            },
            return_data=True,
            n_jobs=n_jobs,
            include_first_order=include_first_order,
        )
    # plt.subplots_adjust(top=0.89)
    for ax_key in ("ale", "quantiles_x"):
        axes[ax_key].xaxis.set_tick_params(rotation=45)
    if plot_samples:
        # Plotting samples.
        ax[1].set_title("Samples")
        # ax[1].set_xlabel(f"Feature '{features[0]}'")
        # ax[1].set_ylabel(f"Feature '{features[1]}'")
        # Replace quantile tick positions with plain indices, labelling them
        # with scientific-notation-formatted quantile values.
        mod_quantiles_list = []
        for axis, quantiles in zip(("x", "y"), quantiles_list):
            inds = np.arange(len(quantiles))
            mod_quantiles_list.append(inds)
            ax[1].set(**{f"{axis}ticks": inds})
            ax[1].set(**{f"{axis}ticklabels": _sci_format(quantiles, scilim=0.6)})
        # Symmetric log colour scale so zero-count cells remain visible.
        samples_img = ax[1].pcolormesh(
            *mod_quantiles_list, samples.T, norm=SymLogNorm(linthresh=1)
        )
        fig.colorbar(samples_img, ax=ax, shrink=0.6, pad=0.01)
        ax[1].xaxis.set_tick_params(rotation=90)
        ax[1].set_aspect("equal")
        fig.set_constrained_layout_pads(
            w_pad=0.000, h_pad=0.000, hspace=0.0, wspace=0.015
        )
    if figure_saver is not None:
        figure_saver.save_figure(
            fig,
            "__".join(features),
            sub_directory="2d_ale_first_order" if include_first_order else "2d_ale",
        )
    # min_samples = (
    #     train_set.shape[0] / reduce(mul, map(lambda x: len(x) - 1, quantiles_list))
    # ) / 10
    # return np.ma.max(ale[samples_grid > min_samples]) - np.ma.min(
    #     ale[samples_grid > min_samples]
    # )
    return np.ptp(ale)
    # NOTE(review): the two methods below belong to the enclosing wrapper
    # class — instantiated further down as `one_class_prob_pred` — whose
    # `class` header lies outside this chunk.
    def __init__(self, predictor: callable, class_no: int):
        # predictor: a probability predictor such as `model.predict_proba`;
        # class_no: column index of the class whose probability to expose.
        self.predictor = predictor
        self.class_no = class_no

    def predict(self, X, **kwargs):
        # Reduce the (n_samples, n_classes) probability output of `predictor`
        # to the single column for `class_no`, yielding one scalar per sample
        # as required by ALE plotting.
        return self.predictor(X, **kwargs)[:, self.class_no]


if __name__ == "__main__":
    # Demo: fit a classifier on the UCI yeast data set and draw per-class
    # ALE plots of predicted probabilities.
    featureNames = ["seq", "mcg", "gvh", "alm", "mit", "erl", "pox", "vac", "nuc", "loc"]
    yeastData = pd.read_csv("yeast.data", sep=" ", names=featureNames)
    # titles = ("GradientBoost", "MLP") # add more
    # models = (GradientBoostingClassifier(n_estimators=100, max_features=None, max_depth=2, random_state=5),
    #           MLPClassifier())
    model = MLPClassifier()
    # Keep only the four most frequent localisation classes.
    yeast4Classes = yeastData.loc[
        (yeastData["loc"] == "CYT")
        | (yeastData["loc"] == "NUC")
        | (yeastData["loc"] == "MIT")
        | (yeastData["loc"] == "ME3")
    ]
    yeastAttrib = yeast4Classes.iloc[:, 1:9]
    yeastTarget = yeast4Classes["loc"]
    X_train, X_test, y_train, y_test = train_test_split(
        yeastAttrib, yeastTarget, test_size=0.33, random_state=42
    )
    # for model, title in zip(models, titles):
    model.fit(X_train, y_train)
    # print(model.predict_proba(X_test.iloc[1:3]))
    # print(predictor.predict(X_test.iloc[1:2]))
    # One 2D ALE plot per class, using the wrapper to select that class's
    # probability column.
    for loc in range(len(set(y_train))):
        print(model.classes_[loc])
        predictor = one_class_prob_pred(model.predict_proba, loc)
        # ale_plot(model, X_train, "alm", predictor=predictor.predict, monte_carlo=True)
        ale_plot(
            model,
            X_train,
            ["mit", "alm"],
            predictor=predictor.predict,
            monte_carlo=True,
            monte_carlo_ratio=0.3,
        )
        # NOTE(review): show() placement inside the loop inferred from the
        # collapsed source — confirm it was not meant to run once at the end.
        plt.show()