import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from alepython import ale  # assumed source of ale_plot, as in the later examples


def plot_ale_plot(model, X, X_unscaled=None, features=None, **kwargs):
    '''
    Plots an ALE plot. If `features` contains one feature, a one-dimensional ALE
    plot is produced; if it contains two features, a two-dimensional ALE plot is
    produced. If `X_unscaled` is given, the tick labels are mapped back to the
    original (unstandardised) units using its per-feature mean and standard deviation.
    '''
    fig, ax = plt.subplots()
    ale.ale_plot(model, X, features, ax=ax, **kwargs)

    if X_unscaled is not None:
        meanx = X_unscaled[features[0]].mean()
        stdx = X_unscaled[features[0]].std()

        #Unscale x values
        def unscale_xticks(x, pos):
            return ('%.0f' % (x * stdx + meanx))

        if len(features) == 2:

            meany = X_unscaled[features[1]].mean()
            stdy = X_unscaled[features[1]].std()

            #Unscale y values
            def unscale_yticks(x, pos):
                return ('%.0f' % (x * stdy + meany))

            ax.yaxis.set_major_formatter(mticker.FuncFormatter(unscale_yticks))

        ax.xaxis.set_major_formatter(mticker.FuncFormatter(unscale_xticks))

    return fig, ax
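
# A minimal usage sketch of plot_ale_plot. The synthetic data, StandardScaler
# preprocessing and RandomForestRegressor below are assumptions, not part of the
# original example.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X_raw = pd.DataFrame(rng.normal(loc=20, scale=5, size=(500, 2)),
                     columns=["temperature", "pressure"])
y = 2 * X_raw["temperature"] + rng.normal(scale=0.5, size=500)

# Standardise the features so that X_unscaled can map tick labels back to real units.
X_scaled = pd.DataFrame(StandardScaler().fit_transform(X_raw), columns=X_raw.columns)
reg = RandomForestRegressor(n_estimators=50, random_state=0).fit(X_scaled, y)

fig, ax = plot_ale_plot(reg, X_scaled, X_unscaled=X_raw, features=["temperature"])
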
Example #2
import matplotlib.pyplot as plt
import numpy as np
from joblib import parallel_backend


# NOTE: `ale_plot` (with the extended keyword set used below) and the optional
# `figure_saver` object are assumed to be provided by the surrounding project;
# they are not part of the stock alepython API.
def save_ale_plot_1d_with_ptp(
    model,
    X_train,
    column,
    n_jobs=8,
    monte_carlo_rep=1000,
    monte_carlo_ratio=100,
    verbose=False,
    monte_carlo=True,
    center=False,
    figure_saver=None,
):
    model.n_jobs = n_jobs
    with parallel_backend("threading", n_jobs=n_jobs):
        fig, ax = plt.subplots(
            figsize=(7.5, 4.5)
        )  # Make sure plot is plotted onto a new figure.
        out = ale_plot(
            model,
            X_train,
            column,
            bins=20,
            monte_carlo=monte_carlo,
            monte_carlo_rep=monte_carlo_rep,
            monte_carlo_ratio=monte_carlo_ratio,
            plot_quantiles=True,
            quantile_axis=True,
            rugplot_lim=0,
            scilim=0.6,
            return_data=True,
            return_mc_data=True,
            verbose=verbose,
            center=center,
        )
    if monte_carlo:
        fig, axes, data, mc_data = out
    else:
        fig, axes, data = out

    for ax_key in ("ale", "quantiles_x"):
        axes[ax_key].xaxis.set_tick_params(rotation=45)

    sub_dir = "ale" if monte_carlo else "ale_non_mc"
    if figure_saver is not None:
        figure_saver.save_figure(fig, column, sub_directory=sub_dir)

    if monte_carlo:
        mc_ales = np.array([])
        for mc_q, mc_ale in mc_data:
            mc_ales = np.append(mc_ales, mc_ale)
        return np.ptp(data[1]), np.ptp(mc_ales)
    else:
        return np.ptp(data[1])
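
# A minimal usage sketch of save_ale_plot_1d_with_ptp. The RandomForestRegressor
# and the synthetic training frame are assumptions; figure_saver is left as None,
# so no figure is written to disk.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
X_train_demo = pd.DataFrame(rng.normal(size=(400, 3)), columns=["a", "b", "c"])
y_train_demo = X_train_demo["a"] - X_train_demo["b"] ** 2 + rng.normal(scale=0.1, size=400)
rf = RandomForestRegressor(n_estimators=50, random_state=0).fit(X_train_demo, y_train_demo)

ale_ptp, mc_ptp = save_ale_plot_1d_with_ptp(
    rf, X_train_demo, "a", n_jobs=2, monte_carlo_rep=20, monte_carlo_ratio=0.1
)
print(f"ALE peak-to-peak: {ale_ptp:.3f}, Monte Carlo peak-to-peak: {mc_ptp:.3f}")
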
Example #3
def plot_ale_feature_paired(estimators, titles, feature_name,
                            features, figsize=(6, 6)):
    from alepython import ale

    plots = []

    for i, estimator in enumerate(estimators):
        # Note: `features` holds the training data passed as ale_plot's train_set
        # argument, while `feature_name` selects the column(s) to plot.
        plot = ale.ale_plot(estimator, features, feature_name,
                            figsize=figsize, title=titles[i])

        plots.append(plot)

    return plots
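
# A hypothetical usage sketch for plot_ale_feature_paired: two classifiers fitted on
# the same synthetic data. The estimators, titles, column names and data below are
# assumptions made for illustration only.
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

rng = np.random.default_rng(0)
X_demo = pd.DataFrame(rng.normal(size=(300, 3)), columns=["mcg", "gvh", "alm"])
y_demo = (X_demo["mcg"] + X_demo["gvh"] > 0).astype(int)

gbc = GradientBoostingClassifier(random_state=0).fit(X_demo, y_demo)
mlp = MLPClassifier(max_iter=500, random_state=0).fit(X_demo, y_demo)

plots = plot_ale_feature_paired([gbc, mlp], ["GradientBoost", "MLP"], "mcg", X_demo)
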
Example #4
def plot_ale_feature_per_split(configs, feature_name, figsize=(4, 6)):
    from alepython import ale

    plots = []

    for split_index in range(3):
        # `load_split` and the `configs` list of fitted estimators are assumed to be
        # defined elsewhere in the surrounding project.
        (train_values, train_labels, val_values, val_labels) = load_split(split_index)

        estimator = configs[split_index]['estimator']

        plot = ale.ale_plot(estimator, train_values, feature_name,
                            figsize=figsize, title=f'Split {split_index + 1}')

        plots.append(plot)

    return plots
Example #5
import matplotlib.pyplot as plt
import numpy as np
from joblib import parallel_backend
from matplotlib.colors import SymLogNorm


# NOTE: `ale_plot` (the 2D variant with `fig`/`ax`/`plot_kwargs` support), `_sci_format`
# and `figure_saver` are assumed to come from the surrounding project rather than the
# stock alepython API.
def save_ale_2d_and_get_importance(
    model,
    train_set,
    features,
    n_jobs=8,
    include_first_order=False,
    figure_saver=None,
    plot_samples=True,
    figsize=None,
):
    model.n_jobs = n_jobs

    if figsize is None:
        if plot_samples:
            figsize = (10, 4.5)
        else:
            figsize = (7.5, 4.5)

    fig, ax = plt.subplots(
        1,
        2 if plot_samples else 1,
        figsize=figsize,
        gridspec_kw={"width_ratios": [1.7, 1]} if plot_samples else None,
        constrained_layout=True if plot_samples else False,
    )  # Make sure plot is plotted onto a new figure.
    with parallel_backend("threading", n_jobs=n_jobs):
        fig, axes, (quantiles_list, ale, samples) = ale_plot(
            model,
            train_set,
            features,
            bins=20,
            fig=fig,
            ax=ax[0] if plot_samples else ax,
            plot_quantiles=True,
            quantile_axis=True,
            plot_kwargs={
                "colorbar_kwargs":
                dict(
                    format="%.0e",
                    pad=0.02 if plot_samples else 0.09,
                    aspect=32,
                    shrink=0.85,
                    ax=ax[0] if plot_samples else ax,
                )
            },
            return_data=True,
            n_jobs=n_jobs,
            include_first_order=include_first_order,
        )

    # plt.subplots_adjust(top=0.89)
    for ax_key in ("ale", "quantiles_x"):
        axes[ax_key].xaxis.set_tick_params(rotation=45)

    if plot_samples:
        # Plotting samples.
        ax[1].set_title("Samples")
        # ax[1].set_xlabel(f"Feature '{features[0]}'")
        # ax[1].set_ylabel(f"Feature '{features[1]}'")
        mod_quantiles_list = []
        for axis, quantiles in zip(("x", "y"), quantiles_list):
            inds = np.arange(len(quantiles))
            mod_quantiles_list.append(inds)
            ax[1].set(**{f"{axis}ticks": inds})
            ax[1].set(
                **{f"{axis}ticklabels": _sci_format(quantiles, scilim=0.6)})
        samples_img = ax[1].pcolormesh(*mod_quantiles_list,
                                       samples.T,
                                       norm=SymLogNorm(linthresh=1))
        fig.colorbar(samples_img, ax=ax, shrink=0.6, pad=0.01)
        ax[1].xaxis.set_tick_params(rotation=90)
        ax[1].set_aspect("equal")
        fig.set_constrained_layout_pads(w_pad=0.000,
                                        h_pad=0.000,
                                        hspace=0.0,
                                        wspace=0.015)

    if figure_saver is not None:
        figure_saver.save_figure(
            fig,
            "__".join(features),
            sub_directory="2d_ale_first_order"
            if include_first_order else "2d_ale",
        )

    #     min_samples = (
    #         train_set.shape[0] / reduce(mul, map(lambda x: len(x) - 1, quantiles_list))
    #     ) / 10
    #     return np.ma.max(ale[samples_grid > min_samples]) - np.ma.min(
    #         ale[samples_grid > min_samples]
    #     )

    return np.ptp(ale)
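
# A minimal usage sketch of save_ale_2d_and_get_importance. The regressor, the
# synthetic training frame and the feature pair ("a", "b") are assumptions;
# figure_saver is omitted so nothing is written to disk.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
X_train_demo = pd.DataFrame(rng.normal(size=(400, 3)), columns=["a", "b", "c"])
y_train_demo = X_train_demo["a"] * X_train_demo["b"] + rng.normal(scale=0.1, size=400)
rf = RandomForestRegressor(n_estimators=50, random_state=0).fit(X_train_demo, y_train_demo)

importance = save_ale_2d_and_get_importance(
    rf, X_train_demo, ("a", "b"), n_jobs=2, plot_samples=False
)
print(f"2D ALE peak-to-peak for ('a', 'b'): {importance:.3f}")
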
Example #6
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from alepython import ale_plot  # assumed source of ale_plot


class one_class_prob_pred:
    """Wrap a multi-class probability predictor so it returns the probability of a single class."""

    def __init__(self, predictor: callable, class_no: int):
        self.predictor = predictor
        self.class_no = class_no

    def predict(self, X, **kwargs):
        return self.predictor(X, **kwargs)[:, self.class_no]


if __name__ == "__main__":
    featureNames = ["seq", "mcg", "gvh", "alm", "mit", "erl", "pox", "vac", "nuc", "loc"]
    yeastData = pd.read_csv("yeast.data", sep=r"\s+", names=featureNames)
    # titles = ("GradientBoost", "MLP")  # add more
    # models = (GradientBoostingClassifier(n_estimators=100, max_features=None, max_depth=2, random_state=5),
    #           MLPClassifier())
    model = MLPClassifier()
    yeast4Classes = yeastData.loc[yeastData["loc"].isin(["CYT", "NUC", "MIT", "ME3"])]
    yeastAttrib = yeast4Classes.iloc[:, 1:9]
    yeastTarget = yeast4Classes["loc"]
    X_train, X_test, y_train, y_test = train_test_split(yeastAttrib, yeastTarget, test_size=0.33, random_state=42)

    # for model, title in zip(models, titles):
    model.fit(X_train, y_train)
    # print(model.predict_proba(X_test.iloc[1:3]))
    # print(predictor.predict(X_test.iloc[1:2]))
    for loc in range(len(set(y_train))):
        print(model.classes_[loc])
        predictor = one_class_prob_pred(model.predict_proba, loc)
        # ale_plot(model, X_train, "alm", predictor=predictor.predict, monte_carlo=True)
        ale_plot(model, X_train, ["mit", "alm"], predictor=predictor.predict,
                 monte_carlo=True, monte_carlo_ratio= 0.3)
    plt.show()