Ejemplo n.º 1
0
def plot_partial_dependences(model,
                             X,
                             var_names,
                             y=None,
                             bootstrap_models=None,
                             pipeline=None,
                             n_points=250):
    """Create one partial dependence plot per variable, stacked vertically.

        INPUT:
            model:
                Model forwarded to plot_partial_depenence for the main
                (thick, blue) curve of every subplot.
            X:
                Data forwarded unchanged to plot_partial_depenence.
            var_names:
                Non-empty sequence of variable names. One subplot row is
                created per name, and the name is used in the title.
            y:
                Optional. Forwarded to plot_partial_depenence for the main
                curve only.
            bootstrap_models:
                Optional sequence of models. At most the first 100 are
                drawn as thin light-blue curves before the main curve.
            pipeline:
                Optional. Forwarded unchanged to plot_partial_depenence.
            n_points:
                Forwarded to plot_partial_depenence as ``n_points``.
        OUTPUT:
            (fig, axs): the new figure and a 1-D array holding one axes
            object per entry of var_names.
        RAISES:
            ValueError if var_names is empty.
    """
    n_vars = len(var_names)
    if n_vars == 0:
        raise ValueError("var_names must contain at least one variable name")

    # squeeze=False always yields a 2-D array of axes, so a single variable
    # no longer produces a bare (non-iterable) Axes object.
    fig, axs = plt.subplots(n_vars,
                            squeeze=False,
                            figsize=(12, 3 * n_vars))
    axs = axs[:, 0]  # single column -> 1-D array, one axes per variable

    for ax, name in zip(axs, var_names):
        if bootstrap_models:
            # Only a capped number of bootstrap curves is drawn.
            for M in bootstrap_models[:100]:
                plot_partial_depenence(ax,
                                       M,
                                       X=X,
                                       var_name=name,
                                       pipeline=pipeline,
                                       n_points=n_points,
                                       alpha=0.8,
                                       linewidth=1,
                                       color="lightblue")
        plot_partial_depenence(ax,
                               model,
                               X=X,
                               var_name=name,
                               y=y,
                               pipeline=pipeline,
                               n_points=n_points,
                               color="blue",
                               linewidth=3)
        ax.set_title("{} Partial Dependence".format(name))
    return fig, axs
Ejemplo n.º 2
0
def plot_partial_dependences(model,
                             X,
                             var_names,
                             y=None,
                             bootstrap_models=None,
                             pipeline=None,
                             n_points=250):
    """Creates many partial dependence plots, one subplot row per variable.

        INPUT:
            model:
                A sklearn model.
                Call fit() on model before using plot_partial_dependences.
            X:
                A dataframe or array containing independent variables
                that fit in pipeline.
            var_names:
                The column names of the x variables in the
                dataframe you want plotted. Must not be empty.
            y:
                A dataframe or array, the dependent variable. Forwarded to
                plot_partial_depenence for the main curve only.
            bootstrap_models:
                Optional sequence of models. At most the first 10 are
                drawn as thin light-blue curves before the main curve.
            pipeline:
                Runs on X (without y), already fit to X.
            n_points:
                The number of points to plot; forwarded to
                plot_partial_depenence as ``n_points``.
        OUTPUT:
            (fig, axs): the new figure and a 1-D array holding one axes
            object per entry of var_names.
        RAISES:
            ValueError if var_names is empty.
    """
    n_vars = len(var_names)
    if n_vars == 0:
        raise ValueError("var_names must contain at least one variable name")

    # squeeze=False always yields a 2-D array of axes, so a single variable
    # no longer produces a bare (non-iterable) Axes object.
    fig, axs = plt.subplots(n_vars,
                            squeeze=False,
                            figsize=(12, 3 * n_vars))
    axs = axs[:, 0]  # single column -> 1-D array, one axes per variable

    for ax, name in zip(axs, var_names):
        if bootstrap_models:
            # Only a capped number of bootstrap curves is drawn.
            for M in bootstrap_models[:10]:
                plot_partial_depenence(ax,
                                       M,
                                       X=X,
                                       var_name=name,
                                       pipeline=pipeline,
                                       n_points=n_points,
                                       alpha=0.8,
                                       linewidth=1,
                                       color="lightblue")
        plot_partial_depenence(ax,
                               model,
                               X=X,
                               var_name=name,
                               y=y,
                               pipeline=pipeline,
                               n_points=n_points,
                               color="blue",
                               linewidth=3)
        ax.set_title("{} Partial Dependence".format(name))
    return fig, axs
Ejemplo n.º 3
0
def shaped_plot_partial_dependences(model,
                                    df,
                                    y_var_name,
                                    pipeline=None,
                                    n_points=250,
                                    **kwargs):
    """Draw a partial dependence plot for every non-target column of df.

        INPUT:
            model:
                Model forwarded to plot_partial_depenence; its class name
                is used in the plot titles.
            df:
                A dataframe holding the target column y_var_name plus the
                feature columns to plot.
            y_var_name:
                String, the column name of the target variable in df.
            pipeline:
                Optional. Forwarded unchanged to plot_partial_depenence.
            n_points:
                Forwarded to plot_partial_depenence as ``n_points``.
            **kwargs:
                Extra keyword arguments passed to plot_partial_depenence.
        OUTPUT:
            None. The plots are drawn on a newly created figure: a
            two-column grid for several features, a single axes for one
            feature. With no features a message is printed instead.
        RAISES:
            ValueError if y_var_name is not a column of df.
    """
    X_features = list(df.columns)
    X_features.remove(y_var_name)
    if not X_features:
        print('No Features to Plot')
        return

    # These do not depend on the feature being plotted, so build them once.
    X = df.drop(y_var_name, axis=1)
    y = df[y_var_name]
    model_name = model.__class__.__name__

    if len(X_features) == 1:
        X_feature = X_features[0]
        fig, ax = plt.subplots(1, 1, figsize=(14, 4.5))
        plot_partial_depenence(ax,
                               model=model,
                               X=X,
                               var_name=X_feature,
                               y=y,
                               pipeline=pipeline,
                               n_points=n_points,
                               **kwargs)
        ax.set_title("{}: Partial Dependence Plots {}".format(
            X_feature, model_name))
        # Figure has no set_title(); suptitle() is the figure-level title.
        fig.suptitle("Partial Dependence Plots for " + model_name)
        # NOTE: per the original author, the calling script may also need
        # to apply tight_layout itself.
        fig.tight_layout(pad=2)
        return

    num_plot_rows = int(np.ceil(len(X_features) / 2.0))
    fig, axs = plt.subplots(num_plot_rows,
                            2,
                            figsize=(14, 3 * num_plot_rows))
    flat_axs = axs.flatten()
    for ax, X_feature in zip(flat_axs, X_features):
        plot_partial_depenence(ax,
                               model=model,
                               X=X,
                               var_name=X_feature,
                               y=y,
                               pipeline=pipeline,
                               n_points=n_points,
                               **kwargs)
        ax.set_title("{}: Partial Dependence Plot {}".format(
            X_feature, model_name))
    # An odd feature count leaves one unused grid cell; hide its empty frame.
    for spare_ax in flat_axs[len(X_features):]:
        spare_ax.set_visible(False)
Ejemplo n.º 4
0
def shaped_plot_partial_dependences(model,
                                    df,
                                    y_var_name,
                                    pipeline=None,
                                    n_points=250,
                                    **kwargs):
    """Creates many partial dependence plots, one per feature column of df.

        INPUT:
            model:
                A sklearn model.
                Call fit() on model before using
                shaped_plot_partial_dependences.
            df:
                A dataframe containing the dependent variable y
                and the independent variables X.
            y_var_name:
                String, the column name of the dependent y variable in the
                dataframe.
            pipeline:
                Runs on df_X (without y), already fit to df_X.
            n_points:
                The number of points to plot; forwarded to
                plot_partial_depenence as ``n_points``.
            **kwargs:
                Extra keyword arguments passed to plot_partial_depenence.
        OUTPUT:
            None. The plots are drawn on a newly created figure: a
            two-column grid for several features, a single axes for one
            feature. With no features a message is printed instead.
        RAISES:
            ValueError if y_var_name is not a column of df.
    """
    X_features = list(df.columns)
    X_features.remove(y_var_name)
    if not X_features:
        print('No Features to Plot')
        return

    # These do not depend on the feature being plotted, so build them once.
    X = df.drop(y_var_name, axis=1)
    y = df[y_var_name]
    model_name = model.__class__.__name__

    if len(X_features) == 1:
        X_feature = X_features[0]
        fig, ax = plt.subplots(1, 1, figsize=(14, 4.5))
        plot_partial_depenence(ax,
                               model=model,
                               X=X,
                               var_name=X_feature,
                               y=y,
                               pipeline=pipeline,
                               n_points=n_points,
                               **kwargs)
        ax.set_title("{}: Partial Dependence Plots {}".format(
            X_feature, model_name))
        # Figure has no set_title(); suptitle() is the figure-level title.
        fig.suptitle("Partial Dependence Plots for " + model_name)
        # NOTE: per the original author, the calling script may also need
        # to apply tight_layout itself.
        fig.tight_layout(pad=2)
        return

    num_plot_rows = int(np.ceil(len(X_features) / 2.0))
    fig, axs = plt.subplots(num_plot_rows,
                            2,
                            figsize=(14, 3 * num_plot_rows))
    flat_axs = axs.flatten()
    for ax, X_feature in zip(flat_axs, X_features):
        plot_partial_depenence(ax,
                               model=model,
                               X=X,
                               var_name=X_feature,
                               y=y,
                               pipeline=pipeline,
                               n_points=n_points,
                               **kwargs)
        ax.set_title("{}: Partial Dependence Plot {}".format(
            X_feature, model_name))
    # An odd feature count leaves one unused grid cell; hide its empty frame.
    for spare_ax in flat_axs[len(X_features):]:
        spare_ax.set_visible(False)