def plot_partial_dependences(model, X, var_names, y=None, bootstrap_models=None,
                             pipeline=None, n_points=250):
    """Draw one partial dependence plot per variable, stacked vertically.

    INPUT:
        model:
            Model object forwarded to plot_partial_depenence; drawn as the
            thick blue curve. Expected to be fit already.
        X:
            Data forwarded unchanged to plot_partial_depenence.
        var_names:
            Sequence of variable names; one subplot is created per name.
        y:
            Optional target data, forwarded only for the main model's curve.
        bootstrap_models:
            Optional sequence of models. At most the first 100 are drawn as
            thin light-blue curves on each subplot.
        pipeline:
            Optional pipeline forwarded to plot_partial_depenence.
        n_points:
            Forwarded to plot_partial_depenence as n_points.
    OUTPUT:
        (fig, axs): the figure and a 1-D array of its Axes, one per variable.
    """
    n_plots = len(var_names)
    # squeeze=False always returns a 2-D array of Axes, so asking for a single
    # variable no longer yields a bare Axes object that zip() cannot iterate.
    fig, axs = plt.subplots(n_plots, squeeze=False, figsize=(12, 3 * n_plots))
    axs = axs[:, 0]

    # Explicit None check: truth-testing a multi-element numpy array raises.
    if bootstrap_models is None:
        background_models = []
    else:
        background_models = bootstrap_models[:100]

    for ax, name in zip(axs, var_names):
        # Bootstrap curves first so the main model's curve ends up on top.
        for boot_model in background_models:
            plot_partial_depenence(ax, boot_model, X=X, var_name=name,
                                   pipeline=pipeline, n_points=n_points,
                                   alpha=0.8, linewidth=1, color="lightblue")
        plot_partial_depenence(ax, model, X=X, var_name=name, y=y,
                               pipeline=pipeline, n_points=n_points,
                               color="blue", linewidth=3)
        ax.set_title("{} Partial Dependence".format(name))
    return fig, axs
def plot_partial_dependences(model, X, var_names, y=None, bootstrap_models=None,
                             pipeline=None, n_points=250):
    """Creates many partial dependency plots, one subplot per variable.

    INPUT:
        model:
            A sklearn model, forwarded to plot_partial_depenence and drawn as
            the thick blue curve. Call fit() on the model before using this
            function.
        X:
            A dataframe or array containing independent variables that fit
            in pipeline. Forwarded unchanged to plot_partial_depenence.
        var_names:
            The column names of the x variables you want plotted; one
            subplot is created per name.
        y:
            A dataframe or array, the dependent variable. Forwarded only for
            the main model's curve.
        bootstrap_models:
            Optional sequence of models. At most the first 10 are drawn as
            thin light-blue curves on each subplot.
        pipeline:
            Optional pipeline forwarded to plot_partial_depenence.
        n_points:
            The number of points to plot; forwarded to
            plot_partial_depenence as n_points.
    OUTPUT:
        (fig, axs): the figure and a 1-D array of its Axes, one per variable.
    """
    n_plots = len(var_names)
    # squeeze=False always returns a 2-D array of Axes, so asking for a single
    # variable no longer yields a bare Axes object that zip() cannot iterate.
    fig, axs = plt.subplots(n_plots, squeeze=False, figsize=(12, 3 * n_plots))
    axs = axs[:, 0]

    # Explicit None check: truth-testing a multi-element numpy array raises.
    if bootstrap_models is None:
        background_models = []
    else:
        background_models = bootstrap_models[:10]

    for ax, name in zip(axs, var_names):
        # Bootstrap curves first so the main model's curve ends up on top.
        for boot_model in background_models:
            plot_partial_depenence(ax, boot_model, X=X, var_name=name,
                                   pipeline=pipeline, n_points=n_points,
                                   alpha=0.8, linewidth=1, color="lightblue")
        plot_partial_depenence(ax, model, X=X, var_name=name, y=y,
                               pipeline=pipeline, n_points=n_points,
                               color="blue", linewidth=3)
        ax.set_title("{} Partial Dependence".format(name))
    return fig, axs
def shaped_plot_partial_dependences(model, df, y_var_name, pipeline=None,
                                    n_points=250, **kwargs):
    """Plot partial dependence for every column of df except y_var_name.

    With more than one feature the plots are laid out in a two-column grid;
    with exactly one feature a single plot is drawn, given a figure-level
    title, and tight_layout is applied. With no features a message is
    printed and nothing is drawn.

    INPUT:
        model:
            Model object forwarded to plot_partial_depenence. Expected to be
            fit already. Its class name is used in the plot titles.
        df:
            A dataframe containing the feature columns and the column named
            y_var_name.
        y_var_name:
            String, the column name of the dependent variable in df.
            Raises ValueError (from list.remove) if it is not a column.
        pipeline:
            Optional pipeline forwarded to plot_partial_depenence.
        n_points:
            Forwarded to plot_partial_depenence as n_points.
        **kwargs:
            Extra keyword arguments passed to plot_partial_depenence.
    OUTPUT:
        None. The figure is created with plt.subplots and is not returned.
    """
    X_features = list(df.columns)
    X_features.remove(y_var_name)
    if not X_features:
        print('No Features to Plot')
        return

    # These do not change between plots, so compute them once.
    X = df.drop(y_var_name, axis=1)
    y = df[y_var_name]
    model_name = model.__class__.__name__

    if len(X_features) > 1:
        num_plot_rows = int(np.ceil(len(X_features) / 2.0))
        fig, axs = plt.subplots(num_plot_rows, 2,
                                figsize=(14, 3 * num_plot_rows))
        # With an odd feature count the last grid cell is left empty.
        for ax, X_feature in zip(axs.flatten(), X_features):
            plot_partial_depenence(ax, model=model, X=X, var_name=X_feature,
                                   y=y, pipeline=pipeline, n_points=n_points,
                                   **kwargs)
            ax.set_title("{}: Partial Dependence Plot {}".format(
                X_feature, model_name))
    else:
        X_feature = X_features[0]
        # A 1x1 grid makes plt.subplots return a single Axes, not an array.
        fig, ax = plt.subplots(1, 1, figsize=(14, 4.5))
        plot_partial_depenence(ax, model=model, X=X, var_name=X_feature,
                               y=y, pipeline=pipeline, n_points=n_points,
                               **kwargs)
        ax.set_title("{}: Partial Dependence Plots {}".format(
            X_feature, model_name))
        # Figure has no set_title(); suptitle() is the figure-level title.
        fig.suptitle("Partial Dependence Plots for " + model_name)
        # NOTE: per the original author, 'tight_layout' must be used in the
        # calling script as well.
        fig.tight_layout(pad=2)
def shaped_plot_partial_dependences(model, df, y_var_name, pipeline=None,
                                    n_points=250, **kwargs):
    """Creates many partial dependency plots, one per feature column of df.

    With more than one feature the plots are laid out in a two-column grid;
    with exactly one feature a single plot is drawn, given a figure-level
    title, and tight_layout is applied. With no features a message is
    printed and nothing is drawn.

    INPUT:
        model:
            A sklearn model, forwarded to plot_partial_depenence. Call fit()
            on the model before using this function. Its class name is used
            in the plot titles.
        df:
            A dataframe containing the independent variables X and the
            dependent variable y.
        y_var_name:
            String, the column name of the dependent y variable in the
            dataframe. Raises ValueError (from list.remove) if it is not a
            column.
        pipeline:
            Runs on df_X (without y), already fit to df_X. Forwarded to
            plot_partial_depenence.
        n_points:
            The number of points to plot; forwarded to
            plot_partial_depenence as n_points.
        **kwargs:
            Extra keyword arguments passed to plot_partial_depenence.
    OUTPUT:
        None. The figure is created with plt.subplots and is not returned.
    """
    X_features = list(df.columns)
    X_features.remove(y_var_name)
    if not X_features:
        print('No Features to Plot')
        return

    # These do not change between plots, so compute them once.
    X = df.drop(y_var_name, axis=1)
    y = df[y_var_name]
    model_name = model.__class__.__name__

    if len(X_features) > 1:
        num_plot_rows = int(np.ceil(len(X_features) / 2.0))
        fig, axs = plt.subplots(num_plot_rows, 2,
                                figsize=(14, 3 * num_plot_rows))
        # With an odd feature count the last grid cell is left empty.
        for ax, X_feature in zip(axs.flatten(), X_features):
            plot_partial_depenence(ax, model=model, X=X, var_name=X_feature,
                                   y=y, pipeline=pipeline, n_points=n_points,
                                   **kwargs)
            ax.set_title("{}: Partial Dependence Plot {}".format(
                X_feature, model_name))
    else:
        X_feature = X_features[0]
        # A 1x1 grid makes plt.subplots return a single Axes, not an array.
        fig, ax = plt.subplots(1, 1, figsize=(14, 4.5))
        plot_partial_depenence(ax, model=model, X=X, var_name=X_feature,
                               y=y, pipeline=pipeline, n_points=n_points,
                               **kwargs)
        ax.set_title("{}: Partial Dependence Plots {}".format(
            X_feature, model_name))
        # Figure has no set_title(); suptitle() is the figure-level title.
        fig.suptitle("Partial Dependence Plots for " + model_name)
        # NOTE: per the original author, 'tight_layout' must be used in the
        # calling script as well.
        fig.tight_layout(pad=2)