def pred_target_plot(clf, X, Y, features_to_plot, label, grid_range=None):
    """Draw one pdpbox actual-prediction plot per feature for a single label.

    The classifier that is plotted is ``clf.classifiers_[i]``, where ``i`` is
    the position of ``label`` in ``Y.columns``.

    Args:
        clf: Multi-label wrapper exposing a ``classifiers_`` sequence.
        X: Feature data forwarded to ``info_plots.actual_plot``.
        Y: Target frame; only the order of its columns is used.
        features_to_plot: Iterable of feature names; one plot is made for each.
        label: Column of ``Y`` whose classifier should be plotted.
        grid_range: Optional value forwarded as ``grid_range``. When given,
            outliers are shown as well; when ``None`` only
            ``grid_type='equal'`` is passed.

    Returns:
        A ``(figs, axs)`` tuple of two lists holding, in input order, the
        figure and axes objects returned by each ``actual_plot`` call.

    Raises:
        ValueError: If ``label`` is not one of ``Y.columns``.
    """
    from pdpbox import info_plots

    label_position = Y.columns.to_list().index(label)
    label_clf = clf.classifiers_[label_position]

    # Keyword arguments shared by every plot; the grid range (and outlier
    # display) is only requested when the caller supplied one.
    plot_kwargs = {"grid_type": "equal"}
    if grid_range is not None:
        plot_kwargs.update(show_outliers=True, grid_range=grid_range)

    figs = []
    axs = []

    # Remember the classifier's verbosity so it can be put back afterwards.
    # If the attribute did not exist it ends up as True, as it always did.
    previous_verbose = getattr(label_clf, "verbose", True)
    label_clf.verbose = False  # keep the output tidy while plotting
    try:
        # Silence pandas' chained-assignment warning only for the duration of
        # the plotting; the context manager restores whatever was set before,
        # even if a plot call raises.
        with pd.option_context("mode.chained_assignment", None):
            for feature in features_to_plot:
                fig, ax, _summary_df = info_plots.actual_plot(
                    label_clf,
                    X,
                    feature=feature,
                    feature_name=feature,
                    **plot_kwargs,
                )
                figs.append(fig)
                axs.append(ax)
    finally:
        label_clf.verbose = previous_verbose

    return figs, axs
def show_ICE_actual(self, features=(), feature_names=()):
    """Show a pdpbox actual-prediction plot for each requested feature.

    Args:
        features: Sequence of features, each passed as ``feature`` to
            ``info_plots.actual_plot``.
        feature_names: Sequence of display names, paired one-to-one with
            ``features``.

    Returns:
        None. If the two sequences differ in length, a message is printed
        and nothing is plotted.
    """
    if len(features) != len(feature_names):
        print("features and feature names must have same size")
        return

    # NOTE(review): the original indentation was lost; rotating the tick
    # labels and showing the figure are assumed to happen once per feature.
    for feature, display_name in zip(features, feature_names):
        info_plots.actual_plot(
            self.model,
            self.X_train,
            feature=feature,
            feature_name=display_name,
            predict_kwds={},
        )
        plt.xticks(rotation=90)
        plt.show()
def predictionDistribution(data, pr, featureToExamine):
    """Plot the model's predictions over one feature and save the figure.

    Args:
        data: Passed to ``info_plots.actual_plot`` as ``X``.
        pr: Passed to ``info_plots.actual_plot`` as ``model``.
        featureToExamine: Used both as the feature and as its display name.
    """
    figure, _axes, _summary = info_plots.actual_plot(
        model=pr,
        X=data,
        feature=featureToExamine,
        feature_name=featureToExamine,
        predict_kwds={},
    )
    # Hand the figure to the file's ``save`` helper under a fixed name.
    save("predictionDistribution", fig=figure, plt=plt)
def info_actual_plot(self, feature, sample=10000, predict_kwds=None, which_classes=None, **kargs):
    """Show a pdpbox actual-prediction plot for ``feature`` and keep its data.

    Args:
        feature: Used both as the feature and as its display name.
        sample: Argument passed to ``self.sample`` to obtain the plotted data.
        predict_kwds: Mapping forwarded as ``predict_kwds``; a fresh empty
            dict is used when omitted or ``None``.
        which_classes: Forwarded unchanged as ``which_classes``.
        **kargs: Any further keyword arguments for ``info_plots.actual_plot``.

    Side effects:
        Stores ``ResultDF(result, 'count')`` on ``self.info_actual_data`` and
        calls ``plt.show()``.
    """
    # A fresh dict per call: a shared ``{}`` default would leak any mutation
    # made downstream into every later call.
    if predict_kwds is None:
        predict_kwds = {}

    _fig, _axes, result = info_plots.actual_plot(
        model=self.md,
        X=self.sample(sample),
        feature=feature,
        feature_name=feature,
        predict_kwds=predict_kwds,
        which_classes=which_classes,
        **kargs,
    )
    self.info_actual_data = ResultDF(result, 'count')
    plt.show()
def ice_pred_plot(self, feature, feature_name='Feature'):
    '''
    Actual-prediction plot of ``self.model`` over ``self.X_train`` for one
    feature, drawn with pdpbox ``info_plots.actual_plot``.

    Background reading on pdpbox:
    https://towardsdatascience.com/introducing-pdpbox-2aa820afd312
    '''
    plot_arguments = dict(
        model=self.model,
        X=self.X_train,
        feature=feature,
        feature_name=feature_name,
    )
    _figure, _axes, _summary = info_plots.actual_plot(**plot_arguments)
    # NOTE(review): plot() is called without any data; show() may have been
    # intended here - confirm with the author before changing.
    plt.plot()
def actual_plot(model, X, feature, feature_name, num_grid_points=10, xticklabels=None, show_percentile=False):
    """Call ``info_plots.actual_plot`` and optionally relabel the bar axis.

    Args:
        model, X, feature, feature_name, num_grid_points, show_percentile:
            Forwarded unchanged to ``info_plots.actual_plot``.
        xticklabels: If not ``None``, set as the x tick labels of
            ``axes["bar_ax"]``.

    Returns:
        ``(fig, summary_df)`` from the underlying call; the axes are dropped.
    """
    forwarded = dict(
        model=model,
        X=X,
        feature=feature,
        feature_name=feature_name,
        num_grid_points=num_grid_points,
        show_percentile=show_percentile,
        predict_kwds={},
    )
    fig, axes, summary_df = info_plots.actual_plot(**forwarded)

    if xticklabels is None:
        return fig, summary_df

    axes["bar_ax"].set_xticklabels(xticklabels)
    return fig, summary_df
# %% target_plot -------------------------------------------------------------
# Target plot of the third feature column against the first target column.
fig, axes, summary_df = info_plots.target_plot(
    df=XY, target=y_cols[0], feature=x_cols[2], feature_name=x_cols[2]
)

# %% actual_plot -------------------------------------------------------------
# Actual-prediction plot of the second feature column for classes 0, 3 and 6.
fig, axes, df = info_plots.actual_plot(
    model=model,
    X=X,
    which_classes=[0, 3, 6],
    feature=x_cols[1],
    feature_name=x_cols[1],
    # Author's note: pass an explicit empty dict, otherwise a TypeError
    # was observed.
    predict_kwds={},
)

# %% pdp_isolate: Preset -----------------------------------------------------
# Single-process isolated PDP for the first feature column.
pdp_isolated_tmp = pdp.pdp_isolate(
    model=model, dataset=X, model_features=x_cols, feature=x_cols[0], n_jobs=1
)

# %% pdp_plot
# Pull the pieces needed for plotting out of the test_titanic bundle.
titanic_target = test_titanic['target']
titanic_model = test_titanic['xgb_model']
titanic_features = test_titanic['features']

# --- Gender ------------------------------------------------------------------
# Survivors broken down by sex.
fig, axes, summary_df = info_plots.target_plot(
    df=titanic_data, feature='Sex', feature_name='gender', target=titanic_target
)
_ = axes['bar_ax'].set_xticklabels(['Female', 'Male'])
display(summary_df)

# Survival chance predicted by the model, broken down by sex.
fig, axes, summary_df = info_plots.actual_plot(
    model=titanic_model,
    X=titanic_data[titanic_features],
    feature='Sex',
    feature_name='gender',
)
display(summary_df)

# Partial dependence plot for the gender feature.
pdp_sex = pdp.pdp_isolate(
    model=titanic_model,
    dataset=titanic_data,
    model_features=titanic_features,
    feature='Sex',
)
fig, axes = pdp.pdp_plot(pdp_sex, 'Sex', plot_lines=True, frac_to_plot=0.5)
_ = axes['pdp_ax'].set_xticklabels(['Female', 'Male'])

# --- Passenger class ---------------------------------------------------------
# Survivors broken down by passenger class, with percentiles shown.
fig, axes, summary_df = info_plots.target_plot(
    df=titanic_data,
    feature='Pclass',
    feature_name='Pclass',
    target=titanic_target,
    show_percentile=True,
)
display(summary_df)
# PDP plots: target plot for every column of X.
# The loop variable has a real name: ``_`` conventionally marks an unused
# value and, in an interactive session, shadows the last-result variable.
for column in X.columns.tolist():
    fig, axes, summary_df = info_plots.target_plot(
        df=df,
        feature=column,
        feature_name=column,
        target=target_variable,
    )

# PDP plots: actual-prediction plot for every column of X.
for column in X.columns.tolist():
    fig, axes, summary_df = info_plots.actual_plot(
        model=baseline,
        X=df[X.columns],
        feature=column,
        feature_name=column,
        predict_kwds={},
    )

# Actual-prediction plot for the two-way interaction.
# NOTE(review): the original indentation was lost; this call does not use the
# loop variable, so it is assumed to sit after the loop - confirm.
fig, axes, summary_df = info_plots.actual_plot_interact(
    model=baseline,
    X=df[X.columns],
    features=interactions_2way,
    feature_names=interactions_2way,
)

# PDP plot: grid plot of the two-way interaction.
interactions = pdp.pdp_interact(
    model=baseline,
    dataset=df,
    model_features=X.columns,
    features=interactions_2way,
)
fig, axes = pdp.pdp_interact_plot(
    interactions,
    interactions_2way,
    plot_type='grid',
    x_quantile=True,
    plot_pdp=False,
)
# region Predictions boxplots #Variables with high importance in the predicton: # Internet service/Monthly charges #Lenght of contract #Tenure #Online security #Tech support #defining a small function to unscale the x axis def unscaling(x_scaled,X_col_unsc): return ( np.round((x_scaled* X_col_unsc.std(axis=0)) + X_col_unsc.mean(axis=0),1).astype(int) ) # region Tenure #Actual plot fig, axes, summary_df =info_plots.actual_plot(model=best_rf.named_steps["classifier"], X=X_train, feature="tenure",feature_name="Tenure",predict_kwds={},num_grid_points=11) axes["bar_ax"].set_xticklabels(pd.qcut(X_train_ori.tenure,10,precision=3).values.categories.values) #plt.savefig('Predi_boxplot_tenure.png', bbox_inches='tight') #pickle.dump(fig,open("Predi_boxplot_tenure.pickle","wb")) # endregion # region Internet fig, axes, summary_df =info_plots.actual_plot(model=best_rf.named_steps["classifier"], X=X_train, feature=["InternetService_DSL","InternetService_Fiber optic","InternetService_No"],feature_name="Internet service",predict_kwds={},
base_features = X_train.columns.values.tolist() feature_name = 'incident_severity' pdp = pdp.pdp_isolate(model=model, dataset=X_test, model_features = base_features, feature = feature_name) pdp.pdp_plot(pdp, feature_name) plt.show() fig, axes, summary_df = info_plots.target_plot(\ df=data, feature='capital-gains', feature_name='capital-gains', target='fraud_reported', show_percentile=True) fig, axes, summary_df = info_plots.actual_plot(\ model, X_test, feature='capital-gains', feature_name='capital-gains',predict_kwds={}) df.auto_model.unique() fig, axes, summary_df = info_plots.target_plot(\ df=data, feature='auto_model', feature_name='auto_model', target='fraud_reported', show_percentile=True) fig, axes, summary_df = info_plots.target_plot(\ df=data, feature='auto_make', feature_name='auto_make', target='fraud_reported', show_percentile=True) fig, axes, summary_df = info_plots.target_plot(\