Ejemplo n.º 1
0
def pred_target_plot(clf,X,Y,features_to_plot,label,grid_range=None):
    """Draw one pdpbox actual-prediction plot per feature for a single label.

    The estimator to plot is taken from ``clf.classifiers_`` at the position
    that ``label`` occupies in ``Y.columns``.

    Args:
        clf: Multi-label wrapper exposing a ``classifiers_`` sequence.
        X: Feature data forwarded to ``info_plots.actual_plot``.
        Y: Label frame; only the order of its columns is used.
        features_to_plot: Iterable of feature names; one plot is made for each.
        label: Column of ``Y`` that selects the estimator.
        grid_range: Optional grid range. When given it is forwarded to pdpbox
            together with ``show_outliers=True``.

    Returns:
        A tuple ``(figs, axs)`` of two lists holding, per feature, the figure
        and the axes object returned by ``info_plots.actual_plot``.

    Raises:
        ValueError: If ``label`` is not one of ``Y.columns``.
    """
    from pdpbox import info_plots

    # Extract the classifier object from the clf multilearn object.
    estimator = clf.classifiers_[Y.columns.to_list().index(label)]

    # Both call variants share these arguments; the grid range (and the
    # outlier flag that accompanies it) is only added when requested.
    plot_kwargs = {'grid_type': 'equal'}
    if grid_range is not None:
        plot_kwargs.update(show_outliers=True, grid_range=grid_range)

    # Remember the caller's settings so they can be put back exactly, instead
    # of being overwritten with hard-coded values.  An estimator without a
    # ``verbose`` attribute ends up with ``verbose = True``, as before.
    previous_chained = pd.options.mode.chained_assignment
    previous_verbose = getattr(estimator, 'verbose', True)

    figs = []
    axs = []

    pd.options.mode.chained_assignment = None  # Turn warning msg off
    estimator.verbose = False  # Keep prints tidy while plotting
    try:
        for feature in features_to_plot:
            fig, ax, _summary_df = info_plots.actual_plot(
                estimator, X, feature=feature, feature_name=feature, **plot_kwargs)
            figs.append(fig)
            axs.append(ax)
    finally:
        # Restore global/estimator state even when a plot call raises.
        pd.options.mode.chained_assignment = previous_chained
        estimator.verbose = previous_verbose

    return figs, axs
 def show_ICE_actual(self, features=[], feature_names=[]):
     """Show a pdpbox actual-prediction plot for each (feature, name) pair.

     ``features`` and ``feature_names`` are walked in lockstep. When their
     lengths differ, a message is printed and nothing is plotted. Each plot
     uses ``self.model`` and ``self.X_train``, gets its x tick labels
     rotated by 90 degrees, and is shown before the next one is drawn.
     """
     sizes_differ = len(features) != len(feature_names)
     if sizes_differ:
         print("features and feature names must have same size")
         return None

     for column, title in zip(features, feature_names):
         info_plots.actual_plot(
             self.model,
             self.X_train,
             feature=column,
             feature_name=title,
             predict_kwds={},
         )
         plt.xticks(rotation=90)
         plt.show()
def predictionDistribution(data, pr, featureToExamine):
    """Plot the prediction distribution of one feature and hand it to ``save``.

    Calls ``info_plots.actual_plot`` with ``pr`` as the model and ``data`` as
    the feature data, using ``featureToExamine`` both as the feature and as
    its display name. The resulting figure is passed to ``save`` under the
    name "predictionDistribution".
    """
    plot_arguments = dict(
        model=pr,
        X=data,
        feature=featureToExamine,
        feature_name=featureToExamine,
        predict_kwds={},
    )
    # Only the figure is used afterwards; the axes and summary are discarded.
    figure, _axes, _summary = info_plots.actual_plot(**plot_arguments)
    save("predictionDistribution", fig=figure, plt=plt)
Ejemplo n.º 4
0
 def info_actual_plot(self, feature, sample = 10000, predict_kwds = {}, which_classes=None, **kargs):
     """Draw the pdpbox actual-prediction plot for ``feature`` and keep its summary.

     The plot is built from ``self.md`` and the data returned by
     ``self.sample(sample)``; ``feature`` doubles as the display name.
     ``predict_kwds``, ``which_classes`` and any extra keyword arguments are
     forwarded to ``info_plots.actual_plot``. The returned summary is
     wrapped in ``ResultDF(..., 'count')`` and stored on
     ``self.info_actual_data`` before the figure is shown.
     """
     _figure, _axes, summary = info_plots.actual_plot(
         model=self.md,
         X=self.sample(sample),
         feature=feature,
         feature_name=feature,
         predict_kwds=predict_kwds,
         which_classes=which_classes,
         **kargs
     )
     self.info_actual_data = ResultDF(summary, 'count')
     plt.show()
Ejemplo n.º 5
0
 def ice_pred_plot(self, feature, feature_name='Feature'):
     '''
     Actual-prediction plot of ``feature`` (pdpbox ``info_plots.actual_plot``),
     built from ``self.model`` and ``self.X_train``.
     https://towardsdatascience.com/introducing-pdpbox-2aa820afd312
     '''
     plot_result = info_plots.actual_plot(
         model=self.model,
         X=self.X_train,
         feature=feature,
         feature_name=feature_name,
     )
     # Three values are expected back; none of them is used afterwards.
     _figure, _axes, _summary = plot_result
     # NOTE(review): plt.plot() is called without data; presumably plt.show()
     # was intended - confirm before changing.
     plt.plot()
Ejemplo n.º 6
0
def actual_plot(model,
                X,
                feature,
                feature_name,
                num_grid_points=10,
                xticklabels=None,
                show_percentile=False):
    """Call ``info_plots.actual_plot`` and optionally relabel the bar axis.

    The arguments are forwarded by keyword, with ``predict_kwds`` fixed to an
    empty dict. When ``xticklabels`` is given, it replaces the tick labels of
    the ``"bar_ax"`` axes. Returns ``(fig, summary_df)``; the axes mapping is
    not returned.
    """
    figure, axes_by_name, summary = info_plots.actual_plot(
        model=model,
        X=X,
        feature=feature,
        feature_name=feature_name,
        num_grid_points=num_grid_points,
        show_percentile=show_percentile,
        predict_kwds={},
    )

    if xticklabels is None:
        return figure, summary

    axes_by_name["bar_ax"].set_xticklabels(xticklabels)
    return figure, summary
Ejemplo n.º 7
0
# %% target_plot -------------------------------------------------------------
# Target plot of the column y_cols[0] across the feature x_cols[2], computed
# on the XY frame (presumably features and targets joined - confirm).

fig, axes, summary_df = info_plots.target_plot(
    df=XY,
    feature=x_cols[2],
    feature_name=x_cols[2],
    target=y_cols[0],
)

# %% actual_plot -------------------------------------------------------------
# Actual-prediction plot of the model for feature x_cols[1], restricted to
# classes 0, 3 and 6.  Note that the summary is bound to `df` here, not to
# `summary_df` as in the cell above.

fig, axes, df = info_plots.actual_plot(
    model=model,
    X=X,
    feature=x_cols[1],
    feature_name=x_cols[1],
    which_classes=[0, 3, 6],
    predict_kwds={},  # Must be passed explicitly; omitting it led to a TypeError
)

# %% pdp_isolate: Preset -----------------------------------------------------
# Partial-dependence data for the single feature x_cols[0], computed in one
# job (n_jobs=1) and kept in `pdp_isolated_tmp`.

pdp_isolated_tmp = pdp.pdp_isolate(
    model=model,
    dataset=X,
    model_features=x_cols,
    feature=x_cols[0],
    n_jobs=1,
)

# %% pdp_plot
# Pull the feature list, the fitted model and the target column name out of
# the `test_titanic` bundle.  `titanic_data` is used below but is not defined
# in this part of the script.
titanic_features = test_titanic['features']
titanic_model = test_titanic['xgb_model']
titanic_target = test_titanic['target']

# Start with gender.
# Observed target (survival) broken down by the 'Sex' feature.
fig, axes, summary_df = info_plots.target_plot(
	df=titanic_data, feature='Sex', feature_name='gender', target=titanic_target
	)
# Replace the bar-axis tick labels with readable names.
# NOTE(review): assumes the ticks are ordered Female, Male - confirm the encoding.
_ = axes['bar_ax'].set_xticklabels(['Female', 'Male'])

display(summary_df)

# Survival predictions of the model broken down by gender.
fig, axes, summary_df = info_plots.actual_plot(
    model=titanic_model, X=titanic_data[titanic_features], feature='Sex', feature_name='gender'
)
display(summary_df)
# Partial dependence of the model on the gender feature.
pdp_sex = pdp.pdp_isolate(
    model=titanic_model, dataset=titanic_data, model_features=titanic_features, feature='Sex'
)
# Plot the partial dependence together with individual lines
# (plot_lines=True, frac_to_plot=0.5).
fig, axes = pdp.pdp_plot(pdp_sex, 'Sex', plot_lines=True, frac_to_plot=0.5)
_ = axes['pdp_ax'].set_xticklabels(['Female', 'Male'])

# Continue with the passenger-class feature.
# First, the observed target broken down by 'Pclass', with percentiles shown.
fig, axes, summary_df = info_plots.target_plot(
    df=titanic_data, feature='Pclass', feature_name='Pclass', target=titanic_target, show_percentile=True
)
display(summary_df)
Ejemplo n.º 9
0
# PDP plots: target plot for every column of X.
# The loop variable carries a real name because it is used in the body; `_`
# is conventionally reserved for values that are thrown away.
for feature in X.columns.tolist():
    fig, axes, summary_df = info_plots.target_plot(
        df=df,
        feature=feature,
        feature_name=feature,
        target=target_variable,
    )

# PDP plots: actual-prediction plot of the baseline model for every column of X.
for feature in X.columns.tolist():
    fig, axes, summary_df = info_plots.actual_plot(
        model=baseline,
        X=df[X.columns],
        feature=feature,
        feature_name=feature,
        predict_kwds={},
    )

# Actual-prediction plot for the feature pair held in `interactions_2way`.
fig, axes, summary_df = info_plots.actual_plot_interact(
    model=baseline, X=df[X.columns], features=interactions_2way, feature_names=interactions_2way
)

# PDP plot: interaction between the same features, drawn as a grid.
interactions = pdp.pdp_interact(
    model=baseline, dataset=df, model_features=X.columns, features=interactions_2way
)

fig, axes = pdp.pdp_interact_plot(
    interactions, interactions_2way, plot_type='grid', x_quantile=True, plot_pdp=False
)
# region Predictions boxplots
# Variables with high importance in the prediction:
# Internet service / Monthly charges
# Length of contract
# Tenure
# Online security
# Tech support

#defining a small function to unscale the x axis
def unscaling(x_scaled,X_col_unsc):
    """Map standardised values back to the scale of ``X_col_unsc``.

    Each value is multiplied by the standard deviation of ``X_col_unsc`` and
    shifted by its mean (both taken along axis 0). The result is rounded to
    one decimal place and then converted to ``int``.
    """
    spread = X_col_unsc.std(axis=0)
    centre = X_col_unsc.mean(axis=0)
    restored = (x_scaled * spread) + centre
    return np.round(restored, 1).astype(int)

# region Tenure

# Actual-prediction plot of the pipeline's "classifier" step for the "tenure"
# feature of X_train, using 11 grid points.
fig, axes, summary_df =info_plots.actual_plot(model=best_rf.named_steps["classifier"], X=X_train,
                             feature="tenure",feature_name="Tenure",predict_kwds={},num_grid_points=11)

# Relabel the bar axis with the 10 quantile intervals of X_train_ori.tenure
# (presumably the unscaled version of the feature - confirm).
axes["bar_ax"].set_xticklabels(pd.qcut(X_train_ori.tenure,10,precision=3).values.categories.values)



#plt.savefig('Predi_boxplot_tenure.png', bbox_inches='tight')
#pickle.dump(fig,open("Predi_boxplot_tenure.pickle","wb"))

# endregion

# region Internet


fig, axes, summary_df =info_plots.actual_plot(model=best_rf.named_steps["classifier"], X=X_train,
                             feature=["InternetService_DSL","InternetService_Fiber optic","InternetService_No"],feature_name="Internet service",predict_kwds={},
Ejemplo n.º 11
0
# Partial dependence of the model on 'incident_severity', computed on X_test
# with the training columns as the model's feature list.
base_features = X_train.columns.values.tolist()
feature_name = 'incident_severity'
# Keep the result under its own name: assigning it to `pdp` would rebind the
# name that `pdp.pdp_isolate` / `pdp.pdp_plot` are looked up on, so the plot
# call below would be resolved on the result object and fail.
pdp_isolated = pdp.pdp_isolate(model=model, dataset=X_test,
                               model_features=base_features,
                               feature=feature_name)
pdp.pdp_plot(pdp_isolated, feature_name)
plt.show()

# Observed 'fraud_reported' target across 'capital-gains', percentiles shown.
fig, axes, summary_df = info_plots.target_plot(
    df=data,
    feature='capital-gains',
    feature_name='capital-gains',
    target='fraud_reported',
    show_percentile=True,
)

# Model predictions on X_test across the same feature.
fig, axes, summary_df = info_plots.actual_plot(
    model,
    X_test,
    feature='capital-gains',
    feature_name='capital-gains',
    predict_kwds={},
)

# Distinct values of the auto_model column (read from `df`, not `data`).
df.auto_model.unique()

# Observed target across 'auto_model'.
fig, axes, summary_df = info_plots.target_plot(
    df=data,
    feature='auto_model',
    feature_name='auto_model',
    target='fraud_reported',
    show_percentile=True,
)

# Observed target across 'auto_make'.
fig, axes, summary_df = info_plots.target_plot(
    df=data,
    feature='auto_make',
    feature_name='auto_make',
    target='fraud_reported',
    show_percentile=True,
)

fig, axes, summary_df = info_plots.target_plot(\