def part_plot_1D(model, total_features, X_val, y_val, feature):
    """Plot the one-dimensional partial dependence of ``feature``.

    Runs ``pdp.pdp_isolate`` for ``feature`` on ``X_val``, hands the result to
    ``pdp.pdp_plot`` and displays the figure with ``plt.show()``.

    Args:
        model: Model forwarded to ``pdp.pdp_isolate`` as ``model``.
        total_features: Forwarded as ``model_features``.
        X_val: Forwarded as ``dataset``.
        y_val: Not used by this function; kept in the signature for callers.
        feature: Feature to isolate; also used as the plot's feature name.
    """
    isolated = pdp.pdp_isolate(
        model=model,
        dataset=X_val,
        model_features=total_features,
        feature=feature,
    )
    pdp.pdp_plot(isolated, feature)
    plt.show()
def pdp_1d(clf, X, Y, features_to_plot, label, detailed=False):
    """Draw one partial dependence plot per feature for a single label.

    The classifier for ``label`` is looked up in ``clf.classifiers_`` using the
    position of ``label`` in ``Y.columns``. Its ``verbose`` attribute is
    switched off while plotting and restored afterwards, even if a plot fails.

    Args:
        clf: Multi-label wrapper exposing a ``classifiers_`` sequence.
        X: DataFrame of features; its columns are passed as ``model_features``.
        Y: DataFrame of labels; only ``Y.columns`` is read.
        features_to_plot: Iterable of feature names to plot.
        label: Column of ``Y`` whose classifier should be explained.
        detailed: When truthy, request the point distribution, clustering
            (50 centers), quantile x-axis and percentiles from ``pdp_plot``.

    Returns:
        tuple: ``(figs, axs)`` lists, one entry per plotted feature, holding
        whatever ``pdp.pdp_plot`` returned.

    Raises:
        ValueError: If ``label`` is not one of ``Y.columns``.
    """
    from pdpbox import pdp

    # Extract the per-label classifier object from the multilearn wrapper.
    label_index = Y.columns.to_list().index(label)
    label_clf = clf.classifiers_[label_index]

    if detailed:
        plot_kwargs = dict(
            plot_pts_dist=True,
            cluster=True,
            n_cluster_centers=50,
            x_quantile=True,
            show_percentile=True,
        )
    else:
        plot_kwargs = {}

    model_features = X.columns.to_list()
    figs = []
    axs = []

    # Remember the caller's setting instead of forcing True afterwards.
    # Falls back to True (the previous hard-coded reset value) when the
    # classifier had no ``verbose`` attribute to begin with.
    previous_verbose = getattr(label_clf, "verbose", True)
    label_clf.verbose = False  # keep prints tidy while plotting
    try:
        for feature in features_to_plot:
            pdp_dist = pdp.pdp_isolate(
                model=label_clf,
                dataset=X,
                model_features=model_features,
                feature=feature,
            )
            fig, ax = pdp.pdp_plot(pdp_dist, feature, **plot_kwargs)
            figs.append(fig)
            axs.append(ax)
    finally:
        label_clf.verbose = previous_verbose

    return figs, axs
def pdpPdpbox(data, pr, featureToExamine):
    """Render and save two PDP figures for ``featureToExamine``.

    The first figure is a plain PDP with individual lines and is saved as
    "pdpPdpboxIsolate". The second adds the quantile x-axis, point
    distribution and percentiles, and is saved as "pdpPdpboxPlot". In both
    figures every drawn line has its private ``_alpha`` attribute set to 1.

    Args:
        data: Dataset passed to ``pdp.pdp_isolate``; ``data.columns`` is used
            as ``model_features`` and ``len(data)`` caps ``frac_to_plot``.
        pr: Model passed to ``pdp.pdp_isolate``.
        featureToExamine: Feature to isolate and plot.
    """
    def _make_opaque(axis):
        # Writes the private attribute directly, exactly as the file has
        # always done, rather than going through a public setter.
        for drawn in axis.lines:
            drawn._alpha = 1

    isolated = pdp.pdp_isolate(
        model=pr,
        dataset=data,
        model_features=data.columns,
        feature=featureToExamine,
    )

    _, plain_axes = pdp.pdp_plot(
        isolated,
        featureToExamine,
        plot_lines=True,
        frac_to_plot=min(100, len(data)),
    )
    _make_opaque(plain_axes["pdp_ax"])
    save("pdpPdpboxIsolate", plt=plt)

    detailed_fig, detailed_axes = pdp.pdp_plot(
        isolated,
        featureToExamine,
        plot_lines=True,
        frac_to_plot=min(100, len(data)),
        x_quantile=True,
        plot_pts_dist=True,
        show_percentile=True,
    )
    # With plot_pts_dist=True the "pdp_ax" entry holds two named axes.
    for axis_key in ("_pdp_ax", "_count_ax"):
        _make_opaque(detailed_axes["pdp_ax"][axis_key])
    save("pdpPdpboxPlot", plt=plt, fig=detailed_fig)
def show_PDP_isolate(self, features=None):
    """Show an isolated partial dependence plot for each requested feature.

    For every feature, ``pdp.pdp_isolate`` is run with ``self.model``,
    ``self.X_train`` and ``self.feature_names``; the result is plotted, the
    x tick labels are rotated by 90 degrees and the figure is shown.

    Args:
        features: Iterable of feature names. ``None`` (the default) or an
            empty iterable plots nothing.
    """
    # The default used to be a shared mutable list; None avoids that pitfall
    # while keeping "no features" as the default behaviour.
    if features is None:
        return

    # NOTE(review): xticks/show are assumed to run once per feature - confirm.
    for f in features:
        isolated = pdp.pdp_isolate(
            self.model,
            self.X_train,
            model_features=self.feature_names,
            feature=f,
            predict_kwds={},
        )
        pdp.pdp_plot(isolated, feature_name=f)
        plt.xticks(rotation=90)
        plt.show()
def _draw_ice_plot(pdp_current, feature, center):
    """Draw one PDP/ICE figure for ``feature`` and apply the shared labelling."""
    long_name = varnames_long_dict[feature]
    fig, axes = pdp.pdp_plot(
        pdp_current,
        long_name,
        center=center,
        plot_lines=True,
        frac_to_plot=100,  # original author's note: this is a percentage
        x_quantile=False,
        plot_pts_dist=True,
        show_percentile=True,
        plot_params=plot_params_default,
    )
    pdp_ax = axes["pdp_ax"]["_pdp_ax"]
    count_ax = axes["pdp_ax"]["_count_ax"]
    pdp_ax.set_ylabel("Number of bike rides per hour", size=24)
    count_ax.set_xlabel(long_name, size=24)
    pdp_ax.set_title(
        'Partial Dependence and ICE Plot for: %s' % long_name,
        y=1.1,
        size=24,
    )
    pdp_ax.tick_params(axis='both', which='major', labelsize=24)
    return fig


def construct_ice_plot(pdp_current, feature):
    """Build the centered and the standard ICE plot for a numeric feature.

    Both figures are drawn from the same ``pdp_current`` object and receive
    identical axis labels, title and tick styling; they differ only in the
    ``center`` flag passed to ``pdp.pdp_plot``.

    The plot name is now looked up with the ``feature`` argument. Previously
    the two ``pdp_plot`` calls read the module-level ``wch_feature`` instead,
    so the plot could be named after a different feature than its labels.

    Args:
        pdp_current: Object accepted by ``pdp.pdp_plot`` as its first argument.
        feature: Key into ``varnames_long_dict`` giving the display name.

    Returns:
        tuple: ``(fig_center, fig)`` - the centered figure, then the
        uncentered one.

    Raises:
        KeyError: If ``feature`` is missing from ``varnames_long_dict``.
    """
    fig_center = _draw_ice_plot(pdp_current, feature, center=True)
    fig = _draw_ice_plot(pdp_current, feature, center=False)
    return fig_center, fig
def pdp_isolate_explain(X, y, feature):
    """Fit a random forest on ``X``/``y`` and plot the PDP of ``feature``.

    ``X`` is ordinal-encoded and imputed with ``SimpleImputer`` defaults, a
    200-tree ``RandomForestClassifier`` (``random_state=6``) is fitted, and the
    isolated partial dependence of ``feature`` is plotted with ICE lines.

    The same imputed frame is used for fitting and for ``pdp_isolate``.
    Previously the model was fitted on imputed values while the partial
    dependence was computed on the encoded-but-not-imputed frame, so the model
    could be asked to predict on missing values it never saw in training.

    Args:
        X: Feature table accepted by ``category_encoders.OrdinalEncoder``.
        y: Target passed to ``RandomForestClassifier.fit``.
        feature: Column of the encoded frame to explain.

    Raises:
        ValueError: If the imputer output no longer matches the encoded
            columns. NOTE(review): presumably happens when a column is
            entirely missing and the imputer drops it - confirm.
    """
    import category_encoders as ce
    import pandas as pd
    from pdpbox.pdp import pdp_isolate, pdp_plot
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.impute import SimpleImputer

    # Encode, then impute; keep the column labels so the model, the dataset
    # and ``model_features`` all agree.
    X_encoded = ce.OrdinalEncoder().fit_transform(X)
    X_processed = pd.DataFrame(
        SimpleImputer().fit_transform(X_encoded),
        columns=X_encoded.columns,
        index=X_encoded.index,
    )

    # Pick a model and fit the data.
    pdp_model = RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=6)
    pdp_model.fit(X_processed, y)

    # The actual plotting. The result gets its own name so the imported
    # ``pdp_isolate`` function is not shadowed.
    isolated = pdp_isolate(
        model=pdp_model,
        dataset=X_processed,
        model_features=X_processed.columns,
        feature=feature,
    )
    pdp_plot(isolated, feature_name=feature, plot_lines=True, frac_to_plot=100)
def pdp_plotter(feature, model):
    """Plot and show the isolated partial dependence of ``feature``.

    The dataset and feature list come from the module-level ``test_X`` and
    ``feature_names``. The ``model`` argument is now the model that gets
    explained; previously it was ignored and the module-level ``lgb_clf`` was
    always used.

    Args:
        feature: Feature to isolate and plot.
        model: Model passed to ``pdp.pdp_isolate``.
    """
    pdp_feat = pdp.pdp_isolate(
        model=model,
        dataset=test_X,
        model_features=feature_names,
        feature=feature,
    )
    pdp.pdp_plot(pdp_feat, feature)
    plt.show()
def partial_dependence_plot(model, data: pd.DataFrame, model_features: list, column: str):
    """Plot the isolated partial dependence of ``column`` on a 10x8 figure.

    Args:
        model: Model passed to ``pdp.pdp_isolate``.
        data: Dataset passed as ``dataset``.
        model_features: Passed as ``model_features``.
        column: Feature to isolate; also used as the plot's feature name.

    Returns:
        Whatever ``plt.show()`` returns.
    """
    isolate_args = {
        "model": model,
        "dataset": data,
        "model_features": model_features,
        "feature": column,
    }
    isolated = pdp.pdp_isolate(**isolate_args)
    pdp.pdp_plot(isolated, column, figsize=(10, 8))
    shown = plt.show()
    return shown
def ploting_pdp(f):
    """Plot and show the partial dependence of feature ``f``.

    Uses the module-level model ``rf`` and dataset ``X_train``; the columns of
    ``X_train`` are passed as ``model_features`` and no custom grid points are
    supplied.

    Args:
        f: Feature to isolate and plot.
    """
    isolated = pdp.pdp_isolate(
        model=rf,
        dataset=X_train,
        model_features=X_train.columns,
        feature=f,
        cust_grid_points=None,
    )
    pdp.pdp_plot(isolated, f)
    plt.show()
def partial_dependence_plot(feat_name, model, X_test, base_features, path):
    """Save the partial dependence plot of ``feat_name`` to ``path``.

    After saving, a "generate <path>" line is printed and the current pyplot
    figure is closed.

    Args:
        feat_name: Feature to isolate and plot.
        model: Model passed to ``pdp.pdp_isolate``.
        X_test: Dataset passed as ``dataset``.
        base_features: Passed as ``model_features``.
        path: Destination handed to ``plt.savefig``; concatenated with a
            string for the log line.
    """
    isolate_args = dict(
        model=model,
        dataset=X_test,
        model_features=base_features,
        feature=feat_name,
    )
    isolated = pdp.pdp_isolate(**isolate_args)
    pdp.pdp_plot(isolated, feat_name)

    plt.savefig(path)
    print('generate ' + path)
    plt.close()
def plot_1D_partial_dependency(fitted_model, X_test: pd.DataFrame, model_features: list, feature: str):
    """Plot and show the one-dimensional partial dependence of ``feature``.

    Args:
        fitted_model: Model passed to ``pdp.pdp_isolate``.
        X_test: Dataset passed as ``dataset``.
        model_features: Passed as ``model_features``.
        feature: Feature to isolate; also used as the plot's feature name.
    """
    # Build the data to plot, then draw and display it.
    isolated = pdp.pdp_isolate(
        model=fitted_model,
        dataset=X_test,
        model_features=model_features,
        feature=feature,
    )
    pdp.pdp_plot(isolated, feature)
    plt.show()
def pdp_plot(self, model, X_val, feature_to_plot):
    """Plot partial dependence for a list of variables.

    Args:
        model: Fitted model (per the original docstring), passed to
            ``pdp.pdp_isolate``.
        X_val: Validation dataset; its column list is passed as the model
            features.
        feature_to_plot: Iterable of feature names; one plot is shown per
            entry.
    """
    # NOTE(review): plt.show() is assumed to run once per feature - confirm.
    for name in feature_to_plot:
        all_columns = X_val.columns.tolist()
        isolated = pdp.pdp_isolate(model, X_val, all_columns, name)
        pdp.pdp_plot(isolated, name)
        plt.show()
def show_partial_dep_plots(lin_model, X_test):
    """Show a partial dependence plot for every column of ``X_test``.

    Args:
        lin_model: Model passed to ``pdp.pdp_isolate``.
        X_test: Dataset; ``X_test.columns`` supplies both the features to
            iterate over and the ``model_features`` argument.
    """
    # NOTE(review): plt.show() is assumed to run once per column - confirm.
    for column in X_test.columns:
        isolate_args = dict(
            model=lin_model,
            dataset=X_test,
            model_features=X_test.columns,
            feature=column,
        )
        isolated_dep = pdp.pdp_isolate(**isolate_args)
        pdp.pdp_plot(isolated_dep, column)
        plt.show()
def isolated(model, X, feature):
    """Draw the isolated partial dependence plot of ``feature``.

    Args:
        model: Model passed to ``pdp_isolate``.
        X: Dataset; ``X.columns`` is passed as ``model_features``.
        feature: Feature to isolate; also used as ``feature_name``.
    """
    # Compute the partial dependence for the single feature...
    dependence = pdp_isolate(
        model=model,
        dataset=X,
        model_features=X.columns,
        feature=feature,
    )
    # ...and plot it.
    pdp_plot(dependence, feature_name=feature)
def make_pdp_interpretation(dataset, column_names, training_set, model):
    """Display partial dependence plots based on user input.

    Two Streamlit select boxes choose the feature and the target class. The
    isolated partial dependence is computed for the chosen feature and the
    entry matching the chosen class is plotted. An explanatory note is shown
    when the "How it is calculated" button is pressed.

    Args:
        dataset: Table with a 'Target Exit Destination' column; its value
            counts supply the class choices.
        column_names: Feature names, used for the select box and as the
            DataFrame columns.
        training_set: Data wrapped into a DataFrame with ``column_names``.
        model: Model passed to ``pdp_isolate``.
    """
    # Fixed position of each class inside the ``pdp_isolate`` result.
    class_positions = {
        'Unknown/Other': 0,
        'Permanent Exit': 1,
        'Emergency Shelter': 2,
        'Temporary Exit': 3,
        'Transitional Housing': 4,
    }

    X_pdp = pd.DataFrame(training_set, columns=column_names)
    feature = st.selectbox("Choose the feature to plot", column_names)

    class_counts = dataset['Target Exit Destination'].value_counts()
    class_list = list(class_counts.index)
    target_value = st.selectbox("Choose the class to plot", class_list, index=1)

    per_class = pdp_isolate(
        model=model,
        dataset=X_pdp,
        model_features=X_pdp.columns,
        feature=feature,
    )

    # Classes outside the table are silently not plotted.
    position = class_positions.get(target_value)
    if position is not None:
        pdp_plot(per_class[position], feature_name=[feature, target_value])

    # NOTE(review): st.pyplot() is assumed to run unconditionally - confirm.
    st.pyplot()
    st.markdown("#### Partial Dependence Plot")

    if st.button("How it is calculated"):
        # NOTE(review): line breaks inside this message were reconstructed;
        # the wording itself is unchanged.
        st.info("""
        The partial dependence plot shows how a feature affects predictions.
        Here's how to undertand the pdp plot:
        1. The y axis is interpreted as change in the prediction from what it
        would be predicted at the baseline or leftmost value.
        2. A blue shaded area indicates level of confidence
        You can choose one of out the five prediction classes to see the effects
        of a selected feature.
        For more information, check out this free course at kaggle:
        [Link](https://www.kaggle.com/dansbecker/partial-plots)
        To check out the pdp box documentation, click the link:
        [PDP Box Documentation](
        https://pdpbox.readthedocs.io/en/latest/index.html
        )
        """)
def plot_pdp(self, feature_to_plot, i):
    """Save the partial dependence plot of one feature as a JPEG.

    The plot is built from ``self.model`` and ``self.x`` and written to
    ``<self.out>/dep_plot<i>.jpg`` at 400 dpi; the figure is closed afterwards.

    Args:
        feature_to_plot: Feature to isolate and plot.
        i: Value appended (via ``str``) to the output file name.
    """
    # Create the data to plot.
    feature_columns = list(self.x.columns)
    isolated = pdp.pdp_isolate(
        model=self.model,
        dataset=self.x,
        model_features=feature_columns,
        feature=feature_to_plot,
    )

    # Plot it.
    pdp.pdp_plot(isolated, feature_to_plot)

    # Save the plot.
    plt.tight_layout()
    target = self.out + '/dep_plot' + str(i) + '.jpg'
    plt.savefig(target, dpi=400)
    plt.close()
def pdplot(
    model,
    X_val,
    feat,
    image_name='img_pdplot.png',
):
    """Load a pickled model, plot the PDP of ``feat``, save it and show it.

    The figure is saved before it is shown. Previously ``plt.savefig`` ran
    after ``plt.show()``, which can write a blank image because the figure
    may no longer be the current one once the window is dismissed. The model
    file is also closed deterministically now.

    Args:
        model: Path of the pickle file containing the model.
        X_val: DataFrame used as the dataset; its columns are passed as
            ``model_features``.
        feat: Feature to isolate and plot.
        image_name: Destination of the saved figure.
    """
    # SECURITY: unpickling runs arbitrary code from the file. Only pass paths
    # to model files you trust.
    with open(model, 'rb') as model_file:
        ml_model = pickle.load(model_file)

    feat_names = X_val.columns.tolist()
    pdp_assign = pdp.pdp_isolate(
        model=ml_model,
        dataset=X_val,
        model_features=feat_names,
        feature=feat,
    )
    fig, _ = pdp.pdp_plot(pdp_assign, feat)

    # Save through the figure object so the output does not depend on which
    # figure pyplot considers current.
    fig.savefig(image_name)
    plt.show()
def show_pdp(self, feature_name):
    """Plot the uncentered PDP of ``feature_name`` on the training data.

    Calls ``self.fit()`` first when ``self.fitted_model`` is ``None``. For the
    encoded 'Sex' feature the display columns are replaced by the encoder's
    categories for that column.

    Args:
        feature_name: Feature to isolate and plot.
    """
    if self.fitted_model is None:
        self.fit()

    train = self.X_train
    dependence = pdp_isolate(self.fitted_model, train, train.columns, feature_name)

    if feature_name == 'Sex':
        # Encoded feature: show the original category labels instead.
        sex_position = self.categorical.columns.get_loc('Sex')
        dependence.display_columns = self.encoder.categories_[sex_position]

    pdp_plot(
        dependence,
        feature_name,
        center=False,
        plot_lines=True,
        x_quantile=True,
        frac_to_plot=0.2,
    )
def plot_pdp(m, X, features, feature, center=True, classes=None, percentile_range=None, plot_params=None):
    """Plot and show a PDP with ICE lines and the point distribution.

    Args:
        m: Model passed to ``pdp.pdp_isolate``.
        X: Dataset passed to ``pdp.pdp_isolate``.
        features: Model features passed to ``pdp.pdp_isolate``.
        feature: Feature to isolate and plot.
        center: Forwarded to ``pdp.pdp_plot``.
        classes: Optional tick labels applied to the x-axis of both the PDP
            axis and the count axis.
        percentile_range: Forwarded to ``pdp.pdp_isolate``.
        plot_params: Forwarded to ``pdp.pdp_plot``.
    """
    isolated = pdp.pdp_isolate(
        m, X, features, feature,
        n_jobs=-1,
        percentile_range=percentile_range,
    )
    fig, axes = pdp.pdp_plot(
        isolated, feature,
        plot_lines=True,
        center=center,
        plot_pts_dist=True,
        plot_params=plot_params,
    )

    # NOTE(review): only the two set_xticklabels calls are assumed to be
    # guarded by ``classes``; the remaining clean-up is assumed to be
    # unconditional - confirm.
    if classes is not None:
        axes['pdp_ax']['_pdp_ax'].set_xticklabels(classes)
        axes['pdp_ax']['_count_ax'].set_xticklabels(classes)

    count_axis = axes['pdp_ax']['_count_ax']
    count_axis.set_xlabel('')
    count_axis.set_title('')
    fig.autofmt_xdate()
    plt.show()
def partial_dependence_plot(model, dataset: pd.DataFrame, model_features: list, objective: str, **kwargs):
    """Plot and show the isolated partial dependence of ``objective``.

    The figure defaults to ``figsize=(10, 8)``. A ``figsize`` supplied through
    ``kwargs`` now overrides that default; previously it collided with the
    hard-coded value and raised ``TypeError``.

    :param model: model passed to ``pdp.pdp_isolate``
    :param dataset: dataset passed to ``pdp.pdp_isolate``
    :param model_features: passed as ``model_features``
    :param objective: feature to isolate; also used as the plot's feature name
    :param kwargs: extra keyword arguments forwarded to ``pdp.pdp_plot``
    :return: whatever ``plt.show()`` returns
    """
    pdp_data = pdp.pdp_isolate(
        model=model,
        dataset=dataset,
        model_features=model_features,
        feature=objective,
    )
    # Start from the default and let caller-supplied options win.
    plot_options = {'figsize': (10, 8)}
    plot_options.update(kwargs)
    pdp.pdp_plot(pdp_data, objective, **plot_options)
    return plt.show()
def test_pdp_plot_single_default(self, pdp_sex):
    """Single chart without the data distribution plot.

    Checks that ``pdp_plot`` returns a Matplotlib figure plus an axes dict
    holding exactly one PDP axis and one title axis.

    The checks use ``isinstance`` against the public ``matplotlib.axes.Axes``
    class. The previous comparison against the private
    ``matplotlib.axes._subplots.Subplot`` breaks on Matplotlib releases that
    no longer ship that module.
    """
    fig, axes = pdp_plot(pdp_sex, "sex")
    assert isinstance(fig, matplotlib.figure.Figure)
    assert sorted(axes.keys()) == ["pdp_ax", "title_ax"]
    assert isinstance(axes["pdp_ax"], matplotlib.axes.Axes)
    assert isinstance(axes["title_ax"], matplotlib.axes.Axes)
def plot_pdp(feat, clusters=None, feat_name=None):
    """Plot the PDP of ``feat`` with ICE lines, optionally clustered.

    Uses the module-level model ``m`` and dataset ``x``.

    Args:
        feat: Feature passed to ``pdp.pdp_isolate``.
        clusters: Number of cluster centers; clustering is enabled only when
            this is not ``None``.
        feat_name: Display name; falls back to ``feat`` when falsy.

    Returns:
        Whatever ``pdp.pdp_plot`` returns.
    """
    display_name = feat_name or feat
    use_clusters = clusters is not None
    isolated = pdp.pdp_isolate(m, x, feat)
    return pdp.pdp_plot(
        isolated,
        display_name,
        plot_lines=True,
        cluster=use_clusters,
        n_cluster_centers=clusters,
    )
def _current_figure_img_tag(css_class):
    """Render the current pyplot figure as an inline base64 PNG ``<img>`` tag, then close it."""
    buf = io.BytesIO()
    try:
        plt.savefig(buf, format='png')
        # b64encode returns bytes on Python 3; decode so it can be joined
        # with the surrounding text.
        encoded = base64.b64encode(buf.getvalue()).decode('ascii')
    finally:
        buf.close()
        # Close the figure so repeated requests do not accumulate figures.
        plt.close()
    return "<img class='" + css_class + "' src='data:image/png;base64," + encoded + "'/>"


def pdp_feat(feat):
    """Render the PDP and the actual plot of ``feat`` into 'pdp.html'.

    Both plots are produced from the module-level ``xgb_clf`` and ``test``,
    embedded as base64 PNG ``<img>`` tags and handed to the template.

    Fixes relative to the earlier version: the base64 payload is decoded to
    text before concatenation (concatenating ``str`` with ``bytes`` raises
    ``TypeError`` on Python 3), and each figure is closed after rendering.

    Args:
        feat: Feature to plot; converted with ``str`` for the pdp calls and
            passed unchanged to the template as ``feature_name``.

    Returns:
        The result of ``render_template('pdp.html', ...)``.
    """
    feature = str(feat)
    pdp_obj = pdp.pdp_isolate(xgb_clf, test[xgb_clf.booster().feature_names], feature)

    pdp.pdp_plot(pdp_obj, feature, plot_org_pts=True, x_quantile=True)
    img_tag = _current_figure_img_tag('pdp_plot')

    pdp.actual_plot(pdp_obj, feature)
    img_tag_act = _current_figure_img_tag('act_plot')

    return render_template(
        'pdp.html',
        feature_name=feat,
        img_html=img_tag,
        img_act_html=img_tag_act,
    )
def test_pdp_plot_single_distplot(self, pdp_sex):
    """Single chart with the data distribution plot.

    With ``plot_pts_dist=True`` the "pdp_ax" entry must be a dict holding a
    PDP axis and a count axis, next to the title axis.

    The checks use ``isinstance`` against the public ``matplotlib.axes.Axes``
    class. The previous comparison against the private
    ``matplotlib.axes._subplots.Subplot`` breaks on Matplotlib releases that
    no longer ship that module.
    """
    fig, axes = pdp_plot(pdp_sex, "sex", plot_pts_dist=True)
    assert sorted(axes.keys()) == ["pdp_ax", "title_ax"]
    assert sorted(axes["pdp_ax"].keys()) == ["_count_ax", "_pdp_ax"]
    assert isinstance(axes["pdp_ax"]["_pdp_ax"], matplotlib.axes.Axes)
    assert isinstance(axes["pdp_ax"]["_count_ax"], matplotlib.axes.Axes)
    assert isinstance(axes["title_ax"], matplotlib.axes.Axes)
def generateInsight(model, features, data):
    """Draw the PDP of ``features`` together with its point distribution.

    Args:
        model: Model passed to ``pdp.pdp_isolate``.
        features: Feature to isolate; also used as the plot's feature name.
        data: Dataset; ``data.columns`` is passed as ``model_features``.
    """
    isolated = pdp.pdp_isolate(
        model=model,
        dataset=data,
        model_features=data.columns,
        feature=features,
    )
    # The figure and axes are unpacked but not used any further.
    _figure, _axes = pdp.pdp_plot(
        pdp_isolate_out=isolated,
        feature_name=features,
        plot_pts_dist=True,
    )
def plot_pdp(df, model, feat, clusters=None, feat_name=None):
    """Plot the PDP of ``feat`` with ICE lines, optionally clustered.

    The original author recommends passing a sample of the dataframe obtained
    with ``get_sample()``.

    Args:
        df: Dataset; ``df.columns`` is passed as the model features.
        model: Model passed to ``pdp.pdp_isolate``.
        feat: Feature to isolate.
        clusters: Number of cluster centers; clustering is enabled only when
            this is not ``None``.
        feat_name: Display name; falls back to ``feat`` when falsy.

    Returns:
        Whatever ``pdp.pdp_plot`` returns.
    """
    display_name = feat_name or feat
    use_clusters = clusters is not None
    isolated = pdp.pdp_isolate(model, df, df.columns, feat)
    return pdp.pdp_plot(
        isolated,
        display_name,
        plot_lines=True,
        cluster=use_clusters,
        n_cluster_centers=clusters,
    )
def test_pdp_plot_multi_which_classes(self, pdp_feat_67_rf):
    """Restricting ``which_classes`` to three classes yields three PDP axes."""
    plot_options = dict(
        center=True,
        x_quantile=True,
        ncols=2,
        which_classes=[0, 3, 7],
    )
    _fig, axes = pdp_plot(pdp_feat_67_rf, 'feat_67', **plot_options)
    assert len(axes['pdp_ax']) == 3
def test_pdp_plot_multi_one_class(self, pdp_feat_67_rf):
    """Keeping a single class yields one PDP axis rather than a collection.

    The check uses ``isinstance`` against the public ``matplotlib.axes.Axes``
    class. The previous comparison against the private
    ``matplotlib.axes._subplots.Subplot`` breaks on Matplotlib releases that
    no longer ship that module.
    """
    fig, axes = pdp_plot(
        pdp_feat_67_rf,
        'feat_67',
        center=True,
        x_quantile=True,
        ncols=2,
        which_classes=[5],
    )
    assert isinstance(axes['pdp_ax'], matplotlib.axes.Axes)
def eval_pdp(model, x_dev, feature_names):
    """Save one partial dependence plot per feature as ``pdp_<feature>.png``.

    Background: https://www.kaggle.com/dansbecker/partial-plots

    Each figure is closed once it has been written. Previously only
    ``plt.clf()`` was called, which clears the current figure but leaves it
    open, so one figure per feature stayed alive.

    Args:
        model: Model passed to ``pdp.pdp_isolate``.
        x_dev: Data wrapped into a DataFrame with ``feature_names`` as columns.
        feature_names: Feature names; every one of them is plotted.
    """
    # pdp_isolate requires the data to be a DataFrame, so wrap it.
    df_x_dev = pd.DataFrame(x_dev, columns=feature_names)

    for feature in feature_names:
        # Create the data that we will plot.
        pdp_values = pdp.pdp_isolate(
            model=model,
            dataset=df_x_dev,
            model_features=feature_names,
            feature=feature,
            num_grid_points=100,
        )
        # Plot it, save it, and always release the figure.
        fig, _ = pdp.pdp_plot(pdp_values, feature)
        try:
            fig.savefig(flexp.get_file_path("pdp_{}.png".format(feature)))
        finally:
            plt.close(fig)
def test_pdp_plot_single_distplot(self, pdp_sex):
    """Single chart with the data distribution plot.

    With ``plot_pts_dist=True`` the 'pdp_ax' entry must be a dict holding a
    PDP axis and a count axis, next to the title axis.

    The checks use ``isinstance`` against the public ``matplotlib.axes.Axes``
    class. The previous comparison against the private
    ``matplotlib.axes._subplots.Subplot`` breaks on Matplotlib releases that
    no longer ship that module.
    """
    fig, axes = pdp_plot(pdp_sex, 'sex', plot_pts_dist=True)
    assert sorted(axes.keys()) == ['pdp_ax', 'title_ax']
    assert sorted(axes['pdp_ax'].keys()) == ['_count_ax', '_pdp_ax']
    assert isinstance(axes['pdp_ax']['_pdp_ax'], matplotlib.axes.Axes)
    assert isinstance(axes['pdp_ax']['_count_ax'], matplotlib.axes.Axes)
    assert isinstance(axes['title_ax'], matplotlib.axes.Axes)