def test_front_page_xgboost():
    xgboost = pytest.importorskip('xgboost')

    # load JS visualization code to notebook
    shap.initjs()

    # train XGBoost model
    X, y = shap.datasets.california(n_points=500)
    model = xgboost.train({"learning_rate": 0.01, "silent": 1}, xgboost.DMatrix(X, label=y), 100)

    # explain the model's predictions using SHAP values
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("Longitude", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
def test_front_page_xgboost():
    import xgboost
    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # train XGBoost model
    X, y = shap.datasets.boston()
    model = xgboost.train({"learning_rate": 0.01}, xgboost.DMatrix(X, label=y), 100)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    # (force_plot takes the base value first; the original call omitted it)
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
def test_front_page_sklearn():
    import sklearn.ensemble
    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # train model
    X, y = shap.datasets.boston()
    models = [
        sklearn.ensemble.RandomForestRegressor(n_estimators=100),
        sklearn.ensemble.ExtraTreesRegressor(n_estimators=100),
    ]
    for model in models:
        model.fit(X, y)

        # explain the model's predictions using SHAP values
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)

        # visualize the first prediction's explanation
        shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

        # visualize the training set predictions
        shap.force_plot(explainer.expected_value, shap_values, X)

        # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
        shap.dependence_plot(5, shap_values, X, show=False)
        shap.dependence_plot("RM", shap_values, X, show=False)

        # summarize the effects of all the features
        shap.summary_plot(shap_values, X, show=False)
def analysis():
    path = os.getcwd()
    X_train = pd.read_csv(path + "/results/x_train.csv")
    # NOTE: the original loaded x_train.csv for all three frames, which looks
    # like a copy-paste bug; y_train.csv and x_test.csv are assumed here
    y_train = pd.read_csv(path + "/results/y_train.csv")
    X_test = pd.read_csv(path + "/results/x_test.csv")
    grid = pickle.load(open(path + "/results/grid.p", "rb"))

    # predicted positive-class probabilities on the test set
    test_probs = grid.predict_proba(X_test)[:, 1]
    f = lambda x: grid.best_estimator_.steps[-1][1].predict_proba(x)[:, 1]

    X = X_train
    X = pd.concat([X, y_train], axis=1)
    print(X.corr("spearman"))

    # use Kernel SHAP to explain test set predictions
    explainer = shap.KernelExplainer(f, X)
    shap_values = explainer.shap_values(X[0:10])

    print(test_probs)
    print(test_probs.mean())

    shapv = pd.DataFrame(shap_values)
    base_pred = explainer.expected_value  # base (expected) prediction
    shapv.index = X[0:10].index
    shapv.columns = 's_' + X.columns
    shap_all = pd.concat([shapv, X[0:10]], axis=1)

    a = shap_all.query('gil<50').sort_values(['gil']).index.sort_values()
    b = shapv.loc[a, :].sum(axis=1).sort_values(ascending=True).index
    print(shapv.loc[b, :])

    shap.force_plot(explainer.expected_value, shap_values[8, :], X.iloc[8, :], matplotlib=True)
    shap.summary_plot(shap_values, X[0:10], max_display=40, plot_type="dot")
    plt.show()
def shap_implementation(model, app_train):
    shap.initjs()
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(app_train)
    # use iloc so a DataFrame row (not a column labelled 2) is selected
    shap.force_plot(explainer.expected_value, shap_values[2], app_train.iloc[2])
    shap.summary_plot(shap_values, app_train)
def calc_plot_importance(X_no_dataset, Y_total):
    meta_classifier = train_on_all_datasets(X_no_dataset, Y_total)

    importance_types = ['weight', 'gain', 'cover']
    for imp_type in importance_types:  # renamed from `type` to avoid shadowing the builtin
        # top 20 most important features
        plot_importance(meta_classifier, importance_type=imp_type,
                        title='Feature importance: ' + imp_type, max_num_features=20)
        plt.savefig('plots/' + imp_type + '_importance.png')
        plt.clf()

    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # explain the model's predictions using SHAP
    # (same syntax works for LightGBM, CatBoost, scikit-learn and spark models)
    explainer = shap.TreeExplainer(meta_classifier)
    shap_values = explainer.shap_values(X_no_dataset)

    # visualize the first prediction's explanation (use matplotlib=True to avoid Javascript)
    shap.force_plot(explainer.expected_value, shap_values[0, :], X_no_dataset.iloc[0, :])

    shap.summary_plot(shap_values, X_no_dataset, show=False)
    plt.savefig('plots/shap_values.png')
    plt.clf()
def test_front_page_xgboost():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_front_page_xgboost!")
        return
    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # train XGBoost model
    X, y = shap.datasets.boston()
    model = xgboost.train({"learning_rate": 0.01, "silent": 1},
                          xgboost.DMatrix(X, label=y), 100)

    # explain the model's predictions using SHAP values
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
def PlotShap():
    # assumes clf, X_train and X_test are available at module level
    explainer = shap.KernelExplainer(clf.predict_proba, X_train, link="logit")
    shap_values = explainer.shap_values(X_test)
    shap.summary_plot(shap_values[1], X_test)
    # pair the class-1 SHAP values with the class-1 base value
    # (the original mixed expected_value[0] with shap_values[1])
    shap.force_plot(explainer.expected_value[1], shap_values[1], X_test, link="logit")
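# PlotShap above reads clf, X_train and X_test from module scope. A hedged
# setup sketch; the SVC and iris split are illustrative assumptions, not from
# the source:
def _plot_shap_globals_sketch():
    global clf, X_train, X_test
    import shap
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC
    X_train, X_test, y_train, _ = train_test_split(*shap.datasets.iris(),
                                                   test_size=0.1, random_state=0)
    clf = SVC(kernel='rbf', probability=True).fit(X_train, y_train)
    PlotShap()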
def show_explanation(self, show_in_note_book=True):
    """Visualization of the SHAP explanation.

    # Arguments
        show_in_note_book: Boolean. Whether to show in a Jupyter notebook.
    """
    Explainer.show_explanation(self)
    shap_values = self.explanation
    expected_value = self.explainer.expected_value
    class_names = self.class_names
    labels = self.labels
    instance = self.instance

    shap.initjs()
    print()
    print("Shap Explanation")
    print()

    assert hasattr(labels, '__len__')
    if len(instance.shape) == 1 or instance.shape[0] == 1:
        # a single instance: print and plot the explanation per label
        for item in labels:
            print("Shap value for label {}:".format(class_names[item]))
            print(shap_values[item])
        if show_in_note_book:
            for item in labels:
                if isinstance(instance, csr_matrix):
                    display(shap.force_plot(expected_value[item], shap_values[item], instance.A))
                else:
                    display(shap.force_plot(expected_value[item], shap_values[item], instance))
    else:
        # multiple instances: show a summary plot instead
        if show_in_note_book:
            shap.summary_plot(shap_values, instance)
def plot_force_plot_best_action(runs=1):
    # create the output directory for the force plots
    Path(f"{args.output_path}forceplot").mkdir(parents=True, exist_ok=True)

    for row_index in range(runs):
        state = [np.ceil(s / .025) for s in shap_model.possibilits[row_index]]
        best_action = np.argmax(predictions[row_index])
        shap.force_plot(
            shap_model.expected_value[best_action],
            shap_model.shap_values[best_action][row_index],
            state,
            matplotlib=True,
            show=False,
            # label each feature with its index and discretized value
            feature_names=[f'F{i}: {s}' for i, s in enumerate(state)],
        )
        fig = plt.gcf()
        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        plt.grid(False)
        plt.subplots_adjust(top=0.5)
        fig.set_size_inches(args.isx, args.isy)
        plt.savefig(
            f"{args.output_path}forceplot/forceplot_{row_index}_{best_action}.pdf",
            bbox_inches='tight')
        plt.close()
def shap_force_plot(self, instance_ind=None, instance_interval=None,
                    show_feature_value=True, feature_names=None):
    try:
        shap.initjs()
        # single-instance plot
        if instance_ind is not None:
            features = self.x_train.iloc[instance_ind] if show_feature_value else None
            if feature_names is None:
                feature_names = self.x_train.columns
            return shap.force_plot(self.expected_v, self.shap_v[instance_ind],
                                   features=features, feature_names=feature_names)
        # plot over a closed interval of instances
        if isinstance(instance_interval, (tuple, list)):
            start, end = instance_interval[0], instance_interval[1]
            features = self.x_train.iloc[start:end + 1, :] if show_feature_value else None
            if feature_names is None:
                feature_names = self.x_train.columns
            return shap.force_plot(self.expected_v, self.shap_v[start:end + 1, :],
                                   features=features, feature_names=feature_names)
        # default: plot over the whole training set
        return shap.force_plot(self.expected_v, self.shap_v, self.x_train,
                               feature_names=self.x_train.columns)
    except Exception as err:
        print('Error: model is not supported by SHAP force plot')
        err_logging(err)
        raise
def feature_importance(self):
    clf_feat = RandomForestClassifier(n_estimators=100)
    clf_feat.fit(self.X_train, self.y_train)

    features = self.X_train.columns
    importances = clf_feat.feature_importances_
    indices = np.argsort(importances)

    plt.title('Feature Importances')
    plt.barh(range(len(indices)), importances[indices], color='b', align='center')
    plt.yticks(range(len(indices)), [features[i] for i in indices])
    plt.xlabel('Relative Importance')
    plt.savefig(feature_importance_image_path)

    # explain a single test row with SHAP and save the force plot
    row_to_show = 5
    data_for_prediction = self.X_test.iloc[row_to_show]
    explainer = shap.TreeExplainer(clf_feat)
    shap_values = explainer.shap_values(data_for_prediction)
    shap.initjs()
    shap.force_plot(explainer.expected_value[1], shap_values[1], data_for_prediction,
                    show=False, matplotlib=True).savefig(shap_image_path)

    # keep only features whose importance exceeds the threshold
    clf = SelectFromModel(RandomForestClassifier(n_estimators=100), threshold=0.10)
    clf.fit(self.X_train, self.y_train)
    self.selected_feat = self.X_train.columns[(clf.get_support())]
def test_front_page_model_agnostic_rank():
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train a SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(),
                                                        test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions
    # (nsamples and l1_reg are shap_values() arguments; passing them to the
    # constructor, as the original did, has no effect)
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, link="logit")
    shap_values = explainer.shap_values(X_test, nsamples=100, l1_reg="rank(3)")

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")
def plot_shap(model, test, instance=None, feature=None, dataset=False):
    """
    Displays SHAP plots to explain a black box model.

    :param model: the fitted model to explain. SHAP values can only be computed after the model has been fit.
    :param test: test dataset.
    :param instance: index of the test instance to explain. default_value=None
    :param feature: feature of the test dataset to explain. default_value=None
    :param dataset: if True the entire dataset is taken into account. default_value=False
    :return:
    """
    # Make an explainer on the given model. Not all models are supported
    explainer = TreeExplainer(model)
    # Compute SHAP values
    shap_values = explainer.shap_values(test)
    initjs()

    # If not None, explain a single prediction
    if instance is not None:
        force_plot(explainer.expected_value, shap_values[instance, :], test.iloc[instance, :], matplotlib=True)

    # If not None, explain a single feature
    if feature is not None:
        fig, ax = plt.subplots(figsize=(13, 10))
        dependence_plot(feature, shap_values, test, ax=ax)

    # If True, explain the entire dataset
    if dataset:
        summary_plot(shap_values, test, plot_size=(8, 8))
        summary_plot(shap_values, test, plot_type="bar", plot_size=(8, 8))
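# A minimal usage sketch for plot_shap above, assuming an XGBoost classifier and
# shap's bundled adult census data ("Age" is one of its columns); the model and
# dataset here are illustrative assumptions, not from the source.
def _plot_shap_usage_sketch():
    import shap
    import xgboost
    X, y = shap.datasets.adult()
    model = xgboost.XGBClassifier(n_estimators=50).fit(X, y)
    # explain one row, one feature, and the whole dataset in a single call
    plot_shap(model, X, instance=0, feature="Age", dataset=True)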
def analyze_shap(self):
    """SHapley Additive exPlanations"""
    # create our SHAP explainer
    shap_explainer = shap.TreeExplainer(self.estimator)

    # impute the test set once and reuse it (the original transformed it twice)
    test_X_imp = self.imputer.transform(self.X_test)

    # calculate the shapley values for our test set
    test_shap_vals = shap_explainer.shap_values(test_X_imp)

    # load JS in order to use some of the plotting functions from the shap
    # package in the notebook
    # shap.initjs()

    test_X_imp_df = pd.DataFrame(test_X_imp, columns=self.features)

    # visualize the first prediction's explanation
    shap.force_plot(shap_explainer.expected_value, test_shap_vals[0, :], test_X_imp_df.iloc[0, :])
def explain(shap_exp: Explanation, training_df, test_df, explanation_target):
    job = shap_exp.job
    model = joblib.load(job.predictive_model.model_path)
    model = model[0]

    shap.initjs()
    explainer = shap.TreeExplainer(model)
    merged_df = pd.concat([training_df, test_df])
    shap_values = explainer.shap_values(merged_df.drop(['trace_id', 'label'], axis=1))

    encoder = retrieve_proper_encoder(job)
    encoder.decode(merged_df, job.encoding)
    encoder.decode(test_df, job.encoding)

    # position of the target trace within the merged frame
    explanation_target_int = merged_df[merged_df['trace_id'] == explanation_target].index.item() + \
        training_df.drop(['trace_id', 'label'], axis=1).shape[0]
    explanation_target_vector = test_df[test_df['trace_id'] == explanation_target].drop(
        ['trace_id', 'label'], axis=1)

    expected_value = explainer.expected_value[0] if explainer.expected_value.size > 1 \
        else explainer.expected_value
    shap_value = shap_values[explanation_target_int, :] if hasattr(shap_values, "size") \
        else shap_values[0][explanation_target_int, :]

    shap.force_plot(expected_value, shap_value, explanation_target_vector,
                    show=False, matplotlib=True).savefig("temporal_shap.svg")

    with open("temporal_shap.svg", "r") as f:
        response = f.read()
    os.remove("temporal_shap.svg")
    return response
def __update(self):
    X = self.dataset.X
    idx = int(self.sample_index)
    model = self.model

    features = [feature.name for feature in self.dataset.domain.attributes]
    explainer = shap.TreeExplainer(model.skl_model)
    shap_values = explainer.shap_values(X)

    if isinstance(self.model.skl_model, SKL_RF):
        # single output: one set of SHAP values
        shap.force_plot(explainer.expected_value, shap_values[idx, :], X[idx, :],
                        feature_names=features, matplotlib=True)
    else:
        # model.skl_model should be RandomForestClassifier:
        # one set of SHAP values per class
        for c in range(len(shap_values)):
            shap.force_plot(explainer.expected_value[c], shap_values[c][idx, :], X[idx, :],
                            feature_names=features, matplotlib=True)
def describe(self, model_name, model, data_dict, n=300):
    try:
        explainer = self.get_explainer(model_name, model, data_dict)
        data_to_pass = data_dict['x_train'][:n]
        if 'Keras' in model_name:
            data_to_pass = data_to_pass.values
        feature_names = data_dict['x_train'].columns

        shap_values = explainer.shap_values(data_to_pass)
        if not isinstance(shap_values, list):
            # single-output model: one force plot
            return {
                "Force plot": shap.force_plot(explainer.expected_value, shap_values,
                                              feature_names=feature_names)
            }

        # multi-class model: one force plot per class
        result_dict = {}
        for i in range(len(shap_values)):
            result_dict[f"Force plot {i} class"] = shap.force_plot(
                explainer.expected_value[i], shap_values[i], feature_names=feature_names)
        return result_dict
    except Exception as e:
        print(e)
        return {}
def inference(data, preprocessor, model):
    data_encoder = preprocessor.transform(data)
    data_encoder.to_csv('data_encoder.csv', index=False)

    if args.algorithm != 'nn':
        normalLogger.debug('do inference...')
        inference_start = time.time()
        y_preds = model.predict(data_encoder)
        preds_prob = model.predict_proba(data_encoder)
        print('predict probability:')
        print(preds_prob)

        y_hat = np.expand_dims(y_preds, axis=0)
        pred_result = np.concatenate((y_hat.T, preds_prob), axis=1)
        normalLogger.debug('finish inference, elapsed %.4fs' % (time.time() - inference_start))

        try:
            # only explain single-row inputs with SHAP
            if len(data_encoder) == 1:
                explainer = shap.TreeExplainer(model)
                shap_values = explainer.shap_values(data_encoder)

                plt.switch_backend('agg')
                if len(shap_values) == 1:
                    local_explain_plot = shap.force_plot(
                        explainer.expected_value, shap_values[0, :],
                        data_encoder.iloc[0, :], show=False, matplotlib=True)
                else:  # lgbm: len(shap_values) == 2
                    local_explain_plot = shap.force_plot(
                        explainer.expected_value[1], shap_values[1][0, :],
                        data_encoder.iloc[0, :], show=False, matplotlib=True)
                local_explain_plot.savefig("shap_importance.png")
        except Exception:
            normalLogger.debug('fail to explain data by shap...')
            shap_values = []
    else:
        tensor_data = torch.from_numpy(np.array(data_encoder)).to(device)
        log_prob = F.log_softmax(model(tensor_data.float()), dim=1)
        preds_prob = torch.exp(log_prob).data.cpu().numpy()
        print(preds_prob)
        y_preds = np.argmax(preds_prob, axis=1)
        y_hat = np.expand_dims(y_preds, axis=0)
        pred_result = np.concatenate((y_hat.T, preds_prob), axis=1)

    return pred_result
def shap_explaination(sk_id_curr):
    '''compute and display the SHAP explanation'''
    if st.button("Explain Results by SHAP"):
        with st.spinner('Calculating...'):
            st.write('__SH__apley __A__dditive ex__P__lanations provide an overview of how the most important features impact the Class prediction')
            st.write('*__Force plot__ shows, __depending on the ground data selected__, how the feature strengths oppose each other*')
            st.write('*Green means the feature value makes Default Risk lower while Red means the feature value makes Default Risk higher*')

            # recover index position of sk_id_curr
            idx = inputs.index.get_loc(sk_id_curr)

            # create individual fig
            ind_fig = shap.force_plot(shap_explainer.expected_value[1],
                                      shap_values[1][idx],
                                      inputs.iloc[[idx]],
                                      plot_cmap="PkYg")
            ind_fig_html = f"<head>{shap.getjs()}</head><body>{ind_fig.html()}</body>"

            # create collective fig
            col_fig = shap.force_plot(shap_explainer.expected_value[1],
                                      shap_values[1][0, :],
                                      inputs.iloc[0, :],
                                      plot_cmap="PkYg")
            col_fig_html = f"<head>{shap.getjs()}</head><body>{col_fig.html()}</body>"

            # create feature-analysis fig over a sample of applications
            feat_fig = shap.force_plot(shap_explainer.expected_value[1],
                                       shap_values[1][:500, :],
                                       inputs.iloc[:500, :],
                                       plot_cmap="PkYg")
            feat_fig_html = f"<head>{shap.getjs()}</head><body>{feat_fig.html()}</body>"

            # Display explainer HTML object col_fig
            st.write('__ - SHAP Force plot considering entire new Applications data (test)__')
            components.html(col_fig_html, height=120)

            # Display explainer HTML object ind_fig
            st.write('__ - SHAP Force plot for the selected Application__')
            components.html(ind_fig_html, height=120)

            # Display explainer HTML object feat_fig
            st.write('__ - SHAP Force plot to provide feature analysis along a sample of Applications (here 10% of test set)__')
            components.html(feat_fig_html, height=350)
def getshapvalue(X, y, clf):
    shap.initjs()
    model = clf.fit(X, y)
    explainer = shap.TreeExplainer(model)
    print(explainer)
    shap_values = explainer.shap_values(X)
    shap.summary_plot(shap_values, X, plot_type="bar")
    shap.summary_plot(shap_values, X)
    shap.force_plot(explainer.expected_value, shap_values, X)
def plot_forces(self, data, index):
    if self.expected_value is None or self.shap_values is None:
        return
    shap.force_plot(self.expected_value, self.shap_values[:index, :], data, link='logit')
def get_shap_explainer(model, X, plot_force=False):
    """Explain model predictions using SHAP"""
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    # visualize the first prediction's explanation
    # (use matplotlib=True to avoid Javascript)
    if plot_force:
        shap.force_plot(explainer.expected_value[0], shap_values[0])
    return explainer, shap_values
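# Hedged usage sketch for get_shap_explainer above: the random-forest model and
# iris data are illustrative assumptions, not from the source.
def _get_shap_explainer_usage_sketch():
    import shap
    from sklearn.ensemble import RandomForestClassifier
    X, y = shap.datasets.iris()
    model = RandomForestClassifier(n_estimators=100).fit(X, y)
    explainer, shap_values = get_shap_explainer(model, X, plot_force=False)
    # mean |SHAP| per feature for class 0 gives a quick global importance ranking
    print(abs(shap_values[0]).mean(axis=0))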
def SHAP(xgb_model, X_train, Y_train):
    explainer = shap.TreeExplainer(xgb_model)
    shap_values = explainer.shap_values(X_train)

    # View SHAP results
    shap.initjs()
    shap.force_plot(explainer.expected_value, shap_values[1000, :], X_train.iloc[1000, :])
    # shap.force_plot(explainer.expected_value, shap_values, X_train)
    shap.summary_plot(shap_values, X_train)
def shap_explain(self, raw_sample):
    # turn the raw Series into a one-row frame before preprocessing
    processed_sample = self.preprocessor.transform(raw_sample.to_frame().transpose())
    sample_shap = self.shap_explainer.shap_values(processed_sample)
    shap.force_plot(self.shap_explainer.expected_value, sample_shap, processed_sample, link="logit")
    return (pd.Series(np.ravel(processed_sample), index=self.feature_names), sample_shap)
def plot_force(self, data, index):
    if self.expected_value is None or self.shap_values is None:
        return
    shap.force_plot(self.expected_value, self.shap_values[index, :], data,
                    link='logit', matplotlib=True)
def single_force_plot(i, html=True):
    if html:
        fig = shap.force_plot(explainer.expected_value, shap_values[i, :],
                              data_to_explain.iloc[i, :], feature_names=feat_used,
                              show=False, link='logit')
        shap.save_html('./result/shap_force_plot_' + str(i) + '.htm', fig)
    else:
        fig = shap.force_plot(explainer.expected_value, shap_values[i, :],
                              data_to_explain.iloc[i, :], feature_names=feat_used,
                              show=False, matplotlib=True, link='logit')
    return fig
def shap_function(model, data, X, y, output):
    shap.initjs()
    # np.object is deprecated; use the builtin object dtype instead
    categorical_features_indices = np.where(X.dtypes == object)[0]
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(Pool(X, y, cat_features=categorical_features_indices))
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])
    shap.summary_plot(shap_values, X, show=False)
    plt.savefig('./plots/shap_values_' + output + '.png')
    plt.clf()
def heart_disease_risk_factors(model, patient):
    # Get SHAP values for each feature
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(patient)

    # Build the force plot once, save it, and return it for display
    # (the original recomputed the plot for the return statement)
    shap.initjs()
    plot = shap.force_plot(explainer.expected_value[1], shap_values[1], patient)
    shap.save_html("./test.html", plot)
    return plot
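# Hedged usage sketch for heart_disease_risk_factors above: assumes a fitted
# binary RandomForestClassifier; the adult census data stands in for patient
# records and is an illustrative assumption.
def _risk_factors_usage_sketch():
    import shap
    from sklearn.ensemble import RandomForestClassifier
    X, y = shap.datasets.adult()
    model = RandomForestClassifier(n_estimators=100).fit(X, y)
    patient = X.iloc[[0]]  # double brackets keep the row 2-D for SHAP
    heart_disease_risk_factors(model, patient)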
def explain_class(model, X, show):
    shap.initjs()
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :],
                    matplotlib=True, show=show, link="logit")
    utils.save_picture(os.path.join(ROOT, 'outputs/force_plot_post.png'))
    utils.clear_plot()