def compute_explanation(model: Model, x, z):
    """ Generates an explanation of how the model came to its result.

    Parameters
    ----------
    model : Model
        a hassbrain model
    x : np.ndarray
        the raw array containing the sensor values
    z : np.ndarray
        the training labels corresponding to the sensor values

    Returns
    -------
    matplotlib.figure.Figure
        a bar chart of the local explanation
    """
    wrapped_model = ModelWrapper(model)
    class_names = model.get_state_lbl_lst()
    feature_names = model.get_obs_lbl_lst()
    # categorical feature indices must cover the features, not the classes
    cat_idxs = [i for i in range(len(feature_names))]
    categorical_names = {}
    for i in cat_idxs:
        categorical_names[i] = {0: "off", 1: "on"}

    from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
    exp = LimeTabularExplainer(x,
                               mode='classification',
                               training_labels=z,
                               feature_names=feature_names,
                               categorical_features=cat_idxs,
                               categorical_names=categorical_names,
                               class_names=class_names)
    fig = exp.explain_instance(x[0], wrapped_model.predict_proba).as_pyplot_figure()
    return fig
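The snippet below is a self-contained sketch of the same LimeTabularExplainer pattern, substituting a scikit-learn classifier and the upstream `lime` package (which skater's vendored copy mirrors) for the hassbrain Model. All names here (sensor_*, 'idle'/'active') are illustrative assumptions, not taken from the source.

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

rng = np.random.RandomState(42)
x = rng.randint(0, 2, size=(100, 4))        # binary on/off sensor readings
z = (x[:, 0] ^ x[:, 1]).astype(int)         # toy activity labels

clf = RandomForestClassifier(n_estimators=10, random_state=42).fit(x, z)

feature_names = ['sensor_0', 'sensor_1', 'sensor_2', 'sensor_3']
cat_idxs = list(range(x.shape[1]))
categorical_names = {i: ['off', 'on'] for i in cat_idxs}

exp = LimeTabularExplainer(x,
                           mode='classification',
                           training_labels=z,
                           feature_names=feature_names,
                           categorical_features=cat_idxs,
                           categorical_names=categorical_names,
                           class_names=['idle', 'active'])
fig = exp.explain_instance(x[0], clf.predict_proba).as_pyplot_figure()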
def test_regression_without_feature_names(self):
    """
    Ensure lime.lime_tabular works when predict_fn = regressor.predict
    and feature names are NOT passed
    :return:
    """
    interpretor = LimeTabularExplainer(self.X, mode="regression")
    assert interpretor.explain_instance(self.example, self.regressor.predict)
def test_classifier_with_proba_without_feature_names(self):
    """
    Ensure lime.lime_tabular works when predict_fn = classifier.predict_proba
    and feature names are NOT passed
    :return:
    """
    interpretor = LimeTabularExplainer(self.X)
    assert interpretor.explain_instance(self.example, self.classifier.predict_proba)
def test_regression_without_feature_names(self):
    """
    Ensure lime.lime_tabular works when predict_fn = regressor.predict
    and feature names are NOT passed
    :return:
    """
    interpretor = LimeTabularExplainer(self.X)
    assert interpretor.explain_regressor_instance(self.example, self.regressor.predict)
def on_value_change(change):
    index = change['new']
    exp = LimeTabularExplainer(X_test, feature_names=features,
                               discretize_continuous=False,
                               class_names=['acdc', 'non_acdc'])
    print("Model behavior at row: {}".format(index))
    # Let's compare the model's prediction with the actual target label
    print("prediction from the model: {}".format(estimator.predict(X_test[index].reshape(1, -1))))
    print("Target Label on the row: {}".format(y_test.reshape(1, -1)[0][index]))
    clear_output()
    display(HTML(exp.explain_instance(X_test[index], models['ensemble'].predict_proba).as_html()))
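For context, this callback is presumably registered on an ipywidgets control; a minimal hedged sketch of the wiring, assuming a Jupyter session where X_test, y_test, features, estimator and models are already defined:

from ipywidgets import IntSlider
from IPython.display import display

row_slider = IntSlider(min=0, max=len(X_test) - 1, value=0, description='row')
row_slider.observe(on_value_change, names='value')  # fires with change['new']
display(row_slider)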
def test_lime_coef_accuracy(self):
    """
    Ensure that for a trivial example, the coefficients of a regressor
    explanation are all similar to the true beta values of the generative
    process.
    :return:
    """
    error_epsilon = .1
    explainer = LimeTabularExplainer(self.X, discretize_continuous=True)
    explanation = explainer.explain_regressor_instance(
        self.example, self.regressor.predict,
        model_regressor=self.model_regressor)

    vals = dict(explanation.as_list())
    keys = ['{} <= 0.00'.format(i) for i in [2, 1, 0]]
    lime_coefs = np.array([vals[key] for key in keys])
    assert (abs(self.regressor.coef_ - lime_coefs) < error_epsilon).all()
def test_lime_coef_accuracy(self):
    """
    Ensure that for a trivial example, the coefficients of a regressor
    explanation are all similar to the true beta values of the generative
    process.
    :return:
    """
    error_epsilon = .1
    explainer = LimeTabularExplainer(self.X, discretize_continuous=True,
                                     mode="regression")
    explanation = explainer.explain_instance(self.example,
                                             self.regressor.predict,
                                             model_regressor=self.model_regressor)

    vals = dict(explanation.as_list())
    keys = ['{} <= 0.00'.format(i) for i in [2, 1, 0]]
    lime_coefs = np.array([vals[key] for key in keys])
    assert (abs(self.regressor.coef_ - lime_coefs) < error_epsilon).all()
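A standalone sketch of the generative setup these coefficient tests assume: a noiseless linear target whose betas a local LIME surrogate should roughly recover. The data and betas below are invented for illustration and use the upstream `lime` package directly.

import numpy as np
from sklearn.linear_model import LinearRegression
from lime.lime_tabular import LimeTabularExplainer

rng = np.random.RandomState(0)
X = rng.normal(size=(500, 3))
beta = np.array([1.0, 2.0, 3.0])
y = X @ beta                                  # noiseless linear target

regressor = LinearRegression().fit(X, y)
explainer = LimeTabularExplainer(X, mode='regression',
                                 discretize_continuous=True)
explanation = explainer.explain_instance(X[0], regressor.predict)
# each entry pairs a discretized feature rule with its local weight
print(explanation.as_list())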
def test_classifier_no_proba_without_feature_names(self):
    """
    Ensure lime.lime_tabular raises NotImplementedError when
    predict_fn = classifier.predict (hard labels, no probabilities)
    and feature names are NOT passed
    :return:
    """
    interpretor = LimeTabularExplainer(self.X)
    interpretor_func = partial(interpretor.explain_instance,
                               *[self.example, self.classifier.predict])
    self.assertRaises(NotImplementedError, interpretor_func)
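For reference, upstream lime raises NotImplementedError in classification mode when the predict function returns hard labels rather than probabilities; a hedged sketch reproducing that behavior with scikit-learn, using invented data:

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from lime.lime_tabular import LimeTabularExplainer

rng = np.random.RandomState(1)
X = rng.normal(size=(200, 3))
y = (X[:, 0] > 0).astype(int)
clf = DecisionTreeClassifier().fit(X, y)

explainer = LimeTabularExplainer(X)            # mode defaults to 'classification'
try:
    explainer.explain_instance(X[0], clf.predict)  # hard labels, not proba
except NotImplementedError as err:
    print(err)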
                                                       with_variance=True,
                                                       figsize=(10, 5))

model_feature_interaction = InMemoryModel(estimator.predict_proba,
                                          examples=X_train,
                                          target_names=['acdc', 'non_acdc'])

# Two-way interaction
interpreter.partial_dependence.plot_partial_dependence(
    [("maxDeltaEta_tag_tag", "mass_higgsLikeDijet")], model, grid_resolution=10)

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(X_train, feature_names=features,
                           class_names=['acdc', 'non_acdc'],
                           discretize_continuous=True)
plt.show()

# explain prediction for data point for background label 'non_acdc'
exp.explain_instance(X_test[1], estimator.predict_proba)

# Interactive slider for controlling grid resolution
def understanding_interaction():
    pyint_model = InMemoryModel(estimator.predict_proba, examples=X_test,
                                target_names=features)
    # ['worst area', 'mean perimeter'] --> list(feature_selection.value)
    # Two-way interaction
    interpreter.partial_dependence.plot_partial_dependence(
        ["mass_tag_tag_max_mass", "maxDeltaEta_jet_jet"], model,
        grid_resolution=grid_resolution.value,
class Explanator(object):

    def __init__(self, mdl, test_x, test_z):
        from hassbrain_algorithm.benchmark.interpretation import ModelWrapper
        # keep the wrapped model so explain()/plot_*() can call predict_proba
        self._wrapped_model = ModelWrapper(mdl)
        class_names = mdl.get_state_lbl_lst()
        feature_names = mdl.get_obs_lbl_lst()
        cat_idxs = [i for i in range(len(feature_names))]
        categorical_names = {}
        for i in cat_idxs:
            categorical_names[i] = {0: "off", 1: "on"}

        from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
        self._exp = LimeTabularExplainer(test_x,
                                         mode='classification',
                                         training_labels=test_z,
                                         feature_names=feature_names,
                                         categorical_features=cat_idxs,
                                         categorical_names=categorical_names,
                                         class_names=class_names)

    def get_explanator(self):
        return self._exp

    def explain(self, x):
        assert isinstance(x, np.ndarray)
        assert len(x.shape) == 1
        lst = self._exp.explain_instance(
            x, self._wrapped_model.predict_proba).as_list()
        return lst

    def plot_explanation(self, x):
        assert isinstance(x, np.ndarray)
        fig = self._exp.explain_instance(
            x, self._wrapped_model.predict_proba).as_pyplot_figure()
        fig.show()

    def plot_and_save_explanation(self, x, labels, file_paths):
        assert isinstance(x, np.ndarray)
        enc_labels = self.expl_to_ids(labels)
        exp = self._exp.explain_instance(x, self._wrapped_model.predict_proba,
                                         labels=enc_labels)  # type: Explanation
        for lbl, file_path in zip(enc_labels, file_paths):
            fig = self.as_pyplot_figure(exp, label=lbl)
            import matplotlib.pyplot as plt
            plt.tight_layout()
            fig.savefig(file_path, dpi=fig.dpi)

    def expl_to_ids(self, labels):
        """ Encodes class labels as their indices in the explainer's class list.

        Parameters
        ----------
        labels : list
            list of labels

        Returns
        -------
        list
            encoded labels
        """
        exp = self._exp  # type: LimeTabularExplainer
        classnames = exp.class_names
        enc_lbl_lst = []
        for lbl in labels:
            assert lbl in classnames
            for i, item in enumerate(classnames):
                if item == lbl:
                    enc_lbl_lst.append(i)
        return enc_lbl_lst

    def as_pyplot_figure(self, exp, label=1, **kwargs):
        """Returns the explanation as a pyplot figure.

        Will throw an error if you don't have matplotlib installed.

        Args:
            label: desired label. If you ask for a label for which an
                explanation wasn't computed, will throw an exception.
                Will be ignored for regression explanations.
            kwargs: keyword arguments, passed to domain_mapper

        Returns:
            pyplot figure (barchart).
        """
        import matplotlib.pyplot as plt
        explst = exp.as_list(label=label, **kwargs)
        fig = plt.figure()
        vals = [x[1] for x in explst]
        names = [x[0] for x in explst]
        vals.reverse()
        names.reverse()
        colors = ['black' if x > 0 else 'red' for x in vals]
        pos = np.arange(len(explst)) + .5
        plt.barh(pos, vals, align='center', color=colors)
        plt.yticks(pos, names)
        #title = 'Local explanation for class %s' % exp.class_names[label]
        #plt.title(title)
        return fig
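The bar-chart logic in as_pyplot_figure can be exercised on its own; the minimal sketch below feeds it a hand-made (feature, weight) list in place of a real LIME Explanation, with invented feature names.

import numpy as np
import matplotlib.pyplot as plt

explst = [('kitchen_light = on', 0.42),
          ('front_door = off', -0.17),
          ('stove = on', 0.31)]
vals = [v for _, v in explst][::-1]            # reverse so the top bar is first
names = [n for n, _ in explst][::-1]
colors = ['black' if v > 0 else 'red' for v in vals]
pos = np.arange(len(explst)) + .5
plt.barh(pos, vals, align='center', color=colors)
plt.yticks(pos, names)
plt.tight_layout()
plt.show()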
feature_indices = [i for i, feature in enumerate(wqp_feature_names)
                   if feature in ['alcohol', 'volatile acidity']]
meu.plot_model_decision_surface(clf=wqp_rf,
                                train_features=wqp_train_SX[:, feature_indices],
                                train_labels=wqp_train_y,
                                plot_step=0.02, cmap=plt.cm.RdYlBu,
                                markers=[',', 'd', '+'],
                                alphas=[1.0, 0.8, 0.5],
                                colors=['r', 'b', 'y'])


# ## Interpreting Model Predictions

# In[36]:

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(wqp_train_SX, feature_names=wqp_feature_names,
                           discretize_continuous=True,
                           class_names=wqp_rf.classes_)


# In[80]:

exp.explain_instance(wqp_test_SX[10], wqp_rf.predict_proba,
                     top_labels=1).show_in_notebook()


# In[81]:

exp.explain_instance(wqp_test_SX[747], wqp_rf.predict_proba,
                     top_labels=1).show_in_notebook()


# ## Visualizing partial dependencies
# ## One-way partial dependence plot

# In[38]:

p = interpreter.partial_dependence.plot_partial_dependence(['worst area'], model,
                                                           grid_resolution=50,
                                                           with_variance=True,
                                                           figsize=(6, 4))


# ## Explaining Predictions

# In[39]:

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(X_train, feature_names=data.feature_names,
                           discretize_continuous=True, class_names=['0', '1'])


# In[40]:

exp.explain_instance(X_test[0], logistic.predict_proba).show_in_notebook()


# In[41]:

exp.explain_instance(X_test[1], logistic.predict_proba).show_in_notebook()


# # Model Deployment
#feature_names = hmm_model.get_obs_lbl_lst()
#class_names = hmm_model.get_state_lbl_lst()

def boolean_arr2str(arr):
    res = arr.astype(str)
    return res

# categorical feature indices must cover the features, not the classes
cat_idxs = [i for i in range(len(feature_names))]
categorical_names = {}
for i in cat_idxs:
    categorical_names[i] = {0: "off", 1: "on"}

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(train_x,
                           mode='classification',
                           training_labels=train_z,
                           feature_names=feature_names,
                           categorical_features=cat_idxs,
                           categorical_names=categorical_names,
                           class_names=class_names)

#fig = exp.explain_instance(train_x[0], model.predict_proba).as_pyplot_figure()
#fig.show()
x = train_x[0]
lst = exp.explain_instance(x, model.predict_proba).as_list()
print(lst)
def run_explanations(csv_path, csv_columns, target_column, zero_value):
    # Read the dataset from the provided CSV and print out information about it.
    df = pd.read_csv(csv_path, names=csv_columns, skipinitialspace=True, skiprows=1)
    #df = df.drop('Target', axis=1)
    input_features = [name for name in csv_columns if name != target_column]
    #data, labels = shap.datasets.adult(display=True)

    if target_column not in csv_columns:
        print("target column error")
        return ("target column error")
    elif zero_value not in df[target_column].tolist():
        if str.isdecimal(zero_value) and (
                np.int64(zero_value) in df[target_column].tolist()
                or np.float64(zero_value) in df[target_column].tolist()):
            print("happy")
            zero_value = np.int64(zero_value)
        else:
            print(zero_value, df[target_column].tolist(), df[target_column].dtype)
            return ("zero value error")

    # Binarize the target: zero_value maps to 0, everything else to 1.
    labels = df[target_column].tolist()
    #labels = np.array([int(label) for label in labels])
    labels2 = []
    for label in labels:
        if label == zero_value:
            labels2.append(0)
        else:
            labels2.append(1)
    labels = np.array(labels2)

    # Encode all non-numeric columns as integer category codes.
    data = df[input_features]
    for feature in input_features:
        if data[feature].dtype is not np.dtype(np.int64) \
                and data[feature].dtype is not np.dtype(np.float64) \
                and data[feature].dtype is not np.dtype(np.float32):
            data[feature] = data[feature].astype('category')
    cat_cols = data.select_dtypes(['category']).columns
    data[cat_cols] = data[cat_cols].apply(lambda x: x.cat.codes)

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(data, labels,
                                                        test_size=0.3,
                                                        random_state=42)
    data_disp, labels_disp = shap.datasets.adult(display=True)
    X_train_disp, X_test_disp, y_train_disp, y_test_disp = train_test_split(
        data_disp, labels_disp, test_size=0.3, random_state=42)

    xgc = xgb.XGBClassifier(n_estimators=500, max_depth=5, base_score=0.5,
                            objective='binary:logistic', random_state=42)
    xgc.fit(X_train, y_train)
    predictions = xgc.predict(X_test)

    fig = plt.figure(figsize=(16, 12))
    title = fig.suptitle("Default Feature Importances from XGBoost", fontsize=14)

    ax1 = fig.add_subplot(2, 2, 1)
    xgb.plot_importance(xgc, importance_type='weight', ax=ax1)
    t = ax1.set_title("Feature Importance - Feature Weight")

    ax2 = fig.add_subplot(2, 2, 2)
    xgb.plot_importance(xgc, importance_type='gain', ax=ax2)
    t = ax2.set_title("Feature Importance - Split Mean Gain")

    ax3 = fig.add_subplot(2, 2, 3)
    xgb.plot_importance(xgc, importance_type='cover', ax=ax3)
    t = ax3.set_title("Feature Importance - Sample Coverage")
    #plt.savefig('static/explanations.png')

    explanation = eli5.explain_weights(xgc.get_booster())
    explanation_html = eli5.formatters.html.format_as_html(explanation)
    print(explanation_html)
    with open("templates/explanation.html", "a+") as file:
        file.write(explanation_html)

    doc_num = 0
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    #eli5.show_prediction(xgc.get_booster(), X_test.iloc[doc_num],
    #                     feature_names=list(data.columns), show_feature_values=True)
    explanation2 = eli5.explain_prediction(xgc.get_booster(), X_test.iloc[doc_num],
                                           feature_names=list(data.columns))
    explanation_html2 = eli5.formatters.html.format_as_html(explanation2)
    with open("templates/explanation.html", "a") as file:
        file.write(explanation_html2)

    doc_num = 2
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    #eli5.show_prediction(xgc.get_booster(), X_test.iloc[doc_num],
    #                     feature_names=list(data.columns), show_feature_values=True)
    explanation3 = eli5.explain_prediction(xgc.get_booster(), X_test.iloc[doc_num],
                                           feature_names=list(data.columns))
    explanation_html3 = eli5.formatters.html.format_as_html(explanation3)
    with open("templates/explanation.html", "a") as file:
        file.write(explanation_html3)

    #target_names = ['$50K or less', 'More than $50K']
    interpreter = Interpretation(training_data=X_test, training_labels=y_test,
                                 feature_names=list(data.columns))
    im_model = InMemoryModel(xgc.predict_proba, examples=X_train)
    plots = interpreter.feature_importance.plot_feature_importance(
        im_model, ascending=True, n_samples=23000)
    plots[0].savefig('skater.png')

    features_pdp = input_features
    # Refit on raw numpy arrays so LIME can call predict_proba without
    # pandas feature-name validation getting in the way.
    xgc_np = xgb.XGBClassifier(n_estimators=500, max_depth=5, base_score=0.5,
                               objective='binary:logistic', random_state=42)
    xgc_np.fit(X_train.values, y_train)

    # In[ ]:

    from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
    exp = LimeTabularExplainer(X_test.values, feature_names=list(data.columns),
                               discretize_continuous=True)

    doc_num = 0
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    instance = exp.explain_instance(X_test.iloc[doc_num].values, xgc_np.predict_proba)
    instance.save_to_file('templates/lime.html', show_all=False)

    doc_num = 2
    print('Actual Label:', y_test[doc_num])
    print('Predicted Label:', predictions[doc_num])
    instance2 = exp.explain_instance(X_test.iloc[doc_num].values, xgc_np.predict_proba)
    instance2.save_to_file('templates/lime2.html', show_all=False)

    explainer = shap.TreeExplainer(xgc)
    shap_values = explainer.shap_values(X_test)
    pd.DataFrame(shap_values).head()

    #shap.force_plot(explainer.expected_value, shap_values[:,], X_test_disp.iloc[:,],
    #                show=False, matplotlib=True)
    #plt.savefig("static/force_plot.png")
    shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
    plt.savefig("static/summary_plot.png")
    shap.summary_plot(shap_values, X_test, show=False)
    plt.savefig("static/summary_plot2.png")
    return "Everyone Happy"
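A hypothetical invocation of run_explanations, assuming an adult-census-style CSV on disk; the file name, column list, and zero_value below are illustrative, not taken from the source.

columns = ['Age', 'Workclass', 'fnlwgt', 'Education', 'Education-Num',
           'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex',
           'Capital Gain', 'Capital Loss', 'Hours per week', 'Country',
           'Target']
result = run_explanations('adult.csv', columns,
                          target_column='Target', zero_value='<=50K')
print(result)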
# In[203]:

p = interpreter.partial_dependence.plot_partial_dependence(['worst area'], model,
                                                           grid_resolution=50,
                                                           with_variance=True,
                                                           figsize=(6, 4))


# ## Explaining Predictions

# In[195]:

from skater.core.local_interpretation.lime.lime_tabular import LimeTabularExplainer
exp = LimeTabularExplainer(X_train, feature_names=data.feature_names,
                           discretize_continuous=True, class_names=['0', '1'])


# In[204]:

exp.explain_instance(X_test[0], logistic.predict_proba).show_in_notebook()


# In[202]:

exp.explain_instance(X_test[1], logistic.predict_proba).show_in_notebook()


# # Model Deployment

# ## Persist model to disk

# In[207]: