def export_explanations(model, X_test=X_test, class_names=None, name="name", n_exp=20):
    """Run SP-LIME on text data and write every picked explanation to disk.

    For each explanation selected by the submodular pick, an HTML file
    ``<name>explanation<i>.html`` and a matplotlib image ``<name>Imagem<i>``
    are saved.

    Args:
        model: Pipeline (vectorizer + model) to explain; its
            ``predict_proba`` is handed to ``SubmodularPick``.
        X_test: Subset of the test data used to compute the best coverage
            of features to explain.
        class_names: Class labels given to ``LimeTextExplainer``. Defaults
            to ``["Irrelevante", "Evidência"]``.
        name: Prefix used for every file written to disk.
        n_exp: Passed to ``SubmodularPick`` both as ``sample_size`` and as
            ``num_exps_desired``.
    """
    # The argument used to be overwritten unconditionally; only fall back to
    # the historical labels when the caller did not supply any.
    if class_names is None:
        class_names = ["Irrelevante", "Evidência"]
    explainer = LimeTextExplainer(class_names=class_names)

    # Sub-modular pick.
    sp_obj = submodular_pick.SubmodularPick(
        explainer,
        X_test,
        model.predict_proba,
        sample_size=n_exp,
        num_features=15,
        num_exps_desired=n_exp,
    )

    for i, exp in enumerate(sp_obj.sp_explanations):
        exp.save_to_file(file_path="{}explanation{}.html".format(name, i))
        fig = exp.as_pyplot_figure(label=exp.available_labels()[0])
        fig.savefig("{}Imagem{}".format(name, i))
        # Release each figure as soon as it is saved instead of keeping
        # all of them open until the end.
        plt.close(fig)

    plt.close('all')
def fit(self, X, sample_size=20, num_expected_examples=15):
    """Run SP-LIME over ``X`` and keep the result on ``self.splime``.

    Args:
        X: Data handed to ``SubmodularPick`` as the instances to explain.
        sample_size: Forwarded to ``SubmodularPick`` as ``sample_size``.
        num_expected_examples: Forwarded as ``num_exps_desired``.

    Uses ``self.explainer``, ``self.predict_fn`` and ``self.num_features``.
    """
    # Reference notebook:
    # https://github.com/marcotcr/lime/blob/master/doc/notebooks/Submodular%20Pick%20examples.ipynb
    picker = submodular_pick.SubmodularPick(
        self.explainer,
        X,
        self.predict_fn,
        sample_size=sample_size,
        num_features=self.num_features,
        num_exps_desired=num_expected_examples,
    )
    self.splime = picker
def generate_global_lime_explanations(self):
    """Build (or load from cache) SP-LIME global explanations and export a PDF.

    A ``LimeTabularExplainer`` is built from ``self.X_train``. The submodular
    pick result is cached as a pickle under
    ``explainer_outputs/LIME_pickles/<model>_LIME_SP_<grid>``; when that file
    exists it is loaded instead of being recomputed. Every picked explanation
    is then rendered as one page of
    ``explainer_outputs/LIME/Global/<grid>/<model>LIME_SP.pdf``.

    Returns:
        The ``SubmodularPick`` object (freshly computed or unpickled).
    """
    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(self.X_train),
        class_names=['unstable', 'stable'],
        mode="classification",
        feature_names=self.feature_names)

    # LIME Global Explainer with Submodular Pick.
    model_name = type(self.model).__name__
    predict_fn = None
    if model_name == 'Sequential':
        if self.model.name == 'DNN':
            predict_fn = self.dnn_model_predict
            model_name = 'DNN'
        elif self.model.name == 'RNN':
            predict_fn = self.rnn_model_predict
            model_name = 'RNN'
    if predict_fn is None:
        # Every model without a dedicated wrapper falls back to its own
        # predict_proba, so predict_fn is always bound.
        predict_fn = self.model.predict_proba

    root = Path(".")
    my_file = Path(root / "explainer_outputs" / "LIME_pickles" /
                   (model_name + '_LIME_SP_' + self.grid))

    if my_file.is_file():
        print("EXISTS!!!!!!!!!!!!!!")
        # NOTE: pickle.load can execute arbitrary code; this must only ever
        # read cache files written by this method.
        with open(my_file, "rb") as pickle_in:
            sp_obj = pickle.load(pickle_in)
        print("LOADED!!!!!!!!!!!!!!")
    else:
        print("DOESNT EXIST. CREATING NEW")
        start = timer()
        sp_obj = submodular_pick.SubmodularPick(
            explainer,
            np.array(self.X_train),
            predict_fn,
            num_features=self.X_test.shape[1],
            num_exps_desired=5)
        end = timer()
        print('Global LIME Explanations: ', end - start)

        # Store in pickle; make sure the cache directory exists first.
        my_file.parent.mkdir(parents=True, exist_ok=True)
        with open(my_file, "wb") as pickle_out:
            pickle.dump(sp_obj, pickle_out)

        # Timing only exists when the explanations were actually computed.
        sp_explanation_time = end - start
        print('LIME Global Explanation time: ', sp_explanation_time)

    dir_name = os.path.join('explainer_outputs', 'LIME', 'Global', self.grid)
    os.makedirs(dir_name, exist_ok=True)
    path_global = os.path.join(dir_name, model_name + 'LIME_SP.pdf')

    with PdfPages(path_global) as pdf:
        for exp in sp_obj.sp_explanations:
            fig = exp.as_pyplot_figure(label=exp.available_labels()[0])
            pdf.savefig(fig, bbox_inches='tight')
            plt.close(fig)

    return sp_obj
def explain_anecdotes_lime_sp():
    """Run SP-LIME on the anecdotes data, export the explanations and time it.

    Each entry of ``sp_obj.explanations`` is written to
    ``exp/anecdotes/ANEC_<idx>.html`` and the whole pick object is pickled
    to ``exp/anecdotes/SP.obj``. The elapsed time of the submodular pick is
    printed at the end.
    """
    explainer = LimeTextExplainer(class_names=anecdotes_labels)

    # Problem: 5000 perturbations per sample; the API has to be bypassed in
    # order to change that.
    start = time.time()
    sp_obj = submodular_pick.SubmodularPick(
        explainer,
        anecdotes_data,
        anecdotes_predict_lime,
        sample_size=1,
        num_features=5,
        num_exps_desired=10,
    )
    end = time.time()

    # NOTE(review): this iterates `explanations`, not `sp_explanations` --
    # confirm that exporting every generated explanation is intended.
    for idx, exp_item in enumerate(sp_obj.explanations):
        exp_item.save_to_file("exp/anecdotes/ANEC_" + str(idx) + ".html")

    # Context manager guarantees the handle is closed even if dumping fails.
    with open("exp/anecdotes/SP.obj", "wb") as filehandler:
        pickle.dump(sp_obj, filehandler)

    print('execution time: ', end - start)
def explain_model(self, sample_size=5, num_exps_desired=5, num_features=5, num_samples=10, show_in_notebook=False):
    """Run SP-LIME on the stored training data and return the picked explanations.

    Args:
        sample_size: Forwarded to ``SubmodularPick`` as ``sample_size``.
        num_exps_desired: Forwarded as ``num_exps_desired``.
        num_features: Forwarded as ``num_features``.
        num_samples: Forwarded as the extra keyword ``num_samples``.
        show_in_notebook: When true, ``as_pyplot_figure()`` is called on
            every picked explanation before returning.

    Returns:
        A list with the ``as_list()`` output of every picked explanation.
    """
    picker = submodular_pick.SubmodularPick(
        self.__explainer,
        self.__train,
        self.__pred_fn,
        method='sample',
        sample_size=sample_size,
        num_features=num_features,
        num_exps_desired=num_exps_desired,
        num_samples=num_samples,
    )

    if show_in_notebook:
        for picked in picker.sp_explanations:
            picked.as_pyplot_figure()

    summaries = []
    for picked in picker.sp_explanations:
        summaries.append(picked.as_list())
    return summaries
def seq_explain_global(self, letters, sample_size=2, num_features=6, num_exps_desired=2):
    """Extract representative explanations for a set of letters using SP-LIME.

    :param letters: the letters to explain, handed to ``SubmodularPick`` as its data
    :param sample_size(int): forwarded to ``SubmodularPick`` as ``sample_size``
    :param num_features(int): forwarded to ``SubmodularPick`` as ``num_features``
    :param num_exps_desired(int): forwarded to ``SubmodularPick`` as ``num_exps_desired``
    :return: the ``sp_explanations`` attribute of the resulting pick object
    """
    text_explainer = lime.lime_text.LimeTextExplainer(
        bow=False,
        class_names=self.predictor.class_names,
    )
    picker = submodular_pick.SubmodularPick(
        text_explainer,
        letters,
        self.predictor.seq_predict,
        sample_size=sample_size,
        num_features=num_features,
        num_exps_desired=num_exps_desired,
    )
    picked = picker.sp_explanations
    return picked
def __compute_lime_importance(self, x_train, y_train, x_test, y_test, estimator):
    """Summarise LIME weights over ``x_test`` as a feature-importance table.

    A regression-mode ``LimeTabularExplainer`` is built from ``x_train`` /
    ``y_train`` and ``SubmodularPick`` is run on ``x_test`` with
    ``estimator.predict``. The absolute weights of every entry in the pick
    object's ``explanations`` list are aggregated per feature.

    ``y_test`` is accepted but not used by this method.

    Returns:
        A DataFrame with columns ``feature_importance`` (mean absolute
        weight), ``ci_fixed`` (1.96 * population std / sqrt(number of
        explanations)), and ``errors``, ``std_errors``, ``success_count``
        (all ``None``).
    """
    tabular_explainer = lime.lime_tabular.LimeTabularExplainer(
        x_train.values,
        training_labels=y_train.values,
        feature_names=x_train.columns.tolist(),
        verbose=False,
        mode='regression',
        discretize_continuous=False,
        random_state=self._seed,
    )
    picker = submodular_pick.SubmodularPick(
        tabular_explainer,
        x_test.values,
        estimator.predict,
        num_features=len(x_test.columns),
        num_exps_desired=self._num_rounds,
    )

    # One row per explanation, one column per feature.
    weight_rows = []
    for explanation in picker.explanations:
        weight_rows.append(dict(explanation.as_list(label=0)))
    abs_weights = pd.DataFrame(weight_rows).abs()

    n_rows = len(abs_weights)
    half_width = 1.96 * abs_weights.std(ddof=0) / np.sqrt(n_rows)

    permutation_importance = pd.DataFrame({
        'feature_importance': abs_weights.mean(),
        'ci_fixed': half_width,
        'errors': None,
        'std_errors': None,
        'success_count': None,
    })
    return permutation_importance
X_train, mode='classification', training_labels=y_train, feature_names=X_data.columns.values, random_state=369) # idx = 12 #9 # exp = explainer.explain_instance(X_test[idx], model.predict_proba, num_features=5) # print('id: %d' % idx) # # xgboost(like sklearn) expects X as 2D data(n_samples, n_features).If you want to predict only one sample # # you can reshape your feature vector to a 2D array # print('Probability (right case) =', model.predict_proba(np.array(X_test[idx]).reshape((1, -1)))[0, 1]) # print('True class: %s' % y_test.values[idx]) # print('Explanation for class right') # print('\n'.join(map(str, exp.as_list()))) # fig = exp.as_pyplot_figure() # plt.show() # Submodular Pick sp_obj = submodular_pick.SubmodularPick(explainer, X_train, model.predict_proba, sample_size=50, num_features=5, num_exps_desired=1) for exp in sp_obj.sp_explanations: # https://stackoverflow.com/questions/60914598/keyerror-1-in-using-sp-lime-with-lightgbm exp.as_pyplot_figure(label=exp.available_labels()[0]) plt.show() print(exp.as_list(label=exp.available_labels()[0])) print('done')
class_names=iris.target_names, discretize_continuous=True) exp = explainer.explain_instance(test[0], rf.predict_proba, num_features=4, top_labels=3) figure = exp.as_html() fig = exp.as_pyplot_figure() fig.tight_layout() fig.savefig('export/single.pdf', format='pdf') f = open('export/single.html', 'w') f.write(figure) f.close() sp = submodular_pick.SubmodularPick(explainer, train, rf.predict_proba, sample_size=20, num_exps_desired=3) i = 0 for exp in sp.sp_explanations: print('instance: ', exp.predict_proba) figure = exp.as_pyplot_figure() figure.tight_layout() figure.savefig('export/multi{}.pdf'.format(i), format='pdf') i += 1
# Prediction of my model print('Predicted proba that customer will churn, with true model', y_proba2[i,1]) # The prediction made by LIME print('Predicted proba that customer will churn, with LIME', exp.local_pred[0]) exp.intercept # bias term for the local explanation ### SP_LIME - Submodular pick. # Import import warnings from lime import submodular_pick # SP-LIME returns explanations (on a sample set) to provide a non redundant global decision boundary of original model sp_obj = submodular_pick.SubmodularPick(explainer, X_train, classifier.predict_proba, num_features=10, num_exps_desired=5) # Not used sample_size= ? param. [exp.show_in_notebook(show_table=True, show_all=False) for exp in sp_obj.sp_explanations]; # visualise the 5 explanations selected by SP-LIME to provide a global understanding od the model. # [exp.as_pyplot_figure() for exp in sp_obj.sp_explanations]; # Other visualisation possibility # I can explain the model predictions for the test set in a similar way. # When applied on training set, we focus on the inner workings of the model to gain an understanding of it or to improve it. # When applied on test set, model is already understood and diffused. It could be used in a multitude of different ways by the company to reduce customer churn. ### TEST SET # If we want to be more concise/selective in our explanations.
def explain(self, **kwargs):
    """Explain the model globally (Submodular Pick) or for a single case.

    Keyword Args:
        index: Row position of the case to explain. When omitted (or
            ``None``) a Submodular Pick over the whole dataset is run
            instead. ``0`` is a valid index.
        sample_size: Number of rows handed to ``SubmodularPick`` as
            ``sample_size``; defaults to 25% of the dataset.
        num_features: Forwarded to ``SubmodularPick`` (default 10).
        num_exps_desired: Forwarded to ``SubmodularPick`` (default 10).
        print_exps: When true, print every averaged contribution.

    Returns:
        Without ``index``: an ``OrderedDict`` mapping feature to mean
        absolute contribution, sorted in descending order.
        With ``index``: ``self.explanation``.
    """
    # Do not use `or None` here: it would turn a legitimate index of 0 into
    # None and silently trigger the Submodular Pick branch.
    index = kwargs.get("index")
    # sample size is comprised of 25% of total dataset by default
    sample_size = kwargs.get("sample_size") or round(
        self.data.shape[0] * 0.25)
    num_features = kwargs.get("num_features") or 10
    num_exps_desired = kwargs.get("num_exps_desired") or 10
    print_exps = kwargs.get("print_exps") or False
    features = self.data.drop([self.target], axis=1)

    if index is None:
        if self.verbose:
            print("*** Generating explanations using Submodular Pick...")
            print("Sample size chosen: {}".format(round(sample_size)))
        sp_obj = submodular_pick.SubmodularPick(
            self.explainer,
            np.asarray(features),
            self.model.predict_proba,
            sample_size=sample_size,
            num_features=num_features,
            num_exps_desired=num_exps_desired,
        )

        for sp_exp in sp_obj.sp_explanations:
            exp = sp_exp.as_list(label=sp_exp.available_labels()[0])
            self._append_explanation(exp)

        # Create an averaged contribution for all features across all
        # explanations: keep only the magnitude (sign is discarded) and drop
        # features that appear in at most 25% of `num_exps_desired`.
        explanation = defaultdict(list)
        for exp in self.explanations:
            for feature in exp:
                explanation[feature].append(exp[feature])

        avg_explanation = {
            feature: np.mean(np.abs(np.array(explanation[feature])))
            for feature in explanation
            if len(explanation[feature]) > num_exps_desired * 0.25
        }
        avg_explanation = OrderedDict(
            sorted(avg_explanation.items(),
                   key=lambda x: x[1],
                   reverse=True))

        if print_exps:
            for column, value in avg_explanation.items():
                print("{0} = {1}".format(column, value))

        return avg_explanation

    try:
        if self.verbose:
            print(f"Explaining prediction for case #{index}")
        row_feat = np.asarray(features)[int(index), :].reshape(1, -1)

        y_true, y_pred, y_prob = (
            self.data[self.target][int(index)],
            self.model.predict(row_feat),
            self.model.predict_proba(row_feat),
        )

        if self.verbose:
            print("Model predicted class {0} with class score {1:.3f}".
                  format(y_pred, y_prob[0, int(y_pred)]))
            print("Actual class is {0}".format(y_true))

        exp = self.explainer.explain_instance(row_feat[0],
                                              self.model.predict_proba)

        self._append_explanation(
            exp.as_list(label=exp.available_labels()[0]))
    except Exception as e:
        # Best effort: report the failure and still return what is stored.
        print("Error occurred: {}".format(str(e)))
        print(traceback.format_exc())

    return self.explanation
#subtask 2 shap.initjs() explainer2 = shap.TreeExplainer(lgbm) shap_values = explainer2.shap_values(x2_array) shap.force_plot(explainer2.expected_value[0], numpy.array(shap_values)[0][0, :], (x2_array)[0, :], feature_names[0:11]) # shap.force_plot(explainer2.expected_value[0], numpy.array(shap_values)[1][0, :], (x2_array)[1, :], feature_names[0:11]) # #subtask 3 from lime import submodular_pick sp_obj = submodular_pick.SubmodularPick(explainer1, x_train_array, lgbm.predict_proba, sample_size=20, num_features=11, num_exps_desired=10) [ exp.show_in_notebook(show_table=True, show_all=False) for exp in sp_obj.sp_explanations ] #subtask 4 shap_values = explainer2.shap_values(x_train_array, y_train_array) shap.summary_plot(numpy.array(shap_values)[0], x_train_array, feature_names=feature_names) shap_values = explainer2.shap_values(x_test_array, y_test_array) shap.summary_plot(numpy.array(shap_values)[0], x_test_array,