Example #1
def export_explanations(model, X_test, class_names=["Irrelevante", "Evidência"], name="name", n_exp=20):
    '''
    Arguments:
        model: a pipeline (vectorizer + classifier) whose predictions are explained.
        X_test: subset of test data used to compute the best coverage of features to explain.
        class_names: label names passed to the explainer.
        name: prefix used for the files written to disk.
        n_exp: number of explanations to pick (also used as the sample size).
    '''
    explainer = LimeTextExplainer(class_names=class_names)

    # Sub-modular pick: select n_exp representative explanations
    sp_obj = submodular_pick.SubmodularPick(explainer, X_test, model.predict_proba,
                                            sample_size=n_exp, num_features=15,
                                            num_exps_desired=n_exp)
    imagens = [exp.as_pyplot_figure(label=exp.available_labels()[0])
               for exp in sp_obj.sp_explanations]

    # Write each explanation to disk as HTML and as an image
    for i, exp in enumerate(sp_obj.sp_explanations):
        exp.save_to_file(file_path="{}explanation{}.html".format(name, i))
    for i, img in enumerate(imagens):
        img.savefig("{}Imagem{}".format(name, i))
        plt.close('all')
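One way this helper might be called, as a rough sketch: the pipeline and the docs_train / y_train / docs_test names below are illustrative placeholders, not variables defined in the example above.

# Usage sketch (docs_train, y_train, docs_test are placeholder data, not part of the example above)
from lime.lime_text import LimeTextExplainer
from lime import submodular_pick
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

text_clf = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
text_clf.fit(docs_train, y_train)

# Writes report_explanation0.html ... and report_Imagem0.png ... to the working directory
export_explanations(text_clf, X_test=docs_test, name="report_", n_exp=20)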
Example #2
    def fit(self, X, sample_size=20, num_expected_examples=15):
        # https://github.com/marcotcr/lime/blob/master/doc/notebooks/Submodular%20Pick%20examples.ipynb
        self.splime = submodular_pick.SubmodularPick(
            self.explainer,
            X,
            self.predict_fn,
            sample_size=sample_size,
            num_features=self.num_features,
            num_exps_desired=num_expected_examples)
Example #3
File: xai.py Project: mahmouddraz/tranpy
    def generate_global_lime_explanations(self):
        explainer = lime_tabular.LimeTabularExplainer(
            training_data=np.array(self.X_train),
            class_names=['unstable', 'stable'],
            mode="classification",
            feature_names=self.feature_names)
        model_name = type(self.model).__name__

        # LIME Global Explainer with Submodular Pick
        if model_name == 'Sequential':
            if self.model.name == 'DNN':
                predict_fn = self.dnn_model_predict
                model_name = 'DNN'
            elif self.model.name == 'RNN':
                predict_fn = self.rnn_model_predict
                model_name = 'RNN'
            else:
                raise ValueError(
                    "Unsupported Keras model: {}".format(self.model.name))
        else:
            predict_fn = self.model.predict_proba

        root = Path(".")
        my_file = Path(root / "explainer_outputs" / "LIME_pickles" /
                       (model_name + '_LIME_SP_' + self.grid))

        if my_file.is_file():
            # Re-use the cached SubmodularPick object if it exists on disk
            print("Loading cached SP-LIME explanations")
            with open(my_file, "rb") as pickle_in:
                sp_obj = pickle.load(pickle_in)
        else:
            print("No cached explanations found, computing SP-LIME")
            start = timer()
            sp_obj = submodular_pick.SubmodularPick(
                explainer,
                np.array(self.X_train),
                predict_fn,
                num_features=self.X_test.shape[1],
                num_exps_desired=5)
            end = timer()

            # Store in a pickle for later re-use
            with open(my_file, "wb") as pickle_out:
                pickle.dump(sp_obj, pickle_out)
            print('LIME global explanation time: ', end - start)

        dir_name = os.path.join('explainer_outputs', 'LIME', 'Global',
                                self.grid)
        path_global = os.path.join(dir_name, model_name + 'LIME_SP.pdf')
        with PdfPages(path_global) as pdf:
            for exp in sp_obj.sp_explanations:
                fig = exp.as_pyplot_figure(label=exp.available_labels()[0])
                pdf.savefig(fig, bbox_inches='tight')
                plt.close()
        return sp_obj
Example #4
def explain_anecdotes_lime_sp():
    explainer = LimeTextExplainer(class_names=anecdotes_labels)
    # Problem: 5,000 perturbations per sample; the API has to be worked around to change that
    start = time.time()
    sp_obj = submodular_pick.SubmodularPick(explainer, anecdotes_data, anecdotes_predict_lime,
                                            sample_size=1, num_features=5, num_exps_desired=10)
    end = time.time()
    # Note: sp_obj.explanations holds every sampled explanation; sp_obj.sp_explanations is the picked subset
    for idx, exp_item in enumerate(sp_obj.explanations):
        exp_item.save_to_file("exp/anecdotes/ANEC_" + str(idx) + ".html")
    with open("exp/anecdotes/SP.obj", "wb") as filehandler:
        pickle.dump(sp_obj, filehandler)
    print('execution time: ', end - start)
Example #5
    def explain_model(self,
                      sample_size=5,
                      num_exps_desired=5,
                      num_features=5,
                      num_samples=10,
                      show_in_notebook=False):
        sp_obj = submodular_pick.SubmodularPick(
            self.__explainer,
            self.__train,
            self.__pred_fn,
            method='sample',
            sample_size=sample_size,
            num_features=num_features,
            num_exps_desired=num_exps_desired,
            num_samples=num_samples)

        if show_in_notebook:
            [exp.as_pyplot_figure() for exp in sp_obj.sp_explanations]
        return [exp.as_list() for exp in sp_obj.sp_explanations]
Example #6
    def seq_explain_global(self, letters, sample_size=2, num_features=6, num_exps_desired=2):
        """
        Extracts the most representative explanations for a set of letters using SP-LIME.

        :param letters(list): letters split into sentences and preprocessed
        :param sample_size(int): number of instances to sample for explanation
        :param num_features(int): number of features to score per sample
        :param num_exps_desired(int): number of representative explanations to pick

        :return: the explanations selected by SubmodularPick, one per picked instance
        """
        explainer = lime.lime_text.LimeTextExplainer(
            bow=False,
            class_names=self.predictor.class_names
        )
        sp_obj = submodular_pick.SubmodularPick(explainer,
                                                letters,
                                                self.predictor.seq_predict,
                                                sample_size=sample_size,
                                                num_features=num_features,
                                                num_exps_desired=num_exps_desired)
        return sp_obj.sp_explanations
Example #7
    def __compute_lime_importance(self, x_train, y_train, x_test, y_test,
                                  estimator):

        lime_explainer = lime.lime_tabular.LimeTabularExplainer(
            x_train.values,
            training_labels=y_train.values,
            feature_names=x_train.columns.tolist(),
            verbose=False,
            mode='regression',
            discretize_continuous=False,
            random_state=self._seed)
        sp_obj_cr = submodular_pick.SubmodularPick(
            lime_explainer,
            x_test.values,
            estimator.predict,
            num_features=len(x_test.columns),
            num_exps_desired=self._num_rounds)
        # W_s = pd.DataFrame([dict(this.as_list(label=0)) for this in sp_obj_cr.explanations])
        # rank_w_s = W_s[x_test.columns].abs().rank(1, ascending=False, method='first')
        # rank_w_s_median, rank_w_s_mean = rank_w_s.median(), rank_w_s.mean()
        # rank_w_s_median.name = 'median_rank'
        # rank_w_s_mean.name = 'mean_rank'
        # ranked_features = pd.concat([rank_w_s_median, rank_w_s_mean], axis=1).sort_values(
        #     by=['median_rank', 'mean_rank'],
        #     ascending=[False, False])
        # # min_row = ranked_features.index[:self._k].values
        # # columns = set(x_test.columns) - set(min_row)
        # # self._importance = ranked_features.head(self._k).reset_index().values
        # Mean absolute LIME weight per feature across the generated explanations,
        # with a 95% confidence interval on that mean
        res = pd.DataFrame(
            [dict(this.as_list(label=0)) for this in sp_obj_cr.explanations])
        res = res.abs()
        imp_ci = 1.96 * res.std(ddof=0) / np.sqrt(len(res))
        permutation_importance = pd.DataFrame(
            dict(feature_importance=res.mean(),
                 ci_fixed=imp_ci,
                 errors=None,
                 std_errors=None,
                 success_count=None), )

        return permutation_importance
Example #8
        X_train,
        mode='classification',
        training_labels=y_train,
        feature_names=X_data.columns.values,
        random_state=369)

    # idx = 12 #9
    # exp = explainer.explain_instance(X_test[idx], model.predict_proba, num_features=5)
    # print('id: %d' % idx)
    # # xgboost(like sklearn) expects X as 2D data(n_samples, n_features).If you want to predict only one sample
    # # you can reshape your feature vector to a 2D array
    # print('Probability (right case) =', model.predict_proba(np.array(X_test[idx]).reshape((1, -1)))[0, 1])
    # print('True class: %s' % y_test.values[idx])
    # print('Explanation for class right')
    # print('\n'.join(map(str, exp.as_list())))
    # fig = exp.as_pyplot_figure()
    # plt.show()

    # Submodular Pick
    sp_obj = submodular_pick.SubmodularPick(explainer,
                                            X_train,
                                            model.predict_proba,
                                            sample_size=50,
                                            num_features=5,
                                            num_exps_desired=1)
    for exp in sp_obj.sp_explanations:
        # https://stackoverflow.com/questions/60914598/keyerror-1-in-using-sp-lime-with-lightgbm
        exp.as_pyplot_figure(label=exp.available_labels()[0])
        plt.show()
        print(exp.as_list(label=exp.available_labels()[0]))
    print('done')
Example #9
    class_names=iris.target_names,
    discretize_continuous=True)

exp = explainer.explain_instance(test[0],
                                 rf.predict_proba,
                                 num_features=4,
                                 top_labels=3)

figure = exp.as_html()
fig = exp.as_pyplot_figure()
fig.tight_layout()
fig.savefig('export/single.pdf', format='pdf')

with open('export/single.html', 'w') as f:
    f.write(figure)

sp = submodular_pick.SubmodularPick(explainer,
                                    train,
                                    rf.predict_proba,
                                    sample_size=20,
                                    num_exps_desired=3)

for i, exp in enumerate(sp.sp_explanations):
    print('instance: ', exp.predict_proba)
    figure = exp.as_pyplot_figure()
    figure.tight_layout()
    figure.savefig('export/multi{}.pdf'.format(i), format='pdf')
Example #10
# Prediction of my model
print('Predicted proba that customer will churn, with true model', y_proba2[i,1])
# The prediction made by LIME
print('Predicted proba that customer will churn, with LIME', exp.local_pred[0])  
exp.intercept   # bias term for the local explanation



### SP_LIME - Submodular pick.

# Import
import warnings
from lime import submodular_pick

# SP-LIME returns explanations (computed on a sample of instances) that together give a non-redundant, global picture of the original model's decision boundary
sp_obj = submodular_pick.SubmodularPick(explainer, X_train, classifier.predict_proba, num_features=10, num_exps_desired=5)  # sample_size is left at its default here
# Visualise the 5 explanations selected by SP-LIME to get a global understanding of the model
[exp.show_in_notebook(show_table=True, show_all=False) for exp in sp_obj.sp_explanations];
# [exp.as_pyplot_figure() for exp in sp_obj.sp_explanations];  # Other visualisation possibility 



# The model's predictions on the test set can be explained in the same way.
# Applied to the training set, the focus is on the inner workings of the model, to understand or improve it.
# Applied to the test set, the model is already understood and deployed; the explanations could be used by the company in many different ways to reduce customer churn.



### TEST SET 


# If we want to be more concise/selective in our explanations. 
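A possible sketch of what this test-set section might contain, mirroring the training-set call above; it assumes `explainer`, `classifier` and a held-out feature matrix `X_test` (same format as `X_train`) are defined earlier in the script.

# Sketch only: SP-LIME on the held-out test set, mirroring the training-set call above.
# Assumes explainer, classifier and X_test (same format as X_train) already exist.
sp_obj_test = submodular_pick.SubmodularPick(explainer,
                                             X_test,
                                             classifier.predict_proba,
                                             sample_size=20,       # explain a sample of 20 test instances
                                             num_features=10,
                                             num_exps_desired=3)   # keep only 3 representative explanations

# Visualise the picked explanations, as done for the training set above
[exp.show_in_notebook(show_table=True, show_all=False) for exp in sp_obj_test.sp_explanations];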
Example #11
    def explain(self, **kwargs):
        """
        Explains a given model: a submodular pick over the dataset by default, or a single-instance explanation when an "index" kwarg is given.
        """

        index = kwargs.get("index")  # .get() keeps a valid index of 0, which "or None" would have discarded
        # sample size defaults to 25% of the total dataset
        sample_size = kwargs.get("sample_size",
                                 round(self.data.shape[0] * 0.25))
        num_features = kwargs.get("num_features", 10)
        num_exps_desired = kwargs.get("num_exps_desired", 10)
        print_exps = kwargs.get("print_exps", False)

        features = self.data.drop([self.target], axis=1)

        if index is None:
            if self.verbose:
                print("*** Generating explanations using Submodular Pick...")
                print("Sample size chosen: {}".format(round(sample_size)))

            sp_obj = submodular_pick.SubmodularPick(
                self.explainer,
                np.asarray(features),
                self.model.predict_proba,
                sample_size=sample_size,
                num_features=num_features,
                num_exps_desired=num_exps_desired,
            )

            for sp_exp in sp_obj.sp_explanations:
                exp = sp_exp.as_list(label=sp_exp.available_labels()[0])
                self._append_explanation(exp)

            # average the contribution of each feature across all explanations
            # feature importance is taken as magnitude, discarding the sign
            # features that appear in at most 25% of the desired explanations are dropped
            explanation = defaultdict(list)
            for exp in self.explanations:
                for feature in exp:
                    explanation[feature].append(exp[feature])
            avg_explanation = {
                feature: np.mean(np.abs(np.array(explanation[feature])))
                for feature in explanation
                if len(explanation[feature]) > num_exps_desired * 0.25
            }
            avg_explanation = OrderedDict(
                sorted(avg_explanation.items(),
                       key=lambda x: x[1],
                       reverse=True))

            if print_exps:
                for column, value in avg_explanation.items():
                    print("{0} = {1}".format(column, value))
            return avg_explanation

        else:
            try:
                if self.verbose:
                    print(f"Explaining prediction for case #{index}")

                row_feat = np.asarray(features)[int(index), :].reshape(1, -1)
                y_true, y_pred, y_prob = (
                    self.data[self.target][int(index)],
                    self.model.predict(row_feat),
                    self.model.predict_proba(row_feat),
                )

                if self.verbose:
                    print("Model predicted class {0} with class score {1:.3f}".
                          format(y_pred, y_prob[0, int(y_pred)]))
                    print("Actual class is {0}".format(y_true))

                exp = self.explainer.explain_instance(row_feat[0],
                                                      self.model.predict_proba)
                self._append_explanation(
                    exp.as_list(label=exp.available_labels()[0]))
            except Exception as e:
                print("Error occurred: {}".format(str(e)))
                print(traceback.format_exc())

        return self.explanations
Example #12
    #subtask 2
    shap.initjs()
    explainer2 = shap.TreeExplainer(lgbm)
    shap_values = explainer2.shap_values(x2_array)
    shap.force_plot(explainer2.expected_value[0],
                    numpy.array(shap_values)[0][0, :], (x2_array)[0, :],
                    feature_names[0:11])  #
    shap.force_plot(explainer2.expected_value[0],
                    numpy.array(shap_values)[1][0, :], (x2_array)[1, :],
                    feature_names[0:11])  #

    #subtask 3
    from lime import submodular_pick
    sp_obj = submodular_pick.SubmodularPick(explainer1,
                                            x_train_array,
                                            lgbm.predict_proba,
                                            sample_size=20,
                                            num_features=11,
                                            num_exps_desired=10)
    [
        exp.show_in_notebook(show_table=True, show_all=False)
        for exp in sp_obj.sp_explanations
    ]

    #subtask 4
    shap_values = explainer2.shap_values(x_train_array, y_train_array)
    shap.summary_plot(numpy.array(shap_values)[0],
                      x_train_array,
                      feature_names=feature_names)
    shap_values = explainer2.shap_values(x_test_array, y_test_array)
    shap.summary_plot(numpy.array(shap_values)[0],
                      x_test_array,