Example #1
    def fit(
        self, model: RegressorMixin, X: np.ndarray, feature_names: List[str] = None
    ):
        """ Fit function. The initialization of `LimeTabular` is made here.
        This choice has been made, since it needs a fitted scikit-learn model as input.

        Parameters
        ----------
        model: RegressorMixin, required
            scikit-learn model given as input
        X: np.ndarray, required
            train matrix
        feature_names: List[str], optional, (default=``None``)
            the names of the feature columns of X

        Returns
        -------
        Fitted _LimeExplainer
        """
        check_is_fitted(model)
        if feature_names is None:
            feature_names = self._define_feature_names(X)
        else:
            feature_names = list(feature_names)

        self.model_ = model
        self.explainer_ = lime_tabular.LimeTabularExplainer(
            X, feature_names=feature_names, mode="regression"
        )
        self.feature_names_ = feature_names
        return self
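
A minimal usage sketch for this fit method, assuming a no-argument _LimeExplainer constructor; the regressor and the data below are illustrative, not from the source:

    # Hedged usage sketch; names and values are illustrative.
    import numpy as np
    from sklearn.ensemble import RandomForestRegressor

    rng = np.random.default_rng(0)
    X_train = rng.random((100, 4))
    y_train = rng.random(100)
    model = RandomForestRegressor(n_estimators=10).fit(X_train, y_train)  # must already be fitted

    explainer = _LimeExplainer().fit(model, X_train, feature_names=["f0", "f1", "f2", "f3"])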
Example #2
    def __init__(self, *argv, **kwargs):
        """
        Initialize lime Tabular Explainer object
        """
        super(LimeTabularExplainer, self).__init__(*argv, **kwargs)

        self.explainer = lime_tabular.LimeTabularExplainer(*argv, **kwargs)
Example #3
 def lime_explainer(self):
     self.explainer = lt.LimeTabularExplainer(
         np.array(self.data["train_df"]),
         feature_names=self.feature_names,
         verbose=False,
         mode="regression",
     )
Example #4
    def __init__(self, dataset, verbose=True):

        train_dataset, training_labels = dataset.make_numpy_array(
            dataset.get_train_file())

        mode = dataset.get_mode()
        (
            categorical_features,
            categorical_index,
            categorical_names,
        ) = dataset.get_categorical_features()
        unique = dataset.get_target_labels()

        self._mode = mode
        self.dataset = dataset

        self._explainer = lime_tabular.LimeTabularExplainer(
            train_dataset,
            feature_names=dataset.get_feature_names(),
            class_names=unique,
            categorical_features=categorical_index,
            categorical_names=categorical_names,
            training_labels=training_labels,
            verbose=verbose,
            mode=self._mode,
        )
Example #5
 def fit(self, model: sklearn.base.BaseEstimator, x_train: Union[pd.Series, pd.DataFrame, np.ndarray],
         y_train: pd.DataFrame, ):
     x_train = self._get_dataframe_from_mixed_input(x_train)
     super().fit(model, x_train, y_train)
     self._explainer = lime_tabular.LimeTabularExplainer(x_train.values, feature_names=x_train.columns,
                                                         class_names=self.class_names,
                                                         categorical_features=self.categorical_features,
                                                         discretize_continuous=True)
     return self
Example #6
    def generate_global_lime_explanations(self):
        explainer = lime_tabular.LimeTabularExplainer(
            training_data=np.array(self.X_train),
            class_names=['unstable', 'stable'],
            mode="classification",
            feature_names=self.feature_names)
        model_name = type(self.model).__name__

        # LIME Global Explainer with Submodular Pick
        if model_name == 'Sequential':
            if self.model.name == 'DNN':
                predict_fn = self.dnn_model_predict
                model_name = 'DNN'
            elif self.model.name == 'RNN':
                predict_fn = self.rnn_model_predict
                model_name = 'RNN'
        else:
            predict_fn = self.model.predict_proba

        root = Path(".")
        my_file = Path(root / "explainer_outputs" / "LIME_pickles" /
                       (model_name + '_LIME_SP_' + self.grid))

        if my_file.is_file():
            print("Found cached SubmodularPick object; loading it")
            with open(my_file, "rb") as pickle_in:
                sp_obj = pickle.load(pickle_in)
        else:
            print("No cached object found; computing a new one")
            start = timer()
            sp_obj = submodular_pick.SubmodularPick(
                explainer,
                np.array(self.X_train),
                predict_fn,
                num_features=self.X_test.shape[1],
                num_exps_desired=5)
            end = timer()
            print('Global LIME Explanations: ', end - start)

            # Cache the SubmodularPick object for reuse
            with open(my_file, "wb") as pickle_out:
                pickle.dump(sp_obj, pickle_out)
            sp_explanation_time = end - start
            print('LIME Global Explanation time: ', sp_explanation_time)

        dir_name = os.path.join('explainer_outputs', 'LIME', 'Global',
                                self.grid)
        path_global = os.path.join(dir_name, model_name + 'LIME_SP.pdf')
        with PdfPages(path_global) as pdf:
            for exp in sp_obj.sp_explanations:
                fig = exp.as_pyplot_figure(label=exp.available_labels()[0])
                pdf.savefig(fig, bbox_inches='tight')
                plt.close()
        return sp_obj
Example #7
def scoreComment():
    # text of comment
    comment = request.form.get("comment")
    reddit_url = request.form.get('reddit_link')
    cleaned_article_text = request.form.get('cleaned_article_text')
    no_url_article_text = request.form.get('no_url_article_text')
    no_stop_article_text = request.form.get('no_stop_article_text')
    no_stop_or_url_article_text = request.form.get(
        'no_stop_or_url_article_text')

    swearwords_df = pd.read_csv('files/edited-swear-words.csv')
    swearwords = swearwords_df.swear.tolist()
    features = [
        'profanity', 'length', 'adjWordScore', 'NER_count', 'NER_match',
        'WordScore', 'WholeScore', 'contains_url', 'no_url_WordScore',
        'no_url_WholeScore', 'WordScoreNoStop', 'WholeScoreNoStop',
        'no_url_or_stops_WholeScore', 'no_url_or_stops_WordScore'
    ]
    our_model = load("updated_model.pkl",
                     compression="lzma",
                     set_default_extension=False)
    punctuation_lst = [
        ',', '.', '!', '?', '<', '>', '/', ':', ';', '\'', '\"', '[', '{', ']',
        '}', '|', '\\', '`', '~', '!', '@', '#', '$', '%', '^', '&', '*', '(',
        ')', '-', '_', '=', '+'
    ]

    #Lime stuff to add
    new_X_train = np.loadtxt('files/X_train.csv', delimiter=',')

    full_score = ab.judgeComment(comment, reddit_url, swearwords, features,
                                 our_model, cleaned_article_text,
                                 no_url_article_text, no_stop_article_text,
                                 no_stop_or_url_article_text, punctuation_lst)
    print('This is full_score[3][0]: \n')
    print(full_score[3][0])

    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(new_X_train),
        feature_names=features,
        class_names=[False, True],
        mode='classification')

    exp = explainer.explain_instance(data_row=full_score[3][0],
                                     predict_fn=our_model.predict_proba)

    score = str(full_score[1]) + str(full_score[2])

    img = exp.as_pyplot_figure()
    img.savefig('files/visual.pdf', bbox_inches='tight')
    webbrowser.open('files/visual.pdf')

    print("made it")
    return jsonify(score=score)
Example #8
def lime_explanation(model, X, ys=None, num_samples=1000, multiprocessing=True):
    from lime import lime_tabular
    import warnings
    warnings.filterwarnings("ignore", message="Singular matrix")
    warnings.filterwarnings("ignore", message="Ill-conditioned")

    # identify categorical data
    # https://stackoverflow.com/questions/47094676/how-to-identify-the-categorical-variables-in-the-200-numerical-variables?noredirect=1&lq=1
    X = X.numpy()
    ys = ys.numpy()
    df = pd.DataFrame(X)
    categorical_features = detect_categorical_top_k(df)
    # print(categorical_features)
    lime_expl = lime_tabular.LimeTabularExplainer(training_data=X,
                                                  training_labels=ys,
                                                  categorical_features=categorical_features,
                                                  class_names=np.unique(ys),
                                                  discretizer="decile"
                                                  )

    num_features = X.shape[1]  # one weight per feature column
    predict_fn = lambda x: model.predict_proba(x)

    def lime_explanation_row(data_row, predict_fn, data_labels, num_features, num_samples):
        # print(data_row.shape)
        # experiment with labels:
        # either labels=[data_label] or None
        explanation = lime_expl.explain_instance(data_row, predict_fn, labels=data_labels, top_labels=1,
                                                 num_features=num_features,
                                                 num_samples=num_samples, distance_metric='euclidean',
                                                 model_regressor=None)

        data_label = list(explanation.local_exp.keys())[0]
        # print(explanation.local_exp[data_label])

        # sort and return importance
        feature_importance = list(zip(*sorted(explanation.local_exp[data_label])))[1]
        return feature_importance

    from functools import partial

    lime_partial = partial(lime_explanation_row, predict_fn=predict_fn, data_labels=[], num_features=num_features,
                           num_samples=num_samples)

    if multiprocessing:
        print("*" * 10)
        print("NOT IMPLEMENTED: reverting to SLOW compute")
        print("*" * 10)
        result = np.apply_along_axis(lime_partial, 1, X)
        # result = parallel_apply_along_axis(lime_explanation_row, axis=1, arr=X,)
    else:
        result = np.apply_along_axis(lime_partial, 1, X)
    return result
Example #9
    def lime_analysis(self, cat_features=None):

        self.split_predictions()
        self.limeObj = lime_tabular.LimeTabularExplainer(
            training_data=self.x_train,
            feature_names=self.featureNames,
            categorical_features=cat_features)
        print("Explaining... this might take some time")
        self.lime_expl(self.limeObj, self.model, self.false_pos,
                       "False Positive")
        #self.lime_expl(self.limeObj, self.model, self.false_neg, "False Negative")
        print("Done! Required graphs are in corresponding folder")
Example #10
def explain_tree(data, period, ratings, model, train_set, sov_lab_encoder, le,
                 feat_key, print_exp):

    import numpy as np
    from lime import lime_tabular
    #    import webbrowser

    X_new = np.array(data.loc[feat_key.index].T)
    if sov_lab_encoder is not None:
        pos_sr = feat_key.index.get_loc(
            feat_key[feat_key["Key"] == 'SovereignRating'].index[0])
        sob_rating = X_new[:, pos_sr].copy()
        X_new[:, pos_sr] = sov_lab_encoder.transform(X_new[:, pos_sr])

    # Predicting to check actual prediction
#    pred_calif = np.array([le.iloc[x == list(le.iloc[:,0]),0].index[0] for x in model.predict(X_new)])

    X_new = X_new.astype('float')

    # features_names = sum([feature_names_key], [])
    # print(features_names)
    class_names = list(le.index)[0:-1]
    class_names.reverse()
    feature_names = list(
        feat_key.index
    )  # Use .index (very long names) or .Key (Ratio and #)
    # Create the LIME explainer and the prediction lambda
    explainer = lime_tabular.LimeTabularExplainer(train_set,
                                                  mode='classification',
                                                  feature_names=feature_names,
                                                  class_names=class_names,
                                                  discretize_continuous=True)

    predict_fn_rf = lambda x: model.predict_proba(x).astype(float)

    # explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=feature_names,
    #                                                   class_names=class_names, categorical_features=columns,
    #                                                   categorical_names=feature_names_cat, kernel_width=3)
    # Explaining prediction with Lime
    exp = explainer.explain_instance(X_new[period],
                                     model.predict_proba,
                                     num_features=5,
                                     top_labels=ratings)
    exp.show_in_notebook(show_table=True, show_all=False)
    #print(exp.available_labels())
    exp.save_to_file('explainer/lime_output.html')

    if print_exp:
        av_lab = exp.available_labels()
        for lab in av_lab:
            print('Explanation for class %s' % class_names[lab])
            print('\n'.join(map(str, exp.as_list(label=lab))))
            print()
Example #11
 def __init__(self, preprocessor, model, X, y, feature_names):
     self.preprocessor = preprocessor
     self.model = model
     self.X = X
     self.y = y
     self.feature_names = feature_names
     self.shap_explainer = shap.TreeExplainer(self.model)
     self.lime_explainer = lime_tabular.LimeTabularExplainer(
         X,
         training_labels=self.y,
         feature_names=feature_names,
         class_names=[False, True])
     self.shap_values = self.shap_explainer.shap_values(X)
Example #12
    def explainPredictions(self):
        '''Use LIME (https://github.com/marcotcr/lime) to give local explanations for the predictions of certain points'''

        explainer = lime_tabular.LimeTabularExplainer(
            training_data=self.data.X_train.values,  # training data
            mode='classification',
            feature_names=list(self.data.X_train),  # names of all features (regardless of type)
            class_names=['background', 'signal'],  # class names
            #class_names=[0,1],            # class names
            discretize_continuous=True,
            categorical_features=None,
            categorical_names=None,
        )

        def predict_fn_keras(x):
            if x.ndim >= 2:
                pred = self.model.predict(x, batch_size=1)
            else:
                pred = self.model.predict(x.reshape(1, x.shape[-1]),
                                          batch_size=1)
            return np.concatenate((1. - pred, pred), axis=1)

        for i in range(0, 10):
            # could also iterate over len(self.data.X_test)
            exp = explainer.explain_instance(
                data_row=self.data.X_test.values[
                    random.randint(0, len(self.data.X_test) - 1)
                ],  # 1d numpy array corresponding to a single row
                predict_fn=predict_fn_keras,  # classifier prediction probability function
                labels=[1],  # iterable with labels to be explained
                num_features=self.data.X_train.shape[1],  # max features in the explanation
                # top_labels=0,                # explanations for the K labels with highest prediction probabilities
                # num_samples=2000,            # size of the neighborhood to learn the linear model
                # distance_metric='euclidean'  # the distance metric to use for weights
            )

            out = os.path.join(self.output, 'explanations')
            if not os.path.exists(out): os.makedirs(out)
            exp.save_to_file(
                os.path.join(out, 'explanation' + str(i) + '.html'))
            # print(exp.as_pyplot_figure())
            exp.as_pyplot_figure().savefig(
                os.path.join(out, 'explanation' + str(i) + '.png'))
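
The predict_fn_keras wrapper above turns a single sigmoid output column into the two-column probability matrix LIME expects; a standalone check of that reshaping with dummy values, no Keras needed:

    import numpy as np

    pred = np.array([[0.8], [0.1]])                    # stand-in for model.predict(x)
    probs = np.concatenate((1. - pred, pred), axis=1)  # same reshaping as predict_fn_keras
    print(probs)                                       # [[0.2 0.8] [0.9 0.1]]; rows sum to 1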
Example #13
def run_lime_sklearn(classifier):
    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(X_train),
        feature_names=X_train.columns,
        class_names=['Fail', 'Pass'],
        mode='classification')

    exp = explainer.explain_instance(data_row=X_test.iloc[1],
                                     predict_fn=classifier.predict_proba)

    plt.close()
    exp.as_pyplot_figure()
    plt.tight_layout()
    plt.show()
Example #14
    def _initialize(self, **kwargs):
        if self.verbose:
            print("Setting up LIME explainer")

        features = self.data.drop([self.target], axis=1)
        self.explainer = lime_tabular.LimeTabularExplainer(
            features,
            feature_names=kwargs.get("feature_names")
            or list(features.columns),
            class_names=kwargs.get("feature_names")
            or ["Outcome (no)", "Outcome (yes)"],
            mode=kwargs.get("mode") or "classification",
            discretize_continuous=kwargs.get("discretize_continuous") or False,
        )
Example #15
def calculate_values(number_of_rows=200,
                     number_of_explanations=11,
                     which_explainer='random',
                     nsampleslist=[100],
                     feature_rankings=['first', 'middle', 'last'],
                     strategies=["mean", "distribution"],
                     verbose=0):
    explainer = None
    if which_explainer == 'lime':
        # NOTE: these look like class names; LimeTabularExplainer's
        # training_labels parameter expects per-row training labels.
        explainer = lime_tabular.LimeTabularExplainer(
            X_train, training_labels=['paid', 'unpaid'])
    elif which_explainer == 'shap':
        explainer = shap.KernelExplainer(predict_fn, X_train[0:1000])

    neutral_points = ((df[df['loan_repaid'] != 0].mean() +
                       df[df['loan_repaid'] != 1].mean()) /
                      2).drop('loan_repaid')
    results = []

    predicted_classes = model.predict_classes(X_test)

    correctly_predicted_indices = get_correctly_predicted_indices(
        number_of_rows)

    counter = 0

    for nsamples in nsampleslist:
        # for row_number in range(number_of_rows):
        for row_number in correctly_predicted_indices:
            print("Row Number {} counter: {}".format(row_number, counter))
            counter = counter + 1
            for no_exp in range(number_of_explanations):
                if (predicted_classes[row_number][0] != y_test[row_number]):
                    print("Predicted and actual classes are different, skip")
                    continue

                for feature_ranking in feature_rankings:
                    for strategy in strategies:
                        # print("Explanation number:{}---Exaplainer:{}----NSamples:{}---no_exp:{} --feature_ranking:{}---strategy:{}".format(no_exp, which_explainer,
                        #                                                                     nsamples, no_exp, feature_ranking, strategy))
                        calculate_values_datapoint(explainer, neutral_points,
                                                   no_exp, nsamples, results,
                                                   row_number, verbose,
                                                   which_explainer,
                                                   feature_ranking, strategy)

    print(results)
    return results
Example #16
    def explain_instance_tabular_data(instance):
        newshape = numpy.prod(instance.shape)

        if notebook['model_type'] == "NEURAL NETWORK":
            model = keras.models.load_model(
                "NOTEBOOK_" + notebook_name_dict['notebook_name'] +
                "_neural_network_model.hdf5")
            target = list(
                map(
                    numpy.argmax,
                    model.predict(
                        numpy.reshape(instance,
                                      newshape=(1, *instance.shape)))[0]))[0]
        else:
            target = notebook['model'].predict([instance])[0]

        explainer = lt.LimeTabularExplainer(
            training_data=notebook['x_train'],
            feature_names=[str(i) for i in range(len(instance))])
        exp = explainer.explain_instance(instance,
                                         predict_fn,
                                         num_features=len(instance),
                                         num_samples=min(
                                             len(notebook['x_train']), 100),
                                         labels=(target, ))
        exp.as_pyplot_figure(label=target).savefig(
            "../UI/src/assets/" + "NOTEBOOK_" + notebook['notebook_name'] +
            "_investigate_model_instance1.jpg",
            figsize=(50, 50))
        exp.save_to_file(file_path="../UI/src/assets/" + "NOTEBOOK_" +
                         notebook['notebook_name'] +
                         "_investigate_model_instance.html")
        notebook['explanation'] = "NOTEBOOK_" + notebook[
            'notebook_name'] + "_investigate_model_instance.html"

        set_notebook_data(notebook_name_dict['notebook_name'])

        try:
            keras.backend.clear_session()
        except Exception:
            pass

        return json_encoder.encode({
            'explanation':
            "NOTEBOOK_" + notebook['notebook_name'] +
            "_investigate_model_instance.html"
        })
Example #17
 def lime():
     df = load_data()
     X = df.drop(columns=['target'])
     y = df.target
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
     clf = pickle.load(open('Model/model.pkl', 'rb'))
     # `client` is assumed to be a DataFrame defined elsewhere in the app
     explainer = lime_tabular.LimeTabularExplainer(
         training_data=np.array(client),
         feature_names=client.columns,
         class_names=['Non Solvable', 'Solvable'],
         mode='classification')
     exp = explainer.explain_instance(
         data_row=client.iloc[0],
         predict_fn=clf.predict_proba, num_features=7)
     # Display explainer HTML object
     components.html(exp.as_html(), height=400)
     return
Example #18
    def __init__(self,
                 train,
                 training_labels,
                 feature_names,
                 class_names,
                 categorical_features,
                 categorical_names,
                 mode,
                 verbose=True):

        self._mode = mode

        self._explainer = lime_tabular.LimeTabularExplainer(
            train,
            feature_names=feature_names,
            class_names=class_names,
            categorical_features=categorical_features,
            categorical_names=categorical_names,
            training_labels=training_labels,
            verbose=verbose,
            mode=self._mode)
Example #19
    def generate_local_lime_explanation(self, i, path_local=None):
        explainer = lime_tabular.LimeTabularExplainer(
            training_data=np.array(self.X_train),
            class_names=['unstable', 'stable'],
            mode="classification",
            feature_names=self.feature_names)
        model_name = type(self.model).__name__

        if model_name == 'Sequential':
            if self.model.name == 'DNN':
                predict_fn = self.dnn_model_predict
                model_name = 'DNN'
            elif self.model.name == 'RNN':
                predict_fn = self.rnn_model_predict
                model_name = 'RNN'
            start = timer()
            exp = explainer.explain_instance(
                data_row=np.squeeze(self.X_test[i]),
                predict_fn=predict_fn,
                num_features=17)
            end = timer()
        else:
            start = timer()
            exp = explainer.explain_instance(
                data_row=np.array(self.X_test.iloc[i]),
                predict_fn=self.model.predict_proba,
                num_features=17)
            end = timer()

        # Path to store Local Explanation Outputs
        dir_name = os.path.join('explainer_outputs', 'LIME', 'Local',
                                self.grid)
        base_filename = model_name + str(i)

        suffix = '.html'

        if path_local is None:
            path_local = os.path.join(dir_name, base_filename + suffix)
        single_explanation_time = end - start
        exp.save_to_file(path_local)
        print('Single Lime Explanation Time:', single_explanation_time)
Example #20
def prepare_lime(training_path, class_names, discretize_continuous=True,
                 sel_feat_file=None):
    """
    Prepare a LIME explainer.

    Parameters
    ----------
    training_path: string
        Path to the hdf5 file of the training data.
    class_names: list
        List containing the name of the classes.
    discretize_continuous: boolean
        To discretize continuous data using LIME.
    sel_feat_file: string
        Path to a file with a list of the indexes of the selected features.

    Returns
    -------
    Lime explainer
    """
    train_data = h5.File(training_path, 'r')
    X_train = \
        np.array(train_data['features'][
                 :, 0:train_data['features'].shape[1] - 1])
    train_data.close()

    if sel_feat_file is not None:
        selected_feat = read_data(sel_feat_file)
        X_train = X_train[:, selected_feat]

    explainer = \
        lime_tabular.LimeTabularExplainer(
            X_train,
            feature_names=list(
                np.arange(0, X_train.shape[1]).astype(str)),
            class_names=class_names,
            discretize_continuous=discretize_continuous)

    return explainer
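
A hedged call sketch for prepare_lime; the file path and class names below are placeholders:

    explainer = prepare_lime(
        training_path="training_data.h5",     # placeholder; must hold a 'features' dataset
        class_names=["class_0", "class_1"],
        discretize_continuous=True,
    )
    # The returned explainer is then used via explainer.explain_instance(...)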
Example #21
def lime_run(rf, X_train, y_train, X_test, y_test, i, feature_names):

    lm = []

    explainer = lime_tabular.LimeTabularExplainer(X_train,
                                                  feature_names=range(
                                                      (X_train.shape[1])),
                                                  class_names=[0, 1],
                                                  discretize_continuous=False,
                                                  verbose=True,
                                                  sample_around_instance=True)
    n_ft = len(feature_names)

    #generate a random point and test using UI

    values = st.slider('Select number of sampling points', 200, 2000, 500)
    exp = explainer.explain_instance(X_test[i],
                                     rf.predict_proba,
                                     num_features=n_ft,
                                     num_samples=values)
    coef = [0] * n_ft
    for feat, weight in exp.as_list():  # feature_names are ints here, so feat can index directly
        coef[feat] = weight
    X_lime_scaled = exp.scaled_data
    X_lime = exp.scaled_data * explainer.scaler.scale_ + explainer.scaler.mean_
    y_lime = rf.predict(X_lime)
    plt.bar(feature_names, coef)
    plt.xticks(rotation=45)
    fig_size = plt.gcf().get_size_inches()  #Get current size
    sizefactor = 2  #Set a zoom factor
    # Modify the current size by the factor
    plt.gcf().set_size_inches(sizefactor * fig_size)
    st.write(plt.gcf())
    lime_pred = lime_perturbed_pred(coef, X_lime_scaled, exp)
    sn = fidelity_lime(lime_pred, y_lime)
    lm.append(sn)
    st.write(sn)
    plt.close()
Example #22
def explain_tabular():

    data = {"success": "failed"}

    #TODO send sample to be explained

    if flask.request.method == "POST":

        if flask.request.form:

            #data_dict = ast.literal_eval(json.loads(flask.request.data))

            print("try open model")
            with open(flask.request.form.get("model_path"), 'rb') as f:
                model = pickle.load(f)

            train_data = json.loads(flask.request.form.get("data"))
            dim = json.loads(flask.request.form.get("dim"))
            train_data = np.asarray(train_data)
            train_data = train_data.reshape((int(train_data.size / dim), dim))
            sample = json.loads(flask.request.form.get("sample"))

            num_features = int(flask.request.args.get("numfeatures"))

            explainer = lime_tabular.LimeTabularExplainer(train_data, mode="classification", discretize_continuous=True)
            exp = explainer.explain_instance(np.asarray(sample), model.predict_proba, num_features=num_features, top_labels=1)

            explanation_dictionary = {}

            for entry in exp.as_list():
                explanation_dictionary.update({entry[0]: entry[1]})

            data["explanation"] = explanation_dictionary
            data["success"] = "success"

    return flask.Response(json.dumps(data), mimetype="text/plain")
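
A sketch of how a client might call this endpoint; the host, route, and model path are assumptions, since the route decorator is not shown:

    import json

    import numpy as np
    import requests

    train = np.random.rand(20, 3)
    resp = requests.post(
        "http://localhost:5000/explain_tabular?numfeatures=3",  # assumed host and route
        data={
            "model_path": "model.pkl",  # placeholder path to a pickled classifier on the server
            "data": json.dumps(train.flatten().tolist()),
            "dim": json.dumps(3),
            "sample": json.dumps(train[0].tolist()),
        },
    )
    print(resp.text)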
Example #23
    def __init__(self, training_data, training_targets, feature_names, class_names):
        """
        Parameters
        ----------
            training_data: numpy array
                The data the machine learning model was trained on
            training_targets: numpy array
                The target values the model was trained on
            feature_names: list
                The names of the features
            class_names: list
                The names of the classes
        """
        self.training_data = training_data
        self.training_targets = training_targets
        self.training_summary = shap.kmeans(training_data, 10)

        self.feature_names = feature_names
        self.number_of_features = len(feature_names)
        self.class_names = class_names
        
        self.explainer = lt.LimeTabularExplainer(training_data=self.training_data,
            feature_names=self.feature_names, class_names=self.class_names,
            discretize_continuous=True)

Example #24
# ## Explanations

# In[55]:


from lime import lime_tabular


# In[116]:


explainer = lime_tabular.LimeTabularExplainer(features_train_df, 
                                              feature_names=features_train_df.columns.tolist(),
                                              class_names=['notengaged', 'engaged'],
                                              discretize_continuous=False,
                                              verbose=True)


# ### Explore some random points

# In[128]:


i = 44
exp = explainer.explain_instance(features_train_df.iloc[i], best_model.predict_proba)
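
# Hedged follow-up cell (not part of the original export): render the
# explanation inline; show_in_notebook is a standard lime Explanation method.
exp.show_in_notebook(show_table=True)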


Example #25
def metrics_lime(model, X_train, X_test, stddev = 0.1):
    # NOTE: num_perturbations (used below) is assumed to be a module-level constant.

    # Get the model predictions on the test data
    test_pred = model.predict(X_test)

    # Get the necessary sizes
    n_test = X_test.shape[0]
    d_in = X_test.shape[1]
    d_out = test_pred.shape[1]

    # Configure LIME
    exp = lime_tabular.LimeTabularExplainer(X_train, discretize_continuous = False, mode = "regression")

    def unpack_coefs(explainer, x, predict_fn, num_features, x_train, num_samples = 1000):
        d = x_train.shape[1]
        coefs = np.zeros((d))

        u = np.mean(x_train, axis = 0)
        sd = np.sqrt(np.var(x_train, axis = 0))

        exp = explainer.explain_instance(x, predict_fn, num_features = num_features, num_samples = num_samples)

        coef_pairs = exp.local_exp[1]
        for pair in coef_pairs:
            coefs[pair[0]] = pair[1]

        coefs = coefs / sd

        intercept = exp.intercept[1] - np.sum(coefs * u)

        return np.insert(coefs, 0, intercept)

    # Compute the standard, causal, and stability metrics
    standard_metric = np.zeros(d_out)
    causal_metric = np.zeros(d_out)
    stability_metric = np.zeros(d_out)
    for i in range(d_out):
        model.set_index(i)

        for j in range(n_test):
            x = X_test[j, :]

            # Get LIME's Explanation
            coefs = unpack_coefs(exp, x, model.predict_index, d_in, X_train)

            # Standard Metric
            standard_metric[i] += (np.dot(np.insert(x, 0, 1), coefs) - test_pred[j,i])**2

            for k in range(num_perturbations):
                x_pert = generate_neighbor(x, stddev = stddev)

                # Causal Metric
                model_pred = model.predict_index(x_pert.reshape(1, d_in))
                lime_pred = np.dot(np.insert(x_pert, 0, 1), coefs)
                causal_metric[i] += (lime_pred - model_pred)**2

                # Stability Metric
                coefs_pert = unpack_coefs(exp, x, model.predict_index, d_in, X_train)
                stability_metric[i] += np.sum((coefs_pert - coefs)**2)

    standard_metric /= n_test
    causal_metric /= num_perturbations * n_test
    stability_metric /= num_perturbations * n_test

    return standard_metric, causal_metric, stability_metric
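
unpack_coefs above maps LIME's coefficients from standardized space back to raw feature space; a standalone check of that algebra with arbitrary values:

    import numpy as np

    # If LIME fits w . x_std + b with x_std = (x - u) / sd, the same line in raw
    # space is (w / sd) . x + (b - sum((w / sd) * u)), which is what unpack_coefs returns.
    w, b = np.array([2.0, -1.0]), 0.5
    u, sd = np.array([1.0, 3.0]), np.array([0.5, 2.0])
    x = np.array([1.7, 2.2])

    coefs = w / sd
    intercept = b - np.sum(coefs * u)
    assert np.isclose(np.dot(w, (x - u) / sd) + b, np.dot(coefs, x) + intercept)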
Example #26
def run(args):
    # Hyperparameters
    num_perturbations = 5
    
    # Fixes an issue where threads inherit the same rng state
    scipy.random.seed()
    
    # Arguments
    dataset = args[1]
    trial = args[0]
    
    # Output
    out = {}
    file = open("Trials/" + dataset + "_" + str(trial) + ".json", "w")

    # Load data
    X_train, y_train, X_valid, y_valid, X_test, y_test, train_mean, train_stddev = load_normalize_data("../Datasets/" + dataset + ".csv")
    n = X_test.shape[0]
    d = X_test.shape[1]
    
    scales = [0.1, 0.25]
    scales_len = len(scales)
        
    # Fit model
    model = fit_svr(X_train, y_train, X_test, y_test)
    out["model_rmse"] = np.sqrt(np.mean((y_test - model.predict(X_test))**2))
        
    # Fit LIME and MAPLE explainers to the model
    exp_lime = lime_tabular.LimeTabularExplainer(X_train, discretize_continuous=False, mode="regression")
    exp_maple = MAPLE(X_train, model.predict(X_train), X_valid, model.predict(X_valid))
        
    # Evaluate model faithfulness on the test set
    lime_rmse = np.zeros((scales_len))
    maple_rmse = np.zeros((scales_len))
    
    for i in range(n):
        x = X_test[i, :]
        
        coefs_lime = unpack_coefs(exp_lime, x, model.predict, d, X_train) #Allow full number of features
    
        e_maple = exp_maple.explain(x)
        coefs_maple = e_maple["coefs"]
        
        for j in range(num_perturbations):
            
            noise = np.random.normal(loc = 0.0, scale = 1.0, size = d)
            
            for k in range(scales_len):
                scale = scales[k]
            
                x_pert = x + scale * noise
            
                model_pred = model.predict(x_pert.reshape(1,-1))
                lime_pred = np.dot(np.insert(x_pert, 0, 1), coefs_lime)
                maple_pred = np.dot(np.insert(x_pert, 0, 1), coefs_maple)
            
                lime_rmse[k] += (lime_pred - model_pred)**2
                maple_rmse[k] += (maple_pred - model_pred)**2

    lime_rmse /= n * num_perturbations
    maple_rmse /= n * num_perturbations

    lime_rmse = np.sqrt(lime_rmse)
    maple_rmse = np.sqrt(maple_rmse)

    out["lime_rmse_0.1"] = lime_rmse[0]
    out["maple_rmse_0.1"] = maple_rmse[0]
    out["lime_rmse_0.25"] = lime_rmse[1]
    out["maple_rmse_0.25"] = maple_rmse[1]

    json.dump(out, file)
    file.close()
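
This example and Example #30 call a module-level unpack_coefs that is not defined in either snippet; a sketch reconstructed from the nested helper in Example #25 (the num_samples parameter matches the call in Example #30) might look like:

    import numpy as np

    def unpack_coefs(explainer, x, predict_fn, num_features, x_train, num_samples=1000):
        # Reconstructed sketch, not the original module code.
        u = np.mean(x_train, axis=0)
        sd = np.sqrt(np.var(x_train, axis=0))
        exp = explainer.explain_instance(x, predict_fn, num_features=num_features,
                                         num_samples=num_samples)
        coefs = np.zeros(x_train.shape[1])
        for idx, w in exp.local_exp[1]:   # regression explanations live under label 1
            coefs[idx] = w
        coefs = coefs / sd                # map back to raw feature space
        intercept = exp.intercept[1] - np.sum(coefs * u)
        return np.insert(coefs, 0, intercept)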
Example #27
def explain_tree(data, periods, model, train_set, sov_lab_encoder, le, feat_key):
    
    import pandas as pd
    import numpy as np
    from lime import lime_tabular
    from ipywidgets import widgets, interactive
    from IPython.display import display, clear_output

    def f(Variable):
        return feat_key[feat_key['Key']==Variable].index[0]
    
    def on_button_clicked(b):
        with output:
            clear_output()
            print(w.result)
    
    ratios = ['Ratio' + str(i+1) for i in range(0,26)]
    ratios.append('SovereignRating')    
    w = interactive(f, Variable=ratios)
        
    button = widgets.Button(description="Obtener nombre")
    output = widgets.Output()
    
    display(w)        
    display(button, output)
    button.on_click(on_button_clicked)
    
    X_new = np.array(data.loc[feat_key.index].T)
    if sov_lab_encoder is not None:
        pos_sr = feat_key.index.get_loc(feat_key[feat_key["Key"] == 'SovereignRating'].index[0])
        sob_rating = X_new[:, pos_sr].copy()
        X_new[:, pos_sr] = sov_lab_encoder.transform(X_new[:, pos_sr])
    
    # Predicting to check actual prediction
#    pred_calif = np.array([le.iloc[x == list(le.iloc[:,0]),0].index[0] for x in model.predict(X_new)])
    
    X_new = X_new.astype('float')
    
    # features_names = sum([feature_names_key], [])
    # print(features_names)
    class_names = list(le.index)[0:-2]
    class_names.reverse()
    feature_names = list(feat_key.Key)  # Use .index (very long names) or .Key (Ratio and #)
    # Create the LIME explainer and the prediction lambda
    categorical_names = {}
    categorical_names[26] = sov_lab_encoder.classes_
    
    explainer = lime_tabular.LimeTabularExplainer(train_set, mode='classification',
                                                  feature_names=feature_names,
                                                  class_names=class_names,
                                                  categorical_features=[26],
                                                  categorical_names=categorical_names,
                                                  discretize_continuous=True)
    
    predict_fn_rf = lambda x: model.predict_proba(x).astype(float)
    
    # explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=feature_names,
    #                                                   class_names=class_names, categorical_features=columns,
    #                                                   categorical_names=feature_names_cat, kernel_width=3)
    # Explaining prediction with Lime
    per = pd.DataFrame(list(data.columns), columns=["Periodo"])
    print_exp = False
    for period in periods:
        print("Explicación para periodo " + str(per.loc[period].Periodo))
        exp = explainer.explain_instance(X_new[period], model.predict_proba, num_features=5, top_labels=2)
        exp.show_in_notebook(show_table=True, show_all=False)
        if print_exp:
            av_lab = exp.available_labels()
            for lab in av_lab:
                print('Explanation for rating %s' % class_names[lab])
                display('\n'.join(map(str, exp.as_list(label=lab))))
                print()
Example #28
File: run.py Project: GDPlumb/ExpO
network = MLP(shape)
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
    pred = network.model(X)

saver = tf.train.Saver(max_to_keep=1)

# Create tf session
sess = tf.Session()

# Wrap it for LIME
wrapper = Wrapper(sess, pred, X)
wrapper.set_index(0)

# Configure LIME
exp = lime_tabular.LimeTabularExplainer(X_train,
                                        discretize_continuous=False,
                                        mode="regression")

###
# Run Experiment
###

print("")
print("What is the 'Explanation'?")
print("The left column is the feature index")
print(
    "The right column is the expected change of the model's prediction if we increase that feature by 1."
)
print(
    "The index of -1 is the intercept term for the explanation and should not be changed."
)
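
A hedged continuation of this script, showing how the wrapped model could be explained; the Wrapper prediction method name is an assumption, since the class is not shown:

    x = X_train[0]
    explanation = exp.explain_instance(x, wrapper.predict,  # assumed Wrapper method name
                                       num_features=X_train.shape[1])
    print(explanation.as_list())  # (feature, weight) pairs as described above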
Example #29
def main():
    epochs = 100
    batch_size = 400
    input_dim = 12
    hidden_dim = 6
    rng = np.random.RandomState(12345)
    csv_in_file_name = sys.argv[1]
    test_id = int(sys.argv[2])
    perturb_ind = int(sys.argv[3])

    try:
        with tf.device("/gpu:0"):
            print("Using gpu!")
            ae = AutoEncoder_tf(rng, input_dim, hidden_dim)
    except Exception:
        with tf.device("/cpu:0"):
            print("Using cpu!")
            ae = AutoEncoder_tf(rng, input_dim, hidden_dim)

    # Train
    # min_max_scaler = preprocessing.MinMaxScaler()
    # rawdataX = gen_syndata(rng, input_dim)

    rawdataX = pd.read_csv(csv_in_file_name, header=None).to_numpy()
    # '''
    # print 'before', rawdataX[10]
    raw_testX_positive = perturb(rng, input_dim, rawdataX[3000:], perturb_ind)
    # dataX = preprocessing.scale(np.concatenate((rawdataX, raw_testX_positive), axis = 0))
    mean_std_scaler = preprocessing.StandardScaler().fit(
        rawdataX.astype(np.float))
    trainX = mean_std_scaler.transform(rawdataX.astype(np.float))
    # dataX = preprocessing.normalize(rawdataX, norm='l2')
    # trainX = dataX[:4000]
    # testX_positive = dataX[4000:]
    testX_positive = mean_std_scaler.transform(raw_testX_positive)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        print('Training AutoEncoder...')
        for epoch in range(epochs):
            rng.shuffle(trainX)
            for batch_ind in range(10):
                batch_xs = trainX[batch_ind * batch_size:(batch_ind + 1) *
                                  batch_size]
                # print batch_xs[0]
                train_loss = ae.train(batch_xs, sess)

            # print('epoch, loss = {}: {}'.format(epoch, train_loss))
        print('Trained AutoEncoder.')
        # print('loss (train) = ', ae.predict([trainX[0]]))

        feature_names = [str(x) for x in range(input_dim)]
        explainer = lime_tabular.LimeTabularExplainer(
            trainX,
            feature_names=feature_names,
            class_names=['Normal'],
            verbose=True)

        # test_id = 8
        # examed_example = trainX[3000+test_id]
        # examed_example = testX_positive[test_id]
        examed_example = rawdataX[test_id] + np.asarray(
            [100, 8, 100, 172, 30, 30000, 200, 31, 1000, 14, 0, 800])

        scaled_examed_example = mean_std_scaler.transform(
            examed_example.reshape(1, -1).astype(np.float)).flatten()
        print(scaled_examed_example)

        print('Training LIME...')
        exp = explainer.explain_instance(scaled_examed_example,
                                         ae.calas,
                                         labels=[0],
                                         num_features=12)
        print('Trained LIME.')

        # print exp.as_map()[0]

        lime_res = sorted(exp.as_map()[0], key=lambda x: x[0])
        sorted_lime_res = sorted(lime_res,
                                 key=lambda x: np.absolute(x[1]),
                                 reverse=True)
        print "lime", sorted_lime_res

        lime_ind_ord = [ele[0] for ele in sorted_lime_res]
        # print lime_ind_ord
        lime_to_figure = [lime_ind_ord.index(u) for u in range(12)]
        # print lime_to_figure

        # print scaled_examed_example
        # print ae.predict(np.asarray([scaled_examed_example]))[0]
        direc_res = [(i, v) for i, v in enumerate((
            scaled_examed_example -
            ae.predict(np.asarray([scaled_examed_example]))[0])**2)]
        sorted_direc_res = sorted(direc_res, key=lambda x: x[1], reverse=True)
        print "AE", sorted_direc_res

        direc_ind_ord = [ele[0] for ele in sorted_direc_res]
        # print direc_ind_ord
        direc_to_figure = [direc_ind_ord.index(u) for u in range(12)]
        # print direc_to_figure

    # plot_ranking(str(test_id), lime_to_figure, direc_to_figure)

    plot_magnitude(lime_res, direc_res, scaled_examed_example, str(test_id))
Example #30
def run(args):
    # Hyperparameters
    num_perturbations = 5

    # Fixes an issue where threads inherit the same rng state
    scipy.random.seed()

    # Arguments
    dataset = args[0]
    trial = args[1]

    # Output
    out = {}
    file = open("Trials/" + dataset + "_" + str(trial) + ".json", "w")

    # Load data
    X_train, y_train, X_valid, y_valid, X_test, y_test, train_mean, train_stddev = load_normalize_data(
        "../Datasets/" + dataset + ".csv")
    n = X_test.shape[0]
    d = X_test.shape[1]

    # Load the noise scale parameters
    #with open("Sigmas/" + dataset + ".json", "r") as tmp:
    #scales = json.load(tmp)
    scales = [0.1, 0.25]
    scales_len = len(scales)

    # Fit MAPLE model
    exp_maple = MAPLE(X_train, y_train, X_valid, y_valid)

    # Fit LIME to explain MAPLE
    exp_lime = lime_tabular.LimeTabularExplainer(X_train,
                                                 discretize_continuous=False,
                                                 mode="regression")

    # Evaluate model faithfulness on the test set
    rmse = 0.0  #MAPLE accuracy on the dataset
    lime_rmse = np.zeros((scales_len))
    maple_rmse = np.zeros((scales_len))
    for i in range(n):
        x = X_test[i, :]

        #LIME's default parameter for num_samples is 500
        # 1) This is larger than any of the datasets we tested on
        # 2) It makes explaining MAPLE impractically slow since the complexity of MAPLE's predict() depends on the dataset size
        coefs_lime = unpack_coefs(exp_lime,
                                  x,
                                  exp_maple.predict,
                                  d,
                                  X_train,
                                  num_samples=100)

        e_maple = exp_maple.explain(x)
        coefs_maple = e_maple["coefs"]

        rmse += (e_maple["pred"] - y_test[i])**2

        for j in range(num_perturbations):

            noise = np.random.normal(loc=0.0, scale=1.0, size=d)

            for k in range(scales_len):
                scale = scales[k]

                x_pert = x + scale * noise

                e_maple_pert = exp_maple.explain(x_pert)
                model_pred = e_maple_pert["pred"]
                lime_pred = np.dot(np.insert(x_pert, 0, 1), coefs_lime)
                maple_pred = np.dot(np.insert(x_pert, 0, 1), coefs_maple)

                lime_rmse[k] += (lime_pred - model_pred)**2
                maple_rmse[k] += (maple_pred - model_pred)**2

    rmse /= n
    lime_rmse /= n * num_perturbations
    maple_rmse /= n * num_perturbations

    rmse = np.sqrt(rmse)
    lime_rmse = np.sqrt(lime_rmse)
    maple_rmse = np.sqrt(maple_rmse)

    out["model_rmse"] = rmse[0]
    out["lime_rmse_0.1"] = lime_rmse[0]
    out["maple_rmse_0.1"] = maple_rmse[0]
    out["lime_rmse_0.25"] = lime_rmse[1]
    out["maple_rmse_0.25"] = maple_rmse[1]

    json.dump(out, file)
    file.close()