def fit(
    self,
    model: RegressorMixin,
    X: np.ndarray,
    feature_names: List[str] = None,
):
    """
    Fit the explainer on an already fitted model and its training matrix.

    The ``LimeTabularExplainer`` is created here rather than in the
    constructor because a fitted scikit-learn model is required first.

    Parameters
    ----------
    model: RegressorMixin, required
        fitted scikit-learn model; checked with ``check_is_fitted``
    X: np.ndarray, required
        training matrix handed to LIME
    feature_names: List[str], optional, (default=``None``)
        names of the columns of ``X``; when ``None`` they are produced by
        ``self._define_feature_names(X)``

    Returns
    -------
    self, with ``model_``, ``explainer_`` and ``feature_names_`` set
    """
    check_is_fitted(model)

    names = (
        self._define_feature_names(X)
        if feature_names is None
        else list(feature_names)
    )

    self.model_ = model
    self.explainer_ = lime_tabular.LimeTabularExplainer(
        X,
        feature_names=names,
        mode="regression",
    )
    self.feature_names_ = names
    return self
def __init__(self, *argv, **kwargs):
    """
    Initialize the wrapper and build the LIME tabular explainer.

    Every positional and keyword argument is forwarded unchanged, first to
    the parent initializer and then to ``lime_tabular.LimeTabularExplainer``;
    the resulting explainer is stored on ``self.explainer``.
    """
    super(LimeTabularExplainer, self).__init__(*argv, **kwargs)
    wrapped = lime_tabular.LimeTabularExplainer(*argv, **kwargs)
    self.explainer = wrapped
def lime_explainer(self):
    """Build a regression-mode LIME explainer and store it on ``self.explainer``.

    The training matrix is ``self.data["train_df"]`` converted to a numpy
    array; column names come from ``self.feature_names``.
    """
    training_matrix = np.array(self.data["train_df"])
    explainer_options = {
        "feature_names": self.feature_names,
        "verbose": False,
        "mode": "regression",
    }
    self.explainer = lt.LimeTabularExplainer(training_matrix,
                                             **explainer_options)
def __init__(self, dataset, verbose=True):
    """Create a LIME tabular explainer from a project dataset object.

    Parameters
    ----------
    dataset:
        object providing ``get_train_file``, ``make_numpy_array``,
        ``get_mode``, ``get_categorical_features``, ``get_target_labels``
        and ``get_feature_names``.
    verbose: bool
        forwarded to ``LimeTabularExplainer``.
    """
    train_file = dataset.get_train_file()
    train_matrix, train_labels = dataset.make_numpy_array(train_file)
    mode = dataset.get_mode()
    # The first element of the triple is not needed by the explainer.
    _, cat_index, cat_names = dataset.get_categorical_features()
    target_labels = dataset.get_target_labels()

    self._mode = mode
    self.dataset = dataset

    explainer_options = dict(
        feature_names=dataset.get_feature_names(),
        class_names=target_labels,
        categorical_features=cat_index,
        categorical_names=cat_names,
        training_labels=train_labels,
        verbose=verbose,
        mode=self._mode,
    )
    self._explainer = lime_tabular.LimeTabularExplainer(
        train_matrix, **explainer_options)
def fit(
    self,
    model: sklearn.base.BaseEstimator,
    x_train: Union[pd.Series, pd.DataFrame, np.ndarray],
    y_train: pd.DataFrame,
):
    """Fit the parent explainer, then build the LIME tabular explainer.

    ``x_train`` is first normalised to a DataFrame with
    ``self._get_dataframe_from_mixed_input``; its values and column labels
    are what LIME receives. Returns ``self``.
    """
    frame = self._get_dataframe_from_mixed_input(x_train)
    super().fit(model, frame, y_train)

    lime_options = dict(
        feature_names=frame.columns,
        class_names=self.class_names,
        categorical_features=self.categorical_features,
        discretize_continuous=True,
    )
    self._explainer = lime_tabular.LimeTabularExplainer(frame.values,
                                                        **lime_options)
    return self
def generate_global_lime_explanations(self):
    """Produce (or load cached) global LIME explanations via SubmodularPick.

    The SubmodularPick object is cached as a pickle under
    ``explainer_outputs/LIME_pickles``. When the cache file exists it is
    loaded, otherwise it is computed and written. Each picked explanation
    is then rendered into one multi-page PDF under
    ``explainer_outputs/LIME/Global/<grid>``.

    Returns
    -------
    The SubmodularPick object.

    Raises
    ------
    ValueError
        if the model is a Keras ``Sequential`` whose ``name`` is neither
        ``'DNN'`` nor ``'RNN'`` (no prediction function is known for it).
    """
    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(self.X_train),
        class_names=['unstable', 'stable'],
        mode="classification",
        feature_names=self.feature_names)

    model_name = type(self.model).__name__
    # LIME Global Explainer with Submodule Pick
    if model_name == 'Sequential':
        if self.model.name == 'DNN':
            predict_fn = self.dnn_model_predict
            model_name = 'DNN'
        elif self.model.name == 'RNN':
            predict_fn = self.rnn_model_predict
            model_name = 'RNN'
        else:
            # Previously this fell through and failed later with an
            # unbound-variable error on predict_fn.
            raise ValueError(
                "Unsupported Sequential model name: %r" % (self.model.name,))
    else:
        predict_fn = self.model.predict_proba

    root = Path(".")
    my_file = Path(root / "explainer_outputs" / "LIME_pickles" /
                   (model_name + '_LIME_SP_' + self.grid))

    if my_file.is_file():
        print("EXISTS!!!!!!!!!!!!!!")
        # The timer also covers the cached path so the elapsed time below
        # is always defined (it used to be unbound here).
        start = timer()
        # NOTE: pickle.load can execute arbitrary code; only load cache
        # files that this program wrote itself.
        with open(my_file, "rb") as pickle_in:
            sp_obj = pickle.load(pickle_in)
        end = timer()
        print("LOADED!!!!!!!!!!!!!!")
    else:
        print("DOESNT EXIST. CREATING NEW")
        start = timer()
        sp_obj = submodular_pick.SubmodularPick(
            explainer,
            np.array(self.X_train),
            predict_fn,
            num_features=self.X_test.shape[1],
            num_exps_desired=5)
        end = timer()
        print('Global LIME Explanations: ', end - start)
        # Store in Pickle
        with open(my_file, "wb") as pickle_out:
            pickle.dump(sp_obj, pickle_out)

    # Compute time when freshly built, load time when read from cache.
    sp_explanation_time = end - start
    print('LIME Global Explanation time: ', sp_explanation_time)

    dir_name = os.path.join('explainer_outputs', 'LIME', 'Global', self.grid)
    path_global = os.path.join(dir_name, model_name + 'LIME_SP.pdf')
    with PdfPages(path_global) as pdf:
        for exp in sp_obj.sp_explanations:
            fig = exp.as_pyplot_figure(label=exp.available_labels()[0])
            pdf.savefig(fig, bbox_inches='tight')
            # Close each figure once saved so they do not accumulate.
            plt.close(fig)
    return sp_obj
def scoreComment():
    """Score a submitted comment and render a LIME explanation for it.

    Reads the comment and article texts from the request form, scores the
    comment with ``ab.judgeComment``, explains the resulting feature row
    with LIME, saves the plot to ``files/visual.pdf``, opens it with
    ``webbrowser`` and returns the score as JSON.
    """
    form = request.form
    # text of comment
    comment = form.get("comment")
    reddit_url = form.get('reddit_link')
    cleaned_article_text = form.get('cleaned_article_text')
    no_url_article_text = form.get('no_url_article_text')
    no_stop_article_text = form.get('no_stop_article_text')
    no_stop_or_url_article_text = form.get('no_stop_or_url_article_text')

    swearwords = pd.read_csv('files/edited-swear-words.csv').swear.tolist()

    features = [
        'profanity', 'length', 'adjWordScore', 'NER_count', 'NER_match',
        'WordScore', 'WholeScore', 'contains_url', 'no_url_WordScore',
        'no_url_WholeScore', 'WordScoreNoStop', 'WholeScoreNoStop',
        'no_url_or_stops_WholeScore', 'no_url_or_stops_WordScore'
    ]

    our_model = load("updated_model.pkl",
                     compression="lzma",
                     set_default_extension=False)

    punctuation_lst = [
        ',', '.', '!', '?', '<', '>', '/', ':', ';', '\'', '\"', '[', '{',
        ']', '}', '|', '\\', '`', '~', '!', '@', '#', '$', '%', '^', '&',
        '*', '(', ')', '-', '_', '=', '+'
    ]

    # Training matrix used as the LIME background data
    new_X_train = np.loadtxt('files/X_train.csv', delimiter=',')

    full_score = ab.judgeComment(
        comment, reddit_url, swearwords, features, our_model,
        cleaned_article_text, no_url_article_text, no_stop_article_text,
        no_stop_or_url_article_text, punctuation_lst)

    feature_row = full_score[3][0]
    print('This is full_score[3][0]: \n')
    print(feature_row)

    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(new_X_train),
        feature_names=features,
        class_names=[False, True],
        mode='classification')
    exp = explainer.explain_instance(data_row=feature_row,
                                     predict_fn=our_model.predict_proba)

    # The reported score is elements 1 and 2 of the result, concatenated.
    score = str(full_score[1]) + str(full_score[2])

    img = exp.as_pyplot_figure()
    img.savefig('files/visual.pdf', bbox_inches='tight')
    webbrowser.open('files/visual.pdf')
    print("made it")
    return jsonify(score=score)
def lime_explanation(model, X, ys=None, num_samples=1000, multiprocessing=True):
    """Compute per-row LIME feature importances for every row of ``X``.

    Parameters
    ----------
    model:
        object exposing ``predict_proba``.
    X:
        object with a ``.numpy()`` method returning a 2-D array
        (rows are instances).
    ys: optional
        object with a ``.numpy()`` method returning the training labels;
        when ``None`` no labels or class names are handed to LIME.
    num_samples: int
        neighbourhood size passed to ``explain_instance``.
    multiprocessing: bool
        parallel execution is not implemented; when true a notice is
        printed and the sequential path is used.

    Returns
    -------
    Result of ``np.apply_along_axis`` over the rows of ``X``: for each row,
    the LIME weights ordered by feature index.
    """
    from functools import partial
    from lime import lime_tabular
    import warnings

    warnings.filterwarnings("ignore", message="Singular matrix")
    warnings.filterwarnings("ignore", message="Ill-conditioned")

    # identify categorical data
    # https://stackoverflow.com/questions/47094676/how-to-identify-the-categorical-variables-in-the-200-numerical-variables?noredirect=1&lq=1
    X = X.numpy()
    # ys defaults to None, so it must only be converted when supplied.
    if ys is not None:
        ys = ys.numpy()
    df = pd.DataFrame(X)
    categorical_features = detect_categorical_top_k(df)

    lime_expl = lime_tabular.LimeTabularExplainer(
        training_data=X,
        training_labels=ys,
        categorical_features=categorical_features,
        class_names=None if ys is None else np.unique(ys),
        discretizer="decile")

    # Ask for every feature: the column count, not the row count.
    num_features = X.shape[1]
    predict_fn = model.predict_proba

    def lime_explanation_row(data_row, predict_fn, data_labels, num_features,
                             num_samples):
        # top_labels=1 makes LIME explain the most probable label.
        explanation = lime_expl.explain_instance(data_row,
                                                 predict_fn,
                                                 labels=data_labels,
                                                 top_labels=1,
                                                 num_features=num_features,
                                                 num_samples=num_samples,
                                                 distance_metric='euclidean',
                                                 model_regressor=None)
        data_label = list(explanation.local_exp.keys())[0]
        # sort by feature index and return only the importances
        feature_importance = list(
            zip(*sorted(explanation.local_exp[data_label])))[1]
        return feature_importance

    lime_partial = partial(lime_explanation_row,
                           predict_fn=predict_fn,
                           data_labels=[],
                           num_features=num_features,
                           num_samples=num_samples)

    if multiprocessing:
        print("*" * 10)
        print("NOT IMPLEMENTED: reverting to SLOW compute")
        print("*" * 10)

    # Both modes currently share the sequential implementation.
    return np.apply_along_axis(lime_partial, 1, X)
def lime_analysis(self, cat_features=None):
    """Build a LIME explainer and explain the false-positive predictions.

    ``cat_features`` is forwarded to LIME as ``categorical_features``.
    The explainer is kept on ``self.limeObj``.
    """
    self.split_predictions()

    lime_options = {
        "training_data": self.x_train,
        "feature_names": self.featureNames,
        "categorical_features": cat_features,
    }
    self.limeObj = lime_tabular.LimeTabularExplainer(**lime_options)

    print("Explaining... this might take some time")
    self.lime_expl(self.limeObj, self.model, self.false_pos,
                   "False Positive")
    # False negatives are currently not explained:
    #self.lime_expl(self.limeObj, self.model, self.false_neg, "False Negative")
    print("Done! Required graphs are in corresponding folder")
def explain_tree(data, period, ratings, model, train_set, sov_lab_encoder,
                 le, feat_key, print_exp):
    """Explain one period's prediction with LIME and save it as HTML.

    Parameters
    ----------
    data:
        DataFrame indexed by feature; ``data.loc[feat_key.index].T`` gives
        one row per period.
    period:
        row index of the instance to explain.
    ratings:
        passed to LIME as ``top_labels``.
    model:
        classifier exposing ``predict_proba``.
    train_set:
        training matrix for the LIME explainer.
    sov_lab_encoder:
        encoder applied to the 'SovereignRating' column, or ``None``.
    le:
        object whose index (minus the last entry, reversed) gives the
        class names.
    feat_key:
        DataFrame whose index lists the features and whose ``Key`` column
        holds their short keys.
    print_exp: bool
        when true, also print the explanation for each available label.
    """
    import numpy as np
    from lime import lime_tabular

    X_new = np.array(data.loc[feat_key.index].T)
    if sov_lab_encoder is not None:
        pos_sr = feat_key.index.get_loc(
            feat_key[feat_key["Key"] == 'SovereignRating'].index[0])
        X_new[:, pos_sr] = sov_lab_encoder.transform(X_new[:, pos_sr])
    X_new = X_new.astype('float')

    class_names = list(le.index)[0:-1]
    class_names.reverse()
    # Use .index (very long names) or .Key (Ratio and number)
    feature_names = list(feat_key.index)

    explainer = lime_tabular.LimeTabularExplainer(
        train_set,
        mode='classification',
        feature_names=feature_names,
        class_names=class_names,
        discretize_continuous=True)

    # Explaining prediction with Lime
    exp = explainer.explain_instance(X_new[period],
                                     model.predict_proba,
                                     num_features=5,
                                     top_labels=ratings)
    exp.show_in_notebook(show_table=True, show_all=False)
    exp.save_to_file('explainer/lime_output.html')

    if print_exp:
        for lab in exp.available_labels():
            print('Explanation for class %s' % class_names[lab])
            print('\n'.join(map(str, exp.as_list(label=lab))))
            print()
def __init__(self, preprocessor, model, X, y, feature_names):
    """Store the inputs and build both SHAP and LIME explainers.

    A ``shap.TreeExplainer`` is created for ``model``, a
    ``LimeTabularExplainer`` for ``X``/``y``, and the SHAP values of ``X``
    are computed immediately and kept on ``self.shap_values``.
    """
    self.preprocessor, self.model = preprocessor, model
    self.X, self.y = X, y
    self.feature_names = feature_names

    self.shap_explainer = shap.TreeExplainer(self.model)

    lime_options = {
        "training_labels": self.y,
        "feature_names": feature_names,
        "class_names": [False, True],
    }
    self.lime_explainer = lime_tabular.LimeTabularExplainer(
        X, **lime_options)

    self.shap_values = self.shap_explainer.shap_values(X)
def explainPredictions(self):
    '''Use LIME (https://github.com/marcotcr/lime) to explain the model's
    predictions for ten randomly drawn test rows.

    Each explanation is written as HTML and PNG into the ``explanations``
    folder under ``self.output``.'''
    train_frame = self.data.X_train
    explainer = lime_tabular.LimeTabularExplainer(
        training_data=train_frame.values,
        mode='classification',
        # names of all features (regardless of type)
        feature_names=list(train_frame),
        class_names=['background', 'signal'],
        discretize_continuous=True,
        categorical_features=None,
        categorical_names=None,
    )

    def predict_fn_keras(x):
        # A single 1-D row is promoted to a one-row batch before predicting.
        batch = x if x.ndim >= 2 else x.reshape(1, x.shape[-1])
        pred = self.model.predict(batch, batch_size=1)
        # Two columns are returned: (1 - pred, pred).
        return np.concatenate((1. - pred, pred), axis=1)

    for i in range(0, 10):
        row_index = random.randint(0, len(self.data.X_test) - 1)
        exp = explainer.explain_instance(
            data_row=self.data.X_test.values[row_index],
            predict_fn=predict_fn_keras,
            # iterable with labels to be explained
            labels=[1],
            # maximum number of features present in explanation
            num_features=self.data.X_train.shape[1],
        )

        out = os.path.join(self.output, 'explanations')
        if not os.path.exists(out):
            os.makedirs(out)

        stem = 'explanation' + str(i)
        exp.save_to_file(os.path.join(out, stem + '.html'))
        exp.as_pyplot_figure().savefig(os.path.join(out, stem + '.png'))
def run_lime_sklearn(classifier):
    """Explain the second test row with LIME and show the plot.

    Uses the module-level ``X_train`` and ``X_test``; ``classifier`` must
    expose ``predict_proba``.
    """
    lime_options = dict(
        training_data=np.array(X_train),
        feature_names=X_train.columns,
        class_names=['Fail', 'Pass'],
        mode='classification',
    )
    explainer = lime_tabular.LimeTabularExplainer(**lime_options)

    sample = X_test.iloc[1]
    exp = explainer.explain_instance(data_row=sample,
                                     predict_fn=classifier.predict_proba)

    plt.close()
    exp.as_pyplot_figure()
    plt.tight_layout()
    plt.show()
def _initialize(self, **kwargs):
    """Create the LIME tabular explainer for ``self.data``.

    The training features are ``self.data`` without the ``self.target``
    column.

    Keyword Args
    ------------
    feature_names: list, optional
        defaults to the feature column names.
    class_names: list, optional
        defaults to ``["Outcome (no)", "Outcome (yes)"]``.
    mode: str, optional
        defaults to ``"classification"``.
    discretize_continuous: bool, optional
        defaults to ``False``.
    """
    if self.verbose:
        print("Setting up LIME explainer")

    features = self.data.drop([self.target], axis=1)
    self.explainer = lime_tabular.LimeTabularExplainer(
        features,
        feature_names=kwargs.get("feature_names")
        or list(features.columns),
        # Read the "class_names" option; the "feature_names" value used to
        # be reused here by mistake.
        class_names=kwargs.get("class_names")
        or ["Outcome (no)", "Outcome (yes)"],
        mode=kwargs.get("mode") or "classification",
        discretize_continuous=kwargs.get("discretize_continuous") or False,
    )
def calculate_values(number_of_rows=200,
                     number_of_exaplanations=11,
                     which_explainer='random',
                     nsampleslist=(100,),
                     feature_rankings=('first', 'middle', 'last'),
                     strategies=("mean", "distribution"),
                     verbose=0):
    """Run the explanation experiment over correctly predicted test rows.

    For every sample count, row, explanation repetition, feature ranking
    and strategy, ``calculate_values_datapoint`` is called and appends to a
    shared ``results`` list, which is printed and returned.

    Relies on module-level ``X_train``, ``X_test``, ``y_test``, ``df``,
    ``model`` and ``predict_fn``.

    Parameters
    ----------
    number_of_rows: int
        passed to ``get_correctly_predicted_indices``.
    number_of_exaplanations: int
        repetitions per row.
    which_explainer: str
        ``'lime'`` or ``'shap'`` build an explainer; any other value leaves
        it as ``None``.
    nsampleslist, feature_rankings, strategies: iterables
        values to sweep over (immutable defaults; only iterated).
    verbose: int
        forwarded to ``calculate_values_datapoint``.
    """
    explainer = None
    if which_explainer == 'lime':
        # 'paid'/'unpaid' name the classes; they are not per-row labels,
        # so they belong in class_names rather than training_labels.
        explainer = lime_tabular.LimeTabularExplainer(
            X_train, class_names=['paid', 'unpaid'])
    elif which_explainer == 'shap':
        explainer = shap.KernelExplainer(predict_fn, X_train[0:1000])

    # Midpoint between the feature means of the two loan_repaid groups.
    neutral_points = ((df[df['loan_repaid'] != 0].mean() +
                       df[df['loan_repaid'] != 1].mean()) /
                      2).drop('loan_repaid')

    results = []
    predicted_classes = model.predict_classes(X_test)
    correctly_predicted_indices = get_correctly_predicted_indices(
        number_of_rows)

    counter = 0
    for nsamples in nsampleslist:
        for row_number in correctly_predicted_indices:
            print("Row Number {} counter: {}".format(row_number, counter))
            counter = counter + 1

            # The check does not depend on the repetition index, so it is
            # done once per row instead of once per repetition.
            if predicted_classes[row_number][0] != y_test[row_number]:
                print("Predicted and actual classes are different, skip")
                continue

            for no_exp in range(number_of_exaplanations):
                for feature_ranking in feature_rankings:
                    for strategy in strategies:
                        calculate_values_datapoint(
                            explainer, neutral_points, no_exp, nsamples,
                            results, row_number, verbose, which_explainer,
                            feature_ranking, strategy)
    print(results)
    return results
def explain_instance_tabular_data(instance):
    """Explain one tabular instance with LIME and save the result.

    The predicted target is obtained from the notebook's model (a Keras
    model loaded from disk for ``"NEURAL NETWORK"``, otherwise
    ``notebook['model']``). The explanation is saved as a JPG and an HTML
    file under ``../UI/src/assets/`` and the HTML file name is recorded in
    ``notebook['explanation']``.

    Returns
    -------
    JSON string ``{"explanation": <html file name>}``.
    """
    if notebook['model_type'] == "NEURAL NETWORK":
        model = keras.models.load_model(
            "NOTEBOOK_" + notebook_name_dict['notebook_name'] +
            "_neural_network_model.hdf5")
        # NOTE(review): argmax is mapped over the elements of the first
        # prediction row and the first result is taken; if that row is 1-D
        # this is always 0. Kept as-is; confirm the intended target.
        target = list(
            map(
                numpy.argmax,
                model.predict(
                    numpy.reshape(instance,
                                  newshape=(1, *instance.shape)))[0]))[0]
    else:
        target = notebook['model'].predict([instance])[0]

    explainer = lt.LimeTabularExplainer(
        training_data=notebook['x_train'],
        feature_names=[str(i) for i in range(len(instance))])
    exp = explainer.explain_instance(
        instance,
        predict_fn,
        num_features=len(instance),
        # never ask for more than 100 neighbourhood samples
        num_samples=min(len(notebook['x_train']), 100),
        labels=(target, ))

    asset_dir = "../UI/src/assets/"
    stem = ("NOTEBOOK_" + notebook['notebook_name'] +
            "_investigate_model_instance")
    html_name = stem + ".html"

    exp.as_pyplot_figure(label=target).savefig(asset_dir + stem + "1.jpg",
                                               figsize=(50, 50))
    exp.save_to_file(file_path=asset_dir + html_name)

    notebook['explanation'] = html_name
    set_notebook_data(notebook_name_dict['notebook_name'])

    # Best-effort cleanup: failures are ignored on purpose, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        keras.backend.clear_session()
    except Exception:
        pass

    return json_encoder.encode({'explanation': html_name})
def lime():
    """Show a LIME explanation for the first row of ``client`` as HTML.

    ``client`` is not defined in this function and is expected to exist at
    module level. The classifier is loaded from ``Model/model.pkl``.
    """
    df = load_data()
    X = df.drop(columns=['target'])
    y = df.target
    # NOTE(review): the split below is not used further in this function.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    # Use a context manager so the model file handle is always closed.
    with open('Model/model.pkl', 'rb') as model_file:
        clf = pickle.load(model_file)

    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(client),
        feature_names=client.columns,
        class_names=['Non Solvable', 'Solvable'],
        mode='classification')
    exp = explainer.explain_instance(data_row=client.iloc[0],
                                     predict_fn=clf.predict_proba,
                                     num_features=7)

    # Display explainer HTML object
    components.html(exp.as_html(), height=400)
    return
def __init__(self,
             train,
             training_labels,
             feature_names,
             class_names,
             categorical_features,
             categorical_names,
             mode,
             verbose=True):
    """Wrap a ``LimeTabularExplainer`` built from the given training data.

    ``mode`` is kept on ``self._mode``; every other argument is forwarded
    to the explainer under the keyword of the same name.
    """
    self._mode = mode
    lime_options = dict(
        feature_names=feature_names,
        class_names=class_names,
        categorical_features=categorical_features,
        categorical_names=categorical_names,
        training_labels=training_labels,
        verbose=verbose,
        mode=self._mode,
    )
    self._explainer = lime_tabular.LimeTabularExplainer(train,
                                                        **lime_options)
def generate_local_lime_explanation(self, i, path_localocal=None):
    """Explain test instance ``i`` with LIME and save the result as HTML.

    Parameters
    ----------
    i: int
        position of the instance in ``self.X_test``.
    path_localocal: str, optional
        output file; when ``None`` the file is written to
        ``explainer_outputs/LIME/Local/<grid>/<model_name><i>.html``.

    Raises
    ------
    ValueError
        if the model is a Keras ``Sequential`` whose ``name`` is neither
        ``'DNN'`` nor ``'RNN'``.
    """
    explainer = lime_tabular.LimeTabularExplainer(
        training_data=np.array(self.X_train),
        class_names=['unstable', 'stable'],
        mode="classification",
        feature_names=self.feature_names)

    model_name = type(self.model).__name__
    if model_name == 'Sequential':
        if self.model.name == 'DNN':
            predict_fn = self.dnn_model_predict
            model_name = 'DNN'
        elif self.model.name == 'RNN':
            predict_fn = self.rnn_model_predict
            model_name = 'RNN'
        else:
            # Previously this fell through to an unbound predict_fn.
            raise ValueError(
                "Unsupported Sequential model name: %r" % (self.model.name,))
        start = timer()
        exp = explainer.explain_instance(
            data_row=np.squeeze(self.X_test[i]),
            predict_fn=predict_fn,
            num_features=17)
        end = timer()
    else:
        start = timer()
        exp = explainer.explain_instance(
            data_row=np.array(self.X_test.iloc[i]),
            predict_fn=self.model.predict_proba,
            num_features=17)
        end = timer()

    # The default path used to be computed but never used, so the default
    # call ended in save_to_file(None).
    if path_localocal is None:
        dir_name = os.path.join('explainer_outputs', 'LIME', 'Local',
                                self.grid)
        path_localocal = os.path.join(dir_name, model_name + str(i) + '.html')

    single_explanation_time = end - start
    exp.save_to_file(path_localocal)
    print('Single Lime Explanation Time:', single_explanation_time)
def prepare_lime(training_path,
                 class_names,
                 discretize_continuous=True,
                 sel_feat_file=None):
    """
    Prepare a LIME tabular explainer from an HDF5 training file.

    Parameters
    ----------
    training_path: string
        Path to the hdf5 file of the training data. Its ``features``
        dataset is read and the last column is dropped.
    class_names: list
        List containing the name of the classes.
    discretize_continuous: boolean
        To discretize continuous data using LIME.
    sel_feat_file: string
        Path to a file with a list of the indexes of the selected features.

    Returns
    -------
    Lime explainer
    """
    # The context manager closes the file even if reading fails.
    with h5.File(training_path, 'r') as train_data:
        features = train_data['features']
        X_train = np.array(features[:, 0:features.shape[1] - 1])

    if sel_feat_file is not None:
        selected_feat = read_data(sel_feat_file)
        X_train = X_train[:, selected_feat]

    # Features are named by their column index as strings.
    feature_names = list(np.arange(0, X_train.shape[1]).astype(str))
    explainer = lime_tabular.LimeTabularExplainer(
        X_train,
        feature_names=feature_names,
        class_names=class_names,
        discretize_continuous=discretize_continuous)
    return explainer
def lime_run(rf, X_train, y_train, X_test, y_test, i, feature_names):
    """Explain test row ``i`` with LIME and display results in Streamlit.

    A slider selects the number of sampling points. The LIME weights are
    drawn as a bar chart and a fidelity value from ``fidelity_lime`` is
    written to the page. Nothing is returned.
    """
    lm = []
    n_ft = len(feature_names)

    # Features are named by column index so that the names returned by
    # ``as_list`` can be used to index ``coef`` below.
    explainer = lime_tabular.LimeTabularExplainer(
        X_train,
        feature_names=range((X_train.shape[1])),
        class_names=[0, 1],
        discretize_continuous=False,
        verbose=True,
        sample_around_instance=True)

    # generate a random point and test using UI
    values = st.slider('Select number of sampling points', 200, 2000, 500)
    exp = explainer.explain_instance(X_test[i],
                                     rf.predict_proba,
                                     num_features=n_ft,
                                     num_samples=values)

    coef = [0] * n_ft
    for feature_index, weight in ((item[0], item[1])
                                  for item in exp.as_list()):
        coef[feature_index] = weight

    X_lime_scaled = exp.scaled_data
    scaler = explainer.scaler
    # Map the scaled samples back through the explainer's scaler.
    X_lime = exp.scaled_data * scaler.scale_ + scaler.mean_
    y_lime = rf.predict(X_lime)

    plt.bar(feature_names, coef)
    plt.xticks(rotation=45)
    # Enlarge the current figure by a fixed zoom factor.
    sizefactor = 2
    fig_size = plt.gcf().get_size_inches()
    plt.gcf().set_size_inches(sizefactor * fig_size)
    st.write(plt.gcf())

    lime_pred = lime_perturbed_pred(coef, X_lime_scaled, exp)
    sn = fidelity_lime(lime_pred, y_lime)
    lm.append(sn)
    st.write(sn)
    plt.close()
def explain_tabular():
    """Flask endpoint: explain one tabular sample with LIME.

    For a POST request with form data, the form fields ``model_path``,
    ``data``, ``dim`` and ``sample`` and the query argument ``numfeatures``
    are read. The response is a JSON document (sent as ``text/plain``)
    holding ``success`` and, on success, ``explanation`` mapping feature
    descriptions to weights.
    """
    data = {"success": "failed"}
    # TODO send sample to be explained
    if flask.request.method == "POST" and flask.request.form:
        form = flask.request.form

        print("try open model")
        # SECURITY NOTE: the path comes from the request and pickle.load
        # can execute arbitrary code. Only expose this endpoint to trusted
        # callers, or restrict model_path to a known directory.
        with open(form.get("model_path"), 'rb') as f:
            model = pickle.load(f)

        train_data = json.loads(form.get("data"))
        dim = json.loads(form.get("dim"))
        train_data = np.asarray(train_data)
        # Flat payload -> (rows, dim) matrix.
        train_data = train_data.reshape((train_data.size // dim, dim))

        sample = json.loads(form.get("sample"))
        # Use flask.request like the rest of the function; the bare name
        # `request` is not otherwise referenced here.
        num_features = int(flask.request.args.get("numfeatures"))

        explainer = lime_tabular.LimeTabularExplainer(
            train_data, mode="classification", discretize_continuous=True)
        exp = explainer.explain_instance(np.asarray(sample),
                                         model.predict_proba,
                                         num_features=num_features,
                                         top_labels=1)

        data["explanation"] = {entry[0]: entry[1] for entry in exp.as_list()}
        data["success"] = "success"
    return flask.Response(json.dumps(data), mimetype="text/plain")
def __init__(self, training_data, training_targets, feature_names,
             class_names):
    """
    Store the training data and build the LIME tabular explainer.

    Parameters
    ----------
    training_data: numpy array
        The data the machine learning model has been trained on
    training_targets: numpy array
        The targets belonging to ``training_data``; stored on the instance
    feature_names: list
        The names of the features
    class_names: list
        The names of the classes
    """
    self.training_data = training_data
    self.training_targets = training_targets
    # Summary of the training data from shap.kmeans (second argument 10).
    self.training_summary = shap.kmeans(training_data, 10)

    self.feature_names = feature_names
    self.number_of_features = len(feature_names)
    self.class_names = class_names

    lime_options = dict(
        training_data=self.training_data,
        feature_names=self.feature_names,
        class_names=self.class_names,
        discretize_continuous=True,
    )
    self.explainer = lt.LimeTabularExplainer(**lime_options)
# ## Explanations

# In[55]:

from lime import lime_tabular

# In[116]:

# LIME explainer over the training features, without discretization.
explainer = lime_tabular.LimeTabularExplainer(
    features_train_df,
    class_names=['notengaged', 'engaged'],
    feature_names=features_train_df.columns.tolist(),
    verbose=True,
    discretize_continuous=False,
)

# ### Explore some random points

# In[128]:

# Explain the training row at position 44 with the best model.
i = 44
exp = explainer.explain_instance(
    features_train_df.iloc[i],
    best_model.predict_proba,
)

# In[193]:
def metrics_lime(model, X_train, X_test, stddev = 0.1):
    """Compute standard, causal and stability metrics for LIME explanations.

    For every output index of ``model`` and every test point, LIME's local
    linear model is extracted and compared against the model at the point
    itself (standard), at perturbed neighbours (causal), and against the
    explanation obtained at those neighbours (stability).

    ``num_perturbations`` and ``generate_neighbor`` are not defined in this
    function and are expected to exist at module level.

    Parameters
    ----------
    model:
        object exposing ``predict``, ``set_index`` and ``predict_index``.
    X_train, X_test: 2-D arrays
        training and test matrices.
    stddev: float
        forwarded to ``generate_neighbor``.

    Returns
    -------
    (standard_metric, causal_metric, stability_metric), each an array with
    one entry per model output.
    """
    # Get the model predictions on the test data
    test_pred = model.predict(X_test)

    # Get the necessary sizes
    n_test = X_test.shape[0]
    d_in = X_test.shape[1]
    d_out = test_pred.shape[1]

    # Configure LIME
    exp = lime_tabular.LimeTabularExplainer(X_train, discretize_continuous = False, mode = "regression")

    def unpack_coefs(explainer, x, predict_fn, num_features, x_train, num_samples = 1000):
        # Returns [intercept, coef_1, ..., coef_d] of LIME's local model.
        d = x_train.shape[1]
        coefs = np.zeros((d))

        u = np.mean(x_train, axis = 0)
        sd = np.sqrt(np.var(x_train, axis = 0))

        explanation = explainer.explain_instance(x, predict_fn, num_features = num_features, num_samples = num_samples)

        for pair in explanation.local_exp[1]:
            coefs[pair[0]] = pair[1]

        # Rescale by the training std and shift the intercept by the
        # training mean (presumably converting from LIME's standardized
        # feature space back to raw units).
        coefs = coefs / sd
        intercept = explanation.intercept[1] - np.sum(coefs * u)

        return np.insert(coefs, 0, intercept)

    # Compute the standard, causal, and stability metrics
    standard_metric = np.zeros(d_out)
    causal_metric = np.zeros(d_out)
    stability_metric = np.zeros(d_out)
    for i in range(d_out):
        model.set_index(i)
        for j in range(n_test):
            x = X_test[j, :]

            # Get LIME's Explanation
            coefs = unpack_coefs(exp, x, model.predict_index, d_in, X_train)

            # Standard Metric
            standard_metric[i] += (np.dot(np.insert(x, 0, 1), coefs) - test_pred[j,i])**2

            for k in range(num_perturbations):
                x_pert = generate_neighbor(x, stddev = stddev)

                # Causal Metric
                model_pred = model.predict_index(x_pert.reshape(1, d_in))
                lime_pred = np.dot(np.insert(x_pert, 0, 1), coefs)
                causal_metric[i] += (lime_pred - model_pred)**2

                # Stability Metric: explain the perturbed point. Explaining
                # x again would only measure LIME's sampling noise.
                coefs_pert = unpack_coefs(exp, x_pert, model.predict_index, d_in, X_train)
                stability_metric[i] += np.sum((coefs_pert - coefs)**2)

    standard_metric /= n_test
    causal_metric /= num_perturbations * n_test
    stability_metric /= num_perturbations * n_test

    return standard_metric, causal_metric, stability_metric
def run(args):
    """Run one trial comparing LIME and MAPLE as explainers of an SVR model.

    Parameters
    ----------
    args: sequence
        ``args[0]`` is the trial identifier and ``args[1]`` the dataset
        name (the CSV is read from ``../Datasets/<dataset>.csv``).

    The model RMSE and the LIME/MAPLE faithfulness RMSEs at noise scales
    0.1 and 0.25 are written as JSON to ``Trials/<dataset>_<trial>.json``.
    """
    # Hyperparameters
    num_perturbations = 5

    # Fixes an issue where threads inherit the same rng state.
    # np.random.seed replaces the deprecated scipy.random alias.
    np.random.seed()

    # Arguments
    dataset = args[1]
    trial = args[0]

    # Output
    out = {}
    # The file is still created up front, but the context manager now
    # closes it even if the experiment raises.
    with open("Trials/" + dataset + "_" + str(trial) + ".json", "w") as file:

        # Load data
        X_train, y_train, X_valid, y_valid, X_test, y_test, train_mean, train_stddev = load_normalize_data("../Datasets/" + dataset + ".csv")
        n = X_test.shape[0]
        d = X_test.shape[1]

        scales = [0.1, 0.25]
        scales_len = len(scales)

        # Fit model
        model = fit_svr(X_train, y_train, X_test, y_test)
        out["model_rmse"] = np.sqrt(np.mean((y_test - model.predict(X_test))**2))

        # Fit LIME and MAPLE explainers to the model
        exp_lime = lime_tabular.LimeTabularExplainer(X_train, discretize_continuous=False, mode="regression")
        exp_maple = MAPLE(X_train, model.predict(X_train), X_valid, model.predict(X_valid))

        # Evaluate model faithfulness on the test set
        lime_rmse = np.zeros((scales_len))
        maple_rmse = np.zeros((scales_len))

        for i in range(n):
            x = X_test[i, :]

            # Allow full number of features
            coefs_lime = unpack_coefs(exp_lime, x, model.predict, d, X_train)

            e_maple = exp_maple.explain(x)
            coefs_maple = e_maple["coefs"]

            for j in range(num_perturbations):
                # One noise draw is shared by every scale.
                noise = np.random.normal(loc = 0.0, scale = 1.0, size = d)

                for k in range(scales_len):
                    scale = scales[k]

                    x_pert = x + scale * noise

                    model_pred = model.predict(x_pert.reshape(1,-1))
                    lime_pred = np.dot(np.insert(x_pert, 0, 1), coefs_lime)
                    maple_pred = np.dot(np.insert(x_pert, 0, 1), coefs_maple)

                    lime_rmse[k] += (lime_pred - model_pred)**2
                    maple_rmse[k] += (maple_pred - model_pred)**2

        lime_rmse /= n * num_perturbations
        maple_rmse /= n * num_perturbations

        lime_rmse = np.sqrt(lime_rmse)
        maple_rmse = np.sqrt(maple_rmse)

        out["lime_rmse_0.1"] = lime_rmse[0]
        out["maple_rmse_0.1"] = maple_rmse[0]
        out["lime_rmse_0.25"] = lime_rmse[1]
        out["maple_rmse_0.25"] = maple_rmse[1]

        json.dump(out, file)
def explain_tree(data, periods, model, train_set, sov_lab_encoder, le,
                 feat_key):
    """Explain the model's predictions for several periods with LIME.

    An interactive widget is shown first that looks up the long feature
    name behind a short key. Then, for every entry of ``periods``, a LIME
    explanation of that row is displayed in the notebook.

    Parameters
    ----------
    data:
        DataFrame indexed by feature with one column per period.
    periods:
        iterable of row positions to explain.
    model:
        classifier exposing ``predict_proba``.
    train_set:
        training matrix for the LIME explainer.
    sov_lab_encoder:
        encoder for the 'SovereignRating' column, or ``None``. When given,
        column 26 is declared categorical with the encoder's classes as
        value names; when ``None`` no categorical features are declared.
    le:
        object whose index (minus the last two entries, reversed) gives
        the class names.
    feat_key:
        DataFrame whose index lists the features and whose ``Key`` column
        holds their short keys.
    """
    import pandas as pd
    import numpy as np
    from lime import lime_tabular
    from ipywidgets import widgets, interactive
    from IPython.display import display, clear_output

    def f(Variable):
        # Long feature name for a short key.
        return feat_key[feat_key['Key'] == Variable].index[0]

    def on_button_clicked(b):
        with output:
            clear_output()
            print(w.result)

    ratios = ['Ratio' + str(i + 1) for i in range(0, 26)]
    ratios.append('SovereignRating')
    w = interactive(f, Variable=ratios)
    button = widgets.Button(description="Obtener nombre")
    output = widgets.Output()
    display(w)
    display(button, output)
    button.on_click(on_button_clicked)

    X_new = np.array(data.loc[feat_key.index].T)

    categorical_features = None
    categorical_names = None
    if sov_lab_encoder is not None:
        pos_sr = feat_key.index.get_loc(
            feat_key[feat_key["Key"] == 'SovereignRating'].index[0])
        X_new[:, pos_sr] = sov_lab_encoder.transform(X_new[:, pos_sr])
        # Only reference the encoder's classes when an encoder exists;
        # this used to fail with AttributeError for None.
        categorical_features = [26]
        categorical_names = {26: sov_lab_encoder.classes_}
    X_new = X_new.astype('float')

    class_names = list(le.index)[0:-2]
    class_names.reverse()
    # Use .index (very long names) or .Key (Ratio and number)
    feature_names = list(feat_key.Key)

    # Create the Lime explainer
    explainer = lime_tabular.LimeTabularExplainer(
        train_set,
        mode='classification',
        feature_names=feature_names,
        class_names=class_names,
        categorical_features=categorical_features,
        categorical_names=categorical_names,
        discretize_continuous=True)

    # Explaining prediction with Lime
    per = pd.DataFrame(list(data.columns), columns=["Periodo"])
    # Debug switch: set to True to also dump each explanation as text.
    print_exp = False
    for period in periods:
        print("Explicación para periodo " + str(per.loc[period].Periodo))
        exp = explainer.explain_instance(X_new[period],
                                         model.predict_proba,
                                         num_features=5,
                                         top_labels=2)
        exp.show_in_notebook(show_table=True, show_all=False)
        if print_exp:
            av_lab = exp.available_labels()
            for lab in av_lab:
                print('Explicación para rating %s' % class_names[lab])
                display('\n'.join(map(str, exp.as_list(label=lab))))
                print()
# Build the network and its prediction op inside the "model" scope.
network = MLP(shape)
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
    pred = network.model(X)

saver = tf.train.Saver(max_to_keep=1)

# Create tf session
sess = tf.Session()

# Wrap it for LIME
wrapper = Wrapper(sess, pred, X)
wrapper.set_index(0)

# Configure LIME
exp = lime_tabular.LimeTabularExplainer(
    X_train,
    mode="regression",
    discretize_continuous=False,
)

###
# Run Experiment
###

print("")
print("What is the 'Explanation'?")
print("The left column is the feature index")
print("The right column is the expected change of the model's prediction if we increase that feature by 1.")
print("The index of -1 is the intercept term for the explanation and should not be changed.")
def main():
    """Train an auto-encoder and compare LIME with its reconstruction error.

    Command line arguments: ``argv[1]`` CSV input file (no header),
    ``argv[2]`` index of the row to examine, ``argv[3]`` perturbation
    index passed to ``perturb``.

    The examined row is shifted by a fixed offset vector, standardized and
    explained with LIME using ``ae.calas`` as the prediction function. The
    LIME weights and the squared reconstruction error per feature are
    printed and handed to ``plot_magnitude``.
    """
    epochs = 100
    batch_size = 400
    input_dim = 12
    hidden_dim = 6
    rng = np.random.RandomState(12345)

    csv_in_file_name = sys.argv[1]
    test_id = int(sys.argv[2])
    perturb_ind = int(sys.argv[3])

    # Prefer the GPU and fall back to the CPU on any ordinary error.
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        with tf.device("/gpu:0"):
            print("Using gpu!")
            ae = AutoEncoder_tf(rng, input_dim, hidden_dim)
    except Exception:
        with tf.device("/cpu:0"):
            print("Using cpu!")
            ae = AutoEncoder_tf(rng, input_dim, hidden_dim)

    # Train
    # `.values` replaces DataFrame.as_matrix(), which newer pandas removed.
    rawdataX = pd.read_csv(csv_in_file_name, header=None).values
    raw_testX_positive = perturb(rng, input_dim, rawdataX[3000:], perturb_ind)

    # The builtin float replaces np.float, which newer NumPy removed.
    mean_std_scaler = preprocessing.StandardScaler().fit(
        rawdataX.astype(float))
    trainX = mean_std_scaler.transform(rawdataX.astype(float))
    testX_positive = mean_std_scaler.transform(raw_testX_positive)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        print('Training AutoEncoder...')
        for epoch in range(epochs):
            rng.shuffle(trainX)
            for batch_ind in range(10):
                batch_xs = trainX[batch_ind * batch_size:(batch_ind + 1) *
                                  batch_size]
                train_loss = ae.train(batch_xs, sess)
        print('Trained AutoEncoder.')

        feature_names = [str(x) for x in range(input_dim)]
        explainer = lime_tabular.LimeTabularExplainer(
            trainX,
            feature_names=feature_names,
            class_names=['Normal'],
            verbose=True)

        # Shift the chosen raw row by a fixed per-feature offset.
        examed_example = rawdataX[test_id] + np.asarray(
            [100, 8, 100, 172, 30, 30000, 200, 31, 1000, 14, 0, 800])
        scaled_examed_example = mean_std_scaler.transform(
            examed_example.reshape(1, -1).astype(float)).flatten()
        print(scaled_examed_example)

        print('Training LIME...')
        exp = explainer.explain_instance(scaled_examed_example,
                                         ae.calas,
                                         labels=[0],
                                         num_features=input_dim)
        print('Trained LIME.')

        # LIME weights by feature index, then ranked by absolute weight.
        lime_res = sorted(exp.as_map()[0], key=lambda x: x[0])
        sorted_lime_res = sorted(lime_res,
                                 key=lambda x: np.absolute(x[1]),
                                 reverse=True)
        # %-formatting prints the same text under Python 2 and Python 3.
        print("lime %s" % (sorted_lime_res,))
        lime_ind_ord = [ele[0] for ele in sorted_lime_res]
        lime_to_figure = [lime_ind_ord.index(u) for u in range(input_dim)]

        # Squared reconstruction error per feature.
        reconstruction = ae.predict(np.asarray([scaled_examed_example]))[0]
        direc_res = [
            (i, v)
            for i, v in enumerate((scaled_examed_example - reconstruction)**2)
        ]
        sorted_direc_res = sorted(direc_res, key=lambda x: x[1], reverse=True)
        print("AE %s" % (sorted_direc_res,))
        direc_ind_ord = [ele[0] for ele in sorted_direc_res]
        direc_to_figure = [direc_ind_ord.index(u) for u in range(input_dim)]

        plot_magnitude(lime_res, direc_res, scaled_examed_example,
                       str(test_id))
def run(args):
    """Run one trial using LIME to explain a MAPLE model.

    Parameters
    ----------
    args: sequence
        ``args[0]`` is the dataset name (the CSV is read from
        ``../Datasets/<dataset>.csv``) and ``args[1]`` the trial
        identifier.

    MAPLE's RMSE on the test set and the LIME/MAPLE faithfulness RMSEs at
    noise scales 0.1 and 0.25 are written as JSON to
    ``Trials/<dataset>_<trial>.json``.
    """
    # Hyperparameters
    num_perturbations = 5

    # Fixes an issue where threads inherit the same rng state.
    # np.random.seed replaces the deprecated scipy.random alias.
    np.random.seed()

    # Arguments
    dataset = args[0]
    trial = args[1]

    # Output
    out = {}
    # The file is still created up front, but the context manager now
    # closes it even if the experiment raises.
    with open("Trials/" + dataset + "_" + str(trial) + ".json", "w") as file:

        # Load data
        X_train, y_train, X_valid, y_valid, X_test, y_test, train_mean, train_stddev = load_normalize_data(
            "../Datasets/" + dataset + ".csv")
        n = X_test.shape[0]
        d = X_test.shape[1]

        # Noise scale parameters (fixed; loading them from
        # "Sigmas/<dataset>.json" is disabled)
        scales = [0.1, 0.25]
        scales_len = len(scales)

        # Fit MAPLE model
        exp_maple = MAPLE(X_train, y_train, X_valid, y_valid)

        # Fit LIME to explain MAPLE
        exp_lime = lime_tabular.LimeTabularExplainer(
            X_train, discretize_continuous=False, mode="regression")

        # Evaluate model faithfulness on the test set
        rmse = 0.0  # MAPLE accuracy on the dataset
        lime_rmse = np.zeros((scales_len))
        maple_rmse = np.zeros((scales_len))
        for i in range(n):
            x = X_test[i, :]

            # LIME's default parameter for num_samples is 5000
            # 1) This is larger than any of the datasets we tested on
            # 2) It makes explaining MAPLE impractically slow since the
            #    complexity of MAPLE's predict() depends on the dataset size
            coefs_lime = unpack_coefs(exp_lime,
                                      x,
                                      exp_maple.predict,
                                      d,
                                      X_train,
                                      num_samples=100)

            e_maple = exp_maple.explain(x)
            coefs_maple = e_maple["coefs"]

            rmse += (e_maple["pred"] - y_test[i])**2

            for j in range(num_perturbations):
                # One noise draw is shared by every scale.
                noise = np.random.normal(loc=0.0, scale=1.0, size=d)

                for k in range(scales_len):
                    scale = scales[k]

                    x_pert = x + scale * noise

                    e_maple_pert = exp_maple.explain(x_pert)
                    model_pred = e_maple_pert["pred"]
                    lime_pred = np.dot(np.insert(x_pert, 0, 1), coefs_lime)
                    maple_pred = np.dot(np.insert(x_pert, 0, 1), coefs_maple)

                    lime_rmse[k] += (lime_pred - model_pred)**2
                    maple_rmse[k] += (maple_pred - model_pred)**2

        rmse /= n
        lime_rmse /= n * num_perturbations
        maple_rmse /= n * num_perturbations

        rmse = np.sqrt(rmse)
        lime_rmse = np.sqrt(lime_rmse)
        maple_rmse = np.sqrt(maple_rmse)

        out["model_rmse"] = rmse[0]
        out["lime_rmse_0.1"] = lime_rmse[0]
        out["maple_rmse_0.1"] = maple_rmse[0]
        out["lime_rmse_0.25"] = lime_rmse[1]
        out["maple_rmse_0.25"] = maple_rmse[1]

        json.dump(out, file)