def test_lime_text_tabular_not_equal_random_state(self):
    """Explainers seeded with different random states must not agree."""
    categories = ['alt.atheism', 'soc.religion.christian']
    newsgroups_train = fetch_20newsgroups(subset='train',
                                          categories=categories)
    newsgroups_test = fetch_20newsgroups(subset='test',
                                         categories=categories)
    class_names = ['atheism', 'christian']
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    # explain the same document under two seeds and compare the maps
    maps = []
    for seed in (10, 20):
        seeded = LimeTextExplainer(class_names=class_names, random_state=seed)
        explanation = seeded.explain_instance(newsgroups_test.data[83],
                                              pipeline.predict_proba,
                                              num_features=6)
        maps.append(explanation.as_map())
    self.assertFalse(maps[0] == maps[1])
def test_lime_text_tabular_not_equal_random_state(self):
    """Different random_state values should yield different explanations."""
    categories = ['alt.atheism', 'soc.religion.christian']
    newsgroups_train = fetch_20newsgroups(subset='train',
                                          categories=categories)
    newsgroups_test = fetch_20newsgroups(subset='test',
                                         categories=categories)
    class_names = ['atheism', 'christian']
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    document = newsgroups_test.data[83]
    exp_a = LimeTextExplainer(class_names=class_names,
                              random_state=10).explain_instance(
                                  document,
                                  pipeline.predict_proba,
                                  num_features=6)
    exp_b = LimeTextExplainer(class_names=class_names,
                              random_state=20).explain_instance(
                                  document,
                                  pipeline.predict_proba,
                                  num_features=6)
    self.assertFalse(exp_a.as_map() == exp_b.as_map())
def explainer(method: str, path_to_file: str, text: str, lowercase: bool,
              num_samples: int):
    """Run the LIME explainer on the classifier selected by *method*.

    Fix: the original return annotation declared ``-> LimeTextExplainer``,
    but the function actually returns the explanation object produced by
    ``explain_instance`` — the wrong annotation has been removed.

    Args:
        method: classifier identifier forwarded to ``explainer_class``.
        path_to_file: path to the serialized classifier.
        text: document to explain.
        lowercase: lower-case the text first (needed by some classifiers).
        num_samples: size of the LIME perturbation neighborhood.

    Returns:
        The LIME ``Explanation`` for the single top label.
    """
    model = explainer_class(method, path_to_file)
    predictor = model.predict
    # Lower case the input text if requested (for certain classifiers)
    if lowercase:
        text = text.lower()
    explainer = LimeTextExplainer(
        # Split on whitespace only
        split_expression=lambda x: x.split(),
        # Our classifier uses trigrams or contextual ordering to classify
        # text. Hence, order matters, and we cannot use bag of words.
        bow=False,
        # Specify class names for this case
        class_names=[1, 2, 3, 4, 5])
    # Make a prediction and explain it:
    exp = explainer.explain_instance(
        text,
        classifier_fn=predictor,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
    return exp
def explain(self, docs):
    """Generate LIME Explanations for a list of documents.

    Parameters
    ----------
    docs : list of strings
        List of input documents.

    Returns
    -------
    exps : list of classes
        One Explanation object per input document (supports .to_list,
        to_notebook, etc.).
    """
    lime = LimeTextExplainer()
    # NOTE: this might have messed up in a generator
    return [
        lime.explain_instance(doc,
                              self.pipeline.predict_proba,
                              top_labels=self.n_classes) for doc in docs
    ]
def test_lime_text_explainer_bad_regressor(self):
    """Passing Lasso as model_regressor should raise TypeError."""
    newsgroups_train = fetch_20newsgroups(subset='train')
    newsgroups_test = fetch_20newsgroups(subset='test')
    # shorten target names, keeping two components for the 'misc' groups
    class_names = []
    for name in newsgroups_train.target_names:
        if 'misc' not in name:
            class_names.append(name.split('.')[-1])
        else:
            class_names.append('.'.join(name.split('.')[-2:]))
    class_names[3] = 'pc.hardware'
    class_names[4] = 'mac.hardware'
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    explainer = LimeTextExplainer(class_names=class_names)
    idx = 1340
    with self.assertRaises(TypeError):
        explainer.explain_instance(newsgroups_test.data[idx],
                                   pipeline.predict_proba,
                                   num_features=6,
                                   labels=[0, 17],
                                   model_regressor=Lasso())
class LSTMExplainer:
    """Wraps a saved LSTM text classifier with a LIME explainer.

    Loads the serialized torchtext field and model weights from
    ``app/models/binaries`` and exposes :meth:`get_lime_exp`, which renders
    a LIME explanation as HTML.
    """

    def __init__(self, tokenizer, num_features):
        # tokenizer: callable str -> list of tokens, used to pre-tokenize
        # input text; num_features: how many words LIME keeps per report.
        super().__init__()
        # saved torchtext Field: supplies vocabulary, vectors and pad token
        self.TEXT = torch.load(os.getcwd() + '/app/models/binaries' +
                               '/text_field.ptz')
        # hyper-parameters must match the values used when the model was
        # trained, otherwise load_state_dict below will fail
        embedding_size = 300
        lstm_hidden = 200
        fc_hidden = [100, 50]
        self.model = NNet(self.TEXT.vocab.vectors, embedding_size, 2,
                          self.TEXT.vocab.stoi[self.TEXT.pad_token],
                          lstm_hidden, fc_hidden).to(device)
        self.model.load_state_dict(
            torch.load(os.getcwd() + '/app/models/binaries' + '/rnn_model.pt',
                       map_location=device))  # TODO change to Path
        self.predict = Predict(self.model, self.TEXT)
        self.explainer = LimeTextExplainer(
            class_names=['Negative', 'Positive'])
        self.tokenizer = tokenizer
        self.num_features = num_features

    def get_lime_exp(self, text):
        """Return a LIME explanation for *text* as an HTML fragment.

        The text is re-joined from the tokenizer output so LIME's default
        whitespace splitting lines up with the model's tokenization.
        """
        text = ' '.join(self.tokenizer(text))
        exp = self.explainer.explain_instance(text,
                                              self.predict,
                                              num_features=self.num_features,
                                              top_labels=2,
                                              num_samples=500)
        # only the 'Positive' label (index 1) is rendered
        return exp.as_html(text=True, labels=(1, ))
def classify_lime(model, dataset, train_dataset, config_dict):
    """Attach per-word LIME weights to every example in *dataset*.

    For each example, explains label 1 with LIME and stores the weights
    (sorted by word position) on ``example.predictions``.
    Returns the mutated dataset.
    """
    # choose the replacement token used when LIME masks a word
    if config_dict.get("lime_mask_string_use_pad", False):
        mask_token = tokenizer.pad_token
    else:
        mask_token = tokenizer.mask_token
    explainer = LimeTextExplainer(
        class_names=(0, 1),
        # False causes masking to be done, True means removing words
        bow=False,
        mask_string=mask_token,
        feature_selection="none",  # use all features
        split_expression=r"\s",
    )
    classify_sentence_partial = partial(
        batch_predict,
        model=model,
        dataset=train_dataset,
        batch_size=config_dict["per_device_eval_batch_size"],
        method="lime",
    )
    for idx in range(len(dataset)):
        if idx % 50 == 0:
            logger.info("lime_sample_idx:" + str(idx) + "/" +
                        str(len(dataset)))
        exp = explainer.explain_instance(
            " ".join(dataset.examples[idx].words),
            classify_sentence_partial,
            labels=(1, ),
            num_samples=config_dict["lime_num_samples"],
        )
        pairs = exp.as_map()[1]
        pairs.sort(key=lambda pair: pair[0])  # order by word position
        dataset.examples[idx].predictions = [weight for _, weight in pairs]
    return dataset
def final_yok_classifing(sentence):
    """Explain the profanity classification of the first sentence with LIME."""
    labels = ['욕설이 아님', '욕설']
    lime = LimeTextExplainer(class_names=labels)
    explanation = lime.explain_instance(sentence[0],
                                        yok_classifier_lime,
                                        num_features=100)
    return explanation.as_list()
def go():
    """Classify the user's query and render go.html with a LIME report."""
    # save user input in query
    query = request.args.get('query', '')
    print(
        "generating classification prediction for message {}...".format(query))
    # use model to predict classification for query
    classification_labels = model.predict([query])[0]
    classification_results = dict(zip(df.columns[4:], classification_labels))
    # set-up Lime over the label columns
    classes = df.columns[4:].to_list()
    print("classes = {}".format(classes))
    explanation = LimeTextExplainer(class_names=classes).explain_instance(
        query, model.predict_proba, num_features=10, top_labels=3)
    # This will render the go.html Please see that file.
    return render_template('go.html',
                           query=query,
                           exp=explanation.as_html(),
                           model=model[-1],
                           classification_result=classification_results)
def explain(self, docs):
    """Generate LIME Explanations for a list of documents.

    Parameters
    ----------
    docs : list of strings
        List of input documents.

    Returns
    -------
    exps : list of classes
        One Explanation object per input document (supports .to_list,
        to_notebook, etc.).
    """
    text_explainer = LimeTextExplainer()
    explanations = []
    # NOTE: this might have messed up in a generator
    for document in docs:
        explanations.append(
            text_explainer.explain_instance(document,
                                            self.pipeline.predict_proba,
                                            top_labels=self.n_classes))
    return explanations
def classifier(request, format=None):
    """Classify a text description and return LIME-backed probabilities."""
    clf = Pickle.objects.get(name='clf').pickled_model
    vect = Pickle.objects.get(name='tfidf').pickled_model
    input_text = request.data.get('description', 'ERROR')
    if not input_text:
        return Response({'error': 'Input is an empty string'},
                        status=status.HTTP_404_NOT_FOUND)
    standardized_text = standardize_text(input_text)
    exp = LimeTextExplainer(class_names=GRADE_CATEGORIES).explain_instance(
        standardized_text,
        make_pipeline(vect, clf).predict_proba,
        num_features=6,
        labels=[0, 1, 2, 3])
    predict_probas = dict(zip(exp.class_names, exp.predict_proba))
    # class with the highest predicted probability
    prediction = max(predict_probas.items(), key=itemgetter(1))[0]
    payload = {
        'final_prediction': prediction,
        'ordered_class_names': exp.class_names,
        'predict_probas': predict_probas,
        'as_list': {
            exp.class_names[lbl]: exp.as_list(label=lbl)
            for lbl in exp.available_labels()
        },
        'standardized_text': standardized_text,
    }
    return Response(payload, status=status.HTTP_201_CREATED)
def limevisual(pData, pDesc, Idx, pClassNames, pAccountName, pVec,
               nNumFeatures, nTopLabels, tLabels, pRootDir):
    """Visualize a LIME explanation for one row of the training data.

    Loads the model for the row's intent, explains the row's text column
    with LIME, shows the result in the notebook and saves it as HTML.

    Args:
        pData: DataFrame with an 'Intent' column and the text column
            named by ``pDesc``.
        pDesc: name of the text column to explain.
        Idx: row index into ``pData`` (coerced with ``int``).
        pClassNames: class names forwarded to ``LimeTextExplainer``.
        pAccountName: account identifier used to locate the saved model.
        pVec: fitted vectorizer, combined with the model into a pipeline.
        nNumFeatures: number of features in the explanation.
        nTopLabels: number of top labels to explain.
        tLabels: labels passed to ``show_in_notebook``.
        pRootDir: root directory handed to ``loadmodel``.

    Returns:
        0 on success, -1 on any exception (details are printed).
    """
    try:
        pIntent = pData['Intent'][int(Idx)]
        _, pModels = loadmodel(pRootDir, pAccountName, pIntent)
        pPipeModel = make_pipeline(pVec, pModels)
        # word tokenizer mirroring sklearn's default token pattern
        tokenizer = lambda doc: re.compile(r"(?u)\b\w\w+\b").findall(doc)
        pExplainer = LimeTextExplainer(class_names=pClassNames,
                                       split_expression=tokenizer)
        pExplainText = pExplainer.explain_instance(
            pData[pDesc][int(Idx)],
            classifier_fn=pPipeModel.predict_proba,
            num_features=int(nNumFeatures),
            top_labels=int(nTopLabels))
        pExplainText.show_in_notebook(text=pData[pDesc][int(Idx)],
                                      labels=tLabels)
        # NOTE(review): hard-coded absolute Windows path — should be
        # parameterized before this runs anywhere else.
        pExplainText.save_to_file(
            'C:\\Users\\tamohant\\Desktop\\Auto_synthesis_Training_data\\AutoSynthesisLite\\demo.html',
            labels=None,
            predict_proba=True,
            show_predicted_value=True)
    except Exception as e:
        print(
            '*** ERROR[001]: Error in visualiation file of Limevisual function: ',
            sys.exc_info()[0], str(e))
        print(traceback.format_exc())
        return (-1)
    return (0)
def get_lime(model, test_tokens, model_name):
    """Run LIME over each test document and collect per-word weights.

    Fixes: the split expression used a non-raw string literal (``'\\s+'``),
    which relies on Python leaving the unknown ``\\s`` escape intact and
    emits a DeprecationWarning on modern interpreters; the per-document
    ``' '.join(features)`` was also recomputed once per word instead of
    once per document.

    Args:
        model: fitted classifier passed through ``wrapper_clf_predict``.
        test_tokens: iterable of whitespace-tokenized document strings.
        model_name: identifier forwarded to ``wrapper_clf_predict``.

    Returns:
        (features_l, scores_l): per-document space-joined feature strings
        and the matching LIME weights (abs value to be taken downstream).
    """
    explainer = LimeTextExplainer(class_names=["genuine", "deceptive"],
                                  split_expression=r'\s+')
    W = []
    for idx, text in enumerate(test_tokens):
        tmp_d = {}
        for token in text.split():
            tmp_d[token] = 1
        exp = explainer.explain_instance(text,
                                         partial(wrapper_clf_predict,
                                                 model=model,
                                                 model_name=model_name),
                                         num_features=len(text.split()),
                                         num_samples=1000)
        # warn when LIME merged duplicate tokens (counts won't line up)
        if len(tmp_d) != len(exp.as_list()):
            print(idx, len(tmp_d), len(dict(exp.as_list())))
        W.append(dict(exp.as_list()))
        if (idx + 1) % 10 == 0:
            print('{} instances have been processed..'.format(idx + 1))
    features_l, scores_l = [], []
    for d in W:
        features, scores = [], []
        for key, score in d.items():
            features.append(key)
            scores.append(score)  # abs value should be taken subsequently
        # join once per document (previously re-joined for every word)
        features_l.append(' '.join(features))
        scores_l.append(scores)
    return features_l, scores_l
def run(self, input_text, print_results=True):
    """Classify *input_text* and explain the top intents with LIME.

    Args:
        input_text: raw text to classify.
        print_results: when True, print the intent/score table and the
            explanation for the first class.

    Returns:
        The LIME explanation object.
    """
    # classify_text returns class names followed by scores in one flat list
    output = self.classify_text(input_text, True)
    outputlen = len(output)
    # NOTE(review): these slices drop the final element of each half
    # (the '- 1' bounds) — confirm that is intentional and not off-by-one.
    classes = output[0:int((outputlen / 2) - 1)]
    scores = output[int((outputlen / 2)):outputlen - 1]
    if print_results:
        print('Intents: ' + '\t'.join(map(str, classes)))
        print('Scores: ' + '\t'.join(map(str, scores)))
    # explain class
    explainer = LimeTextExplainer(class_names=classes)
    exp = explainer.explain_instance(input_text,
                                     self.classify_text,
                                     num_features=7,
                                     top_labels=3,
                                     num_samples=1000)
    # print explanation for the first class only
    if print_results:
        print("")
        print('Explanation for class %s' % classes[0])
        print('\n'.join(map(str, exp.as_list(label=0))))
    return exp
def explain_one_example(self, idx=None, num_features=5, print_out=True):
    """Explain the prediction for one validation datapoint with LIME.

    # Arguments:
        idx: int, index into the validation data; a random index is drawn
            when omitted (default=None)
        num_features: int, number of explanatory features (default=5)
        print_out: boolean, print the tweet, its probabilities and a
            pyplot figure (default=True)

    # Returns:
        exp: lime.explanation.Explanation object
    """
    if idx is None:
        idx = np.random.choice(self.for_explanation.index)
    tweet = self.for_explanation[idx]
    lime = LimeTextExplainer(class_names=self.class_names)
    explanation = lime.explain_instance(tweet,
                                        self.predict_proba,
                                        num_features=num_features)
    if print_out:
        print('Tweet {}: {}'.format(idx, tweet))
        print(self.predict_proba([tweet]))
        print(explanation.as_pyplot_figure())
        plt.show()
    return explanation
class ExplainerText(object):
    """LIME explainer wrapped around a bag-of-words baseline classifier.

    Example::

        X = df.review.astype(str).map(lambda x: ' '.join(jieba.cut(x)))
        y = df.label
        enlp = ExplainNLP()
        enlp.fit(X, y)
        enlp.explain(X[0])
    """

    # NOTE(review): the default estimator instance is created once at class
    # definition time and shared across all ExplainerText() calls that rely
    # on the default — confirm that sharing is intended.
    def __init__(self, estimator=LogisticRegression(), class_names=None):
        self._baseline = BaselineBow(estimator)()
        self._explainer = LimeTextExplainer(verbose=True,
                                            class_names=class_names)

    def fit(self, X, y):
        """Fit the underlying baseline model and return it."""
        self._baseline.fit(X, y)
        return self._baseline

    def explain(self, sentence, num_features=6):
        """Explain one sentence ('分词 空格 拼接' — tokens joined by spaces).

        :param sentence: space-joined pre-tokenized text
        :param num_features: number of features in the explanation
        :return: the LIME explanation object
        """
        explanation = self._explainer.explain_instance(
            sentence, self._baseline.predict_proba, num_features=num_features)
        # only render inline text for short sentences
        explanation.show_in_notebook(text=1 if len(sentence) < 256 else 0)
        return explanation
def finalExplain_n(codes):
    """Generate comments for code snippets and LIME-explain each keyword.

    Fix: ``tmpExp['lime']`` previously stored the bound method
    ``explanation.as_list`` itself instead of its result; it now stores the
    actual (word, weight) list.

    Args:
        codes: iterable of code snippets to translate and explain.

    Returns:
        List of dicts with the code, its generated comment, the comment's
        RAKE keywords, and one LIME explanation per keyword.
    """
    resData = []
    r = Rake()
    classNames = ['negative', 'positive']
    exp = LimeTextExplainer(class_names=classNames)
    for j, code in enumerate(codes):
        tmpResult = {}
        c = translate(code)
        # rebuild the generated comment, stopping at the end-of-sequence tag
        com = ''
        for i in range(1, len(c)):
            if c[i] == '</s>':
                break
            com += c[i] + ' '
        tmpResult['code'] = code
        tmpResult['comment'] = com
        r.extract_keywords_from_text(com)
        comKeys = r.get_ranked_phrases()
        tmpResult['commentKeywords'] = comKeys
        tmpList = []
        for _key in comKeys:
            # predictorLime reads the module-level ``key`` variable
            global key
            key = _key
            tmpExp = {
                'commentKeyword': key,
            }
            explanation = exp.explain_instance(code,
                                               predictorLime,
                                               num_features=6)
            print(explanation.as_list())
            # bug fix: call as_list() — previously the bound method object
            # itself was stored, which is not JSON-serializable data
            tmpExp['lime'] = explanation.as_list()
            tmpList.append(tmpExp)
        tmpResult['explanations'] = tmpList
        resData.append(tmpResult)
    return resData
def explainer(args, text, num_samples: int = 20):
    """Run LIME explainer on provided classifier.

    Args:
        args: configuration forwarded to ``WrapedSenti``.
        text: document to explain.
        num_samples: size of the LIME perturbation neighborhood.

    Returns:
        The LIME explanation for the single top label.
    """
    model = WrapedSenti(args)
    lime = LimeTextExplainer(
        # split on whitespace; our classifier uses bigrams / contextual
        # ordering, so order matters and bag-of-words must be disabled
        split_expression=lambda s: s.split(),
        bow=False,
        class_names=["neutral", "positive", "negative"],
    )
    return lime.explain_instance(
        text,
        classifier_fn=model.predict,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
def prediction(txt, sentiment, logistic, num_features):
    """Save a LIME report and a logistic-weight bar chart for *txt*.

    Fixes: the word-stripping regex used a non-raw string (``"[^\\w]"``
    triggers an invalid-escape DeprecationWarning); the vectorizer feature
    names and model coefficients are now fetched once instead of on every
    loop iteration.

    Args:
        txt: text to explain.
        sentiment: object exposing a fitted ``tfidf_vect`` vectorizer.
        logistic: fitted logistic-regression model.
        num_features: number of features for the LIME explanation.
    """
    # --- LIME ---
    c = make_pipeline(sentiment.tfidf_vect, logistic)
    class_names = ['NEGATIVE', 'POSITIVE']
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(txt,
                                     c.predict_proba,
                                     num_features=num_features)
    output = "static/outputs/output.html"
    exp.save_to_file(output)
    exp.as_pyplot_figure(label=1)
    plt.savefig('static/outputs/lime_explanation_graph.png')
    # --- LOGISTIC REGRESSION weights for words present in the text ---
    list_of_words = re.sub(r"[^\w]", " ", txt).split()
    words_with_weights = defaultdict()
    # hoisted: both lookups are loop-invariant
    feats = sentiment.tfidf_vect.get_feature_names()
    coefs = logistic.coef_[0]
    for word in list_of_words:
        if word in feats:
            ind = feats.index(word)
            words_with_weights[word] = coefs[ind]
    data = pd.DataFrame.from_dict(words_with_weights, orient='index')
    # green bars for positive weights, red for negative
    data[0].plot(kind='barh',
                 color=(data[0] > 0).map({
                     True: 'g',
                     False: 'r'
                 }))
    plt.savefig('static/outputs/log_explanation_graph.png')
def get_result_per_word(self, text, num_samples):
    """Map each word of *text* to its per-intent LIME relevance scores.

    Returns a dict {word: [{"intent": ..., "relevance": ...}, ...]} with
    each word's entries sorted by descending relevance; empty when there
    are no intents or LIME fails with a ValueError.
    """
    if not self.intention_names:
        return {}
    explainer = LimeTextExplainer(class_names=self.intention_names)
    labels = list(range(len(self.intention_names)))
    try:
        exp = explainer.explain_instance(text,
                                         self.parse,
                                         num_features=6,
                                         labels=labels,
                                         num_samples=num_samples)
    except ValueError:
        labels = []  # nothing to iterate below
    result_per_word = {}
    for label in labels:
        for word, weight in exp.as_list(label=label):
            result_per_word.setdefault(word, []).append({
                "intent": self.intention_names[label],
                "relevance": weight * 100
            })
    for word in result_per_word:
        result_per_word[word] = sorted(result_per_word[word],
                                       key=lambda k: k.get("relevance"),
                                       reverse=True)
    return result_per_word
def explain_prediction(sent, file_name):
    """Explain the spaCy prediction for *sent* and write the report to HTML."""
    # vect=transform_inp_sent_to_vect(sent)
    labels = get_categories(sent, file_name)
    lime = LimeTextExplainer(class_names=labels)
    explanation = lime.explain_instance(sent, spacy_prediction, labels=[0, 1])
    return explanation.save_to_file(
        r'{}explanation.html'.format(DIRECTORY_PATH))
def get_result_per_intent(self, text, num_samples):
    """Map each intent to its LIME word relevances plus a summary entry.

    Every intent gets one {"word", "relevance"} entry per explained word
    (relevance scaled to percent) and a trailing {"sum", "relevance": -1}
    entry with the unscaled weight total; lists are sorted by descending
    relevance.
    """
    explainer = LimeTextExplainer(class_names=self.intention_names)
    labels = list(range(len(self.intention_names)))
    exp = explainer.explain_instance(text,
                                     self.parse,
                                     num_features=6,
                                     labels=labels,
                                     num_samples=num_samples)
    result_per_intent = {name: [] for name in self.intention_names}
    for label in labels:
        name = self.intention_names[label]
        total = 0
        for word, weight in exp.as_list(label=label):
            result_per_intent[name].append({
                "word": word,
                "relevance": weight * 100
            })
            total += weight
        result_per_intent[name].append({"sum": total, "relevance": -1})
    for name in result_per_intent:
        result_per_intent[name] = sorted(
            result_per_intent[name],
            key=lambda k: k.get("relevance"),
            reverse=True,
        )
    return result_per_intent
def limer(example):
    """Show a LIME explanation for *example* inside the notebook."""
    # TODO: return HTML instead of the notebook widget; find an HTML API
    explanation = LimeTextExplainer().explain_instance(
        spacing_example(example),
        lambda s: do_inference(s, True).detach().numpy(),
        top_labels=1)
    explanation.show_in_notebook()
def explain_instance(headline, body):
    """Explain the stance prediction for a headline/body pair with LIME."""
    merged = combine_sentence(headline, body)
    lime = LimeTextExplainer(class_names=CLASS_NAMES)
    return lime.explain_instance(merged,
                                 classifier_fn,
                                 labels=[0, 1, 2, 3],
                                 top_labels=4,
                                 num_samples=4)
class Lime:
    """Thin wrapper around LimeTextExplainer with preset knobs.

    Attribute defaults (num_features, num_samples, top_labels, ...) can be
    adjusted on the instance before calling :meth:`explain_text`.
    """

    def __init__(self, class_names):
        self.class_names = class_names
        self.explainer = LimeTextExplainer(class_names=class_names)
        # explanation configuration
        self.num_features = 20
        self.num_samples = 20
        self.use_top_labels = True
        self.top_labels = 2
        self.investigate_labels = list(range(len(class_names)))

    def explain_text(self, text_str, predict_fn):
        """Explain the outcome from a text.

        :param text_str: text to investigate
        :param predict_fn: callable mapping a list of raw strings to class
            probabilities, e.g.::

                def predict(raw_str_arr):
                    encoded = tokenizer(raw_str_arr, truncation=True,
                                        padding=True)
                    tf_slice = tf.data.Dataset.from_tensor_slices(
                        (dict(encoded),
                         [0 for i in range(len(raw_str_arr))]))
                    return model.predict(tf_slice.batch(1))[0]

        :return: explanation object usable as::

                print(exp.as_list())
                exp.show_in_notebook()
                exp.as_pyplot_figure()
        """
        options = dict(num_features=self.num_features,
                       num_samples=self.num_samples)
        if self.use_top_labels:
            options['top_labels'] = self.top_labels
        else:
            options['labels'] = self.investigate_labels
        return self.explainer.explain_instance(text_str, predict_fn,
                                               **options)
def lime_explanation(classifier, data, features=MAX_FEATURES):
    """Explain one document using the classifier's predicted probabilities."""
    return LimeTextExplainer(class_names=CLASS_NAMES).explain_instance(
        text_instance=data,
        classifier_fn=classifier.predict_proba,
        num_features=features,
    )
def explain(self, text, nwords, return_weights=False):
    '''
    Use `LimeTextExplainer` to obtain the top `nwords` most important/polar
    words in the `text` as an explanation.

    Parameters
    --------------
    text: str
        The text to explain.
    nwords: int
        The number of most important words to return (i.e. explanation
        size).
    return_weights: bool
        Set to True to return the weights assigned by LIME also.

    Returns
    ---------------
    word_ranking : list
        Indexes of the `nwords` top-ranked words in the text.
    ranked_words: list
        List of `nwords` top-ranked words in the text.
    weights: dict, optional
        The dictionary of weights (wordposition -> weight) assigned by
        LIME to the words in the text.
    explanation: optional
        The explanation object returned by `LimeTextExplainer`.
    '''
    text = preprocess_text(text)
    text_words = get_tokens(text)
    class_names = ['negative', 'positive']
    # bow is set to False because word order is important
    explainer = LimeTextExplainer(class_names=class_names,
                                  feature_selection='auto',
                                  bow=False,
                                  split_expression=' ',
                                  verbose=False)
    explanation = explainer.explain_instance(
        text_instance=text,
        labels=[0, 1],
        classifier_fn=self.predict_texts,
        num_features=nwords,
        num_samples=self.nsamples)
    # sort weights by decreasing absolute value; as_map()[1] holds
    # (word position, weight) pairs for the positive label
    weights = OrderedDict(
        sorted(explanation.as_map()[1], key=lambda weight: -abs(weight[1])))
    # positions (keys) of the sorted weights give the word ranking
    word_ranking = np.array(list(weights.keys()))
    ranked_words = [text_words[i] for i in word_ranking]
    if return_weights:
        return word_ranking, ranked_words, weights, explanation
    return word_ranking, ranked_words
def model_load_and_explain(x_text_input):
    """Explain a CNN text prediction with LIME and save the HTML report.

    Explains ``x_text_input`` (or a built-in sample review when the literal
    string 'default' is passed), replaces the two most influential words
    with '<unk>' to show how the prediction shifts, then writes the LIME
    report under static/oi_lime and returns the generated file name.
    """
    from lime import lime_text
    print(max_document_length)
    if x_text_input == 'default':
        x_text_instance = '" extreme ops " exceeds expectations . good fun , good action , good acting , good dialogue , good pace , good cinematography .'
    else:
        x_text_instance = x_text_input
    # print(x_text_instance)
    output = cnn.predict_text_instance([x_text_instance])  #batch -> instnace
    from lime.lime_text import LimeTextExplainer
    class_names = ['Negative', 'Positive']
    explainer = LimeTextExplainer(class_names=class_names)
    # print(x_text_instance)
    # print(type(x_text_instance))
    exp = explainer.explain_instance(x_text_instance,
                                     cnn.predict_text_instance,
                                     num_features=6)
    exp.as_list()
    print("")
    print("output prob (Negative, Positive)")
    print('Original prediction:',
          cnn.predict_text_instance([x_text_instance])[0])
    print("")
    # mask the two highest-weighted words and re-predict
    x_text_removed = x_text_instance
    x_text_removed = x_text_removed.replace(exp.as_list()[0][0], '<unk>')
    x_text_removed = x_text_removed.replace(exp.as_list()[1][0], '<unk>')
    print("x_text_instance: ", x_text_instance)
    print("")
    print("x_text_removed: ", x_text_removed)
    print(exp.as_list()[0][0])
    print(exp.as_list()[1][0])
    print("")
    print('Prediction removing some features:',
          cnn.predict_text_instance([x_text_removed])[0])
    print(
        'Difference:',
        cnn.predict_text_instance([x_text_instance])[0] -
        cnn.predict_text_instance([x_text_removed])[0])
    # timestamped file name so successive runs don't overwrite each other
    timestamp = str(int(time.time()))
    static_dir = os.path.abspath(os.path.join(os.curdir, 'static'))
    oi_lime_dir = os.path.abspath(os.path.join(static_dir, 'oi_lime'))
    oi_file_path = os.path.abspath(
        os.path.join(oi_lime_dir, 'oi_' + timestamp + '.html'))
    exp.save_to_file(oi_file_path)
    return 'oi_' + timestamp + '.html'
def explain_prediction(sent, pipe, filename):
    """Explain *pipe*'s prediction for *sent* and save the report to HTML.

    Fix: the pickled label encoding was loaded with ``pickle.load(open(...))``,
    leaking the file handle; it is now loaded inside a context manager.

    Args:
        sent: text to explain.
        pipe: fitted pipeline exposing ``predict_proba``.
        filename: prefix of the ``*_label_encoding.pkl`` file to load.

    Returns:
        Result of ``Explanation.save_to_file`` (writes explanation.html).
    """
    # vect=transform_inp_sent_to_vect(sent)
    encoding_path = glob.glob(
        r'{}{}_label_encoding.pkl'.format(DIRECTORY_PATH, filename))[0]
    with open(encoding_path, 'rb') as fh:
        label_encoding = pickle.load(fh)
    labels = list(label_encoding.values())
    explainer = LimeTextExplainer(class_names=labels)
    # NOTE(review): LIME's ``labels`` argument expects integer label
    # indices; passing the encoding values works only if they are ints —
    # confirm against the pickled encoding.
    exp = explainer.explain_instance(sent, pipe.predict_proba, labels=labels)
    return exp.save_to_file(r'{}explanation.html'.format(DIRECTORY_PATH))
def limeTextExplain(data, model, class_names):
    """Explain *data* and bundle the word list with class probabilities."""
    explanation = LimeTextExplainer(class_names=class_names).explain_instance(
        data, model.predict_proba, num_features=6)
    probs = model.predict_proba([data])
    return dict(exp=explanation.as_list(),
                predictProbabilities=getPredictProbabilities(
                    [probs[0][0], probs[0][1]], class_names))
def explain(clf, X_train, y, instance, name, method):
    """Fit *clf* and render/save a LIME explanation for one instance."""
    clf.fit(X_train, y)
    lime = LimeTextExplainer(class_names=[-1, 0, 1])
    explanation = lime.explain_instance(instance,
                                        method,
                                        top_labels=1,
                                        num_features=10)
    explanation.show_in_notebook()
    explanation.save_to_file(f"../{name}_explanation.html")
def text_explanation_with_lime(x_train, instance_ind, model, class_name=None):
    """Show an in-notebook LIME explanation for one training row."""
    try:
        instance = x_train.iloc[instance_ind]
        explanation = LimeTextExplainer(
            class_names=class_name).explain_instance(instance,
                                                     model.predict_proba)
        return explanation.show_in_notebook(text=instance)
    except Exception as e:
        # best-effort: report and fall through (returns None)
        print('Model is not supported by LimeTextExplainer')
        print(e)
def interpret_data(X, y, func, class_names):
    """Time LIME explanations over the first ten documents of *X*.

    Fix: the original ignored the *X* parameter and read the module-level
    ``newsgroups_test.data`` instead; it now explains ``X[r_idx]``. (*y*
    is kept for interface compatibility but is unused.)

    Args:
        X: sequence of documents; the first ten are explained.
        y: unused (interface compatibility).
        func: classifier probability function for ``explain_instance``.
        class_names: class names forwarded to ``LimeTextExplainer``.

    Returns:
        (times, scores): per-document wall-clock seconds and LIME fit
        scores.
    """
    explainer = LimeTextExplainer(class_names=class_names)
    times, scores = [], []
    for r_idx in range(10):
        start_time = time.time()
        exp = explainer.explain_instance(X[r_idx], func, num_features=6)
        times.append(time.time() - start_time)
        scores.append(exp.score)
        print('...')
    return times, scores
def explain_text(self, labels, instance, column_name=None,
                 num_features=10, num_samples=5000):
    """Explain a text field of a prediction.

    It analyzes the prediction with LIME, and returns a report of which
    words are most impactful in contributing to certain labels.

    Args:
        labels: a list of labels to explain.
        instance: the prediction instance. It needs to conform to model's
            input. Can be a csv line string, or a dict.
        column_name: which text column to explain. Can be None if there
            is only one text column in the model input.
        num_features: maximum number of words (features) to analyze.
            Passed to LIME LimeTextExplainer directly.
        num_samples: size of the neighborhood to learn the linear model.
            Passed to LIME LimeTextExplainer directly.

    Returns:
        A LIME's lime.explanation.Explanation.

    Throws:
        ValueError if the given text column is not found in model input
        or column_name is None but there are multiple text columns in
        model input.
    """
    from lime.lime_text import LimeTextExplainer

    # validate the requested text column before doing any work
    if len(self._text_columns) > 1 and not column_name:
        raise ValueError('There are multiple text columns in the input of the model. ' +
                         'Please specify "column_name".')
    elif column_name and column_name not in self._text_columns:
        raise ValueError('Specified column_name "%s" not found in the model input.'
                         % column_name)

    text_column_name = column_name if column_name else self._text_columns[0]
    if isinstance(instance, six.string_types):
        # parse a csv line into a dict keyed by the model's headers
        instance = next(csv.DictReader([instance], fieldnames=self._headers))

    predict_fn = self._make_text_predict_fn(labels, instance, text_column_name)
    explainer = LimeTextExplainer(class_names=labels)
    exp = explainer.explain_instance(
        instance[text_column_name], predict_fn, labels=range(len(labels)),
        num_features=num_features, num_samples=num_samples)
    return exp
def test_lime_text_explainer_good_regressor(self):
    """LinearRegression should be accepted as a custom model_regressor."""
    from sklearn.datasets import fetch_20newsgroups
    newsgroups_train = fetch_20newsgroups(subset='train')
    newsgroups_test = fetch_20newsgroups(subset='test')
    # shorten target names, keeping two components for the 'misc' groups
    class_names = []
    for name in newsgroups_train.target_names:
        if 'misc' not in name:
            class_names.append(name.split('.')[-1])
        else:
            class_names.append('.'.join(name.split('.')[-2:]))
    class_names[3] = 'pc.hardware'
    class_names[4] = 'mac.hardware'
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    explainer = LimeTextExplainer(class_names=class_names)
    idx = 1340
    exp = explainer.explain_instance(newsgroups_test.data[idx],
                                     pipeline.predict_proba,
                                     num_features=6,
                                     labels=[0, 17],
                                     model_regressor=LinearRegression())
def test_lime_text_explainer_good_regressor(self):
    """Default regressor path: explanation exists with num_features items."""
    categories = ['alt.atheism', 'soc.religion.christian']
    newsgroups_train = fetch_20newsgroups(subset='train',
                                          categories=categories)
    newsgroups_test = fetch_20newsgroups(subset='test',
                                         categories=categories)
    class_names = ['atheism', 'christian']
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(newsgroups_test.data[83],
                                     pipeline.predict_proba,
                                     num_features=6)
    self.assertIsNotNone(exp)
    self.assertEqual(6, len(exp.as_list()))
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.datasets import fetch_20newsgroups
import matplotlib.pyplot as plt
from lime.lime_text import LimeTextExplainer

# Fit a random forest on TF-IDF vectors of two newsgroups.
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
class_names = ['atheism', 'christian']
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
rf = RandomForestClassifier(n_estimators=500)
rf.fit(train_vectors, newsgroups_train.target)
c = make_pipeline(vectorizer, rf)

# Explain one test document and plot the word weights.
explainer = LimeTextExplainer(class_names=class_names)
idx = 81
exp = explainer.explain_instance(newsgroups_test.data[idx],
                                 c.predict_proba,
                                 num_features=10)
print('Document id: %d' % idx)
print('Probability(christian) =',
      c.predict_proba([newsgroups_test.data[idx]])[0, 1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])
fig = exp.as_pyplot_figure()
plt.show()
def TextInterpret(text, predict):
    """Explain *text* with LIME, display it in the notebook, and return it."""
    explanation = LimeTextExplainer().explain_instance(text, predict)
    explanation.show_in_notebook()
    return explanation
# Probability of each class for the first test document.
print(c.predict_proba([newsgroups_test.data[0]]))

from lime.lime_text import LimeTextExplainer

explainer = LimeTextExplainer(class_names=class_names)

# Explain one document and show it alongside the ground truth.
idx = 83
exp = explainer.explain_instance(newsgroups_test.data[idx],
                                 c.predict_proba,
                                 num_features=10)
print("value to be predicted")
print(newsgroups_test.data[idx])
print(newsgroups_test.target[idx])
print(newsgroups_test.target_names)

print('Document id: %d' % idx)
print('Probability(christian) =',
      c.predict_proba([newsgroups_test.data[idx]])[0, 1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])