def explain_model_weights():
    """Explain the weights/parameters of a certain model.

    Expects a JSON payload with keys 'classifier', 'vocabModel', 'tfIdf'
    and 'corrected', loads the matching classifier and vocabulary, and
    returns the eli5 weight explanation as JSON: {'div': <html>, 'style': <css>}.
    """
    data = request.get_json(force=True)
    target_names = ['negative', 'neutral', 'positive',
                    'very_negative', 'very_positive']
    # NOTE(review): unlike explain_review_prediction, the corrected/original
    # document choice is taken from the request payload here.
    clf, vocabulary = load_clf_and_vocabulary(data['classifier'],
                                              data['vocabModel'],
                                              data['tfIdf'],
                                              data['corrected'])
    vect = CountVectorizer(vocabulary=vocabulary)
    # Populates vect.vocabulary_ without fitting, so eli5 can map feature
    # indices back to terms.
    vect._validate_vocabulary()

    if data['classifier'] == 'LR':
        explanation = explain_weights.explain_linear_classifier_weights(
            clf, vec=vect, target_names=target_names)
        div = html.format_as_html(explanation, include_styles=False)
        style = html.format_html_styles()
        return jsonify({'div': div, 'style': style})

    # Previously fell through and returned None, which Flask reports as a
    # 500; answer with an explicit client error instead.
    return jsonify({'error': 'unsupported classifier: %s' % data['classifier']}), 400
def get_lime_analyze_as_html(self, query, indicies, max_len=50, max_replace=0.7, top_targets=None):
    """Run LIME analysis for each character position in *indicies* and render it as HTML.

    :param query: input string; spaces are stripped before analysis.
    :param indicies: character positions (into the space-stripped query) to explain.
    :param max_len: maximum sample length forwarded to ``_lime_analyze``.
    :param max_replace: maximum replacement ratio forwarded to ``_lime_analyze``.
    :param top_targets: optional limit on targets, forwarded to ``_lime_analyze``.
    :raises ValueError: if any index falls outside the space-stripped query.
    :return: a single HTML string with one query-info/explanation table per index.
    """
    query = query.replace(' ', '')
    for i in indicies:
        # Validate before running the expensive LIME analysis. The original
        # check raised a bare ValueError and let negative indices through.
        if i < 0 or i >= len(query):
            raise ValueError(
                'index %d out of range for query of length %d' % (i, len(query)))

    def build_query_info_html(query, index):
        # Small HTML fragment describing which character is being explained.
        _html_format = ''
        _html_format += '<p>' + 'query: ' + query + '</p><br>'
        _html_format += '<p>' + 'index: ' + str(index) + '</p><br>'
        _html_format += '<p>' + 'char: ' + query[index] + '</p><br>'
        return _html_format

    explainers = self._lime_analyze(query, indicies, max_len, max_replace,
                                    top_targets=top_targets)
    html_format = ''
    html_format += '<div>'
    for i, e in zip(indicies, explainers):
        html_format += '<table class="table"><tr>'
        html_format += '<td><div>' + build_query_info_html(query, i) + '</div></td>'
        # Strip newlines so the eli5 markup stays on one line inside the cell.
        html_format += '<td><div>' + format_as_html(e).replace('\n', '') + '</div></td>'
        html_format += '</tr></table>'
        html_format += '<hr>'
    html_format += '</div>'
    return html_format
def _repr_html_(self):
    """Render this explanation as HTML for display in a Jupyter notebook."""
    from eli5.formatters import fields
    from eli5.formatters.html import format_as_html
    rendered = format_as_html(self, force_weights=False, show=fields.WEIGHTS)
    return rendered
def get_prediction_message(self, text):
    """Return a (human-readable class label, eli5 HTML explanation) pair for *text*."""
    predicted_class, eli5_prediction = self.predict(text)
    # Translate the raw class id into its display label.
    message = self.classes_dict[predicted_class]
    explanation_html = format_as_html(eli5_prediction)
    return message, explanation_html
def explain_review_prediction():
    """Explain a specific prediction using the eli5 library.

    Expects JSON with 'classifier', 'vocabModel', 'tfIdf' and 'review'.
    Linear models ('LR') are explained directly from their weights; black-box
    models ('SVM'/'MLP') are explained with a LIME-style TextExplainer fitted
    on perturbed samples of the review. Returns the explanation HTML plus,
    for black-box models, per-class probabilities of the original and the
    distorted texts and the surrogate-model fidelity metrics.
    """
    data = request.get_json(force=True)
    target_names = ['negative', 'neutral', 'positive',
                    'very_negative', 'very_positive']
    # Use the original documents, not the corrected ones.
    clf, vocabulary = load_clf_and_vocabulary(data['classifier'],
                                              data['vocabModel'],
                                              data['tfIdf'],
                                              False)
    vect = CountVectorizer(vocabulary=vocabulary)
    # Populates vect.vocabulary_ without fitting, so eli5 can resolve
    # feature names.
    vect._validate_vocabulary()

    if data['classifier'] == 'LR':
        explanation = explain_prediction.explain_prediction_linear_classifier(
            clf, data['review'], vec=vect, top=10, target_names=target_names)
        div = html.format_as_html(explanation, include_styles=False)
        style = html.format_html_styles()
        return jsonify({'div': div, 'style': style})

    if data['classifier'] in ('SVM', 'MLP'):
        # Fit a local linear surrogate around the review; fixed random_state
        # keeps the sampling deterministic between requests.
        te = TextExplainer(n_samples=100,
                           clf=LogisticRegression(solver='newton-cg'),
                           vec=vect,
                           random_state=0)
        te.fit(data['review'], clf.predict_proba)
        explanation = te.explain_prediction(top=10, target_names=target_names)
        div = html.format_as_html(explanation, include_styles=False)
        style = html.format_html_styles()

        # Per-class probabilities for every perturbed sample the surrogate
        # was trained on.
        distorted_texts = []
        for sample in te.samples_:
            sample_explanation = explain_prediction.explain_prediction_linear_classifier(
                te.clf_, sample, vec=te.vec_, top=10, target_names=target_names)
            curr = {'text': sample}
            curr.update(_extract_target_probabilities(
                as_dict.format_as_dict(sample_explanation), target_names))
            distorted_texts.append(curr)

        probabilities = _extract_target_probabilities(
            as_dict.format_as_dict(explanation), target_names)
        return jsonify({
            'div': div,
            'style': style,
            'original_text': data['review'],
            'probabilities': probabilities,
            'distorted_texts': distorted_texts,
            'metrics': te.metrics_
        })

    # Previously fell through and returned None, which Flask reports as a
    # 500; answer with an explicit client error instead.
    return jsonify({'error': 'unsupported classifier: %s' % data['classifier']}), 400


def _extract_target_probabilities(dict_explanation, target_names):
    """Map each explained target in *target_names* to its predicted probability.

    Replaces the two duplicated if/elif chains that copied 'proba' for each
    of the five sentiment classes by hand.
    """
    return {t['target']: t['proba']
            for t in dict_explanation['targets']
            if t['target'] in target_names}