Example #1
File: routes.py Project: septa97/SP-server
from flask import jsonify, request
from sklearn.feature_extraction.text import CountVectorizer
from eli5.formatters import html
from eli5.sklearn import explain_weights

def explain_model_weights():
	"""
	Explain the weights/parameters of a certain model
	"""
	data = request.get_json(force=True)

	# Use the original documents, not the corrected ones
	target_names = ['negative', 'neutral', 'positive', 'very_negative', 'very_positive']
	# load_clf_and_vocabulary is a project-local helper defined elsewhere in SP-server
	clf, vocabulary = load_clf_and_vocabulary(data['classifier'], data['vocabModel'], data['tfIdf'], data['corrected'])
	vect = CountVectorizer(vocabulary=vocabulary)
	vect._validate_vocabulary()

	# Guard first: without it, `div` and `style` would be unbound for any
	# classifier other than logistic regression.
	if data['classifier'] != 'LR':
		return jsonify({'error': 'only the LR classifier is supported'}), 400

	explanation = explain_weights.explain_linear_classifier_weights(clf, vec=vect, target_names=target_names)
	div = html.format_as_html(explanation, include_styles=False)
	style = html.format_html_styles()

	return jsonify({
		'div': div,
		'style': style
	})
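
A minimal, self-contained sketch of the same weights-rendering flow, using eli5's top-level explain_weights on a toy logistic regression (all data and names below are illustrative):

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
import eli5
from eli5.formatters import html

# Train a tiny text classifier so there are weights to explain.
docs = ['good movie', 'bad movie', 'great plot', 'awful plot']
labels = ['positive', 'negative', 'positive', 'negative']
vect = CountVectorizer()
clf = LogisticRegression().fit(vect.fit_transform(docs), labels)

# Same split as the route above: HTML body and CSS rendered separately.
explanation = eli5.explain_weights(clf, vec=vect, target_names=['negative', 'positive'])
div = html.format_as_html(explanation, include_styles=False)
style = html.format_html_styles()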
Example #2
    def get_lime_analyze_as_html(self,
                                 query,
                                 indicies,
                                 max_len=50,
                                 max_replace=0.7,
                                 top_targets=None):
        # Work at the character level: drop spaces so each index addresses
        # a single character of the query.
        query = query.replace(' ', '')

        for i in indicies:
            if i >= len(query):
                raise ValueError(
                    'index %d is out of range for query of length %d'
                    % (i, len(query)))

        def build_query_info_html(query, index):
            _html_format = ''
            _html_format += '<p>' + 'query: ' + query + '</p><br>'
            _html_format += '<p>' + 'index: ' + str(index) + '</p><br>'
            _html_format += '<p>' + 'char: ' + query[index] + '</p><br>'

            return _html_format

        # _lime_analyze is a project-local helper returning one eli5
        # Explanation per requested character index.
        explainers = self._lime_analyze(query,
                                        indicies,
                                        max_len,
                                        max_replace,
                                        top_targets=top_targets)
        html_format = ''
        html_format += '<div>'

        for i, e in zip(indicies, explainers):
            html_format += '<table class="table"><tr>'
            html_format += '<td><div>' + build_query_info_html(
                query, i) + '</div></td>'
            html_format += '<td><div>' + format_as_html(e).replace(
                '\n', '') + '</div></td>'
            html_format += '</tr></table>'
            html_format += '<hr>'

        html_format += '</div>'

        return html_format
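
The same embed-in-markup pattern, sketched end-to-end against a small scikit-learn pipeline in place of the project's _lime_analyze helper (toy data; names are illustrative):

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from eli5.lime import TextExplainer
from eli5.formatters.html import format_as_html

pipe = make_pipeline(CountVectorizer(), LogisticRegression())
pipe.fit(['good movie', 'bad movie', 'great plot', 'awful plot'],
         ['pos', 'neg', 'pos', 'neg'])

# Fit a local white-box approximation around one document, then render it.
te = TextExplainer(random_state=0)
te.fit('a great movie with an awful plot', pipe.predict_proba)
explanation = te.explain_prediction(target_names=['neg', 'pos'])

# Strip newlines before embedding, exactly as the method above does.
cell = '<td><div>' + format_as_html(explanation).replace('\n', '') + '</div></td>'
html_format = '<div><table class="table"><tr>' + cell + '</tr></table></div>'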
Example #3
File: base.py Project: phillipparr/NER
    def _repr_html_(self):
        """HTML formatting for the notebook."""
        from eli5.formatters import fields
        from eli5.formatters.html import format_as_html
        return format_as_html(self, force_weights=False, show=fields.WEIGHTS)

    def get_prediction_message(self, text):
        from eli5.formatters.html import format_as_html
        class_prediction, eli5_prediction = self.predict(text)
        prediction_message = self.classes_dict[class_prediction]
        return prediction_message, format_as_html(eli5_prediction)
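
The _repr_html_ hook is what lets Jupyter render these explanations inline; a minimal sketch of the same pattern on a wrapper object (the class name and attribute are illustrative):

from eli5.formatters import fields
from eli5.formatters.html import format_as_html

class ExplanationView:
    """Wraps an eli5 Explanation so a notebook displays it as rich HTML."""

    def __init__(self, explanation):
        self.explanation = explanation

    def _repr_html_(self):
        # Render only the weights section, without the force-layout extras.
        return format_as_html(self.explanation,
                              force_weights=False,
                              show=fields.WEIGHTS)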
Example #5
File: routes.py Project: septa97/SP-server
from flask import jsonify, request
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
import eli5
from eli5.formatters import as_dict, html, text
from eli5.lime import TextExplainer
from eli5.sklearn import explain_prediction

def explain_review_prediction():
	"""
	Explain a specific prediction using the eli5 library
	"""
	data = request.get_json(force=True)

	# Use the original documents, not the corrected ones
	target_names = ['negative', 'neutral', 'positive', 'very_negative', 'very_positive']
	# load_clf_and_vocabulary is a project-local helper defined elsewhere in SP-server
	clf, vocabulary = load_clf_and_vocabulary(data['classifier'], data['vocabModel'], data['tfIdf'], False)
	vect = CountVectorizer(vocabulary=vocabulary)
	vect._validate_vocabulary()

	# The vectorizer could instead be refit from the raw review files with
	# CountVectorizer(min_df=5, stop_words=stopwords, ngram_range=(n, n)),
	# where n follows data['vocabModel'] (unigram, bigram, or trigram).

	if data['classifier'] == 'LR':
		explanation = explain_prediction.explain_prediction_linear_classifier(clf, data['review'], vec=vect, top=10, target_names=target_names)
		div = html.format_as_html(explanation, include_styles=False)
		style = html.format_html_styles()

		# Also log a plain-text rendering of the explanation for debugging.
		txt = text.format_as_text(explanation, show=eli5.formatters.fields.ALL, highlight_spaces=True, show_feature_values=True)
		print(txt)

		return jsonify({
			'div': div,
			'style': style
		})

	elif data['classifier'] in ('SVM', 'MLP'):
		# SVM/MLP weights are not directly interpretable here, so fit a
		# LIME-style white-box surrogate (logistic regression) around this
		# review and explain the surrogate instead. TextExplainer passes
		# raw text samples to the predict function.
		te = TextExplainer(n_samples=100, clf=LogisticRegression(solver='newton-cg'), vec=vect, random_state=0)
		te.fit(data['review'], clf.predict_proba)
		explanation = te.explain_prediction(top=10, target_names=target_names)
		div = html.format_as_html(explanation, include_styles=False)
		style = html.format_html_styles()

		distorted_texts = []

		for sample in te.samples_:
			sample_explanation = explain_prediction.explain_prediction_linear_classifier(te.clf_, sample, vec=te.vec_, top=10, target_names=target_names)
			dict_explanation = as_dict.format_as_dict(sample_explanation)

			# Each target is one of the five sentiment classes, so the
			# per-class probabilities can be copied over directly.
			curr = {'text': sample}
			for c in dict_explanation['targets']:
				curr[c['target']] = c['proba']

			distorted_texts.append(curr)

		review_explanation = as_dict.format_as_dict(explanation)
		probabilities = {c['target']: c['proba'] for c in review_explanation['targets']}

		return jsonify({
			'div': div,
			'style': style,
			'original_text': data['review'],
			'probabilities': probabilities,
			'distorted_texts': distorted_texts,
			'metrics': te.metrics_  # how faithful the surrogate is to clf
		})
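
A hedged usage sketch for the route above; the URL path is hypothetical, since the @app.route decorator is not part of the snippet:

import requests

resp = requests.post(
    'http://localhost:5000/explain_review_prediction',  # hypothetical path
    json={'classifier': 'SVM', 'vocabModel': 'unigram',
          'tfIdf': False, 'review': 'The movie was surprisingly good.'})
payload = resp.json()
print(payload['probabilities'])  # per-class probabilities from the surrogate
print(payload['metrics'])        # e.g. mean_KL_divergence / score from TextExplainer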