def plot_story_evaluation(test_y, predictions, report, precision, f1, accuracy,
                          in_training_data_fraction, out_directory):
    """Plot and log the results of a story evaluation.

    Logs the evaluation table (including the full classification report)
    and writes an action confusion matrix as ``story_confmat.pdf`` into
    ``out_directory``.

    Args:
        test_y: expected action labels.
        predictions: predicted action labels, aligned with ``test_y``.
        report: classification report passed through to the log table.
        precision: precision score to log.
        f1: f1 score to log.
        accuracy: accuracy score to log.
        in_training_data_fraction: fraction of actions seen in training data.
        out_directory: directory the PDF confusion matrix is written to.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt

    log_evaluation_table(test_y, "ACTION", report, precision, f1, accuracy,
                         in_training_data_fraction, include_report=True)

    cnf_matrix = confusion_matrix(test_y, predictions)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels(test_y, predictions),
                          title='Action Confusion matrix')

    # Grab the figure created by plot_confusion_matrix and persist it.
    fig = plt.gcf()
    fig.set_size_inches(20, 20)  # was int(20) — literal is already an int
    fig.savefig(os.path.join(out_directory, "story_confmat.pdf"),
                bbox_inches='tight')
def run_story_evaluation(resource_name, policy_model_path, nlu_model_path,
                         out_file, max_stories, message_preprocessor=None,
                         interpreter_class=None):
    """Run the evaluation of the stories, plots the results.

    Collects expected vs. predicted actions for the stories in
    ``resource_name``, logs an evaluation table, prints the action error
    rate, and saves an action confusion matrix to ``out_file``.

    Args:
        resource_name: location of the test stories.
        policy_model_path: path to the trained Core policy model.
        nlu_model_path: path to the trained NLU model.
        out_file: file path the confusion-matrix figure is saved to.
        max_stories: maximum number of stories to evaluate.
        message_preprocessor: optional callable applied to messages before
            interpretation.
        interpreter_class: optional interpreter class override.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    # BUGFIX: plt was used below without a local import, unlike the other
    # evaluation helpers in this file which all import pyplot locally.
    import matplotlib.pyplot as plt

    test_y, preds = collect_story_predictions(
            resource_name, policy_model_path, nlu_model_path, max_stories,
            message_preprocessor=message_preprocessor,
            interpreter_class=interpreter_class)

    log_evaluation_table(test_y, preds)

    cnf_matrix = confusion_matrix(test_y, preds)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels(test_y, preds),
                          title='Action Confusion matrix')

    compute_and_print_action_error_rate(test_y, preds)

    # Persist the confusion-matrix figure created above.
    fig = plt.gcf()
    fig.set_size_inches(20, 20)  # was int(20) — literal is already an int
    fig.savefig(out_file, bbox_inches='tight')
def plot_story_evaluation(test_y, preds, out_file):
    """Plot the results of story evaluation.

    Logs the evaluation table and saves an action confusion matrix
    comparing expected and predicted actions to ``out_file``.

    Args:
        test_y: expected action labels.
        preds: predicted action labels, aligned with ``test_y``.
        out_file: file path the confusion-matrix figure is saved to.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    import matplotlib.pyplot as plt

    log_evaluation_table(test_y, preds)

    cnf_matrix = confusion_matrix(test_y, preds)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels(test_y, preds),
                          title='Action Confusion matrix')

    # Grab the figure created by plot_confusion_matrix and persist it.
    fig = plt.gcf()
    fig.set_size_inches(20, 20)  # was int(20) — literal is already an int
    fig.savefig(out_file, bbox_inches='tight')
def run_story_evaluation(story_file, policy_model_path, nlu_model_path,
                         out_file, max_stories):
    """Run the evaluation of the stories, plots the results.

    Collects expected vs. predicted actions for the stories in
    ``story_file``, logs an evaluation table, and saves an action
    confusion matrix to ``out_file``.

    Args:
        story_file: file containing the test stories.
        policy_model_path: path to the trained Core policy model.
        nlu_model_path: path to the trained NLU model.
        out_file: file path the confusion-matrix figure is saved to.
        max_stories: maximum number of stories to evaluate.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels
    # BUGFIX: plt was used below without a local import, unlike the other
    # evaluation helpers in this file which all import pyplot locally.
    import matplotlib.pyplot as plt

    test_y, preds = collect_story_predictions(story_file, policy_model_path,
                                              nlu_model_path, max_stories)

    log_evaluation_table(test_y, preds)

    cnf_matrix = confusion_matrix(test_y, preds)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels(test_y, preds),
                          title='Action Confusion matrix')

    # Persist the confusion-matrix figure created above.
    fig = plt.gcf()
    fig.set_size_inches(20, 20)  # was int(20) — literal is already an int
    fig.savefig(out_file, bbox_inches='tight')
def evaluate_intents(intent_results,
                     report_folder,
                     successes_filename,
                     errors_filename,
                     confmat_filename,
                     intent_hist_filename):  # pragma: no cover
    """Creates a confusion matrix and summary statistics for intent predictions.

    Log samples which could not be classified correctly and save them to file.
    Creates a confidence histogram which is saved to file.
    Wrong and correct prediction confidences will be plotted in
    separate bars of the same histogram plot.
    Only considers those examples with a set intent.
    Others are filtered out.

    Args:
        intent_results: per-example results; each item exposes ``message``,
            ``target``, ``target_entities``, ``entities_prediction``,
            ``prediction`` and ``confidence`` attributes.
        report_folder: if truthy, the classification report is saved as
            ``intent_report.json`` in this folder instead of being logged.
        successes_filename: if truthy, correctly classified samples are
            saved to this file.
        errors_filename: if truthy, misclassified samples are saved to
            this file.
        confmat_filename: if truthy, the confusion matrix (and the
            confidence histogram) are plotted and shown.
        intent_hist_filename: output file for the confidence histogram.

    Returns:
        A dictionary containing the evaluation result: per-example
        ``predictions`` plus ``report``, ``precision``, ``f1_score``
        and ``accuracy``.
    """
    # Examples without a defined intent target are dropped before scoring.
    # remove empty intent targets
    num_examples = len(intent_results)
    intent_results = remove_empty_intent_examples(intent_results)

    logger.info("Intent Evaluation: Only considering those "
                "{} examples that have a defined intent out "
                "of {} examples".format(len(intent_results), num_examples))

    targets, predictions = _targets_predictions_from(intent_results)

    if report_folder:
        # Machine-readable report: metrics are computed as a dict and
        # written to disk rather than logged.
        report, precision, f1, accuracy = get_evaluation_metrics(
                targets, predictions, output_dict=True)

        report_filename = os.path.join(report_folder, 'intent_report.json')

        save_json(report, report_filename)
        logger.info("Classification report saved to {}."
                    .format(report_filename))
    else:
        # Human-readable path: log the metrics table instead of saving it.
        report, precision, f1, accuracy = get_evaluation_metrics(targets,
                                                                 predictions)
        log_evaluation_table(report, precision, f1, accuracy)

    if successes_filename:
        # save classified samples to file for debugging
        collect_nlu_successes(intent_results, successes_filename)

    if errors_filename:
        # log and save misclassified samples to file for debugging
        collect_nlu_errors(intent_results, errors_filename)

    if confmat_filename:
        from sklearn.metrics import confusion_matrix
        from sklearn.utils.multiclass import unique_labels
        import matplotlib.pyplot as plt

        cnf_matrix = confusion_matrix(targets, predictions)
        labels = unique_labels(targets, predictions)
        plot_confusion_matrix(cnf_matrix, classes=labels,
                              title='Intent Confusion matrix',
                              out=confmat_filename)
        plt.show()

        # NOTE(review): the histogram is gated on confmat_filename, not on
        # intent_hist_filename — if callers may pass confmat_filename without
        # intent_hist_filename, verify plot_intent_confidences handles a
        # falsy output path.
        plot_intent_confidences(intent_results,
                                intent_hist_filename)

        plt.show()

    # Rebind `predictions` from the label list to a serializable
    # per-example summary for the returned result dict.
    predictions = [
        {
            "text": res.message,
            "intent": res.target,
            "entities": res.target_entities,
            "predicted_entities": res.entities_prediction,
            "predicted": res.prediction,
            "confidence": res.confidence
        } for res in intent_results
    ]

    return {
        "predictions": predictions,
        "report": report,
        "precision": precision,
        "f1_score": f1,
        "accuracy": accuracy
    }