def test_empty_intent_removal_targets_predicted():
    """Empty intent targets are dropped together with their predictions.

    NOTE(review): this test was previously also named
    ``test_empty_intent_removal`` and was silently shadowed by the later
    definition of the same name, so pytest never ran it. Renamed so it is
    collected again. It exercises a two-parallel-lists signature of
    ``remove_empty_intent_examples``, while the rest of this file passes a
    single list of ``IntentEvaluationResult`` — confirm which API is current
    and delete this test if it is a leftover from an older revision.
    """
    targets = ["", "greet"]
    predicted = ["restaurant_search", "greet"]

    # Examples whose target intent is the empty string must be removed
    # from both sequences, keeping the remaining pairs aligned.
    targets_r, predicted_r = remove_empty_intent_examples(targets, predicted)

    assert targets_r == ["greet"]
    assert predicted_r == ["greet"]
def test_empty_intent_removal():
    """``remove_empty_intent_examples`` drops results whose target intent is empty."""
    results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    filtered = remove_empty_intent_examples(results)

    # Only the example with a non-empty target intent survives,
    # and all of its fields are preserved unchanged.
    assert len(filtered) == 1
    kept = filtered[0]
    assert kept.target == "greet"
    assert kept.prediction == "greet"
    assert kept.confidence == 0.98765
    assert kept.message == "hello"
def evaluate_intents(intent_results,
                     report_folder,
                     successes_filename,
                     errors_filename,
                     confmat_filename,
                     intent_hist_filename):  # pragma: no cover
    """Creates a confusion matrix and summary statistics for intent predictions.

    Log samples which could not be classified correctly and save them to file.
    Creates a confidence histogram which is saved to file.
    Wrong and correct prediction confidences will be plotted in separate bars
    of the same histogram plot.
    Only considers those examples with a set intent.
    Others are filtered out.
    Returns a dictionary of containing the evaluation result.
    """
    # remove empty intent targets
    num_examples = len(intent_results)
    intent_results = remove_empty_intent_examples(intent_results)

    logger.info("Intent Evaluation: Only considering those "
                "{} examples that have a defined intent out "
                "of {} examples".format(len(intent_results), num_examples))

    # Split the filtered results into parallel target/prediction label lists.
    targets, predictions = _targets_predictions_from(intent_results)

    if report_folder:
        # Per-class report as a dict so it can be serialized to JSON on disk.
        report, precision, f1, accuracy = get_evaluation_metrics(
            targets, predictions, output_dict=True)

        report_filename = os.path.join(report_folder, 'intent_report.json')

        save_json(report, report_filename)
        logger.info("Classification report saved to {}."
                    .format(report_filename))

    else:
        # No report folder: log the report to the console instead of saving it.
        report, precision, f1, accuracy = get_evaluation_metrics(targets,
                                                                 predictions)
        log_evaluation_table(report, precision, f1, accuracy)

    if successes_filename:
        # save classified samples to file for debugging
        collect_nlu_successes(intent_results, successes_filename)

    if errors_filename:
        # log and save misclassified samples to file for debugging
        collect_nlu_errors(intent_results, errors_filename)

    if confmat_filename:
        # Heavy plotting deps imported lazily so they are only required
        # when a confusion-matrix plot is actually requested.
        from sklearn.metrics import confusion_matrix
        from sklearn.utils.multiclass import unique_labels
        import matplotlib.pyplot as plt

        cnf_matrix = confusion_matrix(targets, predictions)
        labels = unique_labels(targets, predictions)
        plot_confusion_matrix(cnf_matrix, classes=labels,
                              title='Intent Confusion matrix',
                              out=confmat_filename)
        plt.show()

        # NOTE(review): the confidence histogram is only produced when
        # confmat_filename is set, even though it has its own filename
        # parameter — confirm intent_hist_filename is meant to be gated
        # on the confusion-matrix option.
        plot_intent_confidences(intent_results,
                                intent_hist_filename)

        plt.show()

    # NOTE(review): `predictions` is rebound here from a list of label
    # strings to a list of per-example dicts for the returned payload.
    predictions = [
        {
            "text": res.message,
            "intent": res.target,
            "entities": res.target_entities,
            "predicted_entities": res.entities_prediction,
            "predicted": res.prediction,
            "confidence": res.confidence
        } for res in intent_results
    ]

    return {
        "predictions": predictions,
        "report": report,
        "precision": precision,
        "f1_score": f1,
        "accuracy": accuracy
    }