def evaluate_entities(entity_results, extractors):  # pragma: no cover
    """Creates summary statistics for each entity extractor.

    Logs precision, recall, and F1 per entity type for each extractor.
    """
    # Helper functions used below (align_all_entity_predictions, merge_labels,
    # substitute_labels, get_evaluation_metrics, collect_*_entity_predictions)
    # are assumed to be available in the enclosing module, as in rasa.nlu.test.
    aligned_predictions = align_all_entity_predictions(entity_results, extractors)
    merged_targets = merge_labels(aligned_predictions)
    merged_targets = substitute_labels(merged_targets, "O", "no_entity")

    result = {}

    for extractor in extractors:
        merged_predictions = merge_labels(aligned_predictions, extractor)
        merged_predictions = substitute_labels(merged_predictions, "O", "no_entity")

        report, precision, f1, accuracy = get_evaluation_metrics(
            merged_targets,
            merged_predictions,
            output_dict=True,
            exclude_label="no_entity",
        )

        log = collect_incorrect_entity_predictions(
            entity_results, merged_predictions, merged_targets
        ) + collect_successful_entity_predictions(
            entity_results, merged_predictions, merged_targets
        )

        # Store the metrics separately for each extractor.
        result[extractor] = {
            "report": report,
            "precision": precision,
            "f1_score": f1,
            "accuracy": accuracy,
            "log": log,
        }

    return result
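For reference, the stock function returns one entry per extractor; the keys follow directly from the code above, while the extractor name and numeric values below are purely illustrative.

# Illustrative shape of the evaluate_entities(...) return value
# (extractor name and scores are made up for the example):
# {
#     "DIETClassifier": {
#         "report": {...},   # per-label metrics from get_evaluation_metrics
#         "precision": 0.91,
#         "f1_score": 0.90,
#         "accuracy": 0.93,
#         "log": [...],      # incorrect + successful prediction entries
#     }
# }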
from typing import Dict, Set, Text


def __evaluate_entities(entity_results, extractors: Set[Text]) -> Dict:
    """Creates summary statistics for each entity extractor.

    Logs precision, recall, and F1 per entity type for each extractor.

    Args:
        entity_results: entity evaluation results
        extractors: entity extractors to consider

    Returns:
        dictionary with evaluation results
    """
    from rasa.model_testing import get_evaluation_metrics
    from rasa.nlu.test import (
        NO_ENTITY,
        align_all_entity_predictions,
        merge_labels,
        substitute_labels,
        collect_successful_entity_predictions,
        collect_incorrect_entity_predictions,
    )
    from rasa.shared.nlu.constants import NO_ENTITY_TAG

    aligned_predictions = align_all_entity_predictions(entity_results, extractors)
    merged_targets = merge_labels(aligned_predictions)
    merged_targets = substitute_labels(merged_targets, NO_ENTITY_TAG, NO_ENTITY)

    result = {}

    for extractor in extractors:
        merged_predictions = merge_labels(aligned_predictions, extractor)
        merged_predictions = substitute_labels(
            merged_predictions, NO_ENTITY_TAG, NO_ENTITY
        )

        report, precision, f1, accuracy = get_evaluation_metrics(
            merged_targets,
            merged_predictions,
            output_dict=False,
            exclude_label=NO_ENTITY,
        )

        successes = collect_successful_entity_predictions(
            entity_results, merged_predictions, merged_targets
        )
        errors = collect_incorrect_entity_predictions(
            entity_results, merged_predictions, merged_targets
        )

        result[extractor] = {
            "total_count": len(successes) + len(errors),
            "success_count": len(successes),
            "failure_count": len(errors),
            "precision": precision,
            "f1_score": f1,
            "accuracy": accuracy,
            # "successes": successes, "errors": errors
        }

    return result
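A minimal usage sketch, assuming `entity_results` has already been produced elsewhere (for example by Rasa's NLU test utilities) for a single extractor; the extractor name "DIETClassifier" and the variable names are only examples.

# Hypothetical usage: `entity_results` is assumed to exist already
# (entity evaluation results collected via rasa.nlu.test);
# "DIETClassifier" is just an example extractor name.
entity_report = __evaluate_entities(entity_results, {"DIETClassifier"})

for extractor_name, metrics in entity_report.items():
    print(
        f"{extractor_name}: "
        f"{metrics['success_count']}/{metrics['total_count']} entities correct, "
        f"precision={metrics['precision']:.3f}, f1={metrics['f1_score']:.3f}"
    )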