def test_collect_entity_predictions(
    entity_results, targets, predictions, successes, errors
):
    """Verify success/error collection against the expected fixtures.

    NOTE(review): argument order changed to ``(entity_results, predictions,
    targets)`` — the original passed targets before predictions, which is
    inconsistent with the calls in ``__evaluate_entities`` where
    ``merged_predictions`` precedes ``merged_targets``. Confirm against the
    ``rasa.nlu.test`` signatures.

    NOTE(review): this definition is shadowed by the annotated definition of
    the same name later in the file — consider removing one of the two.
    """
    actual = collect_successful_entity_predictions(
        entity_results, predictions, targets
    )
    assert len(successes) == len(actual)
    assert successes == actual

    actual = collect_incorrect_entity_predictions(
        entity_results, predictions, targets
    )
    assert len(errors) == len(actual)
    assert errors == actual
def test_collect_entity_predictions(
    entity_results: List[EntityEvaluationResult],
    targets: List[Text],
    predictions: List[Text],
    successes: List[Dict[Text, Any]],
    errors: List[Dict[Text, Any]],
) -> None:
    """Verify success/error collection against the expected fixtures.

    NOTE(review): argument order changed to ``(entity_results, predictions,
    targets)`` — the original passed targets before predictions, which is
    inconsistent with the calls in ``__evaluate_entities`` where
    ``merged_predictions`` precedes ``merged_targets``. Confirm against the
    ``rasa.nlu.test`` signatures.
    """
    actual = collect_successful_entity_predictions(
        entity_results, predictions, targets
    )
    assert len(successes) == len(actual)
    assert successes == actual

    actual = collect_incorrect_entity_predictions(
        entity_results, predictions, targets
    )
    assert len(errors) == len(actual)
    assert errors == actual
def __evaluate_entities(entity_results, extractors: Set[Text]) -> Dict:
    """Create summary statistics for each entity extractor.

    Args:
        entity_results: entity evaluation results
        extractors: entity extractors to consider

    Returns:
        Dictionary keyed by extractor name, each value holding success /
        failure counts plus precision, F1 score and accuracy for that
        extractor.
    """
    # Local imports keep the original style (presumably deferred to avoid
    # import cycles / heavy startup cost — confirm); grouped together here
    # instead of scattered through the function body.
    from rasa.model_testing import get_evaluation_metrics
    from rasa.nlu.test import (
        NO_ENTITY,
        align_all_entity_predictions,
        collect_incorrect_entity_predictions,
        collect_successful_entity_predictions,
        merge_labels,
        substitute_labels,
    )
    from rasa.shared.nlu.constants import NO_ENTITY_TAG

    aligned_predictions = align_all_entity_predictions(entity_results, extractors)
    merged_targets = merge_labels(aligned_predictions)
    # Normalize the "no entity" placeholder so targets and predictions use
    # the same label for the empty class before computing metrics.
    merged_targets = substitute_labels(merged_targets, NO_ENTITY_TAG, NO_ENTITY)

    result = {}
    for extractor in extractors:
        merged_predictions = merge_labels(aligned_predictions, extractor)
        merged_predictions = substitute_labels(
            merged_predictions, NO_ENTITY_TAG, NO_ENTITY
        )
        # The per-label report (first element) is not used; only the
        # aggregate metrics are kept.
        _, precision, f1, accuracy = get_evaluation_metrics(
            merged_targets,
            merged_predictions,
            output_dict=False,
            exclude_label=NO_ENTITY,
        )
        successes = collect_successful_entity_predictions(
            entity_results, merged_predictions, merged_targets
        )
        errors = collect_incorrect_entity_predictions(
            entity_results, merged_predictions, merged_targets
        )
        result[extractor] = {
            "total_count": len(successes) + len(errors),
            "success_count": len(successes),
            "failure_count": len(errors),
            "precision": precision,
            "f1_score": f1,
            "accuracy": accuracy,
        }

    return result