def evaluation_get_individual_report(self):
    from sklearn import metrics
    from rasa.nlu.test import (
        align_all_entity_predictions,
        merge_labels,
        substitute_labels,
        get_entity_extractors,
    )

    intent_report = metrics.classification_report(
        self.target_intents, self.predicted_intents, output_dict=True
    )

    extractors = get_entity_extractors(self.interpreter)
    aligned_predictions = align_all_entity_predictions(self.entity_results, extractors)
    merged_targets = merge_labels(aligned_predictions)
    merged_targets = substitute_labels(merged_targets, "O", "no_entity")

    entity_report = {}
    for extractor in extractors:
        merged_predictions = merge_labels(aligned_predictions, extractor)
        merged_predictions = substitute_labels(merged_predictions, "O", "no_entity")
        # note: the report is replaced on every iteration, so with multiple
        # extractors only the last extractor's report is returned
        entity_report = metrics.classification_report(
            merged_targets, merged_predictions, output_dict=True
        )

    return [intent_report, entity_report]
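When a pipeline registers more than one entity extractor, the loop above keeps only the last report. Below is a minimal sketch of a variant that keys each report by extractor name; it reuses the same rasa.nlu.test helpers and the same self.interpreter / self.entity_results attributes, and the method name itself is hypothetical.

def evaluation_get_entity_reports_per_extractor(self):
    # Hedged variant (not part of the original class): one report per extractor.
    from sklearn import metrics
    from rasa.nlu.test import (
        align_all_entity_predictions,
        merge_labels,
        substitute_labels,
        get_entity_extractors,
    )

    extractors = get_entity_extractors(self.interpreter)
    aligned_predictions = align_all_entity_predictions(self.entity_results, extractors)
    merged_targets = substitute_labels(
        merge_labels(aligned_predictions), "O", "no_entity"
    )

    entity_reports = {}
    for extractor in extractors:
        merged_predictions = substitute_labels(
            merge_labels(aligned_predictions, extractor), "O", "no_entity"
        )
        # keep each extractor's report instead of overwriting a single dict
        entity_reports[extractor] = metrics.classification_report(
            merged_targets, merged_predictions, output_dict=True
        )
    return entity_reports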
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:
            super().__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:
            super().__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")

    rasa.utils.io.create_directory(report_folder)

    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities(
        [EN_entity_result],
        extractors,
        report_folder,
        errors=True,
        successes=True,
        disable_plotting=False,
    )

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 6
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.0
    assert report_a["macro avg"]["recall"] == 0.5
    assert result["EntityExtractorA"]["accuracy"] == 0.75
    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_confusion_matrix.png")
    )
    assert os.path.exists(os.path.join(report_folder, "EntityExtractorA_errors.json"))
    assert os.path.exists(os.path.join(report_folder, "EntityExtractorA_successes.json"))
    assert not os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_histogram.png")
    )
def test_get_entity_extractors(
    components: List[Component], expected_extractors: Set[Text]
):
    mock_interpreter = Interpreter(components, None)
    extractors = get_entity_extractors(mock_interpreter)

    assert extractors == expected_extractors
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:
            super(EntityExtractorA, self).__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:
            super(EntityExtractorB, self).__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")

    utils.create_dir(report_folder)

    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities([EN_entity_result], extractors, report_folder)

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["EntityExtractorA"]["accuracy"] == 0.75
def test_get_entity_extractors(pretrained_interpreter):
    assert get_entity_extractors(pretrained_interpreter) == {
        "SpacyEntityExtractor",
        "DucklingHTTPExtractor",
    }
def evaluate_update(repository_version, repository_authorization):
    evaluations = backend().request_backend_start_evaluation(
        repository_version, repository_authorization
    )
    training_examples = []

    for evaluate in evaluations:
        training_examples.append(
            Message.build(
                text=evaluate.get("text"),
                intent=evaluate.get("intent"),
                entities=evaluate.get("entities"),
            )
        )

    test_data = TrainingData(training_examples=training_examples)
    interpreter = update_interpreters.get(
        repository_version, repository_authorization, rasa_version, use_cache=False
    )

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }

    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, test_data
    )

    if intent_results:
        result["intent_evaluation"] = evaluate_intents(intent_results)

    if entity_results:
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(entity_results, extractors)

    intent_evaluation = result.get("intent_evaluation")
    entity_evaluation = result.get("entity_evaluation")

    merged_logs = merge_intent_entity_log(intent_evaluation, entity_evaluation)
    log = get_formatted_log(merged_logs)

    charts = plot_and_save_charts(repository_version, intent_results)
    evaluate_result = backend().request_backend_create_evaluate_results(
        {
            "repository_version": repository_version,
            "matrix_chart": charts.get("matrix_chart"),
            "confidence_chart": charts.get("confidence_chart"),
            "log": json.dumps(log),
            "intentprecision": intent_evaluation.get("precision"),
            "intentf1_score": intent_evaluation.get("f1_score"),
            "intentaccuracy": intent_evaluation.get("accuracy"),
            "entityprecision": entity_evaluation.get("precision"),
            "entityf1_score": entity_evaluation.get("f1_score"),
            "entityaccuracy": entity_evaluation.get("accuracy"),
        },
        repository_authorization,
    )

    intent_reports = intent_evaluation.get("report", {})
    entity_reports = entity_evaluation.get("report", {})

    for intent_key in intent_reports.keys():
        if intent_key and intent_key not in excluded_itens:
            intent = intent_reports.get(intent_key)

            backend().request_backend_create_evaluate_results_intent(
                {
                    "evaluate_id": evaluate_result.get("evaluate_id"),
                    "precision": intent.get("precision"),
                    "recall": intent.get("recall"),
                    "f1_score": intent.get("f1-score"),
                    "support": intent.get("support"),
                    "intent_key": intent_key,
                },
                repository_authorization,
            )

    for entity_key in entity_reports.keys():
        if entity_key and entity_key not in excluded_itens:  # pragma: no cover
            entity = entity_reports.get(entity_key)

            backend().request_backend_create_evaluate_results_score(
                {
                    "evaluate_id": evaluate_result.get("evaluate_id"),
                    "repository_version": repository_version,
                    "precision": entity.get("precision"),
                    "recall": entity.get("recall"),
                    "f1_score": entity.get("f1-score"),
                    "support": entity.get("support"),
                    "entity_key": entity_key,
                },
                repository_authorization,
            )

    return {
        "id": evaluate_result.get("evaluate_id"),
        "version": evaluate_result.get("evaluate_version"),
        "cross_validation": False,
    }
def test_get_entity_extractors(components, expected_extractors):
    mock_interpreter = Interpreter(components, None)
    extractors = get_entity_extractors(mock_interpreter)

    assert extractors == expected_extractors
def test_get_entity_extractors(duckling_interpreter):
    assert get_entity_extractors(duckling_interpreter) == {"DucklingHTTPExtractor"}
def evaluate_crossval_update(
    repository_version, by, repository_authorization, from_queue="celery"
):
    update_request = backend().request_backend_start_training_nlu(
        repository_version, by, repository_authorization, from_queue
    )
    examples_list = get_examples_request(repository_version, repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    )
                )

            data = TrainingData(training_examples=examples)
            rasa_nlu_config = get_rasa_nlu_config(update_request)
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            intent_train_metrics: IntentMetrics = defaultdict(list)
            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_train_metrics: EntityMetrics = defaultdict(lambda: defaultdict(list))
            entity_test_metrics: EntityMetrics = defaultdict(lambda: defaultdict(list))
            response_selection_train_metrics: ResponseSelectionMetrics = defaultdict(list)
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[ResponseSelectionEvaluationResult] = []

            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            for train, test in generate_folds(3, data):
                interpreter = trainer.train(train)

                # calculate train accuracy
                combine_result(
                    intent_train_metrics,
                    entity_train_metrics,
                    response_selection_train_metrics,
                    interpreter,
                    train,
                )
                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)

                entity_evaluation_possible = (
                    entity_evaluation_possible
                    or _contains_entity_labels(entity_results)
                )

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation, entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(repository_version, intent_results)
            evaluate_result = backend().request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            for intent_key in intent_reports.keys():
                if intent_key and intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)

                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            for entity_key in entity_reports.keys():
                if entity_key and entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)

                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True,
            }
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(
                repository_version, repository_authorization
            )
            raise e
        finally:
            backend().request_backend_traininglog_nlu(
                repository_version, pl.getvalue(), repository_authorization
            )
def run_test_on_nlu(nlu_path: str, model_path: str):
    """
    Run tests on NLU data.

    Args:
        nlu_path: path where NLU test data is present as YAML.
        model_path: path of the model on which the test has to be run.

    Returns: dictionary with evaluation results
    """
    from rasa.model import get_model
    import rasa.shared.nlu.training_data.loading
    from rasa.nlu.model import Interpreter
    from rasa.nlu.test import (
        remove_pretrained_extractors,
        get_eval_data,
        evaluate_intents,
        evaluate_response_selections,
        get_entity_extractors,
    )
    from kairon import Utility

    unpacked_model = get_model(model_path)
    nlu_model = os.path.join(unpacked_model, "nlu")
    interpreter = Interpreter.load(nlu_model)
    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = rasa.shared.nlu.training_data.loading.load_data(
        nlu_path, interpreter.model_metadata.language
    )

    result: Dict[Text, Optional[Dict]] = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }

    (intent_results, response_selection_results, entity_results) = get_eval_data(
        interpreter, test_data
    )

    if intent_results:
        successes = []
        errors = []
        result["intent_evaluation"] = evaluate_intents(
            intent_results, None, False, False, True
        )
        if result["intent_evaluation"].get("predictions"):
            del result["intent_evaluation"]["predictions"]
            del result["intent_evaluation"]["report"]
        for r in intent_results:
            if r.intent_target == r.intent_prediction:
                pass
                # successes.append({
                #     "text": r.message,
                #     "intent": r.intent_target,
                #     "intent_prediction": {
                #         "name": r.intent_prediction,
                #         "confidence": r.confidence,
                #     },
                # })
            else:
                errors.append(
                    {
                        "text": r.message,
                        "intent": r.intent_target,
                        "intent_prediction": {
                            "name": r.intent_prediction,
                            "confidence": r.confidence,
                        },
                    }
                )
        result["intent_evaluation"]["total_count"] = len(successes) + len(errors)
        result["intent_evaluation"]["success_count"] = len(successes)
        result["intent_evaluation"]["failure_count"] = len(errors)
        result["intent_evaluation"]["successes"] = successes
        result["intent_evaluation"]["errors"] = errors

    if response_selection_results:
        successes = []
        errors = []
        result["response_selection_evaluation"] = evaluate_response_selections(
            response_selection_results, None, False, False, True
        )
        if result["response_selection_evaluation"].get("predictions"):
            del result["response_selection_evaluation"]["predictions"]
            del result["response_selection_evaluation"]["report"]
        for r in response_selection_results:
            if r.intent_response_key_prediction == r.intent_response_key_target:
                pass
                # successes.append({
                #     "text": r.message,
                #     "intent_response_key_target": r.intent_response_key_target,
                #     "intent_response_key_prediction": {
                #         "name": r.intent_response_key_prediction,
                #         "confidence": r.confidence,
                #     },
                # })
            else:
                if not Utility.check_empty_string(r.intent_response_key_target):
                    errors.append(
                        {
                            "text": r.message,
                            "intent_response_key_target": r.intent_response_key_target,
                            "intent_response_key_prediction": {
                                "name": r.intent_response_key_prediction,
                                "confidence": r.confidence,
                            },
                        }
                    )
        result["response_selection_evaluation"]["total_count"] = len(successes) + len(errors)
        result["response_selection_evaluation"]["success_count"] = len(successes)
        result["response_selection_evaluation"]["failure_count"] = len(errors)
        result["response_selection_evaluation"]["successes"] = successes
        result["response_selection_evaluation"]["errors"] = errors

    if any(entity_results):
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = ModelTester.__evaluate_entities(
            entity_results, extractors
        )

    return result
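A hedged usage sketch for the function above; both paths are placeholders, and it assumes run_test_on_nlu is importable as shown together with a packed Rasa model archive and a YAML file of NLU test examples.

# Hedged usage sketch: the paths below are hypothetical placeholders.
if __name__ == "__main__":
    evaluation = run_test_on_nlu(
        nlu_path="tests/data/nlu.yml",      # hypothetical NLU test data (YAML)
        model_path="models/model.tar.gz",   # hypothetical trained model archive
    )
    # each top-level key holds either None or an evaluation dict
    print(sorted(evaluation.keys()))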
def get_extractor(self):
    from rasa.nlu.test import get_entity_extractors

    return list(get_entity_extractors(self.interpreter))
def evaluate_crossval_update(
    repository_version_language,
    repository_authorization,
    aws_bucket_authentication,
    language,
):
    update_request = backend().request_backend_get_current_configuration(
        repository_authorization
    )
    examples_list = get_examples_request(
        repository_version_language, repository_authorization
    )

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    )
                )

            data = TrainingData(training_examples=examples)

            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            rasa_nlu_config = pipeline_builder.get_nlu_model()

            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_test_metrics: EntityMetrics = defaultdict(lambda: defaultdict(list))
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[ResponseSelectionEvaluationResult] = []

            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            language_preprocessor = PreprocessingFactory(language).factory()

            for train, test in generate_folds(3, data):
                interpreter = trainer.train(train)

                test.training_examples = [
                    language_preprocessor.preprocess(x)
                    for x in test.training_examples
                ]

                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)

                entity_evaluation_possible = (
                    entity_evaluation_possible
                    or _contains_entity_labels(entity_results)
                )

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation, entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(
                repository_version_language, intent_results, aws_bucket_authentication
            )
            evaluate_result = backend().request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version_language,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                    "cross_validation": True,
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            for intent_key in intent_reports.keys():
                if intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)

                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            # remove group entities when entities returned as "<entity>.<group_entity>"
            # iterate over a copy of the keys so entries can be removed while looping
            for entity_key in list(entity_reports.keys()):
                if "." in entity_key:
                    new_entity_key = entity_key.split(".")[0]
                    entity_reports[new_entity_key] = entity_reports[entity_key]
                    entity_reports.pop(entity_key, None)

            for entity_key in entity_reports.keys():
                if entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)

                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version_language,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True,
            }
        except Exception as e:
            logger.exception(e)
            raise e