Example #1
    def evaluation_get_individual_report(self):
        from sklearn import metrics
        intent_report = metrics.classification_report(self.target_intents,
                                                      self.predicted_intents,
                                                      output_dict=True)
        from rasa.nlu.test import align_all_entity_predictions, merge_labels, substitute_labels, get_entity_extractors

        extractors = get_entity_extractors(self.interpreter)

        aligned_predictions = align_all_entity_predictions(
            self.entity_results, extractors)
        merged_targets = merge_labels(aligned_predictions)
        merged_targets = substitute_labels(merged_targets, "O", "no_entity")

        entity_report = {}

        # Note: with multiple extractors, only the last extractor's report is kept.
        for extractor in extractors:
            merged_predictions = merge_labels(aligned_predictions, extractor)
            merged_predictions = substitute_labels(merged_predictions, "O",
                                                   "no_entity")

            entity_report = metrics.classification_report(merged_targets,
                                                          merged_predictions,
                                                          output_dict=True)

        return [intent_report, entity_report]
Example #2
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super().__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super().__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    report_filename_a = os.path.join(report_folder,
                                     "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder,
                                     "EntityExtractorB_report.json")

    rasa.utils.io.create_directory(report_folder)
    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities(
        [EN_entity_result],
        extractors,
        report_folder,
        errors=True,
        successes=True,
        disable_plotting=False,
    )

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 6
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.0
    assert report_a["macro avg"]["recall"] == 0.5
    assert result["EntityExtractorA"]["accuracy"] == 0.75

    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_confusion_matrix.png"))
    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_errors.json"))
    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_successes.json"))
    assert not os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_histogram.png"))
Example #3
def test_get_entity_extractors(
    components: List[Component], expected_extractors: Set[Text]
):
    mock_interpreter = Interpreter(components, None)
    extractors = get_entity_extractors(mock_interpreter)

    assert extractors == expected_extractors
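The parametrized test above is shown without its pytest.mark.parametrize decorator. The sketch below illustrates what such a parametrization could look like; the extractor classes, the rasa 2.x import paths, and the expected name sets are illustrative assumptions rather than part of the original snippet.

import pytest
from typing import List, Set, Text

from rasa.nlu.components import Component
from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor
from rasa.nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor
from rasa.nlu.model import Interpreter
from rasa.nlu.test import get_entity_extractors


@pytest.mark.parametrize(
    "components, expected_extractors",
    [
        # no components -> no extractors are reported
        ([], set()),
        # a single extractor is reported under its class name
        ([CRFEntityExtractor()], {"CRFEntityExtractor"}),
        # several extractors are all reported
        (
            [CRFEntityExtractor(), SpacyEntityExtractor()],
            {"CRFEntityExtractor", "SpacyEntityExtractor"},
        ),
    ],
)
def test_get_entity_extractors_sketch(
    components: List[Component], expected_extractors: Set[Text]
):
    mock_interpreter = Interpreter(components, None)
    assert get_entity_extractors(mock_interpreter) == expected_extractors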
Example #4
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super(EntityExtractorA, self).__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super(EntityExtractorB, self).__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    report_filename_a = os.path.join(report_folder,
                                     "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder,
                                     "EntityExtractorB_report.json")

    utils.create_dir(report_folder)
    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities([EN_entity_result], extractors, report_folder)

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["EntityExtractorA"]["accuracy"] == 0.75
Example #5
def test_get_entity_extractors(pretrained_interpreter):
    assert get_entity_extractors(pretrained_interpreter) == {
        "SpacyEntityExtractor",
        "DucklingHTTPExtractor",
    }
Example #6
def evaluate_update(repository_version, repository_authorization):
    evaluations = backend().request_backend_start_evaluation(
        repository_version, repository_authorization)
    training_examples = []

    for evaluate in evaluations:
        training_examples.append(
            Message.build(
                text=evaluate.get("text"),
                intent=evaluate.get("intent"),
                entities=evaluate.get("entities"),
            ))

    test_data = TrainingData(training_examples=training_examples)
    interpreter = update_interpreters.get(repository_version,
                                          repository_authorization,
                                          rasa_version,
                                          use_cache=False)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }

    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, test_data)

    if intent_results:
        result["intent_evaluation"] = evaluate_intents(intent_results)

    if entity_results:
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(
            entity_results, extractors)

    intent_evaluation = result.get("intent_evaluation")
    entity_evaluation = result.get("entity_evaluation")

    merged_logs = merge_intent_entity_log(intent_evaluation, entity_evaluation)
    log = get_formatted_log(merged_logs)

    charts = plot_and_save_charts(repository_version, intent_results)
    evaluate_result = backend().request_backend_create_evaluate_results(
        {
            "repository_version": repository_version,
            "matrix_chart": charts.get("matrix_chart"),
            "confidence_chart": charts.get("confidence_chart"),
            "log": json.dumps(log),
            "intentprecision": intent_evaluation.get("precision"),
            "intentf1_score": intent_evaluation.get("f1_score"),
            "intentaccuracy": intent_evaluation.get("accuracy"),
            "entityprecision": entity_evaluation.get("precision"),
            "entityf1_score": entity_evaluation.get("f1_score"),
            "entityaccuracy": entity_evaluation.get("accuracy"),
        },
        repository_authorization,
    )

    intent_reports = intent_evaluation.get("report", {})
    entity_reports = entity_evaluation.get("report", {})

    for intent_key in intent_reports.keys():
        if intent_key and intent_key not in excluded_itens:
            intent = intent_reports.get(intent_key)

            backend().request_backend_create_evaluate_results_intent(
                {
                    "evaluate_id": evaluate_result.get("evaluate_id"),
                    "precision": intent.get("precision"),
                    "recall": intent.get("recall"),
                    "f1_score": intent.get("f1-score"),
                    "support": intent.get("support"),
                    "intent_key": intent_key,
                },
                repository_authorization,
            )

    for entity_key in entity_reports.keys():
        if entity_key and entity_key not in excluded_itens:  # pragma: no cover
            entity = entity_reports.get(entity_key)

            backend().request_backend_create_evaluate_results_score(
                {
                    "evaluate_id": evaluate_result.get("evaluate_id"),
                    "repository_version": repository_version,
                    "precision": entity.get("precision"),
                    "recall": entity.get("recall"),
                    "f1_score": entity.get("f1-score"),
                    "support": entity.get("support"),
                    "entity_key": entity_key,
                },
                repository_authorization,
            )

    return {
        "id": evaluate_result.get("evaluate_id"),
        "version": evaluate_result.get("evaluate_version"),
        "cross_validation": False
    }
Example #7
def test_get_entity_extractors(components, expected_extractors):
    mock_interpreter = Interpreter(components, None)
    extractors = get_entity_extractors(mock_interpreter)

    assert extractors == expected_extractors
Example #8
def test_get_entity_extractors(duckling_interpreter):
    assert get_entity_extractors(duckling_interpreter) == {
        "DucklingHTTPExtractor"
    }
Example #9
def evaluate_crossval_update(repository_version,
                             by,
                             repository_authorization,
                             from_queue='celery'):
    update_request = backend().request_backend_start_training_nlu(
        repository_version, by, repository_authorization, from_queue)
    examples_list = get_examples_request(repository_version,
                                         repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            data = TrainingData(training_examples=examples)
            rasa_nlu_config = get_rasa_nlu_config(update_request)
            trainer = Trainer(rasa_nlu_config,
                              ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            intent_train_metrics: IntentMetrics = defaultdict(list)
            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_train_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            entity_test_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            response_selection_train_metrics: ResponseSelectionMetrics = defaultdict(
                list)
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(
                list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[
                ResponseSelectionEvaluationResult] = ([])
            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            for train, test in generate_folds(3, data):
                interpreter = trainer.train(train)

                # calculate train accuracy
                combine_result(
                    intent_train_metrics,
                    entity_train_metrics,
                    response_selection_train_metrics,
                    interpreter,
                    train,
                )
                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)
                    entity_evaluation_possible = (
                        entity_evaluation_possible
                        or _contains_entity_labels(entity_results))

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(
                    entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation,
                                                  entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(repository_version, intent_results)
            evaluate_result = backend(
            ).request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            for intent_key in intent_reports.keys():
                if intent_key and intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)

                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            for entity_key in entity_reports.keys():
                if entity_key and entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)

                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True
            }

        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(repository_version,
                                                    repository_authorization)
            raise e
        finally:
            backend().request_backend_traininglog_nlu(
                repository_version, pl.getvalue(), repository_authorization)
Example #10
    def run_test_on_nlu(nlu_path: str, model_path: str):
        """
        Run tests on stories.

        Args:
            nlu_path: path where nlu test data is present as YAML.
            model_path: Model path where model on which test has to be run is present.

        Returns: dictionary with evaluation results
        """
        from rasa.model import get_model
        import rasa.shared.nlu.training_data.loading
        from rasa.nlu.model import Interpreter
        from rasa.nlu.test import (
            remove_pretrained_extractors,
            get_eval_data,
            evaluate_intents,
            evaluate_response_selections,
            get_entity_extractors,
        )
        from kairon import Utility

        unpacked_model = get_model(model_path)
        nlu_model = os.path.join(unpacked_model, "nlu")
        interpreter = Interpreter.load(nlu_model)
        interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
        test_data = rasa.shared.nlu.training_data.loading.load_data(
            nlu_path, interpreter.model_metadata.language
        )

        result: Dict[Text, Optional[Dict]] = {
            "intent_evaluation": None,
            "entity_evaluation": None,
            "response_selection_evaluation": None,
        }

        (intent_results, response_selection_results, entity_results) = get_eval_data(
            interpreter, test_data
        )

        if intent_results:
            successes = []
            errors = []
            result["intent_evaluation"] = evaluate_intents(intent_results, None, False, False, True)
            if result["intent_evaluation"].get('predictions'):
                del result["intent_evaluation"]['predictions']
                del result["intent_evaluation"]['report']
            for r in intent_results:
                if r.intent_target == r.intent_prediction:
                    pass
                    # successes.append({
                    #     "text": r.message,
                    #     "intent": r.intent_target,
                    #     "intent_prediction": {
                    #         'name': r.intent_prediction,
                    #         "confidence": r.confidence,
                    #     },
                    # })
                else:
                    errors.append({
                        "text": r.message,
                        "intent": r.intent_target,
                        "intent_prediction": {
                            'name': r.intent_prediction,
                            "confidence": r.confidence,
                        },
                    })
            result["intent_evaluation"]['total_count'] = len(successes) + len(errors)
            result["intent_evaluation"]['success_count'] = len(successes)
            result["intent_evaluation"]['failure_count'] = len(errors)
            result["intent_evaluation"]['successes'] = successes
            result["intent_evaluation"]['errors'] = errors

        if response_selection_results:
            successes = []
            errors = []
            result["response_selection_evaluation"] = evaluate_response_selections(
                response_selection_results,
                None,
                False,
                False,
                True
            )
            if result["response_selection_evaluation"].get('predictions'):
                del result["response_selection_evaluation"]['predictions']
                del result["response_selection_evaluation"]['report']
            for r in response_selection_results:
                if r.intent_response_key_prediction == r.intent_response_key_target:
                    pass
                    # successes.append({
                    #     "text": r.message,
                    #     "intent_response_key_target": r.intent_response_key_target,
                    #     "intent_response_key_prediction": {
                    #         "name": r.intent_response_key_prediction,
                    #         "confidence": r.confidence,
                    #     },
                    # })
                else:
                    if not Utility.check_empty_string(r.intent_response_key_target):
                        errors.append(
                            {
                                "text": r.message,
                                "intent_response_key_target": r.intent_response_key_target,
                                "intent_response_key_prediction": {
                                    "name": r.intent_response_key_prediction,
                                    "confidence": r.confidence,
                                },
                            }
                        )
            result["response_selection_evaluation"]['total_count'] = len(successes) + len(errors)
            result["response_selection_evaluation"]['success_count'] = len(successes)
            result["response_selection_evaluation"]['failure_count'] = len(errors)
            result["response_selection_evaluation"]['successes'] = successes
            result["response_selection_evaluation"]['errors'] = errors

        if any(entity_results):
            extractors = get_entity_extractors(interpreter)
            result["entity_evaluation"] = ModelTester.__evaluate_entities(entity_results, extractors)
        return result
Example #11
    def get_extractor(self):
        from rasa.nlu.test import get_entity_extractors
        return list(get_entity_extractors(self.interpreter))
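As a self-contained reference, a minimal usage sketch of get_entity_extractors on a persisted NLU model might look as follows; the model directory is a hypothetical path, not one taken from the examples above.

import os

from rasa.nlu.model import Interpreter
from rasa.nlu.test import get_entity_extractors

# hypothetical path to an unpacked, trained NLU model directory
nlu_model_dir = os.path.join("models", "nlu")

# load the trained pipeline and list the entity extractors it contains
interpreter = Interpreter.load(nlu_model_dir)
extractors = get_entity_extractors(interpreter)
print(sorted(extractors))  # e.g. ["CRFEntityExtractor", "DucklingHTTPExtractor"]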
Example #12
def evaluate_crossval_update(repository_version_language,
                             repository_authorization,
                             aws_bucket_authentication, language):
    update_request = backend().request_backend_get_current_configuration(
        repository_authorization)
    examples_list = get_examples_request(repository_version_language,
                                         repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            data = TrainingData(training_examples=examples)
            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            rasa_nlu_config = pipeline_builder.get_nlu_model()
            trainer = Trainer(rasa_nlu_config,
                              ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_test_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(
                list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[
                ResponseSelectionEvaluationResult] = ([])
            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            language_preprocessor = PreprocessingFactory(language).factory()

            for train, test in generate_folds(3, data):

                interpreter = trainer.train(train)

                test.training_examples = [
                    language_preprocessor.preprocess(x)
                    for x in test.training_examples
                ]

                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)
                    entity_evaluation_possible = (
                        entity_evaluation_possible
                        or _contains_entity_labels(entity_results))

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(
                    entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation,
                                                  entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(repository_version_language,
                                          intent_results,
                                          aws_bucket_authentication)
            evaluate_result = backend(
            ).request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version_language,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                    "cross_validation": True
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            for intent_key in intent_reports.keys():
                if intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)

                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            # remove group entities when entities returned as "<entity>.<group_entity>"
            # iterate over a copy of the keys because the dict is modified in the loop
            for entity_key in list(entity_reports.keys()):
                if '.' in entity_key:
                    new_entity_key = entity_key.split('.')[0]
                    entity_reports[new_entity_key] = entity_reports[entity_key]
                    entity_reports.pop(entity_key, None)

            for entity_key in entity_reports.keys():
                if entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)

                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version_language,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True,
            }

        except Exception as e:
            logger.exception(e)
            raise e