Example #1
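# Loads the demo NLU training data via TrainingDataImporter, restores an
# Interpreter from an unpacked trained model, and asserts the number of
# intent, response selection, and entity results returned by get_eval_data.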
async def test_eval_data(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    project: Text,
    unpacked_trained_rasa_model: Text,
):
    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.yml",
            "data/examples/rasa/demo-rasa-responses.yml",
        ],
    )

    _, nlu_model_directory = rasa.model.get_model_subdirectories(
        unpacked_trained_rasa_model
    )
    interpreter = Interpreter.load(nlu_model_directory, component_builder)

    data = await data_importer.get_nlu_data()
    (intent_results, response_selection_results, entity_results) = get_eval_data(
        interpreter, data
    )

    assert len(intent_results) == 46
    assert len(response_selection_results) == 0
    assert len(entity_results) == 46
Example #2
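# Uses a ConstantInterpreter that always returns the NLU fallback intent as its
# top prediction; the test checks that get_eval_data replaces the fallback with
# the highest-ranked non-fallback intent from the intent ranking.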
def test_replacing_fallback_intent():
    expected_intent = "greet"
    expected_confidence = 0.345
    fallback_prediction = {
        INTENT: {
            INTENT_NAME_KEY: DEFAULT_NLU_FALLBACK_INTENT_NAME,
            PREDICTED_CONFIDENCE_KEY: 1,
        },
        INTENT_RANKING_KEY: [
            {
                INTENT_NAME_KEY: DEFAULT_NLU_FALLBACK_INTENT_NAME,
                PREDICTED_CONFIDENCE_KEY: 1,
            },
            {
                INTENT_NAME_KEY: expected_intent,
                PREDICTED_CONFIDENCE_KEY: expected_confidence,
            },
            {INTENT_NAME_KEY: "some", PREDICTED_CONFIDENCE_KEY: 0.1},
        ],
    }

    interpreter = ConstantInterpreter(fallback_prediction)
    training_data = TrainingData(
        [Message.build("hi", "greet"), Message.build("bye", "bye")]
    )

    intent_evaluations, _, _ = get_eval_data(interpreter, training_data)

    assert all(
        prediction.intent_prediction == expected_intent
        and prediction.confidence == expected_confidence
        for prediction in intent_evaluations
    )
Example #3
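# Trains a small pipeline (WhitespaceTokenizer, CountVectorsFeaturizer,
# DIETClassifier, ResponseSelector) on the demo Markdown data, then checks
# the result counts returned by get_eval_data.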
async def test_eval_data(component_builder, tmpdir, project):
    _config = RasaNLUModelConfig({
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", "epochs": 2},
            {"name": "ResponseSelector", "epochs": 2},
        ],
        "language": "en",
    })

    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.md",
            "data/examples/rasa/demo-rasa-responses.md",
        ],
    )

    (_, _, persisted_path) = await train(
        _config,
        path=tmpdir.strpath,
        data=data_importer,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )

    interpreter = Interpreter.load(persisted_path, component_builder)

    data = await data_importer.get_nlu_data()
    (
        intent_results,
        response_selection_results,
        entity_results,
    ) = get_eval_data(interpreter, data)

    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46
Example #4
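# Builds a TrainingData set from evaluation examples fetched from a backend,
# runs get_eval_data with a cached interpreter, and posts the intent/entity
# evaluation metrics and per-label scores back to the backend.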
def evaluate_update(repository_version, repository_authorization):
    evaluations = backend().request_backend_start_evaluation(
        repository_version, repository_authorization)
    training_examples = []

    for evaluate in evaluations:
        training_examples.append(
            Message.build(
                text=evaluate.get("text"),
                intent=evaluate.get("intent"),
                entities=evaluate.get("entities"),
            ))

    test_data = TrainingData(training_examples=training_examples)
    interpreter = update_interpreters.get(repository_version,
                                          repository_authorization,
                                          rasa_version,
                                          use_cache=False)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }

    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, test_data)

    if intent_results:
        result["intent_evaluation"] = evaluate_intents(intent_results)

    if entity_results:
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(
            entity_results, extractors)

    intent_evaluation = result.get("intent_evaluation")
    entity_evaluation = result.get("entity_evaluation")

    merged_logs = merge_intent_entity_log(intent_evaluation, entity_evaluation)
    log = get_formatted_log(merged_logs)

    charts = plot_and_save_charts(repository_version, intent_results)
    evaluate_result = backend().request_backend_create_evaluate_results(
        {
            "repository_version": repository_version,
            "matrix_chart": charts.get("matrix_chart"),
            "confidence_chart": charts.get("confidence_chart"),
            "log": json.dumps(log),
            "intentprecision": intent_evaluation.get("precision"),
            "intentf1_score": intent_evaluation.get("f1_score"),
            "intentaccuracy": intent_evaluation.get("accuracy"),
            "entityprecision": entity_evaluation.get("precision"),
            "entityf1_score": entity_evaluation.get("f1_score"),
            "entityaccuracy": entity_evaluation.get("accuracy"),
        },
        repository_authorization,
    )

    intent_reports = intent_evaluation.get("report", {})
    entity_reports = entity_evaluation.get("report", {})

    for intent_key in intent_reports.keys():
        if intent_key and intent_key not in excluded_itens:
            intent = intent_reports.get(intent_key)

            backend().request_backend_create_evaluate_results_intent(
                {
                    "evaluate_id": evaluate_result.get("evaluate_id"),
                    "precision": intent.get("precision"),
                    "recall": intent.get("recall"),
                    "f1_score": intent.get("f1-score"),
                    "support": intent.get("support"),
                    "intent_key": intent_key,
                },
                repository_authorization,
            )

    for entity_key in entity_reports.keys():
        if entity_key and entity_key not in excluded_itens:  # pragma: no cover
            entity = entity_reports.get(entity_key)

            backend().request_backend_create_evaluate_results_score(
                {
                    "evaluate_id": evaluate_result.get("evaluate_id"),
                    "repository_version": repository_version,
                    "precision": entity.get("precision"),
                    "recall": entity.get("recall"),
                    "f1_score": entity.get("f1-score"),
                    "support": entity.get("support"),
                    "entity_key": entity_key,
                },
                repository_authorization,
            )

    return {
        "id": evaluate_result.get("evaluate_id"),
        "version": evaluate_result.get("evaluate_version"),
        "cross_validation": False
    }
Example #5
    def run_test_on_nlu(nlu_path: str, model_path: str):
        """
        Run NLU tests on the given test data.

        Args:
            nlu_path: path to the NLU test data (YAML).
            model_path: path to the model against which the tests are run.

        Returns: dictionary with evaluation results
        """
        from rasa.model import get_model
        import rasa.shared.nlu.training_data.loading
        from rasa.nlu.model import Interpreter
        from rasa.nlu.test import (
            remove_pretrained_extractors,
            get_eval_data,
            evaluate_intents,
            evaluate_response_selections,
            get_entity_extractors,
        )
        from kairon import Utility

        unpacked_model = get_model(model_path)
        nlu_model = os.path.join(unpacked_model, "nlu")
        interpreter = Interpreter.load(nlu_model)
        interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
        test_data = rasa.shared.nlu.training_data.loading.load_data(
            nlu_path, interpreter.model_metadata.language
        )

        result: Dict[Text, Optional[Dict]] = {
            "intent_evaluation": None,
            "entity_evaluation": None,
            "response_selection_evaluation": None,
        }

        (intent_results, response_selection_results, entity_results) = get_eval_data(
            interpreter, test_data
        )

        if intent_results:
            successes = []
            errors = []
            result["intent_evaluation"] = evaluate_intents(intent_results, None, False, False, True)
            if result["intent_evaluation"].get('predictions'):
                del result["intent_evaluation"]['predictions']
                del result["intent_evaluation"]['report']
            for r in intent_results:
                # Successful predictions are not collected; only misclassified
                # messages are reported as errors.
                if r.intent_target != r.intent_prediction:
                    errors.append({
                        "text": r.message,
                        "intent": r.intent_target,
                        "intent_prediction": {
                            'name': r.intent_prediction,
                            "confidence": r.confidence,
                        },
                    })
            result["intent_evaluation"]['total_count'] = len(successes) + len(errors)
            result["intent_evaluation"]['success_count'] = len(successes)
            result["intent_evaluation"]['failure_count'] = len(errors)
            result["intent_evaluation"]['successes'] = successes
            result["intent_evaluation"]['errors'] = errors

        if response_selection_results:
            successes = []
            errors = []
            result["response_selection_evaluation"] = evaluate_response_selections(
                response_selection_results,
                None,
                False,
                False,
                True
            )
            if result["response_selection_evaluation"].get('predictions'):
                del result["response_selection_evaluation"]['predictions']
                del result["response_selection_evaluation"]['report']
            for r in response_selection_results:
                # Successful predictions are not collected; only mismatches with
                # a non-empty target response key are reported as errors.
                if (
                    r.intent_response_key_prediction != r.intent_response_key_target
                    and not Utility.check_empty_string(r.intent_response_key_target)
                ):
                    errors.append(
                        {
                            "text": r.message,
                            "intent_response_key_target": r.intent_response_key_target,
                            "intent_response_key_prediction": {
                                "name": r.intent_response_key_prediction,
                                "confidence": r.confidence,
                            },
                        }
                    )
            result["response_selection_evaluation"]['total_count'] = len(successes) + len(errors)
            result["response_selection_evaluation"]['success_count'] = len(successes)
            result["response_selection_evaluation"]['failure_count'] = len(errors)
            result["response_selection_evaluation"]['successes'] = successes
            result["response_selection_evaluation"]['errors'] = errors

        if any(entity_results):
            extractors = get_entity_extractors(interpreter)
            result["entity_evaluation"] = ModelTester.__evaluate_entities(entity_results, extractors)
        return result
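
The examples above all follow the same core pattern: load an Interpreter and a TrainingData object, pass both to get_eval_data, and hand the non-empty result lists to the evaluate_* helpers. Below is a minimal sketch of that pattern; the model directory and test-data path are placeholder assumptions, and the positional flags passed to evaluate_intents mirror those used in Example #5 (signatures vary between Rasa versions).

import rasa.shared.nlu.training_data.loading
from rasa.nlu.model import Interpreter
from rasa.nlu.test import (
    evaluate_entities,
    evaluate_intents,
    get_entity_extractors,
    get_eval_data,
)

# Placeholder paths for illustration only.
interpreter = Interpreter.load("models/unpacked/nlu")
test_data = rasa.shared.nlu.training_data.loading.load_data("data/nlu_test.yml")

# get_eval_data returns three lists: intent results, response selection
# results, and entity results; each may be empty depending on the pipeline.
intent_results, response_selection_results, entity_results = get_eval_data(
    interpreter, test_data
)

if intent_results:
    # Positional flags as in Example #5: no output directory, no success/error
    # dumps, plotting disabled.
    intent_report = evaluate_intents(intent_results, None, False, False, True)

if entity_results:
    extractors = get_entity_extractors(interpreter)
    # Two-argument form as in Example #4; newer Rasa versions also expect the
    # same output/plotting flags as evaluate_intents.
    entity_report = evaluate_entities(entity_results, extractors)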