Code example #1
import json
from typing import Text

from _pytest.tmpdir import TempdirFactory

# Imports assumed for this snippet (Rasa 1.x module layout):
import rasa.utils.io
from rasa.nlu.test import IntentEvaluationResult, evaluate_intents


def test_intent_evaluation_report_large(tmpdir_factory: TempdirFactory):
    path = tmpdir_factory.mktemp("evaluation")
    report_folder = path / "reports"
    report_filename = report_folder / "intent_report.json"

    rasa.utils.io.create_directory(str(report_folder))

    def correct(label: Text) -> IntentEvaluationResult:
        return IntentEvaluationResult(label, label, "", 1.0)

    def incorrect(label: Text, _label: Text) -> IntentEvaluationResult:
        return IntentEvaluationResult(label, _label, "", 1.0)

    a_results = [correct("A")] * 10
    b_results = [correct("B")] * 7 + [incorrect("B", "C")] * 3
    c_results = [correct("C")] * 3 + [incorrect("C", "D")] + [incorrect("C", "E")]
    d_results = [correct("D")] * 29 + [incorrect("D", "B")] * 3
    e_results = [incorrect("E", "C")] * 5 + [incorrect("E", "")] * 5

    intent_results = a_results + b_results + c_results + d_results + e_results

    evaluate_intents(
        intent_results,
        report_folder,
        successes=False,
        errors=False,
        confmat_filename=None,
        intent_hist_filename=None,
        disable_plotting=False,
    )

    report = json.loads(rasa.utils.io.read_file(str(report_filename)))

    a_results = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 10,
        "confused_with": {},
    }

    e_results = {
        "precision": 0.0,
        "recall": 0.0,
        "f1-score": 0.0,
        "support": 10,
        "confused_with": {
            "C": 5,
            "": 5
        },
    }

    c_confused_with = {"D": 1, "E": 1}

    assert len(report.keys()) == 8
    assert report["A"] == a_results
    assert report["E"] == e_results
    assert report["C"]["confused_with"] == c_confused_with
Code example #2
import json
import os

# This snippet targets an older API in which evaluate_intents took *_filename
# keyword arguments; `utils` is assumed to be the rasa_nlu-era io helper
# module providing create_dir and read_file.


def test_intent_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename = os.path.join(report_folder, "intent_report.json")

    utils.create_dir(report_folder)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    result = evaluate_intents(intent_results,
                              report_folder,
                              successes_filename=None,
                              errors_filename=None,
                              confmat_filename=None,
                              intent_hist_filename=None)

    report = json.loads(utils.read_file(report_filename))

    greet_results = {"precision": 1.0,
                     "recall": 1.0,
                     "f1-score": 1.0,
                     "support": 1}

    prediction = {'text': 'hello',
                  'intent': 'greet',
                  'predicted': 'greet',
                  'confidence': 0.98765}

    assert len(report.keys()) == 4
    assert report["greet"] == greet_results
    assert result["predictions"][0] == prediction
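
Across these examples the positional arguments to IntentEvaluationResult
follow the order target, prediction, message, confidence; judging from the
attribute access in code example #5 (r.intent_target, r.intent_prediction,
r.message, r.confidence), the type is a namedtuple along these lines (a
sketch, not necessarily Rasa's literal definition):

from collections import namedtuple

# Field order inferred from the constructor calls and attribute access above.
IntentEvaluationResult = namedtuple(
    "IntentEvaluationResult",
    "intent_target intent_prediction message confidence",
)

r = IntentEvaluationResult("greet", "greet", "hello", 0.98765)
assert r.intent_prediction == "greet" and r.confidence == 0.98765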
Code example #3
File: test_evaluation.py  Project: ysinjab/rasa
import json
import os
from pathlib import Path

# Imports assumed for this snippet (Rasa 2.x module layout):
import rasa.shared.utils.io
from rasa.nlu.test import IntentEvaluationResult, evaluate_intents


def test_intent_evaluation_report(tmp_path: Path):
    path = tmp_path / "evaluation"
    path.mkdir()
    report_folder = str(path / "reports")
    report_filename = os.path.join(report_folder, "intent_report.json")

    rasa.shared.utils.io.create_directory(report_folder)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]
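    # The first result has an empty intent target; evaluate_intents filters out
    # such unlabeled examples before scoring, so only the "greet" example
    # contributes to the report and predictions below.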

    result = evaluate_intents(
        intent_results,
        report_folder,
        successes=True,
        errors=True,
        disable_plotting=False,
    )

    report = json.loads(rasa.shared.utils.io.read_file(report_filename))

    greet_results = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 1,
        "confused_with": {},
    }

    prediction = {
        "text": "hello",
        "intent": "greet",
        "predicted": "greet",
        "confidence": 0.98765,
    }

    assert len(report.keys()) == 4
    assert report["greet"] == greet_results
    assert result["predictions"][0] == prediction

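    # With the unlabeled example filtered out there are no prediction errors,
    # so errors=True still produces no intent_errors.json, while the correct
    # "greet" example is written to intent_successes.json.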
    assert os.path.exists(
        os.path.join(report_folder, "intent_confusion_matrix.png"))
    assert os.path.exists(os.path.join(report_folder, "intent_histogram.png"))
    assert not os.path.exists(os.path.join(report_folder,
                                           "intent_errors.json"))
    assert os.path.exists(os.path.join(report_folder, "intent_successes.json"))
Code example #4
import json
import os

# Imports assumed for this snippet (Rasa 1.x module layout):
import rasa.utils.io
from rasa.nlu.test import IntentEvaluationResult, evaluate_intents


def test_intent_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename = os.path.join(report_folder, "intent_report.json")

    rasa.utils.io.create_directory(report_folder)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    result = evaluate_intents(
        intent_results,
        report_folder,
        successes=False,
        errors=False,
        confmat_filename=None,
        intent_hist_filename=None,
        disable_plotting=False,
    )
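    # successes/errors are False and no plot filenames are given, so the only
    # artifact written to report_folder here is intent_report.json.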

    report = json.loads(rasa.utils.io.read_file(report_filename))

    greet_results = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 1,
        "confused_with": {},
    }

    prediction = {
        "text": "hello",
        "intent": "greet",
        "predicted": "greet",
        "confidence": 0.98765,
    }

    assert len(report.keys()) == 4
    assert report["greet"] == greet_results
    assert result["predictions"][0] == prediction
Code example #5
    @staticmethod  # assumed: the method takes no self, so it reads as static
    def run_test_on_nlu(nlu_path: str, model_path: str):
        """
        Run NLU tests on the given test data.

        Args:
            nlu_path: path to the NLU test data (YAML).
            model_path: path to the trained model on which the tests are run.

        Returns: dictionary with evaluation results
        """
        from rasa.model import get_model
        import rasa.shared.nlu.training_data.loading
        from rasa.nlu.model import Interpreter
        from rasa.nlu.test import (
            remove_pretrained_extractors,
            get_eval_data,
            evaluate_intents,
            evaluate_response_selections,
            get_entity_extractors,
        )
        from kairon import Utility

        unpacked_model = get_model(model_path)
        nlu_model = os.path.join(unpacked_model, "nlu")
        interpreter = Interpreter.load(nlu_model)
        interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
        test_data = rasa.shared.nlu.training_data.loading.load_data(
            nlu_path, interpreter.model_metadata.language
        )

        result: Dict[Text, Optional[Dict]] = {
            "intent_evaluation": None,
            "entity_evaluation": None,
            "response_selection_evaluation": None,
        }

        (intent_results, response_selection_results, entity_results) = get_eval_data(
            interpreter, test_data
        )

        if intent_results:
            successes = []
            errors = []
            result["intent_evaluation"] = evaluate_intents(intent_results, None, False, False, True)
            if result["intent_evaluation"].get('predictions'):
                del result["intent_evaluation"]['predictions']
                del result["intent_evaluation"]['report']
            for r in intent_results:
                if r.intent_target == r.intent_prediction:
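                    # Matching predictions are deliberately not collected (the
                    # success block below is commented out), so success_count
                    # stays 0 and total_count ends up counting only errors.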
                    pass
                    # successes.append({
                    #     "text": r.message,
                    #     "intent": r.intent_target,
                    #     "intent_prediction": {
                    #         'name': r.intent_prediction,
                    #         "confidence": r.confidence,
                    #     },
                    # })
                else:
                    errors.append({
                        "text": r.message,
                        "intent": r.intent_target,
                        "intent_prediction": {
                            'name': r.intent_prediction,
                            "confidence": r.confidence,
                        },
                    })
            result["intent_evaluation"]['total_count'] = len(successes) + len(errors)
            result["intent_evaluation"]['success_count'] = len(successes)
            result["intent_evaluation"]['failure_count'] = len(errors)
            result["intent_evaluation"]['successes'] = successes
            result["intent_evaluation"]['errors'] = errors

        if response_selection_results:
            successes = []
            errors = []
            result["response_selection_evaluation"] = evaluate_response_selections(
                response_selection_results,
                None,
                False,
                False,
                True
            )
            if result["response_selection_evaluation"].get('predictions'):
                del result["response_selection_evaluation"]['predictions']
                del result["response_selection_evaluation"]['report']
            for r in response_selection_results:
                if r.intent_response_key_prediction == r.intent_response_key_target:
                    pass
                    # successes.append({
                    #     "text": r.message,
                    #     "intent_response_key_target": r.intent_response_key_target,
                    #     "intent_response_key_prediction": {
                    #         "name": r.intent_response_key_prediction,
                    #         "confidence": r.confidence,
                    #     },
                    # })
                else:
                    if not Utility.check_empty_string(r.intent_response_key_target):
                        errors.append(
                            {
                                "text": r.message,
                                "intent_response_key_target": r.intent_response_key_target,
                                "intent_response_key_prediction": {
                                    "name": r.intent_response_key_prediction,
                                    "confidence": r.confidence,
                                },
                            }
                        )
            result["response_selection_evaluation"]['total_count'] = len(successes) + len(errors)
            result["response_selection_evaluation"]['success_count'] = len(successes)
            result["response_selection_evaluation"]['failure_count'] = len(errors)
            result["response_selection_evaluation"]['successes'] = successes
            result["response_selection_evaluation"]['errors'] = errors

        if any(entity_results):
            extractors = get_entity_extractors(interpreter)
            result["entity_evaluation"] = ModelTester.__evaluate_entities(entity_results, extractors)
        return result
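
For context, a call site for the method above might look like the following
sketch; both paths are placeholders, and only the ModelTester class name is
taken from the snippet itself:

results = ModelTester.run_test_on_nlu(
    nlu_path="tests/data/nlu.yml",         # placeholder: YAML NLU test data
    model_path="models/nlu-model.tar.gz",  # placeholder: trained model archive
)
if results["intent_evaluation"]:
    print(results["intent_evaluation"]["failure_count"])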