Code Example #1
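This version of test_entity_evaluation_report, from the Rasa NLU test suite, registers two stub EntityExtractor subclasses on a mock Interpreter, runs evaluate_entities with errors, successes, and plotting enabled, and asserts both on the per-extractor JSON reports and on the files written as side effects (confusion matrix PNG, errors/successes JSON).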
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super().__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super().__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    report_filename_a = os.path.join(report_folder,
                                     "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder,
                                     "EntityExtractorB_report.json")

    rasa.utils.io.create_directory(report_folder)
    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
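    # Collect the names of the entity extractor components from the pipeline.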
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities(
        [EN_entity_result],
        extractors,
        report_folder,
        errors=True,
        successes=True,
        disable_plotting=False,
    )

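    # evaluate_entities writes one <ExtractorName>_report.json per extractor.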
    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 6
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.0
    assert report_a["macro avg"]["recall"] == 0.5
    assert result["EntityExtractorA"]["accuracy"] == 0.75

    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_confusion_matrix.png"))
    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_errors.json"))
    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_successes.json"))
    assert not os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_histogram.png"))
Code Example #2
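An older variant, apparently from the rasa_nlu era: here evaluate_entities takes aligned lists of targets, predictions, and tokens plus the extractor names directly, instead of a list of entity evaluation results.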
def test_entity_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    mock_extractors = ["A", "B"]
    report_filename_a = os.path.join(report_folder, "A_report.json")
    report_filename_b = os.path.join(report_folder, "B_report.json")
    ner_filename = os.path.join(report_folder, "ner_filename.json")

    utils.create_dir(report_folder)

    result = evaluate_entities(
        [EN_targets],
        [EN_predicted],
        [EN_tokens],
        mock_extractors,
        report_folder,
        ner_filename,
    )

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["A"]["accuracy"] == 0.75
Code Example #3
File: test_evaluation.py Project: zhiyuxzh/rasa
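This variant predates the errors/successes/disable_plotting keyword arguments: evaluate_entities is called with only the entity results, the extractor set, and the report folder, and the test asserts on the JSON reports alone.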
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super(EntityExtractorA, self).__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super(EntityExtractorB, self).__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    report_filename_a = os.path.join(report_folder,
                                     "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder,
                                     "EntityExtractorB_report.json")

    utils.create_dir(report_folder)
    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities([EN_entity_result], extractors, report_folder)

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["EntityExtractorA"]["accuracy"] == 0.75
Code Example #4
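A more recent version, apparently Rasa 2.x style: it uses pytest's tmp_path fixture, derives the extractor names from the entity results via _get_active_entity_extractors, and does file I/O through rasa.shared.utils.io.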
def test_entity_evaluation_report(tmp_path: Path):

    path = tmp_path / "evaluation"
    path.mkdir()
    report_folder = str(path / "reports")

    report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")

    rasa.shared.utils.io.create_directory(report_folder)

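    # Derive the active extractor names from the entity results themselves.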
    extractors = _get_active_entity_extractors([EN_entity_result])
    result = evaluate_entities(
        [EN_entity_result],
        extractors,
        report_folder,
        errors=True,
        successes=True,
        disable_plotting=False,
    )

    report_a = json.loads(rasa.shared.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.shared.utils.io.read_file(report_filename_b))

    assert len(report_a) == 6
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.0
    assert report_a["macro avg"]["recall"] == 0.5
    assert result["EntityExtractorA"]["accuracy"] == 0.75

    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_confusion_matrix.png")
    )
    assert os.path.exists(os.path.join(report_folder, "EntityExtractorA_errors.json"))
    assert os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_successes.json")
    )
    assert not os.path.exists(
        os.path.join(report_folder, "EntityExtractorA_histogram.png")
    )
Code Example #5
File: test.py Project: praneethgb/rasa
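From Rasa Core's model testing: this async test coroutine evaluates test stories, builds the story report (including conversation-level accuracy), and reuses evaluate_entities for the entities predicted end to end by the policies in POLICIES_THAT_EXTRACT_ENTITIES.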
async def test(
    stories: Text,
    agent: "Agent",
    max_stories: Optional[int] = None,
    out_directory: Optional[Text] = None,
    fail_on_prediction_errors: bool = False,
    e2e: bool = False,
    disable_plotting: bool = False,
    successes: bool = False,
    errors: bool = True,
    warnings: bool = True,
) -> Dict[Text, Any]:
    """Run the evaluation of the stories, optionally plot the results.

    Args:
        stories: the stories to evaluate on
        agent: the agent
        max_stories: maximum number of stories to consider
        out_directory: path to the directory to write results to
        fail_on_prediction_errors: boolean indicating whether to fail on prediction
            errors or not
        e2e: boolean indicating whether to use end to end evaluation or not
        disable_plotting: boolean indicating whether to disable plotting or not
        successes: boolean indicating whether to write down successful predictions or
            not
        errors: boolean indicating whether to write down incorrect predictions or not
        warnings: boolean indicating whether to write down prediction warnings or not

    Returns:
        Evaluation summary.
    """
    from rasa.model_testing import get_evaluation_metrics

    generator = _create_data_generator(stories, agent, max_stories, e2e)
    completed_trackers = generator.generate_story_trackers()

    story_evaluation, _, entity_results = await _collect_story_predictions(
        completed_trackers, agent, fail_on_prediction_errors, use_e2e=e2e)

    evaluation_store = story_evaluation.evaluation_store

    with pywarnings.catch_warnings():
        from sklearn.exceptions import UndefinedMetricWarning

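        # sklearn raises UndefinedMetricWarning for labels with no predicted samples; silence it.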
        pywarnings.simplefilter("ignore", UndefinedMetricWarning)

        targets, predictions = evaluation_store.serialise()

        if out_directory:
            report, precision, f1, action_accuracy = get_evaluation_metrics(
                targets, predictions, output_dict=True)

            # Add conversation level accuracy to story report.
            num_failed = len(story_evaluation.failed_stories)
            num_correct = len(story_evaluation.successful_stories)
            num_warnings = len(story_evaluation.stories_with_warnings)
            num_convs = num_failed + num_correct
            if num_convs and isinstance(report, Dict):
                conv_accuracy = num_correct / num_convs
                report["conversation_accuracy"] = {
                    "accuracy": conv_accuracy,
                    "correct": num_correct,
                    "with_warnings": num_warnings,
                    "total": num_convs,
                }
            report_filename = os.path.join(out_directory, REPORT_STORIES_FILE)
            rasa.shared.utils.io.dump_obj_as_json_to_file(
                report_filename, report)
            logger.info(f"Stories report saved to {report_filename}.")

        else:
            report, precision, f1, action_accuracy = get_evaluation_metrics(
                targets, predictions, output_dict=True)

        evaluate_entities(
            entity_results,
            POLICIES_THAT_EXTRACT_ENTITIES,
            out_directory,
            successes,
            errors,
            disable_plotting,
        )

    telemetry.track_core_model_test(len(generator.story_graph.story_steps),
                                    e2e, agent)

    _log_evaluation_table(
        evaluation_store.action_targets,
        "ACTION",
        action_accuracy,
        precision=precision,
        f1=f1,
        in_training_data_fraction=story_evaluation.in_training_data_fraction,
    )

    if not disable_plotting and out_directory:
        _plot_story_evaluation(
            evaluation_store.action_targets,
            evaluation_store.action_predictions,
            out_directory,
        )

    if errors and out_directory:
        _log_stories(
            story_evaluation.failed_stories,
            os.path.join(out_directory, FAILED_STORIES_FILE),
            "None of the test stories failed - all good!",
        )
    if successes and out_directory:
        _log_stories(
            story_evaluation.successful_stories,
            os.path.join(out_directory, SUCCESSFUL_STORIES_FILE),
            "None of the test stories succeeded :(",
        )
    if warnings and out_directory:
        _log_stories(
            story_evaluation.stories_with_warnings,
            os.path.join(out_directory, STORIES_WITH_WARNINGS_FILE),
            "No warnings for test stories",
        )

    return {
        "report": report,
        "precision": precision,
        "f1": f1,
        "accuracy": action_accuracy,
        "actions": story_evaluation.action_list,
        "in_training_data_fraction":
        story_evaluation.in_training_data_fraction,
        "is_end_to_end_evaluation": e2e,
    }