Example #1
0
async def test_nlu_comparison(
    tmp_path: Path, monkeypatch: MonkeyPatch, nlu_as_json_path: Text
):
    """Run a mocked NLU model comparison and verify the produced output tree."""
    pipeline_config = {
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "KeywordIntentClassifier"},
            {"name": "RegexEntityExtractor"},
        ],
    }
    # the configs need to be at a different path, otherwise the results are
    # combined on the same dictionary key and cannot be plotted properly
    config_paths = [
        write_file_config(pipeline_config).name,
        write_file_config(pipeline_config).name,
    ]

    # Stub out training and evaluation so no real model work happens.
    monkeypatch.setattr(Interpreter, "load", Mock(spec=RasaNLUInterpreter))
    monkeypatch.setattr(sys.modules["rasa.nlu"], "train", AsyncMock())

    monkeypatch.setattr(
        sys.modules["rasa.nlu.test"],
        "get_eval_data",
        Mock(return_value=(1, None, (None,),)),
    )
    monkeypatch.setattr(
        sys.modules["rasa.nlu.test"],
        "evaluate_intents",
        Mock(return_value={"f1_score": 1}),
    )

    output_dir = str(tmp_path)
    importer = TrainingDataImporter.load_from_dict(
        training_data_paths=[nlu_as_json_path]
    )
    nlu_data = importer.get_nlu_data()
    await compare_nlu_models(
        config_paths, nlu_data, output_dir, runs=2, exclusion_percentages=[50, 80]
    )

    # One directory per run plus the aggregated results and the plot.
    assert set(os.listdir(output_dir)) == {
        "run_1",
        "run_2",
        "results.json",
        "nlu_model_comparison_graph.pdf",
    }

    first_run_dir = os.path.join(output_dir, "run_1")
    assert set(os.listdir(first_run_dir)) == {
        "50%_exclusion",
        "80%_exclusion",
        "test.yml",
    }

    exclusion_dir = os.path.join(first_run_dir, "50%_exclusion")
    model_names = [
        os.path.splitext(os.path.basename(path))[0] for path in config_paths
    ]

    # Per exclusion bucket: the training data plus one report and one
    # packaged model per compared config.
    expected_files = {"train"}
    expected_files.update(f"{name}_report" for name in model_names)
    expected_files.update(f"{name}.tar.gz" for name in model_names)
    assert set(os.listdir(exclusion_dir)) == expected_files
Example #2
0
def test_nlu_comparison(tmp_path: Path):
    """Compare two identical NLU configs and verify the produced output tree."""
    pipeline_config = {
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "KeywordIntentClassifier"},
            {"name": "RegexEntityExtractor"},
        ],
    }
    # the configs need to be at a different path, otherwise the results are
    # combined on the same dictionary key and cannot be plotted properly
    config_paths = [
        write_file_config(pipeline_config).name,
        write_file_config(pipeline_config).name,
    ]

    output_dir = str(tmp_path)
    compare_nlu_models(
        config_paths,
        DEFAULT_DATA_PATH,
        output_dir,
        runs=2,
        exclusion_percentages=[50, 80],
    )

    # One directory per run plus the aggregated results and the plot.
    assert set(os.listdir(output_dir)) == {
        "run_1",
        "run_2",
        "results.json",
        "nlu_model_comparison_graph.pdf",
    }

    first_run_dir = os.path.join(output_dir, "run_1")
    assert set(os.listdir(first_run_dir)) == {
        "50%_exclusion",
        "80%_exclusion",
        "test.md",
    }

    exclusion_dir = os.path.join(first_run_dir, "50%_exclusion")
    model_names = [
        os.path.splitext(os.path.basename(path))[0] for path in config_paths
    ]

    # Per exclusion bucket: the training data plus one report and one
    # packaged model per compared config.
    expected_files = {"train"}
    expected_files.update(f"{name}_report" for name in model_names)
    expected_files.update(f"{name}.tar.gz" for name in model_names)
    assert set(os.listdir(exclusion_dir)) == expected_files
Example #3
0
def test_invalid_pipeline_template():
    """Loading a config with an unknown pipeline template name must fail."""
    config_file = write_file_config({"pipeline": "my_made_up_name"})

    with pytest.raises(config.InvalidConfigError) as execinfo:
        config.load(config_file.name)
    assert "unknown pipeline template" in str(execinfo.value)
Example #4
0
def test_pipeline_looksup_registry():
    """A pipeline template name in a config resolves to its registered components."""
    template_name = list(registered_pipeline_templates)[0]
    config_file = write_file_config({"pipeline": template_name})
    loaded = config.load(config_file.name)
    component_names = [component.get("name") for component in loaded.pipeline]
    assert component_names == registered_pipeline_templates[template_name]
Example #5
0
def test_pipeline_registry_lookup(pipeline_template: Text):
    """Each registered template expands to exactly its registered components."""
    config_file = write_file_config({"pipeline": pipeline_template})
    loaded = config.load(config_file.name)
    components = [component for component in loaded.pipeline]

    # Compare via canonical JSON so dict key order does not matter.
    expected = registered_pipeline_templates[pipeline_template]
    assert json.dumps(components, sort_keys=True) == json.dumps(
        expected, sort_keys=True
    )
Example #6
0
def config_path_duplicate() -> Text:
    """Write a config file with two seeded trainable components; return its path."""
    pipeline = [
        {"name": "WhitespaceTokenizer"},
        # Fixed seed and a single epoch keep training fast and deterministic.
        {"name": "CRFEntityExtractor", EPOCHS: 1, RANDOM_SEED: 42},
        {"name": "CountVectorsFeaturizer"},
        {"name": "DIETClassifier", EPOCHS: 1, RANDOM_SEED: 42},
    ]
    return write_file_config({"language": "en", "pipeline": pipeline}).name
Example #7
0
def test_blank_config(blank_config):
    """Loading an empty file config yields the same result as the blank config."""
    config_file = write_file_config({})
    loaded = config.load(config_file.name)

    assert loaded.as_dict() == blank_config.as_dict()