# NOTE: the imports below cover what this excerpt needs. Rasa import paths
# move between releases, so the rasa-specific ones are a best guess (Rasa 2.x
# style where possible; `registered_pipeline_templates` only exists in
# releases that still ship pipeline templates).
import json
import os
import sys
from pathlib import Path
from typing import Text
from unittest.mock import AsyncMock, Mock

import pytest
from _pytest.monkeypatch import MonkeyPatch

from rasa.core.interpreter import RasaNLUInterpreter
from rasa.nlu import config
from rasa.nlu.model import Interpreter
from rasa.nlu.registry import registered_pipeline_templates
from rasa.nlu.test import compare_nlu_models
from rasa.shared.importers.importer import TrainingDataImporter
from rasa.utils.tensorflow.constants import EPOCHS, RANDOM_SEED


async def test_nlu_comparison(
    tmp_path: Path, monkeypatch: MonkeyPatch, nlu_as_json_path: Text
):
    config = {
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "KeywordIntentClassifier"},
            {"name": "RegexEntityExtractor"},
        ],
    }
    # the configs need to be at different paths, otherwise the results are
    # combined under the same dictionary key and cannot be plotted properly
    configs = [write_file_config(config).name, write_file_config(config).name]

    # mock training and evaluation so the comparison only exercises the
    # bookkeeping around runs, exclusion percentages, and result files
    monkeypatch.setattr(Interpreter, "load", Mock(spec=RasaNLUInterpreter))
    monkeypatch.setattr(sys.modules["rasa.nlu"], "train", AsyncMock())
    monkeypatch.setattr(
        sys.modules["rasa.nlu.test"],
        "get_eval_data",
        Mock(return_value=(1, None, (None,))),
    )
    monkeypatch.setattr(
        sys.modules["rasa.nlu.test"],
        "evaluate_intents",
        Mock(return_value={"f1_score": 1}),
    )

    output = str(tmp_path)
    test_data_importer = TrainingDataImporter.load_from_dict(
        training_data_paths=[nlu_as_json_path]
    )
    test_data = test_data_importer.get_nlu_data()
    await compare_nlu_models(
        configs, test_data, output, runs=2, exclusion_percentages=[50, 80]
    )

    assert set(os.listdir(output)) == {
        "run_1",
        "run_2",
        "results.json",
        "nlu_model_comparison_graph.pdf",
    }

    run_1_path = os.path.join(output, "run_1")
    assert set(os.listdir(run_1_path)) == {
        "50%_exclusion",
        "80%_exclusion",
        "test.yml",
    }

    exclude_50_path = os.path.join(run_1_path, "50%_exclusion")
    modelnames = [os.path.splitext(os.path.basename(config))[0] for config in configs]
    modeloutputs = set(
        ["train"]
        + [f"{m}_report" for m in modelnames]
        + [f"{m}.tar.gz" for m in modelnames]
    )
    assert set(os.listdir(exclude_50_path)) == modeloutputs
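# A minimal sketch of the `write_file_config` helper the tests above and below
# rely on (hypothetical reimplementation; the real helper lives in the shared
# test utilities). Each call dumps the config dict into a fresh uniquely named
# temporary YAML file, which is why calling it twice with the same dict yields
# two different paths and keeps the comparison results under distinct keys.
import tempfile

import yaml


def write_file_config(file_config):
    config_file = tempfile.NamedTemporaryFile(
        mode="w", suffix="_tmp_config_file.yml", delete=False
    )
    yaml.safe_dump(file_config, config_file)
    config_file.flush()
    return config_file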
def test_invalid_pipeline_template():
    args = {"pipeline": "my_made_up_name"}
    f = write_file_config(args)
    with pytest.raises(config.InvalidConfigError) as execinfo:
        config.load(f.name)
    assert "unknown pipeline template" in str(execinfo.value)
def test_pipeline_looksup_registry():
    pipeline_template = list(registered_pipeline_templates)[0]
    args = {"pipeline": pipeline_template}
    f = write_file_config(args)
    final_config = config.load(f.name)
    components = [c.get("name") for c in final_config.pipeline]
    assert components == registered_pipeline_templates[pipeline_template]
# run once per registered template; the decorator supplies `pipeline_template`
@pytest.mark.parametrize("pipeline_template", list(registered_pipeline_templates))
def test_pipeline_registry_lookup(pipeline_template: Text):
    args = {"pipeline": pipeline_template}
    f = write_file_config(args)
    final_config = config.load(f.name)
    components = [c for c in final_config.pipeline]

    assert json.dumps(components, sort_keys=True) == json.dumps(
        registered_pipeline_templates[pipeline_template], sort_keys=True
    )
# assumed to be a pytest fixture: given its name and return type, it is meant
# to be requested by tests rather than called directly
@pytest.fixture()
def config_path_duplicate() -> Text:
    # a second copy of the pipeline config, written to its own temp file so
    # that two configs end up under different paths (and thus distinct keys)
    return write_file_config(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CRFEntityExtractor", EPOCHS: 1, RANDOM_SEED: 42},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 1, RANDOM_SEED: 42},
            ],
        }
    ).name
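# Assuming the usual Rasa constant values EPOCHS == "epochs" and
# RANDOM_SEED == "random_seed", the fixture above writes a config file roughly
# equivalent to this YAML (illustration only):
#
#     language: en
#     pipeline:
#     - name: WhitespaceTokenizer
#     - name: CRFEntityExtractor
#       epochs: 1
#       random_seed: 42
#     - name: CountVectorsFeaturizer
#     - name: DIETClassifier
#       epochs: 1
#       random_seed: 42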
def test_blank_config(blank_config):
    file_config = {}
    f = write_file_config(file_config)
    final_config = config.load(f.name)
    assert final_config.as_dict() == blank_config.as_dict()
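# `test_blank_config` requests a `blank_config` fixture that is not defined in
# this excerpt. A plausible sketch, assuming the loaded config type is
# RasaNLUModelConfig (the fixture's exact shape is inferred, not
# authoritative): an empty file config should load as a bare config with the
# default language and no pipeline.
from rasa.nlu.config import RasaNLUModelConfig


@pytest.fixture()
def blank_config() -> RasaNLUModelConfig:
    return RasaNLUModelConfig({"language": "en", "pipeline": []})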