def load_from_dict(
    config: Optional[Dict],
    config_path: Text,
    domain_path: Optional[Text] = None,
    training_data_paths: Optional[List[Text]] = None,
    training_type: Optional[TrainingType] = TrainingType.BOTH,
) -> "TrainingDataImporter":
    """Build a `TrainingDataImporter` from a configuration dictionary.

    Args:
        config: Configuration dictionary (may be `None` or empty). Its
            `"importers"` entry, if present, lists the importer
            configurations to instantiate.
        config_path: Forwarded to every importer that is created.
        domain_path: Forwarded to every importer that is created.
        training_data_paths: Forwarded to every importer that is created.
        training_type: Forwarded to every importer that is created.

    Returns:
        An `E2EImporter` wrapping a `RetrievalModelsDataImporter` wrapping a
        `CombinedDataImporter` over the configured importers. When no
        importer could be created, a single `RasaFileImporter` is used.
    """
    from rasa.importers.rasa import RasaFileImporter

    # `importers` may be present but empty (e.g. a bare `importers:` key),
    # in which case `.get` returns `None`; treat that like "not configured".
    importer_configs = (config or {}).get("importers") or []

    importers = []
    for importer_config in importer_configs:
        importer = TrainingDataImporter._importer_from_dict(
            importer_config,
            config_path,
            domain_path,
            training_data_paths,
            training_type,
        )
        # Falsy results are dropped so that the fallback below can kick in.
        if importer:
            importers.append(importer)

    if not importers:
        importers = [
            RasaFileImporter(
                config_path, domain_path, training_data_paths, training_type
            )
        ]

    return E2EImporter(RetrievalModelsDataImporter(CombinedDataImporter(importers)))
async def test_verify_valid_utterances():
    """`verify_utterances` passes for the default domain, NLU data and stories."""
    training_files = [DEFAULT_NLU_DATA, DEFAULT_STORIES_FILE]
    file_importer = RasaFileImporter(
        domain_path="data/test_domains/default.yml",
        training_data_paths=training_files,
    )

    validator = await Validator.from_importer(file_importer)

    utterances_ok = validator.verify_utterances()
    assert utterances_ok
async def test_verify_intents_does_not_fail_on_valid_data():
    """`verify_intents` passes for the moodbot domain with `DEFAULT_NLU_DATA`."""
    file_importer = RasaFileImporter(
        domain_path="examples/moodbot/domain.yml",
        training_data_paths=[DEFAULT_NLU_DATA],
    )

    validator = await Validator.from_importer(file_importer)

    intents_ok = validator.verify_intents()
    assert intents_ok
async def test_verify_story_structure():
    """`verify_story_structure` passes on the default stories without ignoring warnings."""
    file_importer = RasaFileImporter(
        domain_path="data/test_domains/default.yml",
        training_data_paths=[DEFAULT_STORIES_FILE],
    )

    validator = await Validator.from_importer(file_importer)

    structure_ok = validator.verify_story_structure(ignore_warnings=False)
    assert structure_ok
async def test_verify_bad_story_structure_ignore_warnings():
    """`verify_story_structure` passes on the conflicting stories file when warnings are ignored."""
    conflicting_stories = "data/test_stories/stories_conflicting_2.md"
    file_importer = RasaFileImporter(
        domain_path="data/test_domains/default.yml",
        training_data_paths=[conflicting_stories],
    )

    validator = await Validator.from_importer(file_importer)

    structure_ok = validator.verify_story_structure(ignore_warnings=True)
    assert structure_ok
async def test_verify_there_is_not_example_repetition_in_intents():
    """`verify_example_repetition_in_intents` passes for the knowledgebasebot NLU data."""
    nlu_file = "examples/knowledgebasebot/data/nlu.md"
    file_importer = RasaFileImporter(
        domain_path="examples/moodbot/domain.yml",
        training_data_paths=[nlu_file],
    )

    validator = await Validator.from_importer(file_importer)

    no_repetition = validator.verify_example_repetition_in_intents(False)
    assert no_repetition
def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None:
    """Validate either the story structure alone or the whole project.

    Args:
        args: Commandline arguments; `args.domain` and `args.data` locate the
            files to validate.
        stories_only: If `True`, only the story structure is validated.
    """
    event_loop = asyncio.get_event_loop()
    importer = RasaFileImporter(domain_path=args.domain, training_data_paths=args.data)
    validator = event_loop.run_until_complete(Validator.from_importer(importer))

    if stories_only:
        has_errors = not _validate_story_structure(validator, args)
    else:
        # The checks short-circuit: a later check only runs if the earlier
        # ones succeeded.
        has_errors = not (
            _validate_domain(validator)
            and _validate_nlu(validator, args)
            and _validate_story_structure(validator, args)
        )

    if has_errors:
        rasa.cli.utils.print_error_and_exit("Project validation completed with errors.")
async def test_verify_intents_does_fail_on_invalid_data():
    """`verify_intents` fails when the default test domain is paired with `DEFAULT_NLU_DATA`."""
    # The domain and the NLU data belong to different projects, so the
    # validator is expected to report a mismatch.
    file_importer = RasaFileImporter(
        domain_path="data/test_domains/default.yml",
        training_data_paths=[DEFAULT_NLU_DATA],
    )

    validator = await Validator.from_importer(file_importer)

    intents_ok = validator.verify_intents()
    assert not intents_ok
async def test_verify_there_is_example_repetition_in_intents():
    """`verify_example_repetition_in_intents` fails when an example is shared between intents."""
    # Per the original author's note, the NLU data has the example
    # 'good afternoon' duplicated across the intents greet and goodbye.
    file_importer = RasaFileImporter(
        domain_path="examples/moodbot/domain.yml",
        training_data_paths=[DEFAULT_NLU_DATA],
    )

    validator = await Validator.from_importer(file_importer)

    no_repetition = validator.verify_example_repetition_in_intents(False)
    assert not no_repetition
async def test_train_docker_and_docs_configs(config_file: Text, monkeypatch: MonkeyPatch):
    """The config imported from `config_file` loads with several components and a matching language."""
    # Replace the autoconfig dump hook with a mock for the duration of the test.
    monkeypatch.setattr(autoconfig, "_dump_config", Mock())

    file_importer = RasaFileImporter(config_file=config_file)
    raw_config = await file_importer.get_config()
    parsed_config = config.load(raw_config)

    assert len(parsed_config.component_names) > 1
    assert parsed_config.language == raw_config["language"]
def validate_files(args):
    """Run every validator check on the domain and training data named in `args`.

    Args:
        args: Parsed commandline arguments providing `domain` and `data`.
    """
    from rasa.core.validator import Validator
    from rasa.importers.rasa import RasaFileImporter

    event_loop = asyncio.get_event_loop()
    importer = RasaFileImporter(
        domain_path=args.domain,
        training_data_paths=args.data,
    )

    # `from_importer` is a coroutine, so it is driven to completion on the loop.
    validator = event_loop.run_until_complete(Validator.from_importer(importer))
    validator.verify_all()
async def test_verify_logging_message_for_repetition_in_intents(caplog):
    """Exactly one `UserWarning` asking to fix the conflict is raised for repeated examples."""
    # Per the original author's note, the NLU data has the example
    # 'good afternoon' duplicated across the intents greet and goodbye.
    file_importer = RasaFileImporter(
        domain_path="examples/moodbot/domain.yml",
        training_data_paths=[DEFAULT_NLU_DATA],
    )
    validator = await Validator.from_importer(file_importer)

    # Drop records captured so far so earlier debug messages are not counted.
    caplog.clear()

    with pytest.warns(UserWarning) as captured:
        validator.verify_example_repetition_in_intents(False)

    assert len(captured) == 1
    warning_text = captured[0].message.args[0]
    assert "You should fix that conflict " in warning_text
async def test_verify_logging_message_for_repetition_in_intents(caplog):
    """Exactly one `UserWarning` naming the duplicated example and its intents is raised."""
    # Per the original author's note, the moodbot NLU data has the example
    # 'good afternoon' duplicated across the intents greet and goodbye.
    file_importer = RasaFileImporter(
        domain_path="examples/moodbot/domain.yml",
        training_data_paths=["examples/moodbot/data/nlu.md"],
    )
    validator = await Validator.from_importer(file_importer)

    # Drop records captured so far so earlier debug messages are not counted.
    caplog.clear()

    with pytest.warns(UserWarning) as captured:
        validator.verify_example_repetition_in_intents(False)

    assert len(captured) == 1
    expected_fragment = (
        "The example 'good afternoon' was found in "
        "multiple intents: goodbye, greet"
    )
    assert expected_fragment in captured[0].message.args[0]
def validate_files(args):
    """Validate all files needed for training a model and exit the process.

    The process exits with status 0 when `Validator.verify_all` reports
    success and with status 1 otherwise.

    Args:
        args: Parsed commandline arguments providing `domain`, `data` and
            `fail_on_warnings`.
    """
    from rasa.core.validator import Validator
    from rasa.importers.rasa import RasaFileImporter

    loop = asyncio.get_event_loop()
    file_importer = RasaFileImporter(
        domain_path=args.domain, training_data_paths=args.data
    )

    validator = loop.run_until_complete(Validator.from_importer(file_importer))

    # `verify_all` receives the negation of `fail_on_warnings`.
    everything_is_alright = validator.verify_all(not args.fail_on_warnings)

    # One exit call with a computed status, instead of a conditional
    # expression evaluated only for its side effects.
    sys.exit(0 if everything_is_alright else 1)
async def _setup_trackers_for_testing(
    domain_path: Text, training_data_file: Text
) -> Tuple[List[TrackerWithCachedStates], Domain]:
    """Generate trackers from a domain file and a single training data file.

    Args:
        domain_path: Path handed to `RasaFileImporter` as the domain.
        training_data_file: The only training data path given to the importer.

    Returns:
        The generated trackers together with the validator's domain.
    """
    file_importer = RasaFileImporter(
        domain_path=domain_path,
        training_data_paths=[training_data_file],
    )
    validator = await Validator.from_importer(file_importer)

    # Duplicates are kept and augmentation is switched off.
    generator = TrainingDataGenerator(
        validator.story_graph,
        domain=validator.domain,
        remove_duplicates=False,
        augmentation_factor=0,
    )

    return generator.generate(), validator.domain
async def test_verify_logging_message_for_repetition_in_intents(caplog):
    """The last log record is a WARNING naming the duplicated example and its intents."""
    # Per the original author's note, the moodbot NLU data has the example
    # 'good afternoon' duplicated across the intents greet and goodbye.
    file_importer = RasaFileImporter(
        domain_path="examples/moodbot/domain.yml",
        training_data_paths=["examples/moodbot/data/nlu.md"],
    )
    validator = await Validator.from_importer(file_importer)

    validator.verify_example_repetition_in_intents(False)

    last_record = caplog.records[-1]
    level = last_record.levelname
    message = last_record.message

    expected_message = (
        "The example 'good afternoon' was found in these "
        "multiples intents: goodbye, greet"
    )
    assert "WARNING" == level
    assert expected_message == message
async def test_fail_on_invalid_utterances(tmpdir):
    """`verify_utterances` fails when an action has no matching response."""
    domain_content = {
        "responses": {"utter_greet": {"text": "hello"}},
        "actions": [
            "utter_greet",
            "utter_non_existent",  # error: utter template does not exist
        ],
    }
    invalid_domain = str(tmpdir / "invalid_domain.yml")
    io_utils.write_yaml_file(domain_content, invalid_domain)

    file_importer = RasaFileImporter(domain_path=invalid_domain)
    validator = await Validator.from_importer(file_importer)

    utterances_ok = validator.verify_utterances()
    assert not utterances_ok
async def test_early_exit_on_invalid_domain():
    """Loading a domain with duplicate intents emits two identical `UserWarning`s."""
    domain_path = "data/test_domains/duplicate_intents.yml"
    file_importer = RasaFileImporter(domain_path=domain_path)

    with pytest.warns(UserWarning) as captured:
        validator = await Validator.from_importer(file_importer)
    validator.verify_domain_validity()

    # two for non-unique domains
    assert len(captured) == 2

    first_text = captured[0].message.args[0]
    second_text = captured[1].message.args[0]
    expected_fragment = (
        f"Loading domain from '{domain_path}' failed. Using empty domain. "
        "Error: 'Intents are not unique! Found multiple intents with name(s) "
        "['default', 'goodbye']. Either rename or remove the duplicate ones.'"
    )
    assert expected_fragment in first_text
    assert first_text == second_text
async def test_early_exit_on_invalid_domain(caplog):
    """The last log record is a WARNING reporting the duplicate-intent domain failure."""
    domain_path = "data/test_domains/duplicate_intents.yml"
    file_importer = RasaFileImporter(domain_path=domain_path)

    validator = await Validator.from_importer(file_importer)
    validator.verify_domain_validity()

    last_record = caplog.records[-1]
    message = last_record.message
    level = last_record.levelname

    expected_message = (
        f"Loading domain from '{domain_path}' failed. Using empty domain. "
        "Error: 'Intents are not unique! Found two intents with name "
        "'default'. Either rename or remove one of them.'"
    )
    assert "WARNING" == level
    assert expected_message == message
def _project_files(
    project: Text,
    config_file: Text = DEFAULT_CONFIG_PATH,
    domain: Text = DEFAULT_DOMAIN_PATH,
    training_files: Text = DEFAULT_DATA_PATH,
) -> TrainingDataImporter:
    """Create a `RasaFileImporter` for files located inside `project`.

    Args:
        project: Directory that relative paths are resolved against.
        config_file: Config path; `None` and absolute paths are passed
            through unchanged.
        domain: Domain path, resolved the same way as `config_file`.
        training_files: Training data path, resolved the same way and then
            wrapped in a single-element list.

    Returns:
        The importer built from the resolved paths.
    """

    def _resolve(path):
        # `None` and absolute paths are left alone; anything else is taken
        # to be relative to the project directory.
        if path is None or Path(path).is_absolute():
            return path
        return os.path.join(project, path)

    return RasaFileImporter(
        config_file=_resolve(config_file),
        domain_path=_resolve(domain),
        training_data_paths=[_resolve(training_files)],
    )
def _project_files(
    project,
    config_file=DEFAULT_CONFIG_PATH,
    domain=DEFAULT_DOMAIN_PATH,
    training_files=DEFAULT_DATA_PATH,
):
    """Create a `RasaFileImporter` for files located inside `project`.

    Every path that is not `None` is joined onto `project`; the training
    data path is additionally wrapped in a single-element list.
    """

    def _in_project(path):
        # `None` is passed through so the importer sees it unchanged.
        if path is None:
            return None
        return os.path.join(project, path)

    return RasaFileImporter(
        config_file=_in_project(config_file),
        domain_path=_in_project(domain),
        training_data_paths=[_in_project(training_files)],
    )
async def test_rasa_file_importer(project: Text):
    """`RasaFileImporter` loads the expected domain, stories and NLU data from `project`."""
    config_path, domain_path, default_data_path = (
        os.path.join(project, relative_path)
        for relative_path in (
            DEFAULT_CONFIG_PATH,
            DEFAULT_DOMAIN_PATH,
            DEFAULT_DATA_PATH,
        )
    )
    file_importer = RasaFileImporter(config_path, domain_path, [default_data_path])

    # Domain
    loaded_domain = await file_importer.get_domain()
    assert len(loaded_domain.intents) == 7 + len(DEFAULT_INTENTS)
    assert loaded_domain.slots == []
    assert loaded_domain.entities == []
    assert len(loaded_domain.action_names) == 17
    assert len(loaded_domain.templates) == 6

    # Stories
    story_graph = await file_importer.get_stories()
    assert len(story_graph.story_steps) == 5

    # NLU data
    training_data = await file_importer.get_nlu_data("en")
    assert len(training_data.intents) == 7
    assert len(training_data.intent_examples) == 68
async def test_combined_file_importer_with_single_importer(project: Text):
    """A `CombinedDataImporter` over one importer yields the same data as that importer."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)

    single = RasaFileImporter(config_path, domain_path, [default_data_path])
    combined = CombinedDataImporter([single])

    # Config
    assert await single.get_config() == await combined.get_config()

    # Domain (compared by hash)
    combined_domain = await combined.get_domain()
    single_domain = await single.get_domain()
    assert hash(combined_domain) == hash(single_domain)

    # NLU data (compared by hash)
    combined_nlu = await combined.get_nlu_data()
    single_nlu = await single.get_nlu_data()
    assert hash(combined_nlu) == hash(single_nlu)

    # Stories (compared through their string rendering)
    single_stories = await single.get_stories()
    combined_stories = await combined.get_stories()
    assert combined_stories.as_story_string() == single_stories.as_story_string()
async def validator():
    """Return a `Validator` for the slot-enabled default domain, NLU data and stories."""
    training_files = [DEFAULT_NLU_DATA, DEFAULT_STORIES_FILE]
    file_importer = RasaFileImporter(
        domain_path=DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        training_data_paths=training_files,
    )

    return await Validator.from_importer(file_importer)
async def test_rasa_file_importer_with_invalid_config():
    """An importer pointed at a non-existent config path yields an empty config."""
    file_importer = RasaFileImporter(config_file="invalid path")

    loaded_config = await file_importer.get_config()

    assert loaded_config == {}