def test_data_convert_nlu_yml(
    run: Callable[..., RunResult], tmp_path: Path, request: FixtureRequest
):
    """Converting `demo-rasa.json` via the CLI should match the reference YAML data."""
    converted_file = tmp_path / "out.yml"

    # The request rootdir is required as the `testdir` fixture in `run` changes the
    # working directory
    examples_dir = Path(request.config.rootdir, "data", "examples", "rasa")
    json_source = (examples_dir / "demo-rasa.json").absolute()

    result = run(
        "data",
        "convert",
        "nlu",
        "--data",
        str(json_source),
        "--out",
        str(converted_file),
        "-f",
        "yaml",
    )

    # The CLI call must succeed and produce the output file.
    assert result.ret == 0
    assert converted_file.exists()

    converted = RasaYAMLReader().read(converted_file)
    reference = RasaYAMLReader().read(examples_dir / "demo-rasa.yml")

    # The converted data must be equivalent to the hand-written YAML reference.
    assert len(converted.training_examples) == len(reference.training_examples)
    assert len(converted.entity_synonyms) == len(reference.entity_synonyms)
    assert len(converted.regex_features) == len(reference.regex_features)
    assert len(converted.lookup_tables) == len(reference.lookup_tables)
    assert converted.entities == reference.entities
def test_train_model_training_data_persisted(
    tmp_path: Path, nlu_as_json_path: Text, tmp_path_factory: TempPathFactory
):
    """Training with `persist_nlu_training_data` should store the NLU data in the model."""
    config_path = tmp_path / "config.yml"
    rasa.shared.utils.io.dump_obj_as_json_to_file(
        config_path,
        {"pipeline": [{"name": "KeywordIntentClassifier"}], "language": "en"},
    )

    model_archive = rasa.model_training.train_nlu(
        str(config_path),
        nlu_as_json_path,
        output=str(tmp_path),
        persist_nlu_training_data=True,
    )
    assert Path(model_archive).is_file()

    # Unpack the archive and verify the training data provider wrote its data.
    extraction_dir = tmp_path_factory.mktemp("loaded")
    storage, _ = LocalModelStorage.from_model_archive(
        extraction_dir, Path(model_archive)
    )

    provider_dir = extraction_dir / "nlu_training_data_provider"
    assert provider_dir.is_dir()
    assert not RasaYAMLReader().read(provider_dir / "training_data.yml").is_empty()
def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
    """Generates the appropriate reader class based on the file format."""
    from rasa.shared.nlu.training_data.formats import (
        RasaYAMLReader,
        MarkdownReader,
        WitReader,
        LuisReader,
        RasaReader,
        DialogflowReader,
        NLGMarkdownReader,
    )

    # Dialogflow is matched by membership in a collection of formats,
    # so it cannot live in the single-key dispatch table below.
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()

    # Map each remaining format constant to its reader class.
    reader_classes = {
        LUIS: LuisReader,
        WIT: WitReader,
        RASA: RasaReader,
        MARKDOWN: MarkdownReader,
        MARKDOWN_NLG: NLGMarkdownReader,
        RASA_YAML: RasaYAMLReader,
    }

    reader_class = reader_classes.get(fformat)
    # Unknown formats yield None, matching the original if/elif fallthrough.
    return reader_class() if reader_class is not None else None
def _validate_yaml_training_payload(yaml_text: Text) -> None:
    """Validate that the request body is well-formed YAML training data.

    Args:
        yaml_text: Raw YAML payload taken from the HTTP request body.

    Raises:
        ErrorResponse: A 400 "BadRequest" response describing the YAML error
            if validation fails.
    """
    try:
        RasaYAMLReader().validate(yaml_text)
    except Exception as e:
        # Chain the original exception (`from e`) so tracebacks keep the
        # root cause instead of reporting "during handling of the above
        # exception, another exception occurred".
        raise ErrorResponse(
            400,
            "BadRequest",
            f"The request body does not contain valid YAML. Error: {e}",
            help_url=DOCS_URL_TRAINING_DATA,
        ) from e
async def test_multi_project_training(trained_async, tmp_path_factory: TempPathFactory):
    """A multi-project training run should persist intents, NLU data and actions."""
    example_directory = "data/test_multi_domain"
    config_file = os.path.join(example_directory, "config.yml")
    domain_file = os.path.join(example_directory, "domain.yml")
    files_of_root_project = os.path.join(example_directory, "data")

    trained_stack_model_path = await trained_async(
        config=config_file,
        domain=domain_file,
        training_files=files_of_root_project,
        force_training=True,
        persist_nlu_training_data=True,
    )

    # Unpack the trained model into a fresh storage directory.
    storage_path = tmp_path_factory.mktemp("storage_path")
    model_storage, model_metadata = LocalModelStorage.from_model_archive(
        storage_path, trained_stack_model_path
    )
    domain = model_metadata.domain

    expected_intents = {
        "greet",
        "goodbye",
        "affirm",
        "deny",
        "mood_great",
        "mood_unhappy",
    }
    assert all([intent in domain.intents for intent in expected_intents])

    # The persisted NLU training data must cover exactly the expected intents.
    with model_storage.read_from(
        Resource("nlu_training_data_provider")
    ) as resource_dir:
        nlu_training_data_file = resource_dir / "training_data.yml"
        nlu_training_data = RasaYAMLReader().read(nlu_training_data_file)

    assert expected_intents == nlu_training_data.intents

    expected_actions = [
        "utter_greet",
        "utter_cheer_up",
        "utter_did_that_help",
        "utter_happy",
        "utter_goodbye",
    ]
    assert all([action in domain.action_names_or_texts for action in expected_actions])
async def test_multi_project_training(trained_async):
    """A multi-project training run should persist intents, NLU data and actions."""
    example_directory = "data/test_multi_domain"
    config_file = os.path.join(example_directory, "config.yml")
    domain_file = os.path.join(example_directory, "domain.yml")
    files_of_root_project = os.path.join(example_directory, "data")

    trained_stack_model_path = await trained_async(
        config=config_file,
        domain=domain_file,
        training_files=files_of_root_project,
        force_training=True,
        persist_nlu_training_data=True,
    )

    # Unpack the archive and load the persisted domain from the core subdirectory.
    unpacked = model.unpack_model(trained_stack_model_path)
    domain_file = os.path.join(
        unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME, DEFAULT_DOMAIN_PATH
    )
    domain = Domain.load(domain_file)

    expected_intents = {
        "greet",
        "goodbye",
        "affirm",
        "deny",
        "mood_great",
        "mood_unhappy",
    }
    assert all([intent in domain.intents for intent in expected_intents])

    # The persisted NLU training data must cover exactly the expected intents.
    nlu_training_data_file = os.path.join(unpacked, "nlu", "training_data.yml")
    nlu_training_data = RasaYAMLReader().read(nlu_training_data_file)
    assert expected_intents == nlu_training_data.intents

    expected_actions = [
        "utter_greet",
        "utter_cheer_up",
        "utter_did_that_help",
        "utter_happy",
        "utter_goodbye",
    ]
    assert all([action in domain.action_names for action in expected_actions])
def persist(
    state: StateMachineState,
    is_initial_state: bool,
    domain_folder: str,
    nlu_folder: str,
):
    """Write the state's domain and NLU data to YAML files derived from its name."""
    domain, nlu_data = get_domain_nlu(
        state=state, is_initial_state=is_initial_state
    )

    # Build a filesystem-safe filename: keep alphanumerics, spaces, '-' and '_',
    # lowercase everything, then replace spaces with underscores.
    # NOTE: split(" ") (not split()) is used deliberately, so runs of spaces
    # map to runs of underscores exactly as before.
    sanitized = "".join(
        char.lower()
        for char in state.name
        if char.isalnum() or char.isspace() or char in ["-", "_"]
    )
    filename = "_".join(sanitized.split(" ")) + ".yaml"

    # Persist the domain, validating its YAML against the schema first.
    domain_path = os.path.join(domain_folder, filename)
    Path(domain_path).parent.mkdir(parents=True, exist_ok=True)
    rasa.shared.utils.validation.validate_yaml_schema(
        domain.as_yaml(), rasa.shared.constants.DOMAIN_SCHEMA_FILE
    )
    # Remove any stale file before persisting a fresh copy.
    if os.path.exists(domain_path):
        os.remove(domain_path)
    domain.persist(domain_path)

    # Persist the NLU data after validating the serialized YAML.
    nlu_path = os.path.join(nlu_folder, filename)
    nlu_yaml = dump_obj_as_yaml_to_string(
        nlu_data, should_preserve_key_order=True
    )
    RasaYAMLReader().validate(nlu_yaml)
    Path(nlu_path).parent.mkdir(parents=True, exist_ok=True)
    if os.path.exists(nlu_path):
        os.remove(nlu_path)
    write_text_file(nlu_yaml, nlu_path)
def persist(
    stories: List[Story],
    domain_filename: str,
    nlu_filename: str,
    additional_intents: List[Intent],
    additional_utterances: List[Utterance],
    slots: List[Slot],
    use_rules: bool = False,
):
    """Merge the stories' domains and NLU data and write the combined YAML files."""
    all_domain = Domain.empty()
    all_intents: Set[Intent] = set(additional_intents)
    all_stories: List[Story] = []
    all_slot_was_sets: Set[SlotWasSet] = set()

    # Accumulate the domain, sub-stories, intents and slot events of each story.
    for story in stories:
        domain, sub_stories, intents, slot_was_sets = story.get_domain_nlu(
            use_rules=use_rules
        )
        all_domain = all_domain.merge(domain)
        all_intents.update(intents)
        all_stories.extend(sub_stories)
        all_slot_was_sets.update(slot_was_sets)

    # Fold the consolidated slots, entities and utterance responses into the domain.
    domain_slots = Domain(
        intents={intent.name for intent in all_intents},
        entities=[slot.name for slot in slots],
        slots=slots,
        responses={
            utterance.name: [{"text": utterance.text}]
            for utterance in additional_utterances
        },
        action_names=[],
        forms={},
    )
    all_domain = all_domain.merge(domain_slots)

    # Validate the merged domain before writing anything to disk.
    rasa.shared.utils.validation.validate_yaml_schema(
        all_domain.as_yaml(), rasa.shared.constants.DOMAIN_SCHEMA_FILE
    )

    # Write the domain, replacing any stale file.
    if os.path.exists(domain_filename):
        os.remove(domain_filename)
    Path(domain_filename).parent.mkdir(parents=True, exist_ok=True)
    all_domain.persist(domain_filename)

    # Serialize the NLU data; stories become rules when `use_rules` is set.
    nlu_data = {
        "version": "2.0",
        "nlu": [
            intent.as_nlu_yaml()
            for intent in all_intents
            if isinstance(intent, IntentWithExamples)
        ],
        "rules" if use_rules else "stories": all_stories,
    }
    nlu_data_yaml = dump_obj_as_yaml_to_string(
        nlu_data, should_preserve_key_order=True
    )
    RasaYAMLReader().validate(nlu_data_yaml)

    # Write the NLU file, replacing any stale file.
    if os.path.exists(nlu_filename):
        os.remove(nlu_filename)
    Path(nlu_filename).parent.mkdir(parents=True, exist_ok=True)
    write_text_file(nlu_data_yaml, nlu_filename)