def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
    """Read a Markdown story file and dump its story steps as YAML.

    The target file location is obtained from
    `cls.generate_path_for_converted_training_data_file`. If the parsed
    stories contain loops, a warning about the form conversion is printed
    before the file is written.

    Args:
        source_path: Path to the training data file.
        output_path: Path to the output directory.
    """
    from rasa.core.training.story_reader.yaml_story_reader import KEY_ACTIVE_LOOP

    converted_file_path = cls.generate_path_for_converted_training_data_file(
        source_path, output_path
    )

    story_reader = MarkdownStoryReader(unfold_or_utterances=False)
    story_writer = YAMLStoryWriter()

    # `read_from_file` is awaitable; drive it to completion synchronously.
    story_steps = asyncio.get_event_loop().run_until_complete(
        story_reader.read_from_file(source_path)
    )

    has_loops = YAMLStoryWriter.stories_contain_loops(story_steps)
    if has_loops:
        print_warning(
            f"Training data file '{source_path}' contains forms. "
            f"Any 'form' events will be converted to '{KEY_ACTIVE_LOOP}' events. "
            f"Please note that in order for these stories to work you still "
            f"need the 'FormPolicy' to be active. However the 'FormPolicy' is "
            f"deprecated, please consider switching to the new 'RulePolicy', "
            f"for which you can find the documentation here: {DOCS_URL_RULES}."
        )

    story_writer.dump(converted_file_path, story_steps)

    print_success(
        f"Converted Core file: '{source_path}' >> '{converted_file_path}'."
    )
def _write_core_yaml(
    training_data_path: Path, output_path: Path, source_path: Path
) -> None:
    """Read Markdown stories from a file and dump them with the YAML writer.

    Args:
        training_data_path: File the story steps are read from.
        output_path: Destination handed to the YAML writer.
        source_path: Path reported as the source in the success message.
    """
    md_reader = MarkdownStoryReader()
    yaml_writer = YAMLStoryWriter()

    # `read_from_file` is awaitable; drive it to completion synchronously.
    story_steps = asyncio.get_event_loop().run_until_complete(
        md_reader.read_from_file(training_data_path)
    )
    yaml_writer.dump(output_path, story_steps)

    print_success(f"Converted Core file: '{source_path}' >> '{output_path}'.")
def _guess_reader(
    filename: Text,
    domain: Domain,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Pick a story reader by asking each reader class about the file.

    The YAML reader is asked first, then the Markdown reader.

    Args:
        filename: Story file to find a reader for.
        domain: Domain forwarded to the reader.
        template_variables: Forwarded to the reader unchanged.
        use_e2e: Forwarded to the reader unchanged.

    Returns:
        A `YAMLStoryReader` or `MarkdownStoryReader` built for `filename`.

    Raises:
        ValueError: If neither reader class recognises the file.
    """
    reader_args = (domain, template_variables, use_e2e, filename)

    if YAMLStoryReader.is_yaml_story_file(filename):
        return YAMLStoryReader(*reader_args)

    if MarkdownStoryReader.is_markdown_story_file(filename):
        return MarkdownStoryReader(*reader_args)

    supported_formats = ", ".join(
        MARKDOWN_FILE_EXTENSIONS.union(YAML_FILE_EXTENSIONS)
    )
    raise ValueError(
        f"Failed to find a reader class for the story file `{filename}`. "
        f"Supported formats are "
        f"{supported_formats}."
    )
def _convert_to_yaml(args: argparse.Namespace, is_nlu: bool) -> None:
    """Convert Markdown training data into YAML files inside `args.out`.

    `args.data` may be a directory, in which case every entry directly inside
    it is considered, or a single training data file.

    For each candidate file:

    * a Markdown NLU file is converted when `is_nlu` is true and silently
      ignored otherwise;
    * a Markdown story file is converted when `is_nlu` is false;
    * anything else is reported with a "Skipped file" warning.

    Args:
        args: Parsed command line arguments; `args.out` and `args.data` are
            read.
        is_nlu: Whether NLU data (true) or Core data (false) is converted.
    """
    output = Path(args.out)
    if not os.path.exists(output):
        print_error_and_exit(
            f"The output path '{output}' doesn't exist. Please make sure to specify "
            f"an existing directory and try again."
        )

    training_data = Path(args.data)
    if not os.path.exists(training_data):
        print_error_and_exit(
            f"The training data path {training_data} doesn't exist "
            f"and will be skipped."
        )

    # `os.listdir` raises `NotADirectoryError` for a plain file, so a data
    # path that names a single file is treated as a one-element listing.
    if training_data.is_dir():
        source_paths = [training_data / file for file in os.listdir(training_data)]
    else:
        source_paths = [training_data]

    num_of_files_converted = 0
    for source_path in source_paths:
        output_path = output / f"{source_path.stem}{CONVERTED_FILE_SUFFIX}"

        if MarkdownReader.is_markdown_nlu_file(source_path):
            if not is_nlu:
                continue
            _write_nlu_yaml(source_path, output_path, source_path)
            num_of_files_converted += 1
        elif not is_nlu and MarkdownStoryReader.is_markdown_story_file(source_path):
            _write_core_yaml(source_path, output_path, source_path)
            num_of_files_converted += 1
        else:
            print_warning(f"Skipped file: '{source_path}'.")

    print_info(f"Converted {num_of_files_converted} file(s), saved in '{output}'.")
async def test_simple_story(
    tmpdir: Path, default_domain: Domain, input_md_file: Text, input_yaml_file: Text
):
    """Markdown stories written as YAML have the same shape as the YAML fixture.

    The Markdown steps are dumped to a temporary YAML file and read back; the
    number of steps and the number of events per step must match the steps
    read from `input_yaml_file`.
    """
    # NOTE(review): the reader receives `input_yaml_file` as its fourth
    # argument although it reads `input_md_file` — confirm this is intended.
    md_reader = MarkdownStoryReader(
        default_domain, None, False, input_yaml_file, unfold_or_utterances=False,
    )
    md_steps = await md_reader.read_from_file(input_md_file)

    assert not YAMLStoryWriter.stories_contain_loops(md_steps)

    reference_reader = YAMLStoryReader(default_domain, None, False)
    reference_steps = await reference_reader.read_from_file(input_yaml_file)

    converted_file = tmpdir / "test.yml"
    yaml_writer = YAMLStoryWriter()
    yaml_writer.dump(converted_file, md_steps)

    roundtrip_reader = YAMLStoryReader(default_domain, None, False)
    roundtrip_steps = await roundtrip_reader.read_from_file(converted_file)

    assert len(roundtrip_steps) == len(reference_steps)
    for roundtrip_step, reference_step in zip(roundtrip_steps, reference_steps):
        assert len(roundtrip_step.events) == len(reference_step.events)
def _guess_reader(
    filename: Text,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Pick a story reader by asking each reader class about the file.

    The YAML reader is asked first, then the Markdown reader.

    Args:
        filename: Story file to find a reader for.
        domain: Domain forwarded to the reader.
        interpreter: Interpreter forwarded to the reader.
        template_variables: Forwarded to the reader unchanged.
        use_e2e: Forwarded to the reader unchanged.

    Returns:
        A `YAMLStoryReader` or `MarkdownStoryReader` built for `filename`.

    Raises:
        ValueError: If neither reader class recognises the file.
    """
    reader_args = (interpreter, domain, template_variables, use_e2e, filename)

    if YAMLStoryReader.is_yaml_story_file(filename):
        return YAMLStoryReader(*reader_args)

    if MarkdownStoryReader.is_markdown_story_file(filename):
        return MarkdownStoryReader(*reader_args)

    raise ValueError(
        f"Failed to find a reader class for the story file `{filename}`. "
        f"Supported formats are {MARKDOWN_FILE_EXTENSION}, {YAML_FILE_EXTENSIONS}."
    )
def filter(cls, source_path: Path) -> bool:
    """Tell whether the given training data file is a Markdown story file.

    The decision is delegated to `MarkdownStoryReader.is_markdown_story_file`.

    Args:
        source_path: Path to the training data file.

    Returns:
        `True` if the given file can be converted, `False` otherwise.
    """
    is_markdown_stories = MarkdownStoryReader.is_markdown_story_file(source_path)
    return is_markdown_stories
def is_story_file(file_path: Text) -> bool:
    """Checks if a file is a Rasa story file.

    A file counts as a story file when either the YAML story reader or the
    Markdown story reader recognises it; the YAML check runs first.

    Args:
        file_path: Path of the file which should be checked.

    Returns:
        `True` if it's a story file, otherwise `False`.
    """
    # The docstring has to be the first statement of the body to be picked up
    # as `__doc__`, so the function-local imports come after it.
    from rasa.core.training.story_reader.markdown_story_reader import (
        MarkdownStoryReader,
    )
    from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader

    return YAMLStoryReader.is_yaml_story_file(
        file_path
    ) or MarkdownStoryReader.is_markdown_story_file(file_path)
def _get_reader(
    filename: Text,
    domain: Domain,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Build the story reader that matches the given file.

    The Markdown check from `rasa.shared.data` runs first, then the YAML
    check. If neither matches, the choice is delegated to `_guess_reader`.

    Args:
        filename: Story file to build a reader for.
        domain: Domain forwarded to the reader.
        template_variables: Forwarded to the reader unchanged.
        use_e2e: Forwarded to the reader unchanged.

    Returns:
        The reader instance chosen for `filename`.
    """
    reader_args = (domain, template_variables, use_e2e, filename)

    if rasa.shared.data.is_likely_markdown_file(filename):
        return MarkdownStoryReader(*reader_args)

    if rasa.shared.data.is_likely_yaml_file(filename):
        return YAMLStoryReader(*reader_args)

    # This is a use case for uploading the story over REST API.
    # The source file has a random name.
    return _guess_reader(filename, domain, template_variables, use_e2e)
async def test_forms_are_skipped_with_warning(default_domain: Domain):
    """Dumping the form stories fixture emits exactly five `UserWarning`s."""
    md_reader = MarkdownStoryReader(
        default_domain, None, False, unfold_or_utterances=False,
    )
    form_story_steps = await md_reader.read_from_file(
        "data/test_stories/stories_form.md"
    )

    # Built outside the `warns` block so only `dumps` is observed.
    yaml_writer = YAMLStoryWriter()

    with pytest.warns(UserWarning) as caught_warnings:
        yaml_writer.dumps(form_story_steps)

    # Per the original author's note: 5 stories with forms are skipped and
    # the user is warned about each of them.
    assert len(caught_warnings) == 5
async def test_read_stories_with_multiline_comments(tmpdir, default_domain: Domain):
    """Stories with multi-line comments parse into the expected four blocks."""
    md_reader = MarkdownStoryReader(RegexInterpreter(), default_domain)
    parsed_steps = await md_reader.read_from_file(
        "data/test_stories/stories_with_multiline_comments.md"
    )

    # (block name, number of events) for each step, in file order.
    expected_blocks = [
        ("happy path", 4),
        ("sad path 1", 7),
        ("sad path 2", 7),
        ("say goodbye", 2),
    ]

    assert len(parsed_steps) == 4
    for step, (block_name, event_count) in zip(parsed_steps, expected_blocks):
        assert step.block_name == block_name
        assert len(step.events) == event_count
async def test_story_start_checkpoint_is_skipped(default_domain: Domain):
    """The YAML dump of the stories fixture does not contain `STORY_START`."""
    stories_path = "data/test_stories/stories.md"

    md_reader = MarkdownStoryReader(
        default_domain, None, False, stories_path, unfold_or_utterances=False,
    )
    story_steps = await md_reader.read_from_file(stories_path)

    yaml_writer = YAMLStoryWriter()
    dumped_yaml = yaml_writer.dumps(story_steps)

    assert STORY_START not in dumped_yaml
async def test_forms_are_converted(default_domain: Domain):
    """Dumping the form stories fixture to YAML emits no warnings.

    The fixture must contain loops, otherwise the test would not exercise the
    form conversion at all.
    """
    # Function-local import so the test does not rely on a module-level one.
    import warnings

    original_md_reader = MarkdownStoryReader(
        default_domain, None, False, unfold_or_utterances=False,
    )
    original_md_story_steps = await original_md_reader.read_from_file(
        "data/test_stories/stories_form.md"
    )

    assert YAMLStoryWriter.stories_contain_loops(original_md_story_steps)

    writer = YAMLStoryWriter()

    # `pytest.warns(None)` is deprecated in pytest 7 and removed in pytest 8.
    # Recording with the standard library works on every pytest version.
    with warnings.catch_warnings(record=True) as record:
        # "always" ensures no warning is hidden by an existing filter.
        warnings.simplefilter("always")
        writer.dumps(original_md_story_steps)

    assert len(record) == 0
def _get_reader(
    filename: Text,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Build the story reader that matches the file's extension.

    A name ending in `MARKDOWN_FILE_EXTENSION` gets a Markdown reader and a
    suffix contained in `YAML_FILE_EXTENSIONS` gets a YAML reader. Any other
    name is delegated to `_guess_reader`.

    Args:
        filename: Story file to build a reader for.
        domain: Domain forwarded to the reader.
        interpreter: Interpreter forwarded to the reader.
        template_variables: Forwarded to the reader unchanged.
        use_e2e: Forwarded to the reader unchanged.

    Returns:
        The reader instance chosen for `filename`.
    """
    reader_args = (interpreter, domain, template_variables, use_e2e, filename)

    if filename.endswith(MARKDOWN_FILE_EXTENSION):
        return MarkdownStoryReader(*reader_args)

    if Path(filename).suffix in YAML_FILE_EXTENSIONS:
        return YAMLStoryReader(*reader_args)

    # This is a use case for uploading the story over REST API.
    # The source file has a random name.
    return _guess_reader(filename, domain, interpreter, template_variables, use_e2e)
def test_invalid_end_to_end_format(line: Text):
    """Parsing `line` as an end-to-end message raises `ValueError`."""
    md_reader = MarkdownStoryReader()

    with pytest.raises(ValueError):
        # noinspection PyProtectedMember
        md_reader.parse_e2e_message(line)
def test_e2e_parsing(line: Text, expected: Dict):
    """Parsing `line` as an end-to-end message yields the expected dict."""
    parsed_message = MarkdownStoryReader.parse_e2e_message(line)
    parsed_as_dict = parsed_message.as_dict()

    assert parsed_as_dict == expected