def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
        """Convert a Markdown story file to YAML and store it in the output directory.

        The stories are read with a `MarkdownStoryReader` and dumped with a
        `YAMLStoryWriter`. A warning is printed beforehand when the writer
        reports that the stories contain loops.

        Args:
            source_path: Path to the training data file.
            output_path: Path to the output directory.
        """
        from rasa.core.training.story_reader.yaml_story_reader import KEY_ACTIVE_LOOP

        target_path = cls.generate_path_for_converted_training_data_file(
            source_path, output_path
        )

        markdown_reader = MarkdownStoryReader(unfold_or_utterances=False)
        yaml_writer = YAMLStoryWriter()

        # Reading is a coroutine; run it to completion on the current event loop.
        story_steps = asyncio.get_event_loop().run_until_complete(
            markdown_reader.read_from_file(source_path)
        )

        contains_forms = YAMLStoryWriter.stories_contain_loops(story_steps)
        if contains_forms:
            form_notice = (
                f"Training data file '{source_path}' contains forms. "
                f"Any 'form' events will be converted to '{KEY_ACTIVE_LOOP}' events. "
                "Please note that in order for these stories to work you still "
                "need the 'FormPolicy' to be active. However the 'FormPolicy' is "
                "deprecated, please consider switching to the new 'RulePolicy', "
                f"for which you can find the documentation here: {DOCS_URL_RULES}."
            )
            print_warning(form_notice)

        yaml_writer.dump(target_path, story_steps)

        print_success(f"Converted Core file: '{source_path}' >> '{target_path}'.")
Ejemplo n.º 2
0
def _write_core_yaml(training_data_path: Path, output_path: Path,
                     source_path: Path) -> None:
    """Read Markdown stories and dump them as YAML.

    Args:
        training_data_path: File the Markdown stories are read from.
        output_path: Destination passed to the YAML writer.
        source_path: Path shown as the source in the success message.
    """
    markdown_reader = MarkdownStoryReader()
    yaml_writer = YAMLStoryWriter()

    # Reading is a coroutine; run it to completion on the current event loop.
    story_steps = asyncio.get_event_loop().run_until_complete(
        markdown_reader.read_from_file(training_data_path)
    )

    yaml_writer.dump(output_path, story_steps)

    print_success(f"Converted Core file: '{source_path}' >> '{output_path}'.")
Ejemplo n.º 3
0
def _guess_reader(
    filename: Text,
    domain: Domain,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Choose a story reader by asking each reader class about the file.

    The YAML reader is asked first, then the Markdown reader.

    Args:
        filename: Path of the story file.
        domain: Domain handed to the reader.
        template_variables: Optional template variables handed to the reader.
        use_e2e: End-to-end flag handed to the reader.

    Returns:
        A reader instance of the first class that accepts the file.

    Raises:
        ValueError: If neither reader class accepts the file.
    """
    reader_args = (domain, template_variables, use_e2e, filename)

    if YAMLStoryReader.is_yaml_story_file(filename):
        return YAMLStoryReader(*reader_args)

    if MarkdownStoryReader.is_markdown_story_file(filename):
        return MarkdownStoryReader(*reader_args)

    supported_formats = ", ".join(
        MARKDOWN_FILE_EXTENSIONS.union(YAML_FILE_EXTENSIONS)
    )
    raise ValueError(
        f"Failed to find a reader class for the story file `{filename}`. "
        f"Supported formats are "
        f"{supported_formats}."
    )
Ejemplo n.º 4
0
Archivo: data.py Proyecto: sysang/rasa
def _convert_to_yaml(args: argparse.Namespace, is_nlu: bool) -> None:
    """Convert Markdown training data to YAML files in an existing output path.

    Every candidate file is checked with the Markdown NLU and Markdown story
    readers. Matching files are written next to each other in the output
    path, named after the source file stem plus `CONVERTED_FILE_SUFFIX`.

    Args:
        args: Parsed command line arguments. `args.out` is the output path and
            `args.data` is the training data directory (or a single file).
        is_nlu: If `True`, only Markdown NLU files are converted; otherwise
            only Markdown story files are converted.
    """
    output = Path(args.out)
    if not os.path.exists(output):
        print_error_and_exit(
            f"The output path '{output}' doesn't exist. Please make sure to specify "
            f"an existing directory and try again."
        )

    training_data = Path(args.data)
    if not os.path.exists(training_data):
        # This aborts via `print_error_and_exit`, so the message must not
        # claim that the path is merely skipped.
        print_error_and_exit(
            f"The training data path '{training_data}' doesn't exist. "
            f"Please make sure to specify an existing path and try again."
        )

    if os.path.isdir(training_data):
        source_paths = [training_data / name for name in os.listdir(training_data)]
    else:
        # `os.listdir` raises `NotADirectoryError` for a plain file, so a
        # single training data file is converted on its own.
        source_paths = [training_data]

    num_of_files_converted = 0
    for source_path in source_paths:
        output_path = output / f"{source_path.stem}{CONVERTED_FILE_SUFFIX}"

        if MarkdownReader.is_markdown_nlu_file(source_path):
            if not is_nlu:
                # NLU files are silently ignored during a Core conversion.
                continue
            _write_nlu_yaml(source_path, output_path, source_path)
            num_of_files_converted += 1
        elif not is_nlu and MarkdownStoryReader.is_markdown_story_file(source_path):
            _write_core_yaml(source_path, output_path, source_path)
            num_of_files_converted += 1
        else:
            print_warning(f"Skipped file: '{source_path}'.")

    print_info(f"Converted {num_of_files_converted} file(s), saved in '{output}'.")
Ejemplo n.º 5
0
async def test_simple_story(tmpdir: Path, default_domain: Domain,
                            input_md_file: Text, input_yaml_file: Text):
    """Markdown stories written as YAML match the reference YAML stories.

    The Markdown stories are dumped to a temporary YAML file and read back.
    The result must have as many steps, and as many events per step, as the
    steps read from `input_yaml_file`.
    """
    md_reader = MarkdownStoryReader(
        default_domain,
        None,
        False,
        input_yaml_file,
        unfold_or_utterances=False,
    )
    md_steps = await md_reader.read_from_file(input_md_file)

    assert not YAMLStoryWriter.stories_contain_loops(md_steps)

    reference_steps = await YAMLStoryReader(
        default_domain, None, False
    ).read_from_file(input_yaml_file)

    converted_file = tmpdir / "test.yml"
    YAMLStoryWriter().dump(converted_file, md_steps)

    converted_steps = await YAMLStoryReader(
        default_domain, None, False
    ).read_from_file(converted_file)

    assert len(converted_steps) == len(reference_steps)
    for converted, reference in zip(converted_steps, reference_steps):
        assert len(converted.events) == len(reference.events)
Ejemplo n.º 6
0
def _guess_reader(
    filename: Text,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Choose a story reader by asking each reader class about the file.

    The YAML reader is asked first, then the Markdown reader.

    Args:
        filename: Path of the story file.
        domain: Domain handed to the reader.
        interpreter: Interpreter handed to the reader.
        template_variables: Optional template variables handed to the reader.
        use_e2e: End-to-end flag handed to the reader.

    Returns:
        A reader instance of the first class that accepts the file.

    Raises:
        ValueError: If neither reader class accepts the file.
    """
    reader_args = (interpreter, domain, template_variables, use_e2e, filename)

    if YAMLStoryReader.is_yaml_story_file(filename):
        return YAMLStoryReader(*reader_args)

    if MarkdownStoryReader.is_markdown_story_file(filename):
        return MarkdownStoryReader(*reader_args)

    raise ValueError(
        f"Failed to find a reader class for the story file `{filename}`. "
        f"Supported formats are {MARKDOWN_FILE_EXTENSION}, {YAML_FILE_EXTENSIONS}."
    )
    def filter(cls, source_path: Path) -> bool:
        """Tell whether the file holds Core data in `Markdown` format.

        Such files can be converted to `YAML`.

        Args:
            source_path: Path to the training data file.

        Returns:
            `True` if the given file can be converted, `False` otherwise
        """
        is_markdown_story = MarkdownStoryReader.is_markdown_story_file(source_path)
        return is_markdown_story
Ejemplo n.º 8
0
def is_story_file(file_path: Text) -> bool:
    """Checks if a file is a Rasa story file.

    A file counts as a story file when either the YAML or the Markdown story
    reader recognizes it; the YAML reader is asked first.

    Args:
        file_path: Path of the file which should be checked.

    Returns:
        `True` if it's a story file, otherwise `False`.
    """
    # The docstring must be the first statement to be picked up as `__doc__`;
    # the readers are still imported lazily at call time, as before.
    from rasa.core.training.story_reader.markdown_story_reader import (
        MarkdownStoryReader,
    )
    from rasa.core.training.story_reader.yaml_story_reader import YAMLStoryReader

    return YAMLStoryReader.is_yaml_story_file(
        file_path
    ) or MarkdownStoryReader.is_markdown_story_file(file_path)
Ejemplo n.º 9
0
def _get_reader(
    filename: Text,
    domain: Domain,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Return a story reader for the given file.

    The `rasa.shared.data` file-type checks are tried first (Markdown, then
    YAML). If neither matches, the decision is delegated to `_guess_reader`.

    Args:
        filename: Path of the story file.
        domain: Domain handed to the reader.
        template_variables: Optional template variables handed to the reader.
        use_e2e: End-to-end flag handed to the reader.

    Returns:
        The reader instance chosen for the file.
    """
    reader_args = (domain, template_variables, use_e2e, filename)

    if rasa.shared.data.is_likely_markdown_file(filename):
        return MarkdownStoryReader(*reader_args)

    if rasa.shared.data.is_likely_yaml_file(filename):
        return YAMLStoryReader(*reader_args)

    # This is a use case for uploading the story over REST API.
    # The source file has a random name.
    return _guess_reader(filename, domain, template_variables, use_e2e)
Ejemplo n.º 10
0
async def test_forms_are_skipped_with_warning(default_domain: Domain):
    """Dumping the form stories to YAML emits exactly five `UserWarning`s."""
    md_reader = MarkdownStoryReader(
        default_domain, None, False, unfold_or_utterances=False,
    )
    form_story_steps = await md_reader.read_from_file(
        "data/test_stories/stories_form.md"
    )

    yaml_writer = YAMLStoryWriter()

    with pytest.warns(UserWarning) as caught_warnings:
        yaml_writer.dumps(form_story_steps)

    # We skip 5 stories with the forms and warn users
    assert len(caught_warnings) == 5
Ejemplo n.º 11
0
async def test_read_stories_with_multiline_comments(tmpdir,
                                                    default_domain: Domain):
    """Stories with multiline comments yield the expected blocks and event counts."""
    md_reader = MarkdownStoryReader(RegexInterpreter(), default_domain)

    steps = await md_reader.read_from_file(
        "data/test_stories/stories_with_multiline_comments.md"
    )

    # (block name, number of events) for each expected story step, in order.
    expected_steps = [
        ("happy path", 4),
        ("sad path 1", 7),
        ("sad path 2", 7),
        ("say goodbye", 2),
    ]

    assert len(steps) == 4
    for position, (block_name, event_count) in enumerate(expected_steps):
        assert steps[position].block_name == block_name
        assert len(steps[position].events) == event_count
Ejemplo n.º 12
0
async def test_story_start_checkpoint_is_skipped(default_domain: Domain):
    """The YAML dump of the Markdown stories must not contain `STORY_START`."""
    stories_file = "data/test_stories/stories.md"

    md_reader = MarkdownStoryReader(
        default_domain,
        None,
        False,
        stories_file,
        unfold_or_utterances=False,
    )
    md_steps = await md_reader.read_from_file(stories_file)

    dumped_yaml = YAMLStoryWriter().dumps(md_steps)

    assert STORY_START not in dumped_yaml
Ejemplo n.º 13
0
async def test_forms_are_converted(default_domain: Domain):
    """Form stories are reported as containing loops and dump without warnings."""
    import warnings

    original_md_reader = MarkdownStoryReader(
        default_domain,
        None,
        False,
        unfold_or_utterances=False,
    )
    original_md_story_steps = await original_md_reader.read_from_file(
        "data/test_stories/stories_form.md")

    assert YAMLStoryWriter.stories_contain_loops(original_md_story_steps)

    writer = YAMLStoryWriter()

    # `pytest.warns(None)` is deprecated since pytest 7 and rejected by
    # pytest 8, so the warnings are recorded with the standard library.
    with warnings.catch_warnings(record=True) as record:
        # Record every warning, regardless of the active warning filters.
        warnings.simplefilter("always")
        writer.dumps(original_md_story_steps)

    assert len(record) == 0
Ejemplo n.º 14
0
def _get_reader(
    filename: Text,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
) -> StoryReader:
    """Return a story reader chosen from the file name.

    A name ending in `MARKDOWN_FILE_EXTENSION` selects the Markdown reader and
    a suffix contained in `YAML_FILE_EXTENSIONS` selects the YAML reader.
    Anything else is delegated to `_guess_reader`.

    Args:
        filename: Path of the story file.
        domain: Domain handed to the reader.
        interpreter: Interpreter handed to the reader.
        template_variables: Optional template variables handed to the reader.
        use_e2e: End-to-end flag handed to the reader.

    Returns:
        The reader instance chosen for the file.
    """
    reader_args = (interpreter, domain, template_variables, use_e2e, filename)

    if filename.endswith(MARKDOWN_FILE_EXTENSION):
        return MarkdownStoryReader(*reader_args)

    if Path(filename).suffix in YAML_FILE_EXTENSIONS:
        return YAMLStoryReader(*reader_args)

    # This is a use case for uploading the story over REST API.
    # The source file has a random name.
    return _guess_reader(
        filename, domain, interpreter, template_variables, use_e2e
    )
Ejemplo n.º 15
0
def test_invalid_end_to_end_format(line: Text):
    """Parsing the given end-to-end line must raise a `ValueError`."""
    story_reader = MarkdownStoryReader()

    with pytest.raises(ValueError):
        # noinspection PyProtectedMember
        story_reader.parse_e2e_message(line)
Ejemplo n.º 16
0
def test_e2e_parsing(line: Text, expected: Dict):
    """The parsed end-to-end message, as a dict, equals the expected dict."""
    parsed_message = MarkdownStoryReader.parse_e2e_message(line)
    parsed_as_dict = parsed_message.as_dict()

    assert parsed_as_dict == expected