Exemple #1
0
def test_data_convert_nlu_yml(
    run: Callable[..., RunResult], tmp_path: Path, request: FixtureRequest
):
    """Convert the JSON demo NLU data to YAML through the CLI and check the result.

    The converted file is read back with `RasaYAMLReader` and compared against
    the reference `demo-rasa.yml`: the number of training examples, entity
    synonyms, regex features and lookup tables must match, and the entities
    must be equal.
    """
    # Resolve the example data from the pytest rootdir: the `testdir` fixture
    # used by `run` changes the working directory.
    examples_dir = Path(request.config.rootdir, "data", "examples", "rasa")
    json_input = (examples_dir / "demo-rasa.json").absolute()
    yaml_output = tmp_path / "out.yml"

    cli_args = [
        "data",
        "convert",
        "nlu",
        "--data",
        str(json_input),
        "--out",
        str(yaml_output),
        "-f",
        "yaml",
    ]
    result = run(*cli_args)

    assert result.ret == 0
    assert yaml_output.exists()

    converted = RasaYAMLReader().read(yaml_output)
    reference = RasaYAMLReader().read(examples_dir / "demo-rasa.yml")

    # Only the sizes of these collections are compared, not their contents.
    for attribute in (
        "training_examples",
        "entity_synonyms",
        "regex_features",
        "lookup_tables",
    ):
        assert len(getattr(converted, attribute)) == len(getattr(reference, attribute))
    assert converted.entities == reference.entities
Exemple #2
0
def test_docs_training_data(mdx_file_path: Path):
    """Validate every training data code block found in an MDX docs file.

    Each match of `TRAINING_DATA_CODEBLOCK_RE` either references a YAML file
    (relative to `DOCS_BASE_DIR`) or carries the code block inline. Blocks for
    which `RasaYAMLReader.validate` raises `ValueError` are reported by the line
    number at which the match starts.

    Raises:
        AssertionError: If at least one code block is invalid.
    """
    mdx_content = mdx_file_path.read_text()
    lines_with_errors: List[Text] = []

    for match in TRAINING_DATA_CODEBLOCK_RE.finditer(mdx_content):
        yaml_path = match.group("yaml_path")
        # A referenced file takes precedence over the inline code block.
        codeblock = (
            (DOCS_BASE_DIR / yaml_path).read_text()
            if yaml_path
            else match.group("codeblock")
        )

        try:
            RasaYAMLReader.validate(codeblock)
        except ValueError:
            # 1-based line number: newlines before the match start, plus one.
            line_number = mdx_content.count("\n", 0, match.start()) + 1
            lines_with_errors.append(str(line_number))

    if not lines_with_errors:
        return

    raise AssertionError(
        f"({mdx_file_path}): Invalid training data found "
        f"at line{'s' if len(lines_with_errors) > 1 else ''} {', '.join(lines_with_errors)}"
    )
Exemple #3
0
def _validate_yaml_training_payload(yaml_text: Text) -> None:
    """Validate `yaml_text` with `RasaYAMLReader.validate`.

    Args:
        yaml_text: The YAML text to validate.

    Raises:
        ErrorResponse: With status 400 and reason "BadRequest" if validation
            raises any exception. The original exception is attached as the
            cause so it stays visible in tracebacks.
    """
    try:
        RasaYAMLReader.validate(yaml_text)
    except Exception as e:
        # Any validation failure is reported to the client as a bad request;
        # `from e` records the underlying error as the explicit cause.
        raise ErrorResponse(
            400,
            "BadRequest",
            f"The request body does not contain valid YAML. Error: {e}",
            help_url=DOCS_URL_TRAINING_DATA_NLU,
        ) from e
Exemple #4
0
def test_train_model_training_data_persisted(
    tmp_path: Path, nlu_as_json_path: Text, tmp_path_factory: TempPathFactory
):
    """Train an NLU model with persisted training data and check it is stored.

    After training with `persist_nlu_training_data=True`, the model archive is
    unpacked into a fresh directory and the `training_data.yml` found under
    `nlu_training_data_provider` must be readable and non-empty.
    """
    pipeline_config = {
        "pipeline": [{"name": "KeywordIntentClassifier"}],
        "language": "en",
    }
    config_path = tmp_path / "config.yml"
    rasa.shared.utils.io.dump_obj_as_json_to_file(config_path, pipeline_config)

    model_archive = Path(
        rasa.model_training.train_nlu(
            str(config_path),
            nlu_as_json_path,
            output=str(tmp_path),
            persist_nlu_training_data=True,
        )
    )
    assert model_archive.is_file()

    unpack_dir = tmp_path_factory.mktemp("loaded")
    # Only the unpacked files on disk are inspected; the returned objects are unused.
    _storage, _metadata = LocalModelStorage.from_model_archive(
        unpack_dir, model_archive
    )

    provider_dir = unpack_dir / "nlu_training_data_provider"
    assert provider_dir.is_dir()

    persisted_data = RasaYAMLReader().read(provider_dir / "training_data.yml")
    assert not persisted_data.is_empty()
Exemple #5
0
def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
    """Creates the reader instance matching the given file format.

    Args:
        fformat: The format identifier to look up.

    Returns:
        A new reader for the format, or `None` if the format is not recognised.
    """
    from rasa.shared.nlu.training_data.formats import (
        RasaYAMLReader,
        MarkdownReader,
        WitReader,
        LuisReader,
        RasaReader,
        DialogflowReader,
        NLGMarkdownReader,
    )

    # The checks run in a fixed order and the first match wins.
    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()
    if fformat == MARKDOWN_NLG:
        return NLGMarkdownReader()
    if fformat == RASA_YAML:
        return RasaYAMLReader()
    return None
Exemple #6
0
def guess_format(filename: Text) -> Text:
    """Guesses the training data format of a file using heuristics.

    Args:
        filename: Path of the file whose format should be guessed.

    Returns:
        The guessed format, or `UNK` if the path is not a file or no heuristic
        matches.
    """
    from rasa.shared.nlu.training_data.formats import RasaYAMLReader

    if not os.path.isfile(filename):
        return UNK

    guess = UNK
    try:
        js = json.loads(rasa.shared.utils.io.read_file(filename))
    except ValueError:
        # Reading or JSON parsing raised `ValueError`, so try the text-based
        # formats in order; the first matching check decides.
        if MarkdownReader.is_markdown_nlu_file(filename):
            guess = MARKDOWN
        elif NLGMarkdownReader.is_markdown_nlg_file(filename):
            guess = MARKDOWN_NLG
        elif RasaYAMLReader.is_yaml_nlu_file(filename):
            guess = RASA_YAML
    else:
        # Valid JSON: take the first format whose heuristic accepts the content.
        guess = next(
            (
                file_format
                for file_format, format_heuristic in _json_format_heuristics.items()
                if format_heuristic(js, filename)
            ),
            UNK,
        )

    logger.debug(f"Training data format of '{filename}' is '{guess}'.")

    return guess
Exemple #7
0
async def test_multi_project_training(trained_async, tmp_path_factory: TempPathFactory):
    """Train the multi-domain example project and inspect the packaged model.

    The domain from the model metadata must contain the expected intents and
    actions, and the persisted NLU training data must contain exactly the
    expected intents.
    """
    project_root = "data/test_multi_domain"

    model_archive = await trained_async(
        config=os.path.join(project_root, "config.yml"),
        domain=os.path.join(project_root, "domain.yml"),
        training_files=os.path.join(project_root, "data"),
        force_training=True,
        persist_nlu_training_data=True,
    )

    unpack_dir = tmp_path_factory.mktemp("storage_path")
    model_storage, model_metadata = LocalModelStorage.from_model_archive(
        unpack_dir, model_archive
    )
    domain = model_metadata.domain

    expected_intents = {
        "greet",
        "goodbye",
        "affirm",
        "deny",
        "mood_great",
        "mood_unhappy",
    }
    expected_actions = [
        "utter_greet",
        "utter_cheer_up",
        "utter_did_that_help",
        "utter_happy",
        "utter_goodbye",
    ]

    assert all(intent in domain.intents for intent in expected_intents)

    # Training data was persisted because `persist_nlu_training_data=True`.
    provider_resource = Resource("nlu_training_data_provider")
    with model_storage.read_from(provider_resource) as resource_dir:
        persisted_nlu_data = RasaYAMLReader().read(resource_dir / "training_data.yml")

    assert expected_intents == persisted_nlu_data.intents

    assert all(action in domain.action_names_or_texts for action in expected_actions)
async def test_multi_project_training(trained_async):
    """Train the multi-domain example project and inspect the unpacked model.

    The domain loaded from the unpacked model must contain the expected intents
    and actions, and the persisted NLU training data must contain exactly the
    expected intents.
    """
    project_root = "data/test_multi_domain"

    model_archive = await trained_async(
        config=os.path.join(project_root, "config.yml"),
        domain=os.path.join(project_root, "domain.yml"),
        training_files=os.path.join(project_root, "data"),
        force_training=True,
        persist_nlu_training_data=True,
    )

    unpacked_dir = model.unpack_model(model_archive)

    packaged_domain_path = os.path.join(
        unpacked_dir, DEFAULT_CORE_SUBDIRECTORY_NAME, DEFAULT_DOMAIN_PATH
    )
    domain = Domain.load(packaged_domain_path)

    expected_intents = {
        "greet",
        "goodbye",
        "affirm",
        "deny",
        "mood_great",
        "mood_unhappy",
    }
    expected_actions = [
        "utter_greet",
        "utter_cheer_up",
        "utter_did_that_help",
        "utter_happy",
        "utter_goodbye",
    ]

    assert all(intent in domain.intents for intent in expected_intents)

    # Training data was persisted because `persist_nlu_training_data=True`.
    persisted_nlu_path = os.path.join(unpacked_dir, "nlu", "training_data.yml")
    persisted_nlu_data = RasaYAMLReader().read(persisted_nlu_path)

    assert expected_intents == persisted_nlu_data.intents

    assert all(action in domain.action_names for action in expected_actions)
def persist(
    state: StateMachineState,
    is_initial_state: bool,
    domain_folder: str,
    nlu_folder: str,
):
    """Write the domain and NLU data of a state to YAML files.

    Both files share a name derived from `state.name` and are written to
    `domain_folder` and `nlu_folder` respectively. The domain and the NLU data
    are validated before they are written, missing parent folders are created,
    and an existing file with the same name is removed first.

    Args:
        state: The state whose domain and NLU data are persisted.
        is_initial_state: Forwarded to `get_domain_nlu`.
        domain_folder: Folder that receives the domain file.
        nlu_folder: Folder that receives the NLU file.
    """
    domain, nlu_data = get_domain_nlu(
        state=state, is_initial_state=is_initial_state
    )

    # Derive the file name: keep alphanumerics, whitespace, "-" and "_",
    # lowercase each kept character, then turn spaces into underscores.
    kept_chars = [
        char.lower()
        for char in state.name
        if char.isalnum() or char.isspace() or char in "-_"
    ]
    filename = "".join(kept_chars).replace(" ", "_") + ".yaml"

    # Domain: create the folder, validate, replace any existing file.
    domain_path = os.path.join(domain_folder, filename)
    Path(domain_path).parent.mkdir(parents=True, exist_ok=True)
    rasa.shared.utils.validation.validate_yaml_schema(
        domain.as_yaml(), rasa.shared.constants.DOMAIN_SCHEMA_FILE
    )
    if os.path.exists(domain_path):
        os.remove(domain_path)
    domain.persist(domain_path)

    # NLU: serialise, validate, create the folder, replace any existing file.
    nlu_path = os.path.join(nlu_folder, filename)
    nlu_yaml = dump_obj_as_yaml_to_string(nlu_data, should_preserve_key_order=True)
    RasaYAMLReader().validate(nlu_yaml)
    Path(nlu_path).parent.mkdir(parents=True, exist_ok=True)
    if os.path.exists(nlu_path):
        os.remove(nlu_path)
    write_text_file(nlu_yaml, nlu_path)
Exemple #10
0
def persist(
    stories: List[Story],
    domain_filename: str,
    nlu_filename: str,
    additional_intents: List[Intent],
    additional_utterances: List[Utterance],
    slots: List[Slot],
    use_rules: bool = False,
):
    """Merge the data of all stories and write one domain file and one NLU file.

    The per-story domains are merged together with a domain built from the
    collected intents, the given slots and the additional utterances. The merged
    domain and the NLU data are validated before being written; existing target
    files are removed and missing parent folders are created.

    Args:
        stories: Stories whose `get_domain_nlu` results are merged.
        domain_filename: Path of the domain file to write.
        nlu_filename: Path of the NLU file to write.
        additional_intents: Intents added on top of those found in the stories.
        additional_utterances: Utterances added to the domain as text responses.
        slots: Slots added to the domain; their names are also used as entities.
        use_rules: If true, the collected stories are written under the "rules"
            key instead of "stories", and the flag is forwarded to each story.
    """
    merged_domain = Domain.empty()
    collected_intents: Set[Intent] = set(additional_intents)
    collected_stories: List[Story] = []
    collected_slot_was_sets: Set[SlotWasSet] = set()

    for story in stories:
        (
            story_domain,
            story_parts,
            story_intents,
            story_slot_was_sets,
        ) = story.get_domain_nlu(use_rules=use_rules)

        merged_domain = merged_domain.merge(story_domain)
        collected_intents.update(story_intents)
        collected_stories.extend(story_parts)
        collected_slot_was_sets.update(story_slot_was_sets)

    # Domain holding the consolidated intents, slots and additional responses.
    additional_responses = {
        utterance.name: [{"text": utterance.text}]
        for utterance in additional_utterances
    }
    consolidated_domain = Domain(
        intents={intent.name for intent in collected_intents},
        entities=[slot.name for slot in slots],
        slots=slots,
        responses=additional_responses,
        action_names=[],
        forms={},
    )
    merged_domain = merged_domain.merge(consolidated_domain)

    # Validate the merged domain before touching the file system.
    rasa.shared.utils.validation.validate_yaml_schema(
        merged_domain.as_yaml(), rasa.shared.constants.DOMAIN_SCHEMA_FILE
    )

    # Write the domain, replacing any existing file.
    if os.path.exists(domain_filename):
        os.remove(domain_filename)
    Path(domain_filename).parent.mkdir(parents=True, exist_ok=True)
    merged_domain.persist(domain_filename)

    # Build the NLU data; only intents that carry examples are included.
    nlu_entries = [
        intent.as_nlu_yaml()
        for intent in collected_intents
        if isinstance(intent, IntentWithExamples)
    ]
    stories_key = "rules" if use_rules else "stories"
    nlu_data = {
        "version": "2.0",
        "nlu": nlu_entries,
        stories_key: collected_stories,
    }

    nlu_yaml = dump_obj_as_yaml_to_string(nlu_data, should_preserve_key_order=True)
    RasaYAMLReader().validate(nlu_yaml)

    # Write the NLU data, replacing any existing file.
    if os.path.exists(nlu_filename):
        os.remove(nlu_filename)
    Path(nlu_filename).parent.mkdir(parents=True, exist_ok=True)
    write_text_file(nlu_yaml, nlu_filename)