Ejemplo n.º 1
0
def test_load_imports_from_directory_tree(tmpdir_factory: TempdirFactory):
    """Check which skill directories end up in the selector's imports.

    Layout written to disk before loading the root config:

    * root config imports "Skill A"
    * "Skill A" imports "../Skill B"
    * "Skill B" lists "../Skill C" under a key other than "imports"
    * "Skill B/Skill B-1" imports "../../Skill A" from a `config.yaml` file
    * "Skill C" exists but has no config

    The selector is expected to hold exactly the "Skill A" and "Skill B" paths.
    """
    project = tmpdir_factory.mktemp("Parent Bot")
    utils.dump_obj_as_yaml_to_file(project / "config.yml", {"imports": ["Skill A"]})

    skill_a = project / "Skill A"
    skill_a.mkdir()
    utils.dump_obj_as_yaml_to_file(
        skill_a / "config.yml", {"imports": ["../Skill B"]}
    )

    skill_b = project / "Skill B"
    skill_b.mkdir()
    # "Skill C" is only referenced under a key that is not "imports".
    utils.dump_obj_as_yaml_to_file(
        skill_b / "config.yml", {"some other": ["../Skill C"]}
    )

    skill_b_1 = skill_b / "Skill B-1"
    skill_b_1.mkdir()
    # Check if loading from `.yaml` also works
    utils.dump_obj_as_yaml_to_file(
        skill_b_1 / "config.yaml", {"imports": ["../../Skill A"]}
    )

    # Present on disk, but should not be imported.
    (project / "Skill C").mkdir()

    selector = SkillSelector.load(project / "config.yml")

    imported_paths = {
        os.path.join(str(directory)) for directory in (skill_a, skill_b)
    }
    assert selector._imports == imported_paths
Ejemplo n.º 2
0
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Domain object, or path to the domain file. A path is loaded
            here and checked for missing templates.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Whatever `_train_core_with_validated_data` returns, or `None` when the
        domain could not be loaded or the story directory is empty.

    """
    imports = SkillSelector.load(config, stories)

    if isinstance(domain, str):
        try:
            domain = Domain.load(domain, imports)
            domain.check_missing_templates()
        except InvalidDomain as error:
            message = (
                "Could not load domain due to: '{}'. To specify a valid domain path "
                "use the '--domain' argument."
            ).format(error)
            print_error(message)
            return None

    # The selected stories are wrapped in `TempDirectoryPath` and used as a
    # context manager (presumably it removes the directory on exit - verify).
    with TempDirectoryPath(
        data.get_core_directory(stories, imports)
    ) as story_directory:
        if os.listdir(story_directory):
            return await _train_core_with_validated_data(
                domain=domain,
                config=config,
                story_directory=story_directory,
                output=output,
                train_path=train_path,
                fixed_model_name=fixed_model_name,
                kwargs=kwargs,
            )

        print_error(
            "No stories given. Please provide stories in order to "
            "train a Rasa Core model using the '--stories' argument."
        )
        return None
Ejemplo n.º 3
0
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Domain object, or path to the domain file (a path is loaded
            here).
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Whatever `_train_core_with_validated_data` returns, or `None` when the
        domain is invalid or no dialogue data was found.

    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)
    imports = SkillSelector.load(config)

    if isinstance(domain, str):
        try:
            domain = Domain.load(domain, imports)
        except InvalidDomain as error:
            print_error(error)
            return None

    story_directory = data.get_core_directory(stories, imports)

    if os.listdir(story_directory):
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    # Nothing to train on.
    print_error(
        "No dialogue data given. Please provide dialogue data in order to "
        "train a Rasa Core model."
    )
    return None
Ejemplo n.º 4
0
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        The result of `_train_async_internal`, or the result of
        `handle_domain_if_not_exists` when the domain could not be loaded.
    """
    skill_imports = SkillSelector.load(config, training_files)
    try:
        domain = Domain.load(domain, skill_imports)
        domain.check_missing_templates()
    except InvalidDomain:
        # An invalid domain is not fatal here; it is handed to
        # `handle_domain_if_not_exists` once the data directories exist.
        domain = None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )

    # All three directories are entered as context managers on one stack, so
    # they are all exited together when the block is left, on either return.
    with ExitStack() as stack:
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        nlu_data = stack.enter_context(TempDirectoryPath(nlu_data_directory))
        story = stack.enter_context(TempDirectoryPath(story_directory))

        if domain is None:
            return handle_domain_if_not_exists(
                config, nlu_data_directory, output_path, fixed_model_name
            )

        return await _train_async_internal(
            domain,
            config,
            train_path,
            nlu_data,
            story,
            output_path,
            force_training,
            fixed_model_name,
            kwargs,
        )
Ejemplo n.º 5
0
def train_nlu(
    config: Text, nlu_data: Text, output: Text, train_path: Optional[Text]
) -> Optional[Text]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If falsy, the NLU data is first selected via the skill
            imports and the model is trained in a fresh temporary directory;
            otherwise `nlu_data` is used as-is and training happens in the
            provided directory.

    Returns:
        `None` if the NLU data directory is empty. Otherwise, when
        `train_path` is given, the training directory itself; when it is not
        given, the output path under which the model was packaged.

    """
    import rasa.nlu.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    if train_path:
        nlu_data_directory = nlu_data
    else:
        # training NLU only hence the training files still have to be selected
        nlu_data_directory = data.get_nlu_directory(
            nlu_data, SkillSelector.load(config)
        )

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return None

    working_directory = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    # Only the call's side effect is needed; the three results are discarded.
    _, _trained_model, _ = rasa.nlu.train(
        config, nlu_data_directory, working_directory, fixed_model_name="nlu"
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path:
        return working_directory

    # Stand-alone NLU training: package the result under the output location.
    packaged_path = create_output_path(output, prefix="nlu-")
    fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
    model.create_package_rasa(working_directory, packaged_path, fingerprint)
    print_success(
        "Your Rasa NLU model is trained and saved at '{}'.".format(packaged_path)
    )
    return packaged_path
Ejemplo n.º 6
0
def test_load_if_subskill_is_more_specific_than_parent(tmpdir_factory: TempdirFactory):
    """The root config is empty while "Skill A" declares its own import.

    Loading from the root config must still report "Skill A" as imported.
    """
    project = tmpdir_factory.mktemp("Parent Bot")
    root_config = project / "config.yml"
    utils.dump_obj_as_yaml_to_file(root_config, {})

    skill_a = project / "Skill A"
    skill_a.mkdir()
    utils.dump_obj_as_yaml_to_file(skill_a / "config.yml", {"imports": ["Skill B"]})

    selector = SkillSelector.load(root_config)

    assert selector.is_imported(str(skill_a))
Ejemplo n.º 7
0
def test_load_imports_without_imports(tmpdir_factory: TempdirFactory):
    """With empty configs everywhere, an arbitrary path counts as imported.

    "Skill C" is never created on disk; the selector is still expected to
    report it as imported.
    """
    no_imports = {}
    project = tmpdir_factory.mktemp("Parent Bot")
    utils.dump_obj_as_yaml_to_file(project / "config.yml", no_imports)

    # Two sibling skills, each with an empty config of its own.
    for skill_name in ("Skill A", "Skill B"):
        skill_directory = project / skill_name
        skill_directory.mkdir()
        utils.dump_obj_as_yaml_to_file(skill_directory / "config.yml", no_imports)

    selector = SkillSelector.load(project / "config.yml")

    assert selector.is_imported(project / "Skill C")
Ejemplo n.º 8
0
def train_nlu(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of the model to be stored.
        uncompress: If `True` the model will not be compressed.

    Returns:
        Whatever `_train_nlu_with_validated_data` returns, or `None` when the
        selected NLU data directory is empty.

    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    # training NLU only hence the training files still have to be selected
    nlu_data_directory = data.get_nlu_directory(nlu_data, SkillSelector.load(config))

    if os.listdir(nlu_data_directory):
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    # Nothing to train on.
    print_error(
        "No NLU data given. Please provide NLU data in order to train "
        "a Rasa NLU model."
    )
    return None
Ejemplo n.º 9
0
def test_import_outside_project_directory(tmpdir_factory):
    """Load the selector from "Skill A" instead of from the project root.

    "Skill A" imports "../Skill B" and "Skill B" imports "../Skill C"; both
    sit next to "Skill A" rather than below it. The selector is expected to
    hold exactly the "Skill B" and "Skill C" paths ("Skill C" is never created).
    """
    project = tmpdir_factory.mktemp("Parent Bot")
    utils.dump_obj_as_yaml_to_file(project / "config.yml", {"imports": ["Skill A"]})

    skill_a = project / "Skill A"
    skill_a.mkdir()
    utils.dump_obj_as_yaml_to_file(
        skill_a / "config.yml", {"imports": ["../Skill B"]}
    )

    skill_b = project / "Skill B"
    skill_b.mkdir()
    utils.dump_obj_as_yaml_to_file(
        skill_b / "config.yml", {"imports": ["../Skill C"]}
    )

    selector = SkillSelector.load(skill_a / "config.yml")

    expected_imports = {str(skill_b), str(project / "Skill C")}
    assert selector._imports == expected_imports
Ejemplo n.º 10
0
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        `None` if the domain is invalid or no training data was found.
        If only one kind of data is present, the result of the matching
        NLU-only or Core-only training helper. Otherwise the result of
        `_package_model` after (re)training, or the path of the latest
        existing model when nothing needs to be retrained.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e)
        )
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return None

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)

    # The working directory is created only here, once a combined training run
    # is certain. None of the early returns above use it, so creating it up
    # front would leave an orphaned temporary directory behind on each of them.
    train_path = tempfile.mkdtemp()
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model, train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model
Ejemplo n.º 11
0
async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        `None` if the domain is invalid or no training data was found.
        If only one kind of data is present, the result of the matching
        NLU-only or Core-only training helper. Otherwise the path the newly
        packaged model was written to (the decompressed path when `uncompress`
        is set), or the latest existing model when neither part was retrained.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    old_model = model.get_latest_model(output_path)
    retrain_core = True
    retrain_nlu = True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return None

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    # The working directory is created only here, once a combined training run
    # is certain. None of the early returns above use it, so creating it up
    # front would leave an orphaned temporary directory behind on each of them.
    train_path = tempfile.mkdtemp()

    if not force_training and old_model:
        # Reuse the parts of the previous model whose fingerprint is unchanged;
        # a part is retrained only if it changed or could not be merged.
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output_path = create_output_path(output_path, fixed_name=fixed_model_name)
        model.create_package_rasa(train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa model is trained and saved at '{}'.".format(output_path)
        )

        return output_path
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )

        return old_model
Ejemplo n.º 12
0
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Domain object, or path to the domain file (a path is loaded
            here using the skill imports from `config`).
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the stories are first selected via the skill
            imports and the model is trained in a fresh temporary directory;
            otherwise `stories` is used as-is and training happens in the
            provided directory.
        kwargs: Additional training parameters.

    Returns:
        `None` if no dialogue data was found. Otherwise, when `train_path` is
        given, the training directory itself; when it is not given, the output
        path under which the model was packaged.

    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)

    # The check must look at the `domain` argument: testing the `Domain` class
    # itself is always False, which left a domain path unloaded whenever
    # `train_path` was given.
    domain_is_path = isinstance(domain, str)
    select_stories = not train_path

    skill_imports = None
    if domain_is_path or select_stories:
        skill_imports = SkillSelector.load(config)

    if domain_is_path:
        # Only a path needs loading; an existing Domain object is used as-is.
        domain = Domain.load(domain, skill_imports)

    if select_stories:
        story_directory = data.get_core_directory(stories, skill_imports)
    else:
        story_directory = stories

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model."
        )
        return None

    # Created only after the data check so that the early return above does
    # not leave an unused temporary directory behind.
    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only Core was trained.
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(
            config, domain, stories=story_directory
        )
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path