コード例 #1
0
def test_same_file_names_get_resolved(tmpdir):
    # makes sure the resolution properly handles if there are two files with
    # with the same name in different directories

    tmpdir.join("one").mkdir()
    tmpdir.join("two").mkdir()
    data_dir_one = os.path.join(tmpdir.join("one").join("stories.md").strpath)
    data_dir_two = os.path.join(tmpdir.join("two").join("stories.md").strpath)
    shutil.copy2(DEFAULT_STORIES_FILE, data_dir_one)
    shutil.copy2(DEFAULT_STORIES_FILE, data_dir_two)

    nlu_dir_one = os.path.join(tmpdir.join("one").join("nlu.md").strpath)
    nlu_dir_two = os.path.join(tmpdir.join("two").join("nlu.md").strpath)
    shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_one)
    shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_two)

    core_directory, nlu_directory = data.get_core_nlu_directories([tmpdir.strpath])

    nlu_files = os.listdir(nlu_directory)

    assert len(nlu_files) == 2
    assert all([f.endswith("nlu.md") for f in nlu_files])

    stories = os.listdir(core_directory)

    assert len(stories) == 2
    assert all([f.endswith("stories.md") for f in stories])
コード例 #2
0
def test_same_file_names_get_resolved(tmp_path):
    # makes sure the resolution properly handles if there are two files with
    # with the same name in different directories

    (tmp_path / "one").mkdir()
    (tmp_path / "two").mkdir()
    data_dir_one = str(tmp_path / "one" / "stories.md")
    data_dir_two = str(tmp_path / "two" / "stories.md")
    shutil.copy2(DEFAULT_STORIES_FILE, data_dir_one)
    shutil.copy2(DEFAULT_STORIES_FILE, data_dir_two)

    nlu_dir_one = str(tmp_path / "one" / "nlu.yml")
    nlu_dir_two = str(tmp_path / "two" / "nlu.yml")
    shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_one)
    shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_two)

    core_directory, nlu_directory = data.get_core_nlu_directories(
        [str(tmp_path)])

    nlu_files = os.listdir(nlu_directory)

    assert len(nlu_files) == 2
    assert all(f.endswith("nlu.yml") for f in nlu_files)

    stories = os.listdir(core_directory)

    assert len(stories) == 2
    assert all(f.endswith("stories.md") for f in stories)
コード例 #3
0
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    skill_imports = SkillSelector.load(config, training_files)
    try:
        domain = Domain.load(domain, skill_imports)
        domain.check_missing_templates()
    except InvalidDomain:
        domain = None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports)

    with ExitStack() as stack:
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        nlu_data = stack.enter_context(TempDirectoryPath(nlu_data_directory))
        story = stack.enter_context(TempDirectoryPath(story_directory))

        if domain is None:
            return handle_domain_if_not_exists(config, nlu_data_directory,
                                               output_path, fixed_model_name)

        return await _train_async_internal(
            domain,
            config,
            train_path,
            nlu_data,
            story,
            output_path,
            force_training,
            fixed_model_name,
            kwargs,
        )

    if domain is None:
        return handle_domain_if_not_exists(config, nlu_data_directory,
                                           output_path, fixed_model_name)
コード例 #4
0
def validate_files(args):
    from rasa.core.validator import Validator

    loop = asyncio.get_event_loop()

    story_directory, nlu_data_directory = data.get_core_nlu_directories(args.data)
    validator = loop.run_until_complete(
        Validator.from_files(args.domain, nlu_data_directory, story_directory)
    )
    validator.verify_all()
コード例 #5
0
def check_training_data(args):
    training_files = [
        get_validated_path(f, "data", DEFAULT_DATA_PATH, none_is_valid=True)
        for f in args.data
    ]
    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files)
    if not os.listdir(story_directory) or not os.listdir(nlu_data_directory):
        print_error(
            "Cannot train initial Rasa model. Please provide NLU data and Core data."
        )
        exit(1)
コード例 #6
0
def test_get_core_nlu_directories(project):
    data_dir = os.path.join(project, "data")
    core_directory, nlu_directory = data.get_core_nlu_directories([data_dir])

    nlu_files = os.listdir(nlu_directory)

    assert len(nlu_files) == 1
    assert nlu_files[0].endswith("nlu.md")

    stories = os.listdir(core_directory)

    assert len(stories) == 1
    assert stories[0].endswith("stories.md")
コード例 #7
0
ファイル: test_data.py プロジェクト: tmfc/rasa
def test_get_core_nlu_directories(project):
    data_dir = os.path.join(project, "data")
    core_directory, nlu_directory = data.get_core_nlu_directories([data_dir])

    nlu_files = os.listdir(nlu_directory)

    assert len(nlu_files) == 1
    assert nlu_files[0].endswith("nlu.yml")

    core_files = os.listdir(core_directory)

    assert len(core_files) == 2
    assert any(file.endswith("stories.yml") for file in core_files)
    assert any(file.endswith("rules.yml") for file in core_files)
コード例 #8
0
def _project_files(project, config_file=DEFAULT_CONFIG_PATH,
                   domain=DEFAULT_DOMAIN_PATH,
                   training_files=DEFAULT_DATA_PATH):

    if training_files is None:
        core_directory = None
        nlu_directory = None
    else:
        core_directory, nlu_directory = data.get_core_nlu_directories(
            os.path.join(project, training_files))
    paths = {"config_file": config_file,
             "domain_file": domain,
             "nlu_data": core_directory,
             "stories": nlu_directory}

    return {k: v if v is None else os.path.join(project, v)
            for k, v in paths.items()}
コード例 #9
0
ファイル: interactive.py プロジェクト: jayceyxc/rasa
def interactive(args: argparse.Namespace):
    args.finetune = False  # Don't support finetuning

    training_files = [
        get_validated_path(f, "data", DEFAULT_DATA_PATH) for f in args.data
    ]
    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files)

    if not os.listdir(story_directory) or not os.listdir(nlu_data_directory):
        print_error(
            "Cannot train initial Rasa model. Please provide NLU data and Core data."
        )
        exit(1)

    zipped_model = train.train(args)

    perform_interactive_learning(args, zipped_model)
コード例 #10
0
def test_get_core_nlu_directories_with_none():
    directories = data.get_core_nlu_directories(None)

    assert all([directory for directory in directories])
    assert all([not os.listdir(directory) for directory in directories])
コード例 #11
0
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e))
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports)
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument.")
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model,
                                               train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
コード例 #12
0
ファイル: train.py プロジェクト: zhfneu/rasa_core
async def train_async(domain: Text,
                      config: Text,
                      training_files: Union[Text, List[Text]],
                      output: Text = DEFAULT_MODELS_PATH,
                      force_training: bool = False) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.

    Returns:
        Path of the trained model archive.
    """

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files)
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)
    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint,
                                              new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await train_core_async(domain, config, story_directory,
                               output, train_path)
    else:
        print("Dialogue data / configuration did not change. "
              "No need to retrain dialogue model.")

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print("NLU data / configuration did not change. "
              "No need to retrain NLU model.")

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print("Train path: '{}'.".format(train_path))

        print_success("Your bot is trained and ready to take for a spin!")

        return output
    else:
        print("Nothing changed. You can use the old model stored at {}"
              "".format(os.path.abspath(old_model)))

        return old_model
コード例 #13
0
async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return train_nlu(config, nlu_data_directory, output, None)

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await train_core_async(
            domain, config, story_directory, output, None, kwargs
        )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await train_core_async(
            domain, config, story_directory, output, train_path, kwargs
        )
    else:
        print (
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print ("NLU data / configuration did not change. No need to retrain NLU model.")

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print_success("Your bot is trained and ready to take for a spin!")

        return output
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )

        return old_model
コード例 #14
0
ファイル: train.py プロジェクト: quickyue/rasa
async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output_path)
    retrain_core = True
    retrain_nlu = True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports)
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model.")
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint,
                                              new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint,
                                             new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print("Dialogue data / configuration did not change. "
              "No need to retrain dialogue model.")

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output_path = create_output_path(output_path,
                                         fixed_name=fixed_model_name)
        model.create_package_rasa(train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success("Your Rasa model is trained and saved at '{}'.".format(
            output_path))

        return output_path
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model)))

        return old_model