def _get_train_files_cmd():
    """Collect the NLU training files named on the train script's command line.

    The parsed arguments are first asked for their ``nlu`` attribute (used when
    the NLU model is trained explicitly). If that attempt raises
    ``AttributeError``, the files are taken from the ``data`` argument instead.
    Only files whose guessed format equals ``RASA_NLU`` are returned.
    """
    parsed = create_argument_parser().parse_args()
    try:
        candidates = list_files(parsed.nlu)
    except AttributeError:
        # Fall back to the "data" argument; index 1 of the returned pair is used.
        candidates = list(get_core_nlu_files(parsed.data)[1])

    nlu_files = []
    for path in candidates:
        if guess_format(path) == RASA_NLU:
            nlu_files.append(path)
    return nlu_files
Example #2
0
def test_train_with_only_core_data(run_in_simple_project: Callable[..., RunResult]):
    """Train after deleting ``data/nlu.yml`` and expect one ``test-model.tar.gz``."""
    project_root = os.getcwd()
    nlu_file = os.path.join(project_root, "data/nlu.yml")
    models_dir = os.path.join(project_root, "models")

    # The NLU file has to exist up front; it is then removed before training.
    assert os.path.exists(nlu_file)
    os.remove(nlu_file)

    run_in_simple_project("train", "--fixed-model-name", "test-model")

    assert os.path.exists(models_dir)
    produced = io_utils.list_files(models_dir)
    assert len(produced) == 1
    assert os.path.basename(produced[0]) == "test-model.tar.gz"
Example #3
0
File: test.py Project: zzBBc/rasa
def test_nlu(args: argparse.Namespace) -> None:
    """Evaluate NLU in comparison, cross-validation or single-model mode.

    The mode is picked from ``args``:

    * ``args.config`` ends up as a list (a single directory entry is expanded
      to the files it contains): every file that passes the config schema
      check is handed to ``compare_nlu_models``.
    * otherwise, if ``args.cross_validation`` is truthy:
      ``perform_nlu_cross_validation`` is run with the validated config path.
    * otherwise: the validated model path is evaluated with ``rasa.test.test_nlu``.

    Side effects: sets ``args.errors``, may rewrite ``args.config`` and creates
    the output directory.
    """
    from rasa import data
    # NOTE: inside this function the name ``test_nlu`` refers to the imported
    # ``rasa.test.test_nlu``, not to this CLI entry point.
    from rasa.test import compare_nlu_models, perform_nlu_cross_validation, test_nlu

    validated_nlu_path = cli_utils.get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH
    )
    nlu_data = data.get_nlu_directory(validated_nlu_path)
    output = args.out or DEFAULT_RESULTS_PATH
    args.errors = not args.no_errors

    io_utils.create_directory(output)

    # Exactly one config entry: make it absolute and, when it is a directory,
    # swap it for the list of files inside that directory.
    has_single_config = args.config is not None and len(args.config) == 1
    if has_single_config:
        config_path = os.path.abspath(args.config[0])
        args.config = config_path
        if os.path.isdir(config_path):
            args.config = io_utils.list_files(config_path)

    if isinstance(args.config, list):
        logger.info(
            "Multiple configuration files specified, running nlu comparison mode."
        )

        valid_configs = []
        for candidate in args.config:
            try:
                validation_utils.validate_yaml_schema(
                    io_utils.read_file(candidate),
                    CONFIG_SCHEMA_FILE,
                    show_validation_errors=False,
                )
            except validation_utils.InvalidYamlFileError:
                # Files failing the schema check are skipped, not fatal.
                logger.debug(
                    "Ignoring file '{}' as it is not a valid config file.".format(
                        candidate
                    )
                )
            else:
                valid_configs.append(candidate)

        compare_nlu_models(
            configs=valid_configs,
            nlu=nlu_data,
            output=output,
            runs=args.runs,
            exclusion_percentages=args.percentages,
        )
        return

    if args.cross_validation:
        logger.info("Test model using cross validation.")
        config = cli_utils.get_validated_path(
            args.config, "config", DEFAULT_CONFIG_PATH
        )
        perform_nlu_cross_validation(config, nlu_data, output, vars(args))
        return

    model_path = cli_utils.get_validated_path(
        args.model, "model", DEFAULT_MODELS_PATH
    )
    test_nlu(model_path, nlu_data, output, vars(args))
Example #4
0
def test_train_core_compare(run_in_simple_project: Callable[..., RunResult]):
    """Run ``train core`` with two configs and check the comparison output.

    Two identical config files are written, training is started with two runs,
    and the test then expects two run directories, the first of which holds
    four files starting with a ``tar.gz`` archive.
    """
    temp_dir = os.getcwd()

    # Both configs carry the same content; only the file name differs.
    for config_name in ("config_1.yml", "config_2.yml"):
        io_utils.write_yaml_file(
            {
                "language": "en",
                "pipeline": "supervised_embeddings",
                "policies": [{"name": "MemoizationPolicy"}],
            },
            config_name,
        )

    cli_arguments = [
        "train",
        "core",
        "-c",
        "config_1.yml",
        "config_2.yml",
        "--stories",
        "data/stories.md",
        "--out",
        "core_comparison_results",
        "--runs",
        "2",
        "--percentages",
        "25",
        "75",
        "--augmentation",
        "5",
    ]
    run_in_simple_project(*cli_arguments)

    results_dir = os.path.join(temp_dir, "core_comparison_results")
    assert os.path.exists(results_dir)

    run_directories = io_utils.list_subdirectories(results_dir)
    assert len(run_directories) == 2

    first_run_dir = os.path.join(results_dir, run_directories[0])
    model_files = io_utils.list_files(first_run_dir)
    assert len(model_files) == 4
    assert model_files[0].endswith("tar.gz")
Example #5
0
def test_train_with_only_core_data(run_in_default_project_without_models):
    """Train after deleting ``data/nlu.md`` and expect one ``test-model.tar.gz``."""
    project_root = os.getcwd()
    nlu_file = os.path.join(project_root, "data/nlu.md")
    models_dir = os.path.join(project_root, "models")

    # The NLU file has to exist up front; it is then removed before training.
    assert os.path.exists(nlu_file)
    os.remove(nlu_file)

    train_arguments = ("train", "--fixed-model-name", "test-model")
    run_in_default_project_without_models(*train_arguments)

    assert os.path.exists(models_dir)
    produced = io_utils.list_files(models_dir)
    assert len(produced) == 1
    assert os.path.basename(produced[0]) == "test-model.tar.gz"
Example #6
0
def test_train_with_only_nlu_data(run_in_simple_project: Callable[..., RunResult]):
    """Train after deleting the stories and rules files; expect one model archive."""
    temp_dir = Path.cwd()
    data_dir = temp_dir / "data"

    # Each core data file must exist before it is deleted.
    for file_name in ("stories.yml", "rules.yml"):
        core_path = data_dir / file_name
        assert core_path.exists()
        core_path.unlink()

    run_in_simple_project("train", "--fixed-model-name", "test-model")

    models_dir = os.path.join(temp_dir, "models")
    assert os.path.exists(models_dir)
    produced = io_utils.list_files(models_dir)
    assert len(produced) == 1
    assert os.path.basename(produced[0]) == "test-model.tar.gz"
Example #7
0
def test_test_core_comparison(run_in_default_project):
    """Run ``test core`` on a model and a copy of it; expect the results file."""
    original_model = list_files("models")[0]
    duplicate_model = "models/copy-model.tar.gz"
    # A second model path is obtained by copying the first listed model.
    copyfile(original_model, duplicate_model)

    cli_arguments = [
        "test",
        "core",
        "-m",
        original_model,
        duplicate_model,
        "--stories",
        "data/stories.md",
    ]
    run_in_default_project(*cli_arguments)

    results_file = os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE)
    assert os.path.exists(results_file)
Example #8
0
def test_test_core_comparison(
    run_in_simple_project_with_model: Callable[..., RunResult],
):
    """Run ``test core`` on a model and a copy of it; expect the results file."""
    original_model = list_files("models")[0]
    duplicate_model = "models/copy-model.tar.gz"
    # A second model path is obtained by copying the first listed model.
    copyfile(original_model, duplicate_model)

    cli_arguments = [
        "test",
        "core",
        "-m",
        original_model,
        duplicate_model,
        "--stories",
        "data/stories.md",
    ]
    run_in_simple_project_with_model(*cli_arguments)

    results_file = os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE)
    assert os.path.exists(results_file)
Example #9
0
def test_train_nlu(run_in_default_project):
    """Run ``train nlu`` and expect a single ``nlu-``-prefixed file in the output."""
    output_dir = "train_models"
    cli_arguments = [
        "train",
        "nlu",
        "-c",
        "config.yml",
        "--nlu",
        "data/nlu.md",
        "--out",
        output_dir,
    ]
    run_in_default_project(*cli_arguments)

    assert os.path.exists(output_dir)
    produced = io_utils.list_files(output_dir)
    assert len(produced) == 1
    model_name = os.path.basename(produced[0])
    assert model_name.startswith("nlu-")
Example #10
0
def get_file_format(resource_name: Text) -> Text:
    """Tell whether the files under ``resource_name`` are all guessed as ``"md"``.

    Returns:
        The first guessed format when it equals ``"md"`` and every other file
        shares it; ``"json"`` in every other case, including when no files are
        listed.

    Raises:
        AttributeError: if ``resource_name`` is ``None`` or does not exist.
    """
    from rasa.nlu.training_data import loading

    resource_missing = resource_name is None or not os.path.exists(resource_name)
    if resource_missing:
        raise AttributeError(f"Resource '{resource_name}' does not exist.")

    guessed_formats = [
        loading.guess_format(path) for path in io_utils.list_files(resource_name)
    ]
    if not guessed_formats:
        return "json"

    first_format = guessed_formats[0]
    uniformly_markdown = first_format == "md" and all(
        guessed == first_format for guessed in guessed_formats
    )
    return first_format if uniformly_markdown else "json"
Example #11
0
def load_data(resource_name: Text, language: Optional[Text] = "en") -> "TrainingData":
    """Read training data from ``resource_name`` on disk.

    Every file listed under the resource is loaded; falsy results are dropped.
    No remaining data set yields an empty ``TrainingData``, a single one is
    returned as is, and several are merged into the first.

    Raises:
        ValueError: if ``resource_name`` does not exist.
    """
    from rasa.nlu.training_data import TrainingData

    if not os.path.exists(resource_name):
        raise ValueError(f"File '{resource_name}' does not exist.")

    # Load everything first, then discard the falsy results.
    loaded = [_load(path, language) for path in io_utils.list_files(resource_name)]
    usable = list(filter(None, loaded))

    if not usable:
        return TrainingData()

    first, *others = usable
    if not others:
        return first
    return first.merge(*others)
Example #12
0
def test_train(run_in_default_project):
    """Run ``train`` with explicit paths and expect one ``test-model.tar.gz``."""
    temp_dir = os.getcwd()

    cli_arguments = [
        "train",
        "-c",
        "config.yml",
        "-d",
        "domain.yml",
        "--data",
        "data",
        "--out",
        "train_models",
        "--fixed-model-name",
        "test-model",
    ]
    run_in_default_project(*cli_arguments)

    output_dir = os.path.join(temp_dir, "train_models")
    assert os.path.exists(output_dir)
    produced = io_utils.list_files(output_dir)
    assert len(produced) == 1
    assert os.path.basename(produced[0]) == "test-model.tar.gz"
Example #13
0
def test_train_nlu(run_in_simple_project: Callable[..., RunResult]):
    """Run ``train nlu`` and inspect the resulting model.

    Besides expecting a single ``nlu-``-prefixed file, the test checks that the
    loaded metadata has no ``training_data`` entry and that no file exists at
    the default training-data output path inside the model's ``nlu`` folder.
    """
    output_dir = "train_models"
    cli_arguments = [
        "train",
        "nlu",
        "-c",
        "config.yml",
        "--nlu",
        "data/nlu.md",
        "--out",
        output_dir,
    ]
    run_in_simple_project(*cli_arguments)

    assert os.path.exists(output_dir)
    produced = io_utils.list_files(output_dir)
    assert len(produced) == 1
    assert os.path.basename(produced[0]).startswith("nlu-")

    model_dir = model.get_model(output_dir)
    assert model_dir is not None

    nlu_dir = os.path.join(model_dir, "nlu")
    metadata = Metadata.load(nlu_dir)
    assert metadata.get("training_data") is None

    persisted_data = os.path.join(
        nlu_dir, training_data.DEFAULT_TRAINING_DATA_OUTPUT_PATH
    )
    assert not os.path.exists(persisted_data)
Example #14
0
    async def read_from_folder(
        resource_name: Text,
        domain: Domain,
        interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
        template_variables: Optional[Dict] = None,
        use_e2e: bool = False,
        exclusion_percentage: Optional[int] = None,
    ) -> List[StoryStep]:
        """Read every story file found at ``resource_name``.

        The files listed under the path are forwarded, together with all other
        arguments, to ``StoryFileReader.read_from_files``.

        NOTE(review): the default ``RegexInterpreter()`` is created once, when
        the function is defined, so calls relying on the default share it.

        Raises:
            ValueError: if ``resource_name`` does not exist.
        """
        if os.path.exists(resource_name):
            story_files = io_utils.list_files(resource_name)
            return await StoryFileReader.read_from_files(
                story_files,
                domain,
                interpreter,
                template_variables,
                use_e2e,
                exclusion_percentage,
            )

        # Report the absolute path so the missing location is unambiguous.
        absolute_path = os.path.abspath(resource_name)
        raise ValueError("Story file or folder could not be found. Make "
                         "sure '{}' exists and points to a story folder "
                         "or file.".format(absolute_path))
Example #15
0
def test_train_no_domain_exists(run_in_default_project):
    """Train without ``domain.yml`` and expect a model containing NLU metadata."""
    output_dir = "train_models_no_domain"

    os.remove("domain.yml")
    cli_arguments = [
        "train",
        "-c",
        "config.yml",
        "--data",
        "data",
        "--out",
        output_dir,
        "--fixed-model-name",
        "nlu-model-only",
    ]
    run_in_default_project(*cli_arguments)

    assert os.path.exists(output_dir)
    assert len(io_utils.list_files(output_dir)) == 1

    # Unpack the archive and look for the NLU metadata file inside it.
    unpacked = model.unpack_model("train_models_no_domain/nlu-model-only.tar.gz")
    assert os.path.exists(os.path.join(unpacked, "nlu", "metadata.json"))
Example #16
0
def test_list_files_invalid_resource():
    """``list_files(None)`` must raise a ``ValueError`` about the string type."""
    with pytest.raises(ValueError) as raised:
        io_utils.list_files(None)

    error_message = str(raised.value)
    assert "must be a string type" in error_message
Example #17
0
def test_list_files_non_existing_dir():
    """Listing a made-up path must raise a ``ValueError`` about locating it."""
    missing_path = "my/made_up/path"
    with pytest.raises(ValueError) as raised:
        io_utils.list_files(missing_path)

    error_message = str(raised.value)
    assert "Could not locate the resource" in error_message