def test_train_force(run_in_default_project):
    """Running ``train --force`` adds a second file to the models directory."""
    models_dir = os.path.join(os.getcwd(), "models")

    # The project is expected to start out with exactly one model file.
    assert os.path.exists(models_dir)
    assert len(list_files(models_dir)) == 1

    run_in_default_project("train", "--force")

    assert os.path.exists(models_dir)
    assert len(list_files(models_dir)) == 2
Esempio n. 2
0
def test_train_skip_on_model_not_changed(run_in_default_project):
    """Running ``train`` again leaves the single existing model file in place."""
    models_dir = os.path.join(os.getcwd(), "models")

    assert os.path.exists(models_dir)
    models_before = list_files(models_dir)
    assert len(models_before) == 1
    original_model = models_before[0]

    run_in_default_project("train")

    assert os.path.exists(models_dir)
    models_after = list_files(models_dir)
    assert len(models_after) == 1
    # Same file name as before, i.e. no new model file was written.
    assert original_model == models_after[0]
Esempio n. 3
0
    async def read_from_folder(resource_name, domain,
                               interpreter=RegexInterpreter(),
                               template_variables=None, use_e2e=False,
                               exclusion_percentage=None):
        """Read the story steps of every story file found under a path.

        Args:
            resource_name: Path to a story file or a folder of story files.
            domain: Forwarded to ``StoryFileReader.read_from_file``.
            interpreter: Forwarded to ``StoryFileReader.read_from_file``.
            template_variables: Forwarded to
                ``StoryFileReader.read_from_file``.
            use_e2e: Forwarded to ``StoryFileReader.read_from_file``.
            exclusion_percentage: If truthy and not equal to 100, the steps
                are shuffled and this percentage of them (rounded to a whole
                number of steps) is dropped from the result.

        Returns:
            The list of story steps collected from all files.

        Raises:
            ValueError: If ``resource_name`` does not exist.
        """
        import rasa.nlu.utils as nlu_utils

        if not os.path.exists(resource_name):
            raise ValueError("Story file or folder could not be found. Make "
                             "sure '{}' exists and points to a story folder "
                             "or file.".format(os.path.abspath(resource_name)))

        story_steps = []
        for f in nlu_utils.list_files(resource_name):
            steps = await StoryFileReader.read_from_file(f, domain,
                                                         interpreter,
                                                         template_variables,
                                                         use_e2e)
            story_steps.extend(steps)

        # Compare by value: an identity check (`is not 100`) only works by
        # accident for cached small ints and fails for e.g. 100.0.
        if exclusion_percentage and exclusion_percentage != 100:
            import random
            idx = int(round(exclusion_percentage / 100.0 * len(story_steps)))
            random.shuffle(story_steps)
            # When the number of steps to drop rounds to 0, `[:-0]` would be
            # `[:0]` and throw away every step, so only slice for idx != 0.
            if idx:
                story_steps = story_steps[:-idx]

        return story_steps
Esempio n. 4
0
    async def read_from_folder(
        resource_name: Text,
        domain: Domain,
        interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
        template_variables: Optional[Dict] = None,
        use_e2e: bool = False,
        exclusion_percentage: Optional[int] = None,
    ) -> List[StoryStep]:
        """Read all story files located at ``resource_name``.

        The files listed for the path are handed to
        ``StoryFileReader.read_from_files`` together with the remaining
        arguments. A ``ValueError`` is raised if the path does not exist.
        """
        import rasa.nlu.utils as nlu_utils

        if os.path.exists(resource_name):
            story_files = nlu_utils.list_files(resource_name)
            return await StoryFileReader.read_from_files(
                story_files,
                domain,
                interpreter,
                template_variables,
                use_e2e,
                exclusion_percentage,
            )

        raise ValueError("Story file or folder could not be found. Make "
                         "sure '{}' exists and points to a story folder "
                         "or file.".format(os.path.abspath(resource_name)))
Esempio n. 5
0
    def get_data(self, train_data, language):
        """Collect lookup tables and composite entities from training files.

        Each file listed under ``cmdline_args.data`` is read as JSON and its
        format is guessed. For Dialogflow entity files, the examples are read
        and one lookup table and one composite entity are extracted; each is
        appended only when the extraction result is truthy. For a file in
        the ``rasa_nlu`` format, both result lists are replaced by the lists
        stored under the file's ``'rasa.nlu_data'`` key.

        Args:
            train_data: Not used by this method.
            language: Forwarded to ``DialogflowReader._read_examples_js``.

        Returns:
            A ``(lookup_tables, composite_entities)`` tuple.
        """
        lookup_tables = []
        composite_entities = []

        # NOTE(review): `cmdline_args` is not defined in this method (the
        # argument parsing that used to create it was commented out);
        # presumably it is expected at module level -- confirm, otherwise
        # this line raises NameError.
        files = utils.list_files(cmdline_args.data)

        for file in files:
            fformat = _guess_format(file)
            file_content = utils.read_json_file(file)
            if fformat == DIALOGFLOW_ENTITIES:
                entity = file_content['name']
                reader = DialogflowReader()
                examples_js = reader._read_examples_js(
                    fn=file, language=language, fformat=fformat)
                lookup_table = self._extract_lookup_tables(entity, examples_js)
                if lookup_table:
                    lookup_tables.append(lookup_table)
                composite_entity = self._extract_composite_entities(
                    entity, examples_js)
                if composite_entity:
                    composite_entities.append(composite_entity)

            # NOTE(review): `rasa_nlu` is not defined in this method; it
            # presumably names a format constant -- confirm it exists.
            if fformat == rasa_nlu:
                rasa_nlu_data = file_content['rasa.nlu_data']
                # Replaces (does not extend) anything collected so far.
                composite_entities = rasa_nlu_data['composite_entities']
                lookup_tables = rasa_nlu_data['lookup_tables']

        return lookup_tables, composite_entities
Esempio n. 6
0
def test_list_files_ignores_hidden_files(tmpdir):
    """``utils.list_files`` returns the regular file but not the dot-file."""
    hidden_path = os.path.join(tmpdir.strpath, ".hidden")
    visible_path = os.path.join(tmpdir.strpath, "normal_file")

    # Create both files empty: the hidden one first, then the normal one.
    for path in (hidden_path, visible_path):
        with open(path, "a"):
            pass

    assert utils.list_files(tmpdir.strpath) == [visible_path]
Esempio n. 7
0
 def _get_train_files_cmd():
     """Return the training files whose guessed format is ``RASA_NLU``.

     The files are listed from the ``nlu`` command line argument; when the
     parsed arguments contain no ``nlu`` entry, 'data/nlu.json' is used.
     """
     parsed_args = create_argument_parser().parse_args()
     if "nlu" not in parsed_args:
         parsed_args.nlu = 'data/nlu.json'

     candidates = utils.list_files(parsed_args.nlu)
     return [path for path in candidates if _guess_format(path) == RASA_NLU]
Esempio n. 8
0
 def _get_train_files_cmd():
     """Return the training files whose guessed format is ``RASA_NLU``.

     The files are listed from the ``nlu`` command line argument. If that
     lookup raises ``AttributeError``, the second element returned by
     ``get_core_nlu_files`` for the ``data`` argument is used instead.
     """
     parsed_args = create_argument_parser().parse_args()
     try:
         candidates = list_files(parsed_args.nlu)
     except AttributeError:
         nlu_part = get_core_nlu_files(parsed_args.data)[1]
         candidates = list(nlu_part)

     return [path for path in candidates if _guess_format(path) == RASA_NLU]
Esempio n. 9
0
def test_train_core_compare(run_in_default_project):
    """Train core with two configs and check the comparison output layout.

    Expects two run directories in the output folder, and four files in the
    first of them, the first file ending in "tar.gz".
    """
    project_dir = os.getcwd()

    # Two configs that differ only in the policy they name.
    policy_by_config = (
        ("config_1.yml", "KerasPolicy"),
        ("config_2.yml", "MemoizationPolicy"),
    )
    for config_file, policy_name in policy_by_config:
        write_yaml_file(
            {
                "language": "en",
                "pipeline": "supervised_embeddings",
                "policies": [{"name": policy_name}],
            },
            config_file,
        )

    cli_args = [
        "train", "core",
        "-c", "config_1.yml", "config_2.yml",
        "--stories", "data/stories.md",
        "--out", "core_comparison_results",
        "--runs", "2",
        "--percentages", "25", "75",
        "--augmentation", "5",
    ]
    run_in_default_project(*cli_args)

    results_dir = os.path.join(project_dir, "core_comparison_results")
    assert os.path.exists(results_dir)

    run_directories = list_subdirectories(results_dir)
    assert len(run_directories) == 2

    model_files = list_files(os.path.join(results_dir, run_directories[0]))
    assert len(model_files) == 4
    assert model_files[0].endswith("tar.gz")
def test_train_with_only_core_data(run_in_default_project):
    """Train after deleting the NLU data and the existing models folder.

    Expects a single model file named "test-model.tar.gz" afterwards.
    """
    project_dir = os.getcwd()
    nlu_data_path = os.path.join(project_dir, "data/nlu.md")
    models_dir = os.path.join(project_dir, "models")

    # Remove the NLU data and any previously trained models.
    assert os.path.exists(nlu_data_path)
    os.remove(nlu_data_path)
    shutil.rmtree(models_dir)

    run_in_default_project("train", "--fixed-model-name", "test-model")

    assert os.path.exists(models_dir)
    trained = list_files(models_dir)
    assert len(trained) == 1
    assert os.path.basename(trained[0]) == "test-model.tar.gz"
def test_train_nlu(run_in_default_project):
    """``train nlu`` writes exactly one file, prefixed "nlu-", to ``--out``."""
    output_dir = "train_models"
    cli_args = (
        "train", "nlu",
        "-c", "config.yml",
        "--nlu", "data/nlu.md",
        "--out", output_dir,
    )
    run_in_default_project(*cli_args)

    assert os.path.exists(output_dir)
    trained = list_files(output_dir)
    assert len(trained) == 1

    model_name = os.path.basename(trained[0])
    assert model_name.startswith("nlu-")
Esempio n. 12
0
def load_data(resource_name: Text, language: Optional[Text] = "en") -> "TrainingData":
    """Load the training data found at ``resource_name`` and combine it.

    Every file listed for the resource is loaded and falsy results are
    dropped. With nothing left an empty ``TrainingData`` is returned, a
    single data set is returned as is, and several data sets are merged
    into the first one.
    """
    from rasa.nlu.training_data import TrainingData

    loaded = [_load(path, language) for path in utils.list_files(resource_name)]
    usable = [data_set for data_set in loaded if data_set]

    if not usable:
        return TrainingData()
    if len(usable) == 1:
        return usable[0]
    return usable[0].merge(*usable[1:])
Esempio n. 13
0
def get_file_format(resource_name: Text) -> Text:
    """Return "md" if every file of the resource is guessed as "md".

    In all other cases, including a resource without any files, "json" is
    returned. Raises ``AttributeError`` when ``resource_name`` is ``None``
    or does not exist.
    """
    from rasa.nlu.training_data.loading import _guess_format

    if resource_name is None or not os.path.exists(resource_name):
        raise AttributeError(
            "Resource '{}' does not exist.".format(resource_name))

    guessed = [_guess_format(path) for path in utils.list_files(resource_name)]

    # "md" only wins when there is at least one file and all of them agree.
    if guessed and all(fmt == "md" for fmt in guessed):
        return "md"
    return "json"
def test_train(run_in_default_project):
    """``train`` with a fixed model name writes exactly that archive to ``--out``."""
    project_dir = os.getcwd()

    cli_args = (
        "train",
        "-c", "config.yml",
        "-d", "domain.yml",
        "--data", "data",
        "--out", "train_models",
        "--fixed-model-name", "test-model",
    )
    run_in_default_project(*cli_args)

    output_dir = os.path.join(project_dir, "train_models")
    assert os.path.exists(output_dir)

    trained = list_files(output_dir)
    assert len(trained) == 1
    assert os.path.basename(trained[0]) == "test-model.tar.gz"
Esempio n. 15
0
def test_train_no_domain_exists(run_in_default_project):
    """Train without a domain file and check the packed model's NLU metadata.

    Expects one file in the output folder and an "nlu/metadata.json" entry
    inside the unpacked model.
    """
    output_dir = "train_models_no_domain"

    os.remove("domain.yml")
    cli_args = (
        "train",
        "-c", "config.yml",
        "--data", "data",
        "--out", output_dir,
        "--fixed-model-name", "nlu-model-only",
    )
    run_in_default_project(*cli_args)

    assert os.path.exists(output_dir)
    assert len(list_files(output_dir)) == 1

    unpacked = model.unpack_model(
        "train_models_no_domain/nlu-model-only.tar.gz")
    assert os.path.exists(os.path.join(unpacked, "nlu", "metadata.json"))
Esempio n. 16
0
def test_list_files_non_existing_dir():
    """A path that does not exist makes ``list_files`` raise ``ValueError``."""
    missing_path = "my/made_up/path"
    with pytest.raises(ValueError) as raised:
        utils.list_files(missing_path)

    message = str(raised.value)
    assert "Could not locate the resource" in message
Esempio n. 17
0
def test_list_files_invalid_resource():
    """Passing ``None`` makes ``list_files`` raise ``ValueError``."""
    with pytest.raises(ValueError) as raised:
        utils.list_files(None)

    message = str(raised.value)
    assert "must be a string type" in message