Esempio n. 1
0
def test_resource_caching_if_already_restored(
        tmp_path_factory: TempPathFactory):
    """Check `Resource.from_cache` against a storage that is a copy of the original.

    A resource is written to a storage, cached, and then loaded from the cache
    with a second storage whose directory was copied from the first one. The
    loaded resource must compare equal to the original.
    """
    source_dir = tmp_path_factory.mktemp("initial_model_storage")
    source_storage = LocalModelStorage(source_dir)
    resource = Resource("my resource")

    # Persist one small file for the resource.
    with source_storage.write_to(resource) as resource_dir:
        (resource_dir / "file.txt").write_text("test_resource_caching")

    # Put the resource into the cache.
    cache_dir = tmp_path_factory.mktemp("cache_dir")
    resource.to_cache(cache_dir, source_storage)

    # Build the second storage directory from a copy of the first one.
    copied_dir = tmp_path_factory.mktemp("new dir")
    rasa.utils.common.copy_directory(source_dir, copied_dir)

    restored = Resource.from_cache(
        resource.name,
        cache_dir,
        LocalModelStorage(copied_dir),
        resource.output_fingerprint,
    )

    assert restored == resource
Esempio n. 2
0
def test_resource_caching_if_already_restored_with_different_state(
    tmp_path_factory: TempPathFactory, ):
    """Check that `Resource.from_cache` raises `ValueError` on a diverged storage.

    After the resource has been cached, an extra file is created through the
    path that `write_to` yielded. The storage directory is then copied and
    loading the resource from the cache with that copy must raise.
    """
    source_dir = tmp_path_factory.mktemp("initial_model_storage")
    source_storage = LocalModelStorage(source_dir)
    resource = Resource("my resource")

    # Persist one small file for the resource.
    with source_storage.write_to(resource) as resource_dir:
        (resource_dir / "file.txt").write_text("test_resource_caching")

    # Put the resource into the cache.
    cache_dir = tmp_path_factory.mktemp("cache_dir")
    resource.to_cache(cache_dir, source_storage)

    # Pretend there is an additional file which is not part of the cached
    # resource. This makes the directories differ and causes `from_cache`
    # to fail.
    # NOTE(review): this relies on the path yielded by `write_to` still being
    # usable after the `with` block has exited - confirm against the storage.
    (resource_dir / "another_file").touch()

    copied_dir = tmp_path_factory.mktemp("new dir")
    rasa.utils.common.copy_directory(source_dir, copied_dir)

    with pytest.raises(ValueError):
        Resource.from_cache(
            resource.name,
            cache_dir,
            LocalModelStorage(copied_dir),
            resource.output_fingerprint,
        )
Esempio n. 3
0
    def test_doesnt_checkpoint_with_zero_eval_num_examples(
            self, tmp_path: Path, tmp_path_factory: TempPathFactory):
        """Train with the zero-eval-examples config and expect no checkpoint files.

        Exactly one `UserWarning` containing the expected text must be raised
        during training, and no file below `train_TEDPolicy0` in the unpacked
        model may have `from_checkpoint` in its path.
        """
        config_name = "config_ted_policy_model_checkpointing_zero_eval_num_examples.yml"
        archive_name = "my_model.tar.gz"

        with pytest.warns(UserWarning) as recorded_warnings:
            train_core(
                domain="data/test_domains/default.yml",
                stories="data/test_yaml_stories/stories_defaultdomain.yml",
                output=str(tmp_path),
                fixed_model_name=archive_name,
                config=f"data/test_config/{config_name}",
            )

        expected_text = (
            "You have opted to save the best model, but the value of "
            f"'{EVAL_NUM_EXAMPLES}' is not greater than 0. No checkpoint model will be "
            "saved.")
        matching_warnings = [
            record for record in recorded_warnings
            if expected_text in str(record.message)
        ]
        assert len(matching_warnings) == 1

        # Unpack the trained archive and inspect the files of the TED policy.
        unpacked_dir = tmp_path_factory.mktemp("storage dir")
        LocalModelStorage.from_model_archive(unpacked_dir,
                                             tmp_path / archive_name)
        policy_dir = unpacked_dir / "train_TEDPolicy0"
        for persisted_file in policy_dir.rglob("*.*"):
            assert "from_checkpoint" not in str(persisted_file)
Esempio n. 4
0
def test_resource_with_model_storage(default_model_storage: ModelStorage,
                                     tmp_path: Path,
                                     temp_cache: TrainingCache):
    """Cache a persisted resource and restore it into a fresh model storage.

    A JSON file is written for the resource, the resource is cached via
    `temp_cache.cache_output`, and then retrieved with
    `temp_cache.get_cached_result` using a new, empty `LocalModelStorage`.
    The restored resource must carry the original name and the new storage
    must contain the persisted JSON content.
    """
    node_name = "some node"
    resource = Resource(node_name)
    test_filename = "persisted_model.json"
    test_content = {"epochs": 500}

    with default_model_storage.write_to(resource) as temporary_directory:
        rasa.shared.utils.io.dump_obj_as_json_to_file(
            temporary_directory / test_filename, test_content)

    test_fingerprint_key = uuid.uuid4().hex
    test_output_fingerprint_key = uuid.uuid4().hex
    temp_cache.cache_output(
        test_fingerprint_key,
        resource,
        test_output_fingerprint_key,
        default_model_storage,
    )

    new_model_storage_location = tmp_path / "new_model_storage"
    new_model_storage_location.mkdir()
    new_model_storage = LocalModelStorage(new_model_storage_location)
    restored_resource = temp_cache.get_cached_result(
        test_output_fingerprint_key, node_name, new_model_storage)

    assert isinstance(restored_resource, Resource)
    # Compare against the ORIGINAL resource. The previous assertion compared
    # `restored_resource` with itself and therefore could never fail. The name
    # is compared (rather than the whole object) because the resource is
    # restored under a different output fingerprint key.
    assert restored_resource.name == resource.name

    with new_model_storage.read_from(restored_resource) as temporary_directory:
        cached_content = rasa.shared.utils.io.read_json_file(
            temporary_directory / test_filename)
        assert cached_content == test_content
Esempio n. 5
0
def test_train_skip_on_model_not_changed(
    run_in_simple_project_with_model: Callable[..., RunResult],
    tmp_path_factory: TempPathFactory,
):
    """Run `train` in a project that already has a model and compare both archives.

    The second archive must have a different model id and a later `trained_at`
    timestamp, while the domain and the unpacked directory contents must be
    equal to those of the first archive.
    """
    models_dir = Path(os.getcwd(), "models")

    archives_before = list(models_dir.glob("*"))
    assert len(archives_before) == 1
    old_model = archives_before[0]

    run_in_simple_project_with_model("train")

    archives_after = sorted(models_dir.glob("*"))
    assert len(archives_after) == 2

    # The test takes the second entry of the sorted listing as the new archive.
    new_model = archives_after[1]
    assert old_model != new_model

    old_dir = tmp_path_factory.mktemp("old")
    _, old_metadata = LocalModelStorage.from_model_archive(old_dir, old_model)

    new_dir = tmp_path_factory.mktemp("new")
    _, new_metadata = LocalModelStorage.from_model_archive(new_dir, new_model)

    # The metadata identifies a new model ...
    assert old_metadata.model_id != new_metadata.model_id
    assert old_metadata.trained_at < new_metadata.trained_at
    # ... but domain and persisted files are unchanged.
    assert old_metadata.domain.as_dict() == new_metadata.domain.as_dict()
    assert rasa.utils.io.are_directories_equal(old_dir, new_dir)
Esempio n. 6
0
def _create_model_storage(is_finetuning: bool,
                          model_to_finetune: Optional[Path],
                          temp_model_dir: Path) -> ModelStorage:
    """Create the model storage located at `temp_model_dir`.

    Args:
        is_finetuning: If truthy, the storage is created by unpacking
            `model_to_finetune` via `LocalModelStorage.from_model_archive`.
        model_to_finetune: Path of the model archive to unpack; only used when
            `is_finetuning` is truthy.
        temp_model_dir: Directory used as storage location.

    Returns:
        The created model storage.
    """
    if not is_finetuning:
        return LocalModelStorage(temp_model_dir)

    # `from_model_archive` also returns the metadata, which is not needed here.
    storage, _metadata = LocalModelStorage.from_model_archive(
        temp_model_dir, model_to_finetune)
    return storage
Esempio n. 7
0
def test_read_unsupported_model(
    monkeypatch: MonkeyPatch,
    tmp_path_factory: TempPathFactory,
    domain: Domain,
):
    """Package a model with an outdated version and expect loading to be rejected.

    `LocalModelStorage._create_model_metadata` is patched so that the packaged
    archive carries metadata with version "0.0.1". Both
    `metadata_from_archive` and `from_model_archive` must then raise
    `UnsupportedModelVersionError` with the expected message.
    """
    storage_for_training = LocalModelStorage(
        tmp_path_factory.mktemp("train model storage"))
    empty_schema = GraphSchema(nodes={})

    archive_dir = tmp_path_factory.mktemp("persisted models")
    archive_path = archive_dir / "my-model.tar.gz"

    # Create outdated model meta data
    trained_at = datetime.utcnow()
    model_configuration = GraphModelConfiguration(empty_schema, empty_schema,
                                                  TrainingType.BOTH, None,
                                                  None, "nlu")
    outdated_metadata = ModelMetadata(
        trained_at=trained_at,
        # Use the current version here and overwrite it below to avoid an error.
        rasa_open_source_version=rasa.__version__,
        model_id=uuid.uuid4().hex,
        domain=domain,
        train_schema=model_configuration.train_schema,
        predict_schema=model_configuration.predict_schema,
        training_type=model_configuration.training_type,
        project_fingerprint=rasa.model.project_fingerprint(),
        language=model_configuration.language,
        core_target=model_configuration.core_target,
        nlu_target=model_configuration.nlu_target,
    )
    old_version = "0.0.1"
    outdated_metadata.rasa_open_source_version = old_version

    # Package model - and inject the outdated model meta data
    def _return_outdated_metadata(*args, **kwargs):
        return outdated_metadata

    monkeypatch.setattr(LocalModelStorage, "_create_model_metadata",
                        _return_outdated_metadata)
    storage_for_training.create_model_package(
        model_archive_path=archive_path,
        model_configuration=model_configuration,
        domain=domain,
    )

    # Unpack and inspect packaged model
    unpack_dir = tmp_path_factory.mktemp("load model storage")

    # The message is used as a regular expression by `pytest.raises(match=...)`.
    expected_message = (
        "The model version is trained using Rasa Open Source "
        f"{old_version} and is not compatible with your current "
        "installation .*")

    with pytest.raises(UnsupportedModelVersionError, match=expected_message):
        LocalModelStorage.metadata_from_archive(archive_path)

    with pytest.raises(UnsupportedModelVersionError, match=expected_message):
        LocalModelStorage.from_model_archive(unpack_dir, archive_path)
Esempio n. 8
0
async def test_nlu(
    model: Optional[Text],
    nlu_data: Optional[Text],
    output_directory: Text = DEFAULT_RESULTS_PATH,
    additional_arguments: Optional[Dict] = None,
) -> None:
    """Tests the NLU Model.

    Args:
        model: Model location passed to `rasa.model.get_local_model`.
        nlu_data: NLU data passed on to `run_evaluation`.
        output_directory: Directory which is created up front and handed to
            `run_evaluation` as output location.
        additional_arguments: Extra arguments; filtered through
            `minimal_kwargs` before being passed to `run_evaluation`.
    """
    from rasa.nlu.test import run_evaluation

    rasa.shared.utils.io.create_directory(output_directory)

    try:
        model = rasa.model.get_local_model(model)
    except ModelNotFound:
        rasa.shared.utils.cli.print_error(
            "Could not find any model. Use 'rasa train nlu' to train a "
            "Rasa model and provide it via the '--model' argument.")
        return

    metadata = LocalModelStorage.metadata_from_archive(model)

    # Evaluation requires an existing archive which is not a Core-only model.
    if not os.path.exists(model) or metadata.training_type == TrainingType.CORE:
        rasa.shared.utils.cli.print_error(
            "Could not find any model. Use 'rasa train nlu' to train a "
            "Rasa model and provide it via the '--model' argument.")
        return

    evaluation_kwargs = rasa.shared.utils.common.minimal_kwargs(
        additional_arguments, run_evaluation, ["data_path", "model"])
    loaded_agent = Agent.load(model_path=model)
    await run_evaluation(nlu_data,
                         loaded_agent.processor,
                         output_directory=output_directory,
                         **evaluation_kwargs)
Esempio n. 9
0
def test_train_model_training_data_persisted(
    tmp_path: Path, nlu_as_json_path: Text, tmp_path_factory: TempPathFactory
):
    """Train NLU with `persist_nlu_training_data=True` and look for the data.

    After unpacking the trained archive, the directory
    `nlu_training_data_provider` must exist and its `training_data.yml` must
    contain non-empty training data.
    """
    pipeline_config = {
        "pipeline": [{"name": "KeywordIntentClassifier"}],
        "language": "en",
    }
    config_file = tmp_path / "config.yml"
    rasa.shared.utils.io.dump_obj_as_json_to_file(config_file, pipeline_config)

    archive_path = Path(
        rasa.model_training.train_nlu(
            str(config_file),
            nlu_as_json_path,
            output=str(tmp_path),
            persist_nlu_training_data=True,
        )
    )
    assert archive_path.is_file()

    # Unpack the archive; only the files on disk are inspected afterwards.
    unpacked_dir = tmp_path_factory.mktemp("loaded")
    LocalModelStorage.from_model_archive(unpacked_dir, archive_path)

    persisted_data_dir = unpacked_dir / "nlu_training_data_provider"
    assert persisted_data_dir.is_dir()

    persisted_data = RasaYAMLReader().read(persisted_data_dir / "training_data.yml")
    assert not persisted_data.is_empty()
Esempio n. 10
0
def shell(args: argparse.Namespace) -> None:
    """Talk with a bot through the command line.

    Args:
        args: Parsed command line arguments. `args.model` is read to locate
            the model and `args.connector` is set to "cmdline".
    """
    from rasa.cli.utils import get_validated_path
    from rasa.shared.constants import DEFAULT_MODELS_PATH

    args.connector = "cmdline"

    model_location = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)

    try:
        model_archive = get_latest_model(model_location)
    except ModelNotFound:
        print_error("No model found. Train a model before running the "
                    "server using `rasa train`.")
        return

    training_type = LocalModelStorage.metadata_from_archive(
        model_archive).training_type

    # NLU-only models are served by the NLU command line runner.
    if training_type == TrainingType.NLU:
        import rasa.nlu.run

        telemetry.track_shell_started("nlu")
        rasa.nlu.run.run_cmdline(model_archive)
        return

    import rasa.cli.run

    telemetry.track_shell_started("rasa")
    rasa.cli.run.run(args)
Esempio n. 11
0
def perform_interactive_learning(
    args: argparse.Namespace, zipped_model: Text, file_importer: TrainingDataImporter
) -> None:
    """Performs interactive learning.

    Args:
        args: Namespace arguments; `model` and `endpoints` are overwritten on it.
        zipped_model: Path to zipped model.
        file_importer: File importer which provides the training data and model config.
    """
    from rasa.core.train import do_interactive_learning

    args.model = zipped_model

    # Interactive learning is refused for models trained on NLU data only.
    model_metadata = LocalModelStorage.metadata_from_archive(zipped_model)
    is_nlu_only = model_metadata.training_type == TrainingType.NLU
    if is_nlu_only:
        rasa.shared.utils.cli.print_error_and_exit(
            "Can not run interactive learning on an NLU-only model."
        )

    validated_endpoints = rasa.cli.utils.get_validated_path(
        args.endpoints, "endpoints", DEFAULT_ENDPOINTS_PATH, True
    )
    args.endpoints = validated_endpoints

    do_interactive_learning(args, file_importer)
Esempio n. 12
0
def test_model_finetuning_nlu_with_default_epochs(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_nlu_moodbot_path: Text,
    tmp_path_factory: TempPathFactory,
):
    """Finetune an NLU model with a config that omits the epochs setting.

    The epochs entry is removed from the last pipeline component of the
    moodbot config before finetuning with `finetuning_epoch_fraction=0.01`.
    The train schema of the resulting model must list 3 epochs for
    `train_DIETClassifier5`.
    """
    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Providing a new config with no epochs will mean the default amount are used
    # and then scaled by `finetuning_epoch_fraction`.
    config = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/config.yml")
    last_component = config["pipeline"][-1]
    del last_component[EPOCHS]
    config_without_epochs = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(config, config_without_epochs)

    finetuned_archive = rasa.model_training.train_nlu(
        str(config_without_epochs),
        "data/test_moodbot/data/nlu.yml",
        output=str(models_dir),
        model_to_finetune=trained_nlu_moodbot_path,
        finetuning_epoch_fraction=0.01,
    )

    unpacked_dir = tmp_path_factory.mktemp("finetuned model")
    _, metadata = LocalModelStorage.from_model_archive(unpacked_dir,
                                                       Path(finetuned_archive))

    diet_node = metadata.train_schema.nodes["train_DIETClassifier5"]
    assert diet_node.config[EPOCHS] == 3
Esempio n. 13
0
def test_train(run_in_simple_project: Callable[..., RunResult],
               tmp_path: Path):
    """Run `train` with a fixed model name and check the produced archive.

    Exactly one file named `test-model.tar.gz` must exist in `train_models`,
    and its metadata must carry a model id and the domain of `domain.yml`.
    """
    project_dir = os.getcwd()

    cli_arguments = [
        "train",
        "-c", "config.yml",
        "-d", "domain.yml",
        "--data", "data",
        "--out", "train_models",
        "--fixed-model-name", "test-model",
    ]
    run_in_simple_project(*cli_arguments)

    output_dir = Path(project_dir, "train_models")
    assert output_dir.is_dir()

    archives = list(output_dir.glob("*"))
    assert len(archives) == 1

    archive = archives[0]
    assert archive.name == "test-model.tar.gz"

    _, metadata = LocalModelStorage.from_model_archive(tmp_path, archive)
    assert metadata.model_id

    expected_domain = Domain.load(Path(project_dir, "domain.yml"))
    assert metadata.domain.as_dict() == expected_domain.as_dict()
Esempio n. 14
0
def test_train_nlu(run_in_simple_project: Callable[..., RunResult],
                   tmp_path: Path):
    """Run `train nlu` and check that the model schemas contain no policies.

    Exactly one `*.tar.gz` archive whose name starts with `nlu-` must be
    written to `train_models`.
    """
    cli_arguments = [
        "train", "nlu",
        "-c", "config.yml",
        "--nlu", "data/nlu.yml",
        "--out", "train_models",
    ]
    run_in_simple_project(*cli_arguments)

    output_dir = Path("train_models")
    assert output_dir.is_dir()

    archives = list(output_dir.glob("*.tar.gz"))
    assert len(archives) == 1

    archive = archives[0]
    assert archive.name.startswith("nlu-")

    _, metadata = LocalModelStorage.from_model_archive(tmp_path, archive)

    # Neither the train nor the predict schema may use a `Policy` component.
    for schema in (metadata.train_schema, metadata.predict_schema):
        assert not any(
            issubclass(node.uses, Policy) for node in schema.nodes.values())
Esempio n. 15
0
def test_train_nlu_persist_nlu_data(run_in_simple_project: Callable[...,
                                                                    RunResult],
                                    tmp_path: Path) -> None:
    """Run `train nlu --persist-nlu-data` and look for the persisted data.

    The single archive in `train_models` must start with `nlu-`, and the
    resource `nlu_training_data_provider` of the unpacked model must contain
    `DEFAULT_TRAINING_DATA_OUTPUT_PATH`.
    """
    cli_arguments = [
        "train", "nlu",
        "-c", "config.yml",
        "--nlu", "data/nlu.yml",
        "--out", "train_models",
        "--persist-nlu-data",
    ]
    run_in_simple_project(*cli_arguments)

    output_dir = Path("train_models")
    assert output_dir.is_dir()

    archives = list(output_dir.glob("*"))
    assert len(archives) == 1

    archive = archives[0]
    assert archive.name.startswith("nlu-")

    storage, _ = LocalModelStorage.from_model_archive(tmp_path, archive)

    data_resource = Resource("nlu_training_data_provider")
    with storage.read_from(data_resource) as resource_dir:
        persisted_data = resource_dir / DEFAULT_TRAINING_DATA_OUTPUT_PATH
        assert persisted_data.exists()
Esempio n. 16
0
    def inner(
        train_schema: GraphSchema,
        cache: Optional[TrainingCache] = None,
        model_storage: Optional[ModelStorage] = None,
        path: Optional[Path] = None,
    ) -> Path:
        """Train `train_schema` with a `GraphTrainer` and return the archive path.

        Args:
            train_schema: Schema passed to the trainer as train schema.
            cache: Training cache; created via `local_cache_creator(path)` if falsy.
            model_storage: Model storage; created via
                `LocalModelStorage.create(path)` if falsy.
            path: Working directory; a fresh temporary directory if falsy.

        Returns:
            Path of the written `model.tar.gz` inside `path`.
        """
        # Fill in defaults for everything the caller did not provide.
        path = path or tmp_path_factory.mktemp("model_storage_path")
        model_storage = model_storage or LocalModelStorage.create(path)
        cache = cache or local_cache_creator(path)

        trainer = GraphTrainer(
            model_storage=model_storage,
            cache=cache,
            graph_runner_class=DaskGraphRunner,
        )

        archive_path = path / "model.tar.gz"
        trainer.train(
            train_schema=train_schema,
            predict_schema=GraphSchema({}),
            domain_path=domain_path,
            output_filename=archive_path,
        )

        assert archive_path.is_file()
        return archive_path
Esempio n. 17
0
def test_train_no_domain_exists(run_in_simple_project: Callable[...,
                                                                RunResult],
                                tmp_path: Path) -> None:
    """Run `train` after deleting `domain.yml` and expect a model without policies.

    The archive `nlu-model-only.tar.gz` must be written to
    `train_models_no_domain`, and neither of its schemas may use a `Policy`.
    """
    os.remove("domain.yml")

    cli_arguments = [
        "train",
        "-c", "config.yml",
        "--data", "data",
        "--out", "train_models_no_domain",
        "--fixed-model-name", "nlu-model-only",
    ]
    run_in_simple_project(*cli_arguments)

    archive = Path("train_models_no_domain", "nlu-model-only.tar.gz")
    assert archive.is_file()

    _, metadata = LocalModelStorage.from_model_archive(tmp_path, archive)

    # Neither the train nor the predict schema may use a `Policy` component.
    for schema in (metadata.train_schema, metadata.predict_schema):
        assert not any(
            issubclass(node.uses, Policy) for node in schema.nodes.values())
Esempio n. 18
0
    def inner(
        train_schema: GraphSchema,
        cache: Optional[TrainingCache] = None,
        model_storage: Optional[ModelStorage] = None,
        path: Optional[Path] = None,
        force_retraining: bool = False,
    ) -> Path:
        """Train `train_schema` with a `GraphTrainer` and return the archive path.

        Args:
            train_schema: Schema used as train schema of the model configuration.
            cache: Training cache; created via `local_cache_creator(path)` if falsy.
            model_storage: Model storage; created via
                `LocalModelStorage.create(path)` if falsy.
            path: Working directory; a fresh temporary directory if falsy.
            force_retraining: Passed through to `GraphTrainer.train`.

        Returns:
            Path of the written `model.tar.gz` inside `path`.
        """
        # Fill in defaults for everything the caller did not provide.
        path = path or tmp_path_factory.mktemp("model_storage_path")
        model_storage = model_storage or LocalModelStorage.create(path)
        cache = cache or local_cache_creator(path)

        trainer = GraphTrainer(
            model_storage=model_storage, cache=cache, graph_runner_class=DaskGraphRunner
        )

        model_configuration = GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=GraphSchema({}),
            language=None,
            core_target=None,
            nlu_target="nlu",
            training_type=TrainingType.BOTH,
        )
        archive_path = path / "model.tar.gz"
        trainer.train(
            model_configuration,
            importer=TrainingDataImporter.load_from_dict(domain_path=str(domain_path)),
            output_filename=archive_path,
            force_retraining=force_retraining,
        )

        assert archive_path.is_file()
        return archive_path
Esempio n. 19
0
def test_train_persist_nlu_data(run_in_simple_project: Callable[...,
                                                                RunResult],
                                tmp_path: Path):
    """Run `train --persist-nlu-data` and look for the persisted data.

    The single archive in `train_models` must be `test-model.tar.gz`, and the
    resource `nlu_training_data_provider` of the unpacked model must contain
    `DEFAULT_TRAINING_DATA_OUTPUT_PATH`.
    """
    project_dir = os.getcwd()

    cli_arguments = [
        "train",
        "-c", "config.yml",
        "-d", "domain.yml",
        "--data", "data",
        "--out", "train_models",
        "--fixed-model-name", "test-model",
        "--persist-nlu-data",
    ]
    run_in_simple_project(*cli_arguments)

    output_dir = Path(project_dir, "train_models")
    assert output_dir.is_dir()

    archives = list(output_dir.glob("*"))
    assert len(archives) == 1

    archive = archives[0]
    assert archive.name == "test-model.tar.gz"

    storage, _ = LocalModelStorage.from_model_archive(tmp_path, archive)

    data_resource = Resource("nlu_training_data_provider")
    with storage.read_from(data_resource) as resource_dir:
        persisted_data = resource_dir / DEFAULT_TRAINING_DATA_OUTPUT_PATH
        assert persisted_data.exists()
Esempio n. 20
0
def test_create_model_package_with_non_empty_model_storage(tmp_path: Path):
    """Expect `ValueError` when unpacking into a directory that already has a file."""
    # Put something in the future model storage directory
    existing_file = tmp_path / "somefile.json"
    existing_file.touch()

    # Unpacking into an already filled `ModelStorage` raises an exception.
    with pytest.raises(ValueError):
        LocalModelStorage.from_model_archive(tmp_path,
                                             Path("does not matter"))
Esempio n. 21
0
    def test_train_model_checkpointing(self, tmp_path: Path,
                                       tmp_path_factory: TempPathFactory):
        """Train with the checkpointing config and expect checkpoint files.

        At least one file below `train_TEDPolicy0` in the unpacked model must
        have `from_checkpoint` in its path.
        """
        archive_name = "my_model.tar.gz"
        train_core(
            domain="data/test_domains/default.yml",
            stories="data/test_yaml_stories/stories_defaultdomain.yml",
            output=str(tmp_path),
            fixed_model_name=archive_name,
            config="data/test_config/config_ted_policy_model_checkpointing.yml",
        )

        # Unpack the trained archive and inspect the files of the TED policy.
        unpacked_dir = tmp_path_factory.mktemp("storage dir")
        LocalModelStorage.from_model_archive(unpacked_dir,
                                             tmp_path / archive_name)
        policy_dir = unpacked_dir / "train_TEDPolicy0"
        persisted_paths = [str(path) for path in policy_dir.rglob("*.*")]
        assert any("from_checkpoint" in path for path in persisted_paths)
def featurizer_sparse(tmpdir):
    """Generate a featurizer for tests.

    Builds a `CountVectorsFeaturizer` with its default config, backed by a
    `LocalModelStorage` located at `tmpdir` and the resource "sparse_feat".
    """
    storage = LocalModelStorage(pathlib.Path(tmpdir))
    resource = Resource("sparse_feat")
    default_config = CountVectorsFeaturizer.get_default_config()
    execution_context = ExecutionContext(storage, resource)
    return CountVectorsFeaturizer(
        config=default_config,
        resource=resource,
        model_storage=storage,
        execution_context=execution_context,
    )
Esempio n. 23
0
async def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    additional_arguments: Optional[Dict] = None,
    use_conversation_test_files: bool = False,
) -> None:
    """Tests a trained Core model against a set of test stories.

    Args:
        model: Model location passed to `rasa.model.get_local_model`.
        stories: Test stories passed on to the core test function.
        output: Output directory; created first if the value is truthy.
        additional_arguments: Extra arguments; filtered through
            `minimal_kwargs` before being passed to the core test function.
        use_conversation_test_files: Passed to the core test function as `e2e`.
    """
    try:
        model = rasa.model.get_local_model(model)
    except ModelNotFound:
        rasa.shared.utils.cli.print_error(
            "Unable to test: could not find a model. Use 'rasa train' to train a "
            "Rasa model and provide it via the '--model' argument.")
        return

    training_type = LocalModelStorage.metadata_from_archive(model).training_type
    if training_type == TrainingType.NLU:
        # NOTE: only an error is printed here; execution continues below.
        rasa.shared.utils.cli.print_error(
            "Unable to test: no core model found. Use 'rasa train' to train a "
            "Rasa model and provide it via the '--model' argument.")
    elif use_conversation_test_files and training_type == TrainingType.CORE:
        rasa.shared.utils.cli.print_warning(
            "No NLU model found. Using default 'RegexMessageHandler' for end-to-end "
            "evaluation. If you added actual user messages to your test stories "
            "this will likely lead to the tests failing. In that case, you need "
            "to train a NLU model first, e.g. using `rasa train`.")

    extra_arguments = {} if additional_arguments is None else additional_arguments

    if output:
        rasa.shared.utils.io.create_directory(output)

    loaded_agent = Agent.load(model_path=model)
    if not loaded_agent.is_ready():
        rasa.shared.utils.cli.print_error(
            "Unable to test: processor not loaded. Use 'rasa train' to train a "
            "Rasa model and provide it via the '--model' argument.")
        return

    from rasa.core.test import test as core_test

    # The listed names are passed explicitly in the call below.
    test_kwargs = rasa.shared.utils.common.minimal_kwargs(
        extra_arguments, core_test, ["stories", "agent", "e2e"])

    await core_test(
        stories,
        loaded_agent,
        e2e=use_conversation_test_files,
        out_directory=output,
        **test_kwargs,
    )
Esempio n. 24
0
def test_create_package_with_non_existing_parent(tmp_path: Path):
    """Package a model to a path below `new/sub/dir` and expect the file to exist."""
    storage = LocalModelStorage.create(tmp_path)
    archive_path = tmp_path / "new" / "sub" / "dir" / "file.tar.gz"

    empty_configuration = GraphModelConfiguration(GraphSchema({}),
                                                  GraphSchema({}),
                                                  TrainingType.BOTH, None,
                                                  None, "nlu")
    storage.create_model_package(archive_path, empty_configuration,
                                 Domain.empty())

    assert archive_path.is_file()
Esempio n. 25
0
def test_read_from_rasa2_resource(tmp_path_factory: TempPathFactory):
    """Expect `UnsupportedModelVersionError` for an archive with a fingerprint file.

    The archive only contains a `fingerprint.json` with version "2.8.5". Both
    `from_model_archive` and `metadata_from_archive` must raise, and the error
    message must match a pattern built from that version.
    """
    # we only search for the fingerprint file - nlu and core folder do not even need
    # to exist
    version = "2.8.5"
    fingerprint = {"version": version, "irrelevant-other-key": "bla"}
    model_dir = tmp_path_factory.mktemp("model_dir")
    rasa.shared.utils.io.dump_obj_as_json_to_file(
        model_dir / "fingerprint.json", fingerprint)

    # Pack the directory into a gzipped tar archive.
    archive_path = tmp_path_factory.mktemp("model_zips") / "model"
    with TarSafe.open(archive_path, "w:gz") as tar:
        tar.add(model_dir, arcname="")

    storage_dir = tmp_path_factory.mktemp("storage_dir")
    storage = LocalModelStorage(storage_path=storage_dir)
    version_pattern = f".*{version}.*"

    with pytest.raises(UnsupportedModelVersionError, match=version_pattern):
        storage.from_model_archive(storage_path=storage_dir,
                                   model_archive_path=archive_path)

    with pytest.raises(UnsupportedModelVersionError, match=version_pattern):
        storage.metadata_from_archive(model_archive_path=archive_path)
Esempio n. 26
0
def test_resource_caching(tmp_path_factory: TempPathFactory):
    """Cache a resource and load it from the cache with a new, empty storage.

    The loaded resource must equal the original, have the same fingerprint,
    and the new storage must contain the file that was originally written.
    """
    source_storage = LocalModelStorage(
        tmp_path_factory.mktemp("initial_model_storage"))
    resource = Resource("my resource")

    # Persist one small file for the resource.
    filename = "file.txt"
    content = "test_resource_caching"
    with source_storage.write_to(resource) as resource_dir:
        (resource_dir / filename).write_text(content)

    # Put the resource into the cache.
    cache_dir = tmp_path_factory.mktemp("cache_dir")
    resource.to_cache(cache_dir, source_storage)

    # Reload resource from cache and inspect
    target_storage = LocalModelStorage(
        tmp_path_factory.mktemp("new_model_storage"))
    restored = Resource.from_cache(resource.name, cache_dir, target_storage,
                                   resource.output_fingerprint)

    assert restored == resource
    assert restored.fingerprint() == resource.fingerprint()

    # Read written resource data from model storage to see whether all expected
    # contents are there
    with target_storage.read_from(resource) as resource_dir:
        restored_content = (resource_dir / filename).read_text()
        assert restored_content == content
Esempio n. 27
0
    def project_fingerprint_from_model(
        _model_directory: Optional[Text], ) -> Optional[Text]:
        """Gets project fingerprint from an app's loaded model.

        Args:
            _model_directory: Location handed to `model.get_local_model` to
                find the model archive. May be `None` or empty.

        Returns:
            The `project_fingerprint` stored in the archive's metadata, or
            `None` if no directory was given or reading the metadata failed.
        """
        # Check the function's own parameter. The previous code tested
        # `model_directory`, a name from outside this function, so the guard
        # ignored the argument that was actually passed in.
        if not _model_directory:
            return None

        try:
            model_archive = model.get_local_model(_model_directory)
            metadata = LocalModelStorage.metadata_from_archive(model_archive)

            return metadata.project_fingerprint
        except Exception:
            # Deliberately best-effort: any failure to locate or read the
            # model results in "no fingerprint" instead of an error.
            return None
Esempio n. 28
0
def test_train_force(
    run_in_simple_project_with_model: Callable[..., RunResult],
    tmp_path_factory: TempPathFactory,
):
    """Run `train --force` in a project with a model and compare both archives.

    A second file must appear in `models`, and the unpacked contents of the
    two listed archives must differ.
    """
    models_dir = os.path.join(os.getcwd(), "models")

    assert os.path.exists(models_dir)
    archives = rasa.shared.utils.io.list_files(models_dir)
    assert len(archives) == 1

    run_in_simple_project_with_model("train", "--force")

    assert os.path.exists(models_dir)
    archives = rasa.shared.utils.io.list_files(models_dir)
    assert len(archives) == 2
    first_archive, second_archive = archives[0], archives[1]

    # Unpack both archives; only the directories on disk are compared.
    old_dir = tmp_path_factory.mktemp("old")
    LocalModelStorage.from_model_archive(old_dir, first_archive)

    new_dir = tmp_path_factory.mktemp("new")
    LocalModelStorage.from_model_archive(new_dir, second_archive)

    assert not rasa.utils.io.are_directories_equal(old_dir, new_dir)
def test_predictions_added(training_data, tmpdir, featurizer_sparse):
    """Checks if the sizes are appropriate.

    Trains a `LogisticRegressionClassifier` on tokenized and featurized
    training data, validates the predictions written to each message, then
    persists and reloads the classifier and compares the intents produced by
    both instances.
    """
    # Set up classifier
    storage = LocalModelStorage(pathlib.Path(tmpdir))
    resource = Resource("classifier")
    context = ExecutionContext(storage, resource)
    classifier = LogisticRegressionClassifier(
        config=LogisticRegressionClassifier.get_default_config(),
        name=context.node_name,
        resource=resource,
        model_storage=storage,
    )

    examples = training_data.training_examples

    # First we add tokens.
    tokeniser.process(examples)

    # Next we add features.
    featurizer_sparse.train(training_data)
    featurizer_sparse.process(training_data.training_examples)

    # Train the classifier.
    classifier.train(training_data)

    # Make predictions.
    classifier.process(training_data.training_examples)

    # Check that the messages have been processed correctly
    for message in training_data.training_examples:
        _name = message.get("intent")["name"]
        confidence = message.get("intent")["confidence"]
        # Confidence should be between 0 and 1.
        assert 0 < confidence < 1

        ranking = message.get("intent_ranking")
        assert is_sorted(ranking)

        ranked_names = {entry["name"] for entry in ranking}
        assert ranked_names == {"greet", "goodbye"}

        # Confirm the sum of confidences is 1.0
        confidence_sum = np.sum([entry["confidence"] for entry in ranking])
        assert np.isclose(confidence_sum, 1.0)

    classifier.persist()

    reloaded_classifier = LogisticRegressionClassifier.load(
        {}, storage, resource, context
    )

    # Run both classifiers on (shallow) copies of the training data.
    data_for_reloaded = copy.copy(training_data)
    data_for_trained = copy.copy(training_data)
    reloaded_output = reloaded_classifier.process(
        data_for_reloaded.training_examples)
    trained_output = classifier.process(data_for_trained.training_examples)

    for from_reloaded, from_trained in zip(reloaded_output, trained_output):
        assert from_reloaded.get("intent") == from_trained.get("intent")
Esempio n. 30
0
def trained_ted(
    tmp_path_factory: TempPathFactory, moodbot_domain_path: Path,
) -> TEDPolicyGraphComponent:
    """Create a `TEDPolicyGraphComponent` and train it for one epoch.

    The policy uses its default config with `EPOCHS` set to 1 and is trained
    on the moodbot stories with the domain loaded from `moodbot_domain_path`.

    Returns:
        The trained policy.
    """
    stories_path = "data/test_moodbot/data/stories.yml"
    domain = Domain.load(moodbot_domain_path)
    trackers = training.load_data(str(stories_path), domain)

    # Default config, overridden to train for a single epoch only.
    config = {**TEDPolicyGraphComponent.get_default_config(), EPOCHS: 1}
    storage = LocalModelStorage.create(tmp_path_factory.mktemp("storage"))

    ted = TEDPolicyGraphComponent.create(
        config,
        storage,
        Resource("ted"),
        ExecutionContext(GraphSchema({})),
    )
    ted.train(trackers, domain)

    return ted