Example #1
0
def test_removing_no_longer_compatible_cache_entries(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    local_cache_creator: Callable[..., LocalTrainingCache],
    default_model_storage: ModelStorage,
):
    """Entries cached by a no-longer-compatible Rasa version are purged."""
    cache = local_cache_creator(tmp_path)

    # First soon-to-be-incompatible entry: has serialized output on disk.
    key_with_output = uuid.uuid4().hex
    cached_output = TestCacheableOutput({"something to cache": "dasdaasda"})
    fingerprint_with_output = uuid.uuid4().hex
    cache.cache_output(
        key_with_output, cached_output, fingerprint_with_output, default_model_storage
    )

    # Second soon-to-be-incompatible entry: fingerprint only, nothing serialized.
    key_without_output = uuid.uuid4().hex
    fingerprint_without_output = uuid.uuid4().hex
    cache.cache_output(
        key_without_output, None, fingerprint_without_output, default_model_storage
    )

    # Third entry is written under a future Rasa version which will
    # still be compatible after the simulated upgrade below.
    monkeypatch.setattr(rasa, "__version__", "99999.9.5")
    compatible_key = uuid.uuid4().hex
    compatible_output = TestCacheableOutput({"something to cache2": "dasdaasda"})
    compatible_fingerprint = uuid.uuid4().hex
    cache.cache_output(
        compatible_key, compatible_output, compatible_fingerprint, default_model_storage
    )

    # Simulate an upgrade which raises the minimum compatible version above
    # the version the first two entries were written with.
    monkeypatch.setattr(
        rasa.engine.caching, "MINIMUM_COMPATIBLE_VERSION", "99999.8.10"
    )

    future_cache = LocalTrainingCache()

    # The incompatible fingerprints can no longer be retrieved ...
    assert future_cache.get_cached_output_fingerprint(key_with_output) is None
    assert future_cache.get_cached_output_fingerprint(key_without_output) is None

    # ... and neither can their cached results.
    assert (
        future_cache.get_cached_result(
            fingerprint_with_output, "some_node", default_model_storage
        )
        is None
    )
    assert (
        future_cache.get_cached_result(
            fingerprint_without_output, "some_node", default_model_storage
        )
        is None
    )

    # The entry written with the compatible future version survives intact.
    assert (
        future_cache.get_cached_output_fingerprint(compatible_key)
        == compatible_fingerprint
    )
    restored = future_cache.get_cached_result(
        compatible_fingerprint, "some_node", default_model_storage
    )
    assert isinstance(restored, TestCacheableOutput)
    assert restored == compatible_output

    # Only the cache database and the surviving entry's directory remain
    # on disk — the incompatible payloads were deleted.
    assert set(tmp_path.glob("*")) == {
        tmp_path / DEFAULT_CACHE_NAME,
        restored.cache_dir,
    }
Example #2
0
def test_cache_creates_location_if_missing(
    tmp_path: Path, monkeypatch: MonkeyPatch
):
    """Instantiating the cache creates its configured directory on demand."""
    missing_dir = tmp_path / "directory does not exist yet"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(missing_dir))

    _ = LocalTrainingCache()

    assert missing_dir.is_dir()
Example #3
0
def test_get_cached_result_when_result_no_longer_available(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    """A cache hit whose payload vanished from disk degrades to a miss."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    cache = LocalTrainingCache()

    cached = TestCacheableOutput({"something to cache": "dasdaasda"})
    fingerprint = uuid.uuid4().hex
    cache.cache_output(uuid.uuid4().hex, cached, fingerprint,
                       default_model_storage)

    # Wipe every cached payload directory behind the cache's back.
    for entry in tmp_path.glob("*"):
        if entry.is_dir():
            shutil.rmtree(entry)

    result = cache.get_cached_result(fingerprint, "some_node",
                                     default_model_storage)
    assert result is None
Example #4
0
def test_cache_exceeds_size_but_not_in_database(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    """Untracked data in the cache directory doesn't block new cache writes."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    max_cache_size = 5
    # Pretend the cache is capped at `max_cache_size` MB.
    monkeypatch.setenv(CACHE_SIZE_ENV, str(max_cache_size))

    # Plant data the cache metadata knows nothing about:
    untracked_dir = tmp_path / "some dir"
    untracked_dir.mkdir()
    # ... a subdirectory already at the size limit ...
    tests.conftest.create_test_file_with_size(untracked_dir, max_cache_size)
    # ... and a loose file at the size limit as well.
    tests.conftest.create_test_file_with_size(tmp_path, max_cache_size)

    cache = LocalTrainingCache()

    # Caching still succeeds despite the untracked data.
    key = uuid.uuid4().hex
    cached = TestCacheableOutput({"something to cache": "dasdaasda"},
                                 size_in_mb=2)
    fingerprint = uuid.uuid4().hex
    cache.cache_output(key, cached, fingerprint, default_model_storage)

    assert cache.get_cached_output_fingerprint(key) == fingerprint
    assert cache.get_cached_result(fingerprint, "some_node",
                                   default_model_storage)
Example #5
0
def test_skip_caching_if_cache_size_is_zero(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    """Setting the cache size to zero turns the cache into a no-op."""
    cache_dir = tmp_path / "cache"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(cache_dir))

    # A size of "0" disables caching entirely.
    monkeypatch.setenv(CACHE_SIZE_ENV, "0")

    cache = LocalTrainingCache()

    # Attempt to cache something.
    key = uuid.uuid4().hex
    cached = TestCacheableOutput({"something to cache": "dasdaasda"})
    fingerprint = uuid.uuid4().hex
    cache.cache_output(key, cached, fingerprint, default_model_storage)

    # not even the database and no subdirectory was created ⛔️
    assert list(tmp_path.glob("*")) == []

    assert cache.get_cached_output_fingerprint(key) is None

    result = cache.get_cached_result(fingerprint, "some_node",
                                     default_model_storage)
    assert result is None
Example #6
0
def test_clean_up_of_cached_result_if_database_fails(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    """A failed metadata write leaves no orphaned payload behind on disk."""
    database_name = "test.db"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))
    monkeypatch.setenv(CACHE_DB_NAME_ENV, database_name)

    cache = LocalTrainingCache()

    # Removing the database file makes the metadata insert fail.
    (tmp_path / database_name).unlink()

    key = uuid.uuid4().hex
    cached = TestCacheableOutput({"something to cache": "dasdaasda"},
                                 size_in_mb=2)
    fingerprint = uuid.uuid4().hex

    with pytest.raises(OperationalError):
        cache.cache_output(key, cached, fingerprint, default_model_storage)

    # Only the database file remains — no payload directory was left behind.
    assert list(tmp_path.glob("*")) == [tmp_path / database_name]
Example #7
0
def test_skip_caching_if_result_exceeds_max_size(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    """An output larger than the cache keeps its fingerprint but no payload."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    # Cap the cache at "1" MB — smaller than the output below.
    monkeypatch.setenv(CACHE_SIZE_ENV, "1")

    cache = LocalTrainingCache()

    # Try to cache a 2 MB output.
    key = uuid.uuid4().hex
    oversized = TestCacheableOutput({"something to cache": "dasdaasda"},
                                    size_in_mb=2)
    fingerprint = uuid.uuid4().hex
    cache.cache_output(key, oversized, fingerprint, default_model_storage)

    # The fingerprint mapping is still recorded ...
    assert cache.get_cached_output_fingerprint(key) == fingerprint

    # ... but the oversized payload itself was never stored.
    result = cache.get_cached_result(fingerprint, "some_node",
                                     default_model_storage)
    assert result is None
Example #8
0
def test_delete_using_lru_if_cache_exceeds_size(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    """Least-recently-used entries are evicted when the cache exceeds its size.

    The scenario is order-sensitive: entry 1 is "touched" just before the
    over-capacity insert so that entries 2 and 3 become the LRU victims.
    """
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    # Pretend we have a cache of certain size
    monkeypatch.setenv(CACHE_SIZE_ENV, "5")

    cache = LocalTrainingCache()

    # Cache an item
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key1, output1, output_fingerprint1,
                       default_model_storage)

    # Cache an non cacheable item to spice it up 🔥
    fingerprint_key2 = uuid.uuid4().hex
    output2 = TestCacheableOutput(None)
    output_fingerprint2 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key2, output2, output_fingerprint2,
                       default_model_storage)

    # Cache another item
    fingerprint_key3 = uuid.uuid4().hex
    output3 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint3 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key3, output3, output_fingerprint3,
                       default_model_storage)

    # Assert both are there
    for output_fingerprint in [output_fingerprint1, output_fingerprint2]:
        assert cache.get_cached_result(output_fingerprint, "some_node",
                                       default_model_storage)

    # Checkout the first item as this updates `last_used` and hence affects LRU
    cache.get_cached_output_fingerprint(fingerprint_key1)

    # Now store something which requires a deletion
    fingerprint_key4 = uuid.uuid4().hex
    output4 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint4 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key4, output4, output_fingerprint4,
                       default_model_storage)

    # Assert cached results 1 (recently used) and 4 (just added) are there
    for output_fingerprint in [output_fingerprint1, output_fingerprint4]:
        assert cache.get_cached_result(output_fingerprint, "some_node",
                                       default_model_storage)

    # Cached result 2 and 3 were deleted
    assert cache.get_cached_output_fingerprint(fingerprint_key2) is None
    assert (cache.get_cached_result(output_fingerprint3, "some_node",
                                    default_model_storage) is None)
Example #9
0
 def create_local_cache(path: Path) -> LocalTrainingCache:
     # Point the cache at `path` via the environment variable, then build a
     # fresh instance. NOTE: `monkeypatch` comes from the enclosing (not
     # visible) fixture scope.
     monkeypatch.setenv(CACHE_LOCATION_ENV, str(path))
     return LocalTrainingCache()
Example #10
0
def _train_graph(
    file_importer: TrainingDataImporter,
    training_type: TrainingType,
    output_path: Text,
    fixed_model_name: Text,
    model_to_finetune: Optional[Text] = None,
    force_full_training: bool = False,
    dry_run: bool = False,
    **kwargs: Any,
) -> TrainingResult:
    """Trains (or, for a dry run, fingerprints) a model via the training graph.

    Args:
        file_importer: Source of the training data and model configuration.
        training_type: The kind of training to perform.
        output_path: Directory the packaged model is written to.
        fixed_model_name: Explicit model name used when building the file name.
        model_to_finetune: Optional path to a previous model to fine-tune;
            when set, finetuning mode is enabled.
        force_full_training: If `True`, retraining is forced regardless of
            cached results.
        dry_run: If `True`, only the fingerprint run is performed and its
            result returned — no training happens.
        **kwargs: Extra options forwarded to the recipe's graph configuration.

    Returns:
        The training result containing the path of the trained model.
    """
    if model_to_finetune:
        # Resolve the model to finetune; exits the process if none is found.
        model_to_finetune = rasa.model.get_model_for_finetuning(
            model_to_finetune)
        if not model_to_finetune:
            rasa.shared.utils.cli.print_error_and_exit(
                f"No model for finetuning found. Please make sure to either "
                f"specify a path to a previous model or to have a finetunable "
                f"model within the directory '{output_path}'.")

        rasa.shared.utils.common.mark_as_experimental_feature(
            "Incremental Training feature")

    is_finetuning = model_to_finetune is not None

    # Build the full graph configuration: auto-configure any missing keys,
    # then turn the config into a recipe-specific graph schema and validate it.
    config = file_importer.get_config()
    recipe = Recipe.recipe_for_name(config.get("recipe"))
    config, _missing_keys, _configured_keys = recipe.auto_configure(
        file_importer.get_config_file_for_auto_config(),
        config,
        training_type,
    )
    model_configuration = recipe.graph_config_for_recipe(
        config,
        kwargs,
        training_type=training_type,
        is_finetuning=is_finetuning,
    )
    rasa.engine.validation.validate(model_configuration)

    # Train inside a temporary directory; only the packaged model ends up
    # in `output_path`.
    with tempfile.TemporaryDirectory() as temp_model_dir:
        model_storage = _create_model_storage(is_finetuning, model_to_finetune,
                                              Path(temp_model_dir))
        cache = LocalTrainingCache()
        trainer = GraphTrainer(model_storage, cache, DaskGraphRunner)

        if dry_run:
            # Fingerprint only — report what would be retrained and stop.
            fingerprint_status = trainer.fingerprint(
                model_configuration.train_schema, file_importer)
            return _dry_run_result(fingerprint_status, force_full_training)

        model_name = _determine_model_name(fixed_model_name, training_type)
        full_model_path = Path(output_path, model_name)

        with telemetry.track_model_training(
                file_importer, model_type=training_type.model_type):
            trainer.train(
                model_configuration,
                file_importer,
                full_model_path,
                force_retraining=force_full_training,
                is_finetuning=is_finetuning,
            )
            rasa.shared.utils.cli.print_success(
                f"Your Rasa model is trained and saved at '{full_model_path}'."
            )

        return TrainingResult(str(full_model_path), 0)
Example #11
0
 def create_local_cache(path: Path) -> LocalTrainingCache:
     # Redirect the cache to `path` by patching the class-level location
     # lookup, then build a fresh instance. NOTE: `monkeypatch` comes from
     # the enclosing (not visible) fixture scope. NOTE(review): the
     # replacement lambda takes no arguments — this assumes
     # `_get_cache_location` is called unbound (e.g. a staticmethod);
     # confirm against the LocalTrainingCache definition.
     monkeypatch.setattr(LocalTrainingCache, "_get_cache_location", lambda: path)
     return LocalTrainingCache()