Example 1
def test_read_only_manager_with_fetch(version_asset_name, version_1, version_2,
                                      versioning, base_dir, monkeypatch):
    if versioning:
        monkeypatch.setenv("MODELKIT_ASSETS_VERSIONING_SYSTEM", versioning)

    # Prepare a read-only dir with raw assets
    working_dir = os.path.join(base_dir, "working-dir")
    shutil.copytree(
        os.path.join(TEST_DIR, "testdata", "test-bucket", "assets-prefix"),
        working_dir)
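    # Make the copied assets dir read-only (r-x) so the manager cannot write into it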
    os.chmod(working_dir, stat.S_IREAD | stat.S_IEXEC)

    try:
        manager = AssetsManager(
            assets_dir=working_dir,
            storage_provider=None,
        )

        res = manager.fetch_asset(f"category/{version_asset_name}:{version_1}",
                                  return_info=True)
        assert res["path"] == os.path.join(working_dir, "category",
                                           version_asset_name, version_1)

        res = manager.fetch_asset(f"category/{version_asset_name}",
                                  return_info=True)
        assert res["path"] == os.path.join(working_dir, "category",
                                           version_asset_name, version_2)

        if versioning in ["major_minor", None]:
            res = manager.fetch_asset(f"category/{version_asset_name}:0",
                                      return_info=True)
            assert res["path"] == os.path.join(working_dir, "category",
                                               version_asset_name, "0.1")
    finally:
        os.chmod(working_dir, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
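Note: these snippets are lifted from modelkit's test suite and omit their shared imports and pytest fixtures. A sketch of what they assume is shown below; the modelkit module paths are an assumption and may differ between versions.

import os
import shutil
import stat
import subprocess
import uuid

import pytest

# Assumed modelkit module paths -- check them against your installed version:
from modelkit.assets import errors
from modelkit.assets.manager import AssetsManager
from modelkit.assets.remote import StorageProvider
from modelkit.assets.settings import AssetSpec

# Fixtures such as working_dir, base_dir, monkeypatch, TEST_DIR / test_path and
# the parametrized version names, as well as helpers like _get_mock_gcs_client,
# are provided by the test suite's conftest and are not reproduced here.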
Example 2
def test_local_manager_with_fetch(version_asset_name, version_1, version_2,
                                  versioning, working_dir, monkeypatch):
    if versioning:
        monkeypatch.setenv("MODELKIT_ASSETS_VERSIONING_SYSTEM", versioning)

    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=os.path.join(TEST_DIR, "testdata", "test-bucket"),
            prefix="assets-prefix",
        ),
    )

    res = manager.fetch_asset(f"category/{version_asset_name}:{version_1}",
                              return_info=True)
    assert res["path"] == os.path.join(working_dir, "category",
                                       version_asset_name, version_1)

    res = manager.fetch_asset(f"category/{version_asset_name}",
                              return_info=True)
    assert res["path"] == os.path.join(working_dir, "category",
                                       version_asset_name, version_2)

    if versioning in ["major_minor", None]:
        res = manager.fetch_asset(f"category/{version_asset_name}:0",
                                  return_info=True)
        assert res["path"] == os.path.join(working_dir, "category",
                                           version_asset_name, "0.1")
Example 3
def download_assets(
    assetsmanager_settings: Optional[dict] = None,
    configuration: Optional[Mapping[str, Union[Dict[str, Any],
                                               ModelConfiguration]]] = None,
    models: Optional[LibraryModelsType] = None,
    required_models: Optional[List[str]] = None,
) -> Tuple[Dict[str, Set[str]], Dict[str, AssetInfo]]:
    assetsmanager_settings = assetsmanager_settings or {}
    assets_manager = AssetsManager(**assetsmanager_settings)

    configuration = configure(models=models, configuration=configuration)

    models_assets = {}
    assets_info = {}

    required_models = required_models or list(configuration)

    for model in required_models:
        models_assets[model] = list_assets(required_models=[model],
                                           configuration=configuration)
        for asset in models_assets[model]:
            if asset in assets_info:
                continue
            assets_info[asset] = AssetInfo(**assets_manager.fetch_asset(
                asset,
                return_info=True,
            ))
    return models_assets, assets_info
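A rough usage sketch for this helper; the module and model name below are illustrative placeholders, not from the source.

# Illustrative call: resolve and pre-download every asset "my_model" depends on.
models_assets, assets_info = download_assets(
    models=my_models_module,       # placeholder: a module or list of Model classes
    required_models=["my_model"],  # placeholder model name
)
# models_assets: Dict[str, Set[str]]  -- model name -> asset specs it requires
# assets_info:   Dict[str, AssetInfo] -- asset spec -> resolved path/version info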
Example 4
def fetch_asset(asset, download):
    """Fetch an asset and download if necessary"""
    manager = AssetsManager()

    info = manager.fetch_asset(asset,
                               return_info=True,
                               force_download=download)

    console = Console()
    console.print(info)
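The asset strings passed to fetch_asset throughout these examples follow a name:version[sub/part] pattern; a short sketch with illustrative asset names.

# Spec strings as used in these examples (asset names are illustrative):
manager = AssetsManager()
manager.fetch_asset("category/some-data.ext")                # latest available version
manager.fetch_asset("category/some-data.ext:1.0")            # pinned version
manager.fetch_asset("category/some-data.ext:1")              # latest minor under major 1 (major/minor versioning)
manager.fetch_asset("category/some-data.ext:1.0[sub/part]")  # a sub-part of a directory asset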
Example 5
def test_assetsmanager_init(monkeypatch, settings_dict, env_vars, valid,
                            exception, has_storage_provider):
    for key, value in env_vars.items():
        monkeypatch.setenv(key, value)
    if valid:
        mng = AssetsManager(**settings_dict)
        if has_storage_provider:
            assert mng.storage_provider
    else:
        with pytest.raises(exception):
            AssetsManager(**settings_dict)
Example 6
def gcs_assetsmanager(request, working_dir):
    # kill previous fake gcs container (if any)
    subprocess.call(["docker", "rm", "-f", "modelkit-storage-gcs-tests"],
                    stderr=subprocess.DEVNULL)
    # start fake-gcs-server as a docker container
    gcs_proc = subprocess.Popen([
        "docker",
        "run",
        "-p",
        "4443:4443",
        "--name",
        "modelkit-storage-gcs-tests",
        "fsouza/fake-gcs-server",
    ])

    def finalize():
        subprocess.call(["docker", "stop", "modelkit-storage-gcs-tests"])
        gcs_proc.terminate()
        gcs_proc.wait()

    request.addfinalizer(finalize)

    storage_provider = StorageProvider(
        prefix="test-prefix",
        provider="gcs",
        bucket="test-bucket",
        client=_get_mock_gcs_client(),
    )
    storage_provider.driver.client.create_bucket("test-bucket")
    mng = AssetsManager(assets_dir=working_dir,
                        storage_provider=storage_provider)
    yield mng
Example 7
def download_asset(assets_dir, driver_path, asset_name):
    """
    Download the asset
    """
    am = AssetsManager(
        assets_dir=assets_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=driver_path,
            prefix="prefix",
        ),
    )
    asset_dict = am.fetch_asset(asset_name, return_info=True)
    if asset_dict["from_cache"]:
        print("__ok_from_cache__")
    else:
        print("__ok_not_from_cache__")
Example 8
def test_fetch_asset_version_no_storage_provider(version_asset_name, version,
                                                 versioning):
    manager = AssetsManager(assets_dir=os.path.join(
        TEST_DIR, "testdata", "test-bucket", "assets-prefix"))
    asset_name = os.path.join("category", version_asset_name)
    spec = AssetSpec(name=asset_name, version=version, versioning=versioning)

    asset_dict = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )
    assert asset_dict == {
        "from_cache": True,
        "version": version,
        "path": os.path.join(manager.assets_dir, asset_name, version),
    }

    with pytest.raises(errors.StorageDriverError):
        manager._fetch_asset_version(
            spec=spec,
            _force_download=True,
        )

    spec.name = os.path.join("not-existing-asset", version_asset_name)
    with pytest.raises(errors.LocalAssetDoesNotExistError):
        manager._fetch_asset_version(
            spec=spec,
            _force_download=False,
        )
Example 9
def deploy_tf_models(lib, mode, config_name="config", verbose=False):
    manager = AssetsManager()
    configuration = lib.configuration
    model_paths = {}
    if mode == "remote":
        if not manager.storage_provider:
            raise ValueError(
                "A remote storage provider is required for `remote` mode")
        driver = manager.storage_provider.driver

    for model_name in lib.required_models:
        model_configuration = configuration[model_name]
        if not issubclass(model_configuration.model_type, TensorflowModel):
            logger.debug(f"Skipping non TF model `{model_name}`")
            continue
        if not model_configuration.asset:
            raise ValueError(
                f"TensorFlow model `{model_name}` does not have an asset")
        spec = AssetSpec.from_string(model_configuration.asset)
        if mode == "local-docker":
            model_paths[model_name] = "/".join(
                ("/config", spec.name, spec.version or "")) + (spec.sub_part
                                                               or "")
        elif mode == "local-process":
            model_paths[model_name] = os.path.join(
                manager.assets_dir,
                *spec.name.split("/"),
                f"{spec.version}",
                *(spec.sub_part.split("/") if spec.sub_part else ()),
            )
        elif mode == "remote":
            object_name = manager.storage_provider.get_object_name(
                spec.name, spec.version or "")
            model_paths[model_name] = driver.get_object_uri(object_name)

    if mode == "local-docker" or mode == "local-process":
        logger.info("Checking that local models are present.")
        download_assets(configuration=configuration,
                        required_models=lib.required_models)
    target = os.path.join(manager.assets_dir, f"{config_name}.config")

    if model_paths:
        logger.info(
            "Writing TF serving configuration locally.",
            config_name=config_name,
            target=target,
        )
        write_config(target, model_paths, verbose=verbose)
    else:
        logger.info(
            "Nothing to write",
            config_name=config_name,
            target=target,
        )
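A hedged sketch of calling this helper; lib stands for an already-configured model library and is illustrative.

# Illustrative call -- lib.required_models should include TensorflowModel-backed models.
deploy_tf_models(lib, mode="local-process", config_name="config", verbose=True)
# mode is one of "local-docker", "local-process" or "remote" (see the branches above);
# "remote" additionally requires the AssetsManager to have a storage provider.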
Example 10
    def override_assets_manager(self):
        if not self.settings.override_assets_dir:
            return None

        if self._override_assets_manager is None:
            logger.info("Instantiating Override AssetsManager",
                        lazy_loading=self._lazy_loading)
            self._override_assets_manager = AssetsManager(
                assets_dir=self.settings.override_assets_dir)
            self._override_assets_manager.storage_provider = None

        return self._override_assets_manager
Example 11
def test_assetsmanager_force_download(monkeypatch, base_dir, working_dir):
    # Setup a bucket
    bucket_path = os.path.join(base_dir, "local_driver", "bucket")
    os.makedirs(bucket_path)

    mng = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(provider="local", bucket=bucket_path),
    )
    data_path = os.path.join(test_path, "testdata", "some_data.json")
    mng.storage_provider.push(data_path, "category-test/some-data.ext", "1.0")

    asset_info = mng.fetch_asset("category-test/some-data.ext:1.0",
                                 return_info=True)
    assert not asset_info["from_cache"]

    asset_info_re = mng.fetch_asset("category-test/some-data.ext:1.0",
                                    return_info=True)
    assert asset_info_re["from_cache"]

    mng_force = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=bucket_path,
            force_download=True,
        ),
    )
    asset_info_force = mng_force.fetch_asset("category-test/some-data.ext:1.0",
                                             return_info=True)
    assert not asset_info_force["from_cache"]

    monkeypatch.setenv("MODELKIT_STORAGE_FORCE_DOWNLOAD", "True")
    mng_force = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(provider="local", bucket=bucket_path),
    )
    asset_info_force_env = mng_force.fetch_asset(
        "category-test/some-data.ext:1.0", return_info=True)
    assert not asset_info_force_env["from_cache"]
Example 12
def local_assetsmanager(base_dir, working_dir):
    bucket_path = os.path.join(base_dir, "local_driver", "bucket")
    os.makedirs(bucket_path)

    mng = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=bucket_path,
        ),
    )
    yield mng
    _delete_all_objects(mng)
Example 13
def test_fetch_asset_version_with_sub_parts(version_asset_name, version,
                                            versioning, working_dir):
    manager = AssetsManager(assets_dir=os.path.join(
        TEST_DIR, "testdata", "test-bucket", "assets-prefix"))
    asset_name = os.path.join("category", version_asset_name)
    sub_part = "sub_part"
    spec = AssetSpec(name=asset_name,
                     version=version,
                     sub_part=sub_part,
                     versioning=versioning)

    # no _has_succeeded cache => fetch
    asset_dict = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )

    assert asset_dict == {
        "from_cache": True,
        "version": version,
        "path": os.path.join(manager.assets_dir, asset_name, version,
                             sub_part),
    }
Example 14
def _start_s3_manager(working_dir):
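    # Targets a local MinIO server (default endpoint 127.0.0.1:9000, credentials minioadmin/minioadmin)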
    mng = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            prefix=f"test-assets-{uuid.uuid1().hex}",
            provider="s3",
            aws_default_region="us-east-1",
            bucket="test-assets",
            aws_access_key_id="minioadmin",
            aws_secret_access_key="minioadmin",
            aws_session_token=None,
            s3_endpoint="http://127.0.0.1:9000",
        ),
    )
    mng.storage_provider.driver.client.create_bucket(Bucket="test-assets")
    return mng
Example 15
def test_local_manager_invalid_configuration(working_dir):

    modelkit_storage_bucket = working_dir
    modelkit_storage_prefix = "assets-prefix"
    modelkit_assets_dir = os.path.join(modelkit_storage_bucket,
                                       modelkit_storage_prefix)
    os.makedirs(modelkit_assets_dir)

    with pytest.raises(errors.StorageDriverError):
        AssetsManager(
            assets_dir=modelkit_assets_dir,
            storage_provider=StorageProvider(
                provider="local",
                prefix=modelkit_storage_prefix,
                bucket=modelkit_storage_bucket,
            ),
        )
Example 16
def _start_az_manager(working_dir):
    mng = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            prefix=f"test-assets-{uuid.uuid1().hex}",
            provider="az",
            bucket="test-assets",
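            # Well-known Azurite development-storage credentials (devstoreaccount1); not a real secret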
            connection_string=(
                "DefaultEndpointsProtocol=http;"
                "AccountName=devstoreaccount1;"
                "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSR"
                "Z6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
                "BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
                "QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;"
                "TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;"),
        ),
    )
    mng.storage_provider.driver.client.create_container("test-assets")
    return mng
Example 17
def test_assetsmanager_retry_on_fail(base_dir, working_dir):
    # Setup a bucket
    bucket_path = os.path.join(base_dir, "local_driver", "bucket")
    os.makedirs(bucket_path)

    mng = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=bucket_path,
        ),
    )
    # Try with a file asset
    data_path = os.path.join(test_path, "testdata", "some_data.json")
    mng.storage_provider.push(data_path, "category-test/some-data.ext", "1.0")

    asset_info = mng.fetch_asset("category-test/some-data.ext:1.0",
                                 return_info=True)
    assert not asset_info["from_cache"]
    assert os.path.exists(_success_file_path(asset_info["path"]))

    os.unlink(_success_file_path(asset_info["path"]))

    asset_info = mng.fetch_asset("category-test/some-data.ext:1.0",
                                 return_info=True)
    assert not asset_info["from_cache"]

    # Try with a directory asset
    data_path = os.path.join(test_path, "testdata")
    mng.storage_provider.push(data_path, "category-test/some-data-dir", "1.0")

    asset_info = mng.fetch_asset("category-test/some-data-dir:1.0",
                                 return_info=True)
    assert not asset_info["from_cache"]
    assert os.path.exists(_success_file_path(asset_info["path"]))

    os.unlink(_success_file_path(asset_info["path"]))

    asset_info = mng.fetch_asset("category-test/some-data-dir:1.0",
                                 return_info=True)
    assert not asset_info["from_cache"]
Example 18
    def assets_manager(self):
        if self._assets_manager is None:
            logger.info("Instantiating AssetsManager",
                        lazy_loading=self._lazy_loading)
            self._assets_manager = AssetsManager(**self.assetsmanager_settings)
        return self._assets_manager
Example 19
def test_assetsmanager_default_assets_dir():
    manager = AssetsManager()
    assert manager.assets_dir == os.getcwd()
    assert manager.storage_provider is None
Example 20
def test_local_manager_no_versions(working_dir):
    # This test makes sure that the AssetsManager is able to retrieve files
    # referred to by their paths relative to the working_dir
    os.makedirs(os.path.join(working_dir, "something", "else"))
    with open(os.path.join(working_dir, "something", "else", "deep.txt"),
              "w") as f:
        f.write("OK")

    # valid relative path to assets dir
    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset("something/else/deep.txt", return_info=True)
    assert res["path"] == os.path.join(working_dir, "something", "else",
                                       "deep.txt")

    # valid relative path to CWD
    manager = AssetsManager()
    res = manager.fetch_asset("README.md", return_info=True)
    assert res["path"] == os.path.join(os.getcwd(), "README.md")

    # valid relative path to CWD with assets dir
    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset("README.md", return_info=True)
    assert res["path"] == os.path.join(os.getcwd(), "README.md")

    # valid absolute path
    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset(os.path.join(os.getcwd(), "README.md"),
                              return_info=True)
    assert res["path"] == os.path.join(os.getcwd(), "README.md")

    # valid relative path dir
    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset("something", return_info=True)
    assert res["path"] == os.path.join(working_dir, "something")

    with open(os.path.join(working_dir, "something.txt"), "w") as f:
        f.write("OK")
    res = manager.fetch_asset("something.txt", return_info=True)
    assert res["path"] == os.path.join(working_dir, "something.txt")

    with pytest.raises(errors.LocalAssetDoesNotExistError):
        res = manager.fetch_asset("something.txt:0.1", return_info=True)

    with pytest.raises(errors.LocalAssetDoesNotExistError):
        res = manager.fetch_asset("something.txt:0", return_info=True)

    with pytest.raises(errors.AssetDoesNotExistError):
        res = manager.fetch_asset("doesnotexist.txt", return_info=True)
Example 21
def test_local_manager_with_versions(v00, v01, v11, v10, versioning,
                                     working_dir, monkeypatch):
    if versioning:
        monkeypatch.setenv("MODELKIT_ASSETS_VERSIONING_SYSTEM", versioning)

    os.makedirs(os.path.join(working_dir, "something", v00))
    open(os.path.join(working_dir, "something", v00, ".SUCCESS"), "w").close()

    os.makedirs(os.path.join(working_dir, "something", v01))
    open(os.path.join(working_dir, "something", v01, ".SUCCESS"), "w").close()

    os.makedirs(os.path.join(working_dir, "something", v11, "subpart"))
    with open(
            os.path.join(working_dir, "something", v11, "subpart", "deep.txt"),
            "w") as f:
        f.write("OK")
    open(os.path.join(working_dir, "something", v11, ".SUCCESS"), "w").close()

    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset(f"something:{v11}[subpart/deep.txt]",
                              return_info=True)
    assert res["path"] == os.path.join(working_dir, "something", v11,
                                       "subpart", "deep.txt")

    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset(f"something/{v11}/subpart/deep.txt",
                              return_info=True)
    assert res["path"] == os.path.join(working_dir, "something", v11,
                                       "subpart", "deep.txt")

    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset(f"something:{v00}", return_info=True)
    assert res["path"] == os.path.join(working_dir, "something", v00)

    manager = AssetsManager(assets_dir=working_dir)
    res = manager.fetch_asset("something", return_info=True)
    assert res["path"] == os.path.join(working_dir, "something", v11)

    if versioning in (None, "major_minor"):
        manager = AssetsManager(assets_dir=working_dir)
        res = manager.fetch_asset("something:0", return_info=True)
        assert res["path"] == os.path.join(working_dir, "something", v01)

    try:
        manager = AssetsManager()
        local_dir = os.path.join("tmp-local-asset", v10, "subpart")
        os.makedirs(local_dir)
        open(os.path.join("tmp-local-asset", v10, ".SUCCESS"), "w").close()

        shutil.copy("README.md", local_dir)

        res = manager.fetch_asset(f"tmp-local-asset:{v10}[subpart/README.md]",
                                  return_info=True)
        assert res["path"] == os.path.abspath(
            os.path.join(local_dir, "README.md"))

        res = manager.fetch_asset("tmp-local-asset", return_info=True)
        assert res["path"] == os.path.abspath(os.path.join(local_dir, ".."))

        abs_path_to_readme = os.path.join(os.path.abspath(local_dir),
                                          "README.md")
        res = manager.fetch_asset(abs_path_to_readme, return_info=True)
        assert res["path"] == abs_path_to_readme
    finally:
        shutil.rmtree("tmp-local-asset")
Example 22
def test_fetch_asset_version_with_storage_provider(version_asset_name, version,
                                                   versioning, working_dir):

    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=os.path.join(TEST_DIR, "testdata", "test-bucket"),
            prefix="assets-prefix",
        ),
    )

    asset_name = os.path.join("category", version_asset_name)
    spec = AssetSpec(name=asset_name, version=version, versioning=versioning)

    # no _has_succeeded cache => fetch
    asset_dict = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )

    del asset_dict["meta"]  # drop the download metadata before comparing
    assert asset_dict == {
        "from_cache": False,
        "version": version,
        "path": os.path.join(working_dir, asset_name, version),
    }

    # cache hit
    asset_dict = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )

    assert asset_dict == {
        "from_cache": True,
        "version": version,
        "path": os.path.join(working_dir, asset_name, version),
    }

    # cache hit, but force a re-download
    asset_dict = manager._fetch_asset_version(
        spec=spec,
        _force_download=True,
    )

    del asset_dict["meta"]  # drop the download metadata before comparing
    assert asset_dict == {
        "from_cache": False,
        "version": version,
        "path": os.path.join(working_dir, asset_name, version),
    }

    # Re-download the asset when the local copy of the version is missing
    os.remove(os.path.join(working_dir, asset_name, version))
    asset_dict = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )

    del asset_dict["meta"]  # drop the download metadata before comparing
    assert asset_dict == {
        "from_cache": False,
        "version": version,
        "path": os.path.join(working_dir, asset_name, version),
    }
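Taken together, the assertions in these tests pin down the dictionary returned when return_info=True; a summary sketch with illustrative values.

# Shape of the info dict returned by fetch_asset / _fetch_asset_version:
info = {
    "path": "/local/path/to/asset/or/sub-part",  # illustrative
    "version": "1.0",                            # resolved version, illustrative
    "from_cache": False,  # True when served from the local assets_dir
    # "meta": {...}       # present only when the asset was actually downloaded
}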