def test_read_only_manager_with_fetch(version_asset_name, version_1, version_2, versioning, base_dir, monkeypatch):
    """Fetching from a read-only assets dir (no storage provider) still resolves versions."""
    if versioning:
        monkeypatch.setenv("MODELKIT_ASSETS_VERSIONING_SYSTEM", versioning)
    # Copy the raw test assets into a directory, then make it read-only
    working_dir = os.path.join(base_dir, "working-dir")
    shutil.copytree(
        os.path.join(TEST_DIR, "testdata", "test-bucket", "assets-prefix"),
        working_dir,
    )
    os.chmod(working_dir, stat.S_IREAD | stat.S_IEXEC)
    try:
        manager = AssetsManager(assets_dir=working_dir, storage_provider=None)
        asset_base = os.path.join(working_dir, "category", version_asset_name)

        # explicit version
        info = manager.fetch_asset(
            f"category/{version_asset_name}:{version_1}", return_info=True
        )
        assert info["path"] == os.path.join(asset_base, version_1)

        # no version => latest
        info = manager.fetch_asset(f"category/{version_asset_name}", return_info=True)
        assert info["path"] == os.path.join(asset_base, version_2)

        # major-only version resolution (major/minor scheme only)
        if versioning in ["major_minor", None]:
            info = manager.fetch_asset(
                f"category/{version_asset_name}:0", return_info=True
            )
            assert info["path"] == os.path.join(asset_base, "0.1")
    finally:
        # restore write permission so the tmp dir can be cleaned up
        os.chmod(working_dir, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
def test_local_manager_with_fetch(version_asset_name, version_1, version_2, versioning, working_dir, monkeypatch):
    """Fetching through a local storage provider resolves explicit, latest and partial versions."""
    if versioning:
        monkeypatch.setenv("MODELKIT_ASSETS_VERSIONING_SYSTEM", versioning)
    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=os.path.join(TEST_DIR, "testdata", "test-bucket"),
            prefix="assets-prefix",
        ),
    )
    asset_base = os.path.join(working_dir, "category", version_asset_name)

    # explicit version
    info = manager.fetch_asset(
        f"category/{version_asset_name}:{version_1}", return_info=True
    )
    assert info["path"] == os.path.join(asset_base, version_1)

    # no version => latest
    info = manager.fetch_asset(f"category/{version_asset_name}", return_info=True)
    assert info["path"] == os.path.join(asset_base, version_2)

    # major-only version resolution (major/minor scheme only)
    if versioning in ["major_minor", None]:
        info = manager.fetch_asset(
            f"category/{version_asset_name}:0", return_info=True
        )
        assert info["path"] == os.path.join(asset_base, "0.1")
def download_assets(
    assetsmanager_settings: Optional[dict] = None,
    configuration: Optional[Mapping[str, Union[Dict[str, Any], ModelConfiguration]]] = None,
    models: Optional[LibraryModelsType] = None,
    required_models: Optional[List[str]] = None,
) -> Tuple[Dict[str, Set[str]], Dict[str, AssetInfo]]:
    """Fetch every asset needed by a set of models.

    Args:
        assetsmanager_settings: keyword arguments forwarded to ``AssetsManager``.
        configuration: model configurations (or raw dicts) keyed by model name.
        models: library models used to build the configuration.
        required_models: models to resolve; defaults to every configured model.

    Returns:
        A ``(models_assets, assets_info)`` tuple mapping each model to its
        asset names, and each asset name to the fetched ``AssetInfo``.
    """
    assetsmanager_settings = assetsmanager_settings or {}
    assets_manager = AssetsManager(**assetsmanager_settings)
    configuration = configure(models=models, configuration=configuration)
    models_assets = {}
    assets_info = {}
    # Idiom fix: list(configuration) instead of [r for r in configuration]
    required_models = required_models or list(configuration)
    for model in required_models:
        models_assets[model] = list_assets(
            required_models=[model], configuration=configuration
        )
        for asset in models_assets[model]:
            if asset in assets_info:
                # asset shared by several models: fetch it only once
                continue
            assets_info[asset] = AssetInfo(
                **assets_manager.fetch_asset(
                    asset,
                    return_info=True,
                )
            )
    return models_assets, assets_info
def fetch_asset(asset, download):
    """Fetch an asset and download if necessary"""
    info = AssetsManager().fetch_asset(
        asset, return_info=True, force_download=download
    )
    # pretty-print the resulting asset info dict
    Console().print(info)
def test_assetsmanager_init(monkeypatch, settings_dict, env_vars, valid, exception, has_storage_provider):
    """AssetsManager construction either succeeds or raises, driven by settings and env vars."""
    for name, value in env_vars.items():
        monkeypatch.setenv(name, value)

    # invalid settings must raise the expected exception
    if not valid:
        with pytest.raises(exception):
            AssetsManager(**settings_dict)
        return

    manager = AssetsManager(**settings_dict)
    if has_storage_provider:
        assert manager.storage_provider
def gcs_assetsmanager(request, working_dir):
    """Fixture: an AssetsManager backed by a fake GCS server running in docker."""
    # remove any leftover container from a previous run
    subprocess.call(
        ["docker", "rm", "-f", "modelkit-storage-gcs-tests"],
        stderr=subprocess.DEVNULL,
    )
    # start fake-gcs-server in a docker container
    gcs_proc = subprocess.Popen(
        [
            "docker",
            "run",
            "-p",
            "4443:4443",
            "--name",
            "modelkit-storage-gcs-tests",
            "fsouza/fake-gcs-server",
        ]
    )

    def finalize():
        # stop the container and reap the docker client process
        subprocess.call(["docker", "stop", "modelkit-storage-gcs-tests"])
        gcs_proc.terminate()
        gcs_proc.wait()

    request.addfinalizer(finalize)

    storage_provider = StorageProvider(
        prefix="test-prefix",
        provider="gcs",
        bucket="test-bucket",
        client=_get_mock_gcs_client(),
    )
    storage_provider.driver.client.create_bucket("test-bucket")
    yield AssetsManager(assets_dir=working_dir, storage_provider=storage_provider)
def download_asset(assets_dir, driver_path, asset_name):
    """Download the asset and print whether it was served from the local cache."""
    manager = AssetsManager(
        assets_dir=assets_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=driver_path,
            prefix="prefix",
        ),
    )
    info = manager.fetch_asset(asset_name, return_info=True)
    # marker strings are parsed by the calling test process
    marker = "__ok_from_cache__" if info["from_cache"] else "__ok_not_from_cache__"
    print(marker)
def test_fetch_asset_version_no_storage_provider(version_asset_name, version, versioning):
    """Without a storage provider, cached assets resolve but downloads fail."""
    manager = AssetsManager(
        assets_dir=os.path.join(TEST_DIR, "testdata", "test-bucket", "assets-prefix")
    )
    asset_name = os.path.join("category", version_asset_name)
    spec = AssetSpec(name=asset_name, version=version, versioning=versioning)

    # asset present on disk => resolved from cache
    result = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )
    assert result == {
        "from_cache": True,
        "version": version,
        "path": os.path.join(manager.assets_dir, asset_name, version),
    }

    # forcing a download needs a storage provider
    with pytest.raises(errors.StorageDriverError):
        manager._fetch_asset_version(
            spec=spec,
            _force_download=True,
        )

    # an asset missing locally cannot be resolved at all
    spec.name = os.path.join("not-existing-asset", version_asset_name)
    with pytest.raises(errors.LocalAssetDoesNotExistError):
        manager._fetch_asset_version(
            spec=spec,
            _force_download=False,
        )
def deploy_tf_models(lib, mode, config_name="config", verbose=False):
    """Write a TF Serving configuration for the TensorFlow models of *lib*.

    Depending on *mode* ("local-docker", "local-process" or "remote"), model
    paths are rendered as in-container paths, host filesystem paths, or remote
    object URIs.
    """
    manager = AssetsManager()
    configuration = lib.configuration
    model_paths = {}
    if mode == "remote":
        if not manager.storage_provider:
            raise ValueError(
                "A remote storage provider is required for `remote` mode")
        driver = manager.storage_provider.driver
    for model_name in lib.required_models:
        conf = configuration[model_name]
        # only TensorFlow models appear in the serving configuration
        if not issubclass(conf.model_type, TensorflowModel):
            logger.debug(f"Skipping non TF model `{model_name}`")
            continue
        if not conf.asset:
            raise ValueError(
                f"TensorFlow model `{model_name}` does not have an asset")
        spec = AssetSpec.from_string(conf.asset)
        if mode == "local-docker":
            # path as seen from inside the serving container
            model_paths[model_name] = "/".join(
                ("/config", spec.name, spec.version or "")
            ) + (spec.sub_part or "")
        elif mode == "local-process":
            # path on the host filesystem
            model_paths[model_name] = os.path.join(
                manager.assets_dir,
                *spec.name.split("/"),
                f"{spec.version}",
                *(spec.sub_part.split("/") if spec.sub_part else ()),
            )
        elif mode == "remote":
            object_name = manager.storage_provider.get_object_name(
                spec.name, spec.version or "")
            model_paths[model_name] = driver.get_object_uri(object_name)

    if mode in ("local-docker", "local-process"):
        logger.info("Checking that local models are present.")
        download_assets(
            configuration=configuration, required_models=lib.required_models
        )

    target = os.path.join(manager.assets_dir, f"{config_name}.config")
    if model_paths:
        logger.info(
            "Writing TF serving configuration locally.",
            config_name=config_name,
            target=target,
        )
        write_config(target, model_paths, verbose=verbose)
    else:
        logger.info(
            "Nothing to write",
            config_name=config_name,
            target=target,
        )
def override_assets_manager(self):
    """Lazily instantiate the AssetsManager serving the override assets dir.

    Returns None when no override dir is configured; otherwise builds (once)
    and returns a cached AssetsManager pointed at that directory.
    """
    if not self.settings.override_assets_dir:
        return None
    if self._override_assets_manager is None:
        logger.info("Instantiating Override AssetsManager",
                    lazy_loading=self._lazy_loading)
        self._override_assets_manager = AssetsManager(
            assets_dir=self.settings.override_assets_dir)
        # the override manager only serves local files, never remote storage
        self._override_assets_manager.storage_provider = None
    return self._override_assets_manager
def test_assetsmanager_force_download(monkeypatch, base_dir, working_dir):
    """force_download (provider setting or env var) bypasses the local cache."""
    # set up a local bucket and push one asset into it
    bucket_path = os.path.join(base_dir, "local_driver", "bucket")
    os.makedirs(bucket_path)
    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(provider="local", bucket=bucket_path),
    )
    data_path = os.path.join(test_path, "testdata", "some_data.json")
    manager.storage_provider.push(data_path, "category-test/some-data.ext", "1.0")

    # first fetch downloads, second one is served from cache
    first = manager.fetch_asset("category-test/some-data.ext:1.0", return_info=True)
    assert not first["from_cache"]
    second = manager.fetch_asset("category-test/some-data.ext:1.0", return_info=True)
    assert second["from_cache"]

    # force_download in the provider settings bypasses the cache
    forced_manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=bucket_path,
            force_download=True,
        ),
    )
    forced = forced_manager.fetch_asset(
        "category-test/some-data.ext:1.0", return_info=True
    )
    assert not forced["from_cache"]

    # ... and so does the MODELKIT_STORAGE_FORCE_DOWNLOAD env var
    monkeypatch.setenv("MODELKIT_STORAGE_FORCE_DOWNLOAD", "True")
    env_manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(provider="local", bucket=bucket_path),
    )
    from_env = env_manager.fetch_asset(
        "category-test/some-data.ext:1.0", return_info=True
    )
    assert not from_env["from_cache"]
def local_assetsmanager(base_dir, working_dir):
    """Fixture: an AssetsManager over a fresh local bucket; objects are deleted at teardown."""
    bucket_path = os.path.join(base_dir, "local_driver", "bucket")
    os.makedirs(bucket_path)

    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(provider="local", bucket=bucket_path),
    )
    yield manager

    # teardown: empty the bucket
    _delete_all_objects(manager)
def test_fetch_asset_version_with_sub_parts(version_asset_name, version, versioning, working_dir):
    """A spec carrying a sub_part resolves to the sub-path inside the cached asset."""
    manager = AssetsManager(
        assets_dir=os.path.join(TEST_DIR, "testdata", "test-bucket", "assets-prefix")
    )
    asset_name = os.path.join("category", version_asset_name)
    sub_part = "sub_part"
    spec = AssetSpec(
        name=asset_name,
        version=version,
        sub_part=sub_part,
        versioning=versioning,
    )

    # asset already on disk => served from cache, path includes the sub-part
    result = manager._fetch_asset_version(
        spec=spec,
        _force_download=False,
    )
    assert result == {
        "from_cache": True,
        "version": version,
        "path": os.path.join(manager.assets_dir, asset_name, version, sub_part),
    }
def _start_s3_manager(working_dir):
    """Build an AssetsManager against a local minio S3 endpoint and create its bucket."""
    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            # unique prefix so concurrent runs do not collide
            prefix=f"test-assets-{uuid.uuid1().hex}",
            provider="s3",
            aws_default_region="us-east-1",
            bucket="test-assets",
            aws_access_key_id="minioadmin",
            aws_secret_access_key="minioadmin",
            aws_session_token=None,
            s3_endpoint="http://127.0.0.1:9000",
        ),
    )
    manager.storage_provider.driver.client.create_bucket(Bucket="test-assets")
    return manager
def test_local_manager_invalid_configuration(working_dir):
    """A local provider whose bucket/prefix contains the assets dir itself is rejected."""
    bucket = working_dir
    prefix = "assets-prefix"
    assets_dir = os.path.join(bucket, prefix)
    os.makedirs(assets_dir)

    with pytest.raises(errors.StorageDriverError):
        AssetsManager(
            assets_dir=assets_dir,
            storage_provider=StorageProvider(
                provider="local",
                prefix=prefix,
                bucket=bucket,
            ),
        )
def _start_az_manager(working_dir):
    """Build an AssetsManager against a local Azurite blob endpoint and create its container."""
    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            # unique prefix so concurrent runs do not collide
            prefix=f"test-assets-{uuid.uuid1().hex}",
            provider="az",
            bucket="test-assets",
            # Azurite's well-known development-storage credentials
            connection_string=(
                "DefaultEndpointsProtocol=http;"
                "AccountName=devstoreaccount1;"
                "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSR"
                "Z6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;"
                "BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
                "QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;"
                "TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;"),
        ),
    )
    manager.storage_provider.driver.client.create_container("test-assets")
    return manager
def test_assetsmanager_retry_on_fail(base_dir, working_dir):
    """Deleting the success marker makes the manager re-download the asset."""
    bucket_path = os.path.join(base_dir, "local_driver", "bucket")
    os.makedirs(bucket_path)
    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=bucket_path,
        ),
    )

    # file asset: removing the .SUCCESS marker triggers a re-download
    data_path = os.path.join(test_path, "testdata", "some_data.json")
    manager.storage_provider.push(data_path, "category-test/some-data.ext", "1.0")
    info = manager.fetch_asset("category-test/some-data.ext:1.0", return_info=True)
    assert not info["from_cache"]
    assert os.path.exists(_success_file_path(info["path"]))
    os.unlink(_success_file_path(info["path"]))
    info = manager.fetch_asset("category-test/some-data.ext:1.0", return_info=True)
    assert not info["from_cache"]

    # directory asset: same behavior
    data_path = os.path.join(test_path, "testdata")
    manager.storage_provider.push(data_path, "category-test/some-data-dir", "1.0")
    info = manager.fetch_asset("category-test/some-data-dir:1.0", return_info=True)
    assert not info["from_cache"]
    assert os.path.exists(_success_file_path(info["path"]))
    os.unlink(_success_file_path(info["path"]))
    info = manager.fetch_asset("category-test/some-data-dir:1.0", return_info=True)
    assert not info["from_cache"]
def assets_manager(self):
    """Lazily instantiate and cache the AssetsManager built from this object's settings."""
    if self._assets_manager is None:
        logger.info("Instantiating AssetsManager",
                    lazy_loading=self._lazy_loading)
        self._assets_manager = AssetsManager(**self.assetsmanager_settings)
    return self._assets_manager
def test_assetsmanager_default_assets_dir():
    """With no arguments, assets_dir defaults to the CWD and no provider is set."""
    expected_dir = os.getcwd()
    manager = AssetsManager()
    assert manager.assets_dir == expected_dir
    assert manager.storage_provider is None
def test_local_manager_no_versions(working_dir):
    """Un-versioned assets resolve as plain paths.

    Paths may be relative to the assets dir, relative to the CWD, or absolute.
    """
    os.makedirs(os.path.join(working_dir, "something", "else"))
    with open(os.path.join(working_dir, "something", "else", "deep.txt"), "w") as f:
        f.write("OK")

    # relative path under the assets dir
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset("something/else/deep.txt", return_info=True)
    assert info["path"] == os.path.join(working_dir, "something", "else", "deep.txt")

    # relative path under the CWD (no assets dir configured)
    manager = AssetsManager()
    info = manager.fetch_asset("README.md", return_info=True)
    assert info["path"] == os.path.join(os.getcwd(), "README.md")

    # relative path under the CWD, even with an assets dir configured
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset("README.md", return_info=True)
    assert info["path"] == os.path.join(os.getcwd(), "README.md")

    # absolute path
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset(
        os.path.join(os.getcwd(), "README.md"), return_info=True
    )
    assert info["path"] == os.path.join(os.getcwd(), "README.md")

    # relative path to a directory
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset("something", return_info=True)
    assert info["path"] == os.path.join(working_dir, "something")

    with open(os.path.join(working_dir, "something.txt"), "w") as f:
        f.write("OK")
    info = manager.fetch_asset("something.txt", return_info=True)
    assert info["path"] == os.path.join(working_dir, "something.txt")

    # version specifiers cannot be applied to plain files
    with pytest.raises(errors.LocalAssetDoesNotExistError):
        manager.fetch_asset("something.txt:0.1", return_info=True)
    with pytest.raises(errors.LocalAssetDoesNotExistError):
        manager.fetch_asset("something.txt:0", return_info=True)
    # asset missing entirely
    with pytest.raises(errors.AssetDoesNotExistError):
        manager.fetch_asset("doesnotexist.txt", return_info=True)
def test_local_manager_with_versions(v00, v01, v11, v10, versioning, working_dir, monkeypatch):
    """Version resolution against a local assets dir: exact, sub-part, latest, major-only."""
    if versioning:
        monkeypatch.setenv("MODELKIT_ASSETS_VERSIONING_SYSTEM", versioning)

    something = os.path.join(working_dir, "something")
    # two plain versions...
    for version in (v00, v01):
        os.makedirs(os.path.join(something, version))
        open(os.path.join(something, version, ".SUCCESS"), "w").close()
    # ...and one with a sub-part file
    os.makedirs(os.path.join(something, v11, "subpart"))
    with open(os.path.join(something, v11, "subpart", "deep.txt"), "w") as f:
        f.write("OK")
    open(os.path.join(something, v11, ".SUCCESS"), "w").close()

    # exact version with a sub-part selector
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset(f"something:{v11}[subpart/deep.txt]", return_info=True)
    assert info["path"] == os.path.join(something, v11, "subpart", "deep.txt")

    # the same file referenced as a plain path
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset(f"something/{v11}/subpart/deep.txt", return_info=True)
    assert info["path"] == os.path.join(something, v11, "subpart", "deep.txt")

    # exact version
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset(f"something:{v00}", return_info=True)
    assert info["path"] == os.path.join(something, v00)

    # no version => latest
    manager = AssetsManager(assets_dir=working_dir)
    info = manager.fetch_asset("something", return_info=True)
    assert info["path"] == os.path.join(something, v11)

    if versioning in (None, "major_minor"):
        # major-only => latest minor of that major
        manager = AssetsManager(assets_dir=working_dir)
        info = manager.fetch_asset("something:0", return_info=True)
        assert info["path"] == os.path.join(something, v01)

    # assets resolved relative to the CWD
    try:
        manager = AssetsManager()
        local_dir = os.path.join("tmp-local-asset", v10, "subpart")
        os.makedirs(local_dir)
        open(os.path.join("tmp-local-asset", v10, ".SUCCESS"), "w").close()
        shutil.copy("README.md", local_dir)

        info = manager.fetch_asset(
            f"tmp-local-asset:{v10}[subpart/README.md]", return_info=True
        )
        assert info["path"] == os.path.abspath(os.path.join(local_dir, "README.md"))

        info = manager.fetch_asset("tmp-local-asset", return_info=True)
        assert info["path"] == os.path.abspath(os.path.join(local_dir, ".."))

        abs_path_to_readme = os.path.join(os.path.abspath(local_dir), "README.md")
        info = manager.fetch_asset(abs_path_to_readme, return_info=True)
        assert info["path"] == abs_path_to_readme
    finally:
        shutil.rmtree("tmp-local-asset")
def test_fetch_asset_version_with_storage_provider(version_asset_name, version, versioning, working_dir):
    """_fetch_asset_version with a storage provider: download, cache hit, force, repair."""
    manager = AssetsManager(
        assets_dir=working_dir,
        storage_provider=StorageProvider(
            provider="local",
            bucket=os.path.join(TEST_DIR, "testdata", "test-bucket"),
            prefix="assets-prefix",
        ),
    )
    asset_name = os.path.join("category", version_asset_name)
    spec = AssetSpec(name=asset_name, version=version, versioning=versioning)
    expected_path = os.path.join(working_dir, asset_name, version)

    def _fetch(force):
        # one-line helper: resolve the spec with/without forcing a download
        return manager._fetch_asset_version(spec=spec, _force_download=force)

    # not yet cached => downloaded
    result = _fetch(False)
    del result["meta"]  # fetch meta data, not asserted here
    assert result == {
        "from_cache": False,
        "version": version,
        "path": expected_path,
    }

    # second fetch is served from cache
    result = _fetch(False)
    assert result == {
        "from_cache": True,
        "version": version,
        "path": expected_path,
    }

    # force_download bypasses the cache
    result = _fetch(True)
    del result["meta"]  # fetch meta data
    assert result == {
        "from_cache": False,
        "version": version,
        "path": expected_path,
    }

    # removing the cached version triggers a re-download
    os.remove(expected_path)
    result = _fetch(False)
    del result["meta"]  # fetch meta data
    assert result == {
        "from_cache": False,
        "version": version,
        "path": expected_path,
    }