def test_get_cloud_tree_validate(tmp_dir, dvc):
    """Nested `remote://` URLs resolve through their base remote, and a
    setting that does not apply to the resolved backend raises ConfigError."""
    tmp_dir.add_remote(name="base", url="ssh://example.com/path", default=False)
    tmp_dir.add_remote(
        name="first",
        config={"url": "remote://base/first", "type": "symlink"},
        default=False,
    )
    tmp_dir.add_remote(
        name="second",
        config={"url": "remote://first/second", "oss_key_id": "mykey"},
        default=False,
    )

    base_config = get_cloud_tree(dvc, name="base").config
    assert base_config == {"url": "ssh://example.com/path"}

    first_config = get_cloud_tree(dvc, name="first").config
    assert first_config == {
        "url": "ssh://example.com/path/first",
        "type": ["symlink"],
    }

    # "oss_key_id" is not valid for an ssh-backed remote.
    with pytest.raises(ConfigError):
        get_cloud_tree(dvc, name="second")
def test_get_cloud_tree(tmp_dir, dvc):
    """Chained `remote://` references resolve to the expected path_info."""
    for remote_name, remote_url in (
        ("base", "s3://bucket/path"),
        ("first", "remote://base/first"),
        ("second", "remote://first/second"),
    ):
        tmp_dir.add_remote(name=remote_name, url=remote_url, default=False)

    expected = CloudURLInfo("s3://bucket/path")
    assert get_cloud_tree(dvc, name="base").path_info == expected

    expected = expected / "first"
    assert get_cloud_tree(dvc, name="first").path_info == expected

    expected = expected / "second"
    assert get_cloud_tree(dvc, name="second").path_info == expected
def test_tree_getsize(dvc, cloud):
    """getsize reports the byte length for top-level and nested files."""
    cloud.gen({"data": {"foo": "foo"}, "baz": "baz baz"})
    tree = get_cloud_tree(dvc, **cloud.config)
    root = tree.path_info
    # "baz baz" is 7 bytes; "foo" is 3 bytes.
    assert tree.getsize(root / "baz") == 7
    assert tree.getsize(root / "data" / "foo") == 3
def test_tree_ls(dvc, cloud):
    """Recursive ls yields every file under the prefix, at any depth."""
    cloud.gen({"data": {"foo": "foo", "bar": {"baz": "baz"}, "quux": "quux"}})
    tree = get_cloud_tree(dvc, **cloud.config)
    listed = tree.ls(tree.path_info / "data", recursive=True)
    names = set(map(os.path.basename, listed))
    assert names == {"foo", "baz", "quux"}
def _process_stages(
    repo, sub_targets, stages, no_commit, pbar, to_remote, to_cache, **kwargs
):
    """Save/commit the given stages and dump their dvcfiles.

    When ``to_remote`` or ``to_cache`` is set, exactly one stage/target is
    expected (already verified by add()); its data is transferred straight
    to the remote or cache instead of being saved from the workspace.

    Returns the list of stages whose cache link creation failed
    (CacheLinkError during commit).
    """
    from dvc.dvcfile import Dvcfile

    from ..output.base import OutputDoesNotExistError

    link_failures = []

    if to_remote or to_cache:
        # add() guarantees a single stage, target and output here.
        (stage,) = stages
        (target,) = sub_targets
        (out,) = stage.outs

        if to_remote:
            # Stream straight to the remote, bypassing the local workspace.
            out.hash_info = repo.cloud.transfer(
                target,
                jobs=kwargs.get("jobs"),
                remote=kwargs.get("remote"),
                command="add",
            )
        else:
            from dvc.tree import get_cloud_tree

            source_tree = get_cloud_tree(repo, url=target)
            out.hash_info = out.cache.transfer(
                source_tree, source_tree.path_info, jobs=kwargs.get("jobs")
            )
        out.checkout()
        Dvcfile(repo, stage.path).dump(stage)
        return link_failures

    with Tqdm(
        total=len(stages),
        desc="Processing",
        unit="file",
        disable=len(stages) == 1,
    ) as progress:
        for stage in stages:
            try:
                stage.save()
            except OutputDoesNotExistError:
                # Roll back the caller's progress bar before propagating.
                pbar.n -= 1
                raise

            try:
                if not no_commit:
                    stage.commit()
            except CacheLinkError:
                link_failures.append(stage)

            Dvcfile(repo, stage.path).dump(stage)
            progress.update()

    return link_failures
def test_remote_with_hash_jobs(dvc):
    """A per-remote checksum_jobs setting overrides the core-level one."""
    dvc.config["remote"]["with_hash_jobs"] = {
        "url": "s3://bucket/name",
        "checksum_jobs": 100,
    }
    dvc.config["core"]["checksum_jobs"] = 200

    tree = get_cloud_tree(dvc, name="with_hash_jobs")
    # The remote-level value (100) wins over the core-level value (200).
    assert tree.hash_jobs == 100
def test_tree_ls_with_etag(dvc, cloud):
    """Detailed ls entries carry the same checksum get_file_hash computes."""
    cloud.gen({"data": {"foo": "foo", "bar": {"baz": "baz"}, "quux": "quux"}})
    tree = get_cloud_tree(dvc, **cloud.config)
    root = tree.path_info
    for entry in tree.ls(root / "data", recursive=True, detail=True):
        expected = tree.get_file_hash(root.replace(path=entry["name"])).value
        assert expected == entry[tree.PARAM_CHECKSUM]
def test_tree_upload_fobj(dvc, tmp_dir, cloud):
    """upload_fobj writes a readable file object's bytes to the remote verbatim."""
    tmp_dir.gen("foo", "foo")
    tree = get_cloud_tree(dvc, **cloud.config)
    src = tmp_dir / "foo"
    dest = tree.path_info / "foo"

    with open(src, "rb") as fobj:
        tree.upload_fobj(fobj, dest)

    assert tree.exists(dest)
    with tree.open(dest, "rb") as fobj:
        assert fobj.read() == b"foo"
def transfer(self, source, jobs=None, remote=None, command=None):
    """Transfer data items in a cloud-agnostic way.

    Args:
        source (str): url for the source location.
        jobs (int): number of jobs that can be running simultaneously.
        remote (dvc.remote.base.BaseRemote): optional remote to compare
            cache to. By default remote from core.remote config option
            is used.
        command (str): the command which is benefitting from this function
            (to be used for reporting better error messages).
    """
    from dvc.tree import get_cloud_tree

    # Resolve the source url into a tree, then hand the actual copy off
    # to the destination remote.
    source_tree = get_cloud_tree(self.repo, url=source)
    dest_remote = self.get_remote(remote, command)
    return dest_remote.transfer(source_tree, source_tree.path_info, jobs=jobs)
def test_remote_without_hash_jobs_default(dvc):
    """With no jobs configured anywhere, hash_jobs falls back to HASH_JOBS."""
    dvc.config["remote"]["without_hash_jobs"] = {"url": "s3://bucket/name"}

    tree = get_cloud_tree(dvc, name="without_hash_jobs")
    assert tree.hash_jobs == tree.HASH_JOBS
def test_remote_without_hash_jobs(dvc):
    """Without a per-remote setting, hash_jobs falls back to core.checksum_jobs.

    FIX: the core-level option is named "checksum_jobs" (see
    test_remote_with_hash_jobs, which sets the same key); the previous
    "hash_jobs" key is not read by the tree, so the test would have
    compared against the built-in default instead of 200.
    """
    dvc.config["remote"]["without_hash_jobs"] = {"url": "s3://bucket/name"}
    dvc.config["core"]["checksum_jobs"] = 200
    tree = get_cloud_tree(dvc, name="without_hash_jobs")
    assert tree.hash_jobs == 200