def get_remote(repo, **kwargs):
    tree = get_cloud_tree(repo, **kwargs)
    if tree.scheme == "local":
        return LocalRemote(tree)
    if tree.scheme == "ssh":
        return SSHRemote(tree)
    return Remote(tree)

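# Hedged usage sketch for get_remote(): the resolved tree's scheme picks
# the wrapper class. The remote name and URLs below are hypothetical,
# not taken from any real config:
#
#   get_remote(repo, name="upstream")         # ssh://...   -> SSHRemote
#   get_remote(repo, url="/mnt/dvc-cache")    # local path  -> LocalRemote
#   get_remote(repo, url="s3://bucket/path")  # other scheme -> Remote
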
import math
from unittest import mock  # assumed module-level imports in the test file


def test_checksums_exist(object_exists, traverse, dvc):
    remote = Remote(BaseRemoteTree(dvc, {}))

    # remote does not support traverse
    remote.tree.CAN_TRAVERSE = False
    with mock.patch.object(
        remote.tree, "list_checksums", return_value=list(range(256))
    ):
        checksums = set(range(1000))
        remote.checksums_exist(checksums)
        object_exists.assert_called_with(checksums, None, None)
        traverse.assert_not_called()

    remote.tree.CAN_TRAVERSE = True

    # large remote, small local
    object_exists.reset_mock()
    traverse.reset_mock()
    with mock.patch.object(
        remote.tree, "list_checksums", return_value=list(range(256))
    ):
        checksums = list(range(1000))
        remote.checksums_exist(checksums)
        # verify that _cache_paths_with_max() short circuits
        # before returning all 256 remote checksums
        max_checksums = math.ceil(
            remote.tree._max_estimation_size(checksums)
            / pow(16, remote.tree.TRAVERSE_PREFIX_LEN)
        )
        assert max_checksums < 256
        object_exists.assert_called_with(
            frozenset(range(max_checksums, 1000)), None, None
        )
        traverse.assert_not_called()

    # large remote, large local
    object_exists.reset_mock()
    traverse.reset_mock()
    remote.tree.JOBS = 16
    with mock.patch.object(
        remote.tree, "list_checksums", return_value=list(range(256))
    ):
        checksums = list(range(1000000))
        remote.checksums_exist(checksums)
        object_exists.assert_not_called()
        traverse.assert_called_with(
            256 * pow(16, remote.tree.TRAVERSE_PREFIX_LEN),
            set(range(256)),
            None,
            None,
        )

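# Worked example of the estimate the test above relies on, assuming
# TRAVERSE_PREFIX_LEN == 3 (DVC's default at the time): listing a single
# 3-hex-digit prefix that holds 256 checksums extrapolates to
# 256 * 16**3 checksums on the whole remote, which is why the
# "large remote, large local" branch expects traverse() to be chosen.
TRAVERSE_PREFIX_LEN = 3  # hypothetical stand-in for the tree attribute
estimated_remote_size = 256 * pow(16, TRAVERSE_PREFIX_LEN)
assert estimated_remote_size == 1_048_576
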
def fetch_from_odb(repo, odb, objs, **kwargs):
    from dvc.exceptions import DownloadError  # assumed home of DownloadError
    from dvc.remote.base import Remote

    downloaded = 0
    failed = 0

    remote = Remote.from_odb(odb)
    try:
        downloaded += remote.pull(
            repo.odb.local,
            objs,
            **kwargs,
        )
    except DownloadError as exc:
        failed += exc.amount

    return downloaded, failed

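# Minimal, hypothetical call site for fetch_from_odb(); `repo`, `odb`,
# and `objs` are assumed to come from the surrounding fetch logic:
#
#   downloaded, failed = fetch_from_odb(repo, odb, objs, jobs=4)
#   if failed:
#       logger.error(f"{failed} files failed to download")
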
def test_checkout_for_external_outputs(tmp_dir, dvc):
    dvc.cache.s3 = CloudCache(S3RemoteTree(dvc, {"url": S3.get_url()}))

    remote = Remote(S3RemoteTree(dvc, {"url": S3.get_url()}))
    file_path = remote.path_info / "foo"
    remote.tree.s3.put_object(
        Bucket=remote.path_info.bucket, Key=file_path.path, Body="foo"
    )

    dvc.add(str(remote.path_info / "foo"), external=True)
    remote.tree.remove(file_path)

    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "added": [str(file_path)]}
    assert remote.tree.exists(file_path)

    remote.tree.s3.put_object(
        Bucket=remote.path_info.bucket, Key=file_path.path, Body="foo\nfoo"
    )
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "modified": [str(file_path)]}

def hashes_exist(self, *args, **kwargs):
    return Remote.hashes_exist(self, *args, **kwargs)

def checksums_exist(self, *args, **kwargs):
    return Remote.checksums_exist(self, *args, **kwargs)

def remote(cls, repo):
    yield Remote(GSRemoteTree(repo, {"url": cls.get_url()}))

def _do_gc(typ, remote, clist, jobs=None):
    from dvc.remote.base import Remote

    removed = Remote.gc(clist, remote, jobs=jobs)
    if not removed:
        logger.info(f"No unused '{typ}' cache to remove.")

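# Hedged sketch of how _do_gc() might be driven, one call per cache
# type; `used` (the set of still-referenced checksums) and the cache
# attributes are assumptions, not taken from the snippet above:
#
#   for typ, cache in [("local", repo.cache.local), ("s3", repo.cache.s3)]:
#       if cache:
#           _do_gc(typ, cache, used, jobs=jobs)
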
def remote(cls, repo):
    with mock_s3():
        yield Remote(S3RemoteTree(repo, {"url": cls.get_url()}))