def test_list_hashes(dvc):
    """Hashes are rebuilt from valid ``<2-char dir>/<rest>`` cache paths;
    entries that don't match the layout are dropped."""
    cache = CloudCache(BaseFileSystem(dvc, {}))
    cache.fs.path_info = PathInfo("foo")

    # "12/3456" is a valid cache path -> hash "123456"; "bar" is malformed
    # and must be silently ignored.
    fake_paths = ["12/3456", "bar"]
    with mock.patch.object(cache, "_list_paths", return_value=fake_paths):
        assert list(cache.list_hashes()) == ["123456"]
# NOTE(review): this function shares its name with another
# `test_link_created_on_non_nested_path` defined later in this file, so
# pytest only collects the later definition and this test never runs.
# One of the two should be renamed (or this older S3Tree-based variant
# removed) — confirm which API version the suite targets.
def test_link_created_on_non_nested_path(base_info, tmp_dir, dvc, scm):
    """Linking a cache object to a non-nested path creates the destination
    while leaving the source in place (both must exist afterwards)."""
    tree = S3Tree(dvc, {"url": str(base_info.parent)})
    cache = CloudCache(tree)
    # Raw boto3 client, used to seed the (mocked) bucket with the source
    # object before exercising link().
    s3 = cache.tree.s3.meta.client
    s3.create_bucket(Bucket=base_info.bucket)
    s3.put_object(
        Bucket=base_info.bucket, Key=(base_info / "from").path, Body="data"
    )
    cache.link(base_info / "from", base_info / "to")
    # link() is a copy-like operation here: the source must not be consumed.
    assert cache.tree.exists(base_info / "from")
    assert cache.tree.exists(base_info / "to")
def test_list_paths(dvc):
    """_list_paths walks the whole cache root, or the nested
    ``<2 chars>/<1 char>`` subdirectory when a 3-char prefix is given."""
    cache = CloudCache(BaseFileSystem(dvc, {}))
    cache.fs.path_info = PathInfo("foo")

    with mock.patch.object(
        cache.fs, "walk_files", return_value=[]
    ) as walk_mock:
        # No prefix: the whole cache root is walked.
        list(cache._list_paths())
        walk_mock.assert_called_with(cache.fs.path_info, prefix=False)

        # Prefix "000" maps onto the nested "00/0" cache layout.
        list(cache._list_paths(prefix="000"))
        walk_mock.assert_called_with(
            cache.fs.path_info / "00" / "0", prefix=True
        )
def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc):
    """Traversal strategy selection: per-prefix parallel listing for large
    remotes, a single un-prefixed listing for small ones."""
    cache = CloudCache(BaseFileSystem(dvc, {}))
    cache.fs.path_info = PathInfo("foo")

    # Remote size at the parallel-traverse threshold: one listing call is
    # expected per hash prefix (hash 0 is already known, so it is skipped).
    threshold = 256 / cache.fs.JOBS * cache.fs.LIST_OBJECT_PAGE_SIZE
    list(cache.list_hashes_traverse(threshold, {0}))
    for value in range(1, 16):
        list_hashes.assert_any_call(
            prefix=f"{value:03x}", progress_callback=CallableOrNone
        )
    for value in range(1, 256):
        list_hashes.assert_any_call(
            prefix=f"{value:02x}", progress_callback=CallableOrNone
        )

    # Below the threshold the remote counts as "small": a single
    # un-prefixed listing replaces the parallel traversal.
    threshold -= 1
    list_hashes.reset_mock()
    list(cache.list_hashes_traverse(threshold - 1, {0}))
    list_hashes.assert_called_with(
        prefix=None, progress_callback=CallableOrNone
    )
def test_hashes_exist(object_exists, traverse, dvc): cache = CloudCache(BaseFileSystem(dvc, {})) # remote does not support traverse cache.fs.CAN_TRAVERSE = False with mock.patch.object(cache, "list_hashes", return_value=list(range(256))): hashes = set(range(1000)) cache.hashes_exist(hashes) object_exists.assert_called_with(hashes, None, None) traverse.assert_not_called() cache.fs.CAN_TRAVERSE = True # large remote, small local object_exists.reset_mock() traverse.reset_mock() with mock.patch.object(cache, "list_hashes", return_value=list(range(256))): hashes = list(range(1000)) cache.hashes_exist(hashes) # verify that _cache_paths_with_max() short circuits # before returning all 256 remote hashes max_hashes = math.ceil( cache._max_estimation_size(hashes) / pow(16, cache.fs.TRAVERSE_PREFIX_LEN)) assert max_hashes < 256 object_exists.assert_called_with(frozenset(range(max_hashes, 1000)), None, None) traverse.assert_not_called() # large remote, large local object_exists.reset_mock() traverse.reset_mock() cache.fs.JOBS = 16 with mock.patch.object(cache, "list_hashes", return_value=list(range(256))): hashes = list(range(1000000)) cache.hashes_exist(hashes) object_exists.assert_not_called() traverse.assert_called_with( 256 * pow(16, cache.fs.TRAVERSE_PREFIX_LEN), set(range(256)), None, None, )
def test_link_created_on_non_nested_path(base_info, tmp_dir, dvc, scm):
    """checkout._link creates the destination object without removing the
    source — both paths exist after linking."""
    from dvc.checkout import _link

    fs = S3FileSystem(dvc, {"url": str(base_info.parent)})
    cache = CloudCache(fs)

    # Seed the (mocked) bucket with the source object via the raw boto3
    # client before exercising _link.
    client = cache.fs.s3.meta.client
    client.create_bucket(Bucket=base_info.bucket)
    client.put_object(
        Bucket=base_info.bucket, Key=(base_info / "from").path, Body="data"
    )

    src = base_info / "from"
    dst = base_info / "to"
    _link(cache, src, dst)
    assert cache.fs.exists(src)
    assert cache.fs.exists(dst)
def test_checkout_for_external_outputs(tmp_dir, dvc):
    """Checkout restores deleted and reverts modified external S3 outputs."""
    dvc.cache.s3 = CloudCache(S3Tree(dvc, {"url": S3.get_url()}))
    remote = Remote(S3Tree(dvc, {"url": S3.get_url()}))

    file_path = remote.tree.path_info / "foo"
    bucket = remote.tree.path_info.bucket
    remote.tree.s3.put_object(
        Bucket=bucket, Key=file_path.path, Body="foo"
    )
    dvc.add(str(remote.tree.path_info / "foo"), external=True)

    # Remove the output on the remote: checkout must re-add it from cache.
    remote.tree.remove(file_path)
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "added": [str(file_path)]}
    assert remote.tree.exists(file_path)

    # Overwrite the output on the remote: checkout must restore the cached
    # version and report it as modified.
    remote.tree.s3.put_object(
        Bucket=bucket, Key=file_path.path, Body="foo\nfoo",
    )
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "modified": [str(file_path)]}