def test_status_download_optimization(mocker, dvc):
    """Remote existence checks must be skipped when the pull is a no-op.

    If every hash we want to fetch is already present in the local
    cache, computing download status must not query the remote cache
    for object existence at all.
    """
    local_odb = LocalObjectDB(LocalFileSystem(), PathInfo("."))
    wanted = {
        HashFile(
            None, local_odb.fs, HashInfo("md5", "acbd18db4cc2f85cedef654fccc4a4d8")
        ),
        HashFile(
            None, local_odb.fs, HashInfo("md5", "37b51d194a7513e45b56f6524f2d51f2")
        ),
    }
    # Pretend every wanted hash is already cached locally.
    cached_hashes = [entry.hash_info.value for entry in wanted]
    mocker.patch.object(local_odb, "hashes_exist", return_value=cached_hashes)

    remote = mocker.Mock()
    remote.url = "other_remote"
    remote.hashes_exist.return_value = []
    remote.index = RemoteIndexNoop()

    remote.status(local_odb, wanted, download=True)

    # The remote must never have been asked about object existence.
    assert remote.hashes_exist.call_count == 0
def get(self, hash_info: "HashInfo"):
    """Return the raw object stored under *hash_info* as a ``HashFile``."""
    obj_path = self.hash_to_path(hash_info.value)
    return HashFile(obj_path, self.fs, hash_info)
def _get_file_obj(path_info, fs, name, odb=None, state=None, upload=False):
    """Build a ``(path_info, HashFile)`` pair for a single file.

    When *upload* is set, the file is streamed into *odb* instead of
    being hashed in place (md5-addressed odbs only).
    """
    if upload:
        # Upload path requires an odb and is md5-only.
        assert odb and name == "md5"
        return _upload_file(path_info, fs, odb)

    hash_info = get_file_hash(path_info, fs, name, state=state)
    return path_info, HashFile(path_info, fs, hash_info)
def get(self, hash_info: "HashInfo"):
    """Return the raw object stored under *hash_info* as a ``HashFile``."""
    # hash_to_path may yield a plain string rather than a PathInfo;
    # prefer it where possible for performance.
    obj_path = self.hash_to_path(hash_info.value)
    return HashFile(obj_path, self.fs, hash_info)
def _iter_objects(path_info, fs, name, odb, state, upload, **kwargs):
    """Yield ``(path_info, HashFile)`` pairs for every file under *path_info*."""
    if upload or name not in fs.DETAIL_FIELDS:
        # Filesystem can't report the hash in its listing (or we must
        # upload), so fall back to building objects file by file.
        yield from _build_objects(path_info, fs, name, odb, state, upload, **kwargs)
        return

    # Fast path: the listing details already carry the hash, so no file
    # contents need to be read.
    for details in fs.find(path_info, detail=True):
        file_info = path_info.replace(path=details["name"])
        hash_info = HashInfo(name, details[name], size=details.get("size"))
        yield file_info, HashFile(file_info, fs, hash_info)
def _upload_file(path_info, fs, odb):
    """Stream *path_info* into *odb*, hashing the contents on the fly.

    Returns ``(path_info, obj)`` where *obj* points at the uploaded
    temporary location in the odb filesystem.
    """
    from dvc.utils import tmp_fname
    from dvc.utils.stream import HashedStreamReader

    tmp_info = odb.fs.path_info / tmp_fname()
    with fs.open(path_info, mode="rb", chunk_size=fs.CHUNK_SIZE) as fobj:
        # Wrap the stream so the hash is computed while uploading —
        # avoids a second read pass over the file.
        reader = HashedStreamReader(fobj)
        odb.fs.upload_fobj(
            reader, tmp_info, desc=path_info.name, total=fs.getsize(path_info)
        )
    return path_info, HashFile(tmp_info, odb.fs, reader.hash_info)
def _stage_file(fs_path, fs, name, odb=None, upload_odb=None, dry_run=False):
    """Hash one file and, unless *dry_run*, register it in *odb*.

    Returns a ``(fs_path, meta, obj)`` triple.  When *upload_odb* is
    given (and not a dry run), the file is uploaded instead and the
    result of ``_upload_file`` is returned as-is.
    """
    file_state = odb.state if odb else None
    meta, hash_info = get_file_hash(fs_path, fs, name, state=file_state)

    if upload_odb and not dry_run:
        # Upload path requires an odb and is md5-only.
        assert odb and name == "md5"
        return _upload_file(fs_path, fs, odb, upload_odb)

    if dry_run:
        # Don't touch the odb — just describe the object in place.
        obj = HashFile(fs_path, fs, hash_info)
    else:
        odb.add(fs_path, fs, hash_info, hardlink=False)
        obj = odb.get(hash_info)

    return fs_path, meta, obj
from dvc.objects.tree import Tree, _merge @pytest.mark.parametrize( "lst, trie_dict", [ ([], {}), ( [ {"md5": "def", "relpath": "zzz"}, {"md5": "123", "relpath": "foo"}, {"md5": "abc", "relpath": "aaa"}, {"md5": "456", "relpath": "bar"}, ], { ("zzz",): HashFile(None, None, HashInfo("md5", "def")), ("foo",): HashFile(None, None, HashInfo("md5", "123")), ("bar",): HashFile(None, None, HashInfo("md5", "456")), ("aaa",): HashFile(None, None, HashInfo("md5", "abc")), }, ), ( [ {"md5": "123", "relpath": "dir/b"}, {"md5": "456", "relpath": "dir/z"}, {"md5": "789", "relpath": "dir/a"}, {"md5": "abc", "relpath": "b"}, {"md5": "def", "relpath": "a"}, {"md5": "ghi", "relpath": "z"}, {"md5": "jkl", "relpath": "dir/subdir/b"}, {"md5": "mno", "relpath": "dir/subdir/z"},