def walk(self, fs: FileSystem, path: AnyFSPath, **kwargs):
    """Walk ``path`` on ``fs``, filtering each level on local filesystems.

    ``ignore_subrepos`` (default ``True``) is popped from ``kwargs`` and
    passed to the filter call; the remaining keyword arguments are forwarded
    to ``fs.walk`` unchanged.

    Yields:
        ``(root, dirs, files)`` triples. When ``fs.scheme`` is
        ``Schemes.LOCAL`` the ``dirs`` and ``files`` lists are replaced in
        place with the result of calling ``self(root, dirs, files, ...)``;
        for any other scheme the output of ``fs.walk`` is yielded untouched.
    """
    skip_subrepos = kwargs.pop("ignore_subrepos", True)

    if fs.scheme != Schemes.LOCAL:
        # Non-local filesystems are walked as-is, without any filtering.
        yield from fs.walk(path, **kwargs)
        return

    for root, dirs, files in fs.walk(path, **kwargs):
        kept_dirs, kept_files = self(
            root, dirs, files, ignore_subrepos=skip_subrepos
        )
        # Slice-assign so the very list objects produced by fs.walk are
        # mutated (presumably so the walker prunes its descent -- confirm).
        dirs[:], files[:] = kept_dirs, kept_files
        yield root, dirs, files
def is_ignored(self, fs: FileSystem, path: str, ignore_subrepos: bool = True) -> bool:
    """Tell whether ``path`` is ignored.

    Paths on non-local filesystems are never reported as ignored. On a local
    filesystem an existing file is checked with ``is_ignored_file``, an
    existing directory with ``is_ignored_dir``; a path that is neither is
    reported as ignored if either check matches.
    """
    # NOTE: can't use self.check_ignore(path).match for now, see
    # https://github.com/iterative/dvc/issues/4555
    if fs.scheme == Schemes.LOCAL:
        if fs.isfile(path):
            return self.is_ignored_file(path)
        if not fs.isdir(path):
            # Neither a file nor a directory: try both kinds of check.
            return self.is_ignored_file(path) or self.is_ignored_dir(
                path, ignore_subrepos
            )
        return self.is_ignored_dir(path, ignore_subrepos)
    return False
def test_list_hashes(dvc):
    """``_list_hashes`` yields only "123456" for the paths "12/3456" and "bar"."""
    odb = ObjectDB(FileSystem(), None)
    odb.fs_path = "foo"

    listed_paths = ["12/3456", "bar"]
    with mock.patch.object(odb, "_list_paths", return_value=listed_paths):
        found = list(odb._list_hashes())
        assert found == ["123456"]
def find(self, fs: FileSystem, path: AnyFSPath, **kwargs):
    """Yield file paths found under ``path``.

    On a local filesystem the paths are built from ``self.walk`` (which
    receives ``kwargs``), joining each root and file name with ``fs.sep``.
    For any other scheme the results of ``fs.find(path)`` are yielded
    directly.
    """
    if fs.scheme != Schemes.LOCAL:
        # NOTE(review): kwargs are not forwarded to fs.find here -- confirm
        # this is intentional.
        yield from fs.find(path)
        return

    for root, _, files in self.walk(fs, path, **kwargs):
        # NOTE: os.path.join is ~5.5 times slower
        yield from (f"{root}{fs.sep}{file}" for file in files)
def test_list_paths(dvc):
    """``_list_paths`` calls ``fs.find`` with the expected path and ``prefix`` flag."""
    path = "foo"
    odb = ObjectDB(FileSystem(), path)

    with mock.patch.object(odb.fs, "find", return_value=[]) as find_mock:
        # Without a prefix the object DB root is searched.
        list(odb._list_paths())
        find_mock.assert_called_with(path, prefix=False)

        # With prefix "000" the search path becomes <path>/00/0.
        expected_path = posixpath.join(path, "00", "0")
        list(odb._list_paths(prefix="000"))
        find_mock.assert_called_with(expected_path, prefix=True)
def test_cmd_error(dvc):
    """A ``RemoteCmdError`` raised by ``FileSystem.remove`` reaches the caller."""
    config = {}

    cmd = "sed 'hello'"
    ret = "1"
    err = "sed: expression #1, char 2: extra characters after command"
    failure = RemoteCmdError("base", cmd, ret, err)

    with mock.patch.object(FileSystem, "remove", side_effect=failure):
        with pytest.raises(RemoteCmdError):
            FileSystem(**config).remove("file")
def test_hashes_exist(object_exists, traverse, dvc):
    """Check which of the two mocks ``hashes_exist`` calls in three scenarios.

    ``object_exists`` and ``traverse`` are used purely as mocks here (their
    calls are asserted and reset between scenarios).
    """
    odb = ObjectDB(FileSystem(), None)

    # remote does not support traverse
    odb.fs.CAN_TRAVERSE = False
    remote_hashes = list(range(256))
    with mock.patch.object(odb, "_list_hashes", return_value=remote_hashes):
        hashes = set(range(1000))
        odb.hashes_exist(hashes)
        object_exists.assert_called_with(hashes, None, None)
        traverse.assert_not_called()

    odb.fs.CAN_TRAVERSE = True

    # large remote, small local
    for patched in (object_exists, traverse):
        patched.reset_mock()
    remote_hashes = list(range(2048))
    with mock.patch.object(odb, "_list_hashes", return_value=remote_hashes):
        hashes = list(range(1000))
        odb.hashes_exist(hashes)
        # verify that _odb_paths_with_max() short circuits
        # before returning all 2048 remote hashes
        prefix_space = 16 ** odb.fs.TRAVERSE_PREFIX_LEN
        max_hashes = math.ceil(odb._max_estimation_size(hashes) / prefix_space)
        assert max_hashes < 2048
        remaining = frozenset(range(max_hashes, 1000))
        object_exists.assert_called_with(remaining, None, None)
        traverse.assert_not_called()

    # large remote, large local
    for patched in (object_exists, traverse):
        patched.reset_mock()
    odb.fs._JOBS = 16
    remote_hashes = list(range(256))
    with mock.patch.object(odb, "_list_hashes", return_value=remote_hashes):
        hashes = list(range(1000000))
        odb.hashes_exist(hashes)
        object_exists.assert_not_called()
        estimated_size = 256 * 16 ** odb.fs.TRAVERSE_PREFIX_LEN
        traverse.assert_called_with(
            estimated_size,
            set(range(256)),
            jobs=None,
            name=None,
        )
def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc):
    """Check the prefixes ``_list_hashes_traverse`` passes to ``list_hashes``.

    ``list_hashes`` is used as a mock; prefixes are asserted as positional
    arguments.
    """
    odb = ObjectDB(FileSystem(), None)
    odb.fs_path = "foo"

    # parallel traverse
    size = 256 / odb.fs._JOBS * odb.fs.LIST_OBJECT_PAGE_SIZE
    for _ in odb._list_hashes_traverse(size, {0}):
        pass

    expected_prefixes = [
        f"{i:0{odb.fs.TRAVERSE_PREFIX_LEN}x}" for i in range(1, 16)
    ]
    expected_prefixes += [f"{i:02x}" for i in range(1, 256)]
    for prefix in expected_prefixes:
        list_hashes.assert_any_call(prefix)

    # default traverse (small remote)
    size -= 1
    list_hashes.reset_mock()
    for _ in odb._list_hashes_traverse(size - 1, {0}):
        pass
    list_hashes.assert_called_with(None)
def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc):
    """Check the keyword arguments ``_list_hashes_traverse`` passes on.

    ``list_hashes`` is used as a mock; every expected call carries a
    ``prefix`` and a ``progress_callback`` matching ``CallableOrNone``.
    """
    odb = ObjectDB(FileSystem(), None)
    odb.fs_path = "foo"

    # parallel traverse
    size = 256 / odb.fs._JOBS * odb.fs.LIST_OBJECT_PAGE_SIZE
    for _ in odb._list_hashes_traverse(size, {0}):
        pass

    expected_prefixes = [f"{i:03x}" for i in range(1, 16)]
    expected_prefixes += [f"{i:02x}" for i in range(1, 256)]
    for prefix in expected_prefixes:
        list_hashes.assert_any_call(
            prefix=prefix, progress_callback=CallableOrNone
        )

    # default traverse (small remote)
    size -= 1
    list_hashes.reset_mock()
    for _ in odb._list_hashes_traverse(size - 1, {0}):
        pass
    list_hashes.assert_called_with(
        prefix=None, progress_callback=CallableOrNone
    )
def test_missing_deps(pkg, msg, mocker):
    """Constructing ``FileSystem`` raises ``RemoteMissingDepsError`` matching ``msg``.

    ``FileSystem.REQUIRES`` is patched to a single "missing" entry and
    ``dvc.utils.pkg.PKG`` is patched to ``pkg`` before the constructor runs.
    """
    mocker.patch.object(FileSystem, "REQUIRES", {"missing": "missing"})
    mocker.patch("dvc.utils.pkg.PKG", pkg)

    with pytest.raises(RemoteMissingDepsError, match=msg):
        FileSystem()
def test_is_dir_hash(hash_, result):
    """``FileSystem.is_dir_hash(hash_)`` equals the expected ``result``."""
    actual = FileSystem.is_dir_hash(hash_)
    assert actual == result