Example #1
0
File: ignore.py Project: jhhuh/dvc
 def walk(self, fs: FileSystem, path: AnyFSPath, **kwargs):
     """Walk ``path`` on ``fs``, filtering entries when ``fs`` is local.

     ``ignore_subrepos`` (default ``True``) is popped from ``kwargs`` and
     passed to the filter call on ``self``; every remaining keyword
     argument is forwarded to ``fs.walk``. For any non-local scheme the
     results of ``fs.walk`` are yielded untouched.

     Yields:
         ``(root, dirs, files)`` tuples as produced by ``fs.walk``.
     """
     skip_subrepos = kwargs.pop("ignore_subrepos", True)

     if fs.scheme != Schemes.LOCAL:
         yield from fs.walk(path, **kwargs)
         return

     for root, dirs, files in fs.walk(path, **kwargs):
         kept_dirs, kept_files = self(
             root, dirs, files, ignore_subrepos=skip_subrepos
         )
         # Slice assignment mutates the very list objects handed out by
         # fs.walk instead of rebinding local names.
         dirs[:] = kept_dirs
         files[:] = kept_files
         yield root, dirs, files
Example #2
0
 def is_ignored(self,
                fs: FileSystem,
                path: str,
                ignore_subrepos: bool = True) -> bool:
     """Report whether ``path`` is ignored.

     Always ``False`` for non-local filesystems. For local ones the check
     is dispatched on what ``fs`` says ``path`` is: a file goes through
     ``is_ignored_file``, a directory through ``is_ignored_dir``, and
     anything else is ignored if either check matches.
     """
     # NOTE: can't use self.check_ignore(path).match for now, see
     # https://github.com/iterative/dvc/issues/4555
     if fs.scheme == Schemes.LOCAL:
         if fs.isfile(path):
             ignored = self.is_ignored_file(path)
         elif fs.isdir(path):
             ignored = self.is_ignored_dir(path, ignore_subrepos)
         else:
             # Neither a file nor a directory according to fs: try both.
             ignored = (self.is_ignored_file(path)
                        or self.is_ignored_dir(path, ignore_subrepos))
         return ignored
     return False
Example #3
0
def test_list_hashes(dvc):
    """``_list_hashes`` must yield ``"123456"`` for ``"12/3456"`` and
    yield nothing for ``"bar"``."""
    odb = ObjectDB(FileSystem(), None)
    odb.fs_path = "foo"

    fake_paths = ["12/3456", "bar"]
    patched_paths = mock.patch.object(
        odb, "_list_paths", return_value=fake_paths
    )
    with patched_paths:
        # Materialise while the patch is still active.
        found = list(odb._list_hashes())

    assert found == ["123456"]
Example #4
0
File: ignore.py Project: jhhuh/dvc
 def find(self, fs: FileSystem, path: AnyFSPath, **kwargs):
     """Yield file paths found under ``path``.

     On a local filesystem the paths are built from ``self.walk`` (which
     receives ``kwargs``); on any other scheme the call is delegated to
     ``fs.find(path)`` and ``kwargs`` are not forwarded.
     """
     if fs.scheme != Schemes.LOCAL:
         yield from fs.find(path)
         return

     for root, _, names in self.walk(fs, path, **kwargs):
         # NOTE: os.path.join is ~5.5 times slower
         yield from (f"{root}{fs.sep}{name}" for name in names)
Example #5
0
def test_list_paths(dvc):
    """``_list_paths`` must call ``fs.find`` with the expected path and
    ``prefix`` flag, both without and with a hash prefix."""
    base = "foo"
    odb = ObjectDB(FileSystem(), base)

    with mock.patch.object(odb.fs, "find", return_value=[]) as find_mock:
        # Exhaust the returned iterable before inspecting the mock.
        list(odb._list_paths())
        find_mock.assert_called_with(base, prefix=False)

        list(odb._list_paths(prefix="000"))
        expected_path = posixpath.join(base, "00", "0")
        find_mock.assert_called_with(expected_path, prefix=True)
Example #6
0
def test_cmd_error(dvc):
    """A ``RemoteCmdError`` raised by the patched ``FileSystem.remove``
    must propagate to the caller."""
    config = {}

    cmd = "sed 'hello'"
    ret = "1"
    err = "sed: expression #1, char 2: extra characters after command"
    failure = RemoteCmdError("base", cmd, ret, err)

    patched_remove = mock.patch.object(
        FileSystem, "remove", side_effect=failure
    )
    with patched_remove:
        with pytest.raises(RemoteCmdError):
            FileSystem(**config).remove("file")
Example #7
0
def test_hashes_exist(object_exists, traverse, dvc):
    """Check which lookup path ``ObjectDB.hashes_exist`` takes.

    Three scenarios are run against the same ``odb`` with ``_list_hashes``
    patched to a fixed list; each asserts which of the ``object_exists`` /
    ``traverse`` mocks was called and with what arguments.

    NOTE(review): ``object_exists`` and ``traverse`` are supplied from
    outside this view (presumably patch decorators or fixtures wrapping
    ObjectDB internals) — confirm against the decorators on this test.
    """
    odb = ObjectDB(FileSystem(), None)

    # remote does not support traverse
    odb.fs.CAN_TRAVERSE = False
    with mock.patch.object(odb, "_list_hashes", return_value=list(range(256))):
        hashes = set(range(1000))
        odb.hashes_exist(hashes)
        # every requested hash is expected to go through object_exists
        object_exists.assert_called_with(hashes, None, None)
        traverse.assert_not_called()

    odb.fs.CAN_TRAVERSE = True

    # large remote, small local
    # mocks are reset so the assertions below only see this scenario's calls
    object_exists.reset_mock()
    traverse.reset_mock()
    with mock.patch.object(
        odb, "_list_hashes", return_value=list(range(2048))
    ):
        hashes = list(range(1000))
        odb.hashes_exist(hashes)
        # verify that _odb_paths_with_max() short circuits
        # before returning all 2048 remote hashes
        max_hashes = math.ceil(
            odb._max_estimation_size(hashes)
            / pow(16, odb.fs.TRAVERSE_PREFIX_LEN)
        )
        assert max_hashes < 2048
        # expected argument: the local hashes from max_hashes up to 999
        object_exists.assert_called_with(
            frozenset(range(max_hashes, 1000)), None, None
        )
        traverse.assert_not_called()

    # large remote, large local
    object_exists.reset_mock()
    traverse.reset_mock()
    # _JOBS is overridden on this fs instance for the final scenario only
    odb.fs._JOBS = 16
    with mock.patch.object(odb, "_list_hashes", return_value=list(range(256))):
        hashes = list(range(1000000))
        odb.hashes_exist(hashes)
        object_exists.assert_not_called()
        # first positional argument is 256 * 16**TRAVERSE_PREFIX_LEN; the
        # second is the set of the 256 hashes returned by the patched listing
        traverse.assert_called_with(
            256 * pow(16, odb.fs.TRAVERSE_PREFIX_LEN),
            set(range(256)),
            jobs=None,
            name=None,
        )
Example #8
0
def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc):
    """``_list_hashes_traverse`` must list per hex prefix for a large
    size and fall back to a single un-prefixed listing for a smaller one."""
    odb = ObjectDB(FileSystem(), None)
    odb.fs_path = "foo"
    fs = odb.fs

    # parallel traverse
    parallel_size = 256 / fs._JOBS * fs.LIST_OBJECT_PAGE_SIZE
    list(odb._list_hashes_traverse(parallel_size, {0}))
    prefix_width = fs.TRAVERSE_PREFIX_LEN
    for n in range(1, 16):
        list_hashes.assert_any_call(format(n, f"0{prefix_width}x"))
    for n in range(1, 256):
        list_hashes.assert_any_call(format(n, "02x"))

    # default traverse (small remote)
    reduced_size = parallel_size - 1
    list_hashes.reset_mock()
    list(odb._list_hashes_traverse(reduced_size - 1, {0}))
    list_hashes.assert_called_with(None)
Example #9
0
def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc):
    """``_list_hashes_traverse`` must issue keyword-style prefixed
    listings for a large size and one ``prefix=None`` listing for a
    smaller one."""
    odb = ObjectDB(FileSystem(), None)
    odb.fs_path = "foo"

    # parallel traverse
    parallel_size = 256 / odb.fs._JOBS * odb.fs.LIST_OBJECT_PAGE_SIZE
    list(odb._list_hashes_traverse(parallel_size, {0}))
    # (hex width, exclusive upper bound) for each family of expected calls
    for width, stop in ((3, 16), (2, 256)):
        for n in range(1, stop):
            list_hashes.assert_any_call(
                prefix=f"{n:0{width}x}",
                progress_callback=CallableOrNone,
            )

    # default traverse (small remote)
    reduced_size = parallel_size - 1
    list_hashes.reset_mock()
    list(odb._list_hashes_traverse(reduced_size - 1, {0}))
    list_hashes.assert_called_with(
        prefix=None, progress_callback=CallableOrNone
    )
Example #10
0
def test_missing_deps(pkg, msg, mocker):
    """Instantiating ``FileSystem`` with ``REQUIRES`` patched to
    ``{"missing": "missing"}`` must raise ``RemoteMissingDepsError``
    whose message matches ``msg``."""
    mocker.patch.object(FileSystem, "REQUIRES", {"missing": "missing"})
    mocker.patch("dvc.utils.pkg.PKG", pkg)

    with pytest.raises(RemoteMissingDepsError, match=msg):
        FileSystem()
Example #11
0
def test_is_dir_hash(hash_, result):
    """``FileSystem.is_dir_hash(hash_)`` must equal ``result``."""
    actual = FileSystem.is_dir_hash(hash_)
    assert actual == result