# Example 1
def test_list_hashes(dvc):
    """Hashes are rebuilt from two-level cache paths; malformed entries are dropped."""
    cache = CloudCache(BaseFileSystem(dvc, {}))
    cache.fs.path_info = PathInfo("foo")

    # "12/3456" is a valid nested cache path -> hash "123456";
    # "bar" does not match the layout and must be ignored.
    fake_paths = ["12/3456", "bar"]
    with mock.patch.object(cache, "_list_paths", return_value=fake_paths):
        listed = list(cache.list_hashes())
    assert listed == ["123456"]
# Example 2
def test_link_created_on_non_nested_path(base_info, tmp_dir, dvc, scm):
    """link() must create the destination even when src/dst are siblings (non-nested)."""
    cache = CloudCache(S3Tree(dvc, {"url": str(base_info.parent)}))
    client = cache.tree.s3.meta.client
    client.create_bucket(Bucket=base_info.bucket)

    src = base_info / "from"
    dst = base_info / "to"
    client.put_object(Bucket=base_info.bucket, Key=src.path, Body="data")

    cache.link(src, dst)

    # Both the original object and the linked copy must exist afterwards.
    assert cache.tree.exists(src)
    assert cache.tree.exists(dst)
# Example 3
def test_list_paths(dvc):
    """_list_paths delegates to fs.walk_files, splitting a 3-char prefix into xx/y."""
    cache = CloudCache(BaseFileSystem(dvc, {}))
    cache.fs.path_info = PathInfo("foo")

    with mock.patch.object(
        cache.fs, "walk_files", return_value=[]
    ) as walk_mock:
        # Without a prefix the whole cache root is walked.
        list(cache._list_paths())
        walk_mock.assert_called_with(cache.fs.path_info, prefix=False)

        # A 3-char prefix becomes the nested "00/0" path under the root.
        list(cache._list_paths(prefix="000"))
        walk_mock.assert_called_with(
            cache.fs.path_info / "00" / "0", prefix=True
        )
# Example 4
def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc):
    """Traversal strategy flips between per-prefix parallel and single listing."""
    cache = CloudCache(BaseFileSystem(dvc, {}))
    cache.fs.path_info = PathInfo("foo")

    # Remote size at which parallel per-prefix traversal is chosen.
    threshold = 256 / cache.fs.JOBS * cache.fs.LIST_OBJECT_PAGE_SIZE
    list(cache.list_hashes_traverse(threshold, {0}))
    for idx in range(1, 16):
        list_hashes.assert_any_call(
            prefix=f"{idx:03x}", progress_callback=CallableOrNone
        )
    for idx in range(1, 256):
        list_hashes.assert_any_call(
            prefix=f"{idx:02x}", progress_callback=CallableOrNone
        )

    # Below the threshold a single unprefixed listing is used instead.
    # NOTE(review): the original decremented twice (`size -= 1` then
    # `size - 1`), i.e. effectively threshold - 2; preserved as-is — confirm
    # whether a single decrement was intended.
    list_hashes.reset_mock()
    list(cache.list_hashes_traverse(threshold - 2, {0}))
    list_hashes.assert_called_with(
        prefix=None, progress_callback=CallableOrNone
    )
# Example 5
def test_hashes_exist(object_exists, traverse, dvc):
    """Verify which strategy ``hashes_exist`` picks for each remote/local size mix.

    ``object_exists`` and ``traverse`` are mocks (injected by fixtures) for the
    two lookup strategies; the test asserts which one is invoked and with what
    arguments in three scenarios.
    """
    cache = CloudCache(BaseFileSystem(dvc, {}))

    # remote does not support traverse
    # -> must fall back to per-object existence checks regardless of sizes.
    cache.fs.CAN_TRAVERSE = False
    with mock.patch.object(cache, "list_hashes",
                           return_value=list(range(256))):
        hashes = set(range(1000))
        cache.hashes_exist(hashes)
        object_exists.assert_called_with(hashes, None, None)
        traverse.assert_not_called()

    cache.fs.CAN_TRAVERSE = True

    # large remote, small local
    # -> the initial prefix listing is enough; no full traversal needed.
    object_exists.reset_mock()
    traverse.reset_mock()
    with mock.patch.object(cache, "list_hashes",
                           return_value=list(range(256))):
        hashes = list(range(1000))
        cache.hashes_exist(hashes)
        # verify that _cache_paths_with_max() short circuits
        # before returning all 256 remote hashes
        max_hashes = math.ceil(
            cache._max_estimation_size(hashes) /
            pow(16, cache.fs.TRAVERSE_PREFIX_LEN))
        assert max_hashes < 256
        object_exists.assert_called_with(frozenset(range(max_hashes, 1000)),
                                         None, None)
        traverse.assert_not_called()

    # large remote, large local
    # -> full traversal of the remote is cheaper than per-object checks.
    object_exists.reset_mock()
    traverse.reset_mock()
    cache.fs.JOBS = 16
    with mock.patch.object(cache, "list_hashes",
                           return_value=list(range(256))):
        hashes = list(range(1000000))
        cache.hashes_exist(hashes)
        object_exists.assert_not_called()
        traverse.assert_called_with(
            256 * pow(16, cache.fs.TRAVERSE_PREFIX_LEN),
            set(range(256)),
            None,
            None,
        )
# Example 6
def test_link_created_on_non_nested_path(base_info, tmp_dir, dvc, scm):
    """checkout._link must create the destination for sibling (non-nested) paths."""
    from dvc.checkout import _link

    cache = CloudCache(S3FileSystem(dvc, {"url": str(base_info.parent)}))
    client = cache.fs.s3.meta.client
    client.create_bucket(Bucket=base_info.bucket)

    src = base_info / "from"
    dst = base_info / "to"
    client.put_object(Bucket=base_info.bucket, Key=src.path, Body="data")

    _link(cache, src, dst)

    # Source must be untouched and destination must now exist.
    assert cache.fs.exists(src)
    assert cache.fs.exists(dst)
# Example 7
def test_checkout_for_external_outputs(tmp_dir, dvc):
    """Forced checkout restores deleted and detects modified external outputs."""
    dvc.cache.s3 = CloudCache(S3Tree(dvc, {"url": S3.get_url()}))

    remote = Remote(S3Tree(dvc, {"url": S3.get_url()}))
    file_path = remote.tree.path_info / "foo"

    def put(body):
        # Write the external output directly via the S3 client.
        remote.tree.s3.put_object(
            Bucket=remote.tree.path_info.bucket,
            Key=file_path.path,
            Body=body,
        )

    put("foo")
    dvc.add(str(remote.tree.path_info / "foo"), external=True)

    # Deleting the external output: checkout must re-add it from cache.
    remote.tree.remove(file_path)
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "added": [str(file_path)]}
    assert remote.tree.exists(file_path)

    # Overwriting the external output: checkout must report it as modified.
    put("foo\nfoo")
    stats = dvc.checkout(force=True)
    assert stats == {**empty_checkout, "modified": [str(file_path)]}