Esempio n. 1
0
def test_remove(ctx):
    """Check that ``bf.remove`` makes an existing path stop existing.

    ``ctx`` is called to obtain a context manager that yields the path
    under test.
    """
    payload = b"meow!"
    with ctx() as target:
        _write_contents(target, payload)
        # The path must be reported as present before the removal ...
        assert bf.exists(target)
        bf.remove(target)
        # ... and as absent right after it.
        assert not bf.exists(target)
Esempio n. 2
0
def test_azure_maybe_update_md5(ctx):
    """Exercise ``ops._azure_maybe_update_md5`` as the blob changes.

    Three situations are asserted, in order:

    * with the ETag read right after the first write, the call is truthy;
    * after the blob is overwritten, the ETag captured before the
      overwrite makes the call falsy and the stored ``Content-MD5``
      matches the new contents;
    * after the blob is removed, the call is falsy.

    ``ctx`` is called to obtain a context manager that yields the path
    under test.
    """
    original = b"meow!"
    replacement = b"purr"
    original_md5 = hashlib.md5(original).hexdigest()
    replacement_md5 = hashlib.md5(replacement).hexdigest()

    with ctx() as blob_path:
        _write_contents(blob_path, original)
        _, props = ops._azure_isfile(blob_path)
        first_etag = props["ETag"]
        assert ops._azure_maybe_update_md5(blob_path, first_etag, original_md5)

        # Overwrite the blob, then retry with the ETag captured earlier.
        _write_contents(blob_path, replacement)
        assert not ops._azure_maybe_update_md5(blob_path, first_etag, original_md5)

        # Content-MD5 is base64 encoded; compare it as a hex digest.
        _, props = ops._azure_isfile(blob_path)
        stored_md5 = base64.b64decode(props["Content-MD5"]).hex()
        assert stored_md5 == replacement_md5

        # Once the blob is gone the update must not report success.
        bf.remove(blob_path)
        latest_etag = props["ETag"]
        assert not ops._azure_maybe_update_md5(blob_path, latest_etag, original_md5)
def main():
    """Benchmark ``bf.copy`` between a local and a remote directory.

    Command-line flags:
        --remote-dir  directory joined with the remote file names (required)
        --local-dir   directory joined with the local file names (required)
        --size        payload size in bytes; must be a non-negative
                      multiple of 1000 (default 100_000_000)
        --loops       number of copies per timed section (default 10)
        --verify      call ``verify_hash`` on every copied file

    For each direction ("local_to_remote", "remote_to_local") the source
    file is (re)written when it is missing or has the wrong size, then
    copied ``--loops`` times without and with ``parallel=True``. Each
    copy is removed again before the next one.

    Exits through ``parser.error`` when ``--size`` is invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--remote-dir", required=True)
    parser.add_argument("--local-dir", required=True)
    parser.add_argument("--size", default=100_000_000, type=int)
    parser.add_argument("--loops", default=10, type=int)
    parser.add_argument("--verify", action="store_true")
    args = parser.parse_args()

    # The payload is assembled from 1000-byte chunks, so other sizes cannot
    # be produced exactly. Report that as a usage error instead of relying
    # on an ``assert`` that disappears under ``python -O``.
    if args.size < 0 or args.size % 1000 != 0:
        parser.error("--size must be a non-negative multiple of 1000")

    # Payload and digest depend only on --size: build them once rather
    # than once per benchmark direction.
    data = (b"meow" * 249 + b"mew\n") * (args.size // 1000)
    data_hash = hashlib.md5(data).hexdigest()

    tests = [
        (
            "local_to_remote",
            bf.join(args.local_dir, f"file-{args.size}.bin"),
            bf.join(args.remote_dir, "file.bin"),
        ),
        (
            "remote_to_local",
            bf.join(args.remote_dir, f"file-{args.size}.bin"),
            bf.join(args.local_dir, "file.bin"),
        ),
    ]

    # Keyword arguments passed to bf.copy for each timed variant; the
    # serial variant passes none, exactly like a plain bf.copy(src, dst).
    variants = (
        ("serial", {}),
        ("parallel", {"parallel": True}),
    )

    for name, src, dst in tests:
        # Reuse an existing source file only when its size matches.
        if not bf.exists(src) or bf.stat(src).size != args.size:
            with bf.BlobFile(src, "wb") as f:
                f.write(data)

        for label, copy_kwargs in variants:
            with timer(f"{name}_{label}", args.size * args.loops):
                for i in range(args.loops):
                    dst_path = dst + str(i)
                    bf.copy(src, dst_path, **copy_kwargs)
                    if args.verify:
                        verify_hash(data_hash, dst_path)
                    bf.remove(dst_path)
Esempio n. 4
0
def test_composite_objects():
    """Check md5 handling for objects uploaded by gsutil as composites.

    A 2 MiB file is uploaded with ``gsutil`` using a parallel composite
    upload threshold of 1M. The test asserts that:

    * ``bf.stat(...).md5`` is ``None`` right after such an upload;
    * ``bf.md5`` returns the md5 of the contents, after which ``bf.stat``
      reports that md5 as well;
    * after a fresh upload, reading the object through ``bf.BlobFile``
      with ``cache_dir`` set and ``streaming=False`` also leaves
      ``bf.stat`` reporting the md5.
    """
    with _get_temp_gcs_path() as remote_path, _get_temp_local_path() as local_path:
        payload = b"0" * 2 * 2**20
        with open(local_path, "wb") as out:
            out.write(payload)
        expected_md5 = hashlib.md5(payload).hexdigest()

        upload_cmd = [
            "gsutil",
            "-o",
            "GSUtil:parallel_composite_upload_threshold=1M",
            "cp",
            local_path,
            remote_path,
        ]

        def upload_as_composite():
            # check=True turns a failing gsutil run into an exception.
            sp.run(upload_cmd, check=True)

        upload_as_composite()
        assert bf.stat(remote_path).md5 is None
        # The first bf.md5 call must return the digest and make it
        # visible through bf.stat; a second call must agree.
        assert expected_md5 == bf.md5(remote_path)
        assert bf.stat(remote_path).md5 == expected_md5
        assert expected_md5 == bf.md5(remote_path)

        # Start over with a fresh upload so stat reports no md5 again.
        bf.remove(remote_path)
        upload_as_composite()
        assert bf.stat(remote_path).md5 is None

        with tempfile.TemporaryDirectory() as cache_root:
            with bf.BlobFile(
                remote_path,
                "rb",
                cache_dir=cache_root,
                streaming=False,
            ) as reader:
                assert reader.read() == payload
        assert bf.stat(remote_path).md5 == expected_md5
Esempio n. 5
0
def test_rmdir(ctx):
    """Exercise ``bf.rmdir`` on missing, empty and non-empty directories.

    ``ctx`` is called to obtain a context manager that yields a path; the
    directories used here are created next to that path.
    """
    payload = b"meow!"
    with ctx() as path:
        parent = bf.dirname(path)

        # Removing a directory that was never created: an error for a
        # local path but not for a blob path.
        bf.rmdir(bf.join(parent, "fakedirname"))

        # Create an explicit directory, then remove it.
        target_dir = bf.join(parent, "dirname")
        bf.makedirs(target_dir)
        assert bf.exists(target_dir)
        bf.rmdir(target_dir)
        assert not bf.exists(target_dir)

        # Deleting it a second time is fine.
        bf.rmdir(target_dir)

        # Implicit directory: it exists only through the file inside it.
        nested_file = bf.join(parent, "dirname", "name")
        _write_contents(nested_file, payload)
        with pytest.raises(OSError):
            # The directory is not empty, so rmdir must refuse.
            bf.rmdir(target_dir)

        # Once the file is gone the directory can be removed.
        bf.remove(nested_file)
        bf.rmdir(target_dir)
Esempio n. 6
0
def test_invalid_paths(base_path):
    """Run the ``bf`` API against paths built on an invalid ``base_path``.

    Each suffix in a fixed list is appended to ``base_path`` and the
    resulting path is passed to the ``bf`` operations below. Paths ending
    in "/" are expected to raise ``IsADirectoryError`` from remove, copy
    and open; all other paths are expected to raise ``FileNotFoundError``.
    """
    for suffix in ["", "/", "//", "/invalid.file", "/invalid/dir/"]:
        path = base_path + suffix
        print(path)
        expected_error = (
            IsADirectoryError if path.endswith("/") else FileNotFoundError
        )

        # Globbing must not raise, except that a wildcard appended straight
        # to the bare base path may be rejected with a specific message.
        list(bf.glob(path))
        for pattern in ["*", "**"]:
            if suffix == "":
                try:
                    list(bf.glob(path + pattern))
                except bf.Error as err:
                    assert "Wildcards cannot be used" in err.message
            else:
                list(bf.glob(path + pattern))

        with pytest.raises(FileNotFoundError):
            list(bf.listdir(path))
        assert not bf.exists(path)
        assert not bf.isdir(path)
        with pytest.raises(expected_error):
            bf.remove(path)

        # rmdir may be rejected with a specific message for the "" and "/"
        # suffixes; for every other suffix it must not raise.
        if suffix not in ("", "/"):
            bf.rmdir(path)
        else:
            try:
                bf.rmdir(path)
            except bf.Error as err:
                assert "Cannot delete bucket" in err.message

        with pytest.raises(NotADirectoryError):
            bf.rmtree(path)
        with pytest.raises(FileNotFoundError):
            bf.stat(path)

        # get_url only fails for the base path that has no account.
        if base_path != AZURE_INVALID_CONTAINER_NO_ACCOUNT:
            bf.get_url(path)
        else:
            with pytest.raises(bf.Error):
                bf.get_url(path)

        with pytest.raises(FileNotFoundError):
            bf.md5(path)
        with pytest.raises(bf.Error):
            bf.makedirs(path)
        list(bf.walk(path))

        # Copying from and to the invalid path must fail either way.
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_path = os.path.join(scratch_dir, "test.txt")
            with pytest.raises(expected_error):
                bf.copy(path, local_path)
            with open(local_path, "w") as local_file:
                local_file.write("meow")
            with pytest.raises(expected_error):
                bf.copy(local_path, path)

        # Opening for read or write must fail with and without streaming.
        for streaming in [False, True]:
            with pytest.raises(expected_error), bf.BlobFile(
                path, "rb", streaming=streaming
            ) as reader:
                reader.read()
            with pytest.raises(expected_error), bf.BlobFile(
                path, "wb", streaming=streaming
            ) as writer:
                writer.write(b"meow")