Exemple #1
0
def test_remove(ctx):
    """After bf.remove(), the path no longer exists."""
    payload = b"meow!"
    with ctx() as path:
        _write_contents(path, payload)
        assert bf.exists(path)
        bf.remove(path)
        assert not bf.exists(path)
Exemple #2
0
def cache_folder(name, dirpath, options, build_fn):
    """Produce ``dirpath`` via ``build_fn``, caching the result as a GCS tarball.

    Resolution order: no credentials -> plain build; local copy present ->
    nothing to do; cached tarball in the bucket -> download and unpack;
    otherwise build from scratch and upload the tarball for future runs.
    """
    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        # No credentials to reach the cache bucket, so always build locally.
        print(f"building without cache for {name}")
        start = time.time()
        build_fn()
        print(f"build elapsed {time.time() - start}")
        return

    # The cache key folds in the build options so that a changed
    # configuration never reuses a stale tarball.
    options_hash = hashlib.md5("|".join(options).encode("utf8")).hexdigest()
    cache_path = bf.join(
        f"gs://{GCS_BUCKET}", "cache", f"{name}-{options_hash}.tar"
    )

    if os.path.exists(dirpath):
        print(f"cache for {name} found locally")
        return

    if bf.exists(cache_path):
        # Cache hit: stream the tarball down and unpack it in place.
        print(f"downloading cache for {name}: {cache_path}")
        start = time.time()
        with bf.BlobFile(cache_path, "rb") as f:
            with tarfile.open(fileobj=f, mode="r") as tf:
                # NOTE(review): extractall() without a filter trusts the
                # archive contents; acceptable for our own bucket, but worth
                # confirming nothing untrusted can write to it.
                tf.extractall()
        print(f"download elapsed {time.time() - start}")
        return

    # Cache miss: build, then upload the result for the next run.
    print(f"building cache for {name}")
    start = time.time()
    build_fn()
    print(f"cache build elapsed {time.time() - start}")
    print(f"uploading cache for {name}")
    start = time.time()
    if not bf.exists(cache_path):
        # Re-check before uploading in case a concurrent job already did.
        with bf.BlobFile(cache_path, "wb") as f:
            with tarfile.open(fileobj=f, mode="w") as tf:
                tf.add(dirpath)
    print(f"upload elapsed {time.time() - start}")
Exemple #3
0
def test_more_exists():
    """bf.exists() returns the expected result for valid and invalid
    containers/buckets, their slash variants, and local paths.

    Fix: dropped the extraneous ``f`` prefixes on the two local-path
    literals — they contain no placeholders (ruff F541).
    """
    testcases = [
        (AZURE_INVALID_CONTAINER, False),
        (AZURE_INVALID_CONTAINER + "/", False),
        (AZURE_INVALID_CONTAINER + "//", False),
        (AZURE_INVALID_CONTAINER + "/invalid.file", False),
        (GCS_INVALID_BUCKET, False),
        (GCS_INVALID_BUCKET + "/", False),
        (GCS_INVALID_BUCKET + "//", False),
        (GCS_INVALID_BUCKET + "/invalid.file", False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT, False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT + "/", False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT + "//", False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT + "/invalid.file", False),
        (AZURE_VALID_CONTAINER, True),
        (AZURE_VALID_CONTAINER + "/", True),
        # a doubled slash inside a valid container is not a real path
        (AZURE_VALID_CONTAINER + "//", False),
        (AZURE_VALID_CONTAINER + "/invalid.file", False),
        (GCS_VALID_BUCKET, True),
        (GCS_VALID_BUCKET + "/", True),
        (GCS_VALID_BUCKET + "//", False),
        (GCS_VALID_BUCKET + "/invalid.file", False),
        ("/does-not-exist", False),
        ("/", True),
    ]
    for path, should_exist in testcases:
        assert bf.exists(path) == should_exist
Exemple #4
0
def test_makedirs(ctx):
    """makedirs() creates nested directories that can then be written into."""
    payload = b"meow!"
    with ctx() as path:
        nested = bf.join(path, "x", "x", "x")
        bf.makedirs(nested)
        assert bf.exists(nested)
        _write_contents(bf.join(nested, "testfile"), payload)
def find_ema_checkpoint(main_checkpoint, step, rate):
    """Look for the EMA checkpoint stored next to ``main_checkpoint``.

    Returns the checkpoint path if it exists, otherwise None (also when
    there is no main checkpoint at all).
    """
    if main_checkpoint is None:
        return None
    candidate = bf.join(
        bf.dirname(main_checkpoint), f"ema_{rate}_{(step):06d}.pt"
    )
    return candidate if bf.exists(candidate) else None
Exemple #6
0
 def _load_optimizer_state(self):
     """Restore optimizer state from the resume checkpoint dir, if present."""
     main_checkpoint = find_resume_checkpoint() or self.resume_checkpoint
     opt_checkpoint = bf.join(
         bf.dirname(main_checkpoint), f"opt{self.resume_step:06}.pt"
     )
     # Nothing to restore if no optimizer checkpoint was saved alongside.
     if not bf.exists(opt_checkpoint):
         return
     logger.log(
         f"loading optimizer state from checkpoint: {opt_checkpoint}")
     state_dict = dist_util.load_state_dict(
         opt_checkpoint, map_location=dist_util.dev())
     self.opt.load_state_dict(state_dict)
Exemple #7
0
def test_rmdir(ctx):
    """rmdir() removes empty dirs, tolerates repeated deletes, and refuses
    to remove an implicit directory that still has contents."""
    payload = b"meow!"
    with ctx() as path:
        parent = bf.dirname(path)
        # this is an error for a local path but not for a blob path
        bf.rmdir(bf.join(parent, "fakedirname"))

        target = bf.join(parent, "dirname")
        bf.makedirs(target)
        assert bf.exists(target)
        bf.rmdir(target)
        assert not bf.exists(target)

        # double delete is fine
        bf.rmdir(target)

        # implicit dir: created by writing a file beneath it
        child = bf.join(parent, "dirname", "name")
        _write_contents(child, payload)
        with pytest.raises(OSError):
            # not empty dir
            bf.rmdir(target)
        bf.remove(child)
        bf.rmdir(target)
def main():
    """Benchmark bf.copy() throughput between a local and a remote dir,
    in both directions, serially and with parallel=True."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--remote-dir", required=True)
    parser.add_argument("--local-dir", required=True)
    parser.add_argument("--size", default=100_000_000, type=int)
    parser.add_argument("--loops", default=10, type=int)
    parser.add_argument("--verify", action="store_true")
    args = parser.parse_args()

    tests = [
        (
            "local_to_remote",
            bf.join(args.local_dir, f"file-{args.size}.bin"),
            bf.join(args.remote_dir, "file.bin"),
        ),
        (
            "remote_to_local",
            bf.join(args.remote_dir, f"file-{args.size}.bin"),
            bf.join(args.local_dir, "file.bin"),
        ),
    ]

    for name, src, dst in tests:
        # deterministic payload of exactly args.size bytes
        data = (b"meow" * 249 + b"mew\n") * (args.size // 1000)
        assert len(data) == args.size
        # (re)create the source file if it is missing or the wrong size
        if not bf.exists(src) or bf.stat(src).size != args.size:
            with bf.BlobFile(src, "wb") as f:
                f.write(data)
        data_hash = hashlib.md5(data).hexdigest()

        with timer(f"{name}_serial", args.size * args.loops):
            for i in range(args.loops):
                target = dst + str(i)
                bf.copy(src, target)
                if args.verify:
                    verify_hash(data_hash, target)
                bf.remove(target)

        with timer(f"{name}_parallel", args.size * args.loops):
            for i in range(args.loops):
                target = dst + str(i)
                bf.copy(src, target, parallel=True)
                if args.verify:
                    verify_hash(data_hash, target)
                bf.remove(target)
Exemple #9
0
def test_invalid_paths(base_path):
    """Exercise the bf API against paths that must not resolve to real files.

    ``base_path`` is a container/bucket root (possibly invalid); each suffix
    variant is expected to fail — or deliberately no-op — in a specific way.
    """
    for suffix in ["", "/", "//", "/invalid.file", "/invalid/dir/"]:
        path = base_path + suffix
        print(path)
        # directory-like paths are expected to raise IsADirectoryError from
        # the file-oriented calls below; file-like ones FileNotFoundError
        if path.endswith("/"):
            expected_error = IsADirectoryError
        else:
            expected_error = FileNotFoundError
        # globbing a nonexistent path yields nothing but must not raise
        list(bf.glob(path))
        if suffix == "":
            # a wildcard directly at the account/bucket level is rejected
            for pattern in ["*", "**"]:
                try:
                    list(bf.glob(path + pattern))
                except bf.Error as e:
                    assert "Wildcards cannot be used" in e.message
        else:
            for pattern in ["*", "**"]:
                list(bf.glob(path + pattern))
        with pytest.raises(FileNotFoundError):
            list(bf.listdir(path))
        assert not bf.exists(path)
        assert not bf.isdir(path)
        with pytest.raises(expected_error):
            bf.remove(path)
        if suffix in ("", "/"):
            # removing the container/bucket root itself is not allowed
            try:
                bf.rmdir(path)
            except bf.Error as e:
                assert "Cannot delete bucket" in e.message
        else:
            # expected not to raise for a missing subdirectory here
            bf.rmdir(path)
        with pytest.raises(NotADirectoryError):
            bf.rmtree(path)
        with pytest.raises(FileNotFoundError):
            bf.stat(path)

        if base_path == AZURE_INVALID_CONTAINER_NO_ACCOUNT:
            # no storage account, so a URL cannot be constructed
            with pytest.raises(bf.Error):
                bf.get_url(path)
        else:
            bf.get_url(path)

        with pytest.raises(FileNotFoundError):
            bf.md5(path)
        with pytest.raises(bf.Error):
            bf.makedirs(path)
        # walking a nonexistent tree produces nothing but must not raise
        list(bf.walk(path))
        with tempfile.TemporaryDirectory() as tmpdir:
            local_path = os.path.join(tmpdir, "test.txt")
            # copying from the bad path fails, in either direction
            with pytest.raises(expected_error):
                bf.copy(path, local_path)
            with open(local_path, "w") as f:
                f.write("meow")
            with pytest.raises(expected_error):
                bf.copy(local_path, path)
        # opening the bad path fails for both streaming and buffered modes
        for streaming in [False, True]:
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "rb", streaming=streaming) as f:
                    f.read()
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "wb", streaming=streaming) as f:
                    f.write(b"meow")
Exemple #10
0
def test_exists(ctx):
    """A path reports existence only after contents have been written."""
    payload = b"meow!"
    with ctx() as path:
        assert not bf.exists(path)
        _write_contents(path, payload)
        assert bf.exists(path)