def test_remove(ctx):
    """A removed file must no longer be reported as existing."""
    payload = b"meow!"
    with ctx() as path:
        _write_contents(path, payload)
        assert bf.exists(path)
        bf.remove(path)
        assert not bf.exists(path)
def cache_folder(name, dirpath, options, build_fn):
    """Build ``dirpath`` via ``build_fn``, caching the result as a tar in GCS.

    The cache key is ``name`` plus an md5 of ``options``. Without GCP
    credentials the cache is bypassed and the folder is always built.

    Args:
        name: human-readable identifier used in log output and the cache key.
        dirpath: local directory produced by ``build_fn`` (and archived).
        options: iterable of strings that parameterize the build; hashed
            into the cache key so different options get different caches.
        build_fn: zero-argument callable that creates ``dirpath``.
    """
    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        # We don't have any credentials to do the caching; always build.
        print(f"building without cache for {name}")
        start = time.time()
        build_fn()
        print(f"build elapsed {time.time() - start}")
        return

    options_hash = hashlib.md5("|".join(options).encode("utf8")).hexdigest()
    cache_path = bf.join(f"gs://{GCS_BUCKET}", "cache", f"{name}-{options_hash}.tar")
    if os.path.exists(dirpath):
        print(f"cache for {name} found locally")
    elif bf.exists(cache_path):
        print(f"downloading cache for {name}: {cache_path}")
        start = time.time()
        with bf.BlobFile(cache_path, "rb") as f:
            with tarfile.open(fileobj=f, mode="r") as tf:
                # NOTE(review): extractall() on a downloaded archive is unsafe
                # for untrusted data (path traversal); the cache bucket is
                # assumed to be trusted here.
                tf.extractall()
        print(f"download elapsed {time.time() - start}")
    else:
        print(f"building cache for {name}")
        start = time.time()
        build_fn()
        print(f"cache build elapsed {time.time() - start}")
        # Fix: only announce/time the upload when one actually happens.
        # Previously "uploading cache"/"upload elapsed" printed even when the
        # existence check found another worker had already uploaded the tar.
        if not bf.exists(cache_path):
            print(f"uploading cache for {name}")
            start = time.time()
            with bf.BlobFile(cache_path, "wb") as f:
                with tarfile.open(fileobj=f, mode="w") as tf:
                    tf.add(dirpath)
            print(f"upload elapsed {time.time() - start}")
def test_more_exists():
    """bf.exists is True only for valid buckets/containers (bare or with one
    trailing slash) and False for invalid paths, double slashes, and missing
    files."""
    testcases = [
        (AZURE_INVALID_CONTAINER, False),
        (AZURE_INVALID_CONTAINER + "/", False),
        (AZURE_INVALID_CONTAINER + "//", False),
        (AZURE_INVALID_CONTAINER + "/invalid.file", False),
        (GCS_INVALID_BUCKET, False),
        (GCS_INVALID_BUCKET + "/", False),
        (GCS_INVALID_BUCKET + "//", False),
        (GCS_INVALID_BUCKET + "/invalid.file", False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT, False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT + "/", False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT + "//", False),
        (AZURE_INVALID_CONTAINER_NO_ACCOUNT + "/invalid.file", False),
        (AZURE_VALID_CONTAINER, True),
        (AZURE_VALID_CONTAINER + "/", True),
        (AZURE_VALID_CONTAINER + "//", False),
        (AZURE_VALID_CONTAINER + "/invalid.file", False),
        (GCS_VALID_BUCKET, True),
        (GCS_VALID_BUCKET + "/", True),
        (GCS_VALID_BUCKET + "//", False),
        (GCS_VALID_BUCKET + "/invalid.file", False),
        # fix: these were f-strings with no placeholders; plain literals now
        ("/does-not-exist", False),
        ("/", True),
    ]
    for path, should_exist in testcases:
        assert bf.exists(path) == should_exist
def test_makedirs(ctx):
    """makedirs creates nested directories that then accept file writes."""
    payload = b"meow!"
    with ctx() as path:
        nested = bf.join(path, "x", "x", "x")
        bf.makedirs(nested)
        assert bf.exists(nested)
        _write_contents(bf.join(nested, "testfile"), payload)
def find_ema_checkpoint(main_checkpoint, step, rate):
    """Look for an EMA checkpoint alongside ``main_checkpoint``.

    Returns the path ``ema_<rate>_<step>.pt`` in the same directory if it
    exists, otherwise None (also None when no main checkpoint is given).
    """
    if main_checkpoint is None:
        return None
    candidate = bf.join(bf.dirname(main_checkpoint), f"ema_{rate}_{(step):06d}.pt")
    return candidate if bf.exists(candidate) else None
def _load_optimizer_state(self):
    """Restore optimizer state from a checkpoint next to the resumed model,
    if such a file exists; otherwise leave the optimizer untouched."""
    main_checkpoint = find_resume_checkpoint() or self.resume_checkpoint
    opt_checkpoint = bf.join(
        bf.dirname(main_checkpoint), f"opt{self.resume_step:06}.pt"
    )
    # Guard clause: nothing to load when no matching opt checkpoint exists.
    if not bf.exists(opt_checkpoint):
        return
    logger.log(
        f"loading optimizer state from checkpoint: {opt_checkpoint}")
    state_dict = dist_util.load_state_dict(
        opt_checkpoint, map_location=dist_util.dev())
    self.opt.load_state_dict(state_dict)
def test_rmdir(ctx):
    """Exercise bf.rmdir on missing, empty, repeated, and non-empty dirs."""
    payload = b"meow!"
    with ctx() as path:
        parent = bf.dirname(path)
        # removing a never-created dir: an error for a local path, a no-op
        # for a blob path
        bf.rmdir(bf.join(parent, "fakedirname"))
        target = bf.join(parent, "dirname")
        bf.makedirs(target)
        assert bf.exists(target)
        bf.rmdir(target)
        assert not bf.exists(target)
        # a second delete of the same dir is fine
        bf.rmdir(target)
        # writing a file creates the directory implicitly
        inner = bf.join(parent, "dirname", "name")
        _write_contents(inner, payload)
        with pytest.raises(OSError):
            # refuses to remove a non-empty directory
            bf.rmdir(target)
        bf.remove(inner)
        bf.rmdir(target)
def main():
    """Benchmark bf.copy throughput in both directions between a local and a
    remote directory, in serial and parallel modes."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--remote-dir", required=True)
    parser.add_argument("--local-dir", required=True)
    parser.add_argument("--size", default=100_000_000, type=int)
    parser.add_argument("--loops", default=10, type=int)
    parser.add_argument("--verify", action="store_true")
    args = parser.parse_args()

    cases = [
        (
            "local_to_remote",
            bf.join(args.local_dir, f"file-{args.size}.bin"),
            bf.join(args.remote_dir, "file.bin"),
        ),
        (
            "remote_to_local",
            bf.join(args.remote_dir, f"file-{args.size}.bin"),
            bf.join(args.local_dir, "file.bin"),
        ),
    ]
    for name, src, dst in cases:
        # 1000-byte repeating pattern, tiled to exactly args.size bytes
        data = (b"meow" * 249 + b"mew\n") * (args.size // 1000)
        assert len(data) == args.size
        # (re)create the source file when it is absent or the wrong size
        if not bf.exists(src) or bf.stat(src).size != args.size:
            with bf.BlobFile(src, "wb") as f:
                f.write(data)
        digest = hashlib.md5()
        digest.update(data)
        data_hash = digest.hexdigest()
        # run the same copy loop twice: once serial, once with parallel=True
        for label, copy_kwargs in [("serial", {}), ("parallel", {"parallel": True})]:
            with timer(f"{name}_{label}", args.size * args.loops):
                for i in range(args.loops):
                    dst_path = dst + str(i)
                    bf.copy(src, dst_path, **copy_kwargs)
                    if args.verify:
                        verify_hash(data_hash, dst_path)
                    bf.remove(dst_path)
def test_invalid_paths(base_path):
    # Sweep path variants under base_path (bare bucket/container, trailing
    # slashes, a missing file, a missing directory) and verify that every bf
    # operation fails — or succeeds — in exactly the expected way.
    for suffix in ["", "/", "//", "/invalid.file", "/invalid/dir/"]:
        path = base_path + suffix
        print(path)
        # directory-shaped paths raise IsADirectoryError, file-shaped ones
        # raise FileNotFoundError
        if path.endswith("/"):
            expected_error = IsADirectoryError
        else:
            expected_error = FileNotFoundError
        list(bf.glob(path))
        if suffix == "":
            # at the bucket/container level, wildcard globs are rejected
            for pattern in ["*", "**"]:
                try:
                    list(bf.glob(path + pattern))
                except bf.Error as e:
                    assert "Wildcards cannot be used" in e.message
        else:
            # below the bucket level, wildcard globs are permitted
            for pattern in ["*", "**"]:
                list(bf.glob(path + pattern))
        with pytest.raises(FileNotFoundError):
            list(bf.listdir(path))
        assert not bf.exists(path)
        assert not bf.isdir(path)
        with pytest.raises(expected_error):
            bf.remove(path)
        if suffix in ("", "/"):
            # rmdir must refuse to delete the bucket/container itself
            try:
                bf.rmdir(path)
            except bf.Error as e:
                assert "Cannot delete bucket" in e.message
        else:
            bf.rmdir(path)
        with pytest.raises(NotADirectoryError):
            bf.rmtree(path)
        with pytest.raises(FileNotFoundError):
            bf.stat(path)
        # URL generation fails only when the account itself is invalid
        if base_path == AZURE_INVALID_CONTAINER_NO_ACCOUNT:
            with pytest.raises(bf.Error):
                bf.get_url(path)
        else:
            bf.get_url(path)
        with pytest.raises(FileNotFoundError):
            bf.md5(path)
        with pytest.raises(bf.Error):
            bf.makedirs(path)
        list(bf.walk(path))
        # copying to/from the invalid path must fail in both directions
        with tempfile.TemporaryDirectory() as tmpdir:
            local_path = os.path.join(tmpdir, "test.txt")
            with pytest.raises(expected_error):
                bf.copy(path, local_path)
            with open(local_path, "w") as f:
                f.write("meow")
            with pytest.raises(expected_error):
                bf.copy(local_path, path)
        # opening the invalid path fails for both streaming modes, read & write
        for streaming in [False, True]:
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "rb", streaming=streaming) as f:
                    f.read()
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "wb", streaming=streaming) as f:
                    f.write(b"meow")
def test_exists(ctx):
    """A path does not exist before a write and does afterward."""
    payload = b"meow!"
    with ctx() as path:
        assert not bf.exists(path)
        _write_contents(path, payload)
        assert bf.exists(path)