import blobfile as bf


def get_matches_for_patterns(patterns):
    # Expand each glob pattern and merge all matches into a single list.
    filenames = []
    for pattern in patterns:
        filenames.extend(bf.glob(pattern))
    return filenames
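# A hedged usage sketch: the bucket and prefix below are hypothetical, shown
# only to illustrate merging several glob patterns into one match list.
#
#   wheels = get_matches_for_patterns([
#       "gs://my-bucket/builds/*.whl",
#       "gs://my-bucket/builds/*.tar.gz",
#   ])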
import argparse
import subprocess
import tempfile

import blobfile as bf


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--version", required=True)
    parser.add_argument("--for-real", action="store_true")
    args = parser.parse_args()
    with tempfile.TemporaryDirectory() as tmpdir:
        # Copy the built wheels for this version into a local temp dir.
        for filepath in bf.glob(f"gs://openai-procgen/builds/procgen-{args.version}-*.whl"):
            print(filepath)
            bf.copy(filepath, bf.join(tmpdir, bf.basename(filepath)))
        # Upload to test.pypi.org unless --for-real is passed.
        if args.for_real:
            options = []
        else:
            options = ["--repository-url", "https://test.pypi.org/legacy/"]
        subprocess.run(
            ["python", "-m", "twine", "upload", *options, *bf.glob(bf.join(tmpdir, "*.whl"))],
            check=True,
        )
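# Hypothetical invocations (the script filename and version are placeholders):
#
#   python upload.py --version 1.2.3              # dry run against test.pypi.org
#   python upload.py --version 1.2.3 --for-real   # upload to the real index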
import os
import tempfile

import pytest

import blobfile as bf


def test_invalid_paths(base_path):
    # AZURE_INVALID_CONTAINER_NO_ACCOUNT is a constant defined elsewhere in
    # the test module.
    for suffix in ["", "/", "//", "/invalid.file", "/invalid/dir/"]:
        path = base_path + suffix
        print(path)
        if path.endswith("/"):
            expected_error = IsADirectoryError
        else:
            expected_error = FileNotFoundError
        list(bf.glob(path))
        if suffix == "":
            # Globbing a bare bucket with a wildcard should be rejected.
            for pattern in ["*", "**"]:
                try:
                    list(bf.glob(path + pattern))
                except bf.Error as e:
                    assert "Wildcards cannot be used" in e.message
        else:
            for pattern in ["*", "**"]:
                list(bf.glob(path + pattern))
        with pytest.raises(FileNotFoundError):
            list(bf.listdir(path))
        assert not bf.exists(path)
        assert not bf.isdir(path)
        with pytest.raises(expected_error):
            bf.remove(path)
        if suffix in ("", "/"):
            try:
                bf.rmdir(path)
            except bf.Error as e:
                assert "Cannot delete bucket" in e.message
        else:
            bf.rmdir(path)
        with pytest.raises(NotADirectoryError):
            bf.rmtree(path)
        with pytest.raises(FileNotFoundError):
            bf.stat(path)
        if base_path == AZURE_INVALID_CONTAINER_NO_ACCOUNT:
            with pytest.raises(bf.Error):
                bf.get_url(path)
        else:
            bf.get_url(path)
        with pytest.raises(FileNotFoundError):
            bf.md5(path)
        with pytest.raises(bf.Error):
            bf.makedirs(path)
        list(bf.walk(path))
        with tempfile.TemporaryDirectory() as tmpdir:
            local_path = os.path.join(tmpdir, "test.txt")
            with pytest.raises(expected_error):
                bf.copy(path, local_path)
            with open(local_path, "w") as f:
                f.write("meow")
            with pytest.raises(expected_error):
                bf.copy(local_path, path)
        for streaming in [False, True]:
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "rb", streaming=streaming) as f:
                    f.read()
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "wb", streaming=streaming) as f:
                    f.write(b"meow")
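# The test above presumably receives base_path via pytest parametrization; a
# hypothetical sketch (the constant names other than
# AZURE_INVALID_CONTAINER_NO_ACCOUNT are placeholders):
#
#   @pytest.mark.parametrize(
#       "base_path",
#       [GCS_INVALID_BUCKET, AZURE_INVALID_CONTAINER, AZURE_INVALID_CONTAINER_NO_ACCOUNT],
#   )
#   def test_invalid_paths(base_path): ...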
def assert_listing_equal(path, desired):
    # Helper nested inside a test: `dirpath` and `parallel` come from the
    # enclosing scope.
    desired = sorted([bf.join(dirpath, p) for p in desired])
    actual = sorted(list(bf.glob(path, parallel=parallel)))
    assert actual == desired, f"{actual} != {desired}"
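# A hedged usage sketch (file names are hypothetical): with dirpath bound in
# the enclosing test, this asserts that the glob matches exactly these entries.
#
#   assert_listing_equal(bf.join(dirpath, "*"), ["a.txt", "b.txt"])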
# `timer`, `read_worker`, `args`, `count`, `data`, and `path` are defined
# earlier in the benchmark script.
start = time.time()
with timer("multi_read"):
    procs = []
    for i in range(count):
        p = mp.Process(target=read_worker, args=(path,))
        procs.append(p)
    for p in procs:
        p.start()
    for p in procs:
        p.join()
end = time.time()
print(f"MB/s {count * len(data) / 1e6 / (end - start)}")

filepaths = list(bf.glob("gs://gcp-public-data-landsat/LC08/01/001/003/**/*.TIF"))
with timer("read_small_files"):
    for fp in filepaths[:100]:
        with bf.BlobFile(fp, "rb", buffer_size=args.buffer_size) as f:
            f.read(1)

with timer("glob"):
    first_file_list = list(bf.glob("gs://gcp-public-data-landsat/LC08/01/001/**"))

with timer("parallel_glob"):
    second_file_list = list(
        bf.glob("gs://gcp-public-data-landsat/LC08/01/001/**", parallel=True)
    )
assert set(first_file_list) == set(second_file_list)
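# A minimal sketch of the `timer` context manager the benchmarks above assume;
# the real helper lives elsewhere in the script, so this is only an assumption
# about its shape.
import contextlib
import time


@contextlib.contextmanager
def timer(name):
    # Time the body of the `with` block and report it.
    start = time.time()
    yield
    print(f"{name}: {time.time() - start:.3f}s")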