Beispiel #1
0
def get_matches_for_patterns(patterns):
    """Return all paths matching any of the given glob patterns.

    Args:
        patterns: iterable of glob pattern strings understood by ``bf.glob``.

    Returns:
        A flat list of matching paths, in pattern order. Duplicates are
        possible if patterns overlap.
    """
    # Flatten the per-pattern matches into one list.
    return [filename for pattern in patterns for filename in bf.glob(pattern)]
def main():
    """Copy procgen wheels for a given version from GCS and upload with twine.

    NOTE(review): everything from the ``with timer("multi_read")`` line onward
    references names (``timer``, ``count``, ``mp``, ``read_worker``, ``path``,
    ``data``, ``start``, ``args.buffer_size``) that are never defined in this
    fragment -- it looks like a separate benchmark snippet was pasted into
    this function. Confirm against the original source before relying on it.
    """
    parser = argparse.ArgumentParser()
    # Version of the procgen wheels to publish, e.g. "1.2.3".
    parser.add_argument("--version", required=True)
    # Without --for-real, uploads go to the test PyPI index (safe default).
    parser.add_argument("--for-real", action="store_true")
    args = parser.parse_args()

    with tempfile.TemporaryDirectory() as tmpdir:
        # Download every wheel built for this version into the temp dir.
        for filepath in bf.glob(
                f"gs://openai-procgen/builds/procgen-{args.version}-*.whl"):
            print(filepath)
            bf.copy(filepath, bf.join(tmpdir, bf.basename(filepath)))
        if args.for_real:
            options = []
        else:
            # Point twine at the test index so accidental runs are harmless.
            options = ["--repository-url", "https://test.pypi.org/legacy/"]
        subprocess.run([
            "python", "-m", "twine", "upload", *options,
            *bf.glob(bf.join(tmpdir, "*.whl"))
        ],
                       check=True)
    # --- benchmark section (see NOTE in docstring: likely pasted in) ---
    with timer("multi_read"):
        procs = []
        for i in range(count):
            p = mp.Process(target=read_worker, args=(path, ))
            procs.append(p)

        # Start all workers first so the reads overlap, then wait for each.
        for p in procs:
            p.start()

        for p in procs:
            p.join()
    end = time.time()
    # ``start`` is never assigned in this fragment -- this line would raise
    # NameError as written; TODO confirm against the original file.
    print(f"MB/s {count * len(data) /1e6/(end - start)}")

    filepaths = list(
        bf.glob(f"gs://gcp-public-data-landsat/LC08/01/001/003/**/*.TIF"))
    with timer("read_small_files"):
        # Time opening + reading one byte from each of the first 100 files.
        for fp in filepaths[:100]:
            with bf.BlobFile(fp, "rb", buffer_size=args.buffer_size) as f:
                f.read(1)

    with timer("glob"):
        first_file_list = list(
            bf.glob(f"gs://gcp-public-data-landsat/LC08/01/001/**"))

    with timer("parallel_glob"):
        second_file_list = list(
            bf.glob(f"gs://gcp-public-data-landsat/LC08/01/001/**",
                    parallel=True))

    # Parallel glob must return the same paths; order may legitimately differ.
    assert set(first_file_list) == set(second_file_list)
Beispiel #4
0
def test_invalid_paths(base_path):
    """Exercise every bf API against paths that do not exist.

    For each suffix appended to ``base_path``, checks that read-style calls
    either return empty results or raise the expected exception type:
    IsADirectoryError for paths ending in "/", FileNotFoundError otherwise.
    """
    for suffix in ["", "/", "//", "/invalid.file", "/invalid/dir/"]:
        path = base_path + suffix
        print(path)
        if path.endswith("/"):
            expected_error = IsADirectoryError
        else:
            expected_error = FileNotFoundError
        # Globbing a nonexistent path should succeed with no matches.
        list(bf.glob(path))
        if suffix == "":
            # A bare bucket/container root: wildcards are rejected outright.
            for pattern in ["*", "**"]:
                try:
                    list(bf.glob(path + pattern))
                except bf.Error as e:
                    assert "Wildcards cannot be used" in e.message
        else:
            for pattern in ["*", "**"]:
                list(bf.glob(path + pattern))
        with pytest.raises(FileNotFoundError):
            list(bf.listdir(path))
        assert not bf.exists(path)
        assert not bf.isdir(path)
        with pytest.raises(expected_error):
            bf.remove(path)
        if suffix in ("", "/"):
            # Removing the bucket itself is either a no-op failure or a
            # specific "Cannot delete bucket" error.
            try:
                bf.rmdir(path)
            except bf.Error as e:
                assert "Cannot delete bucket" in e.message
        else:
            # rmdir of a nonexistent directory is expected not to raise here.
            bf.rmdir(path)
        with pytest.raises(NotADirectoryError):
            bf.rmtree(path)
        with pytest.raises(FileNotFoundError):
            bf.stat(path)

        # NOTE(review): AZURE_INVALID_CONTAINER_NO_ACCOUNT is defined outside
        # this fragment; presumably a container URL with no valid account.
        if base_path == AZURE_INVALID_CONTAINER_NO_ACCOUNT:
            with pytest.raises(bf.Error):
                bf.get_url(path)
        else:
            # URL generation does not require the object to exist.
            bf.get_url(path)

        with pytest.raises(FileNotFoundError):
            bf.md5(path)
        with pytest.raises(bf.Error):
            bf.makedirs(path)
        # Walking a nonexistent tree should yield nothing, not raise.
        list(bf.walk(path))
        with tempfile.TemporaryDirectory() as tmpdir:
            local_path = os.path.join(tmpdir, "test.txt")
            # Copy FROM the invalid path fails with the expected error.
            with pytest.raises(expected_error):
                bf.copy(path, local_path)
            with open(local_path, "w") as f:
                f.write("meow")
            # Copy TO the invalid path fails the same way.
            with pytest.raises(expected_error):
                bf.copy(local_path, path)
        # Both streaming and non-streaming opens must fail for read and write.
        for streaming in [False, True]:
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "rb", streaming=streaming) as f:
                    f.read()
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "wb", streaming=streaming) as f:
                    f.write(b"meow")
Beispiel #5
0
 def assert_listing_equal(path, desired):
     """Assert that globbing ``path`` yields exactly ``desired``.

     ``desired`` holds names relative to ``dirpath``; ``dirpath`` and
     ``parallel`` come from the enclosing scope (this is a nested helper).
     """
     expected = sorted(bf.join(dirpath, name) for name in desired)
     # sorted() accepts any iterable directly; wrapping the glob generator
     # in list() first was redundant (C414).
     actual = sorted(bf.glob(path, parallel=parallel))
     assert actual == expected, f"{actual} != {expected}"
Beispiel #6
0
    start = time.time()
    # Spawn `count` reader processes, start them all, then wait for each one.
    with timer("multi_read"):
        workers = [
            mp.Process(target=read_worker, args=(path,)) for _ in range(count)
        ]

        for worker in workers:
            worker.start()

        for worker in workers:
            worker.join()
    end = time.time()
    # Aggregate throughput across every reader process.
    print(f"MB/s {count * len(data) /1e6/(end - start)}")

    filepaths = list(bf.glob(f"gs://gcp-public-data-landsat/LC08/01/001/003/**/*.TIF"))
    with timer("read_small_files"):
        # Open each of the first 100 files and read a single byte.
        for filepath in filepaths[:100]:
            with bf.BlobFile(filepath, "rb", buffer_size=args.buffer_size) as f:
                f.read(1)

    with timer("glob"):
        first_file_list = list(bf.glob(f"gs://gcp-public-data-landsat/LC08/01/001/**"))

    with timer("parallel_glob"):
        second_file_list = list(
            bf.glob(f"gs://gcp-public-data-landsat/LC08/01/001/**", parallel=True)
        )

    # Serial and parallel glob may order results differently; compare as sets.
    assert set(first_file_list) == set(second_file_list)