async def test_rmtree(any_dir):
    """Exercise bbb.rmtree against a file, a populated directory and a missing path.

    ``any_dir`` is a fixture defined outside this block; the first assertion
    requires it to be empty when the test starts.
    """
    assert [p async for p in bbb.listdir(any_dir)] == []

    N = 5
    # gather() instead of asyncio.wait(): handing bare coroutines to wait() was
    # deprecated in Python 3.8 and is rejected from 3.11 on, and wait() never
    # re-raises failures, so a broken setup step would otherwise go unnoticed.
    await asyncio.gather(
        *(helpers.unsafe_create_file(any_dir / "alpha" / str(i)) for i in range(N)),
        *(helpers.unsafe_create_file(any_dir / "alpha" / "beta" / str(i)) for i in range(N)),
    )

    # rmtree on a regular file is expected to raise NotADirectoryError
    with pytest.raises(NotADirectoryError):
        async with bbb.BoostExecutor(N) as e:
            await bbb.rmtree(any_dir / "alpha" / "0", e)

    async with bbb.BoostExecutor(N) as e:
        # N files plus the "beta" subdirectory directly under alpha
        assert len([p async for p in bbb.listdir(any_dir / "alpha")]) == N + 1
        # 2 * N files in the whole tree
        assert len([p async for p in bbb.listtree(any_dir / "alpha")]) == 2 * N
        await bbb.rmtree(any_dir / "alpha", e)

    # removing or listing the tree a second time is expected to raise FileNotFoundError
    with pytest.raises(FileNotFoundError):
        async with bbb.BoostExecutor(N) as e:
            await bbb.rmtree(any_dir / "alpha", e)

    with pytest.raises(FileNotFoundError):
        [p async for p in bbb.listdir(any_dir / "alpha")]

    assert [p async for p in bbb.listdir(any_dir)] == []
    assert [p async for p in bbb.listtree(any_dir)] == []
async def test_map_ordered_identity():
    """map_ordered over ``identity`` must hand back range(N) in order."""
    N = 20
    expected = list(range(N))

    # First pass: the iterator is fully collected before the executor exits.
    collected = []
    async with bbb.BoostExecutor(N // 2) as executor:
        ordered = executor.map_ordered(identity, iter(range(N)))
        await collect(ordered, collected)
    assert collected == expected

    # Second pass: collection is only scheduled as a task; the assertion after
    # the context block requires everything to have been collected by then.
    collected = []
    async with bbb.BoostExecutor(N // 2) as executor:
        ordered = executor.map_ordered(identity, iter(range(N)))
        asyncio.create_task(collect(ordered, collected))
    assert collected == expected
async def test_google_chunking():
    """write_stream to a Google path with three 3-byte chunks must raise ValueError."""
    chunks = [b"abc", b"def", b"ghi"]
    with helpers.tmp_google_dir() as google_dir:
        async with bbb.BoostExecutor(10) as executor:
            # the error message is matched against "chunked incorrectly"
            with pytest.raises(ValueError, match="chunked incorrectly"):
                await bbb.write.write_stream(google_dir / "alpha", iter(chunks), executor)
async def test_map_eager_async_iterator():
    """Check how map_ordered schedules work fed from an EagerAsyncIterator.

    The mapped function records each input in ``started`` and then blocks on a
    shared future, so the assertions can observe how many items have been
    started at each point in time.
    """
    N = 30

    async def iterator() -> AsyncIterator[int]:
        # async source of 0..N-1 that never awaits between items
        for i in range(N):
            yield i

    loop = asyncio.get_event_loop()
    future = loop.create_future()
    started = []

    async def identity_wait(x: int) -> int:
        # record that x was started; block until the shared future is resolved
        started.append(x)
        if not future.done():
            await future
        return x

    results = []
    async with bbb.BoostExecutor(N // 3) as e:
        it = e.map_ordered(identity_wait, bbb.boost.EagerAsyncIterator(iterator()))
        asyncio.create_task(collect(it, results))
        # nothing can have started before control is yielded to the event loop
        assert started == []
        await pause()
        assert started == [0]
        # BoostExecutor currently sleeps for a minimum of 0.01 seconds if the underlying async
        # iterator is not ready
        await asyncio.sleep(0.02)
        # expected: exactly N // 3 items (the executor's concurrency) are in flight
        assert started == list(range(N // 3))
        # unblock every waiting call; later calls skip the wait since future.done()
        future.set_result(None)
        await asyncio.sleep(0.02)
        assert started == list(range(N))
    assert results == list(range(N))
async def test_map_ordered_single():
    """Step a concurrency-1 BoostExecutor through a two-item map_ordered by hand.

    NOTE(review): presumably ``get_futures_fn(futures)`` returns a coroutine
    function that registers a future under its argument in ``futures`` and
    removes it again once resolved -- confirm against the helper's definition.
    """
    futures = {}
    async with bbb.BoostExecutor(1) as e:
        # peeks at the executor's private semaphore counter
        assert e.semaphore._value == 0  # type: ignore
        it = e.map_ordered(get_futures_fn(futures), iter([0, 1])).__aiter__()
        assert not futures
        await pause()
        assert e.semaphore._value == 0  # type: ignore
        # nothing is scheduled until someone actually asks for a result
        assert set(futures) == set()
        next_task = asyncio.create_task(it.__anext__())
        await pause()
        assert set(futures) == {0}
        assert not next_task.done()
        futures[0].set_result(None)
        # resolving the future does not finish the task until the loop runs again
        assert not next_task.done()
        await pause()
        # one might expect a task to be scheduled here, since we have one unused concurrency and
        # boostedblob is generally eager. however, in the single concurrency case, the executor
        # doesn't run to avoid deadlock. TODO(shantanu): consider changing this
        assert set(futures) == set()
        assert next_task.done()
        assert (await next_task) == 0
        # second item: the same request / resolve / result cycle
        next_task = asyncio.create_task(it.__anext__())
        await pause()
        assert not next_task.done()
        futures[1].set_result(None)
        assert (await next_task) == 1
        assert not futures
async def cp(
    srcs: List[str], dst: str, quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY
) -> None:
    """Copy every source in ``srcs`` to ``dst``.

    Args:
        srcs: Source paths; entries for which ``is_glob`` is true are expanded
            through ``bbb.copying.copyglob_iterator``.
        dst: Destination path. Must be directory-like when there is more than
            one source or when a source is a glob.
        quiet: When true, do not print the copied paths.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.

    Raises:
        NotADirectoryError: If several sources, or a glob source, are given but
            ``dst`` is not directory-like.
    """
    target = bbb.BasePath.from_str(dst)
    target_is_dir = target.is_directory_like() or await bbb.isdir(target)

    async with bbb.BoostExecutor(concurrency) as executor:
        if not target_is_dir and len(srcs) > 1:
            raise NotADirectoryError(target)

        async def copy_one(src: str) -> None:
            source = bbb.BasePath.from_str(src)

            if not is_glob(src):
                # plain file: copy into the directory, or onto dst itself
                destination = target / source.name if target_is_dir else target
                await bbb.copyfile(source, destination, executor, overwrite=True)
                if not quiet:
                    print(source)
                return

            # glob: matches can only land inside a directory
            if not target_is_dir:
                raise NotADirectoryError(target)
            async for copied in bbb.copying.copyglob_iterator(source, target, executor):
                if not quiet:
                    print(copied)

        await bbb.boost.consume(executor.map_unordered(copy_one, iter(srcs)))
async def test_map_ordered():
    """Check that map_ordered with concurrency 2 releases results in input order.

    NOTE(review): presumably ``get_futures_fn(futures)`` returns a coroutine
    function that registers a future under its argument in ``futures`` and
    removes it once resolved -- confirm against the helper's definition.
    """
    futures = {}
    results = []
    async with bbb.BoostExecutor(2) as e:
        # peeks at the executor's private semaphore counter
        assert e.semaphore._value == 1  # type: ignore
        it = e.map_ordered(get_futures_fn(futures), iter(range(4)))
        asyncio.create_task(collect(it, results))
        await pause()
        assert e.semaphore._value == 0  # type: ignore
        # two items in flight
        assert set(futures) == {0, 1}
        # finishing item 1 first must not produce any output yet
        futures[1].set_result(None)
        await pause()
        assert results == []
        # once item 0 finishes, both results are expected, in input order
        futures[0].set_result(None)
        await pause()
        assert results == [0, 1]
        assert set(futures) == {2, 3}
        futures[2].set_result(None)
        await pause()
        assert results == [0, 1, 2]
        futures[3].set_result(None)
        await pause()
        assert results == [0, 1, 2, 3]
async def test_copytree(any_dir, other_any_dir):
    """Copy a nested tree with bbb.copytree and compare relative listings.

    ``any_dir`` and ``other_any_dir`` are fixtures defined outside this block.
    """
    # gather() instead of asyncio.wait(): handing bare coroutines to wait() was
    # deprecated in Python 3.8 and is rejected from 3.11 on, and wait() never
    # re-raises failures, so a broken setup step would otherwise go unnoticed.
    await asyncio.gather(
        helpers.unsafe_create_file(any_dir / "f1"),
        helpers.unsafe_create_file(any_dir / "f2"),
        helpers.unsafe_create_file(any_dir / "f3"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f4"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f5"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f6"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f7"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "gamma" / "f8"),
        helpers.unsafe_create_file(any_dir / "delta" / "f9"),
        helpers.unsafe_create_file(any_dir / "delta" / "epsilon" / "f10"),
    )

    async with bbb.BoostExecutor(100) as e:
        # NOTE(review): presumably a local copy into an already existing
        # directory is not possible before Python 3.8, hence the destination
        # is removed first -- confirm.
        if sys.version_info < (3, 8) and isinstance(other_any_dir, LocalPath):
            os.rmdir(other_any_dir)
        await bbb.copytree(any_dir, other_any_dir, e)

    async def _listtree(d, base):
        # sorted paths under d, expressed relative to base
        return sorted([p.relative_to(base) async for p in bbb.listtree(d)])

    assert await _listtree(any_dir, any_dir) == await _listtree(other_any_dir, other_any_dir)
async def test_copy(any_dir, other_any_dir):
    """Copy a multi-chunk file and a small file with bbb.copyfile and compare bytes.

    ``any_dir`` and ``other_any_dir`` are fixtures defined outside this block.
    """
    import os  # local import: used only to generate the random payload

    MIN_CHUNK_SIZE = 256 * 1024

    # os.urandom instead of reading /dev/random: the device does not exist on
    # every platform and may block while the kernel gathers entropy.
    contents_medium = os.urandom(16 * MIN_CHUNK_SIZE)
    helpers.create_file(any_dir / "original_medium", contents_medium)

    contents_known_small = b"abcdefgh"
    helpers.create_file(any_dir / "original_small", contents_known_small)

    async with bbb.BoostExecutor(100) as e:
        # copy the 16-chunk payload with the chunk size pinned to MIN_CHUNK_SIZE
        with bbb.globals.configure(chunk_size=MIN_CHUNK_SIZE):
            await bbb.copyfile(any_dir / "original_medium", other_any_dir / "copied_medium", e)
        with blobfile.BlobFile(str(other_any_dir / "copied_medium"), "rb") as f:
            assert f.read() == contents_medium

        # copy with the size supplied up front by the caller
        await bbb.copyfile(
            any_dir / "original_small",
            other_any_dir / "copied_small",
            e,
            size=len(contents_known_small),
        )
        with blobfile.BlobFile(str(other_any_dir / "copied_small"), "rb") as f:
            assert f.read() == contents_known_small
async def test_read_write(any_dir):
    """An empty stream written to a path must read back without yielding a chunk."""
    async with bbb.BoostExecutor(10) as executor:
        # test reading and writing an empty stream
        await bbb.write.write_stream(any_dir / "empty", iter([]), executor)
        read_back = await bbb.read.read_stream(any_dir / "empty", executor)
        async for _chunk in bbb.boost.iter_underlying(read_back):
            # any chunk at all means the stream was not empty
            raise AssertionError
async def cptree(
    src: str, dst: str, quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY
) -> None:
    """Copy the tree at ``src`` to ``dst``, printing each yielded path unless ``quiet``.

    Args:
        src: Source path, parsed with ``bbb.BasePath.from_str``.
        dst: Destination, passed to ``copytree_iterator`` as the raw string.
        quiet: When true, suppress the per-path output.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    source = bbb.BasePath.from_str(src)
    async with bbb.BoostExecutor(concurrency) as executor:
        copied_paths = bbb.copying.copytree_iterator(source, dst, executor)
        async for copied in copied_paths:
            if quiet:
                continue
            print(copied)
async def test_boost_executor_shutdown():
    """After a BoostExecutor block exits, only this test's own task may be left."""

    def live_task_names():
        # coroutine names of every task asyncio currently reports
        return {get_coro(task).__name__ for task in asyncio.all_tasks()}

    only_this_test = {"test_boost_executor_shutdown"}

    # map a single item and never consume the resulting iterator
    async with bbb.BoostExecutor(1) as executor:
        executor.map_ordered(asyncio.sleep, iter([0]))

    # ordered map over ten short random sleeps, never consumed
    async with bbb.BoostExecutor(4) as executor:
        executor.map_ordered(asyncio.sleep, (random.random() * 0.1 for _ in range(10)))
    assert live_task_names() == only_this_test

    # same again for the unordered variant
    async with bbb.BoostExecutor(4) as executor:
        executor.map_unordered(asyncio.sleep, (random.random() * 0.1 for _ in range(10)))
    assert live_task_names() == only_this_test
async def test_map_unordered_random_sleep():
    """map_unordered over randomly delayed work must still yield every input once."""
    N = 20

    async def random_sleep(i):
        # finish after a random delay below 0.3 seconds
        delay = random.random() * 0.3
        await asyncio.sleep(delay)
        return i

    collected = []
    async with bbb.BoostExecutor(N // 2) as executor:
        unordered = executor.map_unordered(random_sleep, iter(range(N)))
        await collect(unordered, collected)

    # completion order is arbitrary, so compare after sorting
    assert sorted(collected) == list(range(N))
async def test_composition_nested_unordered():
    """Run map_unordered calls from inside work mapped by the same executor.

    Each outer item ``n`` spawns an inner map over ``range(n)`` and returns its
    results as a list, so the sorted result lengths must be ``0 .. N - 1``.
    """
    N = 10
    results = []
    async with bbb.BoostExecutor(3) as e:

        async def work_spawner(n):
            await pause()
            # nested use of the same executor from inside mapped work
            return [x async for x in e.map_unordered(identity, iter(range(n)))]

        it = e.map_unordered(work_spawner, iter(range(N)))
        asyncio.create_task(collect(it, results))

    # Expressed through N rather than a literal 10, so the expectation follows
    # the size of the workload if N is ever changed.
    assert sorted(map(len, results)) == list(range(N))
async def test_boost_executor_exception():
    """An exception raised inside the executor block propagates and leaves no extra task."""

    def live_task_names():
        # coroutine names of every task asyncio currently reports
        return {get_coro(task).__name__ for task in asyncio.all_tasks()}

    with pytest.raises(ValueError):
        async with bbb.BoostExecutor(10):
            # while the block is active a second task named "run" is expected
            assert live_task_names() == {"test_boost_executor_exception", "run"}
            assert len(asyncio.all_tasks()) > 1
            raise ValueError

    # yield to the event loop once before checking that only this test remains
    await pause()
    assert live_task_names() == {"test_boost_executor_exception"}
async def test_map_unordered_many_reversed():
    """Resolve N in-flight items last-to-first; map_unordered must yield in that order."""
    N = 500
    futures = {}
    collected = []
    async with bbb.BoostExecutor(N * 2) as executor:
        unordered = executor.map_unordered(get_futures_fn(futures), iter(range(N)))
        asyncio.create_task(collect(unordered, collected))

        # take a couple dozen pauses to get everything scheduled
        while N - 1 not in futures:
            await pause()

        for index in range(N - 1, -1, -1):
            futures[index].set_result(None)
            await pause()
    assert collected == list(range(N - 1, -1, -1))
async def edit(path: str) -> None:
    """Edit the file at ``path`` through a temporary local copy.

    The file is copied into a temporary directory and opened with the command
    named by the ``EDITOR`` environment variable (``vi`` when unset). It is
    copied back with ``overwrite=True`` only if ``bbb.stat`` of the local copy
    differs before and after the editor ran.

    Args:
        path: Path of the file to edit, parsed with ``bbb.BasePath.from_str``.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        remote = bbb.BasePath.from_str(path)
        local_copy = bbb.LocalPath(scratch_dir) / remote.name

        async with bbb.BoostExecutor(DEFAULT_CONCURRENCY) as executor:
            await bbb.copyfile(remote, local_copy, executor)
            stat_before = await bbb.stat(local_copy)

            # runs synchronously: control returns once the editor process exits
            editor_command = [os.environ.get("EDITOR", "vi"), local_copy]
            subprocess.check_call(editor_command)

            stat_after = await bbb.stat(local_copy)
            changed = stat_before != stat_after
            if changed:
                await bbb.copyfile(local_copy, remote, executor, overwrite=True)
                print(f"Updated {remote}")
            else:
                print("File unmodified, skipping reupload...")
async def test_map_unordered_many_low_concurrency():
    """Push 500 items through map_unordered with concurrency 10, holding item 0 back.

    NOTE(review): presumably ``get_futures_fn(futures)`` returns a coroutine
    function that registers a future under its argument in ``futures`` --
    confirm against the helper's definition.
    """
    N = 500
    futures = {}
    results = []
    async with bbb.BoostExecutor(10) as e:
        it = e.map_unordered(get_futures_fn(futures), iter(range(N)))
        asyncio.create_task(collect(it, results))
        await pause()
        # resolve items 1..N-1 one at a time while item 0 stays pending
        for i in range(1, N):
            futures[i].set_result(None)
            await pause()
            # each resolved item must show up after a single pause
            assert len(results) == i
        futures[0].set_result(None)
        await pause()
        # item 0 was resolved last, so it is expected at the end
        assert results == list(range(1, N)) + [0]
async def rm(paths: List[str], quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY) -> None:
    """Remove every path in ``paths``, printing what was removed unless ``quiet``.

    Args:
        paths: Paths to remove; entries for which ``is_glob`` is true are
            handled by ``bbb.delete.glob_remove``, the rest by ``bbb.remove``.
        quiet: When true, suppress the per-path output.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    async with bbb.BoostExecutor(concurrency) as executor:

        async def remove_one(path: str) -> None:
            target = bbb.BasePath.from_str(path)

            if not is_glob(path):
                await bbb.remove(target)
                if not quiet:
                    print(target)
                return

            async for removed in bbb.delete.glob_remove(target, executor):
                if not quiet:
                    print(removed)

        await bbb.boost.consume(executor.map_unordered(remove_one, iter(paths)))
async def rmtree(path: str, quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY) -> None:
    """Recursively remove ``path``.

    Globs go through ``bbb.delete.glob_remove`` and ``bbb.CloudPath`` instances
    through ``bbb.delete.rmtree_iterator``; both print each yielded path unless
    ``quiet``. Any other path is removed with ``bbb.rmtree`` and prints nothing.

    Args:
        path: Path or glob to remove.
        quiet: When true, suppress the per-path output.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    target = bbb.BasePath.from_str(path)
    async with bbb.BoostExecutor(concurrency) as executor:
        if is_glob(path):
            # this will fail if the glob matches a directory, which is a little contra the spirit of
            # rmtree. but maybe the best way to do that (and least likely to result in accidents) is
            # through recursive wildcards
            removed_paths = bbb.delete.glob_remove(target, executor)
        elif isinstance(target, bbb.CloudPath):
            removed_paths = bbb.delete.rmtree_iterator(target, executor)
        else:
            await bbb.rmtree(target, executor)
            return

        async for removed in removed_paths:
            if not quiet:
                print(removed)
async def sync(
    src: str,
    dst: str,
    delete: bool = False,
    quiet: bool = False,
    concurrency: int = DEFAULT_CONCURRENCY,
) -> None:
    """Run ``bbb.sync`` from ``src`` to ``dst``, printing yielded paths unless ``quiet``.

    Args:
        src: Source path; must be directory-like or an existing directory.
        dst: Destination path.
        delete: Forwarded to ``bbb.sync`` as ``delete``.
        quiet: When true, suppress the per-path output.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.

    Raises:
        ValueError: If ``src`` is not a directory.
    """
    source = bbb.BasePath.from_str(src)
    destination = bbb.BasePath.from_str(dst)

    # guard clause: only directories can be synced
    if not (source.is_directory_like() or await bbb.isdir(source)):
        raise ValueError(f"{source} is not a directory")

    async with bbb.BoostExecutor(concurrency) as executor:
        synced_paths = bbb.sync(source, destination, executor, delete=delete)
        async for synced in synced_paths:
            if quiet:
                continue
            print(synced)
async def test_composition_ordered_ordered():
    """Chain two map_ordered stages and resolve their pending work in random order."""
    N = 500
    inner_futures = {}
    outer_futures = {}
    collected = []
    async with bbb.BoostExecutor(N // 5) as executor:
        inner = executor.map_ordered(get_futures_fn(inner_futures), iter(range(N)))
        outer = executor.map_ordered(get_futures_fn(outer_futures), inner)
        asyncio.create_task(collect(outer, collected))
        await pause()

        while outer_futures or inner_futures:
            # pick one of the stages that has pending work and resolve the
            # first future it holds
            non_empty = [pending for pending in (outer_futures, inner_futures) if pending]
            chosen = random.choice(non_empty)
            first_key = next(iter(chosen))
            chosen[first_key].set_result(None)
            await pause()
    assert collected == list(range(N))
async def test_sync(any_dir, other_any_dir):
    """Sync a tree, mutate the source, then check the planned actions and re-sync.

    ``any_dir`` and ``other_any_dir`` are fixtures defined outside this block.
    """
    # gather() instead of asyncio.wait(): handing bare coroutines to wait() was
    # deprecated in Python 3.8 and is rejected from 3.11 on, and wait() never
    # re-raises failures, so a broken setup step would otherwise go unnoticed.
    await asyncio.gather(
        helpers.unsafe_create_file(any_dir / "f1", b"samesize"),
        helpers.unsafe_create_file(any_dir / "f2"),
        helpers.unsafe_create_file(any_dir / "f3"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f4"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f5"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f6"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f7"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "gamma" / "f8"),
        helpers.unsafe_create_file(any_dir / "delta" / "f9", b"samesize"),
        helpers.unsafe_create_file(any_dir / "delta" / "epsilon" / "f10"),
    )

    async def _listtree(d, base):
        # sorted paths under d, expressed relative to base
        return sorted([p.relative_to(base) async for p in bbb.listtree(d)])

    async with bbb.BoostExecutor(100) as e:
        # sleep since if we run sync too soon, we run into limits of mtime accuracy and end up
        # syncing more than what we need...
        await asyncio.sleep(1)

        await bbb.boost.consume(bbb.sync(any_dir, other_any_dir, e))
        assert await _listtree(any_dir, any_dir) == await _listtree(other_any_dir, other_any_dir)

        # Mutate the source: drop f2, rewrite f1 with different bytes of the
        # same length, and rewrite delta/f9 with a different length.
        await asyncio.gather(
            bbb.remove(any_dir / "f2"),
            helpers.unsafe_create_file(any_dir / "f1", b"sizesame"),
            helpers.unsafe_create_file(any_dir / "delta" / "f9", b"differentsize"),
        )

        actions = sorted(
            await bbb.syncing.sync_action_iterator(any_dir, other_any_dir),
            key=lambda x: x.relpath,
        )
        # the second CopyAction argument equals the byte length of the new contents
        assert actions == [
            bbb.syncing.CopyAction("delta/f9", 13),
            bbb.syncing.CopyAction("f1", 8),
            bbb.syncing.DeleteAction("f2"),
        ]

        await bbb.boost.consume(bbb.sync(any_dir, other_any_dir, e, delete=True))
        assert await _listtree(any_dir, any_dir) == await _listtree(other_any_dir, other_any_dir)
async def test_map_multiple():
    """Run one unordered and two ordered maps concurrently on a single executor."""
    N = 20
    unordered_out = []
    ordered_out_a = []
    ordered_out_b = []
    async with bbb.BoostExecutor(N // 2) as executor:
        # each map gets its own collector task; all three run side by side
        collectors = [
            asyncio.create_task(
                collect(executor.map_unordered(identity, iter(range(N))), unordered_out)
            ),
            asyncio.create_task(
                collect(executor.map_ordered(identity, iter(range(N))), ordered_out_a)
            ),
            asyncio.create_task(
                collect(executor.map_ordered(identity, iter(range(N))), ordered_out_b)
            ),
        ]
        await asyncio.gather(*collectors)

    # the unordered output only has to match after sorting
    assert sorted(unordered_out) == ordered_out_a == ordered_out_b
async def test_map_unordered_many_random():
    """Resolve N in-flight items in shuffled order; every input must be yielded once."""
    N = 500
    futures = {}
    collected = []
    async with bbb.BoostExecutor(N * 2) as executor:
        unordered = executor.map_unordered(get_futures_fn(futures), iter(range(N)))
        collector = asyncio.create_task(collect(unordered, collected))

        # take a couple dozen pauses to get everything scheduled
        while N - 1 not in futures:
            await pause()

        resolve_order = list(range(N - 1, -1, -1))
        random.shuffle(resolve_order)
        for index in resolve_order:
            futures[index].set_result(None)
            # only sometimes give the event loop a chance to run in between
            if random.random() < 0.3:
                await pause()
        await collector
    assert sorted(collected) == list(range(N))
async def test_copyglob():
    """copyglob_iterator with the pattern "f*" must copy f1 and f2 but not g3."""
    with helpers.tmp_azure_dir() as dir1, helpers.tmp_azure_dir() as dir2:
        # gather() instead of asyncio.wait(): handing bare coroutines to wait()
        # was deprecated in Python 3.8 and is rejected from 3.11 on, and wait()
        # never re-raises failures, so a broken setup would go unnoticed.
        await asyncio.gather(
            helpers.unsafe_create_file(dir1 / "f1"),
            helpers.unsafe_create_file(dir1 / "f2"),
            helpers.unsafe_create_file(dir1 / "g3"),
        )

        async with bbb.BoostExecutor(100) as e:
            copied = [p async for p in bbb.copying.copyglob_iterator(dir1 / "f*", dir2, e)]
            # the yielded paths are compared relative to the source directory
            assert sorted([p.relative_to(dir1) for p in copied]) == ["f1", "f2"]

        # the destination must contain exactly the two matching files
        contents = sorted([p.relative_to(dir2) async for p in bbb.listtree(dir2)])
        assert contents == ["f1", "f2"]
async def test_map_ordered_many_low_concurrency():
    """Hold item 0 back in a low-concurrency map_ordered and check nothing leaks out.

    NOTE(review): this relies on ``get_futures_fn`` reusing a future that is
    already present in ``futures`` instead of replacing it -- confirm against
    the helper's definition.
    """
    N = 500
    futures = {}
    results = []
    loop = asyncio.get_event_loop()
    async with bbb.BoostExecutor(N // 50) as e:
        it = e.map_ordered(get_futures_fn(futures), iter(range(N)))
        asyncio.create_task(collect(it, results))
        await pause()
        for i in range(1, N):
            # create the future if it doesn't exist, due to backpressure
            if i not in futures:
                # unscheduled items are only expected beyond index N // 25
                assert i > N // 25
                futures[i] = loop.create_future()
            futures[i].set_result(None)
            await pause()
            # item 0 is still pending, so no result may have been released
            assert results == []
        futures[0].set_result(None)
        await asyncio.sleep(0.1)  # wait for backpressure to subside
        assert results == list(range(N))
async def test_map_eager_async_iterator_slow():
    """Feed map_ordered from an EagerAsyncIterator whose items are released one by one."""
    N = 30
    loop = asyncio.get_event_loop()
    gates = [loop.create_future() for _ in range(N)]

    async def gated_numbers() -> AsyncIterator[int]:
        # item i is only produced after gates[i] has been resolved
        for index, gate in enumerate(gates):
            await gate
            yield index

    collected = []
    async with bbb.BoostExecutor(N) as executor:
        ordered = executor.map_ordered(identity, bbb.boost.EagerAsyncIterator(gated_numbers()))
        asyncio.create_task(collect(ordered, collected))
        await pause()

        # open the gates in order, yielding to the event loop after each one
        for gate in gates:
            gate.set_result(None)
            await pause()
    assert collected == list(range(N))
async def test_composition_ordered_unordered():
    """Feed a map_unordered stage into a map_ordered stage and drive both by hand.

    Inner items are resolved last-to-first; the final result is expected in
    that same reversed order.

    NOTE(review): presumably ``get_futures_fn(d)`` returns a coroutine function
    that registers a future under its argument in ``d`` and removes it once
    resolved -- confirm against the helper's definition.
    """
    N = 500
    inner_futures = {}
    outer_futures = {}
    results = []
    async with bbb.BoostExecutor(N * 2) as e:
        inner_it = e.map_unordered(get_futures_fn(inner_futures), iter(range(N)))
        outer_it = e.map_ordered(get_futures_fn(outer_futures), inner_it)
        asyncio.create_task(collect(outer_it, results))
        while not N - 1 in inner_futures:
            await pause()  # take a couple dozen pauses to get everything scheduled
        for i in reversed(range(N)):
            # if the outer stage still holds anything, it can only be the
            # item from the previous iteration
            if outer_futures:
                assert set(outer_futures) == {i + 1}
            inner_futures[i].set_result(None)
            await pause()
            # the item just released by the inner stage is now the only one
            # pending in the outer stage
            assert set(outer_futures) == {i}
            outer_futures[i].set_result(None)
            await pause()
    assert results == list(reversed(range(N)))
async def cat(path: str, concurrency: int = DEFAULT_CONCURRENCY) -> None:
    """Write the contents of ``path`` to standard output as raw bytes.

    Args:
        path: Path to read; handed to ``bbb.read.read_stream`` as given.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    event_loop = asyncio.get_event_loop()
    async with bbb.BoostExecutor(concurrency) as executor:
        byte_stream = await bbb.read.read_stream(path, executor)
        async for chunk in bbb.boost.iter_underlying(byte_stream):
            # the stdout write runs in the loop's default executor rather
            # than directly in this coroutine
            await event_loop.run_in_executor(None, sys.stdout.buffer.write, chunk)