async def test_outer_join():
    """Tasks that `join` the pool from outside are released once it drains."""
    inner_args = range(1, 15)
    outer_args = range(10)
    done, released = [], []

    async def inner(n):
        # Larger `n` means a shorter sleep.
        await aio.sleep(1 / n)
        done.append(n)

    async def outer(n, pool):
        # Records `n` only after `pool.join()` has returned.
        await pool.join()
        released.append(n)

    loop = aio.get_event_loop()
    pool = AioPool(size=100)
    pool.map_n(inner, inner_args)

    joined = []
    for j in outer_args:
        joined.append(loop.create_task(outer(j, pool)))

    await pool.join()
    # Outer tasks may or may not have been scheduled yet at this point.
    assert len(released) <= len(outer_args)

    await aio.wait(joined)
    assert len(inner_args) == len(done) and len(released) == len(outer_args)
async def details(todo=range(1, 11)):
    """Walk through `spawn_n`, `map_n`, awaiting futures and `asyncio.wait`."""
    pool = AioPool(size=5)

    # Spawning every item by hand ...
    f1 = [pool.spawn_n(worker(i)) for i in todo]
    # ... does the same job as a single `map_n` call:
    f2 = pool.map_n(worker, todo)

    # Any returned future can be awaited directly:
    try:
        third = await f1[2]  # result of spawn_n(worker(3))
        assert 3 == third
    except BaseException:
        # An exception raised in the worker (CancelledError included)
        # is re-raised by the await above.
        pass

    # `asyncio.wait` lets you consume results in batches (see `iterwait` too):
    important_res = 0
    more_important = [f1[1], f2[1], f2[2]]
    while more_important:
        done, more_important = await aio.wait(more_important, timeout=0.5)
        for fut in done:
            # `result()` re-raises the worker's exception, if any.
            important_res += fut.result()

    assert important_res == 2 + 2 + 3

    # A join is still required so that all spawned workers can finish
    # (alternatively, `asyncio.wait` on every future).
    await pool.join()

    every_future = list(itertools.chain(f1, f2))
    assert all(f.done() for f in every_future)  # this is guaranteed
    assert 2 * sum(todo) == sum(f.result() for f in every_future)
async def work(self, size):
    """Create the pool and `size` grab agents, then call `next_grab` forever.

    :param size: used both as the pool size and as the number of agents
        generated through `self.gen_agent()`.
    """
    self._pool = AioPool(size=size)

    # Agents are generated one after another, not concurrently.
    agents = []
    for _ in range(size):
        agents.append(await self.gen_agent())

    self.grab_queue = asyncio.Queue()
    for agent in agents:
        wrapped = GrabAgent(agent)
        await self.grab_queue.put(wrapped)
        # Index the wrapper by the agent's message id as well.
        self.grab_agents[agent.msgid] = wrapped

    # Never returns: one `next_grab` call followed by a one second pause.
    while True:
        await self.next_grab()
        await asyncio.sleep(1)
async def test_cancel():
    """Cancel selected futures first, then everything left in the pool."""

    async def wrk(*arg, **kw):
        await aio.sleep(0.5)
        return 1

    pool = AioPool(size=2)

    quick = await pool.spawn_n(aio.sleep(0.15))
    running = await pool.spawn(wrk())
    await pool.spawn_n(wrk())
    waiting = await pool.map_n(wrk, range(3))

    # cancel some
    await aio.sleep(0.1)
    cancelled, results = await pool.cancel(running, waiting[2])  # running and waiting
    assert 2 == cancelled  # none of them had time to finish
    assert 2 == len(results)
    assert all(isinstance(res, aio.CancelledError) for res in results)

    # cancel all others
    await aio.sleep(0.1)

    # the quick task was not interrupted and finished successfully
    assert quick.done() and quick.result() is None

    cancelled, results = await pool.cancel()  # all
    assert 3 == cancelled
    assert len(results) == 3
    assert all(isinstance(res, aio.CancelledError) for res in results)

    assert await pool.join()  # joins successfully
async def _asyncio_run(fn, arg_list=None, kwarg_list=None, pool_size=10,
                       callback=None, return_result=True):
    """Run `fn` once per argument set through an `AioPool`.

    :param fn: callable returning an awaitable; called as `fn(*arg, **kwarg)`.
    :param arg_list: sequence of positional-argument sequences, one per call.
        When empty or None, every call gets no positional arguments.
    :param kwarg_list: sequence of keyword-argument dicts, one per call.
        When empty or None, every call gets no keyword arguments.
    :param pool_size: upper bound for the pool size; the pool never gets
        larger than the number of calls.
    :param callback: when truthy, tasks are spawned with `cb=_callback` and
        `ctx=callback` (`_callback` is defined elsewhere in this module).
    :param return_result: when true, return each future's `result()`
        (which re-raises a task's exception); otherwise return the futures.
    :raises ValueError: if `arg_list` and `kwarg_list` differ in length.
    :return: list of results or futures, in call order; `[]` when there is
        nothing to run.
    """
    arg_count = len(arg_list) if arg_list else 0
    kwarg_count = len(kwarg_list) if kwarg_list else 0
    params_count = max(arg_count, kwarg_count)

    # Fill in the missing side so both lists can be zipped together.
    if not arg_list:
        arg_list = [[] for _ in range(params_count)]
    if not kwarg_list:
        kwarg_list = [{} for _ in range(params_count)]

    if len(kwarg_list) != len(arg_list):
        raise ValueError('len(arg):%s != len(kwarg):%s'
                         % (len(arg_list), len(kwarg_list)))

    # Nothing to run: skip creating a zero-sized pool altogether.
    if not params_count:
        return []

    pool_size = min(params_count, pool_size)

    futures = []
    async with AioPool(size=pool_size) as pool:
        for arg, kwarg in zip(arg_list, kwarg_list):
            task = fn(*arg, **kwarg)
            if callback:
                fut = await pool.spawn(task, cb=_callback, ctx=callback)
            else:
                fut = await pool.spawn(task)
            futures.append(fut)

    if return_result:
        return [fut.result() for fut in futures]
    return futures
async def test_map_n():
    """`map_n` with a callback: the collected results sum to twice the input."""
    todo = range(2, 11)

    async with AioPool(size=3) as pool:
        futures = pool.map_n(wrk, todo, cb)

    # Futures are read only after the pool context has exited.
    results = []
    for fut in futures:
        results.append(getres.flat(fut))

    assert 2 * sum(todo) == sum(results)
async def pull(self, pool_size: int = 3) -> None:
    """Download every (filename, url) pair of this object into a cbz file.

    Does nothing when `self.exists()` is true. Iterating `self`
    asynchronously yields `(filename, url)` pairs; each url is fetched
    through a pool of `pool_size` workers and written into an
    `InMemoryZipFile` at `self.cbz_path`.

    :param pool_size: size passed to `AioPool`.
    """
    if self.exists():
        return

    self._start_pull()

    pair_list = [(filename, str(url)) async for filename, url in self]
    if not pair_list:
        self._no_content()
        return

    pool = AioPool(pool_size)
    cbz = InMemoryZipFile(self.cbz_path)

    async with self._parent.get_client() as client:

        async def download_coroutine(pair: Tuple[str, str]):
            filename, url = pair

            # Fetch the page; a bad HTTP status becomes an exception.
            response = await client.get(url)
            response.raise_for_status()

            # Store the page bytes in the archive and report progress.
            page_content: bytes = await response.read()
            cbz.write(filename, page_content)
            self._progress()

        result = await pool.map(download_coroutine, pair_list)
        raise_on_any_error_from_pool(result)

    cbz.save()
    self.log('\n', end='')
async def test_spawn_and_exec():
    """`exec` runs one task to completion while spawned workers keep going."""
    order = []
    marker = 9999

    async def wrk(n):
        # Record entry and exit so interleaving can be inspected afterwards.
        order.append(n)
        delay = 0.5 if n == marker else 1 / n
        await aio.sleep(delay)
        order.append(n)
        return n

    task = range(1, 11)
    futures = []
    async with AioPool(size=7) as pool:
        for i in task:
            fut = await pool.spawn(wrk(i))
            futures.append(fut)
        assert marker == await pool.exec(wrk(marker))

    assert [f.result() for f in futures] == list(task)
    assert pool._executed == len(task) + 1
    assert order != list(sorted(order))

    # Other workers must have logged something between the marker's
    # entry and exit records.
    first = order.index(marker)
    second = order.index(marker, first + 1)
    assert second - first > 1
async def concurrent_scrape(self, scrape_configs:List[ScrapeConfig], concurrency:Optional[int]=None) -> List[ScrapeApiResponse]:
    """Run `async_scrape` for every config through an `AioPool`.

    :param scrape_configs: configs to scrape; a config with a session or
        with `asp` enabled and no `correlation_id` gets one assigned in place.
    :param concurrency: pool size; defaults to `self.max_concurrency`.
    :return: results of all scrapes, in the order of `scrape_configs`
        (`future.result()` re-raises a failed scrape's exception).
    :raises ImportError: when `asyncio_pool` is not installed.
    """
    try:
        from asyncio_pool import AioPool
    except ImportError:
        print('You must run pip install scrapfly-sdk[concurrency]')
        raise

    if concurrency is None:
        concurrency = self.max_concurrency

    async def call(scrape_config:ScrapeConfig) -> ScrapeApiResponse:
        return await self.async_scrape(scrape_config=scrape_config)

    futures = []
    async with AioPool(size=concurrency) as pool:
        for index, scrape_config in enumerate(scrape_configs):
            # Give each session/asp scrape its own correlation id to
            # prevent 429 errors from concurrent session access.
            uses_session = scrape_config.session is not None or scrape_config.asp is True
            if uses_session and not scrape_config.correlation_id:
                scrape_config.correlation_id = 'concurrent_slot_' + str(index)

            future = await pool.spawn(call(scrape_config))
            futures.append(future)

    return [future.result() for future in futures]
async def loadtest_spawn_n(tasks, pool_size, duration):
    """Spawn `tasks` sleeps of `duration` seconds via `spawn_n`.

    :return: `getres.flat` of every future, in spawn order.
    """
    futures = []
    async with AioPool(size=pool_size) as pool:
        for _ in range(tasks):
            futures.append(await pool.spawn_n(aio.sleep(duration)))

    results = []
    for fut in futures:
        results.append(getres.flat(fut))
    return results
async def test_itermap(timeout):
    """Values yielded by `itermap` sum to twice the input for this `timeout`."""
    todo = range(2, 11)
    total = 0

    async with AioPool(size=3) as pool:
        async for value in pool.itermap(wrk, todo, cb, timeout=timeout):
            total = total + value

    assert 2 * sum(todo) == total
async def download_train_data(self, training_data):
    """Prepare train/validate directories and download missing files.

    :param training_data: dict read here for `['model']['sha1']`,
        `['csv']['url']` and `['metadata']['random_seed']`; it is also
        passed to `self.get_links_for_train`.
    :return: `(train_dir, validate_dir, train_size, validate_size, error)`.
        On failure the first four items are None and `error` is a message;
        on success `error` is None.
    """
    tasks = []
    # A set gives O(1) membership checks for directories already created.
    created_label_dirs = set()
    model_sha1 = training_data['model']['sha1']
    train_dir = os.path.join(self.TRAIN_DIR, model_sha1)
    validate_dir = os.path.join(self.VALIDATE_DIR, model_sha1)
    logging.debug(
        "model_sha1={m}, train_dir={t}, validate_dir={v}".format(m=model_sha1, t=train_dir, v=validate_dir))
    self.cleanup([train_dir, validate_dir])
    self.makedirs([train_dir, validate_dir])

    try:
        logging.info("get links for train")
        links, train_size, validate_size = await self.get_links_for_train(training_data)
    except Exception as exc:
        error = "Error extract data from csv-file"
        logging.error('Cant train model by data from csv {csv_url}, random_seed {r}: {error}'.format(
            csv_url=training_data['csv']['url'], r=training_data['metadata']['random_seed'], error=error))
        logging.error(exc)
        return None, None, None, None, error

    # Custom validation (https://github.com/OlafenwaMoses/ImageAI/issues/294)
    if train_size < self.MIN_TRAIN_SIZE or validate_size < self.MIN_VALIDATE_SIZE:
        error = "You should have at least 300 for train and 100 for test per label."
        logging.error(
            'Cant train model by data from csv {csv_url}: {error}'.format(csv_url=training_data['csv']['url'],
                                                                          error=error))
        return None, None, None, None, error

    for link in links:
        dir_path = os.path.join(self.DATA_DIR, link['i_type'], model_sha1, link['label'])
        file_path = os.path.join(dir_path, link['file_name'])
        if dir_path not in created_label_dirs:
            os.makedirs(dir_path, exist_ok=True)
            created_label_dirs.add(dir_path)
        # Only files that are not on disk yet are queued for download.
        if not os.path.isfile(file_path):
            tasks.append({
                "url": link['url'],
                "sha1": link['sha1'],
                "file_path": file_path
            })

    if tasks:
        pool = AioPool(size=settings.DOWNLOAD_POOL_SIZE)
        results = await pool.map(self.download_file, tasks)
        # `pool.map` hands back the exception object for a crashed or
        # cancelled worker instead of raising, so failures must be counted
        # explicitly or they go unnoticed.
        failed = sum(1 for res in results if isinstance(res, BaseException))
        if failed:
            logging.error('{failed} of {count} downloads failed'.format(failed=failed, count=len(tasks)))
        logging.info('Data downloaded ({count} files)'.format(count=len(tasks)))

    return train_dir, validate_dir, train_size, validate_size, None
async def loadtest_itermap(tasks, pool_size, duration):
    """Run `tasks` sleeps of `duration` seconds through `itermap`.

    :return: every value yielded by `itermap`, in the order yielded.
    """

    async def wrk(i):
        await aio.sleep(duration)

    async with AioPool(size=pool_size) as pool:
        results = [res async for res in pool.itermap(wrk, range(tasks))]

    return results
async def spawn(work_num, size):
    """
    @param: work_num total number of tasks
    @param: size number of tasks executed per second
            (NOTE(review): the value is passed to AioPool as its `size` -
            confirm that this really acts as a per-second rate)
    """
    futures = []
    async with AioPool(size=size) as pool:
        for _ in range(work_num):
            futures.append(await pool.spawn(udp_worker()))
async def test_spawns_behaviour():
    """Contrast `spawn` (waits for pool space) with `spawn_n` (does not)."""
    started = []

    async def wrk(n):
        started.append(n)
        await aio.sleep(0.1)

    async with AioPool(size=2) as pool:
        for n in range(1, 6):
            # `spawn` waits until the pool has room.
            await pool.spawn(wrk(n))
        # ... so by now some of the workers should have started.
        assert started

    started.clear()

    async with AioPool(size=2) as pool:
        for n in range(1, 6):
            # `spawn_n` does not wait for the pool, it only registers
            # waiting coroutines.
            pool.spawn_n(wrk(n))
        # ... so no worker should have been able to start yet.
        assert not started
async def itermap_usage(todo=range(1, 11)):
    """Accumulate worker results as `itermap` yields them."""
    total = 0

    async with AioPool(size=10) as pool:
        # `itermap` combines `spawn_n` with `iterwait`; the latter wraps
        # `asyncio.wait` and yields results of finished workers according
        # to the `timeout` and `yield_when` params handed to `asyncio.wait`
        # (see its docs for details).
        async for res in pool.itermap(worker, todo, timeout=0.5):
            total = total + res

    # Technically, the join call can be skipped here.
    assert total == sum(todo)
async def test_spawn_crash():
    """A worker's exception surfaces through its future's `result()`."""

    async def wrk(n):
        return 1 / n

    async with AioPool(size=1) as pool:
        # The last argument (0) makes the final worker divide by zero.
        futures = [await pool.spawn(wrk(n)) for n in (2, 1, 0)]

    crashed = futures[-1]
    with pytest.raises(ZeroDivisionError):
        crashed.result()
async def test_spawn_n():
    """`spawn_n` with callback and context: first two results are errors."""
    todo = range(5)

    async with AioPool(size=2) as pool:
        # Each task gets `(pool, i)` as its callback context.
        futures = [pool.spawn_n(wrk(i), cb, (pool, i)) for i in todo]

    results = [getres.flat(fut) for fut in futures]
    failed, succeeded = results[:2], results[2:]

    assert all(isinstance(e, ZeroDivisionError) for e in failed)
    assert sum(succeeded) == 2 * (sum(todo) - 0 - 1)
async def test_internal_join():
    """A worker that joins its own pool hangs until it is cancelled."""

    async def self_joiner(pool):
        # Joining the pool from inside one of its own workers: deadlock.
        return await pool.join()

    pool = AioPool(size=10)
    fut = await pool.spawn(self_joiner(pool))

    await aio.sleep(0.5)
    # Deadlocked, would never return on its own.
    assert not fut.done()

    await pool.cancel(fut)
    await pool.join()
async def map_usage(todo=range(100)):
    """Collect the results of all workers with a single `map` call."""
    pool = AioPool(size=10)

    # `map` waits for every spawned worker and returns the results in the
    # same order as `todo`; a crashed or cancelled worker contributes its
    # exception object as the result. It is essentially the `spawn_usage`
    # code wrapped into one call.
    results = await pool.map(worker, todo)

    # No `await pool.join()` is needed here, because no other tasks
    # were spawned.
    first, rest = results[0], results[1:]
    assert isinstance(first, ZeroDivisionError) and sum(rest) == sum(todo)
async def brute(target_factory, combo_factory=None, username_factory=None, password_factory=None,
                pool_size: int = 100, timeout: float = 5.0, output: str = None, verbose: bool = False):
    """
    Run `worker` over all generated tasks and report each attempt.

    :param target_factory: function that returns iterable of targets
    :param combo_factory: function that returns iterable of username/password pairs
    :param username_factory: function that returns iterable of usernames
    :param password_factory: function that returns iterable of passwords
    :param pool_size: number of concurrent connections to run
    :param timeout: passed to `pool.itermap` as its `timeout`
    :param output: output filename; successes are written there in addition to stdout
    :param verbose: print failed messages
    :return: None
    """
    tasks = target_tasks(target_factory, combo_factory, username_factory, password_factory)
    async with AioPool(pool_size) as pool:
        suc_files = [sys.stdout]
        err_files = [sys.stderr]
        out_file = None
        if output:
            out_file = open(output, "w")
            suc_files.append(out_file)

        try:
            async for (val, err) in pool.itermap(worker, tasks, timeout=timeout, get_result=getres.pair):
                if err:
                    print("ERROR")
                    print(err)
                    continue

                result, (host, port, username, password) = val
                if result is True:
                    # Colour codes go to the terminal only, never to the file.
                    print(C.GRN, file=sys.stdout, end="")
                    for file in suc_files:
                        print("{}@{}:{} {}".format(username, host, port, password), file=file, flush=True)
                    print(C.RST, file=sys.stdout, end="")
                elif verbose:
                    print(C.RED, file=sys.stderr, end="")
                    for file in err_files:
                        print("Failed {}@{}:{} {}".format(username, host, port, password) + C.RST, file=file)
                    print(C.RST, file=sys.stderr, end="")
        finally:
            # Close the output file on every exit path; previously the
            # handle was never closed.
            if out_file is not None:
                out_file.close()
async def _get_all_them_lyrics(self, artist_id, job_id):
    """Fetch lyrics for every song of an artist through a pool of size 10.

    Job progress is recorded through `self.sqlite.edit_lyrics` before the
    pool starts and after all futures have been read.

    :return: `(results, number_of_songs)`; reading a future's result
        re-raises the exception of a failed helper.
    """
    songs = await self.spotify.get_songs_by_artist(artist_id)
    await self.sqlite.edit_lyrics(job_id=job_id, step=0, _all=len(songs))

    pending = []
    async with AioPool(size=10) as pool:
        for song in songs:
            helper = self._all_lyrics_helper(song, job_id)
            pending.append(await pool.spawn(helper))

    results = []
    for fut in pending:
        results.append(fut.result())

    await self.sqlite.edit_lyrics(job_id, step=len(results), done=True)
    return results, len(songs)
async def spawn_usage(todo=range(1, 4)):
    """Spawn workers with `spawn`, which waits for free pool space."""
    futures = []

    async with AioPool(size=2) as pool:
        for i in todo:  # 1, 2, 3
            # Returns quickly for 1 and 2, then waits for a free slot
            # before spawning 3 and returning. Can save some resources.
            futures.append(await pool.spawn(worker(i)))
        # Some of the workers have already started by this point.

    # The context manager calls `join` on exit, so we only get here once
    # every worker has returned, crashed or been cancelled.
    total = sum(fut.result() for fut in futures)
    assert sum(todo) == total  # all done
async def exec_usage(todo=range(1, 11)):
    """Execute one task "synchronously" while the pool is busy."""
    async with AioPool(size=4) as pool:
        futures = await pool.map_n(worker, todo)

        # While other workers are waiting or active, `exec` runs a single
        # task "synchronously": it does not interrupt the others, it waits
        # for pool space, then for the task itself, and returns its result.
        important_res = await pool.exec(worker(2))
        assert 2 == important_res

        # Afterwards the pool can be used as usual:
        extra = await pool.spawn(worker(10))

    expected = sum(todo)
    assert expected == sum(f.result() for f in futures)
async def test_map_crash():
    """`map` reports a crashed worker according to the chosen `get_result`."""
    task = range(5)
    pool = AioPool(size=10)
    expected_tail = [i*10 for i in task[1:]]

    # exc as result
    flat = await pool.map(wrk, task, get_result=getres.flat)
    assert isinstance(flat[0], Exception)
    assert flat[1:] == expected_tail

    # tuple as result
    pairs = await pool.map(wrk, task, get_result=getres.pair)
    head, tail = pairs[0], pairs[1:]
    assert head[0] is None and isinstance(head[1], ZeroDivisionError)
    assert [value for value, _ in tail] == expected_tail and \
        not any(error for _, error in tail)
async def cancel_usage():
    """Cancel tasks with `pool.cancel` rather than on the futures."""

    async def wrk(*arg, **kw):
        await aio.sleep(0.5)
        return 1

    pool = AioPool(size=2)

    quick = pool.spawn_n(aio.sleep(0.1))
    running = await pool.spawn(wrk())
    pool.spawn_n(wrk())
    waiting = pool.map_n(wrk, range(3))

    # Cancelling a future returned by a pool method only stops you from
    # retrieving the task's result; the task itself keeps working.
    # Don't do this:
    # quick.cancel()
    # Use `pool.cancel` instead.

    # cancel some
    await aio.sleep(0.1)
    cancelled, results = await pool.cancel(running, waiting[2])  # running and waiting
    assert 2 == cancelled  # none of them had time to finish
    assert 2 == len(results)
    assert all(isinstance(res, aio.CancelledError) for res in results)

    # cancel all others
    await aio.sleep(0.1)

    # the quick task was not interrupted and finished successfully
    assert quick.done() and quick.result() is None

    cancelled, results = await pool.cancel()  # all
    assert 3 == cancelled
    assert len(results) == 3
    assert all(isinstance(res, aio.CancelledError) for res in results)

    assert await pool.join()  # joins successfully
async def test_itermap():
    """With `flat=False`, `itermap` yields results in two batches here."""

    async def wrk(n):
        await aio.sleep(n)
        return n

    # Expected `int(sum(batch) * 10)` for each yielded batch, in order.
    expected = (15, 5)

    async with AioPool(size=3) as pool:
        batch_no = 0  # the async generator does not support `enumerate`
        async for res in pool.itermap(wrk, [0.5] * 4, flat=False, timeout=0.6):
            # A third batch should never be produced.
            assert batch_no < len(expected)
            assert expected[batch_no] == int(sum(res) * 10)
            batch_no += 1
async def main():
    """Register, log in, fire the configured number of hits and print results.

    The number of hits and the pool size come from
    `DefaultValues.REQUESTS_QUANTITY` and `DefaultValues.POOL_SIZE`.
    """
    async with await get_session() as session:
        preparation_steps = PrepareHits(session)
        await preparation_steps.register()
        await preparation_steps.login()

        futures = []
        async with AioPool(size=DefaultValues.POOL_SIZE) as pool:
            for _ in range(DefaultValues.REQUESTS_QUANTITY):
                future = await pool.spawn(preparation_steps.make_hit())
                futures.append(future)

        # Read results only after the pool context has exited: the context
        # manager joins the pool, so every future is finished here.
        # Reading them inside the context hit futures that were still
        # running, and the resulting InvalidStateError was swallowed,
        # silently dropping the remaining output.
        for fut in futures:
            print(*fut.result())
async def test_itermap_cancel():
    """Cancelling the pool from inside an `itermap` loop stops the rest."""

    async def wrk(n):
        await aio.sleep(n / 100)
        return n

    todo = range(1, 101)
    stop_at = 13

    async with AioPool(5) as pool:
        async for res in pool.itermap(wrk, todo, yield_when=aio.FIRST_COMPLETED):
            if res != stop_at:
                continue
            cancelled, _ = await pool.cancel()
            break

    assert cancelled == 100 - 13
async def spawn_n_usage(todo=(range(1, 51), range(51, 101), range(101, 200))):
    """Spawn many workers with `spawn_n`, which does not wait for pool space.

    :param todo: iterable of iterables of worker arguments. The default is
        an immutable tuple rather than a list, so no mutable object is
        shared between calls.
    """
    futures = []
    async with AioPool(size=20) as pool:
        for tasks in todo:
            for i in tasks:  # too many tasks
                # Returns quickly for all tasks, does not wait for pool
                # space. Workers are not spawned, they wait for pool space
                # in their own background tasks.
                fut = await pool.spawn_n(worker(i))
                futures.append(fut)
        # At this point not a single worker should start.

    # Context manager calls `join` at exit, so this will finish when all
    # workers return, crash or cancelled.
    assert sum(itertools.chain.from_iterable(todo)) == \
        sum(f.result() for f in futures)