Example #1
async def test_outer_join():

    todo, to_release = range(1, 15), range(10)
    done, released = [], []

    async def inner(n):
        nonlocal done
        await aio.sleep(1 / n)
        done.append(n)

    async def outer(n, pool):
        nonlocal released
        await pool.join()
        released.append(n)

    loop = aio.get_event_loop()
    pool = AioPool(size=100)

    pool.map_n(inner, todo)
    joined = [loop.create_task(outer(j, pool)) for j in to_release]
    await pool.join()

    assert len(released) <= len(to_release)
    await aio.wait(joined)
    assert len(todo) == len(done) and len(released) == len(to_release)
Example #2
async def details(todo=range(1, 11)):
    pool = AioPool(size=5)

    # This code:
    f1 = []
    for i in todo:
        f1.append(pool.spawn_n(worker(i)))
    # is equivalent to one call of `map_n`:
    f2 = pool.map_n(worker, todo)

    # Afterwards you can await any given future:
    try:
        assert 3 == await f1[2]  # result of spawn_n(worker(3))
    except BaseException:
        # an exception raised in the worker (including CancelledError) is re-raised here
        pass

    # Or use `asyncio.wait` to handle results in batches (see `iterwait` also):
    important_res = 0
    more_important = [f1[1], f2[1], f2[2]]
    while more_important:
        done, more_important = await aio.wait(more_important, timeout=0.5)
        # handle results; note that f.result() re-raises exceptions
        important_res += sum(f.result() for f in done)

    assert important_res == 2 + 2 + 3

    # But you need to join, to allow all spawned workers to finish
    # (of course you can `asyncio.wait` all of the futures if you want to)
    await pool.join()

    assert all(f.done() for f in itertools.chain(f1, f2))  # this is guaranteed
    assert 2 * sum(todo) == sum(f.result() for f in itertools.chain(f1, f2))
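
These usage examples rely on a `worker` coroutine that is not shown in this listing. A plausible sketch, consistent with the assertions here and in the map/itermap examples below (it returns its argument, and raises ZeroDivisionError for 0), could be:

import asyncio as aio

# Hypothetical worker assumed by the usage examples: sleep briefly, then
# return the argument unchanged. worker(0) raises ZeroDivisionError, which
# is exactly what the crash-handling examples below count on.
async def worker(n):
    await aio.sleep(1 / n)
    return n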
Example #3
    async def work(self, size):
        self._pool = AioPool(size=size)
        agents = [await self.gen_agent() for _ in range(size)]
        self.grab_queue = asyncio.Queue()

        for agent in agents:
            item = GrabAgent(agent)
            await self.grab_queue.put(item)
            self.grab_agents[agent.msgid] = item

        while True:
            await self.next_grab()
            await asyncio.sleep(1)
Example #4
async def test_cancel():
    async def wrk(*arg, **kw):
        await aio.sleep(0.5)
        return 1

    pool = AioPool(size=2)

    f_quick = await pool.spawn_n(aio.sleep(0.15))
    f12 = await pool.spawn(wrk()), await pool.spawn_n(wrk())
    f35 = await pool.map_n(wrk, range(3))

    # cancel some
    await aio.sleep(0.1)
    cancelled, results = await pool.cancel(f12[0],
                                           f35[2])  # running and waiting
    assert 2 == cancelled  # none of them had time to finish
    assert 2 == len(results) and \
        all(isinstance(res, aio.CancelledError) for res in results)

    # cancel all others
    await aio.sleep(0.1)

    # not interrupted and finished successfully
    assert f_quick.done() and f_quick.result() is None

    cancelled, results = await pool.cancel()  # all
    assert 3 == cancelled
    assert len(results) == 3 and \
        all(isinstance(res, aio.CancelledError) for res in results)

    assert await pool.join()  # joins successfully
Example #5
async def _asyncio_run(fn,
                       arg_list=None,
                       kwarg_list=None,
                       pool_size=10,
                       callback=None,
                       return_result=True):
    arg_count = len(arg_list) if arg_list else 0
    kwarg_count = len(kwarg_list) if kwarg_list else 0
    params_count = max(arg_count, kwarg_count)
    pool_size = min(params_count, pool_size)
    if not arg_list:
        arg_list = [[] for i in range(params_count)]
    if not kwarg_list:
        kwarg_list = [{} for i in range(params_count)]

    if len(kwarg_list) != len(arg_list):
        raise Exception('len(arg):%s != len(kwarg):%s' %
                        (len(arg_list), len(kwarg_list)))

    futures = []
    async with AioPool(size=pool_size) as pool:
        for arg, kwarg in zip(arg_list, kwarg_list):
            task = fn(*arg, **kwarg)
            if callback:
                fut = await pool.spawn(task, cb=_callback, ctx=callback)
            else:
                fut = await pool.spawn(task)
            futures.append(fut)

    return [i.result() for i in futures] if return_result else futures
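
A hypothetical invocation of `_asyncio_run`, just to illustrate how `arg_list` and `kwarg_list` are zipped pairwise (the `fetch` coroutine and the URLs are made up for this sketch):

# Hypothetical usage: arg_list[i] is paired with kwarg_list[i], so when
# both lists are given they must have the same length.
async def fetch(url, retries=1):
    ...  # placeholder body

async def demo():
    return await _asyncio_run(
        fetch,
        arg_list=[['https://example.com/a'], ['https://example.com/b']],
        kwarg_list=[{'retries': 2}, {'retries': 3}],
        pool_size=2,
    )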
Example #6
async def test_map_n():
    todo = range(2, 11)
    async with AioPool(size=3) as pool:
        futures = pool.map_n(wrk, todo, cb)

    results = [getres.flat(f) for f in futures]
    assert 2 * sum(todo) == sum(results)
Example #7
    async def pull(self, pool_size: int = 3) -> None:
        if self.exists():
            return
        self._start_pull()
        pair_list = [(filename, str(url)) async for filename, url in self]
        if not pair_list:
            self._no_content()
            return

        pool = AioPool(pool_size)
        cbz = InMemoryZipFile(self.cbz_path)

        async with self._parent.get_client() as client:
            async def download_coroutine(pair: Tuple[str, str]):
                filename, url = pair
                # Download the page
                response = await client.get(url)
                response.raise_for_status()

                # Save the page content to the cbz file
                page_content: bytes = await response.read()
                cbz.write(filename, page_content)
                self._progress()

            result = await pool.map(download_coroutine, pair_list)
            raise_on_any_error_from_pool(result)
        cbz.save()
        self.log('\n', end='')
Example #8
async def test_spawn_and_exec():

    order = []
    marker = 9999

    async def wrk(n):
        nonlocal order
        order.append(n)
        if n == marker:
            await aio.sleep(0.5)
        else:
            await aio.sleep(1 / n)
        order.append(n)
        return n

    task = range(1, 11)
    futures = []
    async with AioPool(size=7) as pool:
        for i in task:
            futures.append(await pool.spawn(wrk(i)))
        assert marker == await pool.exec(wrk(marker))

    assert [f.result() for f in futures] == list(task)
    assert pool._executed == len(task) + 1
    assert order != list(sorted(order))

    ix = order.index(marker)
    iy = order.index(marker, ix + 1)
    assert iy - ix > 1
Example #9
    async def concurrent_scrape(self, scrape_configs:List[ScrapeConfig], concurrency:Optional[int]=None) -> List[ScrapeApiResponse]:

        try:
            from asyncio_pool import AioPool
        except ImportError:
            print('You must run pip install scrapfly-sdk[concurrency]')
            raise

        if concurrency is None:
            concurrency = self.max_concurrency

        futures = []

        async def call(scrape_config:ScrapeConfig) -> ScrapeApiResponse:
            return await self.async_scrape(scrape_config=scrape_config)

        async with AioPool(size=concurrency) as pool:
            for index, scrape_config in enumerate(scrape_configs):
                # handle concurrent session access correctly to prevent 429 concurrent-session errors
                if (scrape_config.session is not None or scrape_config.asp is True) and not scrape_config.correlation_id:
                    scrape_config.correlation_id = 'concurrent_slot_' + str(index)

                futures.append(await pool.spawn(call(scrape_config)))

        return [future.result() for future in futures]
Example #10
async def loadtest_spawn_n(tasks, pool_size, duration):
    futures = []
    async with AioPool(size=pool_size) as pool:
        for i in range(tasks):
            fut = await pool.spawn_n(aio.sleep(duration))
            futures.append(fut)

    return [getres.flat(f) for f in futures]
Example #11
async def test_itermap(timeout):
    todo = range(2, 11)

    res = 0
    async with AioPool(size=3) as pool:
        async for i in pool.itermap(wrk, todo, cb, timeout=timeout):
            res += i

    assert 2 * sum(todo) == res
Example #12
    async def download_train_data(self, training_data):
        tasks = []
        created_label_dirs = []

        model_sha1 = training_data['model']['sha1']

        train_dir = os.path.join(self.TRAIN_DIR, model_sha1)
        validate_dir = os.path.join(self.VALIDATE_DIR, model_sha1)

        logging.debug(
            "model_sha1={m}, train_dir={t}, validate_dir={v}".format(m=model_sha1, t=train_dir, v=validate_dir))

        self.cleanup([train_dir, validate_dir])
        self.makedirs([train_dir, validate_dir])

        try:
            logging.info("get links for train")
            links, train_size, validate_size = await self.get_links_for_train(training_data)
        except Exception as exc:
            error = "Error extract data from csv-file"
            logging.error('Cant train model by data from csv {csv_url}, random_seed {r}: {error}'.format(
                csv_url=training_data['csv']['url'], r=training_data['metadata']['random_seed'], error=error))
            logging.error(exc)
            return None, None, None, None, error

        # Custom validation (https://github.com/OlafenwaMoses/ImageAI/issues/294)
        if train_size < self.MIN_TRAIN_SIZE or validate_size < self.MIN_VALIDATE_SIZE:
            error = "You should have at least 300 for train and 100 for test per label."
            logging.error(
                'Cannot train model by data from csv {csv_url}: {error}'.format(csv_url=training_data['csv']['url'],
                                                                              error=error))
            return None, None, None, None, error

        for link in links:
            dir_path = os.path.join(self.DATA_DIR, link['i_type'], model_sha1, link['label'])
            file_path = os.path.join(dir_path, link['file_name'])

            # logging.info("processing link url={u}, sha1={s}, path={p}".format(u=link['url'], s=link['sha1'], p=file_path))

            if dir_path not in created_label_dirs:
                os.makedirs(dir_path, exist_ok=True)
                created_label_dirs.append(dir_path)

            if not os.path.isfile(file_path):
                tasks.append({
                    "url": link['url'],
                    "sha1": link['sha1'],
                    "file_path": file_path
                })

        if tasks:
            pool = AioPool(size=settings.DOWNLOAD_POOL_SIZE)
            await pool.map(self.download_file, tasks)

        logging.info('Data downloaded ({count} files)'.format(count=len(tasks)))

        return train_dir, validate_dir, train_size, validate_size, None
Example #13
async def loadtest_itermap(tasks, pool_size, duration):
    async def wrk(i):
        await aio.sleep(duration)

    results = []
    async with AioPool(size=pool_size) as pool:
        async for res in pool.itermap(wrk, range(tasks)):
            results.append(res)

    return results
Example #14
async def spawn(work_num, size):
    """
    @param: work_num total number of tasks
    @param: size number of tasks executed per second
    """
    futures = []
    async with AioPool(size=size) as pool:
        for i in range(work_num):
            f = await pool.spawn(udp_worker())
            futures.append(f)
Example #15
async def test_spawns_behaviour():
    started = []

    async def wrk(n):
        nonlocal started
        started.append(n)
        await aio.sleep(0.1)

    async with AioPool(size=2) as pool:
        for i in range(1, 6):
            await pool.spawn(wrk(i))  # waits for pool to be available
        assert len(started) != 0  # so by now some workers should have started

    started.clear()

    async with AioPool(size=2) as pool:
        for i in range(1, 6):
            # does not wait for the pool, just spawns a waiting coroutine
            pool.spawn_n(wrk(i))
        assert len(started) == 0  # so no worker should have started yet
Example #16
async def itermap_usage(todo=range(1, 11)):
    result = 0
    async with AioPool(size=10) as pool:
        # Combines spawn_n and iterwait (a wrapper around asyncio.wait) that
        # yields results of finished workers according to the `timeout` and
        # `yield_when` params passed on to asyncio.wait (see its docs for details).
        async for res in pool.itermap(worker, todo, timeout=0.5):
            result += res
        # technically, you can skip the join call

    assert result == sum(todo)
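
The comment above mentions `iterwait`. Assuming it accepts a list of pool futures plus the same `timeout`/`yield_when` keywords that `itermap` forwards to `asyncio.wait` (an assumption based on that comment, not on the library source), spelling the combination out by hand would look roughly like this:

# Hand-rolled sketch of what itermap combines: spawn_n the coroutines,
# then drain finished results with iterwait. The iterwait keyword names
# here are assumptions.
async def itermap_by_hand(todo=range(1, 11)):
    result = 0
    async with AioPool(size=10) as pool:
        # awaiting spawn_n as in the spawn_n_usage example at the end
        futures = [await pool.spawn_n(worker(i)) for i in todo]
        async for res in pool.iterwait(futures, timeout=0.5):
            result += res
    assert result == sum(todo)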
Example #17
async def test_spawn_crash():
    async def wrk(n):
        return 1 / n

    futures = []
    async with AioPool(size=1) as pool:
        for i in (2, 1, 0):
            futures.append(await pool.spawn(wrk(i)))

    with pytest.raises(ZeroDivisionError):
        futures[-1].result()
Example #18
async def test_spawn_n():
    todo = range(5)
    futures = []
    async with AioPool(size=2) as pool:
        for i in todo:
            ctx = (pool, i)
            fut = pool.spawn_n(wrk(i), cb, ctx)
            futures.append(fut)

    results = [getres.flat(f) for f in futures]
    assert all(isinstance(e, ZeroDivisionError) for e in results[:2])
    assert sum(results[2:]) == 2 * (sum(todo) - 0 - 1)
Example #19
async def test_internal_join():
    async def wrk(pool):
        return await pool.join()  # deadlock

    pool = AioPool(size=10)
    fut = await pool.spawn(wrk(pool))

    await aio.sleep(0.5)
    assert not fut.done()  # deadlocked, will never return

    await pool.cancel(fut)
    await pool.join()
Example #20
async def map_usage(todo=range(100)):
    pool = AioPool(size=10)
    # Waits for and collects results from all spawned workers, returning them
    # in the same order as `todo`. If a worker crashes or is cancelled, the
    # exception object is returned as its result.
    # Basically, it wraps the `spawn_usage` code into one call.
    results = await pool.map(worker, todo)

    # await pool.join()  # not needed here, because no other tasks were spawned

    assert isinstance(results[0], ZeroDivisionError) \
        and sum(results[1:]) == sum(todo)
Example #21
async def brute(target_factory,
                combo_factory=None,
                username_factory=None,
                password_factory=None,
                pool_size: int = 100,
                timeout: float = 5.0,
                output: str = None,
                verbose: bool = False):
    """
    :param target_factory: function that returns iterable of targets
    :param combo_factory: function that returns iterable of username/password pairs
    :param username_factory: function that returns iterable of usernames
    :param password_factory: function that returns iterable of passwords
    :param pool_size: number of concurrent connections to run
    :param timeout: timeout passed to `pool.itermap` (forwarded to asyncio.wait)
    :param output: output filename
    :param verbose: print failed messages
    :return:
    """
    tasks = target_tasks(target_factory, combo_factory, username_factory,
                         password_factory)

    async with AioPool(pool_size) as pool:
        suc_files = [sys.stdout]
        err_files = [sys.stderr]

        if output:
            f = open(output, "w")
            suc_files.append(f)

        async for (val, err) in pool.itermap(worker,
                                             tasks,
                                             timeout=timeout,
                                             get_result=getres.pair):
            if err:
                print("ERROR")
                print(err)
                continue
            result, (host, port, username, password) = val
            if result is True:
                print(C.GRN, file=sys.stdout, end="")
                for file in suc_files:
                    print("{}@{}:{} {}".format(username, host, port, password),
                          file=file,
                          flush=True)
                print(C.RST, file=sys.stdout, end="")
            elif verbose:
                print(C.RED, file=sys.stderr, end="")
                for file in err_files:
                    print("Failed {}@{}:{} {}".format(username, host, port,
                                                      password) + C.RST,
                          file=file)
                print(C.RST, file=sys.stderr, end="")
Example #22
    async def _get_all_them_lyrics(self, artist_id, job_id):
        songs = await self.spotify.get_songs_by_artist(artist_id)
        await self.sqlite.edit_lyrics(job_id=job_id, step=0, _all=len(songs))
        futures = []
        async with AioPool(size=10) as pool:
            for song in songs:
                fut = await pool.spawn(
                    self._all_lyrics_helper(song, job_id))
                futures.append(fut)

        results = [f.result() for f in futures]

        await self.sqlite.edit_lyrics(job_id, step=len(results), done=True)
        return results, len(songs)
Example #23
async def spawn_usage(todo=range(1, 4)):
    futures = []
    async with AioPool(size=2) as pool:
        for i in todo:  # 1, 2, 3
            # Returns quickly for 1 and 2, then waits for empty space for 3,
            # spawns 3 and returns. Can save some resources I guess.
            fut = await pool.spawn(worker(i))
            futures.append(fut)
        # At this point some of the workers already started.

        # Context manager calls `join` at exit, so this will finish when all
        # workers return, crash or are cancelled.

    assert sum(todo) == sum(fut.result() for fut in futures)  # all done
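
Each of these usage snippets is a coroutine, so it needs an event loop to run. A minimal driver (the only choice being which snippet to call) looks like:

import asyncio

# Run one of the usage coroutines above on a fresh event loop.
if __name__ == '__main__':
    asyncio.run(spawn_usage())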
Example #24
async def exec_usage(todo=range(1, 11)):
    async with AioPool(size=4) as pool:
        futures = await pool.map_n(worker, todo)

        # While other workers are waiting or active, you can "synchronously"
        # execute one task. It does not interrupt the others: it just waits for
        # pool space, then waits for the task to finish and returns its result.
        important_res = await pool.exec(worker(2))
        assert 2 == important_res

        # You can continue working as usual:
        moar = await pool.spawn(worker(10))

    assert sum(todo) == sum(f.result() for f in futures)
Example #25
async def test_map_crash():
    task = range(5)
    pool = AioPool(size=10)

    # exc as result
    res = await pool.map(wrk, task, get_result=getres.flat)
    assert isinstance(res[0], Exception)
    assert res[1:] == [i*10 for i in task[1:]]

    # tuple as result
    res = await pool.map(wrk, task, get_result=getres.pair)
    assert res[0][0] is None and isinstance(res[0][1], ZeroDivisionError)
    assert [r[0] for r in res[1:]] == [i*10 for i in task[1:]] and \
        not any(r[1] for r in res[1:])
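
The `get_result` helpers used here come from `asyncio_pool`'s `getres`. As a behavioural sketch only (not the library source): `flat` returns either the result or the exception object itself, while `pair` returns a `(result, error)` tuple with exactly one side set.

# Behavioural sketch of the two result extractors the tests rely on.
def flat_sketch(fut):
    try:
        return fut.result()
    except BaseException as exc:
        return exc  # exceptions (incl. CancelledError) come back as values

def pair_sketch(fut):
    try:
        return fut.result(), None
    except BaseException as exc:
        return None, exc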
Example #26
async def cancel_usage():
    async def wrk(*arg, **kw):
        await aio.sleep(0.5)
        return 1

    pool = AioPool(size=2)

    f_quick = pool.spawn_n(aio.sleep(0.1))
    f12 = await pool.spawn(wrk()), pool.spawn_n(wrk())
    f35 = pool.map_n(wrk, range(3))

    # At this point, if you cancel the futures returned by pool methods,
    # you just won't be able to retrieve the spawned tasks' results; the tasks
    # themselves will keep running. Don't do this:
    #   f_quick.cancel()
    # Use `pool.cancel` instead:

    # cancel some
    await aio.sleep(0.1)
    cancelled, results = await pool.cancel(f12[0],
                                           f35[2])  # running and waiting
    assert 2 == cancelled  # none of them had time to finish
    assert 2 == len(results) and \
        all(isinstance(res, aio.CancelledError) for res in results)

    # cancel all others
    await aio.sleep(0.1)

    # not interrupted and finished successfully
    assert f_quick.done() and f_quick.result() is None

    cancelled, results = await pool.cancel()  # all
    assert 3 == cancelled
    assert len(results) == 3 and \
        all(isinstance(res, aio.CancelledError) for res in results)

    assert await pool.join()  # joins successfully
Example #27
async def test_itermap():
    async def wrk(n):
        await aio.sleep(n)
        return n

    async with AioPool(size=3) as pool:
        i = 0
        async for res in pool.itermap(wrk, [0.5] * 4, flat=False, timeout=0.6):
            if i == 0:
                assert 15 == int(sum(res) * 10)
            elif i == 1:
                assert 5 == int(sum(res) * 10)
            else:
                assert False  # should not get here
            i += 1  # async for does not support enumerate, hence the manual counter
Example #28
async def main():
    async with await get_session() as session:
        preparation_steps = PrepareHits(session)
        await preparation_steps.register()
        await preparation_steps.login()
        futures = []
        async with AioPool(size=DefaultValues.POOL_SIZE) as pool:
            try:
                for _ in range(DefaultValues.REQUESTS_QUANTITY):
                    future = await pool.spawn(preparation_steps.make_hit())
                    futures.append(future)
                for fut in futures:
                    print(*fut.result())
            except asyncio.InvalidStateError:
                return
Example #29
async def test_itermap_cancel():
    async def wrk(n):
        await aio.sleep(n / 100)
        return n

    todo = range(1, 101)

    async with AioPool(5) as pool:
        async for res in pool.itermap(wrk,
                                      todo,
                                      yield_when=aio.FIRST_COMPLETED):
            if res == 13:
                cancelled, _ = await pool.cancel()
                break
    assert cancelled == 100 - 13
Example #30
async def spawn_n_usage(todo=[range(1, 51), range(51, 101), range(101, 200)]):
    futures = []
    async with AioPool(size=20) as pool:
        for tasks in todo:
            for i in tasks:  # too many tasks
                # Returns quickly for all tasks, does not wait for pool space.
                # Workers are not spawned, they wait for pool space in their
                # own background tasks.
                fut = await pool.spawn_n(worker(i))
                futures.append(fut)
        # At this point not a single worker should have started.

        # Context manager calls `join` at exit, so this will finish when all
        # workers return, crash or are cancelled.

    assert sum(itertools.chain.from_iterable(todo)) == \
        sum(f.result() for f in futures)