Example No. 1
    async def bulk_index(self, docs, namespace, params=None, chunk_size=None, doc_process=None):
        """
        Bulk-index multiple documents into Elasticsearch directly.

        :return: a ``(succeed_total, failed_total)`` tuple, or ``None`` when ``docs`` is empty.
        """
        if not docs:
            return None

        if doc_process:
            docs = stream.map(docs, doc_process)

        docs = stream.map(docs, self._formatter.format_document)

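        # Chunked bulk commit: update the monitor counters and re-submit any failed actions once.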
        async def bulk(docs):
            succeed_total, failed_total = 0, 0
            async for (succeed, failed) in self._chunk(actions=docs, chunk_size=chunk_size or self.chunk_size, params=params):
                succeed_total += len(succeed)
                failed_total += len(failed)
                self.monitor.increase_succeed(len(succeed))
                self.monitor.increase_failed(len(failed))
                logger.info('[Direct bulk] ns:%s succeed:%d' % (namespace, len(succeed)))
                if failed:
                    logger.warning('[Direct bulk] ns:%s failed:%d' % (namespace, len(failed)))
                    _, failed = await self._failed_actions_commit(failed)
                    if not failed:
                        logger.debug('Failed actions commit success')
                    else:
                        logger.warning('Failed actions commit failed')
            return succeed_total, failed_total

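        # Build index actions for the namespace and run the chunked bulk commit.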
        actions = stream.map(
            docs,
            lambda doc: self._gen_action(ElasticOperate.index, namespace, util.utc_now(), doc, False)[0],
        )
        return await bulk(actions)
Example No. 2
async def multi_resolver(collection: AsyncIOMotorCollection,
                         watcher,
                         pipeline=[]):
    documents: list = await find(
        collection,
        match=get_where(pipeline),
    )
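    # Keep only the first three matching documents for the initial snapshot.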
    documents = documents[:3]
    documents: dict = {
        doc['_id']: aggregate([doc], pipeline)[0]
        for doc in documents
    }
    yield list(documents.values())

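    # Apply change-stream events to the in-memory cache and return the refreshed result list.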
    def process(change):
        document = change['fullDocument']
        _id = document['_id']
        if change['operationType'] == 'insert':
            documents.update({_id: aggregate([document], pipeline)[0]})
            return list(documents.values())
        elif change['operationType'] == 'update':
            if _id in documents:
                documents[_id] = aggregate([document], pipeline)[0]
                return list(documents.values())

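    # Process changes sequentially, drop empty results, and emit only the last value of each BATCH_INTERVAL window.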
    xs = stream.map(watcher, process, task_limit=1)
    xs = stream.filter(xs, bool)
    xs = window(xs, BATCH_INTERVAL)
    xs = last_per_window(xs)
    async for x in xs:
        yield list(x)
Example No. 3
async def main():
    db = AsyncIOMotorClient().db

    async def persist(updates):
        if updates:
            await db[AGGREGATED_COLLECTION].bulk_write(updates)
        print('simulating long sleep')
        await asyncio.sleep(2)
        return 'done'

    batcher = Batcher(persist, interval=PERSIST_INTERVAL)

    def key(doc):
        return doc[ID_KEY]

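    # Reducer: count events per key (each event increments the accumulator by one).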
    async def function(acc, document):
        return acc + 1

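    # Seed each accumulator from the previously persisted aggregate, falling back to 0.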
    async def initializer(doc: dict):
        value = await db[AGGREGATED_COLLECTION].find_one(
            {AGGREGATED_ID_KEY: key(doc)})
        value = value and value.get(AGGREGATED_KEY)
        return value or 0

    # initializer = 0  # TODO rm

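    # Pipeline: collection events -> per-key running counts -> db operations -> batched persistence -> pretty output.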
    xs = events(collection=db[EVENTS_COLLECTION])
    xs = accumulate_by_key(xs, function, key=key, initializer=initializer)
    xs = stream.starmap(xs, make_db_operation)
    xs = stream.map(xs, batcher.push)  # task_limit=1
    # xs = window(xs, PERSIST_INTERVAL)
    # xs = stream.map(xs, take_last)
    # xs = stream.map(xs, list)
    # xs = stream.map(xs, lambda x: [z[1] for z in x])
    # xs = stream.map(xs, persist, task_limit=1)
    xs = stream.map(
        xs,
        pretty,
    )
    await asyncio.gather(
        store_some(db),
        xs,
    )
Example No. 4
async def async_fetch_urlset(urls, download_dir, pbar=None, verbose=False):
    async with httpx.AsyncClient(http2=True) as session:
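        # Pair the shared client with every URL so fetch(session, url) can be starmapped concurrently.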
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False, task_limit=10)
        process_download = partial(process,
                                   download_dir=download_dir,
                                   pbar=pbar,
                                   verbose=verbose)
        zs = stream.map(ys, process_download)
        return await zs
Example No. 5
async def async_fetch_urlset(urls,
                             schedules,
                             pbar=None,
                             verbose=False,
                             use_http2=True):
    async with httpx.AsyncClient(http2=use_http2) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(urls))
        ys = stream.starmap(xs, fetch, ordered=False,
                            task_limit=20)  # a limit of 30 performs similarly
        process = partial(process_soup,
                          schedules=schedules,
                          pbar=pbar,
                          verbose=verbose)
        zs = stream.map(ys, process)
        return await zs
Example No. 6
async def async_fetch_episodes(listings,
                               pbar=None,
                               verbose=False,
                               use_http2=False):
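    # Look-up table from broadcast URL to its broadcast record, used by process_json.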
    jsons = dict(zip(listings.broadcasts_urlset, listings.all_broadcasts))
    limits = httpx.Limits(max_keepalive_connections=20)
    async with httpx.AsyncClient(http2=use_http2, limits=limits) as session:
        ws = stream.repeat(session)
        xs = stream.zip(ws, stream.iterate(listings.broadcasts_urlset))
        ys = stream.starmap(xs, fetch, ordered=False,
                            task_limit=20)  # 20 is optimal
        process = partial(process_json,
                          jsons=jsons,
                          pbar=pbar,
                          verbose=verbose)
        zs = stream.map(ys, process)
        return await zs
Example No. 7
async def single_resolver(collection, watcher, pipeline=[]):
    initializer = await find_one(collection, get_where(pipeline) or {})
    yield initializer
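    # Watch for updates to the initially resolved document and re-run the aggregation pipeline on each change.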
    xs = stream.filter(
        watcher,
        lambda change: change['operationType'] == 'update',
    )
    xs = stream.map(xs, lambda change: change['fullDocument'], task_limit=1)
    xs = stream.filter(xs, lambda doc: doc['_id'] == initializer['_id'])
    xs = stream.concatmap(xs,
                          lambda w: stream.iterate(aggregate([w], pipeline)),
                          task_limit=1)
    xs = window(xs, BATCH_INTERVAL)
    xs = last_per_window(xs)
    # xs = stream.map(xs, list)
    # xs = stream.concat(xs, )
    async for x in xs:
        print(f'serving {prettify(x)}')
        yield x
Example No. 8
    async def async_get(self, urls):
        result = []

        #TODO: Error checking
        async with aiohttp.ClientSession() as session:

            async def fetch(url):
                if self.logger:
                    self.logger.log(f"Grabbing {url}")
                for _ in range(self.retry + 1):
                    try:
                        async with session.get(url) as resp:
                            if resp.status == 200:
                                return await resp.text()
                            else:
                                logging.error(
                                    f"Server returned error status {resp.status} on {url}"
                                )
                                if self.logger:
                                    self.logger.log(f"Error on {url}")
                                return ""
                    except aiohttp.InvalidURL:
                        logging.error(f"Invalid URL: {url}")
                    except aiohttp.ClientPayloadError:
                        logging.error("Invalid payload")
                    except Exception as e:
                        logging.error(f"Unexpected error: {e}")
                return ""

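            # Fetch up to 10 URLs concurrently, preserving the input order of the results.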
            url_stream = stream.iterate(urls)
            html_stream = stream.map(url_stream,
                                     fetch,
                                     ordered=True,
                                     task_limit=10)

            async with html_stream.stream() as streamer:
                async for item in streamer:
                    result.append(item)
            return result
Example No. 9
def last_per_window(xs):
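    """Reduce each window of ``xs`` to its last item (via ``take_last``) and drop falsy results."""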
    xs = stream.map(xs, take_last)
    xs = stream.filter(xs, bool)
    return xs
Example No. 10
async def execute(seed_prs: List[int],
                  dry: bool = False,
                  database_url: str = None) -> None:
    session = aiohttp.ClientSession()
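    # For each newly opened PR, fetch its ofborg evaluation; results may arrive out of order.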
    pr_stream = stream.map(
        aiter_opened_prs(seed_prs, session=session),
        partial(get_ofborg_eval, session=session),
        ordered=False,
    )

    if dry:
        sqs_queues = None
        autoscaling = None
    else:
        sqs_queues = get_sqs()
        autoscaling = get_autoscaling()
        assert database_url is not None

    if database_url is not None:
        conn = await asyncpg.connect(database_url)
        await conn.execute(create_nixpkgs_review_dispatched_table_sql())
    else:
        conn = None

    log.info("Setup", sqs=sqs_queues, autoscaling=autoscaling, conn=conn)
    async with pr_stream.stream() as streamer:
        async for event, ofborg_eval in streamer:
            pr = event["payload"]["number"]
            log.info("Main loop", pr=pr)

            if ofborg_eval is None:
                log.info(
                    "Ofborg failed or no packages",
                    pr=pr,
                    ofborg_eval=ofborg_eval,
                    failed=True,
                )
                # Ofborg failed
                continue

            log.info("New buildable PR", pr=pr, ofborg_eval=ofborg_eval)
            await log_buildable_pr(conn, pr=pr, ofborg_eval=ofborg_eval)
            if sqs_queues is not None:
                for system in ALL_BUILD_SYSTEMS:
                    packages = ofborg_eval["packages_per_system"].get(
                        system, set())
                    if not packages:
                        log.info("Empty pull request", pr=pr, system=system)
                        continue

                    sqs_response = sqs_queues[system].send_message(
                        # Message must be shorter than 2048 bytes, so don't pack
                        # too much stuff in here
                        MessageBody=json.dumps(
                            dict(
                                pr=pr,
                                ofborg_url=ofborg_eval["url"],
                            )))
                    meta = sqs_response["ResponseMetadata"]
                    if meta["HTTPStatusCode"] != 200:
                        log.error("SQS Response", response=sqs_response, pr=pr)

            else:
                log.info(
                    "Skipping SQS submission",
                    pr=pr,
                    sqs_queues=sqs_queues,
                )
Example No. 11
async def async_map(func, items):
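    """Return a stream mapping ``func`` over ``items``; plain iterables are wrapped with ``stream.iterate``."""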
    if isinstance(items, Stream):
        return stream.map(items, func)
    return stream.map(stream.iterate(items), func)