import asyncio

# db_session_var and refeed_queue_var are contextvars.ContextVar instances
# defined at module level, holding the active database session and the
# refeed queue for the current context.

async def get_data_ongoing(*args, historical=False, **kwargs):
    last_id = 0
    if not historical:
        # Skip all pre-existing data by starting from the newest stored
        # record ID. Note: inserted_after_record_id is passed to get_data
        # explicitly in the loop below, so storing it in kwargs as well
        # would raise a "multiple values for keyword argument" TypeError.
        last_id = await get_newest_record_id()
    db_session = db_session_var.get()
    refeed_queue = refeed_queue_var.get()

    while True:
        # Run a timer for 300 seconds concurrently with our work
        minimum_loop_timer = asyncio.create_task(asyncio.sleep(300))
        async for datapoint in get_data(
            *args, inserted_after_record_id=last_id, order=False, **kwargs
        ):
            if datapoint.id > last_id:
                # This is the newest datapoint we have handled so far
                last_id = datapoint.id
            yield datapoint

        while not refeed_queue.empty():
            # Process any datapoints gathered through the refeed queue
            async for datapoint in queue_as_iterator(refeed_queue):
                yield datapoint

        # Commit the DB to store any work that was done in this loop and
        # ensure that any isolation level issues do not prevent loading more
        # data
        db_session.commit()
        # Wait for that timer to complete. If our loop took over 5 minutes
        # this will complete immediately, otherwise it will block
        await minimum_loop_timer
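
The queue_as_iterator helper used above is defined elsewhere. A minimal
sketch, assuming refeed_queue is a standard asyncio.Queue (the real helper
may differ), could look like:

async def queue_as_iterator(queue):
    # Drain whatever is currently queued without blocking; stop as soon as
    # the queue is empty instead of waiting for new items
    while not queue.empty():
        yield queue.get_nowait()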
Code example #2
async def get_data_ongoing_psql_pubsub(*args, historical=False, **kwargs):
    last_id = 0
    if not historical:
        kwargs["inserted_after_record_id"] = last_id = await get_newest_record_id()
    db_session = db_session_var.get()
    db_session.execute("LISTEN apd_aggregation;")
    loop = asyncio.get_running_loop()
    while True:
        async for datapoint in get_data(*args, order=False, **kwargs):
            if datapoint.id > last_id:
                # This is the newest datapoint we have handled so far
                last_id = datapoint.id
            yield datapoint
            # Next time, find only data points later than the latest we've seen
            kwargs["inserted_after_record_id"] = last_id
        # Commit the DB to store any work that was done in this loop and
        # ensure that any isolation level issues do not prevent loading more
        # data
        db_session.commit()

        # Unwrap the raw DBAPI connection from the SQLAlchemy session and
        # block until a NOTIFY arrives on the apd_aggregation channel
        connection = db_session.connection()
        raw_connection = connection.connection
        await wait_for_notify(loop, raw_connection)
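
The wait_for_notify helper is also defined elsewhere. A minimal sketch,
assuming raw_connection is a psycopg2 connection (an assumption; other
DBAPI drivers would need a different approach), could look like:

async def wait_for_notify(loop, raw_connection):
    # Resolve a future from a reader callback the next time the server
    # sends a notification on this connection's socket
    fut = loop.create_future()

    def on_readable():
        raw_connection.poll()  # consume incoming data from the server
        if raw_connection.notifies:
            raw_connection.notifies.clear()
            if not fut.done():
                fut.set_result(None)

    loop.add_reader(raw_connection.fileno(), on_readable)
    try:
        await fut
    finally:
        loop.remove_reader(raw_connection.fileno())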
Code example #3
async def get_newest_record_id():
    from apd.aggregation.database import datapoint_table
    from sqlalchemy import func

    loop = asyncio.get_running_loop()
    db_session = db_session_var.get()
    max_id_query = db_session.query(func.max(datapoint_table.c.id))
    # The query is blocking, so run it in a thread pool executor rather
    # than stalling the event loop
    return await loop.run_in_executor(None, max_id_query.scalar)

# A simpler variant of get_data_ongoing that relies on the fixed timer
# alone, without the historical flag or refeed queue handling
async def get_data_ongoing(*args, **kwargs):
    last_id = 0
    db_session = db_session_var.get()
    while True:
        # Run a timer for 300 seconds concurrently with our work
        minimum_loop_timer = asyncio.create_task(asyncio.sleep(300))
        async for datapoint in get_data(*args, **kwargs):
            if datapoint.id > last_id:
                # This is the newest datapoint we have handled so far
                last_id = datapoint.id
            yield datapoint
            # Next time, find only data points later than the latest we've seen
            kwargs["inserted_after_record_id"] = last_id
        # Commit the DB to store any work that was done in this loop and
        # ensure that any isolation level issues do not prevent loading more
        # data
        db_session.commit()
        # Wait for that timer to complete. If our loop took over 5 minutes
        # this will complete immediately, otherwise it will block
        await minimum_loop_timer
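
A hypothetical usage sketch for these generators; the sensor_name filter is
a placeholder for whatever keyword arguments get_data accepts, and the
context variables are assumed to be populated before the call:

async def main():
    # db_session_var (and, for the refeed variant, refeed_queue_var) must
    # already be set in this context
    async for datapoint in get_data_ongoing(sensor_name="Temperature"):
        print(datapoint.id)

asyncio.run(main())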