Example #1
File: __init__.py    Project: rasibley/kcidb
def count_main():
    """Execute the kcidb-count command-line tool"""
    sys.excepthook = misc.log_and_print_excepthook
    description = 'kcidb-count - Count number of objects in I/O JSON data'
    parser = misc.ArgumentParser(description=description)
    parser.parse_args()

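    # Read a stream of JSON documents from standard input, validate each
    # against the I/O schema, and print the number of objects it contains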
    for data in misc.json_load_stream_fd(sys.stdin.fileno()):
        print(io.get_obj_num(io.schema.validate(data)), file=sys.stdout)
        sys.stdout.flush()
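The loop above relies on kcidb's own helpers to read a stream of concatenated JSON documents from standard input and to count the objects in each. As a rough, standalone illustration of the same pattern, the sketch below uses only the standard library; iter_json_docs and the object-count expression are stand-ins for misc.json_load_stream_fd and io.get_obj_num (they are not kcidb APIs), the input is read all at once rather than streamed, and schema validation is skipped entirely.

import json
import sys

def iter_json_docs(stream):
    """Yield JSON documents concatenated in a text stream (illustration only)."""
    decoder = json.JSONDecoder()
    buf = stream.read()  # Simplification: slurp all input instead of streaming
    pos = 0
    while pos < len(buf):
        # Skip whitespace separating the documents
        while pos < len(buf) and buf[pos].isspace():
            pos += 1
        if pos >= len(buf):
            break
        obj, pos = decoder.raw_decode(buf, pos)
        yield obj

if __name__ == "__main__":
    for data in iter_json_docs(sys.stdin):
        # Rough stand-in for io.get_obj_num(): sum the lengths of the
        # top-level object lists (assumes each document is a JSON object)
        print(sum(len(v) for v in data.values() if isinstance(v, list)))
        sys.stdout.flush()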
Example #2
def kcidb_load_queue(event, context):
    """
    Load multiple KCIDB data messages from the LOAD_QUEUE_SUBSCRIBER queue
    into the database, if it stayed unmodified for at least
    DATASET_LOAD_PERIOD.
    """
    # Do nothing, if updated recently
    now = datetime.datetime.now(datetime.timezone.utc)
    last_modified = DB_CLIENT.get_last_modified()
    LOGGER.debug("Now: %s, Last modified: %s", now, last_modified)
    if last_modified and now - last_modified < DATASET_LOAD_PERIOD:
        LOGGER.info("Database too fresh, exiting")
        return

    # Pull messages
    msgs = kcidb_load_queue_msgs(LOAD_QUEUE_SUBSCRIBER, LOAD_QUEUE_MSG_MAX,
                                 LOAD_QUEUE_OBJ_MAX, LOAD_QUEUE_TIMEOUT_SEC)
    if msgs:
        LOGGER.info("Pulled %u messages", len(msgs))
    else:
        LOGGER.info("Pulled nothing, exiting")
        return

    # Create merged data referencing the pulled pieces
    LOGGER.debug("Merging %u messages...", len(msgs))
    data = kcidb_io.merge(kcidb_io.new(), (msg[1] for msg in msgs),
                          copy_target=False,
                          copy_sources=False)
    LOGGER.info("Merged %u messages", len(msgs))
    # Load the merged data into the database
    obj_num = kcidb_io.get_obj_num(data)
    LOGGER.debug("Loading %u objects...", obj_num)
    DB_CLIENT.load(data)
    LOGGER.info("Loaded %u objects", obj_num)

    # Acknowledge all the loaded data
    for msg in msgs:
        LOAD_QUEUE_SUBSCRIBER.ack(msg[0])
    LOGGER.debug("ACK'ed %u messages", len(msgs))

    # Forward the loaded data to the "loaded" topic
    for msg in msgs:
        LOADED_QUEUE_PUBLISHER.publish(msg[1])
    LOGGER.debug("Forwarded %u messages", len(msgs))
Example #3
def kcidb_load_queue_msgs(subscriber, msg_max, obj_max, timeout_sec):
    """
    Pull I/O data messages from a subscriber with a limit on message number,
    total object number and time spent.

    Args:
        subscriber:     The subscriber (kcidb.mq.Subscriber) to pull from.
        msg_max:        Maximum number of messages to pull.
        obj_max:        Maximum number of objects to pull.
        timeout_sec:    Maximum number of seconds to spend.

    Returns:
        The list of pulled messages.
    """
    # Yeah it's crowded, but bear with us, pylint: disable=too-many-locals
    # Pull data from queue until we get enough, or time runs out
    start = datetime.datetime.now(datetime.timezone.utc)
    obj_num = 0
    pulls = 0
    msgs = []
    while True:
        # Calculate remaining messages
        pull_msg_max = msg_max - len(msgs)
        if pull_msg_max <= 0:
            LOGGER.debug("Received enough messages")
            break

        # Calculate remaining time
        pull_timeout_sec = \
            timeout_sec - \
            (datetime.datetime.now(datetime.timezone.utc) - start). \
            total_seconds()
        if pull_timeout_sec <= 0:
            LOGGER.debug("Ran out of time")
            break

        # Pull
        LOGGER.debug(
            "Pulling <= %u messages from the queue, "
            "with timeout %us...", pull_msg_max, pull_timeout_sec)
        pull_msgs = subscriber.pull(pull_msg_max, timeout=pull_timeout_sec)
        pulls += 1
        LOGGER.debug("Pulled %u messages", len(pull_msgs))

        # Add messages while staying within obj_max, but always accept the
        # first message, even if it alone exceeds the limit
        for index, msg in enumerate(pull_msgs):
            msg_obj_num = kcidb_io.get_obj_num(msg[1])
            obj_num += msg_obj_num
            if msgs and obj_num > obj_max:
                LOGGER.debug(
                    "Message #%u crossed %u-object boundary "
                    "at %u total objects",
                    len(msgs) + 1, obj_max, obj_num)
                obj_num -= msg_obj_num
                for nack_msg in pull_msgs[index:]:
                    subscriber.nack(nack_msg[0])
                LOGGER.debug("NACK'ed %s messages", len(pull_msgs) - index)
                break
            msgs.append(msg)
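        # The "else" below runs only when the "for" loop finishes without
        # hitting "break": in that case keep pulling. A "break" above skips
        # the "else" and falls through to the outer "break", ending the loop.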
        else:
            continue
        break

    duration_seconds = \
        (datetime.datetime.now(datetime.timezone.utc) - start).total_seconds()
    LOGGER.debug(
        "Pulled %u messages, %u objects total "
        "in %u pulls and %u seconds", len(msgs), obj_num, pulls,
        duration_seconds)
    return msgs
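The while/for/else structure above is how the function breaks out of two nested loops at once: hitting the object limit breaks the inner for, skips its else clause, and falls through to the outer break, while an exhausted batch takes the else path and continues pulling. The following standalone sketch shows the same control flow; the drain helper and its sample data are purely illustrative and not part of kcidb.

def drain(batches, limit):
    """Collect items from successive batches until `limit` items are gathered."""
    items = []
    while True:
        batch = next(batches, None)
        if batch is None:
            break               # No more batches to pull
        for item in batch:
            if len(items) >= limit:
                break           # Inner break: stop taking items from this batch
            items.append(item)
        else:
            continue            # Batch exhausted without a break: pull another one
        break                   # Reached only via the inner break: stop pulling
    return items

print(drain(iter([[1, 2], [3, 4, 5], [6]]), 3))  # prints [1, 2, 3]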