def count_main():
    """Execute the kcidb-count command-line tool"""
    # Make uncaught exceptions both logged and printed
    sys.excepthook = misc.log_and_print_excepthook
    parser = misc.ArgumentParser(
        description='kcidb-count - Count number of objects in I/O JSON data')
    parser.parse_args()
    # Emit the object count of each JSON document arriving on stdin,
    # flushing after each one so counts appear as documents are read
    for json_data in misc.json_load_stream_fd(sys.stdin.fileno()):
        validated_data = io.schema.validate(json_data)
        print(io.get_obj_num(validated_data), file=sys.stdout)
        sys.stdout.flush()
def kcidb_load_queue(event, context):
    """
    Load multiple KCIDB data messages from the LOAD_QUEUE_SUBSCRIBER queue
    into the database, if it stayed unmodified for at least
    DATASET_LOAD_PERIOD.
    """
    # Skip the load entirely if the database was written to recently
    now = datetime.datetime.now(datetime.timezone.utc)
    last_modified = DB_CLIENT.get_last_modified()
    LOGGER.debug("Now: %s, Last modified: %s", now, last_modified)
    if last_modified and now - last_modified < DATASET_LOAD_PERIOD:
        LOGGER.info("Database too fresh, exiting")
        return

    # Fetch a batch of messages, bounded by count, object total, and time
    messages = kcidb_load_queue_msgs(LOAD_QUEUE_SUBSCRIBER,
                                     LOAD_QUEUE_MSG_MAX,
                                     LOAD_QUEUE_OBJ_MAX,
                                     LOAD_QUEUE_TIMEOUT_SEC)
    if not messages:
        LOGGER.info("Pulled nothing, exiting")
        return
    LOGGER.info("Pulled %u messages", len(messages))

    # Combine every pulled piece into a single dataset
    LOGGER.debug("Merging %u messages...", len(messages))
    merged = kcidb_io.merge(kcidb_io.new(),
                            (message[1] for message in messages),
                            copy_target=False, copy_sources=False)
    LOGGER.info("Merged %u messages", len(messages))

    # Push the combined dataset into the database
    object_count = kcidb_io.get_obj_num(merged)
    LOGGER.debug("Loading %u objects...", object_count)
    DB_CLIENT.load(merged)
    LOGGER.info("Loaded %u objects", object_count)

    # Only after a successful load, confirm receipt of everything pulled
    for message in messages:
        LOAD_QUEUE_SUBSCRIBER.ack(message[0])
    LOGGER.debug("ACK'ed %u messages", len(messages))

    # Relay the loaded data onward for downstream consumers
    for message in messages:
        LOADED_QUEUE_PUBLISHER.publish(message[1])
    LOGGER.debug("Forwarded %u messages", len(messages))
def kcidb_load_queue_msgs(subscriber, msg_max, obj_max, timeout_sec):
    """
    Pull I/O data messages from a subscriber with a limit on message
    number, total object number and time spent.

    Args:
        subscriber:     The subscriber (kcidb.mq.Subscriber) to pull from.
        msg_max:        Maximum number of messages to pull.
        obj_max:        Maximum number of objects to pull.
        timeout_sec:    Maximum number of seconds to spend.

    Returns:
        The list of pulled messages, each a (message ID, I/O data) tuple
        as produced by the subscriber's pull().
    """
    # Yeah it's crowded, but bear with us, pylint: disable=too-many-locals
    # Pull data from queue until we get enough, or time runs out
    start = datetime.datetime.now(datetime.timezone.utc)
    obj_num = 0
    pulls = 0
    msgs = []
    while True:
        # Calculate remaining messages
        pull_msg_max = msg_max - len(msgs)
        if pull_msg_max <= 0:
            LOGGER.debug("Received enough messages")
            break
        # Calculate remaining time
        pull_timeout_sec = \
            timeout_sec - \
            (datetime.datetime.now(datetime.timezone.utc) - start). \
            total_seconds()
        if pull_timeout_sec <= 0:
            LOGGER.debug("Ran out of time")
            break
        # Pull
        LOGGER.debug(
            "Pulling <= %u messages from the queue, "
            "with timeout %us...", pull_msg_max, pull_timeout_sec)
        pull_msgs = subscriber.pull(pull_msg_max, timeout=pull_timeout_sec)
        pulls += 1
        LOGGER.debug("Pulled %u messages", len(pull_msgs))
        # Add messages up to obj_max, except the first one: the very first
        # message is always accepted (the "msgs and" guard below), so a
        # single message larger than obj_max cannot stall the queue forever
        for index, msg in enumerate(pull_msgs):
            msg_obj_num = kcidb_io.get_obj_num(msg[1])
            obj_num += msg_obj_num
            if msgs and obj_num > obj_max:
                LOGGER.debug(
                    "Message #%u crossed %u-object boundary "
                    "at %u total objects",
                    len(msgs) + 1, obj_max, obj_num)
                # Undo the count for the rejected message
                obj_num -= msg_obj_num
                # Return this message and all that follow it to the queue
                for nack_msg in pull_msgs[index:]:
                    subscriber.nack(nack_msg[0])
                # Use %u for counts, consistent with the rest of the module
                LOGGER.debug("NACK'ed %u messages", len(pull_msgs) - index)
                break
            msgs.append(msg)
        else:
            # Whole pull accepted - try pulling more
            continue
        # Inner loop hit the object limit - stop pulling
        break
    duration_seconds = \
        (datetime.datetime.now(datetime.timezone.utc) - start).total_seconds()
    LOGGER.debug(
        "Pulled %u messages, %u objects total "
        "in %u pulls and %u seconds",
        len(msgs), obj_num, pulls, duration_seconds)
    return msgs