def run_consumer():
    logger = logging.getLogger('consumer')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)

    consumer = Consumer(conf)
    consumer.subscribe(topics=config.resolve_config("CONSUMER_TOPICS"))
    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())
            else:
                # Proper message
                # sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                #                  (msg.topic(), msg.partition(), msg.offset(),
                #                   str(msg.key())))
                print(msg.value())
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
class KafmanConsumer(metaclass=Singleton):
    """Kafka consumer that emits every received message on the consumer event bus."""

    def __init__(self):
        super().__init__()
        self.topic = None
        self.consumer = None
        self.started = False
        self.bus = EventBus.get(CONSUMER_BUS)

    def start(self, settings: dict) -> None:
        """Create the underlying Consumer from the given settings."""
        if self.consumer is None:
            self.consumer = Consumer(settings)
            self.started = True

    def stop(self) -> None:
        """Dispose of the underlying Consumer and stop polling."""
        if self.consumer is not None:
            del self.consumer
            self.consumer = None
            self.started = False

    def consume(self, topics: List[str]) -> None:
        """Start consuming the given topics on a daemon thread."""
        if self.started:
            tr = threading.Thread(target=self._consume, args=(topics,))
            tr.daemon = True
            tr.start()

    def _consume(self, topics: List[str]) -> None:
        """Poll the subscribed topics and emit each message on the event bus."""
        self.consumer.subscribe(topics)
        try:
            while self.started:
                message = self.consumer.poll(POLLING_INTERVAL)
                if message is None:
                    continue
                elif not message.error():
                    msg = message.value().decode(Charset.UTF_8.value)
                    self.bus.emit(MSG_CONS_EVT, message=msg, topic=message.topic())
                elif message.error().code() == PARTITION_EOF:
                    print(f"End of partition reached {message.topic()}/{message.partition()}")
                else:
                    print(f"Error occurred: {message.error().str()}")
        except KeyboardInterrupt:
            print("Keyboard interrupted")
        finally:
            if self.consumer:
                self.consumer.close()
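
# A minimal usage sketch for the class above, not taken from the original source:
# the broker address, topic name, and the EventBus.subscribe() call are assumptions
# about how the surrounding application wires up CONSUMER_BUS / MSG_CONS_EVT.

def _print_consumed(message: str, topic: str) -> None:
    # Hypothetical subscriber for the MSG_CONS_EVT event.
    print(f"[{topic}] {message}")


def demo_kafman_consumer() -> None:
    kafman = KafmanConsumer()
    # Assumed EventBus API: subscribe a callback to the consumed-message event.
    EventBus.get(CONSUMER_BUS).subscribe(MSG_CONS_EVT, _print_consumed)
    kafman.start({'bootstrap.servers': 'localhost:9092', 'group.id': 'kafman-demo'})
    kafman.consume(['demo-topic'])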
def poll(self, timeout=None):
    """
    Overridden method.
    :param timeout:
    :return:
    """
    msg = Consumer.poll(self, timeout)
    if msg is not None:
        self.build_and_finish_child_span(msg)
    return msg
def create_consumers(args, num_partitions, partition_table):
    consumers = []
    transactional = args["transactional"]
    for i in range(num_partitions):
        partition_table[i] = []
        oc = Consumer({
            'bootstrap.servers': args["kafka"],
            'group.id': str(uuid.uuid4()),
            'auto.offset.reset': 'latest',
            'api.version.request': True,
            'isolation.level': ('read_committed' if transactional else 'read_uncommitted'),
            'max.poll.interval.ms': 86400000
        })
        oc.assign([TopicPartition(args["output_topic"], i)])
        oc.poll(0.5)
        consumers.append(oc)
    return consumers
class KafkaConsumer(BaseKafkaConsumer):
    def __init__(self, config, logger):
        self._config = config["consumer"]
        conf = self._config["conf"]
        conf.setdefault("group.id", str(uuid.uuid1()))
        self.autocommit_enabled = conf.get("enable.auto.commit", True)
        self._logger = logger
        internal_log_path = self._config.get("internal_log_path")
        if internal_log_path:
            debug_logger = logging.getLogger("debug_consumer")
            timestamp = time.strftime("_%d%m%Y_")
            debug_logger.addHandler(
                logging.FileHandler("{}/kafka_consumer_debug{}{}.log".format(
                    internal_log_path, timestamp, os.getpid())))
            conf["logger"] = debug_logger
        self._consumer = Consumer(**conf)

    def subscribe(self, topics=None):
        topics = topics or list(self._config["topics"].values())
        self._consumer.subscribe(topics)

    def poll(self):
        msg = self._consumer.poll(self._config["poll_timeout"])
        if msg is not None:
            err = msg.error()
            if err:
                if err.code() == KafkaError._PARTITION_EOF:
                    return None
                else:
                    self._logger.info(
                        "KafkaConsumer Error {} at pid {}: topic={} partition=[{}] offset={}\n"
                        .format(err.code(), os.getpid(), msg.topic(),
                                msg.partition(), msg.offset()))
                    raise KafkaException(err)
            if msg.value():
                return msg

    def commit_offset(self, msg):
        if msg is not None:
            if self.autocommit_enabled:
                self._consumer.store_offsets(msg)
            else:
                # 'async' is a reserved word in Python 3; confluent-kafka's
                # commit() takes the keyword 'asynchronous'.
                self._consumer.commit(msg, asynchronous=False)

    def close(self):
        self._consumer.close()
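
# A hedged configuration sketch for the wrapper above, not from the original
# source: the broker address, topic names, and poll_timeout are illustrative
# assumptions about the expected "consumer" config layout.
example_config = {
    "consumer": {
        "conf": {
            "bootstrap.servers": "localhost:9092",
            "enable.auto.commit": False,
        },
        "topics": {"main": "demo-topic"},
        "poll_timeout": 1.0,
    }
}


def demo_kafka_consumer(logger):
    # Poll and explicitly commit each message; the loop shape mirrors the
    # wrapper's poll()/commit_offset() contract.
    consumer = KafkaConsumer(example_config, logger)
    consumer.subscribe()
    try:
        while True:
            msg = consumer.poll()
            if msg is not None:
                consumer.commit_offset(msg)
    finally:
        consumer.close()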
def kafka_consume_expected(topic, group='0', timeout=1.0,
                           mfilter=lambda x: True,
                           validator=lambda x: None,
                           after_subscribe=lambda: None):
    consumer = Consumer({
        'bootstrap.servers': KAFK,
        'group.id': group,
        'auto.offset.reset': 'earliest'  # earliest _committed_ offset
    })
    msgs = []
    topics = consumer.list_topics(topic)  # promises to create topic
    logging.debug("Topic state: %s", topics.topics)
    if topics.topics[topic].error is not None:
        logging.warning("Error subscribing to topic: %s", topics.topics)
        return msgs

    consumer.subscribe([topic])
    time.sleep(5)  # for kafka to rebalance consumer groups
    after_subscribe()

    logging.debug("Waiting for messages...")
    while True:
        msg = consumer.poll(timeout)
        if msg is None:
            break
        logging.info("Seen message: %r %r", msg.key(), msg.value())
        if msg.error():
            logging.warning("Consumer error: {}".format(msg.error()))
            continue
        if mfilter(msg):
            validator(msg)
            msgs.append(msg)

    consumer.commit()
    consumer.close()
    return msgs
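
# A hedged example of calling the helper above from a test, not from the
# original source: the topic name, group id, key check, and JSON payload
# assertion are assumptions (and `json` is assumed to be imported).
def test_orders_topic_receives_valid_json():
    def _is_order(msg):
        return msg.key() == b"order"

    def _check_payload(msg):
        payload = json.loads(msg.value())
        assert "order_id" in payload

    msgs = kafka_consume_expected(
        "orders",          # hypothetical topic
        group="it-tests",
        timeout=2.0,
        mfilter=_is_order,
        validator=_check_payload,
    )
    assert msgs, "expected at least one matching message"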
def pay_order():
    consumer = Consumer({
        'bootstrap.servers': os.environ.get('BROKER'),
        'group.id': 'consumer-pay-id',
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe(['pay_order'])
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            logging.error("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value())
        OrderPayStory().execute(data.get('order_id'))
    consumer.close()
def start(self):
    c = Consumer({
        'bootstrap.servers': KAFKA_BOOTSTRAP_SERVICE,
        'group.id': KAFKA_GROUP_ID,
        'auto.offset.reset': 'earliest'
    })
    c.subscribe([BTC_BLOCK_TOPIC])
    while True:
        msg = c.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value().decode('utf-8'))
        cache.set("latest_block", data, timeout=None)
def deamon():
    """ Termite Client """
    group = ""
    KAFKA_HOST = ""
    KAFKA_TOPIC = ""
    print(KAFKA_HOST)
    c = Consumer({
        "bootstrap.servers": KAFKA_HOST,
        'group.id': group,
    })
    c.subscribe([KAFKA_TOPIC])
    running = True
    while running:
        msg = c.poll(1)
        if msg is None:
            continue
        if not msg.error():
            data = json.loads(msg.value())
            print("receive msg:", data)
            kwargs = {
                "work_id": data["Work_id"],
                "flow_id": data["Flow_id"],
                "cid": data["Cid"]
            }
            # video tag detection
            if data.get("Work", "") == "video_tag_detect":
                t_video_tag_detect(**kwargs)
            else:
                pass
        else:
            if msg.error().code() == KafkaError._PARTITION_EOF:
                print("Skip-Error Message-Topic: {} Partition: {} Offset: {} Error: {}".format(
                    msg.topic(), msg.partition(), msg.offset(), msg.error()))
            else:
                print("Error Message: {}".format(msg.error()))
        time.sleep(0.01)
    c.close()
def order_channel():
    consumer = Consumer({
        'bootstrap.servers': os.environ.get('BROKER'),
        'group.id': 'consumer-order-id',
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe([
        'order_reserved', 'order_paid', 'order_pay_failed',
        'order_reserve_rejected'
    ])
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            logging.error("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value())
        topic = msg.topic()

        # TODO For demo
        if topic == 'order_reserved':
            OrderSaga().pay(data.get('order_id'))
            continue
        if topic == 'order_paid':
            OrderSaga().approve(data.get('order_id'))
            continue
        if topic == 'order_pay_failed':
            OrderSaga().reject_reserve(data.get('order_id'))
            continue
        if topic == 'order_reserve_rejected':
            OrderSaga().cancel(data.get('order_id'))
            continue
    consumer.close()
class Kafka(object):
    def __init__(self, target_key) -> None:
        super().__init__()
        self.address = _address_for_key(target_key)
        kafka_config = {
            'bootstrap.servers': self.address,
            'group.id': "up9-test-group",
            'enable.auto.commit': 'false'  # important for passive observing
        }
        if "ssl://" in self.address.lower():
            kafka_config['security.protocol'] = 'SSL'
        self.consumer = Consumer(kafka_config)
        self.producer = Producer(kafka_config)
        self.watching_topics = []
        self.consumer.list_topics(timeout=5)  # to check for connectivity

    def watch_topics(self, topics: list):
        def my_on_assign(consumer, partitions):
            logging.debug("On assign: %r", partitions)
            consumer.assign(partitions)
            for partition in partitions:
                low, high = consumer.get_watermark_offsets(partition)
                partition.offset = high
                logging.debug("Setting offset: %r", partition)
                consumer.seek(partition)

        self.watching_topics.extend(topics)
        self.consumer.subscribe(topics, on_assign=my_on_assign)
        self.consumer.poll(0.01)  # to trigger partition assignments

    def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
        logging.debug("Checking messages that appeared on kafka topics: %r",
                      self.watching_topics)
        res = []
        start = time.time()
        while True:
            msg = self.consumer.poll(interval)
            if msg is None or time.time() - start > interval:
                break  # done reading
            if msg.error():
                raise KafkaException("kafka consumer error: {}".format(msg.error()))
            logging.debug("Potential message: %r",
                          (msg.partition(), msg.key(), msg.headers(), msg.value()))
            if predicate(msg):
                res.append(msg)
        # TODO: consumer.close()
        return res

    def assert_seen_message(self, resp, delay=0, predicate=lambda x: True):
        @recorder.assertion_decorator
        def assert_seen_kafka_message(resp, topics, delay):
            messages = self.get_watched_messages(delay, predicate)
            messages = [(m.topic(), m.key(), m.value(), m.headers()) for m in messages]
            if not messages:
                raise AssertionError("No messages on Kafka topic %r" % topics)
            else:
                logging.info("Validated the messages have appeared: %s", messages)
            return messages

        return assert_seen_kafka_message(resp, self.watching_topics, delay)

    def put(self, topic, data=None, json=None, headers=None):
        # TODO: parse key out of URL
        if topic.startswith('/'):
            topic = topic[1:]
        if data is None and json is not None:
            data = json_lib.dumps(json)
        with apiritif.transaction('kafka://[' + self.address + ']/' + topic):
            logging.info("Sending message to Kafka topic %r: %r", topic, data)
            self.producer.produce(topic, data,
                                  headers=[] if headers is None else headers)
            self.producer.poll(0)
            self.producer.flush()
            wrapped_req = self._make_request(
                'PUT', 'kafka://' + self.address.split(',')[0] + '/' + topic, data)
            wrapped_response = self._make_response(wrapped_req)
            recorder.record_http_request('PUT', self.address, wrapped_req,
                                         wrapped_response, _context.session)
        return wrapped_response

    def _make_request(self, method, url, request):
        req = requests.Request(method, url=url, data=request)
        prepared = req.prepare()
        _context.grpc_mapping[id(request)] = prepared
        return prepared

    def _make_response(self, wrapped_req):
        resp = requests.Response()
        resp.status_code = 202
        resp.request = wrapped_req
        resp._request = wrapped_req
        resp.msg = 'Accepted'
        resp.raw = io.BytesIO()
        return resp
def main(args):
    serial = args.serial
    num_messages = args.num_messages
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    if serial:
        print("Running in SERIAL mode")
        print("The input producer will wait for the reply of the transactor "
              "before producing the next message.")
    else:
        print("Running in PARALLEL mode")
        print("The input producer will produce all messages in parallel (at once) "
              "after the first message.")

    tr_args = [
        sys.executable,
        os.path.join(HERE, "eos-transactions.py"),
        "-b", brokers,
        "-g", group_id + "-tr",
        "-t", input_topic,
        "-p", str(input_partition),
        "-o", output_topic,
    ]

    output_consumer = Consumer({
        "bootstrap.servers": brokers,
        "group.id": group_id + "-pr",
        "auto.offset.reset": "earliest",
        "enable.auto.commit": True,
        "enable.partition.eof": False,
    })
    output_consumer.subscribe([output_topic])

    input_producer = Producer({
        'bootstrap.servers': brokers,
    })

    try:
        with tempfile.NamedTemporaryFile(mode='w+') as f:
            tr_proc = subprocess.Popen(tr_args,
                                       stderr=subprocess.STDOUT,
                                       stdout=f,
                                       cwd=HERE,
                                       close_fds=True)
            try:
                time.sleep(1)
                assert tr_proc.poll() is None

                tx = 0
                for i in range(num_messages):
                    input_producer.produce(input_topic, key=b"xy",
                                           value=str(tx).encode("ascii"))
                    tx += 1
                    assert input_producer.flush(10) == 0

                    while serial or tx <= 1:
                        msg = output_consumer.poll(1.0)
                        if msg is None:
                            continue
                        assert msg.error() is None
                        if tx == 1:
                            t_start = time.time()
                        break

                if not serial:
                    for _ in range(num_messages - 1):
                        msg = output_consumer.poll(1.0)
                        if msg is None:
                            continue
                        assert msg.error() is None

                print("Processing took {}".format(time.time() - t_start))
            finally:
                if tr_proc.poll() is None:
                    tr_proc.terminate()
                    tr_proc.wait()
                f.seek(0)
                eos_out = f.read()
    finally:
        output_consumer.close()  # commit offsets

    i = 0
    c = False
    send_offset_logs = defaultdict(list)
    send_offset_times = []
    for line in eos_out.split("\n"):
        if line.startswith(":DEMO:START "):
            c = True
        if c:
            send_offset_logs[i].append(line)
        if line.startswith(":DEMO:END "):
            send_offset_times.append(float(line.rpartition(" ")[-1]))
            c = False
            i += 1

    print("\nSend offset times:", send_offset_times)
    print("Send offset times average:",
          sum(send_offset_times) / len(send_offset_times))

    print("\nRelevant log snippet from the middle:")
    print("\n".join(send_offset_logs[int(i / 2)]))

    print("\nFull output of the transactor:")
    print(eos_out)
class BreadCrumbDataConsumer:
    _logger = logging.getLogger('BreadCrumbDataConsumer')

    def __init__(self):
        kafka_configs = KafkaHelper.get_kafka_configs()
        kafka_configs['group.id'] = 'python_breadcrumb_data_consumer'
        kafka_configs['auto.offset.reset'] = 'earliest'
        self._consumer = Consumer(kafka_configs)
        self._bread_crumb_repo = BreadCrumbRepository()
        self._trips_stop_data = dict()

    def consume_breadcrumb_records(self):
        self._logger.info("Starting breadcrumb data consumer ...")
        self._consumer.subscribe([STOP_EVENT_TOPIC, BREADCRUMB_DATA_TOPIC])
        stop_events_records_count = 0
        consumed_breadcrumb_records_count = 0
        bread_crumb_records_saved_to_db_count = 0
        breadcrumbs = list()
        last_saved_to_db = datetime.now()
        try:
            while True:
                duration_from_last_saved_to_db = datetime.now() - last_saved_to_db
                if len(breadcrumbs) >= 50_000 or (
                        len(breadcrumbs) > 0
                        and duration_from_last_saved_to_db.total_seconds() > (60 * 2)):
                    self._bread_crumb_repo.bulk_save_breadcrumbs(
                        breadcrumbs, self._trips_stop_data)
                    bread_crumb_records_saved_to_db_count += len(breadcrumbs)
                    breadcrumbs.clear()
                    last_saved_to_db = datetime.now()
                    self._logger.info(
                        'Number of breadcrumb records consumed = {}, stop event records consumed = {}, records saved to db = {}'
                        .format(consumed_breadcrumb_records_count,
                                stop_events_records_count,
                                bread_crumb_records_saved_to_db_count))

                msg = self._consumer.poll(1.0)
                if msg is None:
                    continue
                elif msg.error():
                    self._logger.error('error: {}'.format(msg.error()))
                else:
                    msg_topic = msg.topic()
                    message_data = msg.value().decode("utf-8")
                    if msg_topic == BREADCRUMB_DATA_TOPIC:
                        consumed_breadcrumb_records_count += 1
                        self.process_bread_crumb_record(breadcrumbs, message_data)
                    elif msg_topic == STOP_EVENT_TOPIC:
                        stop_events_records_count += 1
                        self.process_stop_event_records(message_data)
                    self._logger.debug(
                        'Number of breadcrumb records consumed = {}, stop event records consumed = {}'
                        .format(consumed_breadcrumb_records_count,
                                stop_events_records_count))
        finally:
            self._consumer.close()
            self._bread_crumb_repo.bulk_save_breadcrumbs(
                breadcrumbs, self._trips_stop_data)

    def process_bread_crumb_record(self, breadcrumbs, message_data):
        try:
            breadcrumb = BreadCrumb.parse_raw(message_data)
            breadcrumbs.append(breadcrumb)
        except Exception as ex:
            self._logger.debug('Encountered an error parsing a bread crumb.', ex)

    def process_stop_event_records(self, message_data):
        try:
            trip_stop_dict = json.loads(message_data)
            trip_id = list(trip_stop_dict.keys())[0]
            if trip_id not in self._trips_stop_data.keys():
                trip_stop_events_df = pd.read_json(list(trip_stop_dict.values())[0])
                first_row = trip_stop_events_df.iloc[0]
                self._trips_stop_data[trip_id] = {
                    'route_id': first_row['route_number'],
                    'service_key': first_row['service_key'],
                    'direction': first_row['direction']
                }
        except Exception as ex:
            self._logger.debug('Encountered an error parsing a stop events record.', ex)
def __poll_msg(self, timeout):
    return Consumer.poll(self, timeout)
def main():
    parser = argparse.ArgumentParser(
        epilog="""Description:
           Reidentification demo using any number of cameras: Either camera can be used for
           registration or reidentification only, or for both.
           Plays a video from a jpeg topic, visualizes head detection with a gray bounding box
           around a head. When a detection is identified, changes the bounding box color to orange
           and writes the dwell time, age and ID (derived from the reid MS ID) above the heads.
           Displays ('-d') or stores ('-o') the result of this demo in kafka topics.

           Required topics (example):
           - <prefix>.cam.0.original.Image.jpg
           - <prefix>.cam.0.dets.ObjectDetectionRecord.json
           - <prefix>.cam.0.frameinfo.FrameInfoRecord.json
           - <prefix>.cam.0.ages.AgeRecord.json
           - <prefix>.cam.1.original.Image.jpg
           - <prefix>.cam.1.dets.ObjectDetectionRecord.json
           - <prefix>.cam.1.frameinfo.FrameInfoRecord.json
           - <prefix>.cam.1.ages.AgeRecord.json
           ...
           - <prefix>.cam.1.reids.ReidRecord.json
           """,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("broker", help="The name of the kafka broker.", type=str)
    parser.add_argument("prefix", help="Prefix of topics (base|skeleton).", type=str)
    parser.add_argument('-d', "--display", action='store_true')
    parser.add_argument('-o', '--output',
                        help='write output image into kafka topic',
                        action='store_true')
    parser.add_argument('text', help='Text to display (age|dwell_time|both).', type=str)
    args = parser.parse_args()

    if not args.display and not args.output:
        parser.error(
            "Missing argument: -d (display output) or -o (write output to kafka) is needed")

    if args.output:
        producer = Producer({'bootstrap.servers': args.broker})

    overlay = cv2.imread('resources/powered_by_white.png', cv2.IMREAD_UNCHANGED)

    # Prepare the topics to read
    input_topics = [
        f"{args.prefix}.cam.{id}.{topic_postfix}"
        for id in CAMERA_TOPIC_IDS
        for topic_postfix in TOPIC_POSTFIXES
    ]
    reid_topics = [
        f"{args.prefix}.cam.{id}.{topic_postfix}"
        for id in REID_TOPIC_IDS
        for topic_postfix in REID_TOPIC_POSTFIXES
    ]
    consumable_topics = list(map(TopicInfo, input_topics)) \
        + (list(map(lambda t: TopicInfo(t, drop=False), reid_topics)))

    # TODO (when names via person stream): Remove this consumer
    reg_consumer = Consumer({
        'bootstrap.servers': args.broker,
        'group.id': 'multicamreid_reg',
        'auto.offset.reset': 'earliest'
    })
    reg_consumer.assign(
        [TopicPartition(topic="named.records.json", partition=0, offset=0)])

    output_topics = dict((id, f"{args.prefix}.cam.{id}.{OUTPUT_TOPIC_POSTFIX}")
                         for id in CAMERA_TOPIC_IDS)

    # read message, draw and display them
    consumer = TimeOrderedGeneratorWithTimeout(broker=args.broker,
                                               groupid="detection",
                                               topics_infos=consumable_topics,
                                               latency_ms=200,
                                               commit_interval_sec=None,
                                               group_by_time=True)

    registrations: Dict[str, Registration] = {}
    i = 0
    inner_id = 0
    scaling = 1.0
    for msgs in consumer.getMessages():
        k = -1
        for time, v in message_list_to_frame_structure(msgs).items():
            message = v.get(args.prefix, {})

            # Collect Reid records
            reid_records = {}
            for reid_id in REID_TOPIC_IDS:
                reid_message = message.get(reid_id, {})
                reid_records.update(reid_message.get("reid", {}))

            # Process the image
            for topic_key, topic_message in filter(
                    lambda t: t[0] not in REID_TOPIC_IDS, message.items()):
                img = topic_message.get("image", {})
                if not isinstance(img, np.ndarray):
                    continue
                head_detections = topic_message.get("head_detection", {})

                # Set the image scale
                shape_orig = head_detections.pop("image", {})
                if shape_orig:
                    scaling = img.shape[1] / shape_orig["frame_info"]["columns"]

                # Processing the detections of the image
                for detection_key, detection_record in head_detections.items():
                    object_detection_record = detection_record.get("bounding_box", {})
                    if not object_detection_record:
                        continue
                    key_to_display = ""
                    color = COLOR_DARK_GREY

                    face_detection = detection_record.get("unknown", {})
                    if face_detection:
                        color = COLOR_LIGHT_GREY

                    age = None
                    age_detection_record = detection_record.get("age", {})
                    if age_detection_record:
                        age = age_detection_record["age"]
                    if args.text == "age" or args.text == "both":
                        key_to_display = f"Age: {age}" if age else ""

                    # Reidentification received for the detection
                    reid_records_for_det = reid_records.get(detection_key, {})
                    if reid_records_for_det:
                        for reid_record in filter(lambda r: "reid_event" in r,
                                                  reid_records_for_det):
                            # We only use the first [0] identified face now
                            reid_key = reid_record["reid_event"]["match_list"][0]["id"]["first_detection_key"]
                            registered = registrations.get(reid_key, None)
                            if registered:
                                age_to_display = ""
                                if age:
                                    registered.addAge(age)
                                if args.text == "age" or args.text == "both":
                                    age_to_display = f"; Age: {registered.age:d}" if age else ""
                                # Calculate the dwell time if required
                                dwell_time_display = ""
                                if args.text == "dwell_time" or args.text == "both":
                                    detection_time = reid_record["reid_event"]["match_list"][0]["id"]["first_detection_time"]
                                    dwell_time = time - int(detection_time)
                                    dwell_time_display = f"; Dwell time: {dwell_time}ms"
                                color = COLOR_ORANGE
                                name_to_display = registered.name if registered.name else f"ID: {registered.id}"
                                key_to_display = f"{name_to_display}{age_to_display}{dwell_time_display}"
                            else:
                                inner_id += 1
                                registrations[reid_key] = Registration(id=inner_id)
                                if age:
                                    registrations[reid_key].addAge(age)
                                # Update the technical naming topic
                                # TODO (when names via person stream): remove
                                producer.produce("detected.records.json",
                                                 key=str(reid_key).encode("utf-8"),
                                                 value=(str(inner_id) + ";").encode("utf-8"),
                                                 timestamp=time)

                    # Read the technical naming topic
                    # TODO (when names via person stream): remove
                    reg_msg = reg_consumer.poll(0.01)
                    if reg_msg is not None:
                        try:
                            key = reg_msg.key().decode("utf-8")
                            name = reg_msg.value().decode("utf-8")
                            # Update the person name
                            reg_to_update = registrations.get(key)
                            if reg_to_update:
                                reg_to_update.addName(name)
                            else:
                                registrations[key] = Registration(name=name)
                        except:
                            print("Decoding entry of the named.records topic failed.",
                                  flush=True)

                    # draw text above bounding box
                    img = draw_nice_text(
                        canvas=img,
                        text=key_to_display,
                        bounding_box=object_detection_record["bounding_box"],
                        color=color,
                        scale=scaling)

                    # draw bounding_box
                    img = draw_nice_bounding_box(
                        canvas=img,
                        bounding_box=object_detection_record["bounding_box"],
                        color=color,
                        scaling=scaling)

                # draw ultinous logo
                img = draw_overlay(canvas=img,
                                   overlay=overlay,
                                   position=Position.BOTTOM_RIGHT,
                                   scale=scaling)

                # produce output topic
                if args.output:
                    out_topic = output_topics.get(topic_key)
                    producer.produce(out_topic,
                                     value=encode_image_to_message(img),
                                     timestamp=time)
                    producer.poll(0)
                    if i % 1000 == 0:
                        producer.flush()
                    i += 1

                # display
                if args.display:
                    cv2.imshow(f"DEMO Camera {topic_key}", img)
                    k = cv2.waitKey(33)
                    if k == 113:  # The 'q' key to stop
                        break
                    elif k == -1:  # normally -1 is returned, so don't print it
                        continue
                    else:
                        print(f"Press 'q' key for EXIT!")
class TimeOrderedGeneratorWithTimeout(GeneratorInterface):
    """
    A general generator which can read multiple topics and merge their messages in time order.
    A message must be emitted at (arrival_system_time + latency_ms).
    In batch mode (until reaching the first EOP on each stream) the generator will not discard any messages.
    """

    def __init__(self,
                 broker,
                 groupid,
                 topics_infos: List[TopicInfo],
                 latency_ms,
                 commit_interval_sec=None,
                 group_by_time=False,
                 begin_timestamp=None,
                 begin_flag=None,
                 end_timestamp=None,
                 end_flag=None,
                 heartbeat_interval_ms=-1):
        """
        :param broker: Broker to connect to.
        :param groupid: Group id of the consumer.
        :param topics_infos: [TopicInfo()] - list of TopicInfo objects.
        :param latency_ms: (integer >= 0) Latency to wait before serving a message.
            After this, messages with lower or equal timestamps will be discarded.
        :param commit_interval_sec: How many seconds to wait between commits. -1 does not commit with the given group id.
        :param group_by_time: Group messages with the same timestamp. This will yield a list of messages.
        :param begin_timestamp: Timestamp of the kafka messages where the generator will start.
        :param begin_flag: BEGINNING, CONTINUE, LIVE - CONTINUE will continue from the last committed offset.
            If there was no committed offset, it will start from the end of the stream.
        :param end_timestamp: Timestamp where to end the reading.
        :param end_flag: NEVER, END_OF_PARTITION
        :param heartbeat_interval_ms: -1 does not produce heartbeats. After every interval it will produce
            a HeartBeat typed message with the timestamp.
        """
        if begin_timestamp is not None and begin_flag is not None:
            raise Exception(
                'You cannot set the begin timestamp and a flag at the same time.')
        if end_timestamp is not None and end_flag is not None:
            raise Exception(
                'You cannot set the end timestamp and a flag at the same time.')
        if begin_timestamp is not None and end_timestamp is not None and begin_timestamp >= end_timestamp:
            raise Exception(
                'The begin timestamp is larger than the end timestamp.')
        if begin_flag is not None and end_flag is not None and \
                begin_flag == BeginFlag.LIVE and end_flag == EndFlag.END_OF_PARTITION:
            raise Exception(
                'You cannot start in live mode and process until the end of the streams.')
        if end_flag is not None and not (end_flag == EndFlag.END_OF_PARTITION
                                         or end_flag == EndFlag.NEVER):
            raise Exception(
                'Unknown end flag: {}. Please use the given enum to use a proper end flag.'
                .format(end_flag))
        self.end_ts = end_timestamp
        self.end_flag = end_flag
        self.commit_interval_sec = commit_interval_sec
        self.latency_ms = latency_ms
        self.group_by_time = group_by_time
        self.max_poll_interval_ms = 5 * 60 * 1000
        self.consumer = Consumer({
            'bootstrap.servers': broker,
            'group.id': groupid,
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest'
            if begin_flag == BeginFlag.CONTINUE_OR_BEGINNING else 'latest',
            'fetch.wait.max.ms': 20,
            'max.poll.interval.ms': self.max_poll_interval_ms,
            'enable.partition.eof': True
        })
        self.last_poll = None
        self.tps = []
        self.queues = {}
        self.messages_to_be_committed = {}
        self.begin_timestamp = begin_timestamp
        for ti in topics_infos:
            topic_name = ti.topic
            self.messages_to_be_committed[topic_name] = {
                'last_msg': None,
                'committed': True
            }
            if begin_timestamp is not None:
                self.tps.extend(
                    self.consumer.offsets_for_times([
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=begin_timestamp)
                    ]))
            elif begin_flag is not None:
                if begin_flag == BeginFlag.BEGINNING:
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_BEGINNING))
                elif begin_flag in (BeginFlag.CONTINUE, BeginFlag.CONTINUE_OR_BEGINNING):
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_STORED))
                elif begin_flag == BeginFlag.LIVE:
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_END))
                else:
                    raise Exception(
                        'Unknown begin flag. Please use the enum to provide a proper begin flag.')
            else:
                self.tps.append(
                    TopicPartition(topic_name,
                                   partition=ti.partition,
                                   offset=OFFSET_END))
            end_offset = None
            if end_flag is not None and end_flag == EndFlag.END_OF_PARTITION:
                end_offset = self.consumer.get_watermark_offsets(
                    TopicPartition(topic_name, 0))[1] - 1
            if end_offset is None or end_offset >= 0:
                self.queues[topic_name] = Topic(topic_name,
                                                self.consumer,
                                                end_offset=end_offset,
                                                partition=ti.partition,
                                                drop=ti.drop)
        self.consumer.assign(self.tps)
        self.last_commit = time.time()
        self.running = True
        self.heartbeat_interval_ms = heartbeat_interval_ms
        self.next_hb = None

    def stopGenerator(self):
        self.running = False

    def _serve_messages(self, message_to_serve):
        if self.commit_interval_sec is not None and self.group_by_time:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False

        # serve messages
        if self.group_by_time:
            yield message_to_serve
        else:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False
                yield msg
                if not self.running:
                    break

        # commit messages when they were delivered
        current_time = time.time()
        if self.commit_interval_sec is not None and (
                current_time - self.last_commit) > self.commit_interval_sec:
            for k in self.messages_to_be_committed.keys():
                if not self.messages_to_be_committed[k]['committed']:
                    self.consumer.commit(self.messages_to_be_committed[k]['last_msg'])
                    self.messages_to_be_committed[k]['committed'] = True
            self.last_commit = current_time

    def _serve_heartbeat(self, current_timestamp_ms):
        if self.next_hb is None:
            if self.begin_timestamp is not None:
                self.next_hb = self.begin_timestamp
            else:
                self.next_hb = current_timestamp_ms
        while self.next_hb <= current_timestamp_ms:
            yield HeartBeat(self.next_hb)
            self.next_hb += self.heartbeat_interval_ms

    def _can_serve(self):
        min_ets = min([
            q.queue[0].message.timestamp()[1]
            for q in self.queues.values() if len(q.queue) > 0
        ], default=-1)
        if min_ets == -1:
            return None
        deadline = getSystemTimestamp() - self.latency_ms
        if all([q.can_be_emitted(min_ets) for q in self.queues.values()]) and \
                any([q.queue[0].ts < deadline for q in self.queues.values()
                     if len(q.queue) > 0 and q.queue[0].message.timestamp()[1] == min_ets]):
            return min_ets
        else:
            return None

    def getMessages(self):
        while self.running:
            if all([v.stopped for v in self.queues.values()]):
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.queue)
                message_to_serve = [m.message for m in message_to_serve]
                message_to_serve.sort(key=lambda x: x.timestamp()[1])
                while len(message_to_serve) > 0:
                    ts = message_to_serve[0].timestamp()[1]
                    serve_it = []
                    while len(message_to_serve) > 0 and message_to_serve[0].timestamp()[1] == ts:
                        serve_it.append(message_to_serve.pop(0))
                    if not self.heartbeat_interval_ms == -1:
                        yield from self._serve_heartbeat(ts)
                    yield from self._serve_messages(serve_it)
                logging.info('Exiting from generator.')
                break

            self.last_poll = getSystemTimestamp()
            msg = self.consumer.poll(0.001)
            if msg is not None:
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        if msg.topic() in self.queues:
                            self.queues[msg.topic()].first_eop_reached = True
                            self.queues[msg.topic()].end_of_partition = True
                    else:
                        logging.error('Unhandled error: {}'.format(msg.error()))
                        break
                else:
                    self.queues[msg.topic()].end_of_partition = False
                    if self.end_ts is not None and msg.timestamp()[1] > self.end_ts:
                        self.queues[msg.topic()].stop_topic()
                    else:
                        self.queues[msg.topic()].add_message(msg)

            while self.running:
                event_ts_to_serve = self._can_serve()
                if event_ts_to_serve is None or \
                        self.max_poll_interval_ms - (getSystemTimestamp() - self.last_poll) < 30000:
                    if self.end_flag == EndFlag.NEVER and self.heartbeat_interval_ms != -1 \
                            and any([q.end_of_partition for q in self.queues.values()]):
                        if self.next_hb is None:
                            self.next_hb = min(
                                getSystemTimestamp() - self.latency_ms,
                                min([q.queue[0].message.timestamp()[1]
                                     for q in self.queues.values() if len(q.queue) > 0],
                                    default=sys.maxsize))
                        if self.next_hb < min(
                                getSystemTimestamp() - self.latency_ms,
                                min([q.queue[0].message.timestamp()[1]
                                     for q in self.queues.values() if len(q.queue) > 0],
                                    default=sys.maxsize)):
                            yield from self._serve_heartbeat(self.next_hb)
                    break
                if self.heartbeat_interval_ms != -1:
                    yield from self._serve_heartbeat(event_ts_to_serve)
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.get_messages(event_ts_to_serve))
                yield from self._serve_messages(message_to_serve)
                if self.end_ts is not None and self.end_ts <= event_ts_to_serve:
                    self.running = False
        self.consumer.close()
    # https://docs.confluent.io/platform/current/clients/consumer.html
    "auto.offset.reset": "earliest",
    "enable.auto.commit": "true",
})


@atexit.register
def finisher():
    # close consumer on exit
    c.close()


# can subscribe to multiple topics
c.subscribe(TOPICS)

while True:
    # poll for new message at most 1 second
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue

    topic = msg.topic()
    key = msg.key().decode()
    value = msg.value().decode()
    print("Received message: TOPIC: {} | KEY: {} | VALUE: {}".format(topic, key, value))

    data = Measurement(**json.loads(value))
parser = argparse.ArgumentParser(description="Amend data to runinfo messages")
parser.add_argument("-b", "--broker")
args = parser.parse_args()
broker = args.broker

conf = {"bootstrap.servers": broker, "group.id": str(uuid.uuid4())}
admin_client = AdminClient(conf)
cons = Consumer(conf)
prod = Producer(conf)

topics = [topic + "_runInfo" for topic in INST_NAMES]
print(f"subscribing to {topics}")
cons.subscribe(topics=topics)

while True:
    try:
        # SIGINT can't be handled when polling, limit timeout to 1 second.
        msg = cons.poll(1.0)
        if msg is None:
            continue
        message_topic = msg.topic()
        instrument_name = message_topic.split("_runInfo")[0]
        des = deserialise_pl72(msg.value())
        structure = des.nexus_structure

        entry = _create_group("raw_data_1", "NXentry")
        detector_1 = _create_group("detector_1", "NXdetector")
        detector_1[CHILDREN].append(structure["entry"]["events"])
        instrument = _create_group("instrument", "NXinstrument")
        __add_source_info(instrument)
        entry[CHILDREN].append(detector_1)