def run_consumer():
    logger = logging.getLogger('consumer')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)

    consumer = Consumer(conf)
    consumer.subscribe(topics=config.resolve_config("CONSUMER_TOPICS"))

    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())
            else:
                # Proper message
                # sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                #                  (msg.topic(), msg.partition(), msg.offset(),
                #                   str(msg.key())))
                print(msg.value())
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')
    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
def _init_consumer(topics: List[str], config: Dict) -> Consumer:
    """config must contain:
        `bootstrap.servers`
        `group.id`
    but may contain every other Kafka setting as well
    """
    assert "bootstrap.servers" in config.keys()
    assert "group.id" in config.keys()
    consumer = Consumer(config)
    consumer.subscribe(topics)
    return consumer
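# A minimal usage sketch for _init_consumer. The broker address and topic
# names below are placeholder assumptions, not values from the original
# module; any other librdkafka setting may be added to the config dict.
example_consumer = _init_consumer(
    topics=["orders", "payments"],              # hypothetical topics
    config={
        "bootstrap.servers": "localhost:9092",  # hypothetical broker
        "group.id": "example-group",
        "auto.offset.reset": "earliest",
    },
)
example_msg = example_consumer.poll(timeout=1.0)
if example_msg is not None and not example_msg.error():
    print(example_msg.value())
example_consumer.close()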
def create_consumer(kafka_broker, topic):
    ic = Consumer({
        'bootstrap.servers': kafka_broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })
    ic.subscribe([topic])
    return ic
class KafmanConsumer(metaclass=Singleton):
    """TODO"""

    def __init__(self):
        super().__init__()
        self.topic = None
        self.consumer = None
        self.started = False
        self.bus = EventBus.get(CONSUMER_BUS)

    def start(self, settings: dict) -> None:
        """TODO"""
        if self.consumer is None:
            self.consumer = Consumer(settings)
            self.started = True

    def stop(self) -> None:
        """TODO"""
        if self.consumer is not None:
            del self.consumer
            self.consumer = None
            self.started = False

    def consume(self, topics: List[str]) -> None:
        """TODO"""
        if self.started:
            tr = threading.Thread(target=self._consume, args=(topics,))
            tr.setDaemon(True)
            tr.start()

    def _consume(self, topics: List[str]) -> None:
        """TODO"""
        self.consumer.subscribe(topics)
        try:
            while self.started:
                message = self.consumer.poll(POLLING_INTERVAL)
                if message is None:
                    continue
                elif not message.error():
                    msg = message.value().decode(Charset.UTF_8.value)
                    self.bus.emit(MSG_CONS_EVT, message=msg, topic=message.topic())
                elif message.error().code() == PARTITION_EOF:
                    print(f"End of partition reached {message.topic()}/{message.partition()}")
                else:
                    print(f"Error occurred: {message.error().str()}")
        except KeyboardInterrupt:
            print("Keyboard interrupted")
        finally:
            if self.consumer:
                self.consumer.close()
class KafkaConsumer(BaseKafkaConsumer):
    def __init__(self, config, logger):
        self._config = config["consumer"]
        conf = self._config["conf"]
        conf.setdefault("group.id", str(uuid.uuid1()))
        self.autocommit_enabled = conf.get("enable.auto.commit", True)
        self._logger = logger
        internal_log_path = self._config.get("internal_log_path")
        if internal_log_path:
            debug_logger = logging.getLogger("debug_consumer")
            timestamp = time.strftime("_%d%m%Y_")
            debug_logger.addHandler(
                logging.FileHandler("{}/kafka_consumer_debug{}{}.log".format(
                    internal_log_path, timestamp, os.getpid())))
            conf["logger"] = debug_logger
        self._consumer = Consumer(**conf)

    def subscribe(self, topics=None):
        topics = topics or list(self._config["topics"].values())
        self._consumer.subscribe(topics)

    def poll(self):
        msg = self._consumer.poll(self._config["poll_timeout"])
        if msg is not None:
            err = msg.error()
            if err:
                if err.code() == KafkaError._PARTITION_EOF:
                    return None
                else:
                    self._logger.info(
                        "KafkaConsumer Error {} at pid {}: topic={} partition=[{}] reached end at offset {}\n"
                        .format(err.code(), os.getpid(), msg.topic(),
                                msg.partition(), msg.offset()))
                    raise KafkaException(err)
            if msg.value():
                return msg

    def commit_offset(self, msg):
        if msg is not None:
            if self.autocommit_enabled:
                self._consumer.store_offsets(msg)
            else:
                # confluent_kafka uses `asynchronous`; `async` is a reserved word in Python 3
                self._consumer.commit(msg, asynchronous=False)

    def close(self):
        self._consumer.close()
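# Sketch of the config shape that KafkaConsumer reads from, with the keys
# inferred from __init__/subscribe/poll above; the concrete values here are
# assumptions for illustration only.
example_config = {
    "consumer": {
        "conf": {
            "bootstrap.servers": "localhost:9092",  # hypothetical broker
            "enable.auto.commit": True,
            # "group.id" is optional; a uuid1-based one is generated otherwise
        },
        "topics": {"main": "example-topic"},  # subscribe() falls back to these values
        "poll_timeout": 1.0,                  # seconds passed to Consumer.poll()
        # "internal_log_path": "/tmp",        # optional: enables the debug file logger
    }
}
# consumer = KafkaConsumer(example_config, logging.getLogger("example"))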
def get_partitions_with_offsets(broker):
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })
    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    input_consumer.subscribe(['read', 'update', 'transfer'])
    output_consumer.subscribe(['responses'])

    msgs = input_consumer.consume(timeout=5, num_messages=100)
    if len(msgs) == 0:
        print("returned empty")
        return {}

    partitions_with_offsets = {'input': [], 'output': []}

    input_partitions = input_consumer.assignment()
    for p in input_partitions:
        _, h = input_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['input'].append(p)

    output_consumer.consume(timeout=5, num_messages=100)
    output_partitions = output_consumer.assignment()
    for p in output_partitions:
        _, h = output_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['output'].append(p)

    return partitions_with_offsets
def kafka_consume_expected(topic,
                           group='0',
                           timeout=1.0,
                           mfilter=lambda x: True,
                           validator=lambda x: None,
                           after_subscribe=lambda: None):
    consumer = Consumer({
        'bootstrap.servers': KAFK,
        'group.id': group,
        'auto.offset.reset': 'earliest'  # earliest _committed_ offset
    })
    msgs = []
    topics = consumer.list_topics(topic)  # promises to create topic
    logging.debug("Topic state: %s", topics.topics)
    if topics.topics[topic].error is not None:
        logging.warning("Error subscribing to topic: %s", topics.topics)
        return msgs

    consumer.subscribe([topic])
    time.sleep(5)  # for kafka to rebalance consumer groups

    after_subscribe()

    logging.debug("Waiting for messages...")
    while True:
        msg = consumer.poll(timeout)
        if msg is None:
            break

        logging.info("Seen message: %r %r", msg.key(), msg.value())
        if msg.error():
            logging.warning("Consumer error: {}".format(msg.error()))
            continue

        if mfilter(msg):
            validator(msg)
            msgs.append(msg)

    consumer.commit()
    consumer.close()
    return msgs
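# Example invocation of kafka_consume_expected; the topic name and the key
# filter below are illustrative assumptions, not from the original code.
received = kafka_consume_expected(
    topic="example-events",
    group="1",
    timeout=2.0,
    mfilter=lambda m: m.key() == b"expected-key",
)
logging.info("matched %d messages", len(received))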
def pay_order():
    consumer = Consumer({
        'bootstrap.servers': os.environ.get('BROKER'),
        'group.id': 'consumer-pay-id',
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe(['pay_order'])
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            logging.error("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value())
        OrderPayStory().execute(data.get('order_id'))
    consumer.close()
def start(self):
    c = Consumer({
        'bootstrap.servers': KAFKA_BOOTSTRAP_SERVICE,
        'group.id': KAFKA_GROUP_ID,
        'auto.offset.reset': 'earliest'
    })
    c.subscribe([BTC_BLOCK_TOPIC])
    while True:
        msg = c.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value().decode('utf-8'))
        cache.set("latest_block", data, timeout=None)
def deamon():
    """ Termite Client """
    group = ""
    KAFKA_HOST = ""
    KAFKA_TOPIC = ""
    print(KAFKA_HOST)
    c = Consumer({
        "bootstrap.servers": KAFKA_HOST,
        'group.id': group,
    })
    c.subscribe([KAFKA_TOPIC])
    running = True
    while running:
        msg = c.poll(1)
        if msg is None:
            continue
        if not msg.error():
            data = json.loads(msg.value())
            print("receive msg:", data)
            kwargs = {
                "work_id": data["Work_id"],
                "flow_id": data["Flow_id"],
                "cid": data["Cid"]
            }
            # Video beauty-tag recognition
            if data.get("Work", "") == "video_tag_detect":
                t_video_tag_detect(**kwargs)
            else:
                pass
        else:
            if msg.error().code() == KafkaError._PARTITION_EOF:
                print("Skip-Error Message-Topic: {} Partition: {} Offset: {} Error: {}".format(
                    msg.topic(), msg.partition(), msg.offset(), msg.error()))
            else:
                print("Error Message: {}".format(msg.error()))
        time.sleep(0.01)
    c.close()
def order_channel():
    consumer = Consumer({
        'bootstrap.servers': os.environ.get('BROKER'),
        'group.id': 'consumer-order-id',
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe([
        'order_reserved', 'order_paid', 'order_pay_failed',
        'order_reserve_rejected'
    ])
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            logging.error("Consumer error: {}".format(msg.error()))
            continue

        data = json.loads(msg.value())
        topic = msg.topic()

        # TODO For demo
        if topic == 'order_reserved':
            OrderSaga().pay(data.get('order_id'))
            continue
        if topic == 'order_paid':
            OrderSaga().approve(data.get('order_id'))
            continue
        if topic == 'order_pay_failed':
            OrderSaga().reject_reserve(data.get('order_id'))
            continue
        if topic == 'order_reserve_rejected':
            OrderSaga().cancel(data.get('order_id'))
            continue
    consumer.close()
def main(args):
    serial = args.serial
    num_messages = args.num_messages
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    if serial:
        print("Running in SERIAL mode")
        print("The input producer will wait for the reply of the transactor "
              "before producing the next message.")
    else:
        print("Running in PARALLEL mode")
        print("The input producer will produce all messages in parallel (at once) "
              "after the first message.")

    tr_args = [
        sys.executable,
        os.path.join(HERE, "eos-transactions.py"),
        "-b", brokers,
        "-g", group_id + "-tr",
        "-t", input_topic,
        "-p", str(input_partition),
        "-o", output_topic,
    ]

    output_consumer = Consumer({
        "bootstrap.servers": brokers,
        "group.id": group_id + "-pr",
        "auto.offset.reset": "earliest",
        "enable.auto.commit": True,
        "enable.partition.eof": False,
    })
    output_consumer.subscribe([output_topic])

    input_producer = Producer({
        'bootstrap.servers': brokers,
    })

    try:
        with tempfile.NamedTemporaryFile(mode='w+') as f:
            tr_proc = subprocess.Popen(tr_args,
                                       stderr=subprocess.STDOUT,
                                       stdout=f,
                                       cwd=HERE,
                                       close_fds=True)
            try:
                time.sleep(1)
                assert tr_proc.poll() is None
                tx = 0
                for i in range(num_messages):
                    input_producer.produce(input_topic,
                                           key=b"xy",
                                           value=str(tx).encode("ascii"))
                    tx += 1
                    assert input_producer.flush(10) == 0
                    while serial or tx <= 1:
                        msg = output_consumer.poll(1.0)
                        if msg is None:
                            continue
                        assert msg.error() is None
                        if tx == 1:
                            t_start = time.time()
                        break
                if not serial:
                    for _ in range(num_messages - 1):
                        msg = output_consumer.poll(1.0)
                        if msg is None:
                            continue
                        assert msg.error() is None
                print("Processing took {}".format(time.time() - t_start))
            finally:
                if tr_proc.poll() is None:
                    tr_proc.terminate()
                    tr_proc.wait()
                f.seek(0)
                eos_out = f.read()
    finally:
        output_consumer.close()  # commit offsets

    i = 0
    c = False
    send_offset_logs = defaultdict(list)
    send_offset_times = []
    for line in eos_out.split("\n"):
        if line.startswith(":DEMO:START "):
            c = True
        if c:
            send_offset_logs[i].append(line)
        if line.startswith(":DEMO:END "):
            send_offset_times.append(float(line.rpartition(" ")[-1]))
            c = False
            i += 1

    print("\nSend offset times:", send_offset_times)
    print("Send offset times average:",
          sum(send_offset_times) / len(send_offset_times))

    print("\nRelevant log snippet from the middle:")
    print("\n".join(send_offset_logs[int(i / 2)]))

    print("\nFull output of the transactor:")
    print(eos_out)
""" Module comment """ LOGGER = logging.getLogger(__name__) if __name__ == '__main__': c = Consumer({ 'bootstrap.servers': 'qg-cdh-server-04.vcom.local:9092,qg-cdh-server-05.vcom.local:9092,qg-cdh-server-06.vcom.local:9092', 'group.id': 'ddc_test_group', 'auto.offset.reset': 'earliest' }) c.subscribe(['ddc_test_topic1']) print('consumer start') count = 0 while True: msg = c.poll(1.0) if msg is None: continue if msg.error(): print("Consumer error: {}".format(msg.error())) continue count += 1 print('Received message: {}'.format(count)) # print('Received message: {}'.format(msg.value().decode('utf-8'))) # c.close()
class BreadCrumbDataConsumer:
    _logger = logging.getLogger('BreadCrumbDataConsumer')

    def __init__(self):
        kafka_configs = KafkaHelper.get_kafka_configs()
        kafka_configs['group.id'] = 'python_breadcrumb_data_consumer'
        kafka_configs['auto.offset.reset'] = 'earliest'
        self._consumer = Consumer(kafka_configs)
        self._bread_crumb_repo = BreadCrumbRepository()
        self._trips_stop_data = dict()

    def consume_breadcrumb_records(self):
        self._logger.info("Starting breadcrumb data consumer ...")
        self._consumer.subscribe([STOP_EVENT_TOPIC, BREADCRUMB_DATA_TOPIC])
        stop_events_records_count = 0
        consumed_breadcrumb_records_count = 0
        bread_crumb_records_saved_to_db_count = 0
        breadcrumbs = list()
        last_saved_to_db = datetime.now()
        try:
            while True:
                duration_from_last_saved_to_db = datetime.now() - last_saved_to_db
                if len(breadcrumbs) >= 50_000 or (
                        len(breadcrumbs) > 0 and
                        duration_from_last_saved_to_db.total_seconds() > (60 * 2)):
                    self._bread_crumb_repo.bulk_save_breadcrumbs(
                        breadcrumbs, self._trips_stop_data)
                    bread_crumb_records_saved_to_db_count += len(breadcrumbs)
                    breadcrumbs.clear()
                    last_saved_to_db = datetime.now()
                    self._logger.info(
                        'Number of breadcrumb records consumed = {}, stop event records consumed = {}, records saved to db = {}'
                        .format(consumed_breadcrumb_records_count,
                                stop_events_records_count,
                                bread_crumb_records_saved_to_db_count))

                msg = self._consumer.poll(1.0)
                if msg is None:
                    continue
                elif msg.error():
                    self._logger.error('error: {}'.format(msg.error()))
                else:
                    msg_topic = msg.topic()
                    message_data = msg.value().decode("utf-8")
                    if msg_topic == BREADCRUMB_DATA_TOPIC:
                        consumed_breadcrumb_records_count += 1
                        self.process_bread_crumb_record(breadcrumbs, message_data)
                    elif msg_topic == STOP_EVENT_TOPIC:
                        stop_events_records_count += 1
                        self.process_stop_event_records(message_data)
                    self._logger.debug(
                        'Number of breadcrumb records consumed = {}, stop event records consumed = {}'
                        .format(consumed_breadcrumb_records_count,
                                stop_events_records_count))
        finally:
            self._consumer.close()
            self._bread_crumb_repo.bulk_save_breadcrumbs(
                breadcrumbs, self._trips_stop_data)

    def process_bread_crumb_record(self, breadcrumbs, message_data):
        try:
            breadcrumb = BreadCrumb.parse_raw(message_data)
            breadcrumbs.append(breadcrumb)
        except Exception as ex:
            self._logger.debug('Encountered an error parsing a bread crumb: %s', ex)

    def process_stop_event_records(self, message_data):
        try:
            trip_stop_dict = json.loads(message_data)
            trip_id = list(trip_stop_dict.keys())[0]
            if trip_id not in self._trips_stop_data.keys():
                trip_stop_events_df = pd.read_json(list(trip_stop_dict.values())[0])
                first_row = trip_stop_events_df.iloc[0]
                self._trips_stop_data[trip_id] = {
                    'route_id': first_row['route_number'],
                    'service_key': first_row['service_key'],
                    'direction': first_row['direction']
                }
        except Exception as ex:
            self._logger.debug('Encountered an error parsing a stop events record: %s', ex)
def exec_benchmark(duration_s, fps, kafka_loc, output_topic, silent):
    """Measures throughput at the output Kafka topic, by checking the growth in all partitions"""
    c = Consumer({
        'bootstrap.servers': kafka_loc,
        'group.id': 'benchmark-' + str(uuid.uuid4()),
        'auto.offset.reset': 'latest',
        'max.poll.interval.ms': 86400000,
        'isolation.level': 'read_committed'
    })

    # === Get topic partitions
    topic_partitions = None

    def store_topic_partition(consumer, partitions):
        nonlocal topic_partitions
        topic_partitions = partitions

    c.subscribe([output_topic], on_assign=store_topic_partition)
    while topic_partitions is None:
        c.consume(timeout=0.5)

    # Loop read partitions
    throughput_measured = []
    throughput_measured_per_partition = {}
    last_values = {}
    for p in topic_partitions:
        low, high = c.get_watermark_offsets(p)
        throughput_measured_per_partition[p.partition] = []
        last_values[p.partition] = high
        # if silent != "silent":
        #     print("Starting value for partition {}: {}".format(p.partition, high))

    MS_PER_UPDATE = 1000 / fps

    start_time = current_milli_time()
    last_time = start_time
    current_time = start_time
    last_write_time = current_time
    lag = 0.0

    while current_time < start_time + duration_s * 1000:
        current_time = current_milli_time()
        elapsed = current_time - last_time
        last_time = current_time
        lag += elapsed

        while lag >= MS_PER_UPDATE:
            # calc new val
            total_new = 0
            curr_time_for_print = current_milli_time()
            time_delta = ((curr_time_for_print - last_write_time) / 1000)
            if time_delta > 0:
                for p in topic_partitions:
                    low, high = c.get_watermark_offsets(p)
                    delta = high - last_values[p.partition]
                    total_new += delta
                    throughput_measured_per_partition[p.partition].append(
                        (delta / time_delta, curr_time_for_print))
                    last_values[p.partition] = high
                throughput_measured.append(
                    (total_new / time_delta, curr_time_for_print))
                last_write_time = curr_time_for_print
            lag -= MS_PER_UPDATE

    if silent != "silent":
        # Print column names
        # TIME THROUGHPUT PART-0 ... PART-N
        columns = "TIME\tTHROUGHPUT"
        for i in range(len(topic_partitions)):
            columns += "\tPART-{}".format(str(i))
        print(columns)
        for row in range(len(throughput_measured)):
            row_data = "{}\t{}".format(throughput_measured[row][1],
                                       int(throughput_measured[row][0]))
            for i in range(len(topic_partitions)):
                row_data += "\t{}".format(
                    int(throughput_measured_per_partition[i][row][0]))
            print(row_data)
    else:
        print(int(statistics.mean(
            [x[0] for x in throughput_measured if x[0] > 0.0])))
class AioConsumer:
    def __init__(self,
                 config,
                 topics: list,
                 group_id: str,
                 handler,
                 max_retry=-1,
                 consumer_no=0,
                 timeout=1,
                 loop=None,
                 exe=None):
        """
        consumer = new AioConsumer(...)
        :param config: kafka consumer config
        :param topics:
        :param group_id:
        :param handler:
        :param max_retry: number of retries after a failed consume; -1 means no retry
        :param consumer_no: consumer index
        :param timeout: poll timeout
        :param loop:
        :param exe:
        """
        self.loop = loop or asyncio.get_event_loop()
        assert config is not None, 'init kafka consumer error, config is None'
        _config = copy.deepcopy(config)
        _config['group.id'] = group_id
        _config['on_commit'] = self.commit_completed
        self.handler = handler
        self.consumer = Consumer(_config)
        self.consumer.subscribe(topics)
        self.redis_retry_key = f'{"_".join(topics)}_{self.handler.__name__}'
        self.name = f'{self.redis_retry_key}_{consumer_no}'
        self.max_retry = max_retry
        self.exe = exe
        self.timeout = timeout
        # 'INIT' -> 'RUNNING' -> 'STOP'
        self.status = 'INIT'

    @staticmethod
    def commit_completed(err, partitions):
        if err:
            logger.info(str(err))
        else:
            logger.info("Committed partition offsets: " + str(partitions))

    async def poll(self):
        return await self.loop.run_in_executor(self.exe, self.consumer.poll,
                                               self.timeout)

    async def _get_message_from_kafka(self):
        poll_message = await self.poll()
        if not poll_message:
            return None
        elif poll_message.error():
            raise KafkaException(poll_message.error())
        else:
            return poll_message.value()

    async def run(self):
        while self.status == 'RUNNING':
            str_message = await self._get_message_from_kafka()
            message = json.loads(str_message or '{}')
            if not message:
                await asyncio.sleep(1)
                continue
            try:
                if asyncio.iscoroutinefunction(self.handler):
                    await self.handler(message)
                else:
                    self.handler(message)
                await self.commit()
            except Exception as e:
                logger.warning(f'{str(self)} handler error: {e.args}. msg: {str_message}')
        await self.close()

    async def commit(self):
        def _commit():
            self.consumer.commit(asynchronous=False)

        await self.loop.run_in_executor(self.exe, _commit)

    async def close(self):
        await self.commit()
        await self.loop.run_in_executor(self.exe, self.consumer.close)
        logger.info(f'{self.name} closed')

    def stop(self):
        self.status = 'STOP'
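# Minimal usage sketch for AioConsumer. Broker address, topic, and handler are
# assumptions for illustration; the surrounding module is expected to provide
# `logger`. run() only loops while status == 'RUNNING', so the caller flips the
# status before awaiting it and calls stop() to end the loop.
async def example_main():
    async def example_handler(message):
        print("handled:", message)

    consumer = AioConsumer(
        config={"bootstrap.servers": "localhost:9092", "enable.auto.commit": False},
        topics=["example-topic"],
        group_id="example-group",
        handler=example_handler,
    )
    consumer.status = 'RUNNING'
    asyncio.get_event_loop().call_later(30, consumer.stop)  # stop after ~30 seconds
    await consumer.run()  # commits and closes the consumer when the loop ends

# asyncio.run(example_main())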
    instrument[CHILDREN].append(source)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Amend data to runinfo messages")
    parser.add_argument("-b", "--broker")
    args = parser.parse_args()
    broker = args.broker

    conf = {"bootstrap.servers": broker, "group.id": str(uuid.uuid4())}
    admin_client = AdminClient(conf)
    cons = Consumer(conf)
    prod = Producer(conf)

    topics = [topic + "_runInfo" for topic in INST_NAMES]
    print(f"subscribing to {topics}")
    cons.subscribe(topics=topics)

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = cons.poll(1.0)
            if msg is None:
                continue
            message_topic = msg.topic()
            instrument_name = message_topic.split("_runInfo")[0]
            des = deserialise_pl72(msg.value())
            structure = des.nexus_structure
            entry = _create_group("raw_data_1", "NXentry")
            detector_1 = _create_group("detector_1", "NXdetector")
            detector_1[CHILDREN].append(structure["entry"]["events"])
            instrument = _create_group("instrument", "NXinstrument")
class MsgConsumer:
    def __init__(self,
                 topic,
                 broker_address,
                 group_id='group',
                 client_id='client',
                 auto_offset_reset='earliest',
                 num_messages=1,
                 verbose=False):
        """Consumer for handling EEG Streamer messages.

        Args:
            topic: Topic to subscribe to
            broker_address: Broker address
            group_id: group ID
            client_id: client ID
            auto_offset_reset: (default: 'earliest')
            num_messages: Maximum number of messages to consume each time (default: 1)
            verbose: verbose mode. (default: False)
        """
        self.data = deque()
        self.timestamps = deque()
        self.__num_msgs = num_messages
        """Maximum number of messages to consume each time (default: 1)"""
        self.__verbose = verbose
        self.__streamqueue = deque()
        self.__consumer = Consumer({
            'bootstrap.servers': broker_address,
            'auto.offset.reset': auto_offset_reset,
            'group.id': group_id,
            'client.id': client_id,
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'max.poll.interval.ms': 10000
        })
        """consumer that reads stream of EEG signal"""
        self.__consumer.subscribe([topic])

    def listen(self):
        """read stream from Kafka and append to streamqueue

        Returns:
            list of list: dataset (nchannel x nsample) or None
        """
        # If chunk size is too large, consume it over multiple epochs
        chunk_size = self.__num_msgs
        msgs = []
        while chunk_size > 100:
            msgs.extend(self.__consumer.consume(num_messages=100, timeout=1))
            chunk_size -= 100
        msgs.extend(self.__consumer.consume(num_messages=chunk_size, timeout=1))
        print(f"INFO: Received {str(len(msgs))} messages") if self.__verbose else None

        if msgs is None or len(msgs) <= 0:
            return None

        self.__streamqueue.extendleft(msgs)  # Enqueue
        if len(self.__streamqueue) < self.__num_msgs:
            return None

        # Dequeue
        msgs__ = [self.__streamqueue.pop() for i in range(0, self.__num_msgs)]

        timestamps, data = [], []
        for msg in msgs__:
            time, values = msg_decode(msg.value())
            timestamps.append(time) if time is not None else None
            data.append(values) if time is not None else None

        # TODO:// assert there is not a big time gap in the data
        if len(data) < self.__num_msgs:
            return None

        print(timestamps[0], data[0]) if self.__verbose else None
        data = tuple(zip(*data))
        self.data.append(data)
        self.timestamps.append(timestamps[0])
        print(f"INFO: Successfully read a chunk") if self.__verbose else None

    def stop(self):
        self.__consumer.close()

    def drain(self):
        self.__num_msgs = 100000
        for i in range(0, 10):
            self.listen()
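# Usage sketch for MsgConsumer; the broker address and topic are placeholder
# assumptions, not values from the original module.
eeg_consumer = MsgConsumer(
    topic="eeg-stream",
    broker_address="localhost:9092",
    num_messages=250,   # pull up to 250 messages per listen() call
    verbose=True,
)
eeg_consumer.listen()   # fills .data / .timestamps once enough messages arrive
eeg_consumer.stop()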
"bootstrap.servers": BOOTSTRAP_SERVERS, "group.id": GROUP, # by using earliest offset reset and autocommit we have "at-least-once" semantic (default) # "at-most-once" and "exactly-once" are also available # https://docs.confluent.io/platform/current/clients/consumer.html "auto.offset.reset": "earliest", "enable.auto.commit": "true", }) @atexit.register def finisher(): # close consumer on exit c.close() # can subscribe to multiple topics c.subscribe(TOPICS) while True: # poll for new message at most 1 second msg = c.poll(1.0) if msg is None: continue if msg.error(): print("Consumer error: {}".format(msg.error())) continue topic = msg.topic() key = msg.key().decode() value = msg.value().decode()
class AsyncWorker(object):
    """
    Fetches from Kafka topics and processes them.

    :param consumer_topic: Name of the Kafka topic for consume.
    :type consumer_topic: str
    :param service: Service function which is executed every time when job is processed.
        Service must get as argument str or dict type object.
    :type service: callable
    :param consumer_conf: config for Kafka consumer.
    :type consumer_conf: dict
    :param failed_topic: Kafka topic for produce unprocessed messages from consumer_topic.
    :type failed_topic: str
    :param producer_conf: config for Kafka producer for producing unprocessed messages.
    :type producer_conf: dict
    """

    def __init__(self, consumer_topic: str, service: Callable,
                 consumer_conf: dict, failed_topic: str, producer_conf: dict):
        self._consumer_topic = consumer_topic
        self._consumer = Consumer(consumer_conf)
        self._service = service
        self._failed_topic = failed_topic
        # use naming like <project name>_<version>_<consumer_topic><retry/failed>
        self._producer = AsyncProducer(producer_conf)

    def __repr__(self):
        """Return the string representation of the worker.

        :return: String representation of the worker.
        :rtype: str
        """
        return 'Worker(Consumer={}, consume_topic={})'.format(
            self._consumer, self._consumer_topic)

    def __del__(self):  # pragma: no cover
        # noinspection PyBroadException
        try:
            self._consumer.close()
        except Exception:
            pass

    async def _exec_service(self, message_value):
        if iscoroutinefunction(self._service):
            res = await self._service(message_value)
        else:
            res = self._service(message_value)
        return res

    async def _process_message(self, msg: Message):
        """De-serialize message and execute service.

        :param msg: Kafka message.
        :type msg: confluent_kafka.Message
        """
        LOGGER.info(
            'Processing Message(topic={}, partition={}, offset={}) ...'.format(
                msg.topic(), msg.partition(), msg.offset()))
        service_repr = get_call_repr(self._service)
        LOGGER.info('Executing job {}'.format(service_repr))
        try:
            message_value = _decode_msg_value(msg.value())
            res = await self._exec_service(message_value)
        except KeyboardInterrupt:
            LOGGER.error('Job was interrupted: {}'.format(msg.offset()))
        except Exception as err:
            LOGGER.exception('Job {} raised an exception: {}'.format(
                msg.offset(), err))
            await self._producer.produce(topic=self._failed_topic,
                                         value=msg.value(),
                                         error=str(err))
        else:
            LOGGER.info('Job {} returned: {}'.format(msg.offset(), res))

    @property
    def consumer_topic(self):
        """Return the name of the Kafka topic.

        :return: Name of the Kafka topic.
        :rtype: str
        """
        return self._consumer_topic

    @property
    def consumer(self):
        """Return the Kafka consumer instance.

        :return: Kafka consumer instance.
        :rtype: confluent_kafka.Consumer
        """
        return self._consumer

    @property
    def service(self):
        """Return the service function.

        :return: Callback function, or None if not set.
        :rtype: callable | None
        """
        return self._service

    async def start(self,
                    max_messages: int = math.inf,
                    commit_offsets: bool = True) -> int:
        """Start processing Kafka messages and executing jobs.

        :param max_messages: Maximum number of Kafka messages to process before
            stopping. If not set, worker runs until interrupted.
        :type max_messages: int
        :param commit_offsets: If set to True, consumer offsets are committed
            every time a message is processed (default: True).
        :type commit_offsets: bool
        :return: Total number of messages processed.
        :rtype: int
        """
        LOGGER.info('Starting {} ...'.format(self))
        self._consumer.unsubscribe()
        self._consumer.subscribe([self.consumer_topic])
        LOGGER.info(" Try get messages from position: {}".format(
            self._consumer.position(self._consumer.assignment())))
        messages_processed = 0
        while messages_processed < max_messages:
            loop = asyncio.get_event_loop()
            # awaiting place for processing messages in other coroutines
            messages = await loop.run_in_executor(
                None, partial(self._consumer.consume, 10, 2.0))
            LOGGER.debug(" Try get messages from position: {}".format(
                self._consumer.position(self._consumer.assignment())))
            if not messages:
                LOGGER.debug("Messages not found")
                continue
            for msg in messages:
                if msg.error():
                    LOGGER.error("Consumer error: {}".format(msg.error()))
                LOGGER.info("Get message with offset {}".format(msg.offset()))
                asyncio.create_task(self._process_message(msg))
                if commit_offsets:
                    self._consumer.commit()
                messages_processed += 1
        self._consumer.close()
        return messages_processed
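# Hypothetical wiring of AsyncWorker; the service, topic names, and broker
# configs below are assumptions. AsyncProducer and LOGGER are expected to be
# provided by the surrounding module.
def example_service(value):
    LOGGER.info('processing %s', value)

worker = AsyncWorker(
    consumer_topic='jobs',
    service=example_service,
    consumer_conf={'bootstrap.servers': 'localhost:9092',
                   'group.id': 'jobs-worker',
                   'auto.offset.reset': 'earliest'},
    failed_topic='jobs_failed',
    producer_conf={'bootstrap.servers': 'localhost:9092'},
)
# asyncio.run(worker.start(max_messages=100))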
class Kafka(object):
    def __init__(self, target_key) -> None:
        super().__init__()
        self.address = _address_for_key(target_key)
        kafka_config = {
            'bootstrap.servers': self.address,
            'group.id': "up9-test-group",
            'enable.auto.commit': 'false'  # important for passive observing
        }
        if "ssl://" in self.address.lower():
            kafka_config['security.protocol'] = 'SSL'
        self.consumer = Consumer(kafka_config)
        self.producer = Producer(kafka_config)
        self.watching_topics = []
        self.consumer.list_topics(timeout=5)  # to check for connectivity

    def watch_topics(self, topics: list):
        def my_on_assign(consumer, partitions):
            logging.debug("On assign: %r", partitions)
            consumer.assign(partitions)
            for partition in partitions:
                low, high = consumer.get_watermark_offsets(partition)
                partition.offset = high
                logging.debug("Setting offset: %r", partition)
                consumer.seek(partition)

        self.watching_topics.extend(topics)
        self.consumer.subscribe(topics, on_assign=my_on_assign)
        self.consumer.poll(0.01)  # to trigger partition assignments

    def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
        logging.debug("Checking messages that appeared on kafka topics: %r",
                      self.watching_topics)
        res = []
        start = time.time()
        while True:
            msg = self.consumer.poll(interval)
            if msg is None or time.time() - start > interval:
                break  # done reading
            if msg.error():
                raise KafkaException("kafka consumer error: {}".format(msg.error()))
            logging.debug("Potential message: %r",
                          (msg.partition(), msg.key(), msg.headers(), msg.value()))
            if predicate(msg):
                res.append(msg)
        # TODO: consumer.close()
        return res

    def assert_seen_message(self, resp, delay=0, predicate=lambda x: True):
        @recorder.assertion_decorator
        def assert_seen_kafka_message(resp, topics, delay):
            messages = self.get_watched_messages(delay, predicate)
            messages = [(m.topic(), m.key(), m.value(), m.headers()) for m in messages]
            if not messages:
                raise AssertionError("No messages on Kafka topic %r" % topics)
            else:
                logging.info("Validated the messages have appeared: %s", messages)
            return messages

        return assert_seen_kafka_message(resp, self.watching_topics, delay)

    def put(self, topic, data=None, json=None, headers=None):
        # TODO: parse key out of URL
        if topic.startswith('/'):
            topic = topic[1:]

        if data is None and json is not None:
            data = json_lib.dumps(json)

        with apiritif.transaction('kafka://[' + self.address + ']/' + topic):
            logging.info("Sending message to Kafka topic %r: %r", topic, data)
            self.producer.produce(topic, data,
                                  headers=[] if headers is None else headers)
            self.producer.poll(0)
            self.producer.flush()

            wrapped_req = self._make_request(
                'PUT', 'kafka://' + self.address.split(',')[0] + '/' + topic, data)
            wrapped_response = self._make_response(wrapped_req)
            recorder.record_http_request('PUT', self.address, wrapped_req,
                                         wrapped_response, _context.session)
        return wrapped_response

    def _make_request(self, method, url, request):
        req = requests.Request(method, url=url, data=request)
        prepared = req.prepare()
        _context.grpc_mapping[id(request)] = prepared
        return prepared

    def _make_response(self, wrapped_req):
        resp = requests.Response()
        resp.status_code = 202
        resp.request = wrapped_req
        resp._request = wrapped_req
        resp.msg = 'Accepted'
        resp.raw = io.BytesIO()
        return resp
def consume(self, topic, topic_timeout):
    kafka_config_consumer = ConfigFactory(kafka_client="consumer")
    config = kafka_config_consumer.config
    log.info("kafka config for consume %s", config)
    consumer = Consumer(config)
    events = []
    start_time = time.monotonic()
    timeout_start_time = start_time
    timeout_consumer = 10.0

    # actual consumer starts now
    # subscribe to 1 or more topics and define the callback function
    # callback is only received after consumer.consume() is called!
    consumer.subscribe([topic], on_assign=self.callback_on_assignment)
    log.info(
        f"Waiting for partition assignment ... (timeout at {timeout_consumer} seconds)"
    )
    try:
        while (time.monotonic() - timeout_start_time) < timeout_consumer:
            # start consumption
            messages = consumer.consume(timeout=0.1)

            # check for partition assignment
            if self.consume_lock == ConsumerState.PARTITIONS_UNASSIGNED:
                # this should not happen but we are not 100% sure
                if messages:
                    log.error("messages consumed but lock is unopened")
                    break
                continue
            # after partition assignment set the timeout again
            # and reset the start time from which to determine timeout
            # violation
            elif self.consume_lock == ConsumerState.PARTITIONS_ASSIGNED:
                timeout_start_time = time.monotonic()
                timeout_consumer = topic_timeout
                self.consume_lock = ConsumerState.TIMEOUT_SET
                log.info("Lock has been opened, consuming ...")

            # append messages to the events list to be returned
            if messages:
                for msg in messages:
                    log.info(f"message at offset: {msg.offset()}, \
                        partition: {msg.partition()}, \
                        topic: {msg.topic()}")
                    # TODO: allow assertions to be on message headers etc.
                    # events.append({
                    #     "key": msg.key,
                    #     "headers": msg.headers,
                    #     "value": msg.value()
                    # })
                    events.append(msg.value())
        # only executed when while condition becomes false
        else:
            # at the end check if the partition assignment was achieved
            if self.consume_lock != ConsumerState.TIMEOUT_SET:
                log.error("No partition assignments received in time")
    except KafkaException as e:
        log.error(f"Kafka error: {e}")
        pass
    finally:
        consumer.close()

    end_time = time.monotonic()
    log.debug(f"this cycle took: {(end_time - start_time)} seconds")
    return events
def compute_achieved_throughput(broker, partitions_with_offsets, result_dict):
    partitions_with_offsets = {}
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })
    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    if 'input' in partitions_with_offsets and len(partitions_with_offsets['input']) > 0:
        input_consumer.assign(partitions_with_offsets['input'])
    else:
        input_consumer.subscribe(['read', 'update', 'transfer'])

    if 'output' in partitions_with_offsets and len(partitions_with_offsets['output']) > 0:
        output_consumer.assign(partitions_with_offsets['output'])
    else:
        output_consumer.subscribe(['responses'])

    while True:
        msgs = input_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            try:
                wrapped = Wrapper()
                wrapped.ParseFromString(msg.value())
                result = {}
                result['operation'] = msg.topic()
                result['input_time'] = msg.timestamp()[1]
                result_dict[wrapped.request_id] = result
            except DecodeError as e:
                print("Could not decode?")
                pass

    partitions_with_offsets['input'] = input_consumer.position(
        input_consumer.assignment())
    input_consumer.close()

    total_messages = 0
    start_time = 0
    end_time = 0
    first = True

    while True:
        msgs = output_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            response = Response()
            response.ParseFromString(msg.value())
            key = response.request_id
            status_code = response.status_code
            if key in result_dict:
                if first:
                    start_time = msg.timestamp()[1] / 1000
                    first = False
                total_messages += 1
                end_time = msg.timestamp()[1] / 1000
                result_dict[key]['output_time'] = msg.timestamp()[1]
                result_dict[key]['status_code'] = status_code

    partitions_with_offsets['output'] = output_consumer.position(
        output_consumer.assignment())
    output_consumer.close()

    print("Total messages considered: " + str(total_messages))
    if total_messages == 0 or end_time - start_time == 0:
        return 0
    return total_messages / (end_time - start_time)