class ConsumerTask(object):
    """Subscribe to one Kafka topic and print every message until stopped.

    The task owns its consumer: ``run`` subscribes, polls in a loop and always
    unsubscribes and closes the consumer on the way out.
    """

    def __init__(self, conf, topic_name):
        """Create the consumer.

        Args:
            conf: Configuration dict handed to ``Consumer``.
            topic_name: Name of the single topic to subscribe to.
        """
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True

    def stop(self):
        """Ask ``run`` to leave its loop once the current poll returns."""
        self.running = False

    def print_assignment(self, consumer, partition):
        """Rebalance callback: print the partitions that were assigned."""
        print('Assignment: ', partition)

    def run(self):
        """Poll and print messages until ``stop`` is called.

        Raises:
            KafkaException: If a polled message carries an error.
        """
        self.consumer.subscribe([self.topic_name],
                                on_assign=self.print_assignment)
        try:
            # Honour stop(): the flag is re-checked after every poll, so a
            # stop request takes effect within one poll timeout (10 s).
            while self.running:
                msg = self.consumer.poll(10)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                print('%% %s [%d] at offset %d with key %s:\n' %
                      (msg.topic(), msg.partition(), msg.offset(),
                       str(msg.key())))
                print(msg.value())
        except KeyboardInterrupt:
            sys.stderr.write("%% Aborted by user\n")
        finally:
            self.consumer.unsubscribe()
            self.consumer.close()
class PubSubConsumerKafka(PubSubConsumer):
    """Kafka-backed pub/sub consumer that returns decoded ``(topic, message)`` pairs."""

    def __init__(self,
                 conf: dict,
                 topic_names: List[str],
                 client_name: str,
                 group_name: str,
                 logger: logging.Logger = None):
        """Build the consumer settings and subscribe to ``topic_names``.

        Args:
            conf: Must contain ``'bootstrap.servers'``.
            topic_names: Topics to subscribe to; an empty list defers
                consumer creation until ``reset_topics`` is called.
            client_name: Used as the Kafka ``client.id``.
            group_name: Used as the Kafka ``group.id``.
            logger: Optional logger for debug output.
        """
        self.client_name = client_name
        # Stored under its own name: assigning the logger to ``__log`` would
        # shadow the ``__log`` method and break every logging call.
        self.__logger = logger
        self.__consumer_settings = {
            'bootstrap.servers': conf['bootstrap.servers'],
            'group.id': group_name,
            'client.id': client_name,
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        }
        self.__consumer = None
        self.__set_consumer(topic_names)

    def __set_consumer(self, topic_names: List[str]):
        """Subscribe to ``topic_names``; with no topics, unsubscribe any existing consumer."""
        if topic_names:
            if self.__consumer is None:
                # Created lazily, on the first non-empty subscription
                self.__consumer = Consumer(self.__consumer_settings)
            self.__consumer.subscribe(topic_names)
        elif self.__consumer is not None:
            self.__consumer.unsubscribe()

    def __log(self, msg: str):
        """Emit ``msg`` at debug level when a logger was supplied."""
        if self.__logger is not None:
            self.__logger.debug(msg)

    def reset_topics(self, topic_names: List[str]):
        """Replace the current subscription with ``topic_names``."""
        self.__set_consumer(topic_names)

    def poll(self, timeout=None) -> (str, object):
        """Poll once and return a ``(topic, decoded_message)`` pair.

        Args:
            timeout: Seconds to wait for a message; ``None`` means 1 second.

        Returns:
            ``(None, None)`` when there is no consumer, no message arrived,
            or the message carried an error (including end of partition).
        """
        if timeout is None:
            timeout = 1
        if self.__consumer is None:
            return (None, None)

        msg = self.__consumer.poll(timeout)
        if msg is None:
            return (None, None)

        error = msg.error()
        if not error:
            # NOTE(security): jsonpickle.decode can instantiate arbitrary
            # objects; only safe if producers on these topics are trusted.
            return (msg.topic(), jsonpickle.decode(msg.value()))

        if error.code() == KafkaError._PARTITION_EOF:
            self.__log(
                f"End of partition reached {msg.topic()}/{msg.partition()}")
        else:
            self.__log(f"Error occured: {error.str()}")
        return (None, None)
def test_any_method_after_close_throws_exception():
    """ Every consumer method called after close() must raise a RuntimeError """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    c = Consumer(settings)
    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # One entry per API call, in the order they are exercised.
    calls_on_closed_consumer = (
        lambda: c.subscribe(['test']),
        lambda: c.unsubscribe(),
        lambda: c.poll(),
        lambda: c.consume(),
        lambda: c.assign([TopicPartition('test', 0)]),
        lambda: c.unassign(),
        lambda: c.assignment(),
        lambda: c.commit(),
        lambda: c.committed([TopicPartition("test", 0)]),
        lambda: c.position([TopicPartition("test", 0)]),
        lambda: c.seek([TopicPartition("test", 0, 0)]),
        lambda: c.get_watermark_offsets(TopicPartition("test", 0)),
    )

    for invoke in calls_on_closed_consumer:
        with pytest.raises(RuntimeError) as ex:
            invoke()
        assert 'Consumer closed' == str(ex.value)
class ConsumerTask(object):
    """Consume one Kafka topic from inside an asyncio event loop.

    ``run`` is a coroutine that polls the consumer, prints what it received
    and then sleeps for two seconds, until ``stop`` clears ``running``.
    """

    def __init__(self, is_bluemix, conf, topic_name):
        """Create the consumer.

        Args:
            is_bluemix: Accepted for interface compatibility; not used here.
            conf: Configuration dict handed to ``Consumer``.
            topic_name: Name of the single topic to subscribe to.
        """
        # Fall back to the prebuilt package only when the regular one cannot
        # be imported; other exceptions are no longer swallowed.
        try:
            from confluent_kafka import Consumer
        except ImportError:
            from confluent_kafka_prebuilt import Consumer
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True

    def stop(self):
        """Ask ``run`` to exit at its next loop check."""
        self.running = False

    @staticmethod
    def _decode(raw):
        """Decode UTF-8 bytes, passing ``None`` (e.g. a missing key) through."""
        return None if raw is None else raw.decode('utf-8')

    async def run(self):
        """Poll and print messages until ``stop`` is called.

        Declared with ``async def`` because ``@asyncio.coroutine`` was removed
        in Python 3.11. The consumer is unsubscribed and closed even when the
        loop exits through an exception or task cancellation.
        """
        print('The consumer has started')
        self.consumer.subscribe([self.topic_name])
        try:
            while self.running:
                # NOTE: poll() is a blocking call; it holds the event loop
                # for up to one second per iteration.
                msg = self.consumer.poll(1)
                if msg is not None and msg.error() is None:
                    print('Message consumed: topic={0}, partition={1}, offset={2}, key={3}, value={4}'.format(
                        msg.topic(),
                        msg.partition(),
                        msg.offset(),
                        self._decode(msg.key()),
                        self._decode(msg.value())))
                else:
                    print('No messages consumed')
                await asyncio.sleep(2)
        finally:
            self.consumer.unsubscribe()
            self.consumer.close()
def test_any_method_after_close_throws_exception():
    """ Every consumer method called after close() must raise a RuntimeError """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    c = Consumer(settings)
    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # One entry per API call, in the order they are exercised.
    calls_on_closed_consumer = (
        lambda: c.subscribe(['test']),
        lambda: c.unsubscribe(),
        lambda: c.poll(),
        lambda: c.consume(),
        lambda: c.assign([TopicPartition('test', 0)]),
        lambda: c.unassign(),
        lambda: c.assignment(),
        lambda: c.commit(),
        lambda: c.committed([TopicPartition("test", 0)]),
        lambda: c.position([TopicPartition("test", 0)]),
        lambda: c.seek([TopicPartition("test", 0, 0)]),
        lambda: c.get_watermark_offsets(TopicPartition("test", 0)),
    )

    for invoke in calls_on_closed_consumer:
        with pytest.raises(RuntimeError) as ex:
            invoke()
        assert ex.match('Consumer closed')
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    # Constructing without a configuration dict must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # ``async`` is a reserved word since Python 3.7, so the keyword argument
    # must be spelled ``asynchronous``.
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
class MessagehubStreamingAdapterConfluent(StreamingDataAdapter):
    """Streaming adapter that reads JSON events from a Message Hub topic.

    While consuming, it samples incoming events to build ``self.schema``, a
    mapping of field name to ``"integer"``, ``"float"`` or ``"string"``.
    """

    def __init__(self, topic, username, password, prod=True):
        """Connect to Message Hub over SASL_SSL and subscribe to ``topic``.

        Args:
            topic: Topic to subscribe to.
            username: SASL username.
            password: SASL password.
            prod: Selects the ``prod01`` broker hosts when true, ``stage1``
                otherwise.
        """
        # Pick whichever CA bundle location exists on this system
        caLocation = '/etc/ssl/cert.pem'
        if not os.path.exists(caLocation):
            caLocation = '/etc/pki/tls/cert.pem'
        environment = "prod01" if prod else "stage1"
        brokers = [
            "kafka0{}-{}.messagehub.services.us-south.bluemix.net:9093".format(i, environment)
            for i in range(1, 6)
        ]
        conf = {
            'client.id': 'pixieapp.client.id',
            'group.id': 'pixieapp.group',
            'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'PLAIN',
            'ssl.ca.location': caLocation,
            "bootstrap.servers": ','.join(brokers),
            "sasl.username": username,
            "sasl.password": password,
            'api.version.request': True
        }
        self.consumer = Consumer(conf)
        self.consumer.subscribe([topic])
        self.schema = {}
        self.sampleDocCount = 0

    def tryCast(self, value, t):
        """Return ``t(value)``, or ``None`` when the conversion fails."""
        try:
            return t(value)
        except Exception:
            return None

    def inferType(self, value):
        """Classify ``value`` as ``"integer"``, ``"float"`` or ``"string"``.

        Strings are first parsed as int, then float. The cast result is
        compared against ``None`` rather than tested for truthiness, so
        ``"0"`` and ``"0.0"`` are recognised as numbers.
        """
        if isinstance(value, string_types):
            for caster in (int, float):
                converted = self.tryCast(value, caster)
                if converted is not None:
                    value = converted
                    break
        if value.__class__ == int:
            return "integer"
        if value.__class__ == float:
            return "float"
        return "string"

    def inferSchema(self, eventJSON):
        """Record the type of any new field in ``eventJSON``.

        Sampling stops once ``sampleDocCount`` exceeds 20.
        """
        if self.sampleDocCount > 20:
            return
        for key, value in iteritems(eventJSON):
            if key not in self.schema:
                self.schema[key] = self.inferType(value)
        self.sampleDocCount = self.sampleDocCount + 1

    def doGetNextData(self):
        """Poll once; return a list with the decoded event, or an empty list."""
        msgs = []
        msg = self.consumer.poll(1)
        if msg is not None and msg.error() is None:
            # Parse once; inferSchema only reads the event
            jsonValue = json.loads(msg.value())
            self.inferSchema(jsonValue)
            msgs.append(jsonValue)
        return msgs

    def close(self):
        """Close the consumer.

        The class previously defined ``close`` twice; only the later
        definition (close without an explicit unsubscribe) ever took effect,
        and that behaviour is kept here.
        """
        self.consumer.close()
def repl():
    """Consume build/user/decoration events and apply them to Redis.

    Each topic maps to one handler. A message is committed only after its
    handler reports success; on failure the consumer re-subscribes. The loop
    runs until interrupted and always closes the consumer.
    """
    # Topic name -> handler. A lookup table means an unexpected topic can no
    # longer leave ``result`` unbound or reuse the previous message's result.
    handlers = {
        u'add_build': add_build,
        u'delete_build': delete_build,
        u'add_user': add_user,
        u'delete_user': delete_user,
        u'add_build_component': add_build_component,
        u'remove_build_component': remove_build_component,
        u'add_decoration': add_decoration,
        u'remove_decoration': remove_decoration,
        u'remove_all_decorations': remove_all_decorations,
    }

    c = Consumer(settings)
    c.subscribe(topics)
    try:
        while True:
            # Do not consume while Redis is unreachable
            if not red.ping():
                time.sleep(1)
                continue

            msg = c.poll(0.1)
            # No message present
            if msg is None:
                continue

            error = msg.error()
            if not error:
                handler = handlers.get(msg.topic())
                if handler is None:
                    print('No handler for topic {0}'.format(msg.topic()))
                    continue

                payload = msg.value()
                if handler(payload):
                    # format() instead of '+': the payload may be bytes
                    pprint('Success {0}'.format(payload))
                    c.commit()
                else:
                    c.unsubscribe()
                    c.subscribe(topics)
                    print('Error Occurred Adding to Redis')
            elif error.code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occurred: {0}'.format(error.str()))
                time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        c.close()
def test_multiple_close_does_not_throw_exception():
    """ A second Consumer.close() call must be accepted without a Runtime Exception """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    consumer = Consumer(settings)

    consumer.subscribe(["test"])
    consumer.unsubscribe()

    # Closing twice in a row is the behaviour under test.
    for _ in range(2):
        consumer.close()
class Consumer:
    """Thin wrapper around a Confluent consumer subscribed to a single topic.

    Auto-commit is disabled, so callers must call ``commit`` themselves.
    """

    def __init__(self, bootstrap_servers: str, topic: str, group_id: str) -> None:
        """Create the underlying consumer and subscribe it to ``topic``.

        Args:
            bootstrap_servers: Value for ``bootstrap.servers``.
            topic: The topic to subscribe to.
            group_id: Value for ``group.id``.
        """
        config = {
            'bootstrap.servers': bootstrap_servers,
            # Where to consume from after a reset
            # "latest" is the end of the topic, "earliest" is the beginning
            'default.topic.config': {
                'auto.offset.reset': 'latest'
            },
            'metadata.request.timeout.ms': 20000,
            'enable.auto.commit': False,
            'group.id': group_id,
            'api.version.request': True,
            'fetch.wait.max.ms': 100,
            'log.connection.close': False,
            # This logger will log messages originating from non-Python code
            'logger': get_logger('librdkafka'),
            # Max number of bytes per partition returned by the server
            'max.partition.fetch.bytes': MEBIBYTE * 5,
            'statistics.interval.ms': 15000,
            'queued.max.messages.kbytes': 1024 * 64,
        }
        self._consumer = ConfluentConsumer(config)
        self._consumer.subscribe([topic])

    def consume(self) -> str:
        """Block until a message arrives and return its value decoded as UTF-8.

        Errors are logged and polling continues; end-of-partition events are
        logged at debug level.
        """
        while True:
            msg = self._consumer.poll(1.0)
            if msg is None:
                continue

            error = msg.error()
            if error is None:
                payload = msg.value().decode('utf-8')
                log.debug(f'Received message: {payload}')
                return payload

            if error.code() != KafkaError._PARTITION_EOF:
                log.error(
                    f'Failed to consume from topic, continuing... '
                    f'Reason: {KafkaException(error)}',
                )
            else:
                log.debug('Reached end of topic, waiting for new messages...')

    def commit(self) -> None:
        """Commit the current offsets on the underlying consumer."""
        # Was misspelled ``commmit``, which raised AttributeError on every call.
        self._consumer.commit()

    def close(self) -> None:
        """Unsubscribe and close the underlying consumer."""
        self._consumer.unsubscribe()
        self._consumer.close()
def test_multiple_close_throw_exception():
    """ A second Consumer.close() call must raise a Runtime Exception """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    consumer = Consumer(settings)

    consumer.subscribe(["test"])
    consumer.unsubscribe()
    consumer.close()

    # The consumer is already closed at this point.
    with pytest.raises(RuntimeError) as ex:
        consumer.close()
    message = str(ex.value)
    assert 'Consumer already closed' == message
def test_store_offsets():
    """ Smoke test for store_offsets() on a consumer without a broker """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    consumer = Consumer(settings)
    consumer.subscribe(["test"])

    target = [TopicPartition("test", 0, 42)]
    try:
        consumer.store_offsets(offsets=target)
    except KafkaException as exc:
        # If the call fails, it must be because the partition is unknown.
        kafka_error = exc.args[0]
        assert kafka_error.code() == KafkaError._UNKNOWN_PARTITION

    consumer.unsubscribe()
    consumer.close()
def test_calling_store_offsets_after_close_throws_erro():
    """ store_offsets() on a closed consumer must raise RuntimeError """
    settings = dict([
        ('group.id', 'test'),
        ('enable.auto.commit', True),
        ('enable.auto.offset.store', False),
        ('socket.timeout.ms', 50),
        ('session.timeout.ms', 100),
    ])
    consumer = Consumer(settings)

    consumer.subscribe(["test"])
    consumer.unsubscribe()
    consumer.close()

    with pytest.raises(RuntimeError) as ex:
        consumer.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    message = str(ex.value)
    assert 'Consumer closed' == message
class ConsumerTask(object):
    """Consume one Kafka topic and notify registered observers per message.

    Observers are objects exposing ``notify(task, *args, **kwargs)``; each
    received message triggers a notification carrying the message's topic.
    """

    def __init__(self, conf, topic_name):
        """Create the consumer.

        Args:
            conf: Configuration dict handed to ``Consumer``.
            topic_name: Name of the single topic to subscribe to.
        """
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True
        self._observers = []

    def stop(self):
        """Ask ``run`` to leave its loop once the current poll returns."""
        self.running = False

    def print_assignment(self, consumer, partition):
        """Rebalance callback: print the partitions that were assigned."""
        print('Assignment - subscribing to topic: ', partition)

    def register_observer(self, observer):
        """Add ``observer`` to the list notified on every message."""
        self._observers.append(observer)

    def notify_observers(self, *args, **kwargs):
        """Call ``notify(self, *args, **kwargs)`` on every registered observer."""
        for observer in self._observers:
            observer.notify(self, *args, **kwargs)

    def run(self):
        """Poll messages and notify observers until ``stop`` is called.

        Raises:
            KafkaException: If a polled message carries an error.
        """
        self.consumer.subscribe([self.topic_name],
                                on_assign=self.print_assignment)
        try:
            # Honour stop(): the flag is re-checked after every poll.
            while self.running:
                msg = self.consumer.poll(1)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                 (msg.topic(), msg.partition(), msg.offset(),
                                  str(msg.key())))
                # Observers receive only the topic name, not the payload
                self.notify_observers(msg.topic())
        except KeyboardInterrupt:
            sys.stderr.write("%% Aborted by user\n")
        finally:
            self.consumer.unsubscribe()
            self.consumer.close()
def test_calling_store_offsets_after_close_throws_erro():
    """ store_offsets() and offsets_for_times() on a closed consumer must raise RuntimeError """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    consumer = Consumer(settings)

    consumer.subscribe(["test"])
    consumer.unsubscribe()
    consumer.close()

    # Both offset APIs are expected to reject a closed consumer the same way.
    offset_calls = (
        lambda: consumer.store_offsets(offsets=[TopicPartition("test", 0, 42)]),
        lambda: consumer.offsets_for_times([TopicPartition("test", 0)]),
    )
    for invoke in offset_calls:
        with pytest.raises(RuntimeError) as ex:
            invoke()
        assert 'Consumer closed' == str(ex.value)
class ConsumerTask(object):
    """Consume one Kafka topic from inside an asyncio event loop.

    ``run`` is a coroutine that polls the consumer, pretty-prints what it
    received and then sleeps for two seconds, until ``stop`` clears
    ``running``.
    """

    def __init__(self, conf, topic_name):
        """Create the consumer.

        Args:
            conf: Configuration dict handed to ``Consumer``.
            topic_name: Name of the single topic to subscribe to.
        """
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True

    def stop(self):
        """Ask ``run`` to exit at its next loop check."""
        self.running = False

    async def run(self):
        """Poll and print messages until ``stop`` is called.

        Declared with ``async def`` because ``@asyncio.coroutine`` was removed
        in Python 3.11. The consumer is unsubscribed and closed even when the
        loop exits through an exception or task cancellation.
        """
        print('The consumer has started')
        self.consumer.subscribe([self.topic_name])
        try:
            while self.running:
                # NOTE: poll() is a blocking call; it holds the event loop
                # for up to one second per iteration.
                msg = self.consumer.poll(1)
                if msg is not None and msg.error() is None:
                    pprint("Message consumed: offset={0}, value={1}".format(
                        msg.offset(), msg.value()))
                else:
                    print('No messages consumed')
                await asyncio.sleep(2)
        finally:
            self.consumer.unsubscribe()
            self.consumer.close()
def repl():
    """Consume messages from ``topic`` and insert each one into Cassandra.

    A message is committed only after ``insertArmor`` reports success; on
    failure the consumer re-subscribes. The loop runs until interrupted and
    always closes the consumer.
    """
    c = Consumer(settings)
    c.subscribe([topic])
    db.connect()
    try:
        while True:
            # Reconnect before consuming while the database is unreachable
            if not db.ping():
                db.connect()
                continue

            msg = c.poll(0.1)
            # No message present
            if msg is None:
                continue

            error = msg.error()
            if not error:
                payload = msg.value()
                if insertArmor(payload):
                    # format() instead of '+': the payload may be bytes, and
                    # a TypeError here would skip the commit below.
                    pprint('Added Successfully {0}'.format(payload))
                    c.commit()
                else:
                    c.unsubscribe()
                    c.subscribe([topic])
                    print('Error Occurred Adding to Cassandra')
            elif error.code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occurred: {0}'.format(error.str()))
                time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        c.close()
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use

        Args:
            topic_name_pattern: Topic name (or pattern) to subscribe to.
            message_handler: Callable invoked with every received message.
            is_avro: Use an ``AvroConsumer`` (with schema registry) when true.
            offset_earliest: Rewind assigned partitions to offset 0 when true.
            sleep_secs: Pause between consume rounds in ``consume``.
            consume_timeout: Stored on the instance; ``_consume`` currently
                polls with a fixed 1.0 second timeout.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9092,PLAINTEXT://localhost:9093,PLAINTEXT://localhost:9094",
            "group.id": "00",
        }

        if is_avro:
            self.broker_properties["schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
        logger.info(f"{self.topic_name_pattern} subscribed!")

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = 0
        # Must run on every rebalance so the (possibly rewound) assignment
        # takes effect
        consumer.assign(partitions)
        logger.info("partitions assigned for %s", self.topic_name_pattern)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            # Drain messages until a poll yields nothing, then give control
            # back to the event loop
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        message = self.consumer.poll(1.0)
        if message is None:
            logger.debug("no message received by consumer %s",
                         self.topic_name_pattern)
            return 0
        if message.error() is not None:
            logger.info(f"error from consumer {message.error()}")
            # Previously fell through and returned None, which made
            # ``num_results > 0`` in consume() raise TypeError.
            return 0
        self.message_handler(message)
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unsubscribe()
        logger.info(f"unsubscribed from {self.topic_name_pattern}")
        self.consumer.close()
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    # Constructing without a configuration dict must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(
        map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0],
                                          timeout=0.5,
                                          cached=False)
    except KafkaException as e:
        # The failure message indexes e.args; it previously *called* the
        # tuple (``e.args([0])``), which would raise TypeError instead of
        # reporting the unexpected error code.
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), \
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions
                if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    kc.close()
# Worker loop fragment: processes one job message at a time from args.topic
# and exits when a STOP message arrives on the 'kill_all_consumers' topic.
# `stop_consumer`, `consumer`, `conf` and `args` are defined outside this
# fragment.
stop_consumer.subscribe(['kill_all_consumers'])
producer_conf = {'bootstrap.servers': args.broker}
# NOTE(review): producer_conf is built above but never used; the producer is
# created from `conf` instead. Confirm which configuration is intended.
producer = Producer(**conf)
while True:
    # Re-subscribe on every iteration: the job topic is unsubscribed below
    # once a message has been picked up.
    consumer.subscribe([args.topic])
    stop_msg = stop_consumer.poll(timeout=1.0)
    if stop_msg is not None:
        if stop_msg.value() == b"STOP":
            print("Received STOP message")
            break
    msg = consumer.poll(timeout=1.0)
    if msg is None:
        continue
    if msg.error():
        raise KafkaException(msg.error())
    else:
        # Proper message
        sys.stderr.write(
            '%% %s [%d] at offset %d with key %s:\n' %
            (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
        # Drop the subscription before the job runs (the job sleeps for
        # 'job_length' seconds).
        consumer.unsubscribe()
        msg_data = json.loads(msg.value())
        print(f'Doing job {msg_data["id"]}')
        sleep(int(msg_data['job_length']))
        report_job_done(producer, msg_data['id'])
# NOTE(review): only `consumer` is closed here; stop_consumer is not.
consumer.close()
class BusConsumer:
    """Kafka consumer for the message bus, configured from pre-shared credentials."""

    def __init__(self, groupid=None):
        """Load the bus credentials and create the consumer.

        Args:
            groupid: Optional consumer group id; overrides the default
                ``'VAL_consumer_group'``.
        """
        # Pre-shared credentials
        # self.credentials = json.load(open('bus_credentials.json'))
        self.credentials = load_credentials.LoadCredentials.load_bus_credentials()

        # Construct required configuration
        self.configuration = {
            'client.id': 'VAL_consumer',
            'group.id': 'VAL_consumer_group',
            'bootstrap.servers': ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            # The API key is split into username (first 16 chars) and
            # password (next 32 chars)
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        if groupid is not None:
            self.configuration["group.id"] = groupid

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'messages.sqlite'

        self.default_topics = [TOP803, TOP030]

    def listen(self, performed_action, topics=None):
        """Subscribe to ``topics`` and pass every message to ``performed_action``.

        Args:
            performed_action: Callable invoked with the message text (decoded
                as UTF-8 when possible, otherwise the raw value).
            topics: List of topic names, e.g. ``['topic1', 'topic2']``;
                defaults to ``self.default_topics``.

        Returns:
            ``False`` if subscribing failed; otherwise ``None`` once
            listening has been stopped.
        """
        # Topics should be a list of topic names e.g. ['topic1', 'topic2']
        if topics is None:
            topics = self.default_topics

        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            logger.error("Error @ BusConsumer.listen()")
            logger.debug(str(type(e)) + str(e))
            return False
        logger.info("listener subscribed successfully to topics:" + str(topics))

        try:
            # Initiate a loop for continuous listening
            while self.listening:
                msg = self.consumer.poll(0)

                # If a message is received and it is not an error message
                if msg is not None and msg.error() is None:

                    # Add incoming message to requests database
                    try:
                        message_text = msg.value().decode('utf-8')
                    except (UnicodeDecodeError, AttributeError):
                        # Not UTF-8 (or no payload): hand over the raw value
                        message_text = msg.value()

                    performed_action(message_text)

                # TODO: check if it works ok with the sleep .5
                time.sleep(0.5)
        finally:
            # Unsubscribe and close consumer, even if the callback raised
            self.consumer.unsubscribe()
            self.consumer.close()

    def stop(self):
        """Ask ``listen`` to leave its loop after the current iteration."""
        self.listening = False

    @staticmethod
    def __load_dummy_messages():
        """
        Load Vicenza messages and add them to the message queue with a small delay between each insertion
        """
        import random
        import filter_messages

        max_delay = 0.01  # delay in the range [0, max_delay] from uniform distribution

        vic_messages = filter_messages.simulateData()

        for m in vic_messages:
            logger.debug("writing TOP101 message to queue")
            message_queue.MessageQueue.put_message(m)  # Note: pass it by value, not reference!

            ValidatorThreadHandler.init_validator()

            time.sleep(random.random() * max_delay)

        ValidatorThreadHandler.join_validation_thread()

    @staticmethod
    def __continuously_add_fake_TOP101():
        """Debug helper that loops forever, meant to enqueue fake TOP101 messages."""
        import random

        fake_msg = dict()
        fake_msg['body'] = dict()
        fake_msg['body']['spam'] = False
        fake_msg['body']['incidentID'] = random.randint(0, 1000000)
        p = 1
        while True:
            # NOTE(review): random.random() is always < 1, so this branch
            # never runs and no message is enqueued. Confirm the threshold.
            if random.random() > 1:
                p += 1
                fake_msg['body']['incidentID'] = p  # random.randint(0, 1000000)
                print("message in queue. ID: ", p)
                message_queue.MessageQueue.put_message(
                    {'body': {
                        'spam': False,
                        'incidentID': p
                    }})
                ValidatorThreadHandler.init_validator()

    @staticmethod
    def __load_TOP030():
        """
        load TOP030 messages from local file and put them in the message queue
        """
        import random
        import filter_messages

        max_delay = .1  # delay in the range [0, max_delay] from uniform distribution

        vic_messages = filter_messages.get030()

        for m in vic_messages:
            logger.debug("writing TOP030 message to queue")
            message_queue.MessageQueue.put_message(m)  # Note: pass it by value, not reference!

            ValidatorThreadHandler.init_validator()

            time.sleep(random.random() * max_delay)

        ValidatorThreadHandler.join_validation_thread()
class KafkaReader(MQReader):
    """Message-queue reader that yields raw messages from Kafka topics."""

    @staticmethod
    def decode(args):
        """Turn a ``(key, value)`` pair into ``(b2s(key), int(value))``."""
        k, v = args
        return b2s(k), int(v)

    @staticmethod
    def get_value(msg):
        """Return the payload of ``msg``."""
        return msg.value()

    @staticmethod
    def get_token(msg):
        """Return ``{'<topic>.<partition>': offset + 1}`` for ``msg``."""
        return {f'{msg.topic()}.{msg.partition()}': msg.offset() + 1}

    def __init__(self, topics, group_id='group-1', client_id='default',
                 bootstrap_servers=BOOTSTRAP_SERVERS, is_bootstrap=True,
                 is_resume=True):
        """Read data from Kafka.

        :param topics: list, kafka topics
        :param group_id: str, kafka topics group_id
        :param client_id: str, kafka topics client_id
        :param bootstrap_servers: kafka host
        :param is_bootstrap: whether to read the full history
        :param is_resume: whether to resume from the last recorded position
        """
        # A single topic name is wrapped into a list
        if not isinstance(topics, (list, tuple)):
            topics = [topics]
        self.topics = topics
        self.group_id = group_id
        self.client_id = client_id
        super().__init__(f'kafka:{self.client_id}:{self.group_id}',
                         is_bootstrap=is_bootstrap, is_resume=is_resume)
        self.config = {
            'client.id': self.client_id,
            'group.id': self.group_id,
            'bootstrap.servers': bootstrap_servers,
            'broker.version.fallback': BROKER_VERSION,
            'compression.type': COMPRESSION_TYPE,
            'enable.auto.commit': True,
            'auto.offset.reset': 'earliest' if is_bootstrap else 'latest',
            'on_commit': self.on_commit,
        }
        self.consumer = Consumer(self.config)

    def on_commit(self, err, partitions):
        """Commit callback: log failures, otherwise record committed offsets.

        Valid offsets are stored in ``self.resume_token`` under the key
        ``'<topic>.<partition>'``.
        """
        # NOTE(review): resume_token is not defined in this class; presumably
        # provided by MQReader - confirm.
        for part in partitions:
            if err is not None:
                logger.error(f'Message delivery failed: {err}')
                logger.error(
                    f'topic={part.topic} partition={part.partition} '
                    f'offset={part.offset}'
                )
            else:
                # logger.info(
                #     f'topic={part.topic} partition={part.partition} '
                #     f'offset={part.offset}'
                # )
                key = f'{part.topic}.{part.partition}'
                if part.offset != OFFSET_INVALID:
                    self.resume_token[key] = part.offset

    def read(self):
        """Read data from kafka.

        Generator that subscribes to ``self.topics`` and yields every message
        without an error. It stops when ``poll`` raises ``RuntimeError`` or
        ``KeyboardInterrupt``, then unsubscribes.

        https://github.com/confluentinc/confluent-kafka-python/issues/201
        :return:
        """

        # Reset offsets
        def on_assign(consumer, partitions):
            consumer.assign(partitions)
            # Use the recorded offset per partition, or 0 when none exists
            for part in partitions:
                key = f'{part.topic}.{part.partition}'
                part.offset = self.resume_token.get(key) or 0
            consumer.commit(offsets=partitions, asynchronous=False)

        # Subscribe; the offset-resetting callback is used only when resuming
        if self.is_resume:
            self.consumer.subscribe(self.topics, on_assign=on_assign)
        else:
            self.consumer.subscribe(self.topics)
        logger.info(f'Kafka consumer subscribe {self.topics}')

        while True:
            try:
                msg = self.consumer.poll(1)
            except RuntimeError as e:
                logger.error(f'RuntimeError:{e}')
                break
            except KeyboardInterrupt:
                logger.error('KeyboardInterrupt')
                break

            if msg is None:
                continue
            if msg.error():
                logger.error(msg.error())
                continue

            # logger.info(f'{msg.topic()} {msg.partition()} {msg.offset()}')
            yield msg

        # Unsubscribe
        try:
            self.consumer.unsubscribe()
        except RuntimeError:
            pass
        logger.info(f'Kafka reader {self.topics} unsubscribe')

    def disconnect(self):
        """Close the consumer, ignoring ``RuntimeError``."""
        try:
            self.consumer.close()
        except RuntimeError:
            pass
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    # Constructing without a configuration dict must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        # The failure message indexes e.args; it previously *called* the
        # tuple (``e.args([0])``), which would raise TypeError instead of
        # reporting the unexpected error code.
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
class KafkaConsumer(Consumer[TopicPartition, int, bytes]):
    """
    Consumer built on the Confluent client, with stricter offset handling.

    Rebalancing differs slightly from the Confluent consumer: every time a
    partition is assigned to this consumer, its offset is reset to the
    committed offset for that partition (or, when nothing has been committed,
    to whatever ``auto.offset.reset`` dictates). Partitions retained across a
    rebalance are therefore managed exactly like partitions that moved in
    from another consumer. To keep uncommitted messages from being consumed
    more than once, call ``commit`` from the partition revocation callback.

    ``auto.offset.reset`` is likewise only honoured during the initial
    assignment and later rebalances. Any other situation that would normally
    trigger a preemptive reset (reading before the earliest offset or after
    the latest one) raises an exception instead, because resetting could
    replay or skip chunks of messages. Consequently, when the committed
    offset is no longer available (expired messages, or a failover to an
    out-of-date replica), the consumer fail-stops rather than falling back to
    ``auto.offset.reset``.
    """

    # Logical offsets that do not map to real log positions. They are an
    # implementation detail of the Kafka consumer and are not meant to be
    # used publicly.
    # https://github.com/confluentinc/confluent-kafka-python/blob/443177e1c83d9b66ce30f5eb8775e062453a738b/tests/test_enums.py#L22-L25
    LOGICAL_OFFSETS = frozenset(
        [OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID])

    def __init__(self, configuration: Mapping[str, Any]) -> None:
        reset_policy = configuration.get("auto.offset.reset", "largest")
        if reset_policy in {"smallest", "earliest", "beginning"}:
            resolver = self.__resolve_partition_offset_earliest
        elif reset_policy in {"largest", "latest", "end"}:
            resolver = self.__resolve_partition_offset_latest
        elif reset_policy == "error":
            resolver = self.__resolve_partition_offset_error
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")
        self.__resolve_partition_starting_offset = resolver

        # NOTE: Starting offsets are resolved explicitly in the assignment
        # callback, so the client's own preemptive reset stays disabled.
        client_configuration = {**configuration, "auto.offset.reset": "error"}
        self.__consumer = ConfluentConsumer(client_configuration)

        self.__offsets: MutableMapping[TopicPartition, int] = {}
        self.__state = KafkaConsumerState.CONSUMING

    def __require_consuming(self) -> None:
        # Guard for operations that are only legal in the CONSUMING state.
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

    def __require_usable(self) -> None:
        # Guard for operations that are illegal once closed or errored.
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

    def __resolve_partition_offset_earliest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        # Start from the low watermark reported by the client.
        watermarks = self.__consumer.get_watermark_offsets(partition)
        low, high = watermarks
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       low)

    def __resolve_partition_offset_latest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        # Start from the high watermark reported by the client.
        watermarks = self.__consumer.get_watermark_offsets(partition)
        low, high = watermarks
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       high)

    def __resolve_partition_offset_error(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        raise ConsumerError("unable to resolve partition offsets")

    def subscribe(
        self,
        topics: Sequence[str],
        on_assign: Optional[Callable[[Sequence[TopicPartition]], None]] = None,
        on_revoke: Optional[Callable[[Sequence[TopicPartition]], None]] = None,
    ) -> None:
        """
        Subscribe to ``topics``, wiring the rebalance callbacks.

        ``on_assign`` receives the newly assigned streams after their offsets
        have been initialised; ``on_revoke`` receives the streams being taken
        away. Raises ``InvalidState`` unless the consumer is consuming.
        """
        self.__require_consuming()

        def handle_assignment(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                resolved: MutableSequence[ConfluentTopicPartition] = []
                for committed in self.__consumer.committed(partitions):
                    if committed.offset >= 0:
                        resolved.append(committed)
                        continue
                    if committed.offset != OFFSET_INVALID:
                        raise ValueError("received unexpected offset")
                    # Nothing committed yet: apply the configured policy.
                    resolved.append(
                        self.__resolve_partition_starting_offset(committed))

                offsets: MutableMapping[TopicPartition, int] = {}
                for item in resolved:
                    offsets[TopicPartition(item.topic,
                                           item.partition)] = item.offset

                self.__seek(offsets)
            except Exception:
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(list(offsets))
            finally:
                self.__state = KafkaConsumerState.CONSUMING

        def handle_revocation(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            streams = [
                TopicPartition(item.topic, item.partition)
                for item in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(streams)
            finally:
                for stream in streams:
                    if stream in self.__offsets:
                        del self.__offsets[stream]
                    else:
                        # If there was an error during assignment, this
                        # stream may never have been added to the offsets
                        # mapping.
                        logger.warning(
                            "failed to delete offset for unknown stream: %r",
                            stream)

                self.__state = KafkaConsumerState.CONSUMING

        self.__consumer.subscribe(topics,
                                  on_assign=handle_assignment,
                                  on_revoke=handle_revocation)

    def unsubscribe(self) -> None:
        """Drop the subscription; raises ``InvalidState`` unless consuming."""
        self.__require_consuming()
        self.__consumer.unsubscribe()

    def poll(self, timeout: Optional[float] = None) -> Optional[KafkaMessage]:
        """
        Return the next message, or ``None`` when the client returns nothing.

        Raises ``EndOfStream`` at a partition end, ``TransportError`` for
        transport failures and ``ConsumerError`` for any other client error.
        Raises ``InvalidState`` unless the consumer is consuming.
        """
        self.__require_consuming()

        # A ``None`` timeout means "use the client's default", so the
        # argument is omitted rather than passed through.
        if timeout is None:
            message: Optional[ConfluentMessage] = self.__consumer.poll()
        else:
            message = self.__consumer.poll(timeout)

        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfStream(
                    TopicPartition(message.topic(), message.partition()),
                    message.offset(),
                )
            if code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            raise ConsumerError(str(error))

        stream = TopicPartition(message.topic(), message.partition())
        result = KafkaMessage(stream, message.offset(), message.value())

        # Track the position of the next message to read on this stream.
        self.__offsets[result.stream] = result.get_next_offset()

        return result

    def tell(self) -> Mapping[TopicPartition, int]:
        """Return the tracked read offsets for the assigned streams."""
        self.__require_usable()
        return self.__offsets

    def __seek(self, offsets: Mapping[TopicPartition, int]) -> None:
        targets = [
            ConfluentTopicPartition(stream.topic, stream.partition, offset)
            for stream, offset in offsets.items()
        ]

        if self.__state is KafkaConsumerState.ASSIGNING:
            # Calling ``seek`` on the Confluent consumer from an assignment
            # callback will throw an "Erroneous state" error. Instead,
            # partition offsets have to be initialized by calling ``assign``.
            self.__consumer.assign(targets)
        else:
            for target in targets:
                self.__consumer.seek(target)

        self.__offsets.update(offsets)

    def seek(self, offsets: Mapping[TopicPartition, int]) -> None:
        """Move the read offsets of already-assigned streams."""
        self.__require_usable()

        unassigned = offsets.keys() - self.__offsets.keys()
        if unassigned:
            raise ConsumerError("cannot seek on unassigned streams")

        self.__seek(offsets)

    def commit(self) -> Mapping[TopicPartition, int]:
        """
        Synchronously commit and return the committed stream offsets.

        A small set of coordinator/timeout errors is retried up to three
        times, one second apart; every other error is re-raised immediately.
        """
        self.__require_usable()

        committed: Optional[Sequence[ConfluentTopicPartition]] = None

        attempts_left = 3
        while committed is None:
            try:
                committed = self.__consumer.commit(asynchronous=False)
                assert committed is not None
            except KafkaException as exc:
                retryable = exc.args[0].code() in (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR_FOR_GROUP,
                    KafkaError._WAIT_COORD,
                )
                if not retryable or not attempts_left:
                    raise

                logger.warning(
                    "Commit failed: %s (%d retries remaining)",
                    str(exc),
                    attempts_left,
                )
                attempts_left -= 1
                time.sleep(1)

        offsets: MutableMapping[TopicPartition, int] = {}

        for entry in committed:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``TopicPartition`` objects returned by ``commit``.
            # These are an implementation detail of the Kafka Consumer, so we
            # don't expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if entry.offset not in self.LOGICAL_OFFSETS:
                assert entry.offset >= 0, "expected non-negative offset"
                offsets[TopicPartition(entry.topic,
                                       entry.partition)] = entry.offset

        return offsets

    def close(self, timeout: Optional[float] = None) -> None:
        """
        Close the underlying client and mark this consumer as closed.

        ``timeout`` is accepted but not used here; a ``RuntimeError`` raised
        by the underlying ``close`` call is suppressed.
        """
        try:
            self.__consumer.close()
        except RuntimeError:
            pass

        self.__state = KafkaConsumerState.CLOSED
def consumer_seek_to_end_of_topic(consumer: "Consumer", data_topic: str,
                                  settle_seconds: float = 1):
    """Drop the current subscription and subscribe afresh to ``data_topic``.

    Args:
        consumer: Consumer whose subscription is replaced.
        data_topic: Name of the single topic to subscribe to again.
        settle_seconds: Pause between unsubscribing and resubscribing.
            Defaults to the previously hard-coded one second.

    NOTE(review): this function only unsubscribes and resubscribes; whether
    the new subscription actually starts at the end of the topic presumably
    depends on the consumer's offset-reset configuration - confirm.
    """
    consumer.unsubscribe()
    sleep(settle_seconds)
    # Resubscribe at end of topic
    consumer.subscribe([data_topic])
class BusConsumer:
    """Kafka consumer that stores each received message in a SQLite table.

    Credentials are read from ``bus_credentials.json`` and messages are
    written to the ``requests`` table of ``self.database``.
    """

    # Upper bound, in seconds, for a single ``poll`` call. A zero timeout made
    # the listen loop spin at full CPU while idle; a bounded wait still
    # returns immediately when a message is available, but ``stop()`` may now
    # take up to this long to be noticed.
    POLL_TIMEOUT_SECONDS = 1.0

    def __init__(self):
        # Pre-shared credentials (the file is closed as soon as it is parsed)
        with open('bus_credentials.json') as credentials_file:
            self.credentials = json.load(credentials_file)

        # Construct required configuration
        self.configuration = {
            'client.id': 'CRCL_consumer',
            'group.id': 'CRCL_consumer_group',
            'bootstrap.servers': ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            # The API key is split into user name and password halves.
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'messages.sqlite'

    @staticmethod
    def _decode_payload(msg):
        """Return the message value as text, or the raw value if undecodable."""
        payload = msg.value()
        try:
            return payload.decode('utf-8')
        except (AttributeError, UnicodeDecodeError):
            # ``None`` payloads and non-UTF-8 bytes are stored unchanged.
            return payload

    def listen(self, topics):
        """Consume ``topics`` until ``stop()`` is called, storing each message.

        Args:
            topics: List of topic names, e.g. ``['topic1', 'topic2']``.

        Returns:
            ``False`` when subscribing fails, otherwise ``None`` once the
            loop has ended and the consumer has been closed.
        """
        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            print(e)
            return False

        try:
            # Initiate a loop for continuous listening
            while self.listening:
                msg = self.consumer.poll(self.POLL_TIMEOUT_SECONDS)

                # Skip poll timeouts and error events
                if msg is None or msg.error() is not None:
                    continue

                # Add incoming message to requests database
                self.submit_message_to_database(self._decode_payload(msg))
        finally:
            # Unsubscribe and close consumer, even if the loop raised
            try:
                self.consumer.unsubscribe()
            finally:
                self.consumer.close()

    def stop(self):
        """Ask the ``listen`` loop to finish after its current iteration."""
        self.listening = False

    def submit_message_to_database(self, message):
        """Insert ``message`` into the ``requests`` table.

        Returns:
            ``False`` when SQLite reports an error, otherwise ``None``.
        """
        try:
            con = sqlite3.connect(self.database)
            try:
                # ``with con`` commits or rolls back the transaction; it does
                # not close the connection, hence the explicit close below.
                with con:
                    con.execute('INSERT INTO requests (message) VALUES (?)', (message,))
            finally:
                con.close()
        except sqlite3.Error as e:
            print("Error %s:" % e.args[0])
            return False
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use

        Args:
            topic_name_pattern: Topic name (or pattern) passed to ``subscribe``.
            message_handler: Callable invoked with every successfully polled
                message.
            is_avro: When ``True`` an ``AvroConsumer`` is created and the
                schema registry URL is added to the configuration.
            offset_earliest: When true, assigned partitions are rewound to
                the beginning in ``on_assign``.
            sleep_secs: Pause between polling rounds in ``consume``.
            consume_timeout: Timeout, in seconds, for each ``poll`` call.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        # NOTE(review): "linger.ms", "acks", "retries" and
        # "batch.num.messages" look like producer-side settings - confirm
        # they are intended for a consumer.
        self.broker_properties = {
            "group.id": "group1",
            "bootstrap.servers": common.BROKER_URL,
            "linger.ms": 1000,
            "acks": 1,
            "retries": 3,
            "message.max.bytes": 4 * 4096,
            "batch.num.messages": 10
        }

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = common.SCHEMA_REGISTRY_URL
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # Rewind every assigned partition when the topic is configured to be
        # read from the earliest offset.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            # Drain everything currently available, then yield to the loop.
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        # Honour the configured timeout so one poll cannot stall the caller
        # for longer than requested.
        message = self.consumer.poll(self.consume_timeout)

        if message is None:
            # An empty poll is the normal idle case, not a warning.
            logger.debug("_consume empty message")
            return 0

        if message.error() is not None:
            logger.error("_consume met error %s", message.error())
            return 0

        try:
            # Hand the message to the handler supplied at construction time.
            self.message_handler(message)
        except KeyError as e:
            logger.error("_consumer compact message failed: %s", e)
        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unsubscribe()
        self.consumer.close()
class BusConsumer:
    """Kafka consumer that logs every received message to a database.

    ``listen`` writes to MySQL; SQLite variants of the write and clear
    operations are also provided and use ``self.database``.
    """

    def __init__(self):
        # Pre-shared credentials (the file is closed as soon as it is parsed)
        with open('bus_credentials.json') as credentials_file:
            self.credentials = json.load(credentials_file)

        # Construct required configuration
        self.configuration = {
            'client.id': 'bus_logger',
            'group.id': 'bus_logger_group',
            'bootstrap.servers': ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            # The API key is split into user name and password halves.
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'log.sqlite'

    @staticmethod
    def _decode_payload(msg):
        """Return the message value as text, or the raw value if undecodable."""
        payload = msg.value()
        try:
            return payload.decode('utf-8')
        except (AttributeError, UnicodeDecodeError):
            # ``None`` payloads and non-UTF-8 bytes are stored unchanged.
            return payload

    @staticmethod
    def _connect_mysql():
        """Open a connection to the ``bus_log`` MySQL database."""
        return pymysql.connect(host='localhost',
                               user='******',
                               password='******',
                               db='bus_log',
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)

    def listen(self, topics):
        """Consume ``topics`` until ``stop()`` is called, logging each message.

        Args:
            topics: List of topic names, e.g. ``['topic1', 'topic2']``.

        Returns:
            ``False`` when subscribing fails, otherwise ``None`` once the
            loop has ended and the consumer has been closed.
        """
        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            print(e)
            return False

        try:
            # Initiate a loop for continuous listening
            while self.listening:
                # ``poll`` blocks for up to one second, which already
                # throttles the loop. The former ``asyncio.sleep(0.43)`` was
                # never awaited, so it did not sleep and only produced a
                # "coroutine was never awaited" warning; it has been removed.
                msg = self.consumer.poll(1)

                # Skip poll timeouts and error events
                if msg is None or msg.error() is not None:
                    continue

                # Add incoming message to requests database
                try:
                    topic = msg.topic()
                except Exception:
                    topic = "Undefined"

                try:
                    offset = str(msg.offset())
                except Exception:
                    offset = "Undefined"

                message_text = self._decode_payload(msg)

                # self.submit_message_to_sqlite_database(topic, message_text, offset)
                self.submit_message_to_mysql_database(topic, message_text, offset)
        finally:
            # Unsubscribe and close consumer, even if the loop raised
            try:
                self.consumer.unsubscribe()
            finally:
                self.consumer.close()

    def stop(self):
        """Ask the ``listen`` loop to finish after its current iteration."""
        self.listening = False

    def submit_message_to_sqlite_database(self, topic, message, offset):
        """Insert one log row into the SQLite ``requests`` table.

        Returns:
            ``False`` when SQLite reports an error, otherwise ``None``.
        """
        # Get UTC time as string
        timestamp = datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S.%f")

        try:
            con = sqlite3.connect(self.database)
            try:
                # ``with con`` commits or rolls back the transaction; it does
                # not close the connection, hence the explicit close below.
                with con:
                    con.execute('INSERT INTO requests (topic, message, timestamp, offset) VALUES (?, ?, ?, ?)',
                                (topic, message, timestamp, offset))
            finally:
                con.close()
        except sqlite3.Error as e:
            print("Error %s:" % e.args[0])
            return False

    def submit_message_to_mysql_database(self, topic, message, offset):
        """Insert one log row into the MySQL ``messages`` table."""
        # Get UTC time as string
        timestamp = datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S:%f")

        # Connect to the database
        connection = self._connect_mysql()

        try:
            with connection.cursor() as cur:
                cur.execute('INSERT INTO messages (topic, message, timestamp, offset) VALUES (%s, %s, %s, %s)',
                            (topic, message, timestamp, offset))

            # connection is not autocommit by default. So you must commit to save
            # your changes.
            connection.commit()

            print('# Message logged:' + timestamp + " - Topic: " + topic)
        finally:
            connection.close()

    def empty_sqlite_database(self):
        """Delete every row from the SQLite ``requests`` table.

        This method previously held the MySQL implementation (the two
        ``empty_*`` bodies were swapped). It now clears the table that
        ``submit_message_to_sqlite_database`` writes to.

        Returns:
            ``False`` when SQLite reports an error, otherwise ``None``.
        """
        try:
            con = sqlite3.connect(self.database)
            try:
                with con:
                    con.execute('DELETE FROM requests')
            finally:
                con.close()

            print("Database was cleared")
        except sqlite3.Error as e:
            print("Error %s:" % e.args[0])
            return False

    def empty_mysql_database(self):
        """Delete every row from the MySQL ``messages`` table."""
        # Connect to the database
        connection = self._connect_mysql()

        try:
            with connection.cursor() as cur:
                cur.execute('DELETE FROM messages')

            # connection is not autocommit by default. So you must commit to save
            # your changes.
            connection.commit()
        finally:
            connection.close()
class KafkaConsumer(Consumer[TPayload]):
    """
    Consumer built on the Confluent client, with stricter offset handling.

    Rebalancing differs slightly from the Confluent consumer: every time a
    partition is assigned to this consumer, its offset is reset to the
    committed offset for that partition (or, when nothing has been committed,
    to whatever ``auto.offset.reset`` dictates). Partitions retained across a
    rebalance are therefore managed exactly like partitions that moved in
    from another consumer. To keep uncommitted messages from being consumed
    more than once, call ``commit`` from the partition revocation callback.

    ``auto.offset.reset`` is likewise only honoured during the initial
    assignment and later rebalances. Any other situation that would normally
    trigger a preemptive reset (reading before the earliest offset or after
    the latest one) raises an exception instead, because resetting could
    replay or skip chunks of messages. Consequently, when the committed
    offset is no longer available (expired messages, or a failover to an
    out-of-date replica), the consumer fail-stops rather than falling back to
    ``auto.offset.reset``.
    """

    # Logical offsets that do not map to real log positions. They are an
    # implementation detail of the Kafka consumer and are not meant to be
    # used publicly.
    # https://github.com/confluentinc/confluent-kafka-python/blob/443177e1c83d9b66ce30f5eb8775e062453a738b/tests/test_enums.py#L22-L25
    LOGICAL_OFFSETS = frozenset(
        [OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID])

    def __init__(
        self,
        configuration: Mapping[str, Any],
        codec: Codec[KafkaPayload, TPayload],
        *,
        commit_retry_policy: Optional[RetryPolicy] = None,
    ) -> None:
        if commit_retry_policy is None:
            commit_retry_policy = NoRetryPolicy()

        reset_policy = configuration.get("auto.offset.reset", "largest")
        if reset_policy in {"smallest", "earliest", "beginning"}:
            resolver = self.__resolve_partition_offset_earliest
        elif reset_policy in {"largest", "latest", "end"}:
            resolver = self.__resolve_partition_offset_latest
        elif reset_policy == "error":
            resolver = self.__resolve_partition_offset_error
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")
        self.__resolve_partition_starting_offset = resolver

        # Both automatic behaviours must be switched off explicitly; a
        # missing key counts as "true" and is rejected.
        auto_commit = as_kafka_configuration_bool(
            configuration.get("enable.auto.commit", "true"))
        if auto_commit is not False:
            raise ValueError(
                "invalid value for 'enable.auto.commit' configuration")

        auto_offset_store = as_kafka_configuration_bool(
            configuration.get("enable.auto.offset.store", "true"))
        if auto_offset_store is not False:
            raise ValueError(
                "invalid value for 'enable.auto.offset.store' configuration")

        # NOTE: Starting offsets are resolved explicitly in the assignment
        # callback, so the client's own preemptive reset stays disabled.
        client_configuration = {**configuration, "auto.offset.reset": "error"}
        self.__consumer = ConfluentConsumer(client_configuration)

        self.__codec = codec

        self.__offsets: MutableMapping[Partition, int] = {}
        self.__staged_offsets: MutableMapping[Partition, int] = {}
        self.__paused: Set[Partition] = set()

        self.__commit_retry_policy = commit_retry_policy

        self.__state = KafkaConsumerState.CONSUMING

    def __require_consuming(self) -> None:
        # Guard for operations that are only legal in the CONSUMING state.
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

    def __require_usable(self) -> None:
        # Guard for operations that are illegal once closed or errored.
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

    def __resolve_partition_offset_earliest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        # Start from the low watermark reported by the client.
        watermarks = self.__consumer.get_watermark_offsets(partition)
        low, high = watermarks
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       low)

    def __resolve_partition_offset_latest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        # Start from the high watermark reported by the client.
        watermarks = self.__consumer.get_watermark_offsets(partition)
        low, high = watermarks
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       high)

    def __resolve_partition_offset_error(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        raise ConsumerError("unable to resolve partition offsets")

    def subscribe(
        self,
        topics: Sequence[Topic],
        on_assign: Optional[Callable[[Mapping[Partition, int]], None]] = None,
        on_revoke: Optional[Callable[[Sequence[Partition]], None]] = None,
    ) -> None:
        """
        Subscribe to topics, replacing any previous subscription.

        The call does not block and the subscription may not take effect
        immediately. Instead, ``on_assign`` and ``on_revoke`` are invoked
        whenever the subscription state changes.

        ``on_assign``, when given, receives a mapping of the newly assigned
        partitions to their offsets (working and committed offsets are equal
        at that point). ``on_revoke``, when given, receives the sequence of
        partitions being removed from this consumer; offsets are not
        included because working and committed offsets may differ by then.

        Raises an ``InvalidState`` exception if called on a closed consumer.
        """
        self.__require_consuming()

        def handle_assignment(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                resolved: MutableSequence[ConfluentTopicPartition] = []
                for committed in self.__consumer.committed(partitions):
                    if committed.offset >= 0:
                        resolved.append(committed)
                        continue
                    if committed.offset != OFFSET_INVALID:
                        raise ValueError("received unexpected offset")
                    # Nothing committed yet: apply the configured policy.
                    resolved.append(
                        self.__resolve_partition_starting_offset(committed))

                offsets: MutableMapping[Partition, int] = {}
                for item in resolved:
                    offsets[Partition(Topic(item.topic),
                                      item.partition)] = item.offset

                self.__seek(offsets)

                # Ensure that all partitions are resumed on assignment to
                # avoid carrying over state from a previous assignment.
                resumable = [
                    ConfluentTopicPartition(assigned.topic.name,
                                            assigned.index, position)
                    for assigned, position in offsets.items()
                ]
                self.__consumer.resume(resumable)
                self.__paused.difference_update(offsets)
            except Exception:
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(offsets)
            finally:
                self.__state = KafkaConsumerState.CONSUMING

        def handle_revocation(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            revoked = [
                Partition(Topic(item.topic), item.partition)
                for item in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(revoked)
            finally:
                for partition in revoked:
                    # Staged offsets are deleted during partition revocation
                    # to prevent later committing offsets for partitions that
                    # are no longer owned by this consumer.
                    if partition in self.__staged_offsets:
                        logger.warning(
                            "Dropping staged offset for revoked partition (%r)!",
                            partition,
                        )
                        del self.__staged_offsets[partition]

                    if partition in self.__offsets:
                        del self.__offsets[partition]
                    else:
                        # If there was an error during assignment, this
                        # partition may never have been added to the offsets
                        # mapping.
                        logger.warning(
                            "failed to delete offset for unknown partition: %r",
                            partition,
                        )

                    self.__paused.discard(partition)

                self.__state = KafkaConsumerState.CONSUMING

        topic_names = [topic.name for topic in topics]
        self.__consumer.subscribe(
            topic_names,
            on_assign=handle_assignment,
            on_revoke=handle_revocation,
        )

    def unsubscribe(self) -> None:
        """
        Unsubscribe from topics.

        Raises an ``InvalidState`` exception if called on a closed consumer.
        """
        self.__require_consuming()
        self.__consumer.unsubscribe()

    def poll(self,
             timeout: Optional[float] = None) -> Optional[Message[TPayload]]:
        """
        Return the next available message, or ``None`` if there is none.

        Blocks for up to ``timeout`` when nothing is available: ``0.0``
        means "do not block" and ``None`` means "block until a message is
        available (or forever)". Subscription state change callbacks may be
        invoked from within this call.

        May raise ``EndOfPartition`` (a subtype of ``ConsumerError``) when
        the end of one subscribed partition has been reached; the exception
        identifies that partition. Because several partitions are
        multiplexed, this neither means that every partition is exhausted
        nor that later polls will stay empty. Not every backend supports or
        is configured for this behaviour.

        Raises an ``InvalidState`` exception if called on a closed consumer.

        Raises a ``TransportError`` for various other consumption-related
        errors.
        """
        self.__require_consuming()

        # A ``None`` timeout means "use the client's default", so the
        # argument is omitted rather than passed through.
        if timeout is None:
            message: Optional[ConfluentMessage] = self.__consumer.poll()
        else:
            message = self.__consumer.poll(timeout)

        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfPartition(
                    Partition(Topic(message.topic()), message.partition()),
                    message.offset(),
                )
            if code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            raise ConsumerError(str(error))

        headers: Optional[Headers] = message.headers()
        source = Partition(Topic(message.topic()), message.partition())
        position = message.offset()
        payload = self.__codec.decode(
            KafkaPayload(
                message.key(),
                message.value(),
                headers if headers is not None else [],
            ))
        # The client reports the timestamp in milliseconds.
        produced_at = datetime.utcfromtimestamp(
            message.timestamp()[1] / 1000.0)
        result = Message(source, position, payload, produced_at)

        # Track the position of the next message to read on this partition.
        self.__offsets[result.partition] = result.get_next_offset()

        return result

    def tell(self) -> Mapping[Partition, int]:
        """
        Return the read offsets for all assigned partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        self.__require_usable()
        return self.__offsets

    def __validate_offsets(self, offsets: Mapping[Partition, int]) -> None:
        # Negative offsets are rejected as a group so the error lists them all.
        invalid_offsets: MutableMapping[Partition, int] = {}
        for partition, offset in offsets.items():
            if offset < 0:
                invalid_offsets[partition] = offset

        if invalid_offsets:
            raise ConsumerError(f"invalid offsets: {invalid_offsets!r}")

    def __seek(self, offsets: Mapping[Partition, int]) -> None:
        self.__validate_offsets(offsets)

        targets = [
            ConfluentTopicPartition(partition.topic.name, partition.index,
                                    offset)
            for partition, offset in offsets.items()
        ]

        if self.__state is KafkaConsumerState.ASSIGNING:
            # Calling ``seek`` on the Confluent consumer from an assignment
            # callback will throw an "Erroneous state" error. Instead,
            # partition offsets have to be initialized by calling ``assign``.
            self.__consumer.assign(targets)
        else:
            for target in targets:
                self.__consumer.seek(target)

        self.__offsets.update(offsets)

    def seek(self, offsets: Mapping[Partition, int]) -> None:
        """
        Change the read offsets for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        self.__require_usable()

        unassigned = offsets.keys() - self.__offsets.keys()
        if unassigned:
            raise ConsumerError("cannot seek on unassigned partitions")

        self.__seek(offsets)

    def pause(self, partitions: Sequence[Partition]) -> None:
        """
        Pause the consumption of messages for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        self.__require_usable()

        unassigned = set(partitions) - self.__offsets.keys()
        if unassigned:
            raise ConsumerError("cannot pause unassigned partitions")

        targets = [
            ConfluentTopicPartition(partition.topic.name, partition.index)
            for partition in partitions
        ]
        self.__consumer.pause(targets)

        self.__paused.update(partitions)

        # XXX: Seeking to a specific partition offset and immediately pausing
        # that partition causes the seek to be ignored for some reason.
        current: MutableMapping[Partition, int] = {}
        for partition, offset in self.__offsets.items():
            if partition in partitions:
                current[partition] = offset
        self.seek(current)

    def resume(self, partitions: Sequence[Partition]) -> None:
        """
        Resume the consumption of messages for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        self.__require_usable()

        unassigned = set(partitions) - self.__offsets.keys()
        if unassigned:
            raise ConsumerError("cannot resume unassigned partitions")

        targets = [
            ConfluentTopicPartition(partition.topic.name, partition.index)
            for partition in partitions
        ]
        self.__consumer.resume(targets)

        self.__paused.difference_update(partitions)

    def paused(self) -> Sequence[Partition]:
        """Return the partitions currently recorded as paused."""
        self.__require_usable()
        return list(self.__paused)

    def stage_offsets(self, offsets: Mapping[Partition, int]) -> None:
        """Record offsets to be committed by the next ``commit_offsets``."""
        self.__require_usable()

        unassigned = offsets.keys() - self.__offsets.keys()
        if unassigned:
            raise ConsumerError(
                "cannot stage offsets for unassigned partitions")

        self.__validate_offsets(offsets)

        # TODO: Maybe log a warning if these offsets exceed the current
        # offsets, since that's probably a side effect of an incorrect usage
        # pattern?
        self.__staged_offsets.update(offsets)

    def __commit(self) -> Mapping[Partition, int]:
        self.__require_usable()

        committed: Optional[Sequence[ConfluentTopicPartition]]

        if not self.__staged_offsets:
            committed = []
        else:
            committed = self.__consumer.commit(
                offsets=[
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset)
                    for partition, offset in self.__staged_offsets.items()
                ],
                asynchronous=False,
            )

        assert committed is not None  # synchronous commit should return result immediately

        self.__staged_offsets.clear()

        offsets: MutableMapping[Partition, int] = {}

        for entry in committed:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``Partition`` objects returned by ``commit``. These
            # are an implementation detail of the Kafka Consumer, so we don't
            # expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if entry.offset not in self.LOGICAL_OFFSETS:
                assert entry.offset >= 0, "expected non-negative offset"
                offsets[Partition(Topic(entry.topic),
                                  entry.partition)] = entry.offset

        return offsets

    def commit_offsets(self) -> Mapping[Partition, int]:
        """
        Commit staged offsets for all partitions assigned to this consumer.

        Returns a mapping of partitions to their committed offsets. The
        commit is run through the configured retry policy.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        return self.__commit_retry_policy.call(self.__commit)

    def close(self, timeout: Optional[float] = None) -> None:
        """
        Close the underlying client and mark this consumer as closed.

        ``timeout`` is accepted but not used here; a ``RuntimeError`` raised
        by the underlying ``close`` call is suppressed.
        """
        try:
            self.__consumer.close()
        except RuntimeError:
            pass

        self.__state = KafkaConsumerState.CLOSED

    @property
    def closed(self) -> bool:
        """Whether ``close`` has completed on this consumer."""
        return self.__state is KafkaConsumerState.CLOSED
class BusConsumer:
    """Kafka consumer for the message bus, authenticated over SASL_SSL.

    Credentials come from ``LoadCredentials.load_bus_credentials()``. The
    first 16 characters of ``api_key`` are used as the SASL username and
    characters 16..48 as the SASL password.
    """

    # Seconds each poll blocks while waiting for a message. A non-zero value
    # keeps the listen loop from spinning at 100% CPU, while still letting
    # ``stop()`` take effect within roughly this delay.
    POLL_TIMEOUT_SECONDS = 1.0

    def __init__(self):
        """Load the pre-shared credentials and create the Kafka consumer."""
        # Pre-shared credentials
        self.credentials = bus.load_credentials.LoadCredentials.load_bus_credentials(
        )

        # Construct required configuration
        api_key = self.credentials['api_key']
        self.configuration = {
            'client.id': 'KB_consumer',
            'group.id': 'KB_consumer_group',
            'bootstrap.servers': ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': api_key[0:16],
            'sasl.password': api_key[16:48],
            'api.version.request': True
        }

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'messages.sqlite'

    def listen(self, topics):
        """Subscribe to ``topics`` and process messages until ``stop()`` is called.

        ``topics`` should be a list of topic names, e.g. ``['topic1', 'topic2']``.

        Returns ``False`` if the subscription fails. Otherwise returns
        ``None`` once the loop has ended and the consumer has been
        unsubscribed and closed. Messages that carry an error are skipped.
        """
        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            print("Error @ BusConsumer.listen()")
            print(e)
            return False

        try:
            # Continuous listening; ends when another caller invokes stop().
            while self.listening:
                msg = self.consumer.poll(self.POLL_TIMEOUT_SECONDS)

                # Only handle real, error-free messages
                if msg is None or msg.error() is not None:
                    continue

                payload = msg.value()
                try:
                    message_text = payload.decode('utf-8')
                except (AttributeError, UnicodeDecodeError):
                    # No payload or not valid UTF-8: pass the raw value on.
                    message_text = payload

                self.submit_message_process(message_text)
        finally:
            # Always release the consumer, even if message handling raised.
            try:
                self.consumer.unsubscribe()
            finally:
                self.consumer.close()

    def stop(self):
        """Ask the ``listen`` loop to exit after its current poll."""
        self.listening = False

    def submit_message_process(self, message):
        """Handle one received message.

        NOTE(review): ``message`` is currently not used; this only calls
        ``znGen.generateTopic()``. Confirm that is intended.
        """
        znGen.generateTopic()
class KafkaConsumer:
    """Kafka consumer that polls a topic and passes each message to a handler."""

    # Incremented once per instance and appended to the group id, so each
    # consumer created in this process gets its own consumer group.
    consumer_group_counter = 0

    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Create the consumer and subscribe it to ``topic_name_pattern``.

        Args:
            topic_name_pattern: topic name (or pattern) passed to ``subscribe``.
            message_handler: callable invoked with every error-free message.
            is_avro: when exactly ``True``, an ``AvroConsumer`` configured
                with the schema registry URL is used; otherwise a plain
                ``Consumer``.
            offset_earliest: when truthy, assigned partitions are rewound to
                the beginning in ``on_assign``.
            sleep_secs: pause between polling rounds in ``consume``.
            consume_timeout: timeout, in seconds, for each ``poll`` call.
        """
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        KafkaConsumer.consumer_group_counter += 1

        # The broker properties do not depend on ``offset_earliest``: the
        # rewind is done in ``on_assign``, so one dict covers both cases.
        self.broker_properties = {
            CTAConstants.MAP_KEY_BOOTSTRAP_SERVERS: CTAConstants.BOOTSTRAP_SERVERS,
            "group.id": f"{CTAConstants.CONSUMER_GRP_ID_PRFX}-{KafkaConsumer.consumer_group_counter}",
            "default.topic.config": {"auto.offset.reset": "earliest"},
        }

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties["schema.registry.url"] = CTAConstants.SCHEMA_REGISTRY_HOST
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)
        logger.info(f"Instantiated consumer and subscribed: ({self.topic_name_pattern})")

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place.

        When ``offset_earliest`` is set, every partition is rewound to the
        beginning before the assignment is handed back to ``consumer``.
        """
        logger.info("on_assign")
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consume data from the kafka topic, forever.

        Messages are drained until ``_consume`` reports none left, then the
        coroutine sleeps for ``sleep_secs`` before polling again.
        """
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Poll for a message.

        Returns 1 if a message (or an error event) was received, 0 when
        nothing was retrieved or the poll raised ``RuntimeError``. It always
        returns an int because ``consume`` compares the result with ``0``.
        """
        logger.debug("In _consume(%s)", self.topic_name_pattern)
        try:
            msg = self.consumer.poll(timeout=self.consume_timeout)
            if msg is None:
                logger.debug("No msg in topic yet.")
                return 0

            error = msg.error()
            if error is None:
                logger.debug("Got msg.")
                self.message_handler(msg)
            else:
                logger.error(
                    f"Error in consumer:{self.topic_name_pattern} while consuming msgs. Err code: {error.code()}, error-name: {error.name()}, error.str:{error.str()}"
                )
            return 1
        except RuntimeError as err:
            # Python 3 exceptions have no ``.message``; format the exception
            # itself. Report "nothing consumed" so the caller's loop ends.
            logger.error(f"Runtime error in consumer:{self.topic_name_pattern}. Err msg: {err}")
            return 0

    def close(self):
        """Clean up the open kafka consumer.

        Closing the consumer is used instead of only unassigning and
        unsubscribing, so that the client itself is shut down as well.
        """
        self.consumer.close()
class KafkaConsumer(BaseKafkaConsumer):
    """Kafka consumer configured from the ``"consumer"`` section of a config dict.

    Keys read from that section: ``conf`` (client configuration passed to
    ``Consumer``), ``topics``, ``poll_timeout``, and the optional
    ``assign_offset_end`` and ``internal_log_path``.
    """

    def __init__(self, config):
        """Build the underlying ``Consumer`` from ``config["consumer"]``.

        NOTE: ``config["consumer"]["conf"]`` is modified in place. A
        ``group.id`` is generated when missing, and ``error_cb`` (plus
        ``logger`` when ``internal_log_path`` is set) are added to it.
        """
        self._config = config["consumer"]
        self.assign_offset_end = self._config.get("assign_offset_end", False)
        conf = self._config["conf"]
        conf.setdefault("group.id", str(uuid.uuid1()))
        self.autocommit_enabled = conf.get("enable.auto.commit", True)
        internal_log_path = self._config.get("internal_log_path")
        conf["error_cb"] = self._error_callback
        if internal_log_path:
            debug_logger = logging.getLogger("debug_consumer")
            timestamp = time.strftime("_%d%m%Y_")
            log_file = "{}/kafka_consumer_debug{}{}.log".format(
                internal_log_path, timestamp, os.getpid())
            # The "debug_consumer" logger is shared process-wide. Attach the
            # file handler only once, otherwise every additional consumer
            # would duplicate each record in the same file.
            target = os.path.abspath(log_file)
            already_attached = any(
                isinstance(handler, logging.FileHandler)
                and getattr(handler, "baseFilename", None) == target
                for handler in debug_logger.handlers
            )
            if not already_attached:
                debug_logger.addHandler(logging.FileHandler(log_file))
            conf["logger"] = debug_logger
        self._consumer = Consumer(**conf)

    @staticmethod
    def on_assign_offset_end(consumer, partitions):
        """Assign callback: move every partition to the end offset, log, assign."""
        for p in partitions:
            p.offset = OFFSET_END
        KafkaConsumer.on_assign_log(consumer, partitions)
        consumer.assign(partitions)

    @staticmethod
    def on_coop_assign_offset_end(consumer, partitions):
        """Same as ``on_assign_offset_end`` but uses ``incremental_assign``."""
        for p in partitions:
            p.offset = OFFSET_END
        KafkaConsumer.on_assign_log(consumer, partitions)
        consumer.incremental_assign(partitions)

    @staticmethod
    def on_assign_log(consumer, partitions):
        """Log the assigned partitions; level is ERROR if any partition has an error."""
        # Normalise once so that both the error scan and the rendering below
        # tolerate a missing partition list.
        partitions = partitions or []
        log_level = "ERROR" if any(p.error for p in partitions) else "WARNING"
        params = {
            "partitions": str([str(partition) for partition in partitions]),
            log_const.KEY_NAME: log_const.KAFKA_ON_ASSIGN_VALUE,
            "log_level": log_level
        }
        log("KafkaConsumer.subscribe<on_assign>: assign %(partitions)s %(log_level)s", params=params, level=log_level)

    def subscribe(self, topics=None):
        """Subscribe to ``topics``, or to all configured topics when not given."""
        topics = topics or list(self._config["topics"].values())

        self._consumer.subscribe(
            topics,
            on_assign=self.get_on_assign_callback() if self.assign_offset_end else KafkaConsumer.on_assign_log)

    def get_on_assign_callback(self):
        """Return the offset-end assign callback matching the assignment strategy."""
        if "cooperative" in self._config["conf"].get(
                "partition.assignment.strategy", ""):
            callback = KafkaConsumer.on_coop_assign_offset_end
        else:
            callback = KafkaConsumer.on_assign_offset_end
        return callback

    def unsubscribe(self):
        """Drop the current subscription."""
        self._consumer.unsubscribe()

    def poll(self):
        """Poll once using the configured ``poll_timeout``.

        Returns the processed message, or ``None`` when nothing was received
        (or the message was filtered out by ``_process_message``).
        """
        msg = self._consumer.poll(self._config["poll_timeout"])
        if msg is not None:
            return self._process_message(msg)
        return None

    def consume(self, num_messages: int = 1):
        """Yield up to ``num_messages`` processed messages from one consume call."""
        messages = self._consumer.consume(num_messages=num_messages,
                                          timeout=self._config["poll_timeout"])
        for msg in messages:
            yield self._process_message(msg)

    def commit_offset(self, msg):
        """Store (auto-commit enabled) or asynchronously commit the offset of ``msg``.

        Does nothing when ``msg`` is ``None``.
        """
        if msg is None:
            return
        if self.autocommit_enabled:
            self._consumer.store_offsets(msg)
        else:
            # ``asynchronous`` is the documented keyword; ``async`` is a
            # reserved word since Python 3.7.
            self._consumer.commit(msg, asynchronous=True)

    def get_msg_create_time(self, mq_message):
        """Return the message timestamp, or ``None`` when it is not available."""
        timestamp_type, timestamp = mq_message.timestamp()
        # Compare by value: identity comparison on an int constant only works
        # by accident of interpreter caching.
        return timestamp if timestamp_type != TIMESTAMP_NOT_AVAILABLE else None

    def _error_callback(self, err):
        """Client-level error callback: log the error and bump the error counter."""
        params = {
            "error": str(err),
            log_const.KEY_NAME: log_const.EXCEPTION_VALUE
        }
        log("KafkaConsumer: Error: %(error)s", params=params, level="WARNING")
        monitoring.got_counter("kafka_consumer_exception")

    # noinspection PyMethodMayBeStatic
    def _process_message(self, msg: KafkaMessage):
        """Validate a raw message.

        Returns ``None`` for a partition-EOF event or a message with an empty
        value; otherwise returns ``msg`` with ``headers`` defaulted to an
        empty list.

        Raises:
            KafkaException: for any other message-level error (after logging
                it and bumping the error counter).
        """
        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                return None
            monitoring.got_counter("kafka_consumer_exception")
            params = {
                "code": err.code(),
                "pid": os.getpid(),
                "topic": msg.topic(),
                "partition": msg.partition(),
                "offset": msg.offset(),
                log_const.KEY_NAME: log_const.EXCEPTION_VALUE
            }
            log(
                "KafkaConsumer Error %(code)s at pid %(pid)s: topic=%(topic)s partition=[%(partition)s] "
                "reached end at offset %(offset)s\n", params=params, level="WARNING")
            raise KafkaException(err)

        if msg.value():
            if msg.headers() is None:
                msg.set_headers([])
            return msg
        return None

    def close(self):
        """Close the underlying consumer and log it."""
        self._consumer.close()
        log(f"consumer to topics {self._config['topics']} closed.")
def test_basic_api():
    """ Basic API tests, these won't really do anything since there is no
        broker configured. """

    # A consumer cannot be created without a configuration dict.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"
    else:
        raise AssertionError("Consumer() without configuration should raise TypeError")

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # ``async`` is a reserved keyword since Python 3.7; the commit argument
    # is spelled ``asynchronous``.
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    # -1001 is presumably the "invalid offset" sentinel (OFFSET_INVALID in
    # confluent_kafka) - TODO confirm and use the named constant.
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
class ConsumerServer:
    """Kafka consumer that polls a single topic pattern and logs what it receives."""

    def __init__(
        self,
        topic_name_pattern,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Create the consumer and subscribe it to ``topic_name_pattern``.

        Args:
            topic_name_pattern: topic name (or pattern) passed to ``subscribe``.
            offset_earliest: when exactly ``True``, assigned partitions are
                rewound to the beginning in ``on_assign``.
            sleep_secs: pause between polling rounds in ``consume``.
            consume_timeout: timeout, in seconds, for each ``poll`` call.
        """
        self.topic_name_pattern = topic_name_pattern
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9093",
            "group.id": "GRP.0",
            "max.poll.interval.ms": 600000
        }

        # Give the client its own copy so ``broker_properties`` stays a plain
        # record of the settings that were used.
        self.consumer = Consumer(dict(self.broker_properties))

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place.

        Rewinds every partition to the beginning when ``offset_earliest`` is
        ``True``, then applies the assignment.
        """
        if self.offset_earliest is True:
            for partition in partitions:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        self.consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consume data from the kafka topic, forever.

        Messages are drained until ``_consume`` reports none left, then the
        coroutine sleeps for ``sleep_secs`` before polling again.
        """
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Poll for a message.

        Returns 1 if a message or an error event was received, 0 otherwise.
        """
        message = self.consumer.poll(self.consume_timeout)
        if message is None:
            logger.debug("No message received on topic %s",
                         self.topic_name_pattern)
            return 0

        error = message.error()
        if error is not None:
            logger.error(
                "Error in receiving message from topic %s: %s",
                self.topic_name_pattern, error)
        else:
            # Lazy arguments: the payload is only rendered when DEBUG is on.
            logger.debug(
                "Received message from topic %s:\n %s",
                self.topic_name_pattern, message.value())
        return 1

    def close(self):
        """Clean up the open kafka consumer.

        Closing the consumer is used instead of only unsubscribing, so that
        the client itself is shut down as well.
        """
        self.consumer.close()