def verify_stats_cb():
    """ Verify stats_cb """

    def stats_cb(stats_json_str):
        global good_stats_cb_result
        stats_json = json.loads(stats_json_str)
        if topic in stats_json['topics']:
            app_offset = stats_json['topics'][topic]['partitions']['0']['app_offset']
            if app_offset > 0:
                print("# app_offset stats for topic %s partition 0: %d" %
                      (topic, app_offset))
                good_stats_cb_result = True

    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': uuid.uuid1(),
            'session.timeout.ms': 6000,
            'error_cb': error_cb,
            'stats_cb': stats_cb,
            'statistics.interval.ms': 200,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }}

    c = confluent_kafka.Consumer(**conf)
    c.subscribe([topic])

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while not good_stats_cb_result:
        # Consume until EOF or error

        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception('Stalled at %d/%d message, no new messages for 20s' %
                            (msgcnt, max_msgcnt))

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                # Reached EOF for a partition, ignore.
                continue
            else:
                raise confluent_kafka.KafkaException(msg.error())

        bytecnt += len(msg)
        msgcnt += 1

        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

        if msgcnt == 1:
            t_first_msg = time.time()
        if msgcnt >= max_msgcnt:
            break

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
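
# Illustrative sketch (not part of the original test): a stats_cb can also be
# used to track consumer lag per partition. It relies on the module-level
# `json` import already used above. The field names ('topics', 'partitions',
# 'consumer_lag') are assumed to follow librdkafka's statistics JSON schema;
# this hypothetical callback only shows the parsing pattern.
def example_lag_stats_cb(stats_json_str):
    stats = json.loads(stats_json_str)
    for topic_name, topic_stats in stats.get('topics', {}).items():
        for partition_id, partition_stats in topic_stats.get('partitions', {}).items():
            lag = partition_stats.get('consumer_lag', -1)
            if lag >= 0:
                print('# consumer_lag for %s [%s]: %d' % (topic_name, partition_id, lag))
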
def verify_batch_consumer():
    """ Verify basic batch Consumer functionality """

    # Consumer config
    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': 'test.py',
            'session.timeout.ms': 6000,
            'enable.auto.commit': False,
            'api.version.request': api_version_request,
            'on_commit': print_commit_result,
            'error_cb': error_cb,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }}

    # Create consumer
    c = confluent_kafka.Consumer(**conf)

    # Subscribe to a list of topics
    c.subscribe([topic])

    max_msgcnt = 1000
    batch_cnt = 100
    msgcnt = 0

    while msgcnt < max_msgcnt:
        # Consume until we hit max_msgcnt

        # Consume messages (error()==0) or events (error()!=0)
        msglist = c.consume(batch_cnt, 10.0)
        assert len(msglist) == batch_cnt, \
            'expected %d messages, not %d' % (batch_cnt, len(msglist))

        for msg in msglist:
            if msg.error():
                print('Consumer error: %s: ignoring' % msg.error())
                continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), tstype, timestamp))

            if (msg.offset() % 5) == 0:
                # Async commit
                c.commit(msg, asynchronous=True)
            elif (msg.offset() % 4) == 0:
                offsets = c.commit(msg, asynchronous=False)
                assert len(offsets) == 1, 'expected 1 offset, not %s' % (offsets)
                assert offsets[0].offset == msg.offset()+1, \
                    'expected offset %d to be committed, not %s' % \
                    (msg.offset(), offsets)
                print('Sync committed offset: %s' % offsets)

            msgcnt += 1

    print('max_msgcnt %d reached' % msgcnt)

    # Get current assignment
    assignment = c.assignment()

    # Get cached watermark offsets
    # Since we're not making use of statistics the low offset is not known so ignore it.
    lo, hi = c.get_watermark_offsets(assignment[0], cached=True)
    print('Cached offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query broker for offsets
    lo, hi = c.get_watermark_offsets(assignment[0], timeout=1.0)
    print('Queried offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Close consumer
    c.close()

    # Start a new client and get the committed offsets
    c = confluent_kafka.Consumer(**conf)
    offsets = c.committed(list(map(lambda p: confluent_kafka.TopicPartition(topic, p),
                                   range(0, 3))))
    for tp in offsets:
        print(tp)

    c.close()
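
# Illustrative sketch (not part of the original test): besides committing by
# message as above, Consumer.commit() also accepts an explicit offsets= list.
# The consumer and message arguments are hypothetical; note that the committed
# offset should be the offset of the *next* message to consume (offset + 1).
def example_commit_explicit_offset(consumer, msg):
    next_offset = msg.offset() + 1
    tp = confluent_kafka.TopicPartition(msg.topic(), msg.partition(), next_offset)
    committed = consumer.commit(offsets=[tp], asynchronous=False)
    print('Explicitly committed: %s' % committed)
    return committed
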
def verify_batch_consumer_performance():
    """ Verify batch Consumer performance """

    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': uuid.uuid1(),
            'session.timeout.ms': 6000,
            'error_cb': error_cb,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }}

    c = confluent_kafka.Consumer(**conf)

    def my_on_assign(consumer, partitions):
        print('on_assign:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.assign(partitions)

    def my_on_revoke(consumer, partitions):
        print('on_revoke:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.unassign()

    c.subscribe([topic], on_assign=my_on_assign, on_revoke=my_on_revoke)

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0
    batch_size = 1000

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while msgcnt < max_msgcnt:
        # Consume until we hit max_msgcnt

        msglist = c.consume(num_messages=batch_size, timeout=20.0)

        for msg in msglist:
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    # Reached EOF for a partition, ignore.
                    continue
                else:
                    raise confluent_kafka.KafkaException(msg.error())

            bytecnt += len(msg)
            msgcnt += 1

            if bar is not None and (msgcnt % 10000) == 0:
                bar.next(n=10000)

            if msgcnt == 1:
                t_first_msg = time.time()

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
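
# Illustrative sketch (not part of the original test): for a performance run it
# can be useful to skip the subscribe/rebalance step and assign partitions
# directly at the start of the log. The partition count of 3 is an assumption
# made for illustration; confluent_kafka.OFFSET_BEGINNING is the logical
# "beginning of partition" offset.
def example_assign_from_beginning(consumer, topic_name, num_partitions=3):
    parts = [confluent_kafka.TopicPartition(topic_name, p, confluent_kafka.OFFSET_BEGINNING)
             for p in range(num_partitions)]
    consumer.assign(parts)
    return parts
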
def verify_consumer():
    """ Verify basic Consumer functionality """

    # Consumer config
    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': 'test.py',
            'session.timeout.ms': 6000,
            'enable.auto.commit': False,
            'api.version.request': api_version_request,
            'on_commit': print_commit_result,
            'error_cb': error_cb,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }}

    # Create consumer
    c = confluent_kafka.Consumer(**conf)

    def print_wmark(consumer, parts):
        # Verify #294: get_watermark_offsets() should not fail on the first call
        #              This is really a librdkafka issue.
        for p in parts:
            wmarks = consumer.get_watermark_offsets(parts[0])
            print('Watermarks for %s: %s' % (p, wmarks))

    # Subscribe to a list of topics
    c.subscribe([topic], on_assign=print_wmark)

    max_msgcnt = 100
    msgcnt = 0

    first_msg = None

    while True:
        # Consume until EOF or error

        # Consume message (error()==0) or event (error()!=0)
        msg = c.poll()
        if msg is None:
            raise Exception('Got timeout from poll() without a timeout set: %s' % msg)

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                print('Reached end of %s [%d] at offset %d' %
                      (msg.topic(), msg.partition(), msg.offset()))
                break
            else:
                print('Consumer error: %s: ignoring' % msg.error())
                break

        tstype, timestamp = msg.timestamp()
        headers = msg.headers()
        if headers:
            example_header = headers

        msg.set_headers([('foo', 'bar')])
        assert msg.headers() == [('foo', 'bar')]

        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s headers=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), tstype, timestamp, headers))

        if first_msg is None:
            first_msg = msg

        if (msgcnt == 11):
            parts = c.assignment()
            print('Pausing partitions briefly')
            c.pause(parts)
            exp_None = c.poll(timeout=2.0)
            assert exp_None is None, \
                "expected no messages during pause, got %s" % exp_None
            print('Resuming partitions')
            c.resume(parts)

        if (msg.offset() % 5) == 0:
            # Async commit
            c.commit(msg, asynchronous=True)
        elif (msg.offset() % 4) == 0:
            offsets = c.commit(msg, asynchronous=False)
            assert len(offsets) == 1, 'expected 1 offset, not %s' % (offsets)
            assert offsets[0].offset == msg.offset()+1, \
                'expected offset %d to be committed, not %s' % \
                (msg.offset(), offsets)
            print('Sync committed offset: %s' % offsets)

        msgcnt += 1
        if msgcnt >= max_msgcnt:
            print('max_msgcnt %d reached' % msgcnt)
            break

    assert example_header, "We should have received at least one header"
    assert example_header == [(u'foo1', 'bar'), (u'foo1', 'bar2'), (u'foo2', '1')]

    # Get current assignment
    assignment = c.assignment()

    # Get cached watermark offsets
    # Since we're not making use of statistics the low offset is not known so ignore it.
    lo, hi = c.get_watermark_offsets(assignment[0], cached=True)
    print('Cached offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query broker for offsets
    lo, hi = c.get_watermark_offsets(assignment[0], timeout=1.0)
    print('Queried offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query offsets for timestamps by setting the topic partition offset to
    # a timestamp: 123456789000 + 1
    topic_partitions_to_search = list(
        map(lambda p: confluent_kafka.TopicPartition(topic, p, 123456789001),
            range(0, 3)))
    print("Searching for offsets with %s" % topic_partitions_to_search)

    offsets = c.offsets_for_times(topic_partitions_to_search, timeout=1.0)
    print("offsets_for_times results: %s" % offsets)

    verify_consumer_seek(c, first_msg)

    # Close consumer
    c.close()

    # Start a new client and get the committed offsets
    c = confluent_kafka.Consumer(**conf)
    offsets = c.committed(list(map(lambda p: confluent_kafka.TopicPartition(topic, p),
                                   range(0, 3))))
    for tp in offsets:
        print(tp)

    c.close()
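
# Illustrative sketch (not part of the original test, and not the
# verify_consumer_seek() helper called above, which is defined elsewhere in
# this file): seek back to a previously consumed message's offset and re-read
# it. The consumer is assumed to still have that message's partition assigned
# and actively fetched.
def example_seek_and_reread(consumer, msg):
    tp = confluent_kafka.TopicPartition(msg.topic(), msg.partition(), msg.offset())
    consumer.seek(tp)
    replayed = consumer.poll(timeout=10.0)
    if replayed is not None and not replayed.error():
        assert replayed.offset() == msg.offset(), \
            'expected replayed offset %d, got %d' % (msg.offset(), replayed.offset())
    return replayed
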