import pytest

from confluent_kafka import Consumer, TopicPartition


def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should throw a RuntimeError """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert ex.match('Consumer closed')
import pytest

from confluent_kafka import Consumer, TopicPartition


def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should throw a RuntimeError """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert 'Consumer closed' == str(ex.value)
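The two variants above assert the same contract in different ways: `ex.match()` runs a regex search over the exception text, while `str(ex.value)` compares the exact message. A minimal standalone sketch of the same post-close behaviour, assuming no broker is required (the error is raised client-side):

from confluent_kafka import Consumer

c = Consumer({'group.id': 'test',
              'socket.timeout.ms': 50,
              'session.timeout.ms': 100})
c.close()
try:
    c.poll(0)
except RuntimeError as e:
    # Every consumer method raises this same error once closed.
    assert str(e) == 'Consumer closed'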
from confluent_kafka import Consumer, TopicPartition, KafkaError, KafkaException


def test_basic_api():
    """ Basic API tests, these won't really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = list(map(lambda p: TopicPartition("test", p),
                          range(0, 100, 3)))
    kc.assign(partitions)
    kc.unassign()

    # 'async' was the parameter name in early confluent-kafka releases; it is
    # a reserved word in Python 3.7+, so use 'asynchronous' instead.
    kc.commit(asynchronous=True)
    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
import asyncio

from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING


async def consume(topic_name):
    c = Consumer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "group.id": "0",
        # "auto.offset.reset": "beginning"
    })

    topic_partition = TopicPartition(topic_name, 0, OFFSET_BEGINNING)
    # c.subscribe([topic_name])
    # c.subscribe([topic_name], on_assign=on_assign)
    c.assign([topic_partition])

    assignment = c.assignment()
    print(f"assignment: {assignment}")
    position = c.position([topic_partition])
    print(f"position: {position}")

    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            print(f"consumed message {message.key()}: {message.value()}")
        await asyncio.sleep(1)
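Since the coroutine loops forever, a caller would typically drive it with asyncio.run(); a minimal usage sketch, assuming a topic named "test" exists on the broker above:

if __name__ == "__main__":
    # Runs the consumer loop until interrupted with Ctrl-C.
    asyncio.run(consume("test"))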
from confluent_kafka import Consumer, TopicPartition


def morning_notice():
    # One topic is created per stock, with ten partitions (0-9):
    # partition 0 holds the snapshot fetched from futu, partition 1 futu's
    # real-time quotes, partition 2 futu's real-time candlesticks (K-line),
    # partition 3 futu's real-time per-minute data, partition 4 futu's
    # real-time tick-by-tick trades, partition 5 futu's real-time order book,
    # partition 6 futu's real-time broker queue; partitions 7-9 are
    # currently unused.
    consumer = Consumer({
        'bootstrap.servers': 'kafka01',
        'group.id': 'test',
        'enable.auto.commit': False,
        'default.topic.config': {'auto.offset.reset': 'largest'}
    })
    (rise_ratio_list_smallest, rise_ratio_list_largest) = \
        consumer.get_watermark_offsets(TopicPartition('test', 0))
    (volume_list_smallest, volume_list_largest) = \
        consumer.get_watermark_offsets(TopicPartition('test', 1))
    try:
        consumer.assign([TopicPartition('test', 0, rise_ratio_list_largest - 1)])
        consumer.seek(TopicPartition('test', 0, rise_ratio_list_largest - 1))
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        latest_rise_ratio = consumer.poll(3.0)
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        print(latest_rise_ratio)

        consumer.assign([TopicPartition('test', 1, volume_list_largest - 1)])
        consumer.seek(TopicPartition('test', 1, volume_list_largest - 1))
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        latest_volume = consumer.poll(3.0).value()
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        print(latest_volume)
    finally:
        consumer.close()
def test_send_offsets_committed_transaction(kafka_cluster):
    input_topic = kafka_cluster.create_topic("input_topic")
    output_topic = kafka_cluster.create_topic("output_topic")
    error_cb = prefixed_error_cb('test_send_offsets_committed_transaction')
    producer = kafka_cluster.producer({
        'client.id': 'producer1',
        'transactional.id': 'example_transactional_id',
        'error_cb': error_cb,
    })

    consumer_conf = {'group.id': str(uuid1()),
                     'auto.offset.reset': 'earliest',
                     'enable.auto.commit': False,
                     'enable.partition.eof': True,
                     'error_cb': error_cb}
    consumer_conf.update(kafka_cluster.client_conf())
    consumer = Consumer(consumer_conf)

    kafka_cluster.seed_topic(input_topic)
    consumer.subscribe([input_topic])

    read_all_msgs(consumer)

    producer.init_transactions()
    transactional_produce(producer, output_topic, 100)

    consumer_position = consumer.position(consumer.assignment())
    group_metadata = consumer.consumer_group_metadata()
    print("=== Sending offsets {} to transaction ===".format(consumer_position))
    producer.send_offsets_to_transaction(consumer_position, group_metadata)
    producer.commit_transaction()

    producer2 = kafka_cluster.producer({
        'client.id': 'producer2',
        'transactional.id': 'example_transactional_id',
        'error_cb': error_cb
    })

    # ensure offset commits are visible prior to sending FetchOffsets request
    producer2.init_transactions()

    committed_offsets = consumer.committed(consumer.assignment())
    print("=== Committed offsets for {} ===".format(committed_offsets))
    assert [tp.offset for tp in committed_offsets] == [100]
    consumer.close()
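Note that producer2 reuses the same transactional.id: its init_transactions() call fences the first producer and blocks until the previous transaction for that id has completed, which is why the committed offsets are guaranteed to be visible to the subsequent consumer.committed() call. (The kafka_cluster fixture and helpers such as prefixed_error_cb, read_all_msgs and transactional_produce are defined elsewhere in the test suite.)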
import pytest

from confluent_kafka import (Consumer, TopicPartition, KafkaError,
                             KafkaException, OFFSET_INVALID,
                             TIMESTAMP_NOT_AVAILABLE)


def test_basic_api():
    """ Basic API tests, these won't really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part),
                          range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5,
                                          cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), \
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == \
        len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
from uuid import uuid4

from confluent_kafka import Consumer, Producer


class KafkaClient(object):
    def __init__(self, kafka_bootstrap_servers, kafka_topic,
                 guid=None, partition=None):
        self.kafka_bootstrap_servers = kafka_bootstrap_servers
        self.kafka_topic = kafka_topic
        if partition:
            raise NotImplementedError("multiple partitions not supported yet")
        self.guid = guid
        if not self.guid:
            self.guid = str(uuid4())
        self.p = None
        self.c = None

    def produce(self, key, val):
        try:
            if not self.p:
                self.p = Producer({
                    'bootstrap.servers': self.kafka_bootstrap_servers,
                    'api.version.request': True
                })
            if not isinstance(key, bytes):
                raise TypeError('producing to kafka requires key to be raw bytes')
            if not isinstance(val, bytes) and val is not None:
                raise TypeError('producing to kafka requires val to be raw bytes or None')
            self.p.produce(topic=self.kafka_topic, value=val, key=key)
        except BufferError:
            # local producer queue is full: flush and retry once
            self.p.flush()
            self.p.produce(topic=self.kafka_topic, value=val, key=key)

    def flush_producer(self):
        if self.p:
            self.p.flush()

    def consume(self):
        if not self.c:
            self.c = Consumer({
                'bootstrap.servers': self.kafka_bootstrap_servers,
                'group.id': self.guid,
                'api.version.request': True,
                'log.connection.close': False,
                'socket.keepalive.enable': True,
                'session.timeout.ms': 6000,
                'default.topic.config': {'auto.offset.reset': 'smallest'}
            })
            self.c.subscribe([self.kafka_topic])

        # must perform an initial poll to get partition assignments
        first_message = True
        msg = self.c.poll(timeout=10.0)

        # grab watermarks from each assigned partition
        partitionobjs = self.c.assignment()
        partitions = {}
        for prt in partitionobjs:
            partition = prt.partition
            last_offset = self.c.get_watermark_offsets(prt)[1] - 1
            if last_offset < 0:  # if nothing in partition then this will be -1
                continue
            # if never read before then position() returns -1001
            position = max(self.c.position([prt])[0].offset - 1, -1)
            if last_offset > position:
                partitions[partition] = last_offset

        # process partitions up to watermarks (but remember that we already
        # consumed a message, so need to yield that)
        while first_message or len(partitions) > 0:
            if not first_message:
                msg = self.c.poll(timeout=10.0)
            else:
                first_message = False
            # NOTE: "if not msg" checks if message len = 0, which is
            # different from checking "if msg is None"
            if msg is None or msg.error():
                continue  # ignore errors
            partition = msg.partition()
            # first check is because we might read past the watermark for a
            # partition that we're already done with... but that's ok
            if partition in partitions and msg.offset() >= partitions[partition]:
                del partitions[partition]
            yield msg.key(), msg.value(), msg.timestamp()[1]

    def __del__(self):
        self.flush_producer()
        if self.c:
            self.c.close()
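A minimal usage sketch for the class above, assuming a broker at localhost:9092 and a hypothetical topic name:

client = KafkaClient("localhost:9092", "example-topic")  # hypothetical broker/topic
client.produce(b"key-1", b"value-1")
client.flush_producer()

# consume() is a generator that yields (key, value, timestamp) tuples until
# every assigned partition has been read up to the watermark captured at start.
for key, value, ts in client.consume():
    print(key, value, ts)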
from confluent_kafka import (Consumer, TopicPartition, KafkaError,
                             KafkaException, TIMESTAMP_NOT_AVAILABLE)


def test_basic_api():
    """ Basic API tests, these won't really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = list(map(lambda p: TopicPartition("test", p),
                          range(0, 100, 3)))
    kc.assign(partitions)
    kc.unassign()

    # 'async' was the parameter name in early confluent-kafka releases; it is
    # a reserved word in Python 3.7+, so use 'asynchronous' instead.
    kc.commit(asynchronous=True)
    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
def main(args):
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        # Do not advance committed offsets outside of the transaction.
        # Consumer offsets are committed along with the transaction
        # using the producer's send_offsets_to_transaction() API.
        'enable.auto.commit': False,
        'enable.partition.eof': True,
    })

    # Prior to KIP-447 being supported each input partition requires
    # its own transactional producer, so in this example we use
    # assign() to a single partition rather than subscribe().
    # A more complex alternative is to dynamically create a producer per
    # partition in subscribe's rebalance callback.
    consumer.assign([TopicPartition(input_topic, input_partition)])

    producer = Producer({
        'bootstrap.servers': brokers,
        'transactional.id': 'eos-transactions.py'
    })

    # Initialize producer transaction.
    producer.init_transactions()
    # Start producer transaction.
    producer.begin_transaction()

    eof = {}
    msg_cnt = 0
    print("=== Starting Consume-Transform-Process loop ===")
    while True:
        # serve delivery reports from previous produce()s
        producer.poll(0)

        # read message from input_topic
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        topic, partition = msg.topic(), msg.partition()
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                eof[(topic, partition)] = True
                print("=== Reached the end of {} [{}] at {}====".format(
                    topic, partition, msg.offset()))
                if len(eof) == len(consumer.assignment()):
                    print("=== Reached end of input ===")
                    break
            continue

        # clear EOF if a new message has been received
        eof.pop((topic, partition), None)

        msg_cnt += 1

        # process message
        processed_key, processed_value = process_input(msg)

        # produce transformed message to output topic
        producer.produce(output_topic, processed_value, processed_key,
                         on_delivery=delivery_report)

        if msg_cnt % 100 == 0:
            print("=== Committing transaction with {} messages at input offset {} ==="
                  .format(msg_cnt, msg.offset()))
            # Send the consumer's position to transaction to commit
            # them along with the transaction, committing both
            # input and outputs in the same transaction is what provides EOS.
            producer.send_offsets_to_transaction(
                consumer.position(consumer.assignment()),
                consumer.consumer_group_metadata())

            # Commit the transaction
            producer.commit_transaction()

            # Begin new transaction
            producer.begin_transaction()
            msg_cnt = 0

    print("=== Committing final transaction with {} messages ===".format(msg_cnt))

    # commit processed message offsets to the transaction
    producer.send_offsets_to_transaction(
        consumer.position(consumer.assignment()),
        consumer.consumer_group_metadata())

    # commit transaction
    producer.commit_transaction()

    consumer.close()
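The loop calls two helpers, process_input() and delivery_report(), that are defined elsewhere in the script; a minimal hypothetical sketch of what they might look like:

def process_input(msg):
    # Hypothetical transform: pass the key through, upper-case the value.
    return msg.key(), msg.value().upper()


def delivery_report(err, msg):
    # Invoked from producer.poll()/flush() for each produced message.
    if err is not None:
        print("Delivery failed for {}: {}".format(msg.key(), err))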
            timestamp=record_json.get("phenomenonTime"),
            result=record_json.get("result"),
            topic=msg.topic(), partition=msg.partition(), offset=msg.offset(),
            **additional_attributes)

        # ingest the record into the StreamBuffer instance, instant emit
        if record.get("topic") == KAFKA_TOPIC_IN_1:    # Car1
            stream_buffer.ingest_left(record)          # with instant emit
        elif record.get("topic") == KAFKA_TOPIC_IN_2:  # Car2
            stream_buffer.ingest_right(record)
except KeyboardInterrupt:
    print("Gracefully stopping")
finally:
    ts_stop = time.time()

    # commit processed message offsets to the transaction
    kafka_producer.send_offsets_to_transaction(
        kafka_consumer.position(kafka_consumer.assignment()),
        kafka_consumer.consumer_group_metadata())
    # commit transaction
    kafka_producer.commit_transaction()
    # Leave group and commit offsets
    kafka_consumer.close()

    print(f"\nRecords in |{KAFKA_TOPIC_OUT}| = {stream_buffer.get_join_counter()}, "
          f"|{KAFKA_TOPIC_IN_1}| = {stream_buffer.get_left_counter()}, "
          f"|{KAFKA_TOPIC_IN_2}| = {stream_buffer.get_right_counter()}.")
    print(f"Joined time-series {ts_stop - st0:.5g} s long, "
          f"that is {stream_buffer.get_join_counter() / (ts_stop - st0):.6g} joins per second.")
# subscribe() and assign() cannot be used at the same time. subscribe()
# subscribes to a topic and consumes from the offsets Kafka has recorded for
# the group; assign() consumes from explicitly specified offsets.
consumer.subscribe(['test'])

# consume one or a few specific partitions of a topic
consumer.assign([TopicPartition('test', 4)])

# reset the offset
consumer.assign([TopicPartition('test', 4, 2)])

# get the low and high watermark offsets of a partition
consumer.get_watermark_offsets(TopicPartition('test', 4))  # (0, 19)

# A brand-new group.id must consume at least one message first for the offset
# reset below to take effect; without consuming, the offsets reported before
# and after the reset are both -1001.
# get the current offset position
consumer.position([TopicPartition('test', 3)])

# Reset the offset to an arbitrary position. The committed offset determines
# where the *next* connection starts (per consumer group) and does not affect
# the current one; after resetting it, close() and reconnect for it to take
# effect. The current connection's offset is determined by position and is
# changed with seek().
consumer.seek(TopicPartition('test', 3, 1))
consumer.commit(offsets=[TopicPartition('test', 3, 7)])

# check the reset position
msg = consumer.committed([TopicPartition('test', 3)])
print(msg)

# offset: either an absolute offset (>=0) or a logical offset:
# OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID
while True:
    msg = consumer.poll(3.0)
    if msg is None:
        continue
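A short sketch of the logical offsets listed in the last comment, reusing the consumer from the snippet above (topic and partition numbers are just the snippet's examples):

from confluent_kafka import OFFSET_BEGINNING, OFFSET_END, TopicPartition

# Start from the first available message, ignoring committed offsets...
consumer.assign([TopicPartition('test', 3, OFFSET_BEGINNING)])
# ...or skip everything already in the log and read only new messages.
consumer.assign([TopicPartition('test', 3, OFFSET_END)])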
def replicate(topic, rerun, delete, source, src_groupid, target, trg_groupid,
              trg_partitions):
    global source_partitions

    # Connect to source kafka cluster
    src = Consumer({'bootstrap.servers': source,
                    'group.id': src_groupid,
                    'auto.offset.reset': 'smallest',
                    'enable.auto.commit': False})

    # Connect to target kafka cluster
    trg = Consumer({'bootstrap.servers': target,
                    'group.id': trg_groupid})

    admin_client = KafkaAdminClient(bootstrap_servers=TRG_BOOTSTRAP_SERVERS,
                                    client_id=TRG_GROUP_ID)
    if delete:
        logger.warning(f"DELETING topic {topic} on {TRG_BOOTSTRAP_SERVERS} as requested")
        admin_client.delete_topics([topic])
        logger.warning(f"DELETION of {topic} completed.")

    logger.info(f"source cluster: {source} source group_id: {src_groupid}")
    logger.info(f"target cluster: {target} target group_id: {trg_groupid}")

    # Determine if latest source topic is at least partially loaded to target
    trg_topics, the_topic, offset_sum_delta = determine_topic(topic, src, trg, rerun)

    src_cm = src.list_topics()  # returns ClusterMetadata
    if the_topic not in src_cm.topics:
        logger.error(f"Current topics in {source} with group id {src_groupid} are:")
        logger.error(f"{src_cm.topics}")
        logger.error(f"Topic {topic} not in cluster {source} with group id {src_groupid}")
        sys.exit(1)

    src_partition_count = len(src_cm.topics[the_topic].partitions)
    logger.info(f"topic: {the_topic} has # of partitions: {src_partition_count}")

    # Calculate multiplier for demuxing
    # Example:
    #   source = 4  target = 9  then multiplier is 9/4 = 2.25
    #   int(2.25) = 2
    multiplier = int(trg_partitions / src_partition_count)
    trg_partition_count = src_partition_count * multiplier
    logger.info(f"multiplier={multiplier} target_partition_count={trg_partition_count}")

    # Add the new topic in target cluster
    if the_topic not in trg_topics:
        logger.info(f"replicate {the_topic} to {TRG_BOOTSTRAP_SERVERS} with source group id: {src_groupid}")
        topic_list = [NewTopic(name=the_topic,
                               num_partitions=trg_partition_count,
                               replication_factor=1)]
        try:
            logger.info(f"Creating topic {the_topic} with {trg_partition_count} partitions")
            admin_client.create_topics(new_topics=topic_list, validate_only=False)
        except kafka.errors.TopicAlreadyExistsError:
            logger.info(f"Topic already exists in {TRG_BOOTSTRAP_SERVERS} ")

    part_map = create_part_map(src_partition_count, multiplier)

    # Get offset status for each partition
    logger.info(f"Source broker partitions for topic {the_topic}")
    logger.info("-------------------------------------------------------------------------")
    parts = {}
    total_committed = 0
    total_offsets = 0
    for part in src_cm.topics[the_topic].partitions:
        tp = TopicPartition(the_topic, part)
        tp.offset = confluent_kafka.OFFSET_BEGINNING
        src.assign([tp])
        any_committed = src.committed([tp])
        committed = any_committed[0].offset
        total_committed += committed
        end_offset = src.get_watermark_offsets(tp, cached=False)[1]
        position = src.position([tp])[0].offset
        if position == confluent_kafka.OFFSET_BEGINNING:
            position = 0
        elif position == confluent_kafka.OFFSET_END:
            position = end_offset
        elif position == confluent_kafka.OFFSET_INVALID:
            position = 0
        parts[str(part)] = end_offset
        total_offsets += end_offset
        logger.info("Source topic: %s partition: %s end offset: %s committed: %s position: %s lag: %s"
                    % (the_topic, part, end_offset, committed, position, (position - committed)))

    src.close()
    logger.info(f"Source: total_committed={total_committed} total_offsets={total_offsets}")
    logger.info("=========================================================================")
    logger.info(f"Starting multi-process: the_topic={the_topic} rerun={rerun} src_partition_count={src_partition_count}")

    procs = [mp.Process(target=proc_replicate,
                        args=(the_topic, part, parts[str(part)], part_map, rerun))
             for part in range(0, src_partition_count)]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    logger.info(f"END")