def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should thorw a RuntimeError
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert ex.match('Consumer closed')
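
# Every Consumer method raises RuntimeError('Consumer closed') once close() has
# been called, so scoping the consumer's lifetime avoids use-after-close bugs.
# A minimal sketch using contextlib.closing (which works because Consumer
# exposes a close() method); the config values and names are illustrative:
from contextlib import closing

def poll_once(conf, topics):
    with closing(Consumer(conf)) as c:
        c.subscribe(topics)
        return c.poll(timeout=1.0)
    # c is closed here; any further call on it would raise RuntimeError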
import asyncio

from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING


async def consume(topic_name):
    c = Consumer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "group.id": "0",
        # "auto.offset.reset": "beginning"
    })

    topic_partition = TopicPartition(topic_name, 0, OFFSET_BEGINNING)

    # c.subscribe([topic_name])
    # c.subscribe([topic_name], on_assign=on_assign)
    c.assign([topic_partition])

    assignment = c.assignment()
    print(f"assignment: {assignment}")

    position = c.position([topic_partition])
    print(f"position: {position}")

    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            print(f"consumed message {message.key()}: {message.value()}")
        await asyncio.sleep(1)
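
# A usage sketch for the coroutine above; asyncio.run() drives the (infinite)
# consume loop. The topic name is an illustrative assumption:
if __name__ == "__main__":
    asyncio.run(consume("my_topic"))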
def morning_notice():
    # Each stock gets its own topic with 10 partitions: partition 0 holds the
    # snapshot fetched from futu, partition 1 futu's real-time quotes,
    # partition 2 futu's real-time candlesticks (K-lines), partition 3 futu's
    # real-time time-share data, partition 4 futu's real-time tick-by-tick
    # trades, partition 5 futu's real-time order book, partition 6 futu's
    # real-time broker queue; partitions 7-9 are unused for now.
    consumer = Consumer({
        'bootstrap.servers': 'kafka01',
        'group.id': 'test',
        'enable.auto.commit': False,
        'default.topic.config': {
            'auto.offset.reset': 'largest'
        }
    })

    (rise_ratio_list_smallest,
     rise_ratio_list_largest) = consumer.get_watermark_offsets(
         TopicPartition('test', 0))
    (volume_list_smallest,
     volume_list_largest) = consumer.get_watermark_offsets(
         TopicPartition('test', 1))
    try:
        consumer.assign(
            [TopicPartition('test', 0, rise_ratio_list_largest - 1)])
        consumer.seek(TopicPartition('test', 0, rise_ratio_list_largest - 1))
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        latest_rise_ratio = consumer.poll(3.0)
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))

        print(latest_rise_ratio)
        consumer.assign([TopicPartition('test', 1, volume_list_largest - 1)])
        consumer.seek(TopicPartition('test', 1, volume_list_largest - 1))
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        latest_volume = consumer.poll(3.0).value()
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        print(latest_volume)
    finally:
        consumer.close()
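
# The assign/seek pattern above can be wrapped in a small helper that jumps to
# the last record of a partition and returns it. A sketch, assuming the
# partition is non-empty and the broker responds within the timeout:
def read_latest(consumer, topic, partition, timeout=3.0):
    lo, hi = consumer.get_watermark_offsets(TopicPartition(topic, partition))
    if hi <= lo:
        return None  # empty partition, nothing to read
    consumer.assign([TopicPartition(topic, partition, hi - 1)])
    return consumer.poll(timeout)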
def test_send_offsets_committed_transaction(kafka_cluster):
    input_topic = kafka_cluster.create_topic("input_topic")
    output_topic = kafka_cluster.create_topic("output_topic")
    error_cb = prefixed_error_cb('test_send_offsets_committed_transaction')
    producer = kafka_cluster.producer({
        'client.id': 'producer1',
        'transactional.id': 'example_transactional_id',
        'error_cb': error_cb,
    })

    consumer_conf = {
        'group.id': str(uuid1()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': False,
        'enable.partition.eof': True,
        'error_cb': error_cb
    }
    consumer_conf.update(kafka_cluster.client_conf())
    consumer = Consumer(consumer_conf)

    kafka_cluster.seed_topic(input_topic)
    consumer.subscribe([input_topic])

    read_all_msgs(consumer)

    producer.init_transactions()
    transactional_produce(producer, output_topic, 100)

    consumer_position = consumer.position(consumer.assignment())
    group_metadata = consumer.consumer_group_metadata()
    print(
        "=== Sending offsets {} to transaction ===".format(consumer_position))
    producer.send_offsets_to_transaction(consumer_position, group_metadata)
    producer.commit_transaction()

    producer2 = kafka_cluster.producer({
        'client.id': 'producer2',
        'transactional.id': 'example_transactional_id',
        'error_cb': error_cb
    })

    # ensure offset commits are visible prior to sending FetchOffsets request
    producer2.init_transactions()

    committed_offsets = consumer.committed(consumer.assignment())
    print("=== Committed offsets for {} ===".format(committed_offsets))

    assert [tp.offset for tp in committed_offsets] == [100]

    consumer.close()
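
# A downstream consumer can be restricted to transactionally committed records
# via librdkafka's 'isolation.level' setting. A hedged config sketch (the group
# id is an arbitrary example; add 'bootstrap.servers' for a real cluster):
verifier = Consumer({
    'group.id': 'verify-eos',
    'auto.offset.reset': 'earliest',
    'isolation.level': 'read_committed',  # skip records from aborted transactions
})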
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(
        map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0],
                                          timeout=0.5,
                                          cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), \
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions
                if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    kc.close()
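
# The recurring pattern above (catch KafkaException, inspect the wrapped
# KafkaError's code()) can be factored into a retry helper. A sketch; the retry
# count and the set of retriable codes are illustrative assumptions:
def list_topics_with_retry(consumer, retries=3, timeout=0.2):
    for _ in range(retries):
        try:
            return consumer.list_topics(timeout=timeout)
        except KafkaException as e:
            if e.args[0].code() not in (KafkaError._TIMED_OUT,
                                        KafkaError._TRANSPORT):
                raise
    return None  # all attempts timed out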
class KafkaClient(object):
    def __init__(self,
                 kafka_bootstrap_servers,
                 kafka_topic,
                 guid=None,
                 partition=None):
        self.kafka_bootstrap_servers = kafka_bootstrap_servers
        self.kafka_topic = kafka_topic
        if partition:
            raise NotImplementedError("multiple partitions not supported yet")
        self.guid = guid
        if not self.guid:
            self.guid = str(uuid4())

        self.p = None
        self.c = None

    def produce(self, key, val):
        try:
            if not self.p:
                self.p = Producer({
                    'bootstrap.servers': self.kafka_bootstrap_servers,
                    'api.version.request': True
                })
            if not isinstance(key, bytes):
                raise TypeError(
                    'producing to kafka requires key to be raw bytes')
            if not isinstance(val, bytes) and val is not None:
                raise TypeError(
                    'producing to kafka requires val to be raw bytes or None')
            self.p.produce(topic=self.kafka_topic, value=val, key=key)
        except BufferError:
            self.p.flush()
            self.p.produce(topic=self.kafka_topic, value=val, key=key)

    def flush_producer(self):
        if self.p:
            self.p.flush()

    def consume(self):
        if not self.c:
            self.c = Consumer({
                'bootstrap.servers': self.kafka_bootstrap_servers,
                'group.id': self.guid,
                'api.version.request': True,
                'log.connection.close': False,
                'socket.keepalive.enable': True,
                'session.timeout.ms': 6000,
                'default.topic.config': {
                    'auto.offset.reset': 'smallest'
                }
            })
            self.c.subscribe([self.kafka_topic])

        # must perform an initial poll to get partition assignments
        first_message = True
        msg = self.c.poll(timeout=10.0)

        # grab watermarks from partition
        partitionobjs = self.c.assignment()
        partitions = {}
        for prt in partitionobjs:
            partition = prt.partition
            last_offset = self.c.get_watermark_offsets(prt)[1] - 1
            if last_offset < 0:  # if nothing in partition then this will be -1
                continue
            position = max(
                self.c.position([prt])[0].offset - 1, -1
            )  # if never read before then call returns -1001 for some reason
            if last_offset > position:
                partitions[partition] = last_offset

        # process partitions up to watermarks (but remember that we already consumed a message, so need to yield that)
        while first_message or len(partitions) > 0:
            if not first_message:
                msg = self.c.poll(timeout=10.0)
            else:
                first_message = False
            # NOTE: "if not msg" would check whether the message length is 0,
            # which is different from checking "if msg is None".
            if msg is None or msg.error():
                continue  # ignore errors
            partition = msg.partition()
            # The first check is needed because we might read past the watermark
            # for a partition that we're already done with... but that's ok.
            if partition in partitions and msg.offset() >= partitions[partition]:
                del partitions[partition]
            yield msg.key(), msg.value(), msg.timestamp()[1]

    def __del__(self):
        self.flush_producer()
        if self.c:
            self.c.close()
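
# A usage sketch for KafkaClient; the broker address and topic are placeholder
# assumptions. consume() is a generator, so iterate over it:
client = KafkaClient('localhost:9092', 'demo-topic')
client.produce(b'key-1', b'value-1')
client.flush_producer()
for key, value, ts in client.consume():
    print(key, value, ts)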
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = list(
        map(lambda p: TopicPartition("test", p), range(0, 100, 3)))
    kc.assign(partitions)

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
def main(args):
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        # Do not advance committed offsets outside of the transaction.
        # Consumer offsets are committed along with the transaction
        # using the producer's send_offsets_to_transaction() API.
        'enable.auto.commit': False,
        'enable.partition.eof': True,
    })

    # Prior to KIP-447 being supported each input partition requires
    # its own transactional producer, so in this example we use
    # assign() to a single partition rather than subscribe().
    # A more complex alternative is to dynamically create a producer per
    # partition in subscribe's rebalance callback.
    consumer.assign([TopicPartition(input_topic, input_partition)])

    producer = Producer({
        'bootstrap.servers': brokers,
        'transactional.id': 'eos-transactions.py'
    })

    # Initialize producer transaction.
    producer.init_transactions()
    # Start producer transaction.
    producer.begin_transaction()

    eof = {}
    msg_cnt = 0
    print("=== Starting Consume-Transform-Process loop ===")
    while True:
        # serve delivery reports from previous produce()s
        producer.poll(0)

        # read message from input_topic
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        topic, partition = msg.topic(), msg.partition()
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                eof[(topic, partition)] = True
                print("=== Reached the end of {} [{}] at {}====".format(
                    topic, partition, msg.offset()))

                if len(eof) == len(consumer.assignment()):
                    print("=== Reached end of input ===")
                    break
            continue
        # clear EOF if a new message has been received
        eof.pop((topic, partition), None)

        msg_cnt += 1

        # process message
        processed_key, processed_value = process_input(msg)

        # produce transformed message to output topic
        producer.produce(output_topic,
                         processed_value,
                         processed_key,
                         on_delivery=delivery_report)

        if msg_cnt % 100 == 0:
            print(
                "=== Committing transaction with {} messages at input offset {} ==="
                .format(msg_cnt, msg.offset()))
            # Send the consumer's position to transaction to commit
            # them along with the transaction, committing both
            # input and outputs in the same transaction is what provides EOS.
            producer.send_offsets_to_transaction(
                consumer.position(consumer.assignment()),
                consumer.consumer_group_metadata())

            # Commit the transaction
            producer.commit_transaction()

            # Begin new transaction
            producer.begin_transaction()
            msg_cnt = 0

    print("=== Committing final transaction with {} messages ===".format(
        msg_cnt))
    # commit processed message offsets to the transaction
    producer.send_offsets_to_transaction(
        consumer.position(consumer.assignment()),
        consumer.consumer_group_metadata())

    # commit transaction
    producer.commit_transaction()

    consumer.close()
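
# The comment above mentions the pre-KIP-447 alternative: create one
# transactional producer per assigned input partition from subscribe()'s
# rebalance callback. A hedged sketch of that shape; the transactional.id
# scheme is an assumption, and 'brokers' is the same value used in main():
producers = {}

def on_assign(consumer, partitions):
    for p in partitions:
        if p.partition not in producers:
            prod = Producer({
                'bootstrap.servers': brokers,
                'transactional.id': 'eos-transactions.py-{}'.format(p.partition),
            })
            prod.init_transactions()
            producers[p.partition] = prod

# consumer.subscribe([input_topic], on_assign=on_assign)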
                timestamp=record_json.get("phenomenonTime"),
                result=record_json.get("result"),
                topic=msg.topic(), partition=msg.partition(), offset=msg.offset(),
                **additional_attributes)

            # ingest the record into the StreamBuffer instance, instant emit
            if record.get("topic") == KAFKA_TOPIC_IN_1:  # Car1
                stream_buffer.ingest_left(record)  # with instant emit
            elif record.get("topic") == KAFKA_TOPIC_IN_2:  # Car2
                stream_buffer.ingest_right(record)

    except KeyboardInterrupt:
        print("Gracefully stopping")
    finally:
        ts_stop = time.time()

        # commit processed message offsets to the transaction
        kafka_producer.send_offsets_to_transaction(
            kafka_consumer.position(kafka_consumer.assignment()),
            kafka_consumer.consumer_group_metadata())
        # commit transaction
        kafka_producer.commit_transaction()
        # Leave group and commit offsets
        kafka_consumer.close()

        print(f"\nRecords in |{KAFKA_TOPIC_OUT}| = {stream_buffer.get_join_counter()}, "
              f"|{KAFKA_TOPIC_IN_1}| = {stream_buffer.get_left_counter()}, "
              f"|{KAFKA_TOPIC_IN_2}| = {stream_buffer.get_right_counter()}.")
        print(f"Joined time-series {ts_stop - st0:.5g} s long, "
              f"this are {stream_buffer.get_join_counter() / (ts_stop - st0):.6g} joins per second.")
# subscribe and assign cannot be used at the same time. subscribe subscribes to a topic and consumes from the offset Kafka has recorded for the group; assign consumes from an offset you specify.
consumer.subscribe(['test'])

# Consume one or several specific partitions of a topic
consumer.assign([TopicPartition('test', 4)])

# Reset the offset
consumer.assign([TopicPartition('test', 4, 2)])

# Get the smallest and largest offsets of a partition
consumer.get_watermark_offsets(TopicPartition('test', 4))
# (0, 19)

# With a new group.id you must consume one message first for the offset reset
# below to take effect; without consuming, the offsets read before and after
# the reset are both -1001.
# Get the current offset position
consumer.position([TopicPartition('test', 3)])

# Reset the offset to an arbitrary position. committed determines the offset
# the next connection starts from (per group); it does not affect the current
# connection. After resetting the offset, close and reconnect for it to take
# effect. position determines the current connection's offset and is changed
# with seek().
consumer.seek(TopicPartition('test', 3, 1))
consumer.commit(offsets=[TopicPartition('test', 3, 7)])

# Check the reset position
msg = consumer.committed([TopicPartition('test', 3)])
print(msg)

# offset:Either an absolute offset (>=0) or a logical offset: OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID
while True:
    msg = consumer.poll(3.0)
    if msg is None:
        continue
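
# A self-contained sketch of the logical offsets mentioned above: assign with
# OFFSET_BEGINNING or OFFSET_END instead of an absolute offset. Broker, topic
# and partition values are illustrative assumptions:
from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING, OFFSET_END

c = Consumer({'bootstrap.servers': 'localhost:9092', 'group.id': 'offset-demo'})
c.assign([TopicPartition('test', 4, OFFSET_BEGINNING)])  # replay from the start
# c.assign([TopicPartition('test', 4, OFFSET_END)])      # only new messages
msg = c.poll(3.0)
c.close()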
def replicate(topic, rerun, delete, source, src_groupid, target, trg_groupid,
              trg_partitions):
    global source_partitions

    # Connect to source kafka cluster
    src = Consumer({
        'bootstrap.servers': source,
        'group.id': src_groupid,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': False
    })

    # Connect to target kafka cluster
    trg = Consumer({
        'bootstrap.servers': target,
        'group.id': trg_groupid,
    })

    admin_client = KafkaAdminClient(bootstrap_servers=TRG_BOOTSTRAP_SERVERS,
                                    client_id=TRG_GROUP_ID)

    if delete:
        logger.warning(
            f"DELETING topic {topic} on {TRG_BOOTSTRAP_SERVERS} as requested")
        admin_client.delete_topics([topic])
        logger.warning(f"DELETION of {topic} completed.")

    logger.info(f"source cluster: {source}  source group_id: {src_groupid}")
    logger.info(f"target cluster: {target}  target group_id: {trg_groupid}")

    # Determine if latest source topic is at least partially loaded to target
    trg_topics, the_topic, offset_sum_delta = determine_topic(
        topic, src, trg, rerun)

    src_cm = src.list_topics()  # returns ClusterMetadata
    if the_topic not in src_cm.topics:
        logger.error(
            f"Current topics in {source} with group id {src_groupid} are:")
        logger.error(f"{src_cm.topics}")
        logger.error(
            f"Topic {topic} not in cluster {source} with group id {src_groupid}"
        )
        sys.exit(1)

    src_partition_count = len(src_cm.topics[the_topic].partitions)

    logger.info(
        f"topic: {the_topic} has # of partitions: {src_partition_count}")
    # Calculate multiplier for demuxing
    # Example:
    #    source = 4 target = 9 then multiplier is 9/4=2.25
    #    int(2.25) = 2
    multiplier = int(trg_partitions / src_partition_count)
    trg_partition_count = src_partition_count * multiplier
    logger.info(
        f"multiplier={multiplier} target_partition_count={trg_partition_count}"
    )

    # Add the new topic in target cluster
    if the_topic not in trg_topics:
        logger.info(
            f"replicate {the_topic} to {TRG_BOOTSTRAP_SERVERS} with source group id: {src_groupid}"
        )

        topic_list = [
            NewTopic(name=the_topic,
                     num_partitions=trg_partition_count,
                     replication_factor=1)
        ]
        try:
            logger.info(
                f"Creating topic {the_topic} with {trg_partition_count} partitions"
            )
            admin_client.create_topics(new_topics=topic_list,
                                       validate_only=False)
        except kafka.errors.TopicAlreadyExistsError:
            logger.info(f"Topic already exists in {TRG_BOOTSTRAP_SERVERS} ")
    part_map = create_part_map(src_partition_count, multiplier)

    # Get offset status for each partition
    logger.info(f"Source broker partitions for topic {the_topic}")
    logger.info(
        "-------------------------------------------------------------------------"
    )
    parts = {}
    total_committed = 0
    total_offsets = 0

    for part in src_cm.topics[the_topic].partitions:
        tp = TopicPartition(the_topic, part)
        tp.offset = confluent_kafka.OFFSET_BEGINNING
        src.assign([tp])
        any_committed = src.committed([tp])
        committed = any_committed[0].offset
        total_committed += committed
        end_offset = src.get_watermark_offsets(tp, cached=False)[1]
        position = src.position([tp])[0].offset
        if position == confluent_kafka.OFFSET_BEGINNING:
            position = 0
        elif position == confluent_kafka.OFFSET_END:
            position = end_offset
        elif position == confluent_kafka.OFFSET_INVALID:
            position = 0

        parts[str(part)] = end_offset
        total_offsets += end_offset
        logger.info(
            "Source topic: %s partition: %s end offset: %s committed: %s position: %s lag: %s"
            % (the_topic, part, end_offset, committed, position,
               (position - committed)))

    src.close()
    logger.info(
        f"Source: total_committed={total_committed} total_offsets={total_offsets}"
    )
    logger.info(
        "========================================================================="
    )

    logger.info(
        f"Starting multi-process: the_topic={the_topic} rerun={rerun} src_partition_count={src_partition_count}"
    )
    procs = [
        mp.Process(target=proc_replicate,
                   args=(the_topic, part, parts[str(part)], part_map, rerun))
        for part in range(0, src_partition_count)
    ]

    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    logger.info(f"END")