Example #1
def verify_stats_cb():
    """ Verify stats_cb """
    def stats_cb(stats_json_str):
        global good_stats_cb_result
        stats_json = json.loads(stats_json_str)
        if topic in stats_json['topics']:
            app_offset = stats_json['topics'][topic]['partitions']['0'][
                'app_offset']
            if app_offset > 0:
                print("# app_offset stats for topic %s partition 0: %d" %
                      (topic, app_offset))
                good_stats_cb_result = True

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': str(uuid.uuid1()),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'stats_cb': stats_cb,
        'statistics.interval.ms': 200,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    c = confluent_kafka.Consumer(**conf)
    c.subscribe([topic])

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming',
                  max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while not good_stats_cb_result:
        # Consume until the stats callback has reported a positive app_offset

        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception(
                'Stalled at %d/%d messages, no new messages for 20s' %
                (msgcnt, max_msgcnt))

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                # Reached EOF for a partition, ignore.
                continue
            else:
                raise confluent_kafka.KafkaException(msg.error())

        bytecnt += len(msg)
        msgcnt += 1

        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

        if msgcnt == 1:
            t_first_msg = time.time()
        if msgcnt >= max_msgcnt:
            break

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
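
These functions are excerpted from a larger integration-test module, so they refer to several module-level names that are not shown here (bootstrap_servers, topic, error_cb, with_progress, api_version_request, good_stats_cb_result, Bar, print_commit_result, verify_consumer_seek). A minimal sketch of that assumed context follows; the imports are the ones the code actually uses, but the values and the error_cb body are illustrative assumptions, not the original module's code.

# Assumed module-level context for all four examples (illustrative values).
import json
import time
import uuid

import confluent_kafka
from progress.bar import Bar  # third-party 'progress' package

bootstrap_servers = 'localhost:9092'  # assumption: a local test broker
topic = 'test'                        # assumption: a pre-populated test topic
with_progress = False                 # set True to render a progress bar
api_version_request = True            # forwarded to librdkafka in examples #2/#4
good_stats_cb_result = False          # flipped to True by stats_cb in example #1


def error_cb(err):
    """ Illustrative error callback: just log client-level errors. """
    print('error_cb: %s' % err)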
Example #2
def verify_batch_consumer():
    """ Verify basic batch Consumer functionality """

    # Consumer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    # Create consumer
    c = confluent_kafka.Consumer(**conf)

    # Subscribe to a list of topics
    c.subscribe([topic])

    max_msgcnt = 1000
    batch_cnt = 100
    msgcnt = 0

    while msgcnt < max_msgcnt:
        # Consume until we hit max_msgcnt

        # Consume messages (error()==0) or event (error()!=0)
        msglist = c.consume(batch_cnt, 10.0)
        assert len(msglist) == batch_cnt, \
            'expected %d messages, not %d' % (batch_cnt, len(msglist))

        for msg in msglist:
            if msg.error():
                print('Consumer error: %s: ignoring' % msg.error())
                continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(), msg.key(),
                   msg.value(), tstype, timestamp))

            if (msg.offset() % 5) == 0:
                # Async commit
                c.commit(msg, asynchronous=True)
            elif (msg.offset() % 4) == 0:
                offsets = c.commit(msg, asynchronous=False)
                assert len(offsets) == 1, 'expected 1 offset, not %s' % (offsets)
                assert offsets[0].offset == msg.offset()+1, \
                    'expected offset %d to be committed, not %s' % \
                    (msg.offset(), offsets)
                print('Sync committed offset: %s' % offsets)

            msgcnt += 1

    print('max_msgcnt %d reached' % msgcnt)

    # Get current assignment
    assignment = c.assignment()

    # Get cached watermark offsets
    # Since we're not making use of statistics, the low offset is not known,
    # so ignore it.
    lo, hi = c.get_watermark_offsets(assignment[0], cached=True)
    print('Cached offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query broker for offsets
    lo, hi = c.get_watermark_offsets(assignment[0], timeout=1.0)
    print('Queried offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Close consumer
    c.close()

    # Start a new client and get the committed offsets
    c = confluent_kafka.Consumer(**conf)
    offsets = c.committed(
        list(
            map(lambda p: confluent_kafka.TopicPartition(topic, p),
                range(0, 3))))
    for tp in offsets:
        print(tp)

    c.close()
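
Examples #2 and #4 also wire a print_commit_result callback into the consumer via the on_commit configuration property. The function itself is defined elsewhere in the module; a plausible sketch follows, based on the documented on_commit signature of (err, partitions).

# Sketch of the on_commit callback (assumed implementation). confluent_kafka
# calls it as on_commit(err, partitions) after each offset commit; err is a
# KafkaError on failure and None on success.
def print_commit_result(err, partitions):
    if err is not None:
        print('# Failed to commit offsets: %s: %s' % (err, partitions))
    else:
        print('# Committed offsets for: %s' % partitions)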
Example #3
def verify_batch_consumer_performance():
    """ Verify batch Consumer performance """

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': str(uuid.uuid1()),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    c = confluent_kafka.Consumer(**conf)

    def my_on_assign(consumer, partitions):
        print('on_assign:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.assign(partitions)

    def my_on_revoke(consumer, partitions):
        print('on_revoke:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.unassign()

    c.subscribe([topic], on_assign=my_on_assign, on_revoke=my_on_revoke)

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0
    batch_size = 1000

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming',
                  max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while msgcnt < max_msgcnt:
        # Consume until we hit max_msgcnt

        msglist = c.consume(num_messages=batch_size, timeout=20.0)

        for msg in msglist:
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    # Reached EOF for a partition, ignore.
                    continue
                else:
                    raise confluent_kafka.KafkaException(msg.error())

            bytecnt += len(msg)
            msgcnt += 1

            if bar is not None and (msgcnt % 10000) == 0:
                bar.next(n=10000)

            if msgcnt == 1:
                t_first_msg = time.time()

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
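
All of these consumers assume that topic already holds messages to read. If you need to populate it first, a minimal producer sketch along these lines would do; the payload format and message count are arbitrary choices, not part of the original tests.

# Fill the test topic so the consumers above have data (illustrative helper).
def populate_topic(num_messages=1000000):
    p = confluent_kafka.Producer({'bootstrap.servers': bootstrap_servers})
    for i in range(num_messages):
        p.poll(0)  # serve delivery callbacks, keep the local queue draining
        try:
            p.produce(topic, value='msg #%d' % i)
        except BufferError:
            # Local queue is full: wait for deliveries, then retry once.
            p.poll(1.0)
            p.produce(topic, value='msg #%d' % i)
    p.flush()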
Example #4
def verify_consumer():
    """ Verify basic Consumer functionality """

    # Consumer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    # Create consumer
    c = confluent_kafka.Consumer(**conf)

    def print_wmark(consumer, parts):
        # Verify #294: get_watermark_offsets() should not fail on the first call
        #              This is really a librdkafka issue.
        for p in parts:
            wmarks = consumer.get_watermark_offsets(p)
            print('Watermarks for %s: %s' % (p, wmarks))

    # Subscribe to a list of topics
    c.subscribe([topic], on_assign=print_wmark)

    max_msgcnt = 100
    msgcnt = 0

    first_msg = None
    example_header = None

    while True:
        # Consume until EOF or error

        # Consume message (error()==0) or event (error()!=0)
        msg = c.poll()
        if msg is None:
            raise Exception(
                'Got timeout from poll() without a timeout set: %s' % msg)

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                print('Reached end of %s [%d] at offset %d' %
                      (msg.topic(), msg.partition(), msg.offset()))
                break
            else:
                print('Consumer error: %s: ignoring' % msg.error())
                break

        tstype, timestamp = msg.timestamp()
        headers = msg.headers()
        if headers:
            example_header = headers

        msg.set_headers([('foo', 'bar')])
        assert msg.headers() == [('foo', 'bar')]

        print(
            '%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s headers=%s' %
            (msg.topic(), msg.partition(), msg.offset(), msg.key(),
             msg.value(), tstype, timestamp, headers))

        if first_msg is None:
            first_msg = msg

        if msgcnt == 11:
            parts = c.assignment()
            print('Pausing partitions briefly')
            c.pause(parts)
            exp_None = c.poll(timeout=2.0)
            assert exp_None is None, "expected no messages during pause, got %s" % exp_None
            print('Resuming partitions')
            c.resume(parts)

        if (msg.offset() % 5) == 0:
            # Async commit
            c.commit(msg, asynchronous=True)
        elif (msg.offset() % 4) == 0:
            offsets = c.commit(msg, asynchronous=False)
            assert len(offsets) == 1, 'expected 1 offset, not %s' % (offsets)
            assert offsets[0].offset == msg.offset()+1, \
                'expected offset %d to be committed, not %s' % \
                (msg.offset(), offsets)
            print('Sync committed offset: %s' % offsets)

        msgcnt += 1
        if msgcnt >= max_msgcnt:
            print('max_msgcnt %d reached' % msgcnt)
            break

    assert example_header, "We should have received at least one header"
    assert example_header == [(u'foo1', 'bar'), (u'foo1', 'bar2'),
                              (u'foo2', '1')]

    # Get current assignment
    assignment = c.assignment()

    # Get cached watermark offsets
    # Since we're not making use of statistics, the low offset is not known,
    # so ignore it.
    lo, hi = c.get_watermark_offsets(assignment[0], cached=True)
    print('Cached offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query broker for offsets
    lo, hi = c.get_watermark_offsets(assignment[0], timeout=1.0)
    print('Queried offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query offsets by timestamp by setting each TopicPartition's offset field
    # to the timestamp to search for (123456789000 + 1, in milliseconds)
    topic_partitions_to_search = list(
        map(lambda p: confluent_kafka.TopicPartition(topic, p, 123456789001),
            range(0, 3)))
    print("Searching for offsets with %s" % topic_partitions_to_search)

    offsets = c.offsets_for_times(topic_partitions_to_search, timeout=1.0)
    print("offsets_for_times results: %s" % offsets)

    verify_consumer_seek(c, first_msg)

    # Close consumer
    c.close()

    # Start a new client and get the committed offsets
    c = confluent_kafka.Consumer(**conf)
    offsets = c.committed(
        list(
            map(lambda p: confluent_kafka.TopicPartition(topic, p),
                range(0, 3))))
    for tp in offsets:
        print(tp)

    c.close()
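
Example #4 calls verify_consumer_seek(c, first_msg), another helper defined elsewhere in the module. A plausible sketch, assuming it seeks the live consumer back to the first message's offset and verifies that the next message consumed from that partition is the same one:

# Sketch of verify_consumer_seek (assumed behaviour, not the original helper).
def verify_consumer_seek(c, seek_to_msg):
    tp = confluent_kafka.TopicPartition(seek_to_msg.topic(),
                                        seek_to_msg.partition(),
                                        seek_to_msg.offset())
    print('seek: seeking to %s' % tp)
    c.seek(tp)

    while True:
        msg = c.poll(timeout=10.0)
        assert msg is not None, 'seek: no message received within 10s'
        if msg.error():
            continue  # ignore events such as partition EOF
        if (msg.topic(), msg.partition()) != (tp.topic, tp.partition):
            continue  # a message from another assigned partition
        assert msg.offset() == seek_to_msg.offset(), \
            'expected offset %d, got %d' % (seek_to_msg.offset(), msg.offset())
        break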