Example #1
def verify_consumer_performance():
    """ Verify Consumer performance """

    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': uuid.uuid1(),
            'session.timeout.ms': 6000,
            'error_cb': error_cb,
            'auto.offset.reset': 'earliest'}

    c = confluent_kafka.Consumer(**conf)

    def my_on_assign(consumer, partitions):
        print('on_assign:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.assign(partitions)

    def my_on_revoke(consumer, partitions):
        print('on_revoke:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.unassign()

    c.subscribe([topic], on_assign=my_on_assign, on_revoke=my_on_revoke)

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while True:
        # Consume until EOF or error

        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception('Stalled at %d/%d messages, no new messages for 20s' %
                            (msgcnt, max_msgcnt))

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                # Reached EOF for a partition, ignore.
                continue
            else:
                raise confluent_kafka.KafkaException(msg.error())

        bytecnt += len(msg)
        msgcnt += 1

        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

        if msgcnt == 1:
            t_first_msg = time.time()
        if msgcnt >= max_msgcnt:
            break

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024*1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024*1024)))

    print('closing consumer')
    c.close()
Example #2
def verify_batch_consumer_performance():
    """ Verify batch Consumer performance """

    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': uuid.uuid1(),
            'session.timeout.ms': 6000,
            'error_cb': error_cb,
            'auto.offset.reset': 'earliest'}

    c = confluent_kafka.Consumer(conf)

    def my_on_assign(consumer, partitions):
        print('on_assign:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.assign(partitions)

    def my_on_revoke(consumer, partitions):
        print('on_revoke:', len(partitions), 'partitions:')
        for p in partitions:
            print(' %s [%d] @ %d' % (p.topic, p.partition, p.offset))
        consumer.unassign()

    c.subscribe([topic], on_assign=my_on_assign, on_revoke=my_on_revoke)

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0
    batch_size = 1000

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming', max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while msgcnt < max_msgcnt:
        # Consume until we hit max_msgcnt

        msglist = c.consume(num_messages=batch_size, timeout=20.0)

        for msg in msglist:
            if msg.error():
                raise confluent_kafka.KafkaException(msg.error())

            bytecnt += len(msg)
            msgcnt += 1

            if bar is not None and (msgcnt % 10000) == 0:
                bar.next(n=10000)

            if msgcnt == 1:
                t_first_msg = time.time()

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024*1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024*1024)))

    print('closing consumer')
    c.close()
Example #3
def verify_stats_cb():
    """ Verify stats_cb """
    def stats_cb(stats_json_str):
        global good_stats_cb_result
        stats_json = json.loads(stats_json_str)
        if topic in stats_json['topics']:
            app_offset = stats_json['topics'][topic]['partitions']['0'][
                'app_offset']
            if app_offset > 0:
                print("# app_offset stats for topic %s partition 0: %d" %
                      (topic, app_offset))
                good_stats_cb_result = True

    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': uuid.uuid1(),
        'session.timeout.ms': 6000,
        'error_cb': error_cb,
        'stats_cb': stats_cb,
        'statistics.interval.ms': 200,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    c = confluent_kafka.Consumer(**conf)
    c.subscribe([topic])

    max_msgcnt = 1000000
    bytecnt = 0
    msgcnt = 0

    print('Will now consume %d messages' % max_msgcnt)

    if with_progress:
        bar = Bar('Consuming',
                  max=max_msgcnt,
                  suffix='%(index)d/%(max)d [%(eta_td)s]')
    else:
        bar = None

    while not good_stats_cb_result:
        # Consume until EOF or error

        msg = c.poll(timeout=20.0)
        if msg is None:
            raise Exception(
                'Stalled at %d/%d messages, no new messages for 20s' %
                (msgcnt, max_msgcnt))

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                # Reached EOF for a partition, ignore.
                continue
            else:
                raise confluent_kafka.KafkaException(msg.error())

        bytecnt += len(msg)
        msgcnt += 1

        if bar is not None and (msgcnt % 10000) == 0:
            bar.next(n=10000)

        if msgcnt == 1:
            t_first_msg = time.time()
        if msgcnt >= max_msgcnt:
            break

    if bar is not None:
        bar.finish()

    if msgcnt > 0:
        t_spent = time.time() - t_first_msg
        print('%d messages (%.2fMb) consumed in %.3fs: %d msgs/s, %.2f Mb/s' %
              (msgcnt, bytecnt / (1024 * 1024), t_spent, msgcnt / t_spent,
               (bytecnt / t_spent) / (1024 * 1024)))

    print('closing consumer')
    c.close()
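
The three functions above configure an error_cb that is not included in this excerpt. A minimal sketch of what such a callback could look like (the behavior below is an assumption, not the original implementation):

def error_cb(err):
    # Hypothetical client error callback; the original error_cb is not shown.
    # Raise on fatal errors, otherwise just report the error and keep going.
    if err.fatal():
        raise confluent_kafka.KafkaException(err)
    print('client error: %s' % err)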
Example #4
    def run(
        self,
        bootstrap_servers: str = None,
        group_id: str = None,
        topics: List[str] = None,
        request_timeout: float = 1.0,
        auto_offset_reset: str = "earliest",
        message_consume_limit: int = None,
        kafka_configs: dict = None,
        **kwargs,
    ) -> List[bytes]:
        """
        Run method for this Task. Invoked by calling this Task after initialization within a
        Flow context, or by using `Task.bind`.
        Args:
            - bootstrap_servers (str, required): comma separated host and port pairs that are the
                addresses of kafka brokers
            - group_id (str, required): name of the consumer group the consumer will belong to
            - topics (List[str], required): list of topic names to consume messages from
            - request_timeout (float, optional): Maximum time to block waiting for message, event
                or callback
            - auto_offset_reset (str, optional): configurable offset reset policy
            - message_consume_limit (int, optional): max number of messages to consume before
                closing the consumer
            - kafka_configs (dict, optional): a dict of kafka client configuration properties used
                to construct the consumer.
            - **kwargs (Any, optional): additional keyword arguments to pass to the standard Task
                init method
        Returns:
            - List of consumed messages
        """
        consumer = confluent_kafka.Consumer({
            "bootstrap.servers": bootstrap_servers,
            "group.id": group_id,
            "auto.offset.reset": auto_offset_reset,
            **(kafka_configs or {}),  # tolerate the documented default kafka_configs=None
        })
        consumer.subscribe(topics)

        messages = []
        message_consume_count = 0
        running = True

        try:
            while running:
                message = consumer.poll(timeout=request_timeout)

                if message is not None:
                    if message.error():
                        if (message.error().code() ==
                                confluent_kafka.KafkaError._PARTITION_EOF):
                            # End of partition event, exit consumer
                            self.logger.warn(
                                f"{message.topic()} [{message.partition()}] "
                                f"reached end at offset {message.offset()}")
                            running = False
                        else:
                            raise confluent_kafka.KafkaException(
                                message.error())
                    else:
                        messages.append(message.value())
                        message_consume_count += 1

                        if message_consume_limit:
                            if message_consume_count >= message_consume_limit:
                                break
                else:
                    self.logger.info(
                        f"No messages found for topic {topics}; closing consumer..."
                    )
                    break

        finally:
            consumer.close()

        return messages
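
The run() above is a Prefect-style Task method. A minimal usage sketch, assuming the enclosing Task class is named KafkaBatchConsume (the class name is not shown in the excerpt):

# Hypothetical usage; KafkaBatchConsume is assumed to be the class owning run().
task = KafkaBatchConsume()
messages = task.run(
    bootstrap_servers="localhost:9092",
    group_id="example-group",
    topics=["example-topic"],
    message_consume_limit=100,
)
print(f"consumed {len(messages)} messages")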
Example #5
def main(bootstrap_servers, host, port, user, password):
    consumerConfiguration = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': "elasticsearch",
        'session.timeout.ms': 30000,
        'auto.offset.reset': 'earliest'
    }
    consumer = confluent_kafka.Consumer(consumerConfiguration)
    consumer.subscribe(["scan", "ct", "tags"])

    # Elasticsearch configuration
    es = Elasticsearch([{
        'host': host,
        'port': port
    }],
                       http_auth=(user, password),
                       timeout=60)

    actions = []
    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is None:  # no message received yet
                continue
            if msg.error():
                raise confluent_kafka.KafkaException(msg.error())
            else:
                topic = msg.topic()
                if topic == "scan":
                    message = json.loads(msg.value())
                    data = message['data']
                    date = message['date']
                    sha1 = message['sha1']

                    raw = deep_get(
                        data,
                        'data.tls.result.handshake_log.server_certificates.certificate.raw',
                        "")

                    tls_version = deep_get(
                        data,
                        'data.tls.result.handshake_log.server_hello.version.value',
                        "")

                    tls_cipher_suite = deep_get(
                        data,
                        'data.tls.result.handshake_log.server_hello.cipher_suite.hex',
                        "")

                    actions.append({
                        "_index": "certificates",
                        "_id": sha1,
                        "date": date,
                        "sha1": sha1,
                        "raw": raw,
                        "scan": True,
                    })

                    ip = deep_get(data, 'ip', "")
                    md5 = deep_get(
                        data,
                        'data.tls.result.handshake_log.server_certificates.certificate.parsed.fingerprint_md5',
                        "")
                    sha256 = deep_get(
                        data,
                        'data.tls.result.handshake_log.server_certificates.certificate.parsed.fingerprint_sha256',
                        "")

                    actions.append({
                        "_index": "hosts_{date}".format(date=date),
                        "ip": ip,
                        "date": date,
                        "md5": md5,
                        "sha1": sha1,
                        "sha256": sha256,
                        "tls_version": tls_version,
                        "tls_cipher_suite": tls_cipher_suite,
                    })

                elif topic == "ct":
                    message = json.loads(msg.value())
                    data = message['data']
                    date = message['date']
                    sha1 = message['sha1']

                    try:
                        issuer_common_name = data["chain"][0]["subject"]["CN"]
                    except (KeyError, IndexError):
                        issuer_common_name = ""

                    subject_common_name = deep_get(data,
                                                   'leaf_cert.subject.CN', "")
                    raw = deep_get(data, 'leaf_cert.as_der', "")

                    actions.append({
                        "_index": "certificates",
                        "_id": sha1,
                        "date": date,
                        "sha1": sha1,
                        "raw": raw,
                        "ct": True,
                    })

                elif msg.topic() == "tags":
                    message = json.loads(msg.value())
                    date = message['date']
                    sha1 = message['sha1']
                    tag = message['tag']
                    comment = message['comment']
                    actions.append({
                        "_index": "tags",
                        "date": date,
                        "sha1": sha1,
                        "tag": tag,
                        "comment": comment,
                    })
                if len(actions) > 1000:
                    bulk(es, iter(actions))
                    actions = []

    except KeyboardInterrupt:
        sys.stderr.write('Aborted by user\n')

    finally:
        consumer.close()
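
This example relies on a deep_get helper that is not shown. A minimal sketch of a dotted-path lookup with a default, which is an assumption about its behavior rather than the original code:

import functools


def deep_get(dictionary, path, default=None):
    # Hypothetical helper: walk a dotted key path, returning default when any
    # intermediate key is missing or the current value is not a dict.
    def getter(d, key):
        return d.get(key, default) if isinstance(d, dict) else default
    return functools.reduce(getter, path.split('.'), dictionary)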
Example #6
# Create consumer.
# This consumer will not join the group, but the group.id is required by
# committed() to know which group to get offsets for.
consumer = confluent_kafka.Consumer({'bootstrap.servers': brokers,
                                     'group.id': group})


print("%-50s  %9s  %9s" % ("Topic [Partition]", "Committed", "Lag"))
print("=" * 72)

for topic in sys.argv[3:]:
    # Get the topic's partitions
    metadata = consumer.list_topics(topic, timeout=10)
    if metadata.topics[topic].error is not None:
        raise confluent_kafka.KafkaException(metadata.topics[topic].error)

    # Construct TopicPartition list of partitions to query
    partitions = [confluent_kafka.TopicPartition(topic, p) for p in metadata.topics[topic].partitions]

    # Query committed offsets for this group and the given partitions
    committed = consumer.committed(partitions, timeout=10)

    for partition in committed:
        # Get the partition's low and high watermark offsets.
        (lo, hi) = consumer.get_watermark_offsets(partition, timeout=10, cached=False)

        if partition.offset == confluent_kafka.OFFSET_INVALID:
            offset = "-"
        else:
            offset = "%d" % (partition.offset)
Example #7
 def ack(err, msg):
     if err:
         self._loop.call_soon_threadsafe(result.set_exception, confluent_kafka.KafkaException(err))
     else:
         self._loop.call_soon_threadsafe(result.set_result, msg)
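
The ack callback above appears to be a fragment of an asyncio-friendly producer wrapper. A minimal sketch of the surrounding class, following the common AIOProducer pattern; the class shape, attribute names, and the background poll thread are assumptions, not the original code:

import asyncio
import threading

import confluent_kafka


class AIOProducer:
    # Hypothetical wrapper bridging librdkafka delivery reports to asyncio futures.

    def __init__(self, config, loop=None):
        self._loop = loop or asyncio.get_event_loop()
        self._producer = confluent_kafka.Producer(config)
        self._cancelled = False
        # Delivery callbacks only fire from poll(), so run it on a background thread.
        self._poll_thread = threading.Thread(target=self._poll_loop, daemon=True)
        self._poll_thread.start()

    def _poll_loop(self):
        while not self._cancelled:
            self._producer.poll(0.1)

    def produce(self, topic, value):
        result = self._loop.create_future()

        def ack(err, msg):
            if err:
                self._loop.call_soon_threadsafe(
                    result.set_exception, confluent_kafka.KafkaException(err))
            else:
                self._loop.call_soon_threadsafe(result.set_result, msg)

        self._producer.produce(topic, value, on_delivery=ack)
        return result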
Example #8
        'ssl.certificate.location': args.client_pem,
        'ssl.key.location': args.client_key,
    }

    c = confluent_kafka.Consumer(**conf)
    c.subscribe([topic])
    numOfRecords = 10

    try:
        while numOfRecords > 0:
            msg = c.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    sys.stderr.write(
                        '%s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                else:
                    raise confluent_kafka.KafkaException(msg.error())
            else:
                sys.stderr.write('partition: %d, offset: %d, message: %s\n' %
                                 (msg.partition(), msg.offset(), msg.value()))
            numOfRecords = numOfRecords - 1
    except KeyboardInterrupt:
        sys.stderr.write('Aborted by user\n')

    # Close down consumer to commit final offsets.
    c.close()
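
The configuration dict at the top of this example is cut off. For illustration only, a typical TLS consumer configuration built from standard librdkafka properties might look like the sketch below; the args.* attribute names are assumptions:

# Hypothetical reconstruction for illustration; the original's leading keys are not shown.
conf = {
    'bootstrap.servers': args.bootstrap_servers,
    'group.id': args.group_id,
    'security.protocol': 'ssl',
    'ssl.ca.location': args.ca_cert,
    'ssl.certificate.location': args.client_pem,
    'ssl.key.location': args.client_key,
}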