def consumer(kafka_server: str, test_config: Dict,
             kafka_consumer_group: str) -> Iterator[Consumer]:
    """Yield a connected Kafka consumer subscribed to the test topics."""
    consumer = Consumer({
        "bootstrap.servers": kafka_server,
        "auto.offset.reset": "earliest",
        "enable.auto.commit": True,
        "group.id": kafka_consumer_group,
    })
    prefix = test_config["prefix"]
    kafka_topics = [
        f"{prefix}.{object_type}"
        for object_type in test_config["object_types"]
    ] + [
        f"{prefix}_privileged.{object_type}"
        for object_type in test_config["privileged_object_types"]
    ]
    consumer.subscribe(kafka_topics)

    yield consumer

    # Explicitly perform the commit operation on the consumer before closing it
    # to avoid possible hang since confluent-kafka v1.6.0
    consumer.commit()
    consumer.close()
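The fixture above yields a subscribed consumer and commits and closes it on teardown. A minimal sketch of a test that could use it (the polling window and the assertion are illustrative, not part of the original suite):

def test_messages_arrive(consumer):
    # Poll for up to ~10 seconds; pass on the first valid message.
    for _ in range(10):
        msg = consumer.poll(timeout=1.0)
        if msg is None or msg.error():
            continue
        assert msg.value() is not None
        return
    raise AssertionError("no message received within the polling window")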
class Kafka(Consumer):
    def __init__(self,
                 publisher,
                 downloader,
                 engine,
                 incoming_topic,
                 group_id,
                 bootstrap_servers,
                 requeuer=None,
                 **kwargs):

        super().__init__(publisher, downloader, engine)
        config = kwargs.copy()
        config["group.id"] = group_id
        config["bootstrap.servers"] = ",".join(bootstrap_servers)
        config["group.instance.id"] = kwargs.get("group.instance.id",
                                                 os.environ.get("HOSTNAME"))

        self.auto_commit = kwargs.get("enable.auto.commit", True)
        self.consumer = ConfluentConsumer(config)

        self.consumer.subscribe([incoming_topic])
        log.info("subscribing to %s: %s", incoming_topic, self.consumer)
        self.requeuer = requeuer

    def deserialize(self, bytes_):
        raise NotImplementedError()

    def handles(self, input_msg):
        return True

    def run(self):
        while True:
            msg = self.consumer.poll(1)
            if msg is None:
                continue

            err = msg.error()
            if err is not None:
                if not self.auto_commit:
                    self.consumer.commit(msg)
                log.exception(err)
                continue

            val = msg.value()
            if val is not None:
                try:
                    payload = self.deserialize(val)
                    if self.handles(payload):
                        self.process(payload)
                except Requeue as req:
                    if not self.requeuer:
                        raise Exception(
                            "Requeue request with no requeuer configured.")
                    self.requeuer.requeue(val, req)
                except Exception as ex:
                    log.exception(ex)
                finally:
                    if not self.auto_commit:
                        self.consumer.commit(msg)
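`deserialize` is intentionally abstract above, so a concrete subclass must supply it. A minimal, hypothetical JSON-decoding subclass might look like this (the class name and message shape are assumptions):

import json


class JSONKafka(Kafka):
    def deserialize(self, bytes_):
        # Decode the raw message bytes as UTF-8 JSON.
        return json.loads(bytes_.decode("utf-8"))

    def handles(self, input_msg):
        # Only process messages that carry a "payload" field.
        return isinstance(input_msg, dict) and "payload" in input_msg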
Example #3
class KSubscriber(Subscriber, threading.Thread):

    def __init__(self, arg_parser: ArgsParser, killer: Killer):
        threading.Thread.__init__(self, 
            name='kafka subscriber thread', daemon=True)
        self.killer = killer
        self.topic = arg_parser.topic
        
        config = {
            'bootstrap.servers': ','.join(arg_parser.brokers),
            'client.id': CLIENT_ID,
            'group.id': GROUP_ID,
            'auto.offset.reset': 'earliest'
        }

        if arg_parser.auth:
            config.update({
                'security.protocol': 'SASL_PLAINTEXT',
                'sasl.mechanism': 'PLAIN',
                'sasl.username': arg_parser.username,
                'sasl.password': arg_parser.password
            })

        self.kafka_subscriber = Consumer(config)
    
    def subscribe(self, topic):
        def on_assign(consumer, partitions):
            log.info('subscribed')

        try:
            self.kafka_subscriber.subscribe([topic], on_assign=on_assign)
            
            while True:
                message = self.kafka_subscriber.poll(timeout=1.0)
                if self.killer.killed:
                    break
                
                if message is None:
                    continue

                if message.error():
                    log.error('read message error: {}'.format(message.error()))
                    continue
                
                # commit message
                self.kafka_subscriber.commit(asynchronous=False)

                log.info('received message from topic {t}'.format(t=message.topic()))
                print(message.value().decode('utf-8'))
        except Exception as e:
            log.error('error subscribing to brokers: {}'.format(e))
        
        self.close()

    def run(self):
        log.info('start kafka subscriber inside thread {tn}'.format(tn=self.name))
        self.subscribe(self.topic)
    
    def close(self):
        if self.kafka_subscriber is not None:
            self.kafka_subscriber.close()
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should thorw a RuntimeError
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert ex.match('Consumer closed')
    def _consume(group_id, topic, n, max_messages):
        config = {
            "bootstrap.servers": "localhost:9094",
            "group.id": group_id,
            "auto.offset.reset": "beginning",
            "enable.partition.eof": "true",
            "enable.auto.commit": "false",
        }
        consumer = Consumer(config)
        consumer.subscribe(topics=[topic])
        messages = 0
        while True:
            if messages == max_messages:
                return
            msg = consumer.consume(num_messages=n, timeout=5)
            if len(msg) == 0:
                continue

            for m in msg:
                if m.error():
                    if m.error().code() == KafkaError._PARTITION_EOF:
                        return

                    elif m.error():
                        raise KafkaException(m.error())
                else:
                    messages += 1
                    if messages == max_messages:
                        break
            consumer.commit(asynchronous=False)
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should thorw a RuntimeError
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert 'Consumer closed' == str(ex.value)
Example #7
def run_consumer(queue, msg_handler):
    consumer = Consumer({
        'bootstrap.servers': os.environ.get("KAFKA", "localhost:9092"),
        'group.id': 'manager',
        'auto.offset.reset': 'earliest'  # start from the beginning when no committed offset exists
    })

    _wait_for_topic_to_exist(consumer, queue)

    logging.info("Subscribing to topic: %s", queue)
    consumer.subscribe([queue])

    while True:
        logging.debug("Waiting for messages in %r...", queue)
        msg = consumer.poll()

        if msg is None:
            logging.warning("Poll timed out")
            break

        logging.info("Consuming Kafka message: %r", msg.key())

        if msg.error():
            logging.warning("Consumer error: {}".format(msg.error()))
            continue

        msg_handler(msg)

        consumer.commit()
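`_wait_for_topic_to_exist` is called above but not shown. One plausible sketch, assuming the helper simply polls cluster metadata until the topic appears:

import logging
import time


def _wait_for_topic_to_exist(consumer, topic, interval=5.0):
    # Block until the topic shows up in the broker metadata.
    while True:
        metadata = consumer.list_topics(timeout=10)
        if topic in metadata.topics:
            return
        logging.info("Topic %r does not exist yet, retrying...", topic)
        time.sleep(interval)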
Example #8
def run_communication_consumer(communication_handler):
    conf = {'bootstrap.servers': config['kafka']['servers'],
            'group.id': "communication",
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': 'false'}
    consumer = Consumer(conf)
    print('[+] Listening for communication messages')

    try:
        consumer_topics = [config['kafka']['communication-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            msg = consumer.poll(timeout=1.0)

            if msg is None:
                continue

            if msg.error():
                raise KafkaException(msg.error())
            else:
                print('[+] Communication message received')
                print(msg.value())
                consumer.commit(asynchronous=False)
                communication_handler.handle_run_communication(msg.value())
    finally:
        consumer.close()
class FinalCallReport:
    """
    If the CRM system sends two reports per call, the CallReport class handles the first one.
    FinalCallReport handles the final report.
    """
    def __init__(self, group=None, consumer_topic=None, producer_topic=None):
        self.producer = Producer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": ""
        })
        self._consumer = Consumer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": "",
            'group.id': group,
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest'
        })
        self._consumer.subscribe([consumer_topic])
        self._producer_topic = producer_topic

    @abstractmethod
    def get_call_report(self):
        """
        Implementation for stream case
        """
        while True:
            msg = self._consumer.poll(0.1)

            if msg is None:
                continue

            elif not msg.error():  # Received message
                self._consumer.commit()
                self.send_call_report(msg.value())
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                logging.info('End of partition reached {}/{}'.format(
                    msg.topic(), msg.partition()))
            else:
                logging.error('Error occurred: {}'.format(msg.error().str()))

    def send_call_report(self, report):
        """
        Depends on dialer API options
        :param report:
        """
        while True:
            self.producer.produce(self._producer_topic,
                                  report,
                                  callback=KafkaUtils.delivery_report)
            self.producer.poll(0)

    def terminate(self):
        self.producer.flush()
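`KafkaUtils.delivery_report` is referenced above but not defined here; a plausible, hypothetical implementation is the standard confluent-kafka delivery callback that logs whether each produced message reached the broker:

import logging


class KafkaUtils:
    @staticmethod
    def delivery_report(err, msg):
        # Called once per produced message after delivery succeeds or fails.
        if err is not None:
            logging.error('Delivery failed for key %s: %s', msg.key(), err)
        else:
            logging.info('Delivered to %s [%d] at offset %d',
                         msg.topic(), msg.partition(), msg.offset())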
class PushRecommendations:
    def __init__(self, group, recommendations_topic):
        self._producer = Producer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": ""
        })
        self._consumer = Consumer({
            "bootstrap.servers": "",
            "security.protocol": "SASL_SSL",
            "sasl.mechanisms": "PLAIN",
            "sasl.username": "",
            "sasl.password": "",
            'group.id': group,
            'enable.auto.commit': True,
            'auto.offset.reset': 'earliest'
        })

        self._consumer.subscribe([recommendations_topic])

    def start(self):
        """
        Get messages from push_recommendations_topic.
        If we get recommendations then call self.push_recommendations()
        :return:
        """
        while True:
            msg = self._consumer.poll(0.1)

            if msg is None:
                continue
            elif not msg.error():  # Received message
                recommendations = msg.value()
                recommendations = json.loads(recommendations)

                if len(recommendations['response']['leads']) == 0:
                    logging.error('Got 0 optimal leads from OptimalQ')
                    continue

                self.push_recommendations(recommendations)
                self._consumer.commit()
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                logging.info('End of partition reached {}/{}'.format(
                    msg.topic(), msg.partition()))
            else:
                logging.error('Error occurred: {}'.format(msg.error().str()))

    @abstractmethod
    def push_recommendations(self, recommendations):
        """
        Get OptimalQ recommendations and push them to the dialer, depends on dialer api options.
        :param recommendations:
        """
        pass

    def terminate(self):
        self._producer.flush()
Example #11
def worker():
    global consumers

    consumer = Consumer({'bootstrap.servers': bootstrap_servers, 'group.id': consumer_group, 'client.id': client_id,
                         'default.topic.config': {'auto.offset.reset': 'earliest'}, 'enable.auto.offset.store': False,
                         'session.timeout.ms': session_timeout_ms})
    consumers.append(consumer)

    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(0)

        thread_name = threading.current_thread().name

        if msg is None:
            continue

        if not msg.error():
            msg_timestamp = datetime.fromtimestamp(msg.timestamp()[1] / 1000.0)

            keep_alive_counter = 0
            now = datetime.now()
            # loop/sleep to delay the message
            while now < msg_timestamp + delay_timedelta:
                keep_alive_counter = keep_alive_counter + 1

                msg_timestamp_with_delta = msg_timestamp + delay_timedelta
                diff1 = msg_timestamp_with_delta - now
                diff_seconds = diff1.total_seconds()

                if keep_alive_counter <= 1:
                    logging.info("[%s] %s | received message on partition=%d, delaying for %fs" % (
                    thread_name, now.isoformat(), msg.partition(), diff_seconds))

                # sleep for {min_sleep_seconds}s...{kafka_keep_alive_seconds}s
                sleep_seconds = min(kafka_keep_alive_seconds, max(min_sleep_seconds, diff_seconds))

                # use as 'keep alive' feedback for low (no) traffic periods... to avoid connections getting dropped by brokers - resulting in a group rebalance
                logging.debug(
                    "[%s] %s | kafka keep alive commit partition=%d" % (thread_name, now.isoformat(), msg.partition()))
                consumer.commit(
                    offsets=[TopicPartition(topic=msg.topic(), partition=msg.partition(), offset=OFFSET_STORED)])

                # go to sleep
                logging.debug("[%s] %s | going to sleep for %fs / lag: %fs" % (
                    thread_name, now.isoformat(), sleep_seconds, diff_seconds))
                sleep(sleep_seconds)
                now = datetime.now()

            process(thread_name, msg)
            consumer.store_offsets(msg)

        elif msg.error().code() == KafkaError._PARTITION_EOF:
            continue

        else:
            logging.error("kafka consumer error: %s" % msg.error())
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb (err, partitions):
        pass

    kc = Consumer({'group.id':'test', 'socket.timeout.ms':'100',
                   'session.timeout.ms': 1000, # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke (consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = list(map(lambda p: TopicPartition("test", p), range(0,100,3)))
    kc.assign(partitions)

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT


    kc.close()
Example #13
    def set_consumer_offsets(self, offsets: List[Offset]):
        grouped_offsets = ConfluentAdminClient.group_offsets_by_consumer_group(
            offsets)
        for consumer_group, _offsets in grouped_offsets.items():
            consumer = Consumer({**self.config, 'group.id': consumer_group})
            tps = [TopicPartition(o.topic, o.partition, o.value)
                   for o in _offsets]
            logger.info(f'Set {len(tps)} offsets for consumer '
                        f'group: {consumer_group}')
            consumer.commit(offsets=tps, asynchronous=False)
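The snippet above relies on an `Offset` record and on `ConfluentAdminClient.group_offsets_by_consumer_group`, neither of which is shown. A minimal sketch of what they might look like, written here as a standalone helper rather than the real static method:

from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class Offset:
    consumer_group: str
    topic: str
    partition: int
    value: int


def group_offsets_by_consumer_group(offsets: List[Offset]) -> Dict[str, List[Offset]]:
    # Bucket offsets by the consumer group they belong to.
    grouped: Dict[str, List[Offset]] = defaultdict(list)
    for offset in offsets:
        grouped[offset.consumer_group].append(offset)
    return grouped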
Example #14
class Kafka():
    def __init__(self, topic_name, group_id, auto_offset_reset):

        with open(config_file_path) as kafka_conf:
            self.conf = yaml.load(kafka_conf, Loader=yaml.FullLoader)

        self.group_id = group_id
        self.topic_name = topic_name
        self.auto_offset_reset = auto_offset_reset
        self.running_consumer = True

        self.c = Consumer({
            'bootstrap.servers': self.conf['bootstrap_servers'],
            'group.id': self.group_id,
            'auto.offset.reset': self.auto_offset_reset
        })
        self.c.subscribe([self.topic_name])
        print(self.c.list_topics())

    def consume(self):
        received = 0
        msg = None
        while self.running_consumer:
            raw = self.c.poll(1.0)

            if raw is None:
                print("empty message!")
                msg = "empty"
            elif raw.error():
                print("consumer error: {}".format(raw.error()))
                continue
            else:
                received += 1
                msg = raw.value().decode('utf-8')

            print("message is : {}".format(msg))
            self.c.commit()

            # Stop after every 10 received messages.
            if received and received % 10 == 0:
                self.running_consumer = False

        return msg

    def stop_consume(self):
        self.running_consumer = False
        time.sleep(10)
        self.consume()
Example #15
def consume_loop():
    # Consumer configuration
    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    conf = {
        'bootstrap.servers': KAFKA,
        'group.id': GROUP,
        'session.timeout.ms': SESSION_TIMEOUT,
        'auto.offset.reset': AUTO_OFFSET_RESET,
        'on_commit': commit_completed
    }

    # Create Consumer instance. Logs will be emitted when poll() is called
    c = Consumer(conf, logger=logger)

    def print_assignment(_, partitions):
        logger.info('Assignment: {}'.format(partitions))

    # Subscribe to topics
    c.subscribe(TOPICS, on_assign=print_assignment)

    # Read messages from Kafka, print to stdout
    try:
        msg_count = 0
        while True:
            msg = c.poll(timeout=TIMEOUT)

            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    raise KafkaException(msg.error())
            else:
                # Proper message
                sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                 (msg.topic(), msg.partition(), msg.offset(),
                                  str(msg.key())))
                msg_process(msg)
                msg_count += 1
                # Manually commit every MIN_COMMIT_COUNT messages
                if msg_count % MIN_COMMIT_COUNT == 0:
                    c.commit()

    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        c.close()
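`commit_completed` is registered as the `on_commit` callback above but not shown; a plausible sketch just reports how each asynchronous commit turned out:

import sys


def commit_completed(err, partitions):
    # Invoked from poll() whenever a commit request completes.
    if err:
        sys.stderr.write('%% Commit failed: %s\n' % err)
    else:
        sys.stderr.write('%% Committed offsets for %d partition(s)\n' % len(partitions))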
Example #16
def repl():
    c = Consumer(settings)
    c.subscribe(topics)
    try:
        while True:
            if not red.ping():
                time.sleep(1)
                continue
            msg = c.poll(0.1)
            # No message present
            if msg is None:
                continue
            # Found message
            elif not msg.error():
                # Try to handle; result stays False if the topic is unrecognised
                result = False
                if msg.topic() == u'add_build':
                    result = add_build(msg.value())
                elif msg.topic() == u'delete_build':
                    result = delete_build(msg.value())
                elif msg.topic() == u'add_user':
                    result = add_user(msg.value())
                elif msg.topic() == u'delete_user':
                    result = delete_user(msg.value())
                elif msg.topic() == u'add_build_component':
                    result = add_build_component(msg.value())
                elif msg.topic() == u'remove_build_component':
                    result = remove_build_component(msg.value())
                elif msg.topic() == u'add_decoration':
                    result = add_decoration(msg.value())
                elif msg.topic() == u'remove_decoration':
                    result = remove_decoration(msg.value())
                elif msg.topic() == u'remove_all_decorations':
                    result = remove_all_decorations(msg.value())
                if result:
                    pprint('Success ' + msg.value().decode('utf-8'))
                    c.commit()
                else:
                    c.unsubscribe()
                    c.subscribe(topics)
                    print('Error Occurred Adding to Redis')
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occurred: {0}'.format(msg.error().str()))
            time.sleep(1)

    except KeyboardInterrupt:
        pass

    finally:
        c.close()
Example #17
class Consume:
    def __init__(self, topics, min_commit_count, persist_object):
        conf = {
            'bootstrap.servers': "localhost:9092",
            'group.id': "bike_theft",
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            },
            'on_commit': self.commit_completed
        }
        self.consumer = Consumer(conf)

        self.topics = topics
        self.min_commit_count = min_commit_count
        self.db = persist_object

    @staticmethod
    def commit_completed(err, partitions):
        if err:
            print(str(err))
        else:
            print("Committed partition offsets: " + str(partitions))

    def db_persist(self, msg):
        return self.db.insert(msg)

    def consume_loop(self):
        try:
            self.consumer.subscribe(self.topics)
            msg_count = 0
            while True:
                msg = self.consumer.poll(timeout=1.0)
                print(msg)
                if msg is None:
                    continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        sys.stderr.write(
                            '%% %s [%d] reached end at offset %d\n' %
                            (msg.topic(), msg.partition(), msg.offset()))
                    elif msg.error():
                        raise KafkaException(msg.error())
                else:
                    decoded_msg = msg.value().decode("utf-8")
                    inserted_id = self.db_persist(decoded_msg)
                    msg_count += 1
                    if msg_count % self.min_commit_count == 0:
                        self.consumer.commit(asynchronous=False)
                    yield decoded_msg
        finally:
            self.consumer.close()
def main(args):
    def _on_assign(consumer, partitions):
        """
        If force-beginning is True, force Kafka to read all stored messages
        :param consumer:
        :param partitions:
        :return:
        """
        print(partitions)
        if args.force_beginning:
            for p in partitions:
                p.offset = OFFSET_BEGINNING
        consumer.assign(partitions)

    conf = read_config()

    kafka_config = gen_kafka_config(conf)
    kafka_config['auto.offset.reset'] = args.offset
    kafka_config['group.id'] = args.consumer_id

    if args.debug:
        print("Kafka configuration:")
        print(json.dumps(kafka_config, indent=4))

    consumer = Consumer(kafka_config)
    consumer.subscribe([args.topic], on_assign=_on_assign)

    schema = None
    if args.schema in ['marshmallow', 'marshmallow-extended']:
        schema = UserSchema()

    while True:
        message = consumer.poll(1)
        if message is not None and not message.error():
            print("Raw message: ", message.value().decode('UTF-8'))
            if args.schema == 'marshmallow':
                user = schema.loads(message.value().decode('UTF-8')).data
                print(user)
            elif args.schema == 'marshmallow-extended':
                buffer = json.loads(message.value().decode('UTF-8'))
                schema_name = buffer['schema']['name']
                schema_version = buffer['schema']['version']
                print("Schema name: ", schema_name, " version: ", schema_version)

                if schema_name == 'UserSchema':
                    user = schema.load(buffer['data']).data
                    print(user)

            consumer.commit()
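`UserSchema` is assumed above; a minimal, hypothetical marshmallow schema for a simple user record could be:

from marshmallow import Schema, fields


class UserSchema(Schema):
    # Field names are illustrative; the real schema depends on the producer.
    name = fields.Str()
    surname = fields.Str()
    age = fields.Int()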
Example #19
def main():
    consumer = Consumer({
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'HuMan-1',
        'enable.auto.commit': False,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    })

    consumer.subscribe(['test-topic'])

    while True:
        message = consumer.poll(1.0)
        if message:
            print(f"Message {message.value()}")
            consumer.commit()
Example #20
class Kafka(Consumer):
    def __init__(self, publisher, downloader, engine, incoming_topic, group_id,
                 bootstrap_servers, **kwargs):

        super().__init__(publisher, downloader, engine)
        config = kwargs.copy()
        config["group.id"] = group_id
        config["bootstrap.servers"] = ",".join(bootstrap_servers)
        log.info("config", extra={"config": config})

        self.auto_commit = kwargs.get("enable.auto.commit", True)
        self.consumer = ConfluentConsumer(config)

        self.consumer.subscribe([incoming_topic])
        log.info("subscribing to %s: %s", incoming_topic, self.consumer)

    def deserialize(self, bytes_):
        raise NotImplementedError()

    def handles(self, input_msg):
        return True

    def run(self):
        while True:
            msg = self.consumer.poll(1)
            if msg is None:
                continue

            err = msg.error()
            if err is not None:
                # TODO: Should msg be committed?
                log.exception(err)
                continue

            val = msg.value()
            if val is not None:
                try:
                    payload = self.deserialize(val)
                    if self.handles(payload):
                        self.process(payload)
                except Exception as ex:
                    log.exception(ex)
                finally:
                    if not self.auto_commit:
                        self.consumer.commit(msg)
Example #21
def consume_record(lines: list):
    consumer = Consumer(consumer_config)
    consumer.subscribe([
        "observations.weather.multivariate",
        "observations.weather.municipality"
    ])

    while True:
        try:
            message = consumer.poll(1)
        except Exception as e:
            print(f"Exception while trying to poll messages - {e}")
            exit(-1)
        else:
            if message:
                to_buffer(lines, message)

                if len(lines) > 1000 and flush_buffer(lines):
                    consumer.commit()
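`to_buffer` and `flush_buffer` are referenced above but not defined; one hypothetical reading is that they accumulate decoded payloads and periodically flush them to disk:

def to_buffer(lines: list, message):
    # Append the decoded payload of a valid message to the in-memory buffer.
    if message.error() is None and message.value() is not None:
        lines.append(message.value().decode("utf-8"))


def flush_buffer(lines: list) -> bool:
    # Persist and clear the buffer; return True so the caller commits offsets.
    with open("observations.buffer", "a") as fh:  # destination path is an assumption
        fh.write("\n".join(lines) + "\n")
    lines.clear()
    return True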
Example #22
class KafkaStream:

    def __init__(self, topic):
        # Connect the Kafka consumer
        try:
            topics = [topic]
            kafka_config = KAFKA_CONSUMER_DEFAULT_CONFIG
            self.consumer = Consumer(kafka_config)
            self.consumer.subscribe(topics)
        except Exception as e:
            logger.error(f"fail to init kafka consumer.[{topic}][{e}]")
            sys.exit(f"fail to init kafka consumer.[{topic}]")

    async def handler(self, data):
        pass

    async def read_stream(self):
        # Listen for messages from Kafka
        try:
            while True:
                loop = asyncio.get_event_loop()
                message = await loop.run_in_executor(None, self.consumer.poll)
                if message is None:
                    continue
                if message.error():
                    logger.exception(message.error())
                    raise KafkaException(message.error())
                else:
                    data = json.loads(message.value().decode("utf-8"))
                    try:
                        finish = await self.handler(data)
                    except Exception as e:
                        logger.info(f"handler解析出现异常[{e}]")
                        time.sleep(1)
                        continue

                    if finish:
                        self.consumer.commit(asynchronous=True)  # fire-and-forget; no need to wait for the commit callback

        except Exception as e:
            logger.error(f"kafka error.[{e}]")
            return
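`handler` is a no-op coroutine above; a hypothetical subclass that overrides it and drives the stream might look like this (the topic name is a placeholder):

class PrintStream(KafkaStream):
    async def handler(self, data):
        # Returning True makes read_stream() commit the offset for this message.
        print(data)
        return True


# Usage sketch:
# asyncio.run(PrintStream("example-topic").read_stream())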
Example #23
    def consume_msg(self):
        c = Consumer({'bootstrap.servers': "bootstrap_server1,server2......",
        'group.id': "foo",
        "session.timeout.ms": 6000,
        'auto.offset.reset': 'latest'})  # the consumer starts from either the earliest or the latest offset
        
        c.subscribe(['merto_mart'])
        try:
            while True:
                df = pd.DataFrame(columns = ["name", "surname", "age"])
                empty_list = list()

                msg = c.poll(1.0)

                if msg is None:
                    continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        continue
                    else:
                        print(msg.error())
                        break

                print('Received message: {}, message offset: {}, topicname: {}'.format(msg.value().decode('utf-8'), msg.offset(), msg.topic()))
                
                for i in str(msg.value().decode('utf-8')).split(","):
                    print(i)
                    empty_list.append(i)
                df = df.append(pd.Series(empty_list, index=df.columns), ignore_index=True)
                df["date"] = self.today
                
                if df.shape[0] != 0:
                    self.biqquery_manager.push_to_bq(df, schema, 'kafka_test')
                    print("pushed successfully")

                c.commit()
        except KeyboardInterrupt:
            sys.stderr.write('%% Aborted by user\n')

        finally:            
            c.close()
def test_on_commit():
    """ Verify that on_commit is only called once per commit() (issue #71) """
    class CommitState(object):
        def __init__(self, topic, partition):
            self.topic = topic
            self.partition = partition
            self.once = True

    def commit_cb(cs, err, ps):
        print('on_commit: err %s, partitions %s' % (err, ps))
        assert cs.once is True
        assert err == KafkaError._NO_OFFSET
        assert len(ps) == 1
        p = ps[0]
        assert p.topic == cs.topic
        assert p.partition == cs.partition
        cs.once = False

    cs = CommitState('test', 2)

    c = Consumer({
        'group.id': 'x',
        'enable.auto.commit': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
        'on_commit': lambda err, ps: commit_cb(cs, err, ps)
    })

    c.assign([TopicPartition(cs.topic, cs.partition)])

    for i in range(1, 3):
        c.poll(0.1)

        if cs.once:
            # Try commit once
            try:
                c.commit(asynchronous=False)
            except KafkaException as e:
                print('commit failed with %s (expected)' % e)
                assert e.args[0].code() == KafkaError._NO_OFFSET

    c.close()
Example #25
async def consume_events(topic, group, brokers, callback, schema=None, registry=None, delay=0.01, **kwargs):
    """
    Connect to the Kafka endpoint and start consuming
    messages from the given `topic`.
    The given callback is applied on each
    message.
    """    
    global consumer
    if topic in consumers:
        raise RuntimeError("A consumer already exists for topic: %s" % topic)

    if (not registry_serializer or not registry_client) and registry:
        r_client,serializer = create_registry_client(registry)


    consumer = Consumer({'bootstrap.servers': brokers, 'group.id': group,
              'default.topic.config': {'auto.offset.reset': 'largest'}})
    consumer.subscribe([topic])
    consumers[topic] = consumer


    try:
        while True:
            message = consumer.poll(1)
            if message:
                if not message.error():
                    if registry:
                        message = serializer.decode_message(message.value())
                    else:
                        message = message.value()

                    await callback(message)
                    consumer.commit()
            else:
                await asyncio.sleep(delay)
    except KafkaException as ex:
        pass
    else:
        consumer.close()
    finally:
        consumers.pop(topic, None)
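A hypothetical way to drive `consume_events` with a simple coroutine callback (broker address, group, and topic are placeholders):

async def handle(message):
    print("got:", message)


# asyncio.run(consume_events("example-topic", "example-group",
#                            "localhost:9092", handle))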
def test_on_commit():
    """ Verify that on_commit is only called once per commit() (issue #71) """

    class CommitState(object):
        def __init__(self, topic, partition):
            self.topic = topic
            self.partition = partition
            self.once = True

    def commit_cb(cs, err, ps):
        print('on_commit: err %s, partitions %s' % (err, ps))
        assert cs.once is True
        assert err == KafkaError._NO_OFFSET
        assert len(ps) == 1
        p = ps[0]
        assert p.topic == cs.topic
        assert p.partition == cs.partition
        cs.once = False

    cs = CommitState('test', 2)

    c = Consumer({'group.id': 'x',
                  'enable.auto.commit': False, 'socket.timeout.ms': 50,
                  'session.timeout.ms': 100,
                  'on_commit': lambda err, ps: commit_cb(cs, err, ps)})

    c.assign([TopicPartition(cs.topic, cs.partition)])

    for i in range(1, 3):
        c.poll(0.1)

        if cs.once:
            # Try commit once
            try:
                c.commit(asynchronous=False)
            except KafkaException as e:
                print('commit failed with %s (expected)' % e)
                assert e.args[0].code() == KafkaError._NO_OFFSET

    c.close()
Example #27
def start_consumer():
    logger.info('Starting consumer', extra={'tags': {
        'group_id': GROUP_ID
    }})
    consumer = Consumer({
        'bootstrap.servers': f'{PRODUCER_HOST}:{PRODUCER_PORT}',
        'group.id': GROUP_ID,
        'auto.offset.reset': 'earliest',
        'queued.max.messages.kbytes': 100000,
        'enable.auto.commit': 'false',
        'on_commit': on_commit
    })
    current_date = update_topic_list(consumer)

    while True:
        if int(time.time()) % 300 == 0:
            current_date = update_topic_list(consumer, current_topic_date=current_date)
        msg = consumer.poll(1)
        if msg is None:
            continue
        if msg.error():
            logger.error('Consumer error: {}'.format(msg.error()))
            continue

        process_start_time = datetime.now()
        alert = base64.b64encode(msg.value()).decode('utf-8')
        logger.info('Received alert from stream')
        success, candid = do_ingest(alert)
        logger.info('Finished processing message from {topic} with offset {offset}'.format(
                    topic=msg.topic() + '-' + str(msg.partition()), offset=msg.offset()),
                    extra={'tags': {
                                'candid': candid,
                                'success': success,
                                'record_processing_time': (datetime.now() - process_start_time).total_seconds(),
                                'processing_latency': datetime.now().timestamp() - msg.timestamp()[1]/1000
                          }}
                    )
        consumer.commit(msg)

    consumer.close()
Example #28
class Listener:
    def __init__(self, kafka_server, topic):
        self.topic = topic
        self.kafka_server = kafka_server
        self.consumer = Consumer({
            'bootstrap.servers': kafka_server,
            'group.id': "poly_encoders_server",
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest',
            'metadata.max.age.ms': 10000
        })
        self.consumer.subscribe([topic])

    def listen(self):
        logger.info(f"Listening on topic: {self.topic}")
        while True:
            msgs = self.consumer.consume(500, timeout=1)
            if msgs is None or len(msgs) == 0:
                continue
            msg = msgs[-1]
            if msg.error():
                logger.error("Consumer error: {}".format(msg.error()))
                continue
            else:
                try:
                    value = msg.value().decode('utf-8')
                    event = PolyEncodersTrainingCompletedEvent(
                        **json.loads(value))
                    application_services['latest'] = ApplicationService(
                        model_dir=event.model_dir,
                        poly_m=event.poly_m,
                        max_query_len=event.max_query_len,
                        max_candidate_len=event.max_candidate_len,
                        random_seed=event.random_seed)
                    logger.debug('Received message: {}'.format(value))
                    self.consumer.commit(message=msgs[-1])
                except Exception as e:
                    logger.exception(e)
                    logger.error(msg.value())
                    raise e
Example #29
def repl():
    c = Consumer(settings)
    c.subscribe([topic])
    db.connect()
    try:
        while True:
            if not db.ping():
                db.connect()
                continue
            msg = c.poll(0.1)
            # No message present
            if msg is None:
                continue
            # Found a message
            elif not msg.error():
                # Try to insert
                result = insertArmor(msg.value())
                if result:
                    pprint('Added Successfully ' + msg.value().decode('utf-8'))
                    c.commit()
                else:
                    c.unsubscribe()
                    c.subscribe([topic])
                    print('Error Occurred Adding to Cassandra')
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occurred: {0}'.format(msg.error().str()))
            time.sleep(1)

    except KeyboardInterrupt:
        pass

    finally:
        c.close()
class QuerySubscriptionConsumer(object):
    """
    A Kafka consumer that processes query subscription update messages. Each message has
    a related subscription id and the latest values related to the subscribed query.
    These values are passed along to a callback associated with the subscription.
    """

    topic_to_dataset = {
        settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS: QueryDatasets.EVENTS
    }

    def __init__(self,
                 group_id,
                 topic=None,
                 commit_batch_size=100,
                 initial_offset_reset="earliest"):
        self.group_id = group_id
        if not topic:
            topic = settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS
        self.topic = topic
        cluster_name = settings.KAFKA_TOPICS[topic]["cluster"]
        self.bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name][
            "bootstrap.servers"]
        self.commit_batch_size = commit_batch_size
        self.initial_offset_reset = initial_offset_reset
        self.offsets = {}
        self.consumer = None

    def run(self):
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        conf = {
            "bootstrap.servers": self.bootstrap_servers,
            "group.id": self.group_id,
            "session.timeout.ms": 6000,
            "auto.offset.reset": self.initial_offset_reset,
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "false",
            "default.topic.config": {
                "auto.offset.reset": self.initial_offset_reset
            },
        }

        def on_revoke(consumer, partitions):
            self.commit_offsets()

        self.consumer = Consumer(conf)
        self.consumer.subscribe([self.topic], on_revoke=on_revoke)

        try:
            i = 0
            while True:
                message = self.consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise KafkaException(error)

                i = i + 1

                with sentry_sdk.start_span(
                        Span(
                            op="handle_message",
                            transaction=
                            "query_subscription_consumer_process_message",
                            sampled=True,
                        )):
                    self.handle_message(message)

                # Track latest completed message here, for use in `shutdown` handler.
                self.offsets[message.partition()] = message.offset() + 1

                if i % self.commit_batch_size == 0:
                    logger.debug("Committing offsets")
                    self.commit_offsets()
        except KeyboardInterrupt:
            pass

        self.shutdown()

    def commit_offsets(self):
        if self.offsets and self.consumer:
            to_commit = [
                TopicPartition(self.topic, partition, offset)
                for partition, offset in self.offsets.items()
            ]
            self.consumer.commit(offsets=to_commit)
            self.offsets.clear()

    def shutdown(self):
        logger.debug("Committing offsets and closing consumer")
        self.commit_offsets()
        self.consumer.close()

    def handle_message(self, message):
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        with sentry_sdk.push_scope() as scope:
            try:
                contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                subscription = QuerySubscription.objects.get_from_cache(
                    subscription_id=contents["subscription_id"])
            except QuerySubscription.DoesNotExist:
                metrics.incr(
                    "snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                try:
                    _delete_from_snuba(self.topic_to_dataset[message.topic()],
                                       contents["subscription_id"])
                except Exception:
                    logger.exception(
                        "Failed to delete unused subscription from snuba.")

                return

            if subscription.type not in subscriber_registry:
                metrics.incr(
                    "snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            logger.info(
                "query-subscription-consumer.handle_message",
                extra={
                    "timestamp": contents["timestamp"],
                    "query_subscription_id": contents["subscription_id"],
                    "contents": contents,
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(
                    op="process_message") as span, metrics.timer(
                        "snuba_query_subscriber.callback.duration",
                        instance=subscription.type):
                span.set_data("payload", contents)
                callback(contents, subscription)

    def parse_message_value(self, value):
        """
        Parses the value received via the Kafka consumer and verifies that it
        matches the expected schema.
        :param value: A json formatted string
        :return: A dict with the parsed message
        """
        wrapper = loads(value)
        try:
            jsonschema.validate(wrapper, SUBSCRIPTION_WRAPPER_SCHEMA)
        except jsonschema.ValidationError:
            metrics.incr("snuba_query_subscriber.message_wrapper_invalid")
            raise InvalidSchemaError("Message wrapper does not match schema")

        schema_version = wrapper["version"]
        if schema_version not in SUBSCRIPTION_PAYLOAD_VERSIONS:
            metrics.incr(
                "snuba_query_subscriber.message_wrapper_invalid_version")
            raise InvalidMessageError(
                "Version specified in wrapper has no schema")

        payload = wrapper["payload"]
        try:
            jsonschema.validate(payload,
                                SUBSCRIPTION_PAYLOAD_VERSIONS[schema_version])
        except jsonschema.ValidationError:
            metrics.incr("snuba_query_subscriber.message_payload_invalid")
            raise InvalidSchemaError("Message payload does not match schema")

        payload["timestamp"] = parse_date(
            payload["timestamp"]).replace(tzinfo=pytz.utc)
        return payload
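A short, hypothetical usage of the consumer above (the group id is a placeholder):

# subscriber = QuerySubscriptionConsumer(group_id="query-subscription-consumer")
# subscriber.run()  # blocks until KeyboardInterrupt, then commits offsets and closes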
Example #31
class QuerySubscriptionConsumer:
    """
    A Kafka consumer that processes query subscription update messages. Each message has
    a related subscription id and the latest values related to the subscribed query.
    These values are passed along to a callback associated with the subscription.
    """

    topic_to_dataset = {
        settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS:
        QueryDatasets.EVENTS,
        settings.KAFKA_TRANSACTIONS_SUBSCRIPTIONS_RESULTS:
        QueryDatasets.TRANSACTIONS,
    }

    def __init__(
        self,
        group_id,
        topic=None,
        commit_batch_size=100,
        initial_offset_reset="earliest",
        force_offset_reset=None,
    ):
        self.group_id = group_id
        if not topic:
            topic = settings.KAFKA_EVENTS_SUBSCRIPTIONS_RESULTS
        self.topic = topic
        cluster_name = settings.KAFKA_TOPICS[topic]["cluster"]
        self.commit_batch_size = commit_batch_size
        self.initial_offset_reset = initial_offset_reset
        self.offsets = {}
        self.consumer = None
        self.cluster_options = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            {
                "group.id": self.group_id,
                "session.timeout.ms": 6000,
                "auto.offset.reset": self.initial_offset_reset,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "false",
                "default.topic.config": {
                    "auto.offset.reset": self.initial_offset_reset
                },
            },
        )
        self.admin_cluster_options = kafka_config.get_kafka_admin_cluster_options(
            cluster_name, {"allow.auto.create.topics": "true"})
        self.resolve_partition_force_offset = self.offset_reset_name_to_func(
            force_offset_reset)

    def offset_reset_name_to_func(self, offset_reset):
        if offset_reset in {"smallest", "earliest", "beginning"}:
            return self.resolve_partition_offset_earliest
        elif offset_reset in {"largest", "latest", "end"}:
            return self.resolve_partition_offset_latest

    def resolve_partition_offset_earliest(self, partition):
        low, high = self.consumer.get_watermark_offsets(partition)
        return TopicPartition(partition.topic, partition.partition, low)

    def resolve_partition_offset_latest(self, partition):
        low, high = self.consumer.get_watermark_offsets(partition)
        return TopicPartition(partition.topic, partition.partition, high)

    def run(self):
        logger.debug("Starting snuba query subscriber")
        self.offsets.clear()

        def on_assign(consumer, partitions):
            updated_partitions = []
            for partition in partitions:
                if self.resolve_partition_force_offset:
                    partition = self.resolve_partition_force_offset(partition)
                    updated_partitions.append(partition)

                if partition.offset == OFFSET_INVALID:
                    updated_offset = None
                else:
                    updated_offset = partition.offset
                self.offsets[partition.partition] = updated_offset
            if updated_partitions:
                self.consumer.assign(updated_partitions)
            logger.info(
                "query-subscription-consumer.on_assign",
                extra={
                    "offsets": six.text_type(self.offsets),
                    "partitions": six.text_type(partitions),
                },
            )

        def on_revoke(consumer, partitions):
            partition_numbers = [
                partition.partition for partition in partitions
            ]
            self.commit_offsets(partition_numbers)
            for partition_number in partition_numbers:
                self.offsets.pop(partition_number, None)
            logger.info(
                "query-subscription-consumer.on_revoke",
                extra={
                    "offsets": six.text_type(self.offsets),
                    "partitions": six.text_type(partitions),
                },
            )

        self.consumer = Consumer(self.cluster_options)
        if settings.KAFKA_CONSUMER_AUTO_CREATE_TOPICS:
            # This is required for confluent-kafka>=1.5.0, otherwise the topics will
            # not be automatically created.
            admin_client = AdminClient(self.admin_cluster_options)
            wait_for_topics(admin_client, [self.topic])

        self.consumer.subscribe([self.topic],
                                on_assign=on_assign,
                                on_revoke=on_revoke)

        try:
            i = 0
            while True:
                message = self.consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise KafkaException(error)

                i = i + 1

                with sentry_sdk.start_transaction(
                        op="handle_message",
                        name="query_subscription_consumer_process_message",
                        sampled=True,
                ), metrics.timer("snuba_query_subscriber.handle_message"):
                    self.handle_message(message)

                # Track latest completed message here, for use in `shutdown` handler.
                self.offsets[message.partition()] = message.offset() + 1

                if i % self.commit_batch_size == 0:
                    logger.debug("Committing offsets")
                    self.commit_offsets()
        except KeyboardInterrupt:
            pass

        self.shutdown()

    def commit_offsets(self, partitions=None):
        logger.info(
            "query-subscription-consumer.commit_offsets",
            extra={
                "offsets": six.text_type(self.offsets),
                "partitions": six.text_type(partitions)
            },
        )

        if self.offsets and self.consumer:
            if partitions is None:
                partitions = self.offsets.keys()
            to_commit = []
            for partition in partitions:
                offset = self.offsets.get(partition)
                if offset is None:
                    # Skip partitions that have no offset
                    continue
                to_commit.append(TopicPartition(self.topic, partition, offset))

            self.consumer.commit(offsets=to_commit)

    def shutdown(self):
        logger.debug("Committing offsets and closing consumer")
        self.commit_offsets()
        self.consumer.close()

    def handle_message(self, message):
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        with sentry_sdk.push_scope() as scope:
            try:
                with metrics.timer(
                        "snuba_query_subscriber.parse_message_value"):
                    contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                with metrics.timer(
                        "snuba_query_subscriber.fetch_subscription"):
                    subscription = QuerySubscription.objects.get_from_cache(
                        subscription_id=contents["subscription_id"])
                    if subscription.status != QuerySubscription.Status.ACTIVE.value:
                        metrics.incr(
                            "snuba_query_subscriber.subscription_inactive")
                        return
            except QuerySubscription.DoesNotExist:
                metrics.incr(
                    "snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                try:
                    _delete_from_snuba(self.topic_to_dataset[message.topic()],
                                       contents["subscription_id"])
                except Exception:
                    logger.exception(
                        "Failed to delete unused subscription from snuba.")
                return

            if subscription.type not in subscriber_registry:
                metrics.incr(
                    "snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            logger.debug(
                "query-subscription-consumer.handle_message",
                extra={
                    "timestamp": contents["timestamp"],
                    "query_subscription_id": contents["subscription_id"],
                    "project_id": subscription.project_id,
                    "subscription_dataset": subscription.snuba_query.dataset,
                    "subscription_query": subscription.snuba_query.query,
                    "subscription_aggregation":
                    subscription.snuba_query.aggregate,
                    "subscription_time_window":
                    subscription.snuba_query.time_window,
                    "subscription_resolution":
                    subscription.snuba_query.resolution,
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(
                    op="process_message") as span, metrics.timer(
                        "snuba_query_subscriber.callback.duration",
                        instance=subscription.type):
                span.set_data("payload", contents)
                callback(contents, subscription)

    def parse_message_value(self, value):
        """
        Parses the value received via the Kafka consumer and verifies that it
        matches the expected schema.
        :param value: A json formatted string
        :return: A dict with the parsed message
        """
        with metrics.timer(
                "snuba_query_subscriber.parse_message_value.json_parse"):
            wrapper = json.loads(value)

        with metrics.timer(
                "snuba_query_subscriber.parse_message_value.json_validate_wrapper"
        ):
            try:
                jsonschema.validate(wrapper, SUBSCRIPTION_WRAPPER_SCHEMA)
            except jsonschema.ValidationError:
                metrics.incr("snuba_query_subscriber.message_wrapper_invalid")
                raise InvalidSchemaError(
                    "Message wrapper does not match schema")

        schema_version = wrapper["version"]
        if schema_version not in SUBSCRIPTION_PAYLOAD_VERSIONS:
            metrics.incr(
                "snuba_query_subscriber.message_wrapper_invalid_version")
            raise InvalidMessageError(
                "Version specified in wrapper has no schema")

        payload = wrapper["payload"]
        with metrics.timer(
                "snuba_query_subscriber.parse_message_value.json_validate_payload"
        ):
            try:
                jsonschema.validate(
                    payload, SUBSCRIPTION_PAYLOAD_VERSIONS[schema_version])
            except jsonschema.ValidationError:
                metrics.incr("snuba_query_subscriber.message_payload_invalid")
                raise InvalidSchemaError(
                    "Message payload does not match schema")
        # XXX: Since we just return the raw dict here, when the payload changes it'll
        # break things. This should convert the payload into a class rather than passing
        # the dict around, but until we get time to refactor we can keep things working
        # here.
        payload.setdefault("values", payload.get("result"))

        payload["timestamp"] = parse_date(
            payload["timestamp"]).replace(tzinfo=pytz.utc)
        return payload
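
# The run() method above calls a wait_for_topics() helper that is not included
# in this excerpt. Below is a minimal sketch of what such a helper might look
# like, assuming it only needs to poll AdminClient.list_topics() until every
# requested topic is visible; the retry parameters are assumptions, not the
# original implementation.
import time

from confluent_kafka.admin import AdminClient


def wait_for_topics(admin_client: AdminClient, topics, timeout=10.0, interval=0.5):
    """Block until every topic in `topics` appears in the cluster metadata."""
    deadline = time.time() + timeout
    pending = set(topics)
    while pending and time.time() < deadline:
        metadata = admin_client.list_topics(timeout=5.0)
        pending = {topic for topic in pending if topic not in metadata.topics}
        if pending:
            time.sleep(interval)
    if pending:
        raise RuntimeError("Topics never became available: %s" % sorted(pending))
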
class KafkaStreamingClient(AbstractStreamingClient):
    """Kafka streaming client."""

    def __init__(self, config):  # pragma: no cover
        """
        Streaming client implementation based on Kafka.

        Configuration keys:
          KAFKA_ADDRESS
          KAFKA_CONSUMER_GROUP
          KAFKA_TOPIC
          TIMEOUT
          EVENT_HUB_KAFKA_CONNECTION_STRING
        """
        self.logger = Logger()

        self.topic = config.get("KAFKA_TOPIC")
        if not self.topic:
            raise ValueError("KAFKA_TOPIC is not set in the config object.")

        if not config.get("KAFKA_ADDRESS"):
            raise ValueError("KAFKA_ADDRESS is not set in the config object.")

        if config.get("TIMEOUT"):
            try:
                self.timeout = int(config.get("TIMEOUT"))
            except ValueError:
                self.timeout = None
        else:
            self.timeout = None

        kafka_config = self.create_kafka_config(config)
        self.admin = admin.AdminClient(kafka_config)

        if config.get("KAFKA_CONSUMER_GROUP") is None:
            self.logger.info('Creating Producer')
            self.producer = Producer(kafka_config)
            self.run = False
        else:
            self.logger.info('Creating Consumer')
            self.consumer = Consumer(kafka_config)
            self.run = True
            signal.signal(signal.SIGTERM, self.exit_gracefully)

    @staticmethod
    def create_kafka_config(user_config: dict) -> dict:  # pragma: no cover
        """Create the kafka configuration."""
        config = {
            "bootstrap.servers": user_config.get("KAFKA_ADDRESS"),
            "enable.auto.commit": False,
            "auto.offset.reset": "latest",
            "default.topic.config": {'auto.offset.reset': 'latest'},
        }

        if user_config.get('KAFKA_CONSUMER_GROUP') is not None:
            config['group.id'] = user_config['KAFKA_CONSUMER_GROUP']

        if user_config.get('KAFKA_DEBUG') is not None:
            config['debug'] = user_config['KAFKA_DEBUG']

        if user_config.get('EVENT_HUB_KAFKA_CONNECTION_STRING'):
            ssl_location = user_config.get('SSL_CERT_LOCATION') or '/etc/ssl/certs/ca-certificates.crt'
            kafka_config = {
                'security.protocol': "SASL_SSL",
                'sasl.mechanism': "PLAIN",
                'ssl.ca.location': ssl_location,
                'sasl.username': '******',
                'sasl.password': user_config.get('EVENT_HUB_KAFKA_CONNECTION_STRING'),
                'client.id': 'agogosml',
            }

            config = {**config, **kafka_config}

        return config

    def delivery_report(self, err, msg):  # pragma: no cover
        """
        Indicate delivery result.

        Called once for each message produced. Triggered by poll() or flush().

        :param err: An error message.
        :param msg: A string input to be uploaded to kafka.
        """
        if err is not None:
            self.logger.error('Message delivery failed: %s', err)
        else:
            self.logger.info('Message delivered to %s [%s]',
                             msg.topic(), msg.partition())

    def send(self, message: str):  # pragma: no cover
        if not isinstance(message, str):
            raise TypeError('str type expected for message')
        try:
            mutated_message = message.encode('utf-8')
            self.logger.info('Sending message to kafka topic: %s', self.topic)
            self.producer.poll(0)
            self.producer.produce(
                self.topic, mutated_message, callback=self.delivery_report)
            self.producer.flush()
            return True
        except Exception as ex:
            self.logger.error('Error sending message to kafka: %s', ex)
            return False

    def stop(self):
        """Stop streaming client."""
        self.run = False

    def check_timeout(self, start: datetime):  # pragma: no cover
        """Interrupts if too much time has elapsed since the kafka client started running."""
        if self.timeout is not None:
            elapsed = datetime.now() - start
            if elapsed.seconds >= self.timeout:
                raise KeyboardInterrupt

    def handle_kafka_error(self, msg):  # pragma: no cover
        """Handle an error in kafka."""
        if msg.error().code() == KafkaError._PARTITION_EOF:
            # End of partition event
            self.logger.info('%% %s [%d] reached end at offset %d\n',
                             msg.topic(), msg.partition(), msg.offset())
        else:
            # Error
            raise KafkaException(msg.error())

    def start_receiving(self, on_message_received_callback):  # pragma: no cover
        try:
            self.subscribe_to_topic()
            start = datetime.now()

            while self.run:
                # Stop loop after timeout if exists
                self.check_timeout(start)

                # Poll messages from topic
                msg = self.read_single_message()
                if msg is not None:
                    on_message_received_callback(msg)

        except KeyboardInterrupt:
            self.logger.info('Aborting listener...')
            raise

        finally:
            # Close down consumer to commit final offsets.
            self.consumer.close()

    def exit_gracefully(self, signum, frame):  # pylint: disable=unused-argument
        """Handle interrupt signal or calls to stop and exit gracefully."""
        self.logger.info("Handling interrupt signal %s gracefully." % signum)
        self.stop()

    def subscribe_to_topic(self):  # pragma: no cover
        """Subscribe to topic."""
        self.consumer.subscribe([self.topic])

    def read_single_message(self):  # pragma: no cover
        """Poll messages from topic."""
        msg = self.consumer.poll(0.000001)

        if msg is None:
            return None

        if msg.error():
            # Error or event
            self.handle_kafka_error(msg)
            return None

        # Proper message
        # self.logger.info('kafka read message: %s, from topic: %s', msg.value(), msg.topic())
        self.consumer.commit(msg)
        return msg.value()
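
# A minimal usage sketch for KafkaStreamingClient, based only on the
# configuration keys documented in its __init__ docstring. The broker address,
# topic, and consumer group below are placeholders; start_receiving() blocks
# until stop() is called or the optional TIMEOUT elapses.

def _print_message(message):
    print("received:", message)


producer_client = KafkaStreamingClient({
    "KAFKA_ADDRESS": "localhost:9092",   # placeholder broker address
    "KAFKA_TOPIC": "example-topic",      # placeholder topic
})
producer_client.send("hello from the producer")

consumer_client = KafkaStreamingClient({
    "KAFKA_ADDRESS": "localhost:9092",
    "KAFKA_TOPIC": "example-topic",
    "KAFKA_CONSUMER_GROUP": "example-group",
    "TIMEOUT": "30",                     # stop polling after roughly 30 seconds
})
try:
    consumer_client.start_receiving(_print_message)
except KeyboardInterrupt:
    pass  # raised by check_timeout() once TIMEOUT elapses
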
Beispiel #33
                        raise KafkaException(record.error())
                else:
                    records_pulled = True
                    # ** Business logic and message processing happens here **
                    # Extract the relevant metadata
                    topic = record.topic()
                    partition = record.partition()
                    offset = record.offset()
                    timestamp = record.timestamp()
                    # Extract the msgKey and msgValue
                    msgKey = try_decode_utf8(record.key())
                    msgValue = try_decode_utf8(record.value())

                    # Print the metadata together with the msgKey & msgValue
                    print('%s-%d-%d : (%s , %s)' %
                          (topic, partition, offset, msgKey, msgValue))

            # Commit synchronously (sync commit)
            if records_pulled:
                offsets = consumer.commit(asynchronous=False)
                print_sync_commit_result(offsets)

    except KeyboardInterrupt as e:
        sys.stderr.write('Aborted by user\n')
    except Exception as e:
        sys.stderr.write(str(e))

    finally:
        # Step 6: close the Consumer instance's connection
        consumer.close()
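
# The loop above relies on two helpers that are not part of this excerpt:
# try_decode_utf8() and print_sync_commit_result(). The sketches below are one
# plausible reconstruction of each; the original implementations may differ.

def try_decode_utf8(data):
    """Decode bytes as UTF-8, returning None for absent keys/values."""
    return data.decode("utf-8") if data is not None else None


def print_sync_commit_result(offsets):
    """Print the TopicPartition offsets returned by a synchronous commit()."""
    if not offsets:
        return
    for tp in offsets:
        print("committed %s-%d @ %d" % (tp.topic, tp.partition, tp.offset))
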
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
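    # OFFSET_INVALID is -1001, so the next assertion checks the same condition by name.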
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
Beispiel #35
class SynchronizedConsumer(object):
    """
    This class implements the framework for a consumer that is intended to only
    consume messages that have already been consumed and committed by members
    of another consumer group.

    This works similarly to the Kafka built-in ``__consumer_offsets`` topic.
    The consumer group that is being "followed" (the one that must make
    progress for our consumer here to make progress, identified by the
    ``synchronize_commit_group`` constructor parameter/instance attribute) must
    report its offsets to a topic (identified by the ``commit_log_topic``
    constructor parameter/instance attribute). This consumer subscribes to both
    the commit log topic and the topic(s) that we are actually interested
    in consuming messages from. The messages received from the commit log topic
    control whether or not consumption from partitions belonging to the main
    topic is paused, resumed, or allowed to continue in its current state
    without changes.

    The furthest point in any partition that this consumer should ever consume
    to is the maximum offset that has been recorded to the commit log topic for
    that partition. If the offsets recorded to that topic move
    non-monotonically (due to an intentional offset rollback, for instance)
    this consumer *may* consume up to the highest watermark point. (The
    implementation here tries to pause consuming from the partition as soon as
    possible, but this makes no explicit guarantees about that behavior.)
    """
    initial_offset_reset_strategies = {
        'earliest': get_earliest_offset,
        'latest': get_latest_offset,
    }

    def __init__(self, bootstrap_servers, consumer_group, commit_log_topic,
                 synchronize_commit_group, initial_offset_reset='latest', on_commit=None):
        self.bootstrap_servers = bootstrap_servers
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[initial_offset_reset]

        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        self.__commit_log_consumer, self.__commit_log_consumer_stop_request = self.__start_commit_log_consumer()

        self.__positions = {}

        def commit_callback(error, partitions):
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = {
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.consumer_group,
            'enable.auto.commit': 'false',
            'enable.auto.offset.store': 'true',
            'enable.partition.eof': 'false',
            'default.topic.config': {
                'auto.offset.reset': 'error',
            },
            'on_commit': commit_callback,
        }

        self.__consumer = Consumer(consumer_configuration)

    def __start_commit_log_consumer(self, timeout=None):
        """
        Starts running the commit log consumer.
        """
        stop_request_event = threading.Event()
        start_event = threading.Event()
        result = execute(
            functools.partial(
                run_commit_log_consumer,
                bootstrap_servers=self.bootstrap_servers,
                consumer_group='{}:sync:{}'.format(self.consumer_group, uuid.uuid1().hex),
                commit_log_topic=self.commit_log_topic,
                synchronize_commit_group=self.synchronize_commit_group,
                partition_state_manager=self.__partition_state_manager,
                start_event=start_event,
                stop_request_event=stop_request_event,
            ),
        )
        start_event.wait(timeout)
        return result, stop_request_event

    def __check_commit_log_consumer_running(self):
        if not self.__commit_log_consumer.running():
            try:
                result = self.__commit_log_consumer.result(timeout=0)  # noqa
            except TimeoutError:
                pass  # not helpful

            raise Exception('Commit log consumer unexpectedly exited!')

    def __on_partition_state_change(
            self, topic, partition, previous_state_and_offsets, current_state_and_offsets):
        """
        Callback that is invoked when a partition state changes.
        """
        logger.debug('State change for %r: %r to %r', (topic, partition),
                     previous_state_and_offsets, current_state_and_offsets)

        current_state, current_offsets = current_state_and_offsets
        if current_offsets.local is None:
            # It only makes sense to manipulate the consumer if we've got an
            # assignment. (This block should only be entered at startup if the
            # remote offsets are retrieved from the commit log before the local
            # consumer has received its assignment.)
            return

        # TODO: This will be called from the commit log consumer thread, so need
        # to verify that calling the ``consumer.{pause,resume}`` methods is
        # thread safe!
        if current_state in (SynchronizedPartitionState.UNKNOWN, SynchronizedPartitionState.SYNCHRONIZED,
                             SynchronizedPartitionState.REMOTE_BEHIND):
            self.__consumer.pause([TopicPartition(topic, partition, current_offsets.local)])
        elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
            self.__consumer.resume([TopicPartition(topic, partition, current_offsets.local)])
        else:
            raise NotImplementedError('Unexpected partition state: %s' % (current_state,))

    def subscribe(self, topics, on_assign=None, on_revoke=None):
        """
        Subscribe to a topic.
        """
        self.__check_commit_log_consumer_running()

        def assignment_callback(consumer, assignment):
            # Since ``auto.offset.reset`` is set to ``error`` to force human
            # interaction on an offset reset, we have to explicitly specify the
            # starting offset if no offset has been committed for this topic during
            # the ``__consumer_offsets`` topic retention period.
            assignment = {
                (i.topic, i.partition): self.__positions.get((i.topic, i.partition)) for i in assignment
            }

            for i in self.__consumer.committed([TopicPartition(topic, partition) for (
                    topic, partition), offset in assignment.items() if offset is None]):
                k = (i.topic, i.partition)
                if i.offset > -1:
                    assignment[k] = i.offset
                else:
                    assignment[k] = self.initial_offset_reset(consumer, i.topic, i.partition)

            self.__consumer.assign([TopicPartition(topic, partition, offset)
                                    for (topic, partition), offset in assignment.items()])

            for (topic, partition), offset in assignment.items():
                # Setting the local offsets will either cause the partition to be
                # paused (if the remote offset is unknown or the local offset is
                # not trailing the remote offset) or resumed.
                self.__partition_state_manager.set_local_offset(topic, partition, offset)
                self.__positions[(topic, partition)] = offset

            if on_assign is not None:
                on_assign(self, [TopicPartition(topic, partition)
                                 for topic, partition in assignment.keys()])

        def revocation_callback(consumer, assignment):
            for item in assignment:
                # TODO: This should probably also be removed from the state manager.
                self.__positions.pop((item.topic, item.partition))

            if on_revoke is not None:
                on_revoke(self, assignment)

        self.__consumer.subscribe(
            topics,
            on_assign=assignment_callback,
            on_revoke=revocation_callback)

    def poll(self, timeout):
        self.__check_commit_log_consumer_running()

        message = self.__consumer.poll(timeout)
        if message is None:
            return

        if message.error() is not None:
            return message

        self.__partition_state_manager.validate_local_message(
            message.topic(), message.partition(), message.offset())
        self.__partition_state_manager.set_local_offset(
            message.topic(), message.partition(), message.offset() + 1)
        self.__positions[(message.topic(), message.partition())] = message.offset() + 1

        return message

    def commit(self, *args, **kwargs):
        self.__check_commit_log_consumer_running()

        return self.__consumer.commit(*args, **kwargs)

    def close(self):
        self.__check_commit_log_consumer_running()

        self.__commit_log_consumer_stop_request.set()
        try:
            self.__consumer.close()
        finally:
            self.__commit_log_consumer.result()
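
# The pause/resume rule described in SynchronizedConsumer's docstring can be
# illustrated in isolation. This is only a sketch of the idea, not part of the
# class above: a partition is safe to consume while the local position still
# trails the offset most recently committed by the followed consumer group.

from confluent_kafka import Consumer, TopicPartition


def apply_synchronization(consumer: Consumer, topic, partition,
                          local_offset, remote_offset):
    """Pause or resume one partition based on local vs. commit-log progress."""
    tp = TopicPartition(topic, partition, local_offset)
    if remote_offset is None or local_offset >= remote_offset:
        # Remote progress is unknown, or we have caught up: stop consuming.
        consumer.pause([tp])
    else:
        # The followed group is ahead of us: keep (or resume) consuming.
        consumer.resume([tp])
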
Beispiel #36
class KafkaConsumerWorker(BaseWorker):
    topic_name = None
    consumer_name = None
    consumer_settings = {}
    commit_on_complete = True
    async_commit = True
    poll_timeout = 0.01
    sleep_time = 0.05
    timestamp_fields = ['timestamp']
    decimal_fields = []
    boolean_fields = []

    def setup(self):
        self.consumer = Consumer(**self.get_consumer_settings())
        self.serializer = self.get_message_serializer()
        self.set_topic()

    def teardown(self):
        self.consumer.close()

    def get_topic_name(self):
        if self.topic_name is None:
            raise NotImplementedError
        return self.topic_name

    def get_consumer_name(self):
        if self.consumer_name is None:
            raise NotImplementedError
        return self.consumer_name

    def get_broker_url(self):
        broker_url = settings.BROKER_URL
        if broker_url is None:
            raise NotImplementedError
        return broker_url

    def get_zookeeper_url(self):
        zookeeper_url = settings.ZOOKEEPER_URL
        if zookeeper_url is None:
            raise NotImplementedError
        return zookeeper_url

    def get_consumer_settings(self):
        broker_url = self.get_broker_url()
        logger.debug('connecting to kafka: ' + broker_url)

        consumer_name = self.get_consumer_name()
        logger.debug('using group id: ' + consumer_name)

        initial_settings = {
            'api.version.request': True,
            'broker.version.fallback': '0.9.0',
            'client.id': 'JanglConsumer',
            'bootstrap.servers': broker_url,
            'group.id': consumer_name,
            'default.topic.config': {'auto.offset.reset': 'earliest'},
            'enable.auto.commit': False,
            'on_commit': self.on_commit,
            'session.timeout.ms': 10000,
            'heartbeat.interval.ms': 1000,
        }
        return generate_client_settings(initial_settings, self.consumer_settings)

    def get_message_serializer(self):
        schema_registry_url = self.get_schema_registry_url()
        logger.debug('loading schema registry: ' + schema_registry_url)
        schema_client = CachedSchemaRegistryClient(url=schema_registry_url)
        return MessageSerializer(schema_client)

    def get_schema_registry_url(self):
        schema_microservice = settings.SCHEMA_MICROSERVICE
        if schema_microservice:
            schema_registry_url = get_service_url(schema_microservice)
        else:
            schema_registry_url = settings.SCHEMA_REGISTRY_URL
        if schema_registry_url is None:
            raise NotImplementedError
        return schema_registry_url

    def set_topic(self):
        topic_name = self.get_topic_name()
        logger.debug('set kafka topic: ' + topic_name)
        self.consumer.subscribe([topic_name], on_assign=self.on_assign, on_revoke=self.on_revoke)

    def on_assign(self, consumer, partitions):
        logger.debug('partitions assigned: {}'.format(partitions))
        consumer.assign(partitions)

    def on_revoke(self, consumer, partitions):
        logger.debug('partitions revoked: {}'.format(partitions))
        try:
            consumer.commit(asynchronous=False)
        except KafkaException:
            pass
        consumer.unassign()

    def on_commit(self, err, partitions):
        if err is None:
            logger.debug('commit done: {}'.format(partitions))
        else:
            logger.error('commit error: {} - {}'.format(err, partitions))

    def handle(self):
        message = self.consumer.poll(timeout=self.poll_timeout)

        if message is not None:
            if message.error():
                if message.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    logger.info('%% %s [%d] reached end at offset %d\n' %
                                (message.topic(), message.partition(), message.offset()))
                elif message.error():
                    raise KafkaException(message.error())
            else:
                message = DecodedMessage(self.serializer, message)
                message = self.parse_message(message)

                self.consume_message(message)

                if self.commit_on_complete:
                    self.commit()
            self.done()
        else:
            self.wait()

    def parse_message(self, message):
        for field in self.timestamp_fields:
            if field in message:
                try:
                    message[field] = datetime.fromtimestamp(message[field], utc)
                except ValueError:
                    try:
                        message[field] = datetime.fromtimestamp(message[field]/1000, utc)
                    except TypeError:
                        pass
                except TypeError:
                    pass
        for field in self.decimal_fields:
            if field in message:
                try:
                    message[field] = decimal.Decimal(message[field])
                except (TypeError, decimal.InvalidOperation):
                    pass
        for field in self.boolean_fields:
            if field in message:
                try:
                    message[field] = bool(message[field])
                except TypeError:
                    pass
        return message

    def commit(self):
        if not self.consumer_settings.get('enable.auto.commit'):
            self.consumer.commit(asynchronous=self.async_commit)

    def consume_message(self, message):
        pass
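
# KafkaConsumerWorker is meant to be subclassed: a concrete worker only needs
# to name its topic and consumer group and implement consume_message(). The
# sketch below is hypothetical (placeholder topic/group names) and assumes
# BaseWorker's run loop repeatedly invokes handle() as implied above.

class ExampleEventWorker(KafkaConsumerWorker):
    topic_name = 'example-events'            # placeholder topic
    consumer_name = 'example-event-worker'   # placeholder group.id
    timestamp_fields = ['timestamp', 'created_at']

    def consume_message(self, message):
        # `message` is the decoded and parsed message produced by handle().
        logger.info('processing event: %s', message)
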
class VerifiableConsumer(VerifiableClient):
    """
    confluent-kafka-python backed VerifiableConsumer class for use with
    Kafka's kafkatests client tests.
    """
    def __init__(self, conf):
        """
        conf is a config dict passed to confluent_kafka.Consumer()
        """
        super(VerifiableConsumer, self).__init__(conf)
        self.conf['on_commit'] = self.on_commit
        self.consumer = Consumer(**conf)
        self.consumed_msgs = 0
        self.consumed_msgs_last_reported = 0
        self.consumed_msgs_at_last_commit = 0
        self.use_auto_commit = False
        self.use_async_commit = False
        self.max_msgs = -1
        self.assignment = []
        self.assignment_dict = dict()

    def find_assignment(self, topic, partition):
        """ Find and return existing assignment based on topic and partition,
        or None on miss. """
        skey = '%s %d' % (topic, partition)
        return self.assignment_dict.get(skey)

    def send_records_consumed(self, immediate=False):
        """ Send records_consumed, every 100 messages, on timeout,
            or if immediate is set. """
        if self.consumed_msgs <= self.consumed_msgs_last_reported + (0 if immediate else 100):
            return

        if len(self.assignment) == 0:
            return

        d = {'name': 'records_consumed',
             'count': self.consumed_msgs - self.consumed_msgs_last_reported,
             'partitions': []}

        for a in self.assignment:
            if a.min_offset == -1:
                # Skip partitions that haven't had any messages since last time.
                # This is to circumvent some minOffset checks in kafkatest.
                continue
            d['partitions'].append(a.to_dict())
            a.min_offset = -1

        self.send(d)
        self.consumed_msgs_last_reported = self.consumed_msgs

    def send_assignment(self, evtype, partitions):
        """ Send assignment update, evtype is either 'assigned' or 'revoked' """
        d = {'name': 'partitions_' + evtype,
             'partitions': [{'topic': x.topic, 'partition': x.partition} for x in partitions]}
        self.send(d)

    def on_assign(self, consumer, partitions):
        """ Rebalance on_assign callback """
        old_assignment = self.assignment
        self.assignment = [AssignedPartition(p.topic, p.partition) for p in partitions]
        # Move over our last seen offsets so that we can report a proper
        # minOffset even after a rebalance loop.
        for a in old_assignment:
            b = self.find_assignment(a.topic, a.partition)
            b.min_offset = a.min_offset

        self.assignment_dict = {a.skey: a for a in self.assignment}
        self.send_assignment('assigned', partitions)

    def on_revoke(self, consumer, partitions):
        """ Rebalance on_revoke callback """
        # Send final consumed records prior to rebalancing to make sure
        # latest consumed is on par with what is going to be committed.
        self.send_records_consumed(immediate=True)
        self.do_commit(immediate=True, asynchronous=False)
        self.assignment = list()
        self.assignment_dict = dict()
        self.send_assignment('revoked', partitions)

    def on_commit(self, err, partitions):
        """ Offsets Committed callback """
        if err is not None and err.code() == KafkaError._NO_OFFSET:
            self.dbg('on_commit(): no offsets to commit')
            return

        # Report consumed messages to make sure consumed position >= committed position
        self.send_records_consumed(immediate=True)

        d = {'name': 'offsets_committed',
             'offsets': []}

        if err is not None:
            d['success'] = False
            d['error'] = str(err)
        else:
            d['success'] = True
            d['error'] = ''

        for p in partitions:
            pd = {'topic': p.topic, 'partition': p.partition, 'offset': p.offset}
            if p.error is not None:
                pd['error'] = str(p.error)
            d['offsets'].append(pd)

        if len(self.assignment) == 0:
            self.dbg('Not sending offsets_committed: No current assignment: would be: %s' % d)
            return

        self.send(d)

    def do_commit(self, immediate=False, asynchronous=None):
        """ Commit every 1000 messages or whenever there is a consume timeout
            or immediate. """
        if (self.use_auto_commit
                or self.consumed_msgs_at_last_commit + (0 if immediate else 1000) >
                self.consumed_msgs):
            return

        # Make sure we report consumption before commit,
        # otherwise tests may fail because of commit > consumed
        if self.consumed_msgs_at_last_commit < self.consumed_msgs:
            self.send_records_consumed(immediate=True)

        if asynchronous is None:
            async_mode = self.use_async_commit
        else:
            async_mode = asynchronous

        self.dbg('Committing %d messages (Async=%s)' %
                 (self.consumed_msgs - self.consumed_msgs_at_last_commit,
                  async_mode))

        retries = 3
        while True:
            try:
                self.dbg('Commit')
                offsets = self.consumer.commit(asynchronous=async_mode)
                self.dbg('Commit done: offsets %s' % offsets)

                if not async_mode:
                    self.on_commit(None, offsets)

                break

            except KafkaException as e:
                if e.args[0].code() == KafkaError._NO_OFFSET:
                    self.dbg('No offsets to commit')
                    break
                elif e.args[0].code() in (KafkaError.REQUEST_TIMED_OUT,
                                          KafkaError.NOT_COORDINATOR_FOR_GROUP,
                                          KafkaError._WAIT_COORD):
                    self.dbg('Commit failed: %s (%d retries)' % (str(e), retries))
                    if retries <= 0:
                        raise
                    retries -= 1
                    time.sleep(1)
                    continue
                else:
                    raise

        self.consumed_msgs_at_last_commit = self.consumed_msgs

    def msg_consume(self, msg):
        """ Handle consumed message (or error event) """
        if msg.error():
            self.err('Consume failed: %s' % msg.error(), term=False)
            return

        if False:
            self.dbg('Read msg from %s [%d] @ %d' %
                     (msg.topic(), msg.partition(), msg.offset()))

        if self.max_msgs >= 0 and self.consumed_msgs >= self.max_msgs:
            return  # ignore extra messages

        # Find assignment.
        a = self.find_assignment(msg.topic(), msg.partition())
        if a is None:
            self.err('Received message on unassigned partition %s [%d] @ %d' %
                     (msg.topic(), msg.partition(), msg.offset()), term=True)

        a.consumed_msgs += 1
        if a.min_offset == -1:
            a.min_offset = msg.offset()
        if a.max_offset < msg.offset():
            a.max_offset = msg.offset()

        self.consumed_msgs += 1

        self.consumer.store_offsets(message=msg)
        self.send_records_consumed(immediate=False)
        self.do_commit(immediate=False)