Example 1
from kafka import KafkaConsumer


def _update_partition_assignments(consumer: KafkaConsumer):
    # Roughly equivalent to calling poll() on the consumer: that call would
    # return no records, because the subscription we just created forces a
    # group rejoin, which skips the actual fetch. An explicit poll() is still
    # best avoided here; a cleaner solution (e.g. extending the consumer)
    # would be preferable.
    # pylint: disable=W0212
    consumer._coordinator.poll()
    if not consumer._subscription.has_all_fetch_positions():
        consumer._update_fetch_positions(
            consumer._subscription.missing_fetch_positions())
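
A minimal usage sketch for the helper above, assuming a kafka-python release that still exposes the private _coordinator / _subscription attributes used there; the broker address, topic and group names are placeholders:

from kafka import KafkaConsumer

# Placeholder broker/topic/group values; not taken from the original code.
consumer = KafkaConsumer(bootstrap_servers="localhost:9092",
                         group_id="example-group",
                         enable_auto_commit=False)
consumer.subscribe(topics=["example-topic"])

# Join the group and seed fetch positions without issuing an initial poll(),
# so the first real poll() can return records straight away.
_update_partition_assignments(consumer)
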
Example 2
class Consumer(BaseStreamConsumer):
    """
    Used in the DB and SW workers. The SW worker consumes per partition.
    """
    def __init__(self, location, enable_ssl, cert_path, topic, group,
                 partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        kwargs = {}
        if enable_ssl:
            kwargs.update({
                'security_protocol': 'SSL',
                'ssl_cafile': os_path_join(cert_path, 'ca-cert.pem'),
                'ssl_certfile': os_path_join(cert_path, 'client-cert.pem'),
                'ssl_keyfile': os_path_join(cert_path, 'client-key.pem')
            })
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (
                self._topic,
                str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
            heartbeat_interval_ms=10000,
            **kwargs)

        if partition_id is not None:
            self._partition_ids = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partition_ids)
        else:
            self._partition_ids = [
                TopicPartition(self._topic, pid)
                for pid in self._consumer.partitions_for_topic(self._topic)
            ]
            self._consumer.subscribe(topics=[self._topic])
            if self._consumer._use_consumer_group():
                self._consumer._coordinator.ensure_coordinator_known()
                self._consumer._coordinator.ensure_active_group()

        self._consumer._update_fetch_positions(self._partition_ids)
        self._start_looping_call()

    def _start_looping_call(self, interval=60):
        def errback(failure):
            logger.exception(failure.value)
            if failure.frames:
                logger.critical(
                    "".join(format_tb(failure.getTracebackObject())))
            self._poll_task.start(interval).addErrback(errback)

        self._poll_task = LoopingCall(self._poll_client)
        self._poll_task.start(interval).addErrback(errback)

    def _poll_client(self):
        self._consumer._client.poll()

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partition_ids:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._poll_task.stop()
        self._consumer.commit()
        # keep the kafka client event loop running a little longer so the
        # pending commit is actually sent
        tries = 3
        while tries:
            self.get_messages()
            sleep(2.0)
            tries -= 1
        self._consumer.close()
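
A hypothetical wiring of the class above, assuming a running Twisted reactor (LoopingCall needs one to fire) and that the module-level imports the class relies on (KafkaConsumer, TopicPartition, LoopingCall, os_path_join, format_tb, sleep, logger, BaseStreamConsumer) are in place; the broker, topic, group and the handle() callback are placeholders:

# Single-partition mode, as used by the SW worker: one TopicPartition is
# assigned explicitly instead of relying on group rebalancing.
consumer = Consumer(location="localhost:9092",
                    enable_ssl=False,
                    cert_path=None,
                    topic="example-topic",
                    group="example-group",
                    partition_id=0)

# consumer_timeout_ms=100 makes iteration stop quickly when no messages are
# available, so this returns at most 32 raw message values.
for payload in consumer.get_messages(count=32):
    handle(payload)  # handle() is a placeholder for application code

print(consumer.get_offset(0))
consumer.close()
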
Example 3
class Consumer(BaseStreamConsumer):
    """
    Used in the DB and SW workers. The SW worker consumes per partition.
    """
    def __init__(self, location, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
        )

        if partition_id is not None:
            self._partition_ids = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partition_ids)
        else:
            self._partition_ids = [TopicPartition(self._topic, pid) for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])
            if self._consumer._use_consumer_group():
                self._consumer._coordinator.ensure_coordinator_known()
                self._consumer._coordinator.ensure_active_group()

        self._consumer._update_fetch_positions(self._partition_ids)
        self._start_looping_call()

    def _start_looping_call(self, interval=60):
        def errback(failure):
            logger.exception(failure.value)
            if failure.frames:
                logger.critical("".join(format_tb(failure.getTracebackObject())))
            self._poll_task.start(interval).addErrback(errback)

        self._poll_task = LoopingCall(self._poll_client)
        self._poll_task.start(interval).addErrback(errback)

    def _poll_client(self):
        self._consumer._client.poll()

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partition_ids:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._poll_task.stop()
        self._consumer.commit()
        # keep the kafka client event loop running a little longer so the
        # pending commit is actually sent
        tries = 3
        while tries:
            self.get_messages()
            sleep(2.0)
            tries -= 1
        self._consumer.close()
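
For the DB-worker case mentioned in the docstring, the same class would be constructed with partition_id=None so it consumes all partitions; a short sketch under the same assumptions as the previous one (placeholder names, brokers reachable so partitions_for_topic() can return the partition set):

# All-partitions mode: subscribe to the topic and rely on the coordinator
# calls in __init__ to join the consumer group.
consumer = Consumer(location="localhost:9092",
                    topic="example-topic",
                    group="example-db-group",
                    partition_id=None)

messages = consumer.get_messages(count=10)  # up to 10 raw message values
consumer.close()
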