Example 1
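This method is written against the old kafka-python SimpleConsumer API. A minimal sketch of the module-level imports it assumes (the exact paths vary between kafka-python releases, and SimpleConsumer only exists in the older ones):

import collections

from kafka import common
from kafka import consumer
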
    def _get_kafka_offsets(self, kafka_conn, consumer_groups):
        # Query Kafka for consumer offsets
        consumer_offsets = {}
        topic_partitions = collections.defaultdict(set)
        for consumer_group, topics in consumer_groups.items():
            for topic in topics:
                kafka_consumer = None
                try:
                    kafka_consumer = consumer.SimpleConsumer(kafka_conn,
                                                             consumer_group,
                                                             topic,
                                                             auto_commit=False)
                    kafka_consumer.fetch_last_known_offsets()

                    partitions = kafka_consumer.offsets.keys()
                except Exception:
                    self.log.error(
                        'Error fetching partition list for topic {0}'.format(
                            topic))
                    if kafka_consumer is not None:
                        kafka_consumer.stop()
                    continue

                # Remember the topic partitions encountered so that we can look up their
                # broker offsets later
                topic_partitions[topic].update(set(partitions))
                consumer_offsets[(consumer_group, topic)] = {}
                for partition in partitions:
                    try:
                        consumer_offsets[(
                            consumer_group, topic
                        )][partition] = kafka_consumer.offsets[partition]
                    except KeyError:
                        self.log.error(
                            'Error fetching consumer offset for {0} partition {1}'
                            .format(topic, partition))

                kafka_consumer.stop()

        # Query Kafka for the broker offsets, done in a separate loop so only one query is done
        # per topic/partition even if multiple consumer groups watch the same topic
        broker_offsets = {}
        for topic, partitions in topic_partitions.items():
            offset_responses = []
            for p in partitions:
                try:
                    response = kafka_conn.send_offset_request(
                        [common.OffsetRequest(topic, p, -1, 1)])
                    offset_responses.append(response[0])
                except common.KafkaError as e:
                    self.log.error(
                        "Error fetching broker offset: {0}".format(e))

            for resp in offset_responses:
                broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]

        return consumer_offsets, broker_offsets
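The two dictionaries returned above pair up naturally into per-partition consumer lag (broker offset minus consumer offset). A minimal sketch of that calculation; the _report_lag name and the debug logging are hypothetical, not part of the original check:

    def _report_lag(self, consumer_offsets, broker_offsets):
        # Compare each consumer group's position against the broker's latest
        # offset for the same topic/partition.
        for (consumer_group, topic), partitions in consumer_offsets.items():
            for partition, consumer_offset in partitions.items():
                broker_offset = broker_offsets.get((topic, partition))
                if broker_offset is None:
                    continue
                lag = broker_offset - consumer_offset
                self.log.debug('Lag for group %s, topic %s, partition %s: %s',
                               consumer_group, topic, partition, lag)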
Example 2
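Examples 2 and 3 refer to SimpleConsumer through a kafka_consumer alias; a sketch of the import that would provide it, again assuming an older kafka-python release:

from kafka import consumer as kafka_consumer
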
    def _create_kafka_consumer(self, partitions=None):
        # No auto-commit so that commits only happen after the message is processed.
        consumer = kafka_consumer.SimpleConsumer(
            self._kafka,
            self._kafka_group,
            self._kafka_topic,
            auto_commit=False,
            partitions=partitions,
            iter_timeout=5,
            fetch_size_bytes=self._kafka_fetch_size,
            buffer_size=self._kafka_fetch_size,
            max_buffer_size=None)

        # provide_partition_info() makes the consumer yield (partition, message)
        # tuples instead of bare messages; fetch_last_known_offsets() resumes from
        # the group's last committed offsets.
        consumer.provide_partition_info()
        consumer.fetch_last_known_offsets()
        return consumer
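A minimal sketch of how a consumer built this way might be driven; the _consume method and the handle_message callback are hypothetical. Because auto_commit is disabled, offsets are committed only after a message has been processed:

    def _consume(self, handle_message):
        consumer = self._create_kafka_consumer()
        try:
            # With provide_partition_info() the iterator yields
            # (partition, message) tuples.
            for partition, message in consumer:
                handle_message(partition, message)
                # Commit only after successful processing, since auto_commit
                # is off.
                consumer.commit()
        finally:
            consumer.stop()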
Example 3
    def _create_kafka_consumer(self, partitions=None):
        # No auto-commit so that commits only happen after the message is processed.

        # auto_offset_reset controls where the consumer's offset is reset to when it
        # falls out of range (see the whence param in SimpleConsumer.seek()). It must be
        # set to either "largest" or "smallest" depending on where we want the offset to
        # restart from, no matter what whence is set to.

        consumer = kafka_consumer.SimpleConsumer(
            self._kafka,
            self._kafka_group,
            self._kafka_topic,
            auto_commit=False,
            partitions=partitions,
            iter_timeout=5,
            fetch_size_bytes=self._kafka_fetch_size,
            buffer_size=self._kafka_fetch_size,
            max_buffer_size=None,
            auto_offset_reset="smallest")

        consumer.provide_partition_info()
        consumer.fetch_last_known_offsets()
        return consumer
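The comment above refers to SimpleConsumer.seek(offset, whence). A minimal sketch of a hypothetical helper showing the two whence values it contrasts with auto_offset_reset; whence=0 seeks to an absolute offset and whence=2 seeks relative to the tail of the log:

    def _reposition_consumer(self, from_beginning):
        consumer = self._create_kafka_consumer()
        if from_beginning:
            # Absolute offset 0; if that offset has already been truncated away,
            # auto_offset_reset="smallest" resets to the earliest retained one.
            consumer.seek(0, 0)
        else:
            # Relative to the tail: start consuming from the current end of the log.
            consumer.seek(0, 2)
        return consumer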