def _get_kafka_offsets(self, kafka_conn, consumer_groups): # Query Kafka for consumer offsets consumer_offsets = {} topic_partitions = collections.defaultdict(set) for consumer_group, topics in consumer_groups.items(): for topic in topics: kafka_consumer = None try: kafka_consumer = consumer.SimpleConsumer(kafka_conn, consumer_group, topic, auto_commit=False) kafka_consumer.fetch_last_known_offsets() partitions = kafka_consumer.offsets.keys() except Exception: self.log.error( 'Error fetching partition list for topic {0}'.format( topic)) if kafka_consumer is not None: kafka_consumer.stop() continue # Remember the topic partitions encountered so that we can look up their # broker offsets later topic_partitions[topic].update(set(partitions)) consumer_offsets[(consumer_group, topic)] = {} for partition in partitions: try: consumer_offsets[( consumer_group, topic )][partition] = kafka_consumer.offsets[partition] except KeyError: self.log.error( 'Error fetching consumer offset for {0} partition {1}' .format(topic, partition)) kafka_consumer.stop() # Query Kafka for the broker offsets, done in a separate loop so only one query is done # per topic/partition even if multiple consumer groups watch the same topic broker_offsets = {} for topic, partitions in topic_partitions.items(): offset_responses = [] for p in partitions: try: response = kafka_conn.send_offset_request( [common.OffsetRequest(topic, p, -1, 1)]) offset_responses.append(response[0]) except common.KafkaError as e: self.log.error( "Error fetching broker offset: {0}".format(e)) for resp in offset_responses: broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0] return consumer_offsets, broker_offsets
def _create_kafka_consumer(self, partitions=None):
    """Build a SimpleConsumer for this object's Kafka group and topic.

    Auto-commit is disabled so that offsets are committed only after a
    message has actually been processed.

    :param partitions: optional explicit partition list; None consumes all.
    :return: a SimpleConsumer with partition info enabled and its last
        known offsets already fetched.
    """
    consumer_options = dict(
        auto_commit=False,
        partitions=partitions,
        iter_timeout=5,
        fetch_size_bytes=self._kafka_fetch_size,
        buffer_size=self._kafka_fetch_size,
        max_buffer_size=None)
    new_consumer = kafka_consumer.SimpleConsumer(
        self._kafka, self._kafka_group, self._kafka_topic,
        **consumer_options)
    new_consumer.provide_partition_info()
    new_consumer.fetch_last_known_offsets()
    return new_consumer
def _create_kafka_consumer(self, partitions=None):
    """Build a SimpleConsumer for this object's Kafka group and topic.

    Auto-commit is disabled so that offsets are committed only after a
    message has actually been processed.

    ``auto_offset_reset`` controls where the consumer's current offset is
    reset from (see the ``whence`` parameter of ``SimpleConsumer.seek()``).
    It must be set to either "largest" or "smallest" depending on where we
    want to reset the offset from, regardless of what ``whence`` is set to;
    "smallest" is used here.

    :param partitions: optional explicit partition list; None consumes all.
    :return: a SimpleConsumer with partition info enabled and its last
        known offsets already fetched.
    """
    consumer_options = dict(
        auto_commit=False,
        partitions=partitions,
        iter_timeout=5,
        fetch_size_bytes=self._kafka_fetch_size,
        buffer_size=self._kafka_fetch_size,
        max_buffer_size=None,
        auto_offset_reset="smallest")
    new_consumer = kafka_consumer.SimpleConsumer(
        self._kafka, self._kafka_group, self._kafka_topic,
        **consumer_options)
    new_consumer.provide_partition_info()
    new_consumer.fetch_last_known_offsets()
    return new_consumer