class IndexedConsumer:
    """Fetch a single message from the input topic by partition and offset.

    Used to retrieve a message from the input queue once it is time to
    send it.
    """

    def __init__(self, input_topic, hosts):
        """Remember the input topic and open a consumer against ``hosts``.

        Args:
            input_topic: name of the topic events are read back from.
            hosts: value handed to ``KafkaConsumer`` as ``bootstrap_servers``.
        """
        self.consumer = KafkaConsumer(bootstrap_servers=hosts)
        self.input_topic = input_topic

    def retrieve_event(self, event_reference):
        """Return the ``ScheduledEvent`` stored at ``event_reference``.

        Args:
            event_reference: object exposing ``partition`` and ``offset``
                attributes that locate the message in the input topic.

        Returns:
            The event built by ``ScheduledEvent.from_dict`` from the
            JSON-decoded message value.
        """
        # Point the consumer at exactly the referenced position.
        position = (
            self.input_topic,
            event_reference.partition,
            event_reference.offset,
        )
        self.consumer.set_topic_partitions(position)
        raw_message = self.consumer.next()
        payload = json.loads(raw_message.value)
        return ScheduledEvent.from_dict(payload)
class KafkaGroupReader:
    """Build a ``{group: set(topics)}`` map by reading CONSUMER_OFFSET_TOPIC.

    The reader snapshots the topic watermarks when it starts, consumes every
    partition up to the snapshot high watermark, and records which topics
    each consumer group has committed offsets for.
    """

    def __init__(self, kafka_config):
        """Store the cluster configuration and create empty bookkeeping.

        Args:
            kafka_config: object exposing ``broker_list``.
        """
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.finished_partitions = set()

    def read_groups(self):
        """Consume the offsets topic and return the discovered groups.

        Returns:
            The ``kafka_groups`` mapping of group name to the set of topics
            the group has committed offsets for.
        """
        self.log.info("Kafka consumer running")
        # Start from a clean slate so that calling read_groups() a second
        # time does not return immediately because of partitions marked as
        # finished by a previous run.
        self.finished_partitions = set()
        self.consumer = KafkaConsumer(
            CONSUMER_OFFSET_TOPIC,
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='smallest',
            auto_commit_enable=False,
            consumer_timeout_ms=10000,
        )
        self.log.info("Consumer ready")
        self.watermarks = self.get_current_watermarks()
        while not self.finished():
            try:
                message = self.consumer.next()
            except ConsumerTimeout:
                break
            except (
                FailedPayloadsError,
                KafkaUnavailableError,
                LeaderNotAvailableError,
                NotLeaderForPartitionError,
            ) as e:
                self.log.warning("Got %s, retrying", e.__class__.__name__)
                # Nothing was fetched: go back to the top of the loop instead
                # of processing an unbound (first iteration) or stale message.
                continue
            # A partition that was empty when the watermarks were read has no
            # entry; its messages are still processed but cannot mark the
            # partition as finished.
            if message.partition in self.watermarks:
                max_offset = self.get_max_offset(message.partition)
                if message.offset >= max_offset - 1:
                    self.finished_partitions.add(message.partition)
            self.process_consumer_offset_message(message)
        return self.kafka_groups

    def parse_consumer_offset_message(self, message):
        """Decode one offsets-topic message.

        The key is read as: schema version (``'>h'``), group, topic (both
        via ``read_short_string``) and partition (``'>l'``). A non-empty
        value is read as: schema version (``'>h'``) then offset (``'>q'``).
        NOTE(review): ``relative_unpack`` presumably wraps ``struct``
        unpacking and returns ``(values, next_cursor)`` -- confirm.

        Args:
            message: object exposing ``key`` and ``value`` byte payloads.

        Returns:
            ``(group, topic, partition, offset)``; ``offset`` is ``None``
            when the message has no value (the offset was deleted).

        Raises:
            InvalidMessageException: if the key or value schema version is
                not 0 or 1.
        """
        key = bytearray(message.key)
        ((key_schema,), cur) = relative_unpack('>h', key, 0)
        if key_schema not in (0, 1):
            # This is not an offset commit message
            raise InvalidMessageException()
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack('>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema,), cur) = relative_unpack('>h', value, 0)
            if value_schema not in (0, 1):
                # Unrecognized message value
                raise InvalidMessageException()
            ((offset,), cur) = relative_unpack('>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        """Update ``kafka_groups`` from one offsets-topic message.

        Messages that fail to parse are ignored. A message carrying an
        offset (including offset 0) registers the topic under its group; a
        message without an offset removes the group.
        """
        try:
            group, topic, partition, offset = \
                self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        # Compare against None explicitly: 0 is a legitimate committed offset
        # and must not be mistaken for a deletion.
        if offset is not None:
            self.kafka_groups[group].add(topic)
        else:
            # No offset means group deletion
            self.kafka_groups.pop(group, None)

    def get_current_watermarks(self):
        """Return ``{partition: watermarks}`` for the non-empty partitions.

        Only partitions whose high watermark is above their low watermark
        are kept.
        """
        self.consumer._client.load_metadata_for_topics()
        offsets = get_topics_watermarks(
            self.consumer._client,
            [CONSUMER_OFFSET_TOPIC],
        )
        # .items() works on both Python 2 and Python 3 dictionaries.
        return {
            partition: offset
            for partition, offset in offsets[CONSUMER_OFFSET_TOPIC].items()
            if offset.highmark > offset.lowmark
        }

    def get_max_offset(self, partition):
        """Return the snapshot high watermark of ``partition``."""
        return self.watermarks[partition].highmark

    def finished(self):
        """Return True once every watermarked partition has been read."""
        return len(self.finished_partitions) >= len(self.watermarks)
class KafkaGroupReader:
    """Discover consumer groups and their topics from CONSUMER_OFFSET_TOPIC.

    Watermarks are captured once at start-up; each partition is then read
    until the captured high watermark is reached, and every decoded offset
    commit is folded into ``kafka_groups`` (group name -> set of topics).
    """

    def __init__(self, kafka_config):
        """Keep ``kafka_config`` (must expose ``broker_list``) and set up
        the empty group map and finished-partition set."""
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.finished_partitions = set()

    def read_groups(self):
        """Read the offsets topic up to the start-up watermarks.

        Returns:
            ``kafka_groups``: mapping of group name to the set of topics
            with committed offsets.
        """
        self.log.info("Kafka consumer running")
        # Forget partitions completed by an earlier call; otherwise a second
        # invocation would consider itself finished before reading anything.
        self.finished_partitions = set()
        self.consumer = KafkaConsumer(
            CONSUMER_OFFSET_TOPIC,
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='smallest',
            auto_commit_enable=False,
            consumer_timeout_ms=10000,
        )
        self.log.info("Consumer ready")
        self.watermarks = self.get_current_watermarks()
        while not self.finished():
            try:
                message = self.consumer.next()
            except ConsumerTimeout:
                break
            except (
                FailedPayloadsError,
                KafkaUnavailableError,
                LeaderNotAvailableError,
                NotLeaderForPartitionError,
            ) as e:
                self.log.warning("Got %s, retrying", e.__class__.__name__)
                # Retry the fetch: there is no fresh message to process, and
                # on the very first iteration `message` is not even bound.
                continue
            # Partitions absent from the watermark snapshot (empty at
            # start-up) are processed but never counted as finished.
            if message.partition in self.watermarks:
                max_offset = self.get_max_offset(message.partition)
                if message.offset >= max_offset - 1:
                    self.finished_partitions.add(message.partition)
            self.process_consumer_offset_message(message)
        return self.kafka_groups

    def parse_consumer_offset_message(self, message):
        """Decode ``message`` into ``(group, topic, partition, offset)``.

        Key fields are read in order: schema version (``'>h'``), group and
        topic (``read_short_string``), partition (``'>l'``). When the value
        is non-empty its schema version (``'>h'``) and offset (``'>q'``) are
        read; otherwise ``offset`` is ``None`` (the offset was deleted).
        NOTE(review): assumes ``relative_unpack`` returns
        ``(unpacked_tuple, next_cursor)`` -- confirm against its definition.

        Raises:
            InvalidMessageException: when either schema version is not 0
                or 1.
        """
        key = bytearray(message.key)
        ((key_schema, ), cur) = relative_unpack('>h', key, 0)
        if key_schema not in (0, 1):
            # This is not an offset commit message
            raise InvalidMessageException()
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition, ), cur) = relative_unpack('>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema, ), cur) = relative_unpack('>h', value, 0)
            if value_schema not in (0, 1):
                # Unrecognized message value
                raise InvalidMessageException()
            ((offset, ), cur) = relative_unpack('>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        """Apply one offsets-topic message to ``kafka_groups``.

        Unparseable messages are skipped. Any message with an offset --
        offset 0 included -- adds the topic to its group; a message with no
        offset drops the group.
        """
        try:
            group, topic, partition, offset = \
                self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        # `offset` is None only for deletions; 0 is a valid commit, so test
        # identity with None rather than truthiness.
        if offset is not None:
            self.kafka_groups[group].add(topic)
        else:
            # No offset means group deletion
            self.kafka_groups.pop(group, None)

    def get_current_watermarks(self):
        """Return the watermarks of every non-empty partition, keyed by
        partition id (high watermark strictly above low watermark)."""
        self.consumer._client.load_metadata_for_topics()
        offsets = get_topics_watermarks(
            self.consumer._client,
            [CONSUMER_OFFSET_TOPIC],
        )
        # .items() is available on Python 2 and Python 3 alike.
        return {
            partition: offset
            for partition, offset in offsets[CONSUMER_OFFSET_TOPIC].items()
            if offset.highmark > offset.lowmark
        }

    def get_max_offset(self, partition):
        """Return the high watermark captured for ``partition``."""
        return self.watermarks[partition].highmark

    def finished(self):
        """Return True when all watermarked partitions are fully read."""
        return len(self.finished_partitions) >= len(self.watermarks)
class KafkaGroupReader:
    """Read CONSUMER_OFFSET_TOPIC to learn which groups consume which topics.

    Partitions are assigned to the consumer explicitly. Each one is read up
    to the high watermark captured when reading started and is then removed
    from the assignment; reading ends when no partition is left.
    """

    def __init__(self, kafka_config):
        """Store the cluster configuration and create empty bookkeeping.

        Args:
            kafka_config: object exposing ``broker_list``.
        """
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        """Return the topics recorded for ``group_id``.

        Only the offsets-topic partition computed by ``get_group_partition``
        is read. Returns an empty list when the group is not found.
        """
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition).get(group_id, [])

    def read_groups(self, partition=None):
        """Consume the offsets topic and return ``{group: set(topics)}``.

        Args:
            partition: when given, only this partition is read; otherwise
                every partition reported by the consumer is read.

        Returns:
            Mapping of group name to a non-empty set of topics. An empty
            dict is returned when there is nothing to consume.
        """
        # Reset the completion flag so a reader can be used more than once;
        # otherwise a second call would skip the consume loop entirely.
        self._finished = False
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            # partitions_for_topic may yield None when the topic is unknown
            # to the client; treat that as "no partitions".
            partition_ids = self.consumer.partitions_for_topic(
                CONSUMER_OFFSET_TOPIC) or ()
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in partition_ids
            }
        self.watermarks = self.get_current_watermarks(
            self.active_partitions.values())

        # Active partitions are not empty. Remove the empty ones.
        self.active_partitions = {
            p: tp
            for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks and
            self.watermarks[tp.partition].highmark > 0 and
            self.watermarks[tp.partition].highmark >
            self.watermarks[tp.partition].lowmark
        }

        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(self.active_partitions.values())
        self.log.info("Consuming from %s", self.active_partitions)
        while not self.finished():
            try:
                message = self.consumer.next()
            except StopIteration:
                # NOTE(review): presumably raised when consumer_timeout_ms
                # elapses without data; the loop simply polls again.
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= \
                    self.watermarks[message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        return {
            group: topics
            for group, topics in self.kafka_groups.items()
            if topics
        }

    def remove_partition_from_consumer(self, partition):
        """Stop consuming ``partition``; flag completion if it was the last.

        Raises:
            KeyError: if ``partition`` is not currently active.
        """
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [
            (p, self.consumer.position(p))
            for p in self.active_partitions.values()
        ]
        self.consumer.assign(self.active_partitions.values())
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        """Decode one offsets-topic message.

        The key is read as: schema version (``b'>h'``), group, topic (both
        via ``read_short_string``) and partition (``b'>l'``). A non-empty
        value is read as: schema version (``b'>h'``) then offset
        (``b'>q'``).
        NOTE(review): assumes ``relative_unpack`` returns
        ``(unpacked_tuple, next_cursor)`` -- confirm against its definition.

        Returns:
            ``(group, topic, partition, offset)``; ``offset`` is ``None``
            when the message has no value (the offset was deleted).

        Raises:
            InvalidMessageException: if the key or value schema version is
                not 0 or 1.
        """
        key = bytearray(message.key)
        ((key_schema, ), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in (0, 1):
            # This is not an offset commit message
            raise InvalidMessageException()
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition, ), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema, ), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in (0, 1):
                # Unrecognized message value
                raise InvalidMessageException()
            ((offset, ), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        """Update ``kafka_groups`` from one offsets-topic message.

        Messages that fail to parse are ignored. A message carrying an
        offset (including offset 0) adds the topic to its group; a message
        without an offset removes the topic from the group.
        """
        try:
            group, topic, partition, offset = \
                self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        # .get() avoids creating an empty defaultdict entry just to look.
        already_known = topic in self.kafka_groups.get(group, ())
        # Test against None, not truthiness: 0 is a valid committed offset
        # and must not be handled as a deletion.
        if offset is not None:
            if not already_known:
                self.kafka_groups[group].add(topic)
                self.log.info("Added group %s topic %s to list of groups",
                              group, topic)
        elif already_known:
            # No offset means topic deletion
            self.kafka_groups[group].discard(topic)
            self.log.info("Removed group %s topic %s from list of groups",
                          group, topic)

    def get_current_watermarks(self, partitions=None):
        """Return ``{partition_id: watermarks}`` for non-empty partitions.

        Args:
            partitions: optional iterable of objects with a ``partition``
                attribute; when given (even if empty) the result is limited
                to those partition ids. ``None`` means no restriction.
        """
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client,
            [CONSUMER_OFFSET_TOPIC],
        )
        # Distinguish "no filter" (None) from "empty filter": the latter must
        # yield an empty set rather than None, which would make the
        # membership test below raise TypeError.
        if partitions is None:
            partitions_set = None
        else:
            partitions_set = set(tp.partition for tp in partitions)
        # .items() keeps this consistent with the rest of the class and
        # works on both Python 2 and Python 3.
        return {
            part: offset
            for part, offset in offsets[CONSUMER_OFFSET_TOPIC].items()
            if offset.highmark > offset.lowmark and (
                partitions_set is None or part in partitions_set)
        }

    def finished(self):
        """Return True once every active partition has been fully read."""
        return self._finished
Copyright (c) 2019 Arshdeep Bahga and Vijay Madisetti Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ''' from kafka.client import KafkaClient from kafka.consumer import KafkaConsumer client = KafkaClient("localhost:6667") consumer = KafkaConsumer("test", metadata_broker_list=['localhost:6667']) while True: data = consumer.next().value print data