def consume(args):
    schema = args.schema
    table = args.table
    assert schema in settings.SCHEMAS, 'schema must be in settings.SCHEMAS'
    assert table in settings.TABLES, 'table must be in settings.TABLES'
    group_id = f'{schema}.{table}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    topic = settings.KAFKA_TOPIC
    partition = settings.PARTITIONS.get(group_id)
    consumer.assign([TopicPartition(topic, partition)])
    event_list = []
    logger.info(
        f'success consume topic:{topic},partition:{partition},schema:{schema},table:{table}'
    )
    pk = reader.get_primary_key(schema, table)
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_list.append(event)
        len_event = len(event_list)
        if len_event == settings.INSERT_NUMS or (
                (int(time.time() * 10 ** 6) - event_list[0]['event_unixtime']) / 10 ** 6
                >= settings.INSERT_INTERVAL > 0):
            data_dict = {}
            tmp_data = []
            for items in event_list:
                action = items['action']
                action_core = items['action_core']
                data_dict.setdefault(table + schema + action + action_core, []).append(items)
            for k, v in data_dict.items():
                tmp_data.append(v)
            result = writer.insert_event(tmp_data, settings.SKIP_TYPE,
                                         settings.SKIP_DELETE_TB_NAME, schema, table, pk)
            if result:
                event_list = []
                consumer.commit()
                logger.info(f'commit success {len_event} events!')
            else:
                logger.error('insert event error!')
                exit()
def consume(args):
    schema = args.schema
    tables = args.tables
    skip_error = args.skip_error
    assert schema in settings.SCHEMAS, f'schema {schema} must be in settings.SCHEMAS'
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    partitions = []
    for table in tables.split(','):
        assert table in settings.TABLES, f'table {table} must be in settings.TABLES'
        partition = settings.PARTITIONS.get(f'{schema}.{table}')
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)
    group_id = f'{schema}.{tables}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    consumer.assign(partitions)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}'
    )
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        event_list.setdefault(table, []).append(event)
        len_event += 1
        if last_time == 0:
            last_time = event_unixtime
        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
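# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the pattern the consume() variants above
# follow: assign specific partitions, buffer events until a count or time
# threshold is reached, write the batch, and only then commit offsets manually.
# The broker address, topic, thresholds and flush_batch() writer below are
# illustrative placeholders, not the project's actual settings or API.
import json
import time

from kafka import KafkaConsumer, TopicPartition

KAFKA_SERVER = 'localhost:9092'   # placeholder for settings.KAFKA_SERVER
KAFKA_TOPIC = 'events'            # placeholder for settings.KAFKA_TOPIC
INSERT_NUMS = 100                 # flush after this many buffered events
INSERT_INTERVAL = 60              # ...or after this many seconds


def flush_batch(events):
    """Stand-in for writer.insert_event(); returns True on success."""
    print(f'writing {len(events)} events')
    return True


def consume_batched():
    consumer = KafkaConsumer(
        bootstrap_servers=KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x),
        enable_auto_commit=False,          # commit only after a successful write
        group_id='example.group',
        auto_offset_reset='earliest',
    )
    consumer.assign([TopicPartition(KAFKA_TOPIC, 0)])

    buffer, last_flush = [], time.time()
    for msg in consumer:
        buffer.append(msg.value)
        if len(buffer) >= INSERT_NUMS or time.time() - last_flush >= INSERT_INTERVAL:
            if flush_batch(buffer):
                consumer.commit()          # acknowledge only what was persisted
                buffer, last_flush = [], time.time()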
def getMsgData(topic, group, result, maxsize):
    try:
        saveResult = SaveDataResult()
        saveResult.guid = str(uuid.uuid4())
        saveResult.CreateDate = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        msgInfos = []
        result.guid = saveResult.guid
        result.topic_messages = []

        consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                 enable_auto_commit=False,
                                 group_id=group)

        # Get all partitions by topic
        par = consumer.partitions_for_topic(topic)

        now_count = 0
        for p in par:
            tp = TopicPartition(topic, p)
            consumer.assign([tp])
            print(tp)
            info = MsgPartitionInfo()

            # Get committed offset
            print('start to get committed offset.....')
            try:
                committed = consumer.committed(tp) or 0
            except Exception as e_commit:
                print(str(e_commit))

            # Move consumer to end to get the last position
            consumer.seek_to_end(tp)
            last_offset = consumer.position(tp)

            # Move consumer to beginning to get the first position
            consumer.seek_to_beginning()
            now_offset = consumer.position(tp)

            from_offset = committed
            if from_offset is None:
                from_offset = now_offset
            if from_offset < now_offset:
                from_offset = now_offset

            info.partition_ID = tp.partition
            info.get_last_offset = last_offset
            msgInfos.append(info)

            print("[%s] partition(%s) -> now:%s, last:%s, committed:%s" %
                  (tp.topic, tp.partition, now_offset, last_offset, committed))

            # Get msg from position to offset
            while (from_offset < last_offset) and (now_count < maxsize):
                consumer.seek(tp, from_offset)
                polldata = consumer.poll(100)
                from_offset += 1
                now_count += 1
                print('now_count=' + str(now_count))
                result.topic_messages.append(polldata[tp][0].value)

        saveResult.MsgInfo = json.dumps(msgInfos,
                                        default=encode_MsgPartitionInfo,
                                        ensure_ascii=False)
        print(saveResult.MsgInfo)
        consumer.close()
        saveResult.message = "Success"
        saveResult.Code = 200

        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        producer.send(topic + "_log",
                      json.dumps(saveResult, default=encode_SaveDataResult))
        producer.flush()
    except Exception as e:
        # The original snippet is truncated at this point; a minimal handler is
        # assumed so the enclosing try block is syntactically complete.
        print(str(e))
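# ---------------------------------------------------------------------------
# getMsgData() derives the first/last offsets by seeking and calling position().
# Recent kafka-python releases expose beginning_offsets()/end_offsets(), which
# return the same information without moving the consumer; a small sketch
# (the broker address and topic name are placeholders):
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                         group_id='my_group',
                         enable_auto_commit=False)
tp = TopicPartition('some_topic', 0)
consumer.assign([tp])

committed = consumer.committed(tp)            # None if nothing committed yet
first = consumer.beginning_offsets([tp])[tp]  # earliest available offset
last = consumer.end_offsets([tp])[tp]         # offset the next message will get
print("partition %s -> first:%s, last:%s, committed:%s"
      % (tp.partition, first, last, committed))
consumer.close()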
def consume(args):
    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    tables = settings.SCHEMAS.get(schema)
    partitions = []
    for table in tables:
        partition = settings.PARTITIONS.get(schema)
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    consumer.assign(partitions)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}'
    )
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        action = event['action']
        if action == 'query':
            do_query = True
            query = event['values']['query']
        else:
            do_query = False
            query = None
        event_list.setdefault(table, []).append(event)
        len_event += 1
        if last_time == 0:
            last_time = event_unixtime
        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert or do_query:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                    if settings.UI_ENABLE:
                        insert_into_redis('consumer', schema, table, len(v1))
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            if do_query:
                try:
                    logger.info(f'execute query:{query}')
                    writer.execute(query)
                except Exception as e:
                    logger.error(f'execute query error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
class KafkaGroupReader:

    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self._kafka_groups = defaultdict(lambda: defaultdict(dict))
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition)[group_id]

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        # Fetch metadata as partitions_for_topic only returns locally cached metadata
        # See https://github.com/dpkp/kafka-python/issues/1742
        self.consumer.topics()

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(list(self.active_partitions.values()))
        # Active partitions are not empty. Remove the empty ones.
        self.active_partitions = {
            p: tp for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks and
            self.watermarks[tp.partition].highmark > 0 and
            self.watermarks[tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(list(self.active_partitions.values()))
        self.log.info("Consuming from %s", self.active_partitions)

        message_iterator = iter(self.consumer)

        while not self.finished():
            try:
                message = next(message_iterator)
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        self._remove_unsubscribed_topics()

        return {
            group: topics.keys()
            for group, topics in six.iteritems(self._kafka_groups)
            if topics
        }

    def _remove_unsubscribed_topics(self):
        for group, topics in list(six.iteritems(self._kafka_groups)):
            for topic, partitions in list(six.iteritems(topics)):
                # If offsets for all partitions are 0, consider the topic as unsubscribed
                if not any(partitions.values()):
                    del self._kafka_groups[group][topic]
                    self.log.info(
                        "Removed group {group} topic {topic} from list of groups".format(
                            group=group, topic=topic))

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [
            (p, self.consumer.position(p))
            for p in self.active_partitions.values()
        ]
        self.consumer.assign(list(self.active_partitions.values()))
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = message.key
        ((key_schema,), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException()  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = message.value
            ((value_schema,), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset,), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return group.decode(), topic.decode(), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        if offset is not None:
            self._kafka_groups[group][topic][partition] = offset
            self.log.info(
                "Updated group {group} topic {topic} and updated offset in list of groups".format(
                    group=group,
                    topic=topic,
                ),
            )
        # TODO: check if we can ever find an offset commit message with message.value is None
        elif offset is None and group in self._kafka_groups and \
                topic in self._kafka_groups[group]:
            # No offset means topic deletion
            del self._kafka_groups[group][topic]
            self.log.info(
                "Removed group {group} topic {topic} from list of groups".format(
                    group=group, topic=topic))

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client, [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in six.iteritems(offsets[CONSUMER_OFFSET_TOPIC])
            if offset.highmark > offset.lowmark and
            (partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
class KafkaGroupReader:

    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition).get(group_id, [])

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(self.active_partitions.values())
        # Active partitions are not empty. Remove the empty ones.
        self.active_partitions = {
            p: tp for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks and
            self.watermarks[tp.partition].highmark > 0 and
            self.watermarks[tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(self.active_partitions.values())
        self.log.info("Consuming from %s", self.active_partitions)

        while not self.finished():
            try:
                message = self.consumer.next()
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        return {
            group: topics
            for group, topics in self.kafka_groups.items()
            if topics
        }

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [
            (p, self.consumer.position(p))
            for p in self.active_partitions.values()
        ]
        self.consumer.assign(self.active_partitions.values())
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = bytearray(message.key)
        ((key_schema,), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException()  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema,), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset,), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        if offset and (group not in self.kafka_groups or
                       topic not in self.kafka_groups[group]):
            self.kafka_groups[group].add(topic)
            self.log.info("Added group %s topic %s to list of groups", group, topic)
        elif not offset and group in self.kafka_groups and \
                topic in self.kafka_groups[group]:
            # No offset means topic deletion
            self.kafka_groups[group].discard(topic)
            self.log.info("Removed group %s topic %s from list of groups", group, topic)

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client, [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in offsets[CONSUMER_OFFSET_TOPIC].iteritems()
            if offset.highmark > offset.lowmark and
            (partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
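# ---------------------------------------------------------------------------
# Both KafkaGroupReader versions above decode __consumer_offsets records with
# kafka-python's relative_unpack()/read_short_string() helpers. The same
# decoding (key schemas 0-1: group, topic, partition; value: a 64-bit offset)
# can be done with only the standard library's struct module. A hedged,
# self-contained sketch of that parsing, not the library's own implementation:
import struct


def _read_short_string(buf, pos):
    """Read a Kafka 'short string': a big-endian int16 length plus that many bytes."""
    (length,) = struct.unpack_from('>h', buf, pos)
    pos += 2
    return buf[pos:pos + length], pos + length


def parse_offset_commit(key_bytes, value_bytes):
    """Return (group, topic, partition, offset) from an offset-commit record."""
    (key_schema,) = struct.unpack_from('>h', key_bytes, 0)
    if key_schema not in (0, 1):
        raise ValueError('not an offset commit key (schema %d)' % key_schema)
    group, pos = _read_short_string(key_bytes, 2)
    topic, pos = _read_short_string(key_bytes, pos)
    (partition,) = struct.unpack_from('>l', key_bytes, pos)
    if value_bytes:
        (value_schema,) = struct.unpack_from('>h', value_bytes, 0)
        if value_schema not in (0, 1):
            raise ValueError('unrecognized value schema %d' % value_schema)
        (offset,) = struct.unpack_from('>q', value_bytes, 2)
    else:
        offset = None  # tombstone record: the committed offset was deleted
    return group.decode(), topic.decode(), partition, offset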
def consume(args):
    settings = Global.settings
    writer = Global.writer
    reader = Global.reader

    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    offset = args.offset
    topic = settings.kafka_topic
    tables_pk = {}
    schema_table = settings.schema_table.get(schema)
    tables = schema_table.get("tables")
    for table in tables:
        tables_pk[table] = reader.get_primary_key(schema, table)

    consumer = KafkaConsumer(
        bootstrap_servers=settings.kafka_server,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    partition = schema_table.get("kafka_partition")
    topic_partition = TopicPartition(topic, partition)
    consumer.assign([topic_partition])
    if offset:
        consumer.seek(topic_partition, offset)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f"success consume topic:{topic},partitions:{partition},schema:{schema},tables:{tables}"
    )
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f"kafka msg:{msg}")
        event = msg.value
        event_unixtime = event["event_unixtime"] / 10 ** 6
        table = event["table"]
        schema = event["schema"]
        action = event["action"]
        if action == "query":
            alter_table = True
            query = event["values"]["query"]
        else:
            alter_table = False
            query = None
        event_list.setdefault(table, []).append(event)
        len_event += 1
        if last_time == 0:
            last_time = event_unixtime
        if len_event == settings.insert_num:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.insert_interval > 0:
                is_insert = True
        if is_insert or alter_table:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item["action"]
                    action_core = item["action_core"]
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error("insert event error!")
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f"insert event error!,error:{e}")
                    if not skip_error:
                        exit()
            if alter_table:
                try:
                    logger.info(f"execute query:{query}")
                    writer.execute(query)
                except Exception as e:
                    logger.error(f"execute query error!,error:{e}")
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f"commit success {events_num} events!")
            event_list = {}
            is_insert = False
            len_event = last_time = 0
# *****************************************************************************************
# https://github.com/cuyu/python-demo/blob/master/demo_kafka.py

_TOPIC_NAME = 'anomaly'
_BROKERS = ['localhost:9092']  # ['localhost.com:9092', 'systest-auto-deployer:9092']
_GROUP_ID = 'my_group'

consumer = KafkaConsumer(
    group_id='ddd',
    auto_offset_reset='earliest',  # or 'latest'; kafka-python does not accept 'smallest'/'largest'
    # When True, the consumer syncs its offset after consuming a message, so that
    # if this consumer fails, a new consumer can pick up from the latest committed
    # offset; here it is disabled so offsets are committed manually.
    enable_auto_commit=False,
    bootstrap_servers=_BROKERS)
# consumer = KafkaConsumer(bootstrap_servers=_BROKERS)
consumer.assign([TopicPartition(_TOPIC_NAME, 0)])
tp = TopicPartition(_TOPIC_NAME, 0)
print(consumer.committed(TopicPartition(_TOPIC_NAME, 0)))
# consumer.subscribe(topics=[_TOPIC_NAME])
# # Subscribe to a regex topic pattern
# consumer.subscribe(pattern='^awesome.*')
print(consumer.topics())

# partition = TopicPartition(topic=_TOPIC_NAME, partition=consumer.partitions_for_topic(_TOPIC_NAME))
# consumer.seek_to_beginning()
# consumer.seek(TopicPartition(_TOPIC_NAME, 0), 0)
consumer.seek(tp, 50)  # start consuming from offset 50

a = []
for m in consumer:
    if len(a) < 5:
        print(m.offset)
        a.append(m.offset)
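# ---------------------------------------------------------------------------
# The demo above reads a handful of records by iterating the consumer and
# counting offsets. The same bounded read can be expressed with poll(), which
# returns at most max_records per call; a sketch assuming the same local
# broker and topic:
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(
    bootstrap_servers=['localhost:9092'],
    group_id='my_group',
    enable_auto_commit=False,
    auto_offset_reset='earliest',
)
tp = TopicPartition('anomaly', 0)
consumer.assign([tp])
consumer.seek(tp, 50)                                     # start from offset 50
records = consumer.poll(timeout_ms=1000, max_records=5)   # at most 5 records
for record in records.get(tp, []):
    print(record.offset, record.value)
consumer.close()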