Example #1
class KafkaGroupReader:
    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self._kafka_groups = defaultdict(lambda: defaultdict(dict))
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition)[group_id]

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        # Fetch metadata as partitions_for_topic only returns locally cached metadata
        # See https://github.com/dpkp/kafka-python/issues/1742
        self.consumer.topics()

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(
                    CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(
            list(self.active_partitions.values()))
        # Keep only partitions that actually contain messages; drop the empty ones
        self.active_partitions = {
            p: tp
            for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks
            and self.watermarks[tp.partition].highmark > 0 and self.watermarks[
                tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(list(self.active_partitions.values()))
        self.log.info("Consuming from %s", self.active_partitions)

        message_iterator = iter(self.consumer)

        while not self.finished():
            try:
                message = next(message_iterator)
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[
                    message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        self._remove_unsubscribed_topics()

        return {
            group: topics.keys()
            for group, topics in six.iteritems(self._kafka_groups) if topics
        }

    def _remove_unsubscribed_topics(self):
        for group, topics in list(six.iteritems(self._kafka_groups)):
            for topic, partitions in list(six.iteritems(topics)):
                # If offsets for all partitions are 0, consider the topic as unsubscribed
                if not any(partitions.values()):
                    del self._kafka_groups[group][topic]
                    self.log.info(
                        "Removed group {group} topic {topic} from list of groups"
                        .format(group=group, topic=topic))

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [(p, self.consumer.position(p))
                     for p in self.active_partitions.values()]
        self.consumer.assign(list(self.active_partitions.values()))
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = message.key
        ((key_schema, ), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException(
            )  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition, ), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = message.value
            ((value_schema, ), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset, ), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return group.decode(), topic.decode(), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(
                message)
        except InvalidMessageException:
            return

        if offset is not None:
            self._kafka_groups[group][topic][partition] = offset
            self.log.info(
                "Updated offset for group {group} topic {topic} in list of groups"
                .format(
                    group=group,
                    topic=topic,
                ))
        # TODO: check if we can ever find an offset commit message with message.value is None
        elif offset is None and group in self._kafka_groups and \
                topic in self._kafka_groups[group]:  # No offset means topic deletion
            del self._kafka_groups[group][topic]
            self.log.info(
                "Removed group {group} topic {topic} from list of groups".
                format(group=group, topic=topic))

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client,
            [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition
                             for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in six.iteritems(offsets[CONSUMER_OFFSET_TOPIC])
            if offset.highmark > offset.lowmark and (
                partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
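
A minimal usage sketch for the class above, assuming the kafka-utils helpers it references (get_offset_topic_partition_count, get_group_partition, KafkaToolClient) are importable and that kafka_config is a config object exposing broker_list; the group id is a placeholder:

import logging

logging.basicConfig(level=logging.INFO)

reader = KafkaGroupReader(kafka_config)             # kafka_config assumed to exist
groups = reader.read_groups()                       # scan every __consumer_offsets partition
print(sorted(groups))                               # group ids that still have subscribed topics
topics = reader.read_group("my-consumer-group")     # placeholder group id; reads a single partition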
Example #2
def getMsgData(topic, group, result, maxsize):
    try:
        saveResult = SaveDataResult()
        saveResult.guid = str(uuid.uuid4())
        saveResult.CreateDate = datetime.datetime.now().strftime(
            "%Y-%m-%d %H:%M:%S")

        msgInfos = []
        result.guid = saveResult.guid
        result.topic_messages = []

        consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                 enable_auto_commit=False,
                                 group_id=group)

        # Get all partitions by topic
        par = consumer.partitions_for_topic(topic)

        now_count = 0

        for p in par:
            tp = TopicPartition(topic, p)
            consumer.assign([tp])
            print(tp)
            info = MsgPartitionInfo()

            # Get committed offset
            print('start to get committed offset.....')
            try:
                committed = consumer.committed(tp) or 0
            except Exception as e_commit:
                print(str(e_commit))
                committed = 0  # fall back to 0 if the committed offset cannot be fetched

            # Move consumer to end to get the last position
            consumer.seek_to_end(tp)
            last_offset = consumer.position(tp)

            # Move consumer to beginning to get the first position
            consumer.seek_to_beginning()
            now_offset = consumer.position(tp)
            from_offset = committed

            if from_offset is None:
                from_offset = now_offset

            if from_offset < now_offset:
                from_offset = now_offset

            info.partition_ID = tp.partition
            info.get_last_offset = last_offset
            msgInfos.append(info)

            print("[%s] partition(%s) -> now:%s,  last:%s,  committed:%s" %
                  (tp.topic, tp.partition, now_offset, last_offset, committed))

            # Get msg from position to offset
            while (from_offset < last_offset) and (now_count < maxsize):
                consumer.seek(tp, from_offset)
                polldata = consumer.poll(100)
                from_offset += 1
                now_count += 1
                print('now_count=' + str(now_count))
                result.topic_messages.append(polldata[tp][0].value)

        saveResult.MsgInfo = json.dumps(msgInfos,
                                        default=encode_MsgPartitionInfo,
                                        ensure_ascii=False)
        print(saveResult.MsgInfo)
        consumer.close()
        saveResult.message = "Success"
        saveResult.Code = 200

        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        # Without a value_serializer the message value must be bytes, so encode the JSON
        producer.send(topic + "_log",
                      json.dumps(saveResult, default=encode_SaveDataResult).encode())
        producer.flush()
    except Exception as e:
        # assumed minimal handler; the original except clause is not shown in this snippet
        print(str(e))
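
The helper types used above (SaveDataResult, MsgPartitionInfo, tmpbootstrap_servers and the encode_* functions) are not shown. A self-contained sketch of the same committed-offset versus end-offset pattern with kafka-python, using placeholder broker, group and topic names, might look like this:

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers="localhost:9092",  # placeholder broker
                         group_id="demo-group",               # placeholder group
                         enable_auto_commit=False)
for p in consumer.partitions_for_topic("demo-topic") or []:   # placeholder topic
    tp = TopicPartition("demo-topic", p)
    consumer.assign([tp])
    committed = consumer.committed(tp) or 0   # last committed offset, 0 if none
    consumer.seek_to_end(tp)
    last_offset = consumer.position(tp)       # offset of the next message to be appended
    print("partition %s committed=%s last=%s lag=%s"
          % (p, committed, last_offset, last_offset - committed))
consumer.close()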
Example #3
class KafkaGroupReader:
    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition).get(group_id, [])

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(
                    CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(
            self.active_partitions.values())
        # Keep only partitions that actually contain messages; drop the empty ones
        self.active_partitions = {
            p: tp
            for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks
            and self.watermarks[tp.partition].highmark > 0 and self.watermarks[
                tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(self.active_partitions.values())
        self.log.info("Consuming from %s", self.active_partitions)
        while not self.finished():
            try:
                message = self.consumer.next()
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[
                    message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        return {
            group: topics
            for group, topics in self.kafka_groups.items() if topics
        }

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [(p, self.consumer.position(p))
                     for p in self.active_partitions.values()]
        self.consumer.assign(self.active_partitions.values())
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = bytearray(message.key)
        ((key_schema, ), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException(
            )  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition, ), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema, ), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset, ), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(
                message)
        except InvalidMessageException:
            return

        if offset and (group not in self.kafka_groups
                       or topic not in self.kafka_groups[group]):
            self.kafka_groups[group].add(topic)
            self.log.info("Added group %s topic %s to list of groups", group,
                          topic)
        elif not offset and group in self.kafka_groups and \
                topic in self.kafka_groups[group]:  # No offset means topic deletion
            self.kafka_groups[group].discard(topic)
            self.log.info("Removed group %s topic %s from list of groups",
                          group, topic)

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client,
            [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition
                             for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in offsets[CONSUMER_OFFSET_TOPIC].iteritems()
            if offset.highmark > offset.lowmark and (
                partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
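
relative_unpack and read_short_string above come from kafka-utils' protocol helpers. A self-contained sketch of the same __consumer_offsets key layout (key schemas 0 and 1: int16 version, short-string group, short-string topic, int32 partition) using only the standard struct module:

import struct

def _read_short_string(buf, pos):
    # Kafka short string: int16 length followed by that many bytes
    (length,) = struct.unpack_from(">h", buf, pos)
    pos += 2
    return buf[pos:pos + length].decode(), pos + length

def parse_offset_commit_key(key):
    (version,) = struct.unpack_from(">h", key, 0)       # key schema, 0 or 1
    group, cur = _read_short_string(key, 2)
    topic, cur = _read_short_string(key, cur)
    (partition,) = struct.unpack_from(">i", key, cur)   # int32 partition id
    return version, group, topic, partition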
Example #4
class IBUSStreamingDownsamplingConsumer:
    LOG_FORMAT ="{} UTC_TS\t"\
                "{}"

    def __init__(self, kafkaHost, kafkaPort, tcpHost, tcpPort, group_id, topic,
                 logTopic, interval):
        self.kafkaHost = kafkaHost
        self.kafkaPort = kafkaPort
        self.tcpHost = tcpHost
        self.tcpPort = tcpPort
        self.group_id = group_id
        self.topic = topic
        self.logTopic = logTopic
        self.interval = int(interval)
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)],
            group_id=group_id,
            enable_auto_commit=False)
        self.producer = KafkaProducer(
            bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)])
        self.tcpWriter = None

    def getTopicPartitions(self):
        self.consumer.topics()  #This ensures local cache is updated with
        # information about partitions, offsets etc.
        pids = self.consumer.partitions_for_topic(self.topic)
        tps = [TopicPartition(self.topic, pid) for pid in pids]
        return tps

    def getTopicPartitionsCommittedPositions(self):
        tps = self.getTopicPartitions()
        ret = [(tp, self.consumer.committed(tp)) for tp in tps]
        return ret

    async def tcp_server_handler(self, reader, writer):
        addr = str(writer.get_extra_info("socket").getpeername())
        if self.tcpWriter is not None:
            self.log("refused " + addr)
            writer.write(b"Connection limit reached; connection refused.")
            writer.close()
            return
        self.log("accepted " + addr)
        self.tcpWriter = writer
        t1 = asyncio.create_task(self.poll_from_Kafka(writer))
        try:
            while True:
                data = await reader.read(1)  # 1024*16 bytes
                if not data:
                    break
        except BrokenPipeError:
            """
            Catches connection reset by peer while we are sending the batched
            data, which is also when we cannot check the reader. A broken
            connection on the writer side will ultimately lead to a
            BrokenPipeError on the reader side, hence it is handled here.
            """
            pass
        finally:
            t1.cancel()
            self.log("closed " + addr)
            writer.close()
            self.tcpWriter = None

    async def poll_from_Kafka(self, writer):
        while True:
            prevPos = self.getTopicPartitionsCommittedPositions()
            polled = self.consumer.poll(timeout_ms=1000)
            records = [
                record.value for recordList in polled.values()
                for record in recordList
            ]
            try:
                for record in records:
                    writer.write(record)
                    await writer.drain()
            except ConnectionResetError:
                """
                The error is not thrown reliably. If a connection is broken and
                one tries to
                    writer.write(record)
                    await writer.drain()
                once, the error may not manifest. It is thrown more often when
                one repeatedly writes to and drains a broken connection.
                """
                print("Last batch not fully sent, not committed.")
                for tp, pos in prevPos:
                    self.consumer.seek(tp, pos)
                break
            else:
                self.consumer.commit()
            await asyncio.sleep(self.interval)

    def log(self, msg):
        self.producer.send(
            self.logTopic,
            self.LOG_FORMAT.format(datetime.now().timestamp(), msg).encode(),
        )

    def cleanup(self):
        self.log("shutdown")
        self.consumer.close()
        self.producer.flush()
        self.producer.close()

    def run(self):
        self.log("running")
        asyncio.run(self._async_run())

    async def _async_run(self):
        tcpServer = await asyncio.start_server(self.tcp_server_handler,
                                               self.tcpHost, self.tcpPort)
        await tcpServer.serve_forever()
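
A minimal sketch of wiring the class above up; all endpoints, topic names and the group id are placeholders:

consumer = IBUSStreamingDownsamplingConsumer(
    kafkaHost="localhost", kafkaPort=9092,    # placeholder Kafka endpoint
    tcpHost="0.0.0.0", tcpPort=9000,          # placeholder TCP listen address
    group_id="ibus-downsampler",              # placeholder consumer group
    topic="ibus-data", logTopic="ibus-log",   # placeholder topics
    interval=5)                               # seconds to sleep between Kafka polls
try:
    consumer.run()       # blocks, serving one TCP client at a time
finally:
    consumer.cleanup()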
Example #5
def consume(args):
    settings = Global.settings
    writer = Global.writer
    reader = Global.reader

    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    offset = args.offset

    topic = settings.kafka_topic
    tables_pk = {}
    schema_table = settings.schema_table.get(schema)
    tables = schema_table.get("tables")

    for table in tables:
        tables_pk[table] = reader.get_primary_key(schema, table)

    consumer = KafkaConsumer(
        bootstrap_servers=settings.kafka_server,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    partition = schema_table.get("kafka_partition")
    topic_partition = TopicPartition(topic, partition)
    consumer.assign([topic_partition])
    if offset:
        consumer.seek(topic_partition, offset)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f"started consuming topic: {topic}, partition: {partition}, schema: {schema}, tables: {tables}"
    )

    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f"kafka msg:{msg}")
        event = msg.value
        event_unixtime = event["event_unixtime"] / 10**6
        table = event["table"]
        schema = event["schema"]
        action = event["action"]

        if action == "query":
            alter_table = True
            query = event["values"]["query"]
        else:
            alter_table = False
            query = None
            event_list.setdefault(table, []).append(event)
            len_event += 1

        if last_time == 0:
            last_time = event_unixtime

        if len_event == settings.insert_num:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.insert_interval > 0:
                is_insert = True
        if is_insert or alter_table:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item["action"]
                    action_core = item["action_core"]
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error("insert event error!")
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f"insert event error!,error:{e}")
                    if not skip_error:
                        exit()
            if alter_table:
                try:
                    logger.info(f"execute query:{query}")
                    writer.execute(query)
                except Exception as e:
                    logger.error(f"execute query error!,error:{e}")
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f"commit success {events_num} events!")
            event_list = {}
            is_insert = False
            len_event = last_time = 0
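
A sketch of command-line wiring that could feed consume(); the flag names are assumptions that simply mirror the attributes read from args above, and Global.settings / Global.writer / Global.reader are assumed to be initialised elsewhere:

import argparse

parser = argparse.ArgumentParser(description="replicate binlog events from Kafka")
parser.add_argument("--schema", required=True)
parser.add_argument("--skip-error", dest="skip_error", action="store_true")
parser.add_argument("--auto-offset-reset", dest="auto_offset_reset", default="latest")
parser.add_argument("--offset", type=int, default=None)
consume(parser.parse_args())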
Example #6
from collections import namedtuple

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(
    group_id='test_group',  # placeholder; committed()/commit() require a group_id
    auto_offset_reset='earliest',  # or 'latest'; kafka-python does not handle the old smallest/largest names
    # When True, the consumer syncs its offsets back after consuming each message,
    # so that if this consumer fails, a new consumer can resume from the latest committed offset
    enable_auto_commit=False,
    bootstrap_servers=_BROKERS)
# consumer = KafkaConsumer(bootstrap_servers=_BROKERS)
consumer.assign([TopicPartition(_TOPIC_NAME, 0)])
tp = TopicPartition(_TOPIC_NAME, 0)
print(consumer.committed(TopicPartition(_TOPIC_NAME, 0)))
# consumer.subscribe(topics=[_TOPIC_NAME])
# # Subscribe to a regex topic pattern
# consumer.subscribe(pattern='^awesome.*')
print(consumer.topics())
# partition = TopicPartition(topic=_TOPIC_NAME, partition=consumer.partitions_for_topic(_TOPIC_NAME))
# consumer.seek_to_beginning()
# consumer.seek(TopicPartition(_TOPIC_NAME, 0), 0)
consumer.seek(tp, 50)  # start consuming from offset 50
a = []
for m in consumer:
    if len(a) < 5:
        print(m.offset)
        a.append(m.offset)
        # consumer.commit()
    # else:
    #     a =[]

ProduceRequestPayload = namedtuple("ProduceRequestPayload",
                                   ["topic", "partition", "messages"])

ProduceResponsePayload = namedtuple("ProduceResponsePayload",
                                    ["topic", "partition", "error", "offset"])
Example #7
class KafkaGroupReader:

    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self._kafka_groups = defaultdict(lambda: defaultdict(dict))
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition).get(group_id, [])

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        # Fetch metadata as partitions_for_topic only returns locally cached metadata
        # See https://github.com/dpkp/kafka-python/issues/1742
        self.consumer.topics()

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(list(self.active_partitions.values()))
        # Keep only partitions that actually contain messages; drop the empty ones
        self.active_partitions = {
            p: tp for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks and
            self.watermarks[tp.partition].highmark > 0 and
            self.watermarks[tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(list(self.active_partitions.values()))
        self.log.info("Consuming from %s", self.active_partitions)

        message_iterator = iter(self.consumer)

        while not self.finished():
            try:
                message = next(message_iterator)
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        self._remove_unsubscribed_topics()

        return {
            group: topics.keys()
            for group, topics in six.iteritems(self._kafka_groups)
            if topics
        }

    def _remove_unsubscribed_topics(self):
        for group, topics in list(six.iteritems(self._kafka_groups)):
            for topic, partitions in list(six.iteritems(topics)):
                # If offsets for all partitions are 0, consider the topic as unsubscribed
                if not any(partitions.values()):
                    del self._kafka_groups[group][topic]
                    self.log.info("Removed group {group} topic {topic} from list of groups".format(group=group, topic=topic))

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [
            (p, self.consumer.position(p))
            for p in self.active_partitions.values()
        ]
        self.consumer.assign(list(self.active_partitions.values()))
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = message.key
        ((key_schema,), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException()   # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = message.value
            ((value_schema,), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset,), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return group.decode(), topic.decode(), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        if offset is not None:
            self._kafka_groups[group][topic][partition] = offset
            self.log.info(
                "Updated offset for group {group} topic {topic} in list of groups".format(
                    group=group,
                    topic=topic,
                ),
            )
        # TODO: check if we can ever find an offset commit message with message.value is None
        elif offset is None and group in self._kafka_groups and \
                topic in self._kafka_groups[group]:  # No offset means topic deletion
            del self._kafka_groups[group][topic]
            self.log.info("Removed group {group} topic {topic} from list of groups".format(group=group, topic=topic))

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client,
            [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition for tp in partitions) if partitions else None
        return {part: offset for part, offset
                in six.iteritems(offsets[CONSUMER_OFFSET_TOPIC])
                if offset.highmark > offset.lowmark and
                (partitions is None or part in partitions_set)}

    def finished(self):
        return self._finished