def consume(args):
    schema = args.schema
    table = args.table
    assert schema in settings.SCHEMAS, 'schema must be in settings.SCHEMAS'
    assert table in settings.TABLES, 'table must be in settings.TABLES'
    group_id = f'{schema}.{table}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    topic = settings.KAFKA_TOPIC
    partition = settings.PARTITIONS.get(group_id)
    consumer.assign([TopicPartition(topic, partition)])
    event_list = []
    logger.info(
        f'success consume topic:{topic},partition:{partition},schema:{schema},table:{table}'
    )
    pk = reader.get_primary_key(schema, table)
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_list.append(event)
        len_event = len(event_list)
        if len_event == settings.INSERT_NUMS or (
                (int(time.time() * 10 ** 6) - event_list[0]['event_unixtime']) / 10 ** 6
                >= settings.INSERT_INTERVAL > 0):
            data_dict = {}
            tmp_data = []
            for items in event_list:
                action = items['action']
                action_core = items['action_core']
                data_dict.setdefault(table + schema + action + action_core, []).append(items)
            for k, v in data_dict.items():
                tmp_data.append(v)
            result = writer.insert_event(tmp_data, settings.SKIP_TYPE,
                                         settings.SKIP_DELETE_TB_NAME, schema, table, pk)
            if result:
                event_list = []
                consumer.commit()
                logger.info(f'commit success {len_event} events!')
            else:
                logger.error('insert event error!')
                exit()
def consume(args):
    schema = args.schema
    tables = args.tables
    skip_error = args.skip_error
    assert schema in settings.SCHEMAS, f'schema {schema} must be in settings.SCHEMAS'
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    partitions = []
    for table in tables.split(','):
        assert table in settings.TABLES, f'table {table} must be in settings.TABLES'
        partition = settings.PARTITIONS.get(f'{schema}.{table}')
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)
    group_id = f'{schema}.{tables}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    consumer.assign(partitions)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}'
    )
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        event_list.setdefault(table, []).append(event)
        len_event += 1
        if last_time == 0:
            last_time = event_unixtime
        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
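# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the pattern the consume() variants above
# follow: assign specific partitions, buffer events until a count or time
# threshold is reached, write the batch, and only then commit offsets manually.
# The broker address, topic, thresholds and flush_batch() writer below are
# illustrative placeholders, not the project's actual settings or API.
import json
import time

from kafka import KafkaConsumer, TopicPartition

KAFKA_SERVER = 'localhost:9092'   # placeholder for settings.KAFKA_SERVER
KAFKA_TOPIC = 'events'            # placeholder for settings.KAFKA_TOPIC
INSERT_NUMS = 100                 # flush after this many buffered events
INSERT_INTERVAL = 60              # ...or after this many seconds


def flush_batch(events):
    """Stand-in for writer.insert_event(); returns True on success."""
    print(f'writing {len(events)} events')
    return True


def consume_batched():
    consumer = KafkaConsumer(
        bootstrap_servers=KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x),
        enable_auto_commit=False,          # commit only after a successful write
        group_id='example.group',
        auto_offset_reset='earliest',
    )
    consumer.assign([TopicPartition(KAFKA_TOPIC, 0)])

    buffer, last_flush = [], time.time()
    for msg in consumer:
        buffer.append(msg.value)
        if len(buffer) >= INSERT_NUMS or time.time() - last_flush >= INSERT_INTERVAL:
            if flush_batch(buffer):
                consumer.commit()          # acknowledge only what was persisted
                buffer, last_flush = [], time.time()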
def getMsgData(topic, group, result, maxsize):
    try:
        saveResult = SaveDataResult()
        saveResult.guid = str(uuid.uuid4())
        saveResult.CreateDate = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        msgInfos = []
        result.guid = saveResult.guid
        result.topic_messages = []

        consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                 enable_auto_commit=False,
                                 group_id=group)

        # Get all partitions by topic
        par = consumer.partitions_for_topic(topic)

        now_count = 0
        for p in par:
            tp = TopicPartition(topic, p)
            consumer.assign([tp])
            print(tp)
            info = MsgPartitionInfo()

            # Get committed offset
            print('start to get committed offset.....')
            try:
                committed = consumer.committed(tp) or 0
            except Exception as e_commit:
                print(str(e_commit))

            # Move consumer to end to get the last position
            consumer.seek_to_end(tp)
            last_offset = consumer.position(tp)

            # Move consumer to beginning to get the first position
            consumer.seek_to_beginning()
            now_offset = consumer.position(tp)

            from_offset = committed
            if from_offset is None:
                from_offset = now_offset
            if from_offset < now_offset:
                from_offset = now_offset

            info.partition_ID = tp.partition
            info.get_last_offset = last_offset
            msgInfos.append(info)

            print("[%s] partition(%s) -> now:%s, last:%s, committed:%s" %
                  (tp.topic, tp.partition, now_offset, last_offset, committed))

            # Get msg from position to offset
            while (from_offset < last_offset) and (now_count < maxsize):
                consumer.seek(tp, from_offset)
                polldata = consumer.poll(100)
                from_offset += 1
                now_count += 1
                print('now_count=' + str(now_count))
                result.topic_messages.append(polldata[tp][0].value)

        saveResult.MsgInfo = json.dumps(msgInfos,
                                        default=encode_MsgPartitionInfo,
                                        ensure_ascii=False)
        print(saveResult.MsgInfo)
        consumer.close()
        saveResult.message = "Success"
        saveResult.Code = 200

        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        producer.send(topic + "_log",
                      json.dumps(saveResult, default=encode_SaveDataResult))
        producer.flush()
    except Exception as e:
        # The original snippet is truncated at this point; a minimal handler is
        # assumed so the enclosing try block is syntactically complete.
        print(str(e))
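# ---------------------------------------------------------------------------
# getMsgData() derives the first/last offsets by seeking and calling position().
# Recent kafka-python releases expose beginning_offsets()/end_offsets(), which
# return the same information without moving the consumer; a small sketch
# (the broker address and topic name are placeholders):
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                         group_id='my_group',
                         enable_auto_commit=False)
tp = TopicPartition('some_topic', 0)
consumer.assign([tp])

committed = consumer.committed(tp)            # None if nothing committed yet
first = consumer.beginning_offsets([tp])[tp]  # earliest available offset
last = consumer.end_offsets([tp])[tp]         # offset the next message will get
print("partition %s -> first:%s, last:%s, committed:%s"
      % (tp.partition, first, last, committed))
consumer.close()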
def consume(args):
    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    tables = settings.SCHEMAS.get(schema)
    partitions = []
    for table in tables:
        partition = settings.PARTITIONS.get(schema)
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    consumer.assign(partitions)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}'
    )
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        action = event['action']
        if action == 'query':
            do_query = True
            query = event['values']['query']
        else:
            do_query = False
            query = None
        event_list.setdefault(table, []).append(event)
        len_event += 1
        if last_time == 0:
            last_time = event_unixtime
        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert or do_query:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                    if settings.UI_ENABLE:
                        insert_into_redis('consumer', schema, table, len(v1))
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            if do_query:
                try:
                    logger.info(f'execute query:{query}')
                    writer.execute(query)
                except Exception as e:
                    logger.error(f'execute query error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
class KafkaGroupReader:

    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self._kafka_groups = defaultdict(lambda: defaultdict(dict))
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition)[group_id]

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        # Fetch metadata as partitions_for_topic only returns locally cached metadata
        # See https://github.com/dpkp/kafka-python/issues/1742
        self.consumer.topics()

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(list(self.active_partitions.values()))
        # Active partitions are not empty. Remove the empty ones.
        self.active_partitions = {
            p: tp for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks and
            self.watermarks[tp.partition].highmark > 0 and
            self.watermarks[tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(list(self.active_partitions.values()))
        self.log.info("Consuming from %s", self.active_partitions)

        message_iterator = iter(self.consumer)

        while not self.finished():
            try:
                message = next(message_iterator)
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        self._remove_unsubscribed_topics()

        return {
            group: topics.keys()
            for group, topics in six.iteritems(self._kafka_groups)
            if topics
        }

    def _remove_unsubscribed_topics(self):
        for group, topics in list(six.iteritems(self._kafka_groups)):
            for topic, partitions in list(six.iteritems(topics)):
                # If offsets for all partitions are 0, consider the topic as unsubscribed
                if not any(partitions.values()):
                    del self._kafka_groups[group][topic]
                    self.log.info(
                        "Removed group {group} topic {topic} from list of groups".format(
                            group=group, topic=topic))

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [
            (p, self.consumer.position(p))
            for p in self.active_partitions.values()
        ]
        self.consumer.assign(list(self.active_partitions.values()))
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = message.key
        ((key_schema,), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException()  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = message.value
            ((value_schema,), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset,), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return group.decode(), topic.decode(), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        if offset is not None:
            self._kafka_groups[group][topic][partition] = offset
            self.log.info(
                "Updated group {group} topic {topic} and updated offset in list of groups".format(
                    group=group,
                    topic=topic,
                ),
            )
        # TODO: check if we can ever find an offset commit message with message.value is None
        elif offset is None and group in self._kafka_groups and \
                topic in self._kafka_groups[group]:
            # No offset means topic deletion
            del self._kafka_groups[group][topic]
            self.log.info(
                "Removed group {group} topic {topic} from list of groups".format(
                    group=group, topic=topic))

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client, [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in six.iteritems(offsets[CONSUMER_OFFSET_TOPIC])
            if offset.highmark > offset.lowmark and
            (partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
class KafkaGroupReader:

    def __init__(self, kafka_config):
        self.log = logging.getLogger(__name__)
        self.kafka_config = kafka_config
        self.kafka_groups = defaultdict(set)
        self.active_partitions = {}
        self._finished = False

    def read_group(self, group_id):
        partition_count = get_offset_topic_partition_count(self.kafka_config)
        partition = get_group_partition(group_id, partition_count)
        return self.read_groups(partition).get(group_id, [])

    def read_groups(self, partition=None):
        self.consumer = KafkaConsumer(
            group_id='offset_monitoring_consumer',
            bootstrap_servers=self.kafka_config.broker_list,
            auto_offset_reset='earliest',
            enable_auto_commit=False,
            consumer_timeout_ms=30000,
            fetch_max_wait_ms=2000,
            max_partition_fetch_bytes=10 * 1024 * 1024,  # 10MB
        )

        if partition is not None:
            self.active_partitions = {
                partition: TopicPartition(CONSUMER_OFFSET_TOPIC, partition),
            }
        else:
            self.active_partitions = {
                p: TopicPartition(CONSUMER_OFFSET_TOPIC, p)
                for p in self.consumer.partitions_for_topic(CONSUMER_OFFSET_TOPIC)
            }
        self.watermarks = self.get_current_watermarks(self.active_partitions.values())
        # Active partitions are not empty. Remove the empty ones.
        self.active_partitions = {
            p: tp for p, tp in self.active_partitions.items()
            if tp.partition in self.watermarks and
            self.watermarks[tp.partition].highmark > 0 and
            self.watermarks[tp.partition].highmark > self.watermarks[tp.partition].lowmark
        }
        # Cannot consume if there are no active partitions
        if not self.active_partitions:
            return {}

        self.consumer.assign(self.active_partitions.values())
        self.log.info("Consuming from %s", self.active_partitions)

        while not self.finished():
            try:
                message = self.consumer.next()
            except StopIteration:
                continue
            # Stop when reaching the last message written to the
            # __consumer_offsets topic when KafkaGroupReader first started
            if message.offset >= self.watermarks[message.partition].highmark - 1:
                self.remove_partition_from_consumer(message.partition)
            self.process_consumer_offset_message(message)

        return {
            group: topics
            for group, topics in self.kafka_groups.items()
            if topics
        }

    def remove_partition_from_consumer(self, partition):
        deleted = self.active_partitions.pop(partition)
        # Terminate if there are no more partitions to consume
        if not self.active_partitions:
            self.log.info("Completed reading from all partitions")
            self._finished = True
            return
        # Reassign the remaining partitions to the consumer while saving the
        # position
        positions = [
            (p, self.consumer.position(p))
            for p in self.active_partitions.values()
        ]
        self.consumer.assign(self.active_partitions.values())
        for topic_partition, position in positions:
            self.consumer.seek(topic_partition, position)
        self.log.info(
            "Completed reading from %s. Remaining partitions: %s",
            deleted,
            self.active_partitions,
        )

    def parse_consumer_offset_message(self, message):
        key = bytearray(message.key)
        ((key_schema,), cur) = relative_unpack(b'>h', key, 0)
        if key_schema not in [0, 1]:
            raise InvalidMessageException()  # This is not an offset commit message
        (group, cur) = read_short_string(key, cur)
        (topic, cur) = read_short_string(key, cur)
        ((partition,), cur) = relative_unpack(b'>l', key, cur)
        if message.value:
            value = bytearray(message.value)
            ((value_schema,), cur) = relative_unpack(b'>h', value, 0)
            if value_schema not in [0, 1]:
                raise InvalidMessageException()  # Unrecognized message value
            ((offset,), cur) = relative_unpack(b'>q', value, cur)
        else:
            offset = None  # Offset was deleted
        return str(group), str(topic), partition, offset

    def process_consumer_offset_message(self, message):
        try:
            group, topic, partition, offset = self.parse_consumer_offset_message(message)
        except InvalidMessageException:
            return

        if offset and (group not in self.kafka_groups or
                       topic not in self.kafka_groups[group]):
            self.kafka_groups[group].add(topic)
            self.log.info("Added group %s topic %s to list of groups", group, topic)
        elif not offset and group in self.kafka_groups and \
                topic in self.kafka_groups[group]:
            # No offset means topic deletion
            self.kafka_groups[group].discard(topic)
            self.log.info("Removed group %s topic %s from list of groups", group, topic)

    def get_current_watermarks(self, partitions=None):
        client = KafkaToolClient(self.kafka_config.broker_list)
        client.load_metadata_for_topics(CONSUMER_OFFSET_TOPIC)
        offsets = get_topics_watermarks(
            client, [CONSUMER_OFFSET_TOPIC],
        )
        partitions_set = set(tp.partition for tp in partitions) if partitions else None
        return {
            part: offset
            for part, offset in offsets[CONSUMER_OFFSET_TOPIC].iteritems()
            if offset.highmark > offset.lowmark and
            (partitions is None or part in partitions_set)
        }

    def finished(self):
        return self._finished
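# ---------------------------------------------------------------------------
# Both KafkaGroupReader versions above decode __consumer_offsets records with
# kafka-python's relative_unpack()/read_short_string() helpers. The same
# decoding (key schemas 0-1: group, topic, partition; value: a 64-bit offset)
# can be done with only the standard library's struct module. A hedged,
# self-contained sketch of that parsing, not the library's own implementation:
import struct


def _read_short_string(buf, pos):
    """Read a Kafka 'short string': a big-endian int16 length plus that many bytes."""
    (length,) = struct.unpack_from('>h', buf, pos)
    pos += 2
    return buf[pos:pos + length], pos + length


def parse_offset_commit(key_bytes, value_bytes):
    """Return (group, topic, partition, offset) from an offset-commit record."""
    (key_schema,) = struct.unpack_from('>h', key_bytes, 0)
    if key_schema not in (0, 1):
        raise ValueError('not an offset commit key (schema %d)' % key_schema)
    group, pos = _read_short_string(key_bytes, 2)
    topic, pos = _read_short_string(key_bytes, pos)
    (partition,) = struct.unpack_from('>l', key_bytes, pos)
    if value_bytes:
        (value_schema,) = struct.unpack_from('>h', value_bytes, 0)
        if value_schema not in (0, 1):
            raise ValueError('unrecognized value schema %d' % value_schema)
        (offset,) = struct.unpack_from('>q', value_bytes, 2)
    else:
        offset = None  # tombstone record: the committed offset was deleted
    return group.decode(), topic.decode(), partition, offset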
def consume(args):
    settings = Global.settings
    writer = Global.writer
    reader = Global.reader

    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    offset = args.offset
    topic = settings.kafka_topic
    tables_pk = {}
    schema_table = settings.schema_table.get(schema)
    tables = schema_table.get("tables")
    for table in tables:
        tables_pk[table] = reader.get_primary_key(schema, table)

    consumer = KafkaConsumer(
        bootstrap_servers=settings.kafka_server,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    partition = schema_table.get("kafka_partition")
    topic_partition = TopicPartition(topic, partition)
    consumer.assign([topic_partition])
    if offset:
        consumer.seek(topic_partition, offset)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f"success consume topic:{topic},partitions:{partition},schema:{schema},tables:{tables}"
    )
    for msg in consumer:  # type: ConsumerRecord
        logger.debug(f"kafka msg:{msg}")
        event = msg.value
        event_unixtime = event["event_unixtime"] / 10 ** 6
        table = event["table"]
        schema = event["schema"]
        action = event["action"]
        if action == "query":
            alter_table = True
            query = event["values"]["query"]
        else:
            alter_table = False
            query = None
        event_list.setdefault(table, []).append(event)
        len_event += 1
        if last_time == 0:
            last_time = event_unixtime
        if len_event == settings.insert_num:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.insert_interval > 0:
                is_insert = True
        if is_insert or alter_table:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item["action"]
                    action_core = item["action_core"]
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error("insert event error!")
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f"insert event error!,error:{e}")
                    if not skip_error:
                        exit()
            if alter_table:
                try:
                    logger.info(f"execute query:{query}")
                    writer.execute(query)
                except Exception as e:
                    logger.error(f"execute query error!,error:{e}")
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f"commit success {events_num} events!")
            event_list = {}
            is_insert = False
            len_event = last_time = 0
# *****************************************************************************************
# https://github.com/cuyu/python-demo/blob/master/demo_kafka.py

_TOPIC_NAME = 'anomaly'
_BROKERS = ['localhost:9092']  # ['localhost.com:9092', 'systest-auto-deployer:9092']
_GROUP_ID = 'my_group'

consumer = KafkaConsumer(
    group_id='ddd',
    auto_offset_reset='earliest',  # or 'latest'; kafka-python does not accept 'smallest'/'largest'
    # When True, the consumer syncs its offset after consuming a message, so that
    # if this consumer fails, a new consumer can pick up from the latest committed
    # offset; here it is disabled so offsets are committed manually.
    enable_auto_commit=False,
    bootstrap_servers=_BROKERS)
# consumer = KafkaConsumer(bootstrap_servers=_BROKERS)
consumer.assign([TopicPartition(_TOPIC_NAME, 0)])
tp = TopicPartition(_TOPIC_NAME, 0)
print(consumer.committed(TopicPartition(_TOPIC_NAME, 0)))
# consumer.subscribe(topics=[_TOPIC_NAME])
# # Subscribe to a regex topic pattern
# consumer.subscribe(pattern='^awesome.*')
print(consumer.topics())

# partition = TopicPartition(topic=_TOPIC_NAME, partition=consumer.partitions_for_topic(_TOPIC_NAME))
# consumer.seek_to_beginning()
# consumer.seek(TopicPartition(_TOPIC_NAME, 0), 0)
consumer.seek(tp, 50)  # start consuming from offset 50

a = []
for m in consumer:
    if len(a) < 5:
        print(m.offset)
        a.append(m.offset)
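# ---------------------------------------------------------------------------
# The demo above reads a handful of records by iterating the consumer and
# counting offsets. The same bounded read can be expressed with poll(), which
# returns at most max_records per call; a sketch assuming the same local
# broker and topic:
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(
    bootstrap_servers=['localhost:9092'],
    group_id='my_group',
    enable_auto_commit=False,
    auto_offset_reset='earliest',
)
tp = TopicPartition('anomaly', 0)
consumer.assign([tp])
consumer.seek(tp, 50)                                     # start from offset 50
records = consumer.poll(timeout_ms=1000, max_records=5)   # at most 5 records
for record in records.get(tp, []):
    print(record.offset, record.value)
consumer.close()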