Python Consumer.commit Examples

Programming Language: Python

Namespace/Package Name: confluent_kafka.cimpl

Class/Type: Consumer

Method/Function: commit

Examples at hotexamples.com: 6

Python Consumer.commit - 6 examples found. These are the top rated real world Python examples of confluent_kafka.cimpl.Consumer.commit extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Consumer(27)

subscribe(23)

poll(18)

close(17)

consume(10)

assign(6)

commit(6)

list_topics(5)

assignment(3)

get_watermark_offsets(3)

position(2)

offsets_for_times(1)

store_offsets(1)

unsubscribe(1)

Example #1

Show file

 def commit_offsets(self, consumer_id: str, offsets: List[TopicPartition]):
     config = Config.get_instance()
     consumer = Consumer({
         "group.id": consumer_id,
         **config.create_confluent_config()
     })
     consumer.commit(offsets=offsets, asynchronous=False)
     consumer.close()

Example #2

Show file

def kafka_consume_expected(topic,
                           group='0',
                           timeout=1.0,
                           mfilter=lambda x: True,
                           validator=lambda x: None,
                           after_subscribe=lambda: None):
    consumer = Consumer({
        'bootstrap.servers': KAFK,
        'group.id': group,
        'auto.offset.reset': 'earliest'  # earliest _committed_ offset
    })
    msgs = []
    topics = consumer.list_topics(topic)  # promises to create topic
    logging.debug("Topic state: %s", topics.topics)
    if topics.topics[topic].error is not None:
        logging.warning("Error subscribing to topic: %s", topics.topics)
        return msgs
    consumer.subscribe([topic])
    time.sleep(5)  # for kafka to rebalance consumer groups

    after_subscribe()

    logging.debug("Waiting for messages...")
    while True:
        msg = consumer.poll(timeout)

        if msg is None:
            break

        logging.info("Seen message: %r %r", msg.key(), msg.value())

        if msg.error():
            logging.warning("Consumer error: {}".format(msg.error()))
            continue

        if mfilter(msg):
            validator(msg)
            msgs.append(msg)

    consumer.commit()
    consumer.close()

    return msgs

Example #3

Show file

def reset_offsets_from_partitions(client: AdminClient, brokers: str,
                                  app_name: str, input_topic: str):
    topic_description = get_topic(client, input_topic)
    partition_ids = [
        partition_metada.id
        for partition_metada in topic_description.partitions.values()
    ]
    partitions = [
        TopicPartition(input_topic, id_partition, 0)
        for id_partition in partition_ids
    ]
    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': app_name,
        'session.timeout.ms': 6000
    })
    response = consumer.commit(offsets=partitions, asynchronous=False)
    if not isinstance(response, list):
        raise FaustAppCleanException("Error while cleaning the Faust app!")

Example #4

Show file

class TimeOrderedGeneratorWithTimeout(GeneratorInterface):
    """
    A general generator which can read multiple topics and merge their messages in time order.
    A message must be emitted at (arrival_system_time + latency_ms).
    In batch mode (until reaching the first EOP on each stream) the generator will not discard any messages.
    """
    def __init__(self,
                 broker,
                 groupid,
                 topics_infos: List[TopicInfo],
                 latency_ms,
                 commit_interval_sec=None,
                 group_by_time=False,
                 begin_timestamp=None,
                 begin_flag=None,
                 end_timestamp=None,
                 end_flag=None,
                 heartbeat_interval_ms=-1):
        """
        :param broker: Broker to connect to.
        :param groupid: Group id of the consumer.
        :param topics_infos: [TopicInfo()] - list of TopicInfo objects.
        :param latency_ms: (integer >=0) Latency to wait before serving a message.
                            After this messages with lower or equal timestamps will be discarded.
        :param commit_interval_sec: How many seconds to wait between commits.-1 does not commit with the given group id.
        :param group_by_time: Group messages with the same timestamp. This will yield a list of messages.
        :param begin_timestamp: Timestamp of the kafka messages where the generator will start.
        :param begin_flag: BEGINNING, CONTINUE, LIVE - CONTINUE will continue from the last committed offset.
                            If there was no committed offset will start from the end of the stream.
        :param end_timestamp: Timestamp where to end the reading.
        :param end_flag: NEVER, END_OF_PARTITION
        :param heartbeat_interval_ms: -1 does not produce heartbeat. After every interval will produce a HeartBeat typed
                                        message with the timestamp.
        """
        if begin_timestamp is not None and begin_flag is not None:
            raise Exception(
                'You can not set the begin timestamp and a flag in the same time.'
            )
        if end_timestamp is not None and end_flag is not None:
            raise Exception(
                'You can not set the end timestamp and a flag in the same time.'
            )
        if begin_timestamp is not None and end_timestamp is not None and begin_timestamp >= end_timestamp:
            raise Exception(
                'The begin timestamp is larger then the end timestamp.')
        if begin_flag is not None and end_flag is not None and \
                begin_flag == BeginFlag.LIVE and end_flag == EndFlag.END_OF_PARTITION:
            raise Exception(
                'You can not start in live and process until the end of the streams.'
            )
        if end_flag is not None and not (end_flag == EndFlag.END_OF_PARTITION
                                         or end_flag == EndFlag.NEVER):
            raise Exception(
                'Unknow end flag: {} . Please use the given enum to use proper end flag.'
                .format(end_flag))
        self.end_ts = end_timestamp
        self.end_flag = end_flag
        self.commit_interval_sec = commit_interval_sec
        self.latency_ms = latency_ms
        self.group_by_time = group_by_time
        self.max_poll_interval_ms = 5 * 60 * 1000
        self.consumer = Consumer({
            'bootstrap.servers':
            broker,
            'group.id':
            groupid,
            'enable.auto.commit':
            False,
            'auto.offset.reset':
            'earliest'
            if begin_flag == BeginFlag.CONTINUE_OR_BEGINNING else 'latest',
            'fetch.wait.max.ms':
            20,
            'max.poll.interval.ms':
            self.max_poll_interval_ms,
            'enable.partition.eof':
            True
        })
        self.last_poll = None

        self.tps = []
        self.queues = {}
        self.messages_to_be_committed = {}
        self.begin_timestamp = begin_timestamp
        for ti in topics_infos:
            topic_name = ti.topic
            self.messages_to_be_committed[topic_name] = {
                'last_msg': None,
                'committed': True
            }
            if begin_timestamp is not None:
                self.tps.extend(
                    self.consumer.offsets_for_times([
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=begin_timestamp)
                    ]))
            elif begin_flag is not None:
                if begin_flag == BeginFlag.BEGINNING:
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_BEGINNING))
                elif begin_flag in (BeginFlag.CONTINUE,
                                    BeginFlag.CONTINUE_OR_BEGINNING):
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_STORED))
                elif begin_flag == BeginFlag.LIVE:
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_END))
                else:
                    raise Exception(
                        'Unknown begin flag. Please use the enum to provide proper begin flag.'
                    )
            else:
                self.tps.append(
                    TopicPartition(topic_name,
                                   partition=ti.partition,
                                   offset=OFFSET_END))
            end_offset = None
            if end_flag is not None and end_flag == EndFlag.END_OF_PARTITION:
                end_offset = self.consumer.get_watermark_offsets(
                    TopicPartition(topic_name, 0))[1] - 1
            if end_offset is None or end_offset >= 0:
                self.queues[topic_name] = Topic(topic_name,
                                                self.consumer,
                                                end_offset=end_offset,
                                                partition=ti.partition,
                                                drop=ti.drop)
        self.consumer.assign(self.tps)
        self.last_commit = time.time()
        self.running = True
        self.heartbeat_interval_ms = heartbeat_interval_ms
        self.next_hb = None

    def stopGenerator(self):
        self.running = False

    def _serve_messages(self, message_to_serve):
        if self.commit_interval_sec is not None and self.group_by_time:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False

        # serve messages
        if self.group_by_time:
            yield message_to_serve
        else:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False
                yield msg
                if not self.running:
                    break

        # commit messages when they were delivered
        current_time = time.time()
        if self.commit_interval_sec is not None and (
                current_time - self.last_commit) > self.commit_interval_sec:
            for k in self.messages_to_be_committed.keys():
                if not self.messages_to_be_committed[k]['committed']:
                    self.consumer.commit(
                        self.messages_to_be_committed[k]['last_msg'])
                    self.messages_to_be_committed[k]['committed'] = True
            self.last_commit = current_time

    def _serve_heartbeat(self, current_timestamp_ms):
        if self.next_hb is None:
            if self.begin_timestamp is not None:
                self.next_hb = self.begin_timestamp
            else:
                self.next_hb = current_timestamp_ms
        while self.next_hb <= current_timestamp_ms:
            yield HeartBeat(self.next_hb)
            self.next_hb += self.heartbeat_interval_ms

    def _can_serve(self):
        min_ets = min([
            q.queue[0].message.timestamp()[1]
            for q in self.queues.values() if len(q.queue) > 0
        ],
                      default=-1)
        if min_ets == -1:
            return None
        deadline = getSystemTimestamp() - self.latency_ms
        if all([q.can_be_emitted(min_ets) for q in self.queues.values()]) and \
                any([q.queue[0].ts < deadline for q in self.queues.values()
                     if len(q.queue) > 0 and q.queue[0].message.timestamp()[1] == min_ets]):
            return min_ets
        else:
            return None

    def getMessages(self):
        while self.running:
            if all([v.stopped for v in self.queues.values()]):
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.queue)
                message_to_serve = [m.message for m in message_to_serve]
                message_to_serve.sort(key=lambda x: x.timestamp()[1])
                while len(message_to_serve) > 0:
                    ts = message_to_serve[0].timestamp()[1]
                    serve_it = []
                    while len(message_to_serve) > 0 and message_to_serve[
                            0].timestamp()[1] == ts:
                        serve_it.append(message_to_serve.pop(0))
                    if not self.heartbeat_interval_ms == -1:
                        yield from self._serve_heartbeat(ts)
                    yield from self._serve_messages(serve_it)
                logging.info('Exiting from generator.')
                break
            self.last_poll = getSystemTimestamp()
            msg = self.consumer.poll(0.001)
            if msg is not None:
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        if msg.topic() in self.queues:
                            self.queues[msg.topic()].first_eop_reached = True
                            self.queues[msg.topic()].end_of_partition = True
                    else:
                        logging.error('Unhandle error: {}'.format(msg.error()))
                        break
                else:
                    self.queues[msg.topic()].end_of_partition = False
                    if self.end_ts is not None and msg.timestamp(
                    )[1] > self.end_ts:
                        self.queues[msg.topic()].stop_topic()
                    else:
                        self.queues[msg.topic()].add_message(msg)
            while self.running:
                event_ts_to_serve = self._can_serve()
                if event_ts_to_serve is None or \
                        self.max_poll_interval_ms - (getSystemTimestamp() - self.last_poll) < 30000:
                    if self.end_flag == EndFlag.NEVER and self.heartbeat_interval_ms != -1 \
                            and any([q.end_of_partition for q in self.queues.values()]):
                        if self.next_hb is None:
                            self.next_hb = min(
                                getSystemTimestamp() - self.latency_ms,
                                min([
                                    q.queue[0].message.timestamp()[1]
                                    for q in self.queues.values()
                                    if len(q.queue) > 0
                                ],
                                    default=sys.maxsize))
                        if self.next_hb < min(
                                getSystemTimestamp() - self.latency_ms,
                                min([
                                    q.queue[0].message.timestamp()[1]
                                    for q in self.queues.values()
                                    if len(q.queue) > 0
                                ],
                                    default=sys.maxsize)):
                            yield from self._serve_heartbeat(self.next_hb)
                    break
                if self.heartbeat_interval_ms != -1:
                    yield from self._serve_heartbeat(event_ts_to_serve)
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.get_messages(event_ts_to_serve))
                yield from self._serve_messages(message_to_serve)
                if self.end_ts is not None and self.end_ts <= event_ts_to_serve:
                    self.running = False
        self.consumer.close()

Example #5

Show file

class AsyncWorker(object):
    """
    Fetches from Kafka topics and processes them.

    :param consumer_topic: Name of the Kafka topic for consume.
    :type consumer_topic: str
    :param service: Service function which is executed every time when job is processed.
    Service must get as argument str or dict type object.
    :type service: callable
    :param consumer_conf: config for Kafka consumer.
    :type consumer_conf: dict
    :param failed_topic: Kafka topic for produce unprocessed messages from consumer_topic.
    :type failed_topic: str
    :param producer_conf: config for Kafka producer for producing unprocessed messages.
    :type producer_conf: dict
    """
    def __init__(self, consumer_topic: str, service: Callable,
                 consumer_conf: dict, failed_topic: str, producer_conf: dict):

        self._consumer_topic = consumer_topic
        self._consumer = Consumer(consumer_conf)
        self._service = service
        self._failed_topic = failed_topic  # use naming like <project name>_<version>_<consumer_topic><retry/failed>
        self._producer = AsyncProducer(producer_conf)

    def __repr__(self):
        """Return the string representation of the worker.
        :return: String representation of the worker.
        :rtype: str
        """

        return 'Worker(Consumer={}, consume_topic={})'.format(
            self._consumer, self._consumer_topic)

    def __del__(self):  # pragma: no cover
        # noinspection PyBroadException
        try:
            self._consumer.close()
        except Exception:
            pass

    async def _exec_service(self, message_value):
        if iscoroutinefunction(self._service):
            res = await self._service(message_value)
        else:
            res = self._service(message_value)
        return res

    async def _process_message(self, msg: Message):
        """
        De-serialize message and execute service.
        :param msg: Kafka message.
        :type msg: confluent_kafka.Message`
        """
        LOGGER.info(
            'Processing Message(topic={}, partition={}, offset={}) ...'.format(
                msg.topic, msg.partition, msg.offset))
        service_repr = get_call_repr(self._service)
        LOGGER.info('Executing job {}'.format(service_repr))
        try:
            message_value = _decode_msg_value(msg.value())
            res = await self._exec_service(message_value)

        except KeyboardInterrupt:
            LOGGER.error('Job was interrupted: {}'.format(msg.offset()))

        except Exception as err:
            LOGGER.exception('Job {} raised an exception: {}'.format(
                msg.offset(), err))

            await self._producer.produce(topic=self._failed_topic,
                                         value=msg.value(),
                                         error=str(err))
        else:
            LOGGER.info('Job {} returned: {}'.format(msg.offset(), res))

    @property
    def consumer_topic(self):
        """Return the name of the Kafka topic.
        :return: Name of the Kafka topic.
        :rtype: str
        """
        return self._consumer_topic

    @property
    def consumer(self):
        """Return the Kafka consumer instance.
        :return: Kafka consumer instance.
        :rtype: kafka.KafkaConsumer
        """
        return self._consumer

    @property
    def service(self):
        """Return the service function.
        :return: Callback function, or None if not set.
        :rtype: callable | None
        """
        return self._service

    async def start(self,
                    max_messages: int = math.inf,
                    commit_offsets: bool = True) -> int:
        """Start processing Kafka messages and executing jobs.
        :param max_messages: Maximum number of Kafka messages to process before stopping. If not set, worker runs until
        interrupted.

        :type max_messages: int
        :param commit_offsets: If set to True, consumer offsets are committed every time a message is processed
        (default: True).
        :type commit_offsets: bool
        :return: Total number of messages processed.
        :rtype: int
        """
        LOGGER.info('Starting {} ...'.format(self))

        self._consumer.unsubscribe()
        self._consumer.subscribe([self.consumer_topic])
        LOGGER.info(" Try get messages from position: {}".format(
            self._consumer.position(self._consumer.assignment())))
        messages_processed = 0
        while messages_processed < max_messages:
            loop = asyncio.get_event_loop()
            # awaiting place for processing messages in other coroutines
            messages = await loop.run_in_executor(
                None, partial(self._consumer.consume, 10, 2.0))
            LOGGER.debug(" Try get messages from position: {}".format(
                self._consumer.position(self._consumer.assignment())))
            if not messages:
                LOGGER.debug("Messages not found")
                continue
            for msg in messages:
                if msg.error():
                    LOGGER.error("Consumer error: {}".format(msg.error()))
                LOGGER.info("Get message with offset {}".format(msg.offset()))
                asyncio.create_task(self._process_message(msg))
            if commit_offsets:
                self._consumer.commit()

            messages_processed += 1
        self._consumer.close()
        return messages_processed

Example #6

Show file

class AioConsumer:

    def __init__(self, config,
                 topics: list,
                 group_id: str,
                 handler,
                 max_retry=-1,
                 consumer_no=0,
                 timeout=1,
                 loop=None, exe=None):
        """
        consumer = new AioConsumer(...)
        :param config: kafka consumer config
        :param topics:
        :param group_id:
        :param handler:
        :param max_retry: 消费失败重试次数。-1：不重试
        :param consumer_no: 消费者编号
        :param timeout: poll超时时间
        :param loop:
        :param exe:
        """
        self.loop = loop or asyncio.get_event_loop()
        assert config is not None, 'init kafka consumer error, config is None'
        _config = copy.deepcopy(config)
        _config['group.id'] = group_id
        _config['on_commit'] = self.commit_completed
        self.handler = handler
        self.consumer = Consumer(_config)
        self.consumer.subscribe(topics)
        self.redis_retry_key = f'{"_".join(topics)}_{self.handler.__name__}'
        self.name = f'{self.redis_retry_key}_{consumer_no}'
        self.max_retry = max_retry
        self.exe = exe
        self.timeout = timeout
        # 'INIT' -> 'RUNNING' -> 'STOP'
        self.status = 'INIT'

    @staticmethod
    def commit_completed(err, partitions):
        if err:
            logger.info(str(err))
        else:
            logger.info("Committed partition offsets: " + str(partitions))

    async def poll(self):
        return await self.loop.run_in_executor(self.exe, self.consumer.poll, self.timeout)

    async def _get_message_from_kafka(self):
        poll_message = await self.poll()
        if not poll_message:
            return None
        elif poll_message.error():
            raise KafkaException(poll_message.error())
        else:
            return poll_message.value()

    async def run(self):
        while self.status == 'RUNNING':
            str_message = await self._get_message_from_kafka()
            message = json.loads(str_message or '{}')
            if not message:
                await asyncio.sleep(1)
                continue
            try:
                if asyncio.iscoroutinefunction(self.handler):
                    await self.handler(message)
                else:
                    self.handler(message)
                await self.commit()
            except Exception as e:
                logger.warning(f'{str(self)} handler error: {e.args}. msg: {str_message}')

        await self.close()

    async def commit(self):
        def _commit():
            self.consumer.commit(asynchronous=False)
        await self.loop.run_in_executor(self.exe, _commit)

    async def close(self):
        await self.commit()
        await self.loop.run_in_executor(self.exe, self.consumer.close)
        logger.info(f'{self.name} closed')

    def stop(self):
        self.status = 'STOP'