Example #1
def _commit_offsets_to_watermark(
    kafka_client,
    group,
    topics,
    watermark,
    raise_on_error,
    offset_storage,
):
    topics = _verify_topics_and_partitions(kafka_client, topics, raise_on_error)

    watermark_offsets = get_topics_watermarks(kafka_client, topics, raise_on_error)

    if watermark == HIGH_WATERMARK:
        group_offset_reqs = [
            OffsetCommitRequestPayload(
                topic, partition,
                watermark_offsets[topic][partition].highmark,
                metadata=''
            )
            for topic, partitions in six.iteritems(topics)
            for partition in partitions
        ]
    elif watermark == LOW_WATERMARK:
        group_offset_reqs = [
            OffsetCommitRequestPayload(
                topic, partition,
                watermark_offsets[topic][partition].lowmark,
                metadata=''
            )
            for topic, partitions in six.iteritems(topics)
            for partition in partitions
        ]
    else:
        raise ValueError(
            "Unknown watermark: {watermark}".format(watermark=watermark)
        )

    if offset_storage == 'kafka' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request_kafka
    elif offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_commit_request
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(
            group,
            group_offset_reqs,
            raise_on_error,
            callback=_check_commit_response_error
        )

    return [_f for _f in status if _f]
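A minimal usage sketch for the helper above: resetting a consumer group to the latest offsets by committing the high watermark of every partition. The import paths, broker address, and topic layout here are assumptions, not part of the example.

from kafka_utils.util.client import KafkaToolClient  # assumed import path
from kafka_utils.util.offsets import HIGH_WATERMARK  # assumed import path

client = KafkaToolClient('kafka-broker:9092')  # assumed broker address

# Commit the high watermark for every listed partition of 'my-topic',
# storing the offsets in Kafka; returns per-partition errors, if any.
errors = _commit_offsets_to_watermark(
    client,
    group='my-consumer-group',
    topics={'my-topic': [0, 1, 2]},  # {topic: [partitions]}
    watermark=HIGH_WATERMARK,
    raise_on_error=False,
    offset_storage='kafka',  # or 'zookeeper'
)
for error in errors:
    print(error)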
Example #2
def _commit_offsets_to_watermark(
    kafka_client,
    group,
    topics,
    watermark,
    raise_on_error,
):
    topics = _verify_topics_and_partitions(kafka_client, topics, raise_on_error)

    watermark_offsets = get_topics_watermarks(kafka_client, topics, raise_on_error)

    if watermark == HIGH_WATERMARK:
        group_offset_reqs = [
            OffsetCommitRequestPayload(
                topic, partition,
                watermark_offsets[topic][partition].highmark,
                metadata=''
            )
            for topic, partitions in six.iteritems(topics)
            for partition in partitions
        ]
    elif watermark == LOW_WATERMARK:
        group_offset_reqs = [
            OffsetCommitRequestPayload(
                topic, partition,
                watermark_offsets[topic][partition].lowmark,
                metadata=''
            )
            for topic, partitions in six.iteritems(topics)
            for partition in partitions
        ]
    else:
        raise ValueError(
            "Unknown watermark: {watermark}".format(watermark=watermark)
        )

    send_api = kafka_client.send_offset_commit_request_kafka

    status = []
    if group_offset_reqs:
        status = send_api(
            group,
            group_offset_reqs,
            raise_on_error,
            callback=_check_commit_response_error
        )

    return [_f for _f in status if _f]
Example #3
    def test_commit_fetch_offsets_dual(self):
        req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
        (resp, ) = self.client.send_offset_commit_request_kafka('group', [req])
        self.assertEqual(resp.error, 0)

        (resp, ) = self.client.send_offset_fetch_request_kafka('group', [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        # Metadata is stored in kafka
        self.assertEqual(resp.metadata, 'metadata')
Example #4
    def test_commit_fetch_offsets(self):
        req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
        (resp, ) = self.client.send_offset_commit_request('group', [req])
        self.assertEqual(resp.error, 0)

        req = OffsetFetchRequestPayload(self.topic, 0)
        (resp, ) = self.client.send_offset_fetch_request('group', [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        self.assertEqual(resp.metadata, '')  # Metadata isn't stored for now
Example #5
    def commit(self, partitions=None):
        """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to commit
                all of them

        Returns: True on success, False on failure (None if there was
            nothing to commit)
        """

        # Short-circuit if nothing happened. This check is kept outside
        # the lock to avoid unnecessarily acquiring it just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, in case the state changed
            # while the lock was being acquired
            if self.count_since_commit == 0:
                return

            reqs = []
            if partitions is None:  # commit all partitions
                partitions = list(self.offsets.keys())

            log.debug("Committing new offsets for %s, partitions %s",
                      self.topic, partitions)
            for partition in partitions:
                offset = self.offsets[partition]
                log.debug(
                    "Commit offset %d in SimpleConsumer: "
                    "group=%s, topic=%s, partition=%s",
                    offset,
                    self.group,
                    self.topic,
                    partition,
                )

                reqs.append(
                    OffsetCommitRequestPayload(self.topic, partition, offset,
                                               None))

            try:
                self.client.send_offset_commit_request(self.group, reqs)
            except KafkaError as e:
                log.error("%s saving offsets: %s", e.__class__.__name__, e)
                return False
            else:
                self.count_since_commit = 0
                return True
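A short driver sketch for commit() above, assuming kafka-python's SimpleConsumer with auto-commit disabled; the client construction, broker address, and handler are assumptions.

from kafka import SimpleClient, SimpleConsumer  # assumed imports

def handle(message):
    pass  # hypothetical processing

client = SimpleClient('kafka-broker:9092')  # assumed broker address
consumer = SimpleConsumer(client, 'my-group', 'my-topic', auto_commit=False)

for message in consumer:
    handle(message)
    if consumer.count_since_commit >= 100:
        consumer.commit()  # flush stored offsets for all partitions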
Example #6
    def commit_partition_offsets(self, partition_offsets):
        """
        Commit explicit partition/offset pairs.
        """
        self.logger.debug("Committing partition offsets: %s", partition_offsets)

        commit_requests = [
            OffsetCommitRequestPayload(self.consumer.topic, partition, offset, None)
            for partition, offset in partition_offsets.items()
        ]
        commit_responses = self.consumer.client.send_offset_commit_request(
            self.consumer.group,
            commit_requests,
        )
        for commit_response in commit_responses:
            check_error(commit_response)
Example #7
def set_consumer_offsets(
    kafka_client,
    group,
    new_offsets,
    raise_on_error=True,
    offset_storage='kafka',
):
    """Set consumer offsets to the specified offsets.

    This method does not validate the specified offsets, it is up to
    the caller to specify valid offsets within a topic partition.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in a
    single batch, which saves on both round trips and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param new_offsets: dict {<topic>: {<partition>: <offset>}}
    :param raise_on_error: if False the method does not raise exceptions
      on errors encountered. It may still fail on the request send.
    :param offset_storage: String, one of {zookeeper, kafka}.
    :returns: a list of errors for each partition offset update that failed.
    :rtype: list [OffsetCommitError]
    :raises:
      :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      :py:class:`exceptions.TypeError`: upon badly formatted input
      new_offsets

      :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon
      unknown offset_storage choice.

      FailedPayloadsError: upon send request error.
    """
    valid_new_offsets = _verify_commit_offsets_requests(
        kafka_client, new_offsets, raise_on_error)

    group_offset_reqs = [
        OffsetCommitRequestPayload(
            topic,
            partition,
            offset,
            metadata='',
        ) for topic, new_partition_offsets in six.iteritems(valid_new_offsets)
        for partition, offset in six.iteritems(new_partition_offsets)
    ]

    if offset_storage == 'kafka' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request_kafka
    elif offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_commit_request
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(group,
                          group_offset_reqs,
                          raise_on_error,
                          callback=_check_commit_response_error)

    return [_f for _f in status if _f and _f.error != 0]
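A usage sketch for set_consumer_offsets, following the {<topic>: {<partition>: <offset>}} shape documented above; the client construction and all names and values are assumptions.

from kafka_utils.util.client import KafkaToolClient  # assumed import path

kafka_client = KafkaToolClient('kafka-broker:9092')  # assumed broker address

# Pin two partitions of 'my-topic' to explicit offsets.
new_offsets = {'my-topic': {0: 100, 1: 250}}  # {topic: {partition: offset}}

errors = set_consumer_offsets(
    kafka_client,
    group='my-consumer-group',
    new_offsets=new_offsets,
    raise_on_error=False,
)
for error in errors:
    print(error)  # OffsetCommitError entries for partitions that failed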
Example #8
    def commit(self):
        """Store consumed message offsets (marked via task_done())
        to kafka cluster for this consumer_group.

        Returns:
            True on success, or False if no offsets were found for commit

        Note:
            this functionality requires server version >=0.8.1.1
            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
        """
        if not self._config['group_id']:
            logger.warning('Cannot commit without a group_id!')
            raise KafkaConfigurationError(
                'Attempted to commit offsets '
                'without a configured consumer group (group_id)')

        # API supports storing metadata with each commit
        # but for now it is unused
        metadata = b''

        offsets = self._offsets.task_done
        commits = []
        for topic_partition, task_done_offset in six.iteritems(offsets):

            # Skip if None
            if task_done_offset is None:
                continue

            # Commit offsets as the next offset to fetch
            # which is consistent with the Java Client
            # task_done is marked by messages consumed,
            # so add one to mark the next message for fetching
            commit_offset = (task_done_offset + 1)

            # Skip if no change from previous committed
            if commit_offset == self._offsets.commit[topic_partition]:
                continue

            commits.append(
                OffsetCommitRequestPayload(topic_partition[0],
                                           topic_partition[1], commit_offset,
                                           metadata))

        if commits:
            logger.info('committing consumer offsets to group %s',
                        self._config['group_id'])

            resps = []
            if self._config['offset_storage'] in ['zookeeper', 'dual']:
                resps += self._client.send_offset_commit_request(
                    self._config['group_id'],
                    commits,
                    fail_on_error=False,
                )
            if self._config['offset_storage'] in ['kafka', 'dual']:
                resps += self._client.send_offset_commit_request_kafka(
                    self._config['group_id'],
                    commits,
                    fail_on_error=False,
                )

            for r in resps:
                check_error(r)
                topic_partition = (r.topic, r.partition)
                task_done = self._offsets.task_done[topic_partition]
                self._offsets.commit[topic_partition] = (task_done + 1)

            if self._config['auto_commit_enable']:
                self._reset_auto_commit()

            return True

        else:
            logger.info('No new offsets found to commit in group %s',
                        self._config['group_id'])
            return False
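A hypothetical driver for the dual-storage path above. The constructor keywords mirror the config keys the method reads (group_id, offset_storage, auto_commit_enable); the consumer class, its constructor, and the handler are assumptions.

def process(message):
    pass  # hypothetical message handler

consumer = KafkaConsumer(  # assumed consumer class/constructor
    'my-topic',
    group_id='my-consumer-group',
    offset_storage='dual',  # commit to both zookeeper and kafka
    auto_commit_enable=False,
)

for message in consumer:
    process(message)
    consumer.task_done(message)  # mark the message's offset as consumed
    consumer.commit()  # send OffsetCommitRequests to both offset stores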