Example #1
def _commit_offsets_to_watermark(
    kafka_client,
    group,
    topics,
    watermark,
    raise_on_error,
    offset_storage,
):
    topics = _verify_topics_and_partitions(kafka_client, topics,
                                           raise_on_error)

    watermark_offsets = get_topics_watermarks(kafka_client, topics,
                                              raise_on_error)

    if watermark == HIGH_WATERMARK:
        group_offset_reqs = [
            OffsetCommitRequest(kafka_bytestring(topic), partition,
                                watermark_offsets[topic][partition].highmark,
                                None)
            for topic, partitions in topics.iteritems()
            for partition in partitions
        ]
    elif watermark == LOW_WATERMARK:
        group_offset_reqs = [
            OffsetCommitRequest(kafka_bytestring(topic), partition,
                                watermark_offsets[topic][partition].lowmark,
                                None)
            for topic, partitions in topics.iteritems()
            for partition in partitions
        ]
    else:
        raise ValueError(
            "Unknown watermark: {watermark}".format(watermark=watermark))

    if offset_storage == 'zookeeper' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_commit_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(kafka_bytestring(group),
                          group_offset_reqs,
                          raise_on_error,
                          callback=_check_commit_response_error)

    return filter(None, status)
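A minimal call sketch for the function above. It is hypothetical: it assumes
a connected kafka-utils KafkaToolClient, the module's HIGH_WATERMARK constant,
and that `topics` maps topic names to partition lists (matching the
iteritems() usage above):

client = KafkaToolClient('kafka-host:9092')  # illustrative connection
errors = _commit_offsets_to_watermark(
    client,
    'my-group',
    {'topic1': [0, 1]},
    HIGH_WATERMARK,
    raise_on_error=True,
    offset_storage='kafka',
)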
Example #2
    def commit_offsets(self, topic_to_partition_offset_map):
        """Commits offset information to kafka.  Allows lower-level control for
        committing offsets.  In general, :meth:`commit_message` or
        :meth:`commit_messages` should be used, but this can be useful when paired with
        :meth:`data_pipeline.position_data.PositionData.topic_to_last_position_info_map`.

        **Example**::
            The `topic_to_partition_offset_map` should be formatted like::

                {
                  'topic1': {0: 83854, 1: 8943892},
                  'topic2': {0: 190898}
                }

        Args:
            topic_to_partition_offset_map (Dict[str, Dict[int, int]]): Maps from
                topics to a partition and offset map for each topic.
        """
        topic_to_partition_offset_map = self._get_offsets_map_to_be_committed(
            topic_to_partition_offset_map)
        return self._send_offset_commit_requests(offset_commit_request_list=[
            OffsetCommitRequest(topic=kafka_bytestring(topic),
                                partition=partition,
                                offset=offset,
                                metadata=None) for topic, partition_map in
            topic_to_partition_offset_map.iteritems()
            for partition, offset in partition_map.iteritems()
        ])
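A usage sketch, assuming `producer` is an instance of the class defining this
method; the map format is taken verbatim from the docstring:

producer.commit_offsets({
    'topic1': {0: 83854, 1: 8943892},
    'topic2': {0: 190898},
})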
Example #3
    def commit_message(self, message):
        """Commit the message offset for this consumer group. This function does not
        take care of the consumer offset tracking. It should only be used if
        auto_commit is disabled and the commit function never called.

        .. note:: all the messages received before message itself will be committed
                  as consequence.

        :param message: message to commit.
        :type message: Message namedtuple, which consists of: partition number,
                       offset, key, and message value
        :return: True on success, False on failure.
        """
        reqs = [
            OffsetCommitRequest(
                self.topic,
                message.partition,
                message.offset,
                None,
            )
        ]

        try:
            if self.config.offset_storage in [None, 'zookeeper', 'dual']:
                self.client.send_offset_commit_request(self.config.group_id,
                                                       reqs)
            if self.config.offset_storage in ['kafka', 'dual']:
                self.client.send_offset_commit_request_kafka(
                    self.config.group_id, reqs)
        except KafkaError as e:
            self.log.error("%s saving offsets: %s", e.__class__.__name__, e)
            return False
        else:
            return True
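A minimal sketch mirroring the test in Example #10 further below: build a
Message namedtuple (partition, offset, key, value), commit it, and check the
boolean result; `consumer` is assumed to be an instance of the class above:

message = Message(0, 100, 'mykey', 'myvalue')
if not consumer.commit_message(message):
    print('offset commit failed')  # the method has already logged the error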
Example #4
    def commit(self, partitions=None):
        """XXX"""

        # short circuit if nothing happened. This check is kept outside
        # to prevent un-necessarily acquiring a lock for checking the state
        if self._count_since_commit == 0:
            return

        with (yield from self._commit_lock):
            # Do this check again, just in case the state has changed
            # during the lock acquiring timeout
            if self._count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self._offsets.keys()

            for partition in partitions:
                offset = self._offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self._group, self._topic, partition))

                reqs.append(
                    OffsetCommitRequest(self._topic, partition, offset, None))

            resps = yield from self._client.send_offset_commit_request(
                self._group, reqs)

            for resp in resps:
                check_error(resp)
            self._count_since_commit = 0
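Because this variant is a coroutine (the `yield from` calls target
Python 3.3/3.4-era asyncio), it must be driven by an event loop. A sketch,
assuming `consumer` is an instance exposing the method above:

import asyncio

loop = asyncio.get_event_loop()
loop.run_until_complete(consumer.commit())        # commit all partitions
loop.run_until_complete(consumer.commit([0, 2]))  # or an explicit subset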
Example #5
    def commit(self):
        """Store consumed message offsets (marked via task_done())
        to kafka cluster for this consumer_group.

        Returns:
            True on success, or False if no offsets were found for commit

        Note:
            this functionality requires server version >=0.8.1.1
            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
        """
        if not self._config['group_id']:
            logger.warning('Cannot commit without a group_id!')
            raise KafkaConfigurationError('Attempted to commit offsets without a configured consumer group (group_id)')

        # API supports storing metadata with each commit
        # but for now it is unused
        metadata = b''

        offsets = self._offsets.task_done
        commits = []
        for topic_partition, task_done_offset in six.iteritems(offsets):

            # Skip if None
            if task_done_offset is None:
                continue

            # Commit offsets as the next offset to fetch
            # which is consistent with the Java Client
            # task_done is marked by messages consumed,
            # so add one to mark the next message for fetching
            commit_offset = (task_done_offset + 1)

            # Skip if no change from previous committed
            if commit_offset == self._offsets.commit[topic_partition]:
                continue

            commits.append(OffsetCommitRequest(topic_partition[0], topic_partition[1], commit_offset, metadata))

        if commits:
            logger.info('committing consumer offsets to group %s', self._config['group_id'])
            resps = self._client.send_offset_commit_request(kafka_bytestring(self._config['group_id']),
                                                            commits,
                                                            fail_on_error=False)

            for r in resps:
                check_error(r)
                topic_partition = (r.topic, r.partition)
                task_done = self._offsets.task_done[topic_partition]
                self._offsets.commit[topic_partition] = (task_done + 1)

            if self._config['auto_commit_enable']:
                self._reset_auto_commit()

            return True

        else:
            logger.info('No new offsets found to commit in group %s', self._config['group_id'])
            return False
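A sketch of the task_done/commit workflow this method expects, in the style
of the pre-1.0 kafka-python KafkaConsumer; `handle` is a hypothetical
processing step:

for message in consumer:
    handle(message)              # process the message
    consumer.task_done(message)  # mark it consumed; commit() stores offset + 1
consumer.commit()                # returns False if nothing new to commit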
Example #6
    def test_encode_offset_commit_request_kafka(self):

        header = b"".join([
            struct.pack('>i', 113),  # Total message length
            struct.pack('>h', 8),  # Message type = offset commit
            struct.pack('>h', 2),  # API version
            struct.pack('>i', 42),  # Correlation ID
            struct.pack('>h9s', 9, b"client_id"),  # The client ID
            struct.pack('>h8s', 8, b"group_id"),  # The group to commit for
            struct.pack('>i', -1),  # Consumer group generation id
            struct.pack(">h0s", 0, b""),  # Consumer id
            struct.pack('>q', -1),  # Retention time
            struct.pack('>i', 2),  # Num topics
        ])

        topic1 = b"".join([
            struct.pack(">h6s", 6, b"topic1"),  # Topic for the request
            struct.pack(">i", 2),  # Two partitions
            struct.pack(">i", 0),  # Partition 0
            struct.pack(">q", 123),  # Offset 123
            struct.pack(">h", -1),  # Null metadata
            struct.pack(">i", 1),  # Partition 1
            struct.pack(">q", 234),  # Offset 234
            struct.pack(">h", -1),  # Null metadata
        ])

        topic2 = b"".join([
            struct.pack(">h6s", 6, b"topic2"),  # Topic for the request
            struct.pack(">i", 1),  # One partition
            struct.pack(">i", 2),  # Partition 2
            struct.pack(">q", 345),  # Offset 345
            struct.pack(">h", -1),  # Null metadata
        ])

        expected1 = b"".join([header, topic1, topic2])
        expected2 = b"".join([header, topic2, topic1])

        encoded = KafkaToolProtocol.encode_offset_commit_request_kafka(
            b"client_id", 42, b"group_id", [
                OffsetCommitRequest(b"topic1", 0, 123, None),
                OffsetCommitRequest(b"topic1", 1, 234, None),
                OffsetCommitRequest(b"topic2", 2, 345, None),
            ])

        assert encoded in [expected1, expected2]
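The assertion accepts either topic ordering, presumably because the encoder
groups requests by topic in a dict, whose iteration order is not guaranteed
(this predates Python 3.7's insertion-ordered dicts); only the byte layout
within each topic block is fixed.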
Example #7
    def test_commit_fetch_offsets(self):
        req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
        (resp, ) = self.client.send_offset_commit_request(b"group", [req])
        self.assertEqual(resp.error, 0)

        req = OffsetFetchRequest(self.bytes_topic, 0)
        (resp, ) = self.client.send_offset_fetch_request(b"group", [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        self.assertEqual(resp.metadata, b"")  # Metadata isn't stored for now
Example #8
    def test_commit_fetch_offsets_dual(self):
        req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
        (resp, ) = self.client.send_offset_commit_request_kafka(
            b"group", [req])
        self.assertEqual(resp.error, 0)

        (resp, ) = self.client.send_offset_fetch_request_kafka(b"group", [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        # Metadata is stored in kafka
        self.assertEqual(resp.metadata, b"metadata")
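Taken together, the two tests document the difference between the storage
backends: the ZooKeeper-backed path drops the commit metadata (it reads back
as b""), while the Kafka-backed path round-trips it intact.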
Example #9
    def test_encode_offset_commit_request(self):
        header = "".join([
            struct.pack('>i', 99),  # Total message length
            struct.pack('>h', 8),  # Message type = offset commit
            struct.pack('>h', 0),  # API version
            struct.pack('>i', 42),  # Correlation ID
            struct.pack('>h9s', 9, "client_id"),  # The client ID
            struct.pack('>h8s', 8, "group_id"),  # The group to commit for
            struct.pack('>i', 2),  # Num topics
        ])

        topic1 = "".join([
            struct.pack(">h6s", 6, "topic1"),  # Topic for the request
            struct.pack(">i", 2),  # Two partitions
            struct.pack(">i", 0),  # Partition 0
            struct.pack(">q", 123),  # Offset 123
            struct.pack(">h", -1),  # Null metadata
            struct.pack(">i", 1),  # Partition 1
            struct.pack(">q", 234),  # Offset 234
            struct.pack(">h", -1),  # Null metadata
        ])

        topic2 = "".join([
            struct.pack(">h6s", 6, "topic2"),  # Topic for the request
            struct.pack(">i", 1),  # One partition
            struct.pack(">i", 2),  # Partition 2
            struct.pack(">q", 345),  # Offset 345
            struct.pack(">h", -1),  # Null metadata
        ])

        expected1 = "".join([header, topic1, topic2])
        expected2 = "".join([header, topic2, topic1])

        encoded = KafkaProtocol.encode_offset_commit_request(
            "client_id", 42, "group_id", [
                OffsetCommitRequest("topic1", 0, 123, None),
                OffsetCommitRequest("topic1", 1, 234, None),
                OffsetCommitRequest("topic2", 2, 345, None),
            ])

        self.assertIn(encoded, [expected1, expected2])
Example #10
    def test_commit_message_default(self, config):
        with mock_kafka() as (mock_client, mock_consumer):
            consumer = KafkaSimpleConsumer('test_topic', config)
            consumer.connect()

            actual = consumer.commit_message(
                Message(0, 100, 'mykey', 'myvalue'), )

            assert actual is True
            mock_client.return_value.send_offset_commit_request \
                .assert_called_once_with(
                    'test_group'.encode(),
                    [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
                )
Example #11
    def commit(self, partitions=None):
        """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to commit
                all of them

        Returns: True on success, False on failure
        """

        # short circuit if nothing happened. This check is kept outside
        # to prevent un-necessarily acquiring a lock for checking the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state has changed
            # during the lock acquiring timeout
            if self.count_since_commit == 0:
                return

            reqs = []
            if partitions is None:  # commit all partitions
                partitions = list(self.offsets.keys())

            log.debug('Committing new offsets for %s, partitions %s',
                      self.topic, partitions)
            for partition in partitions:
                offset = self.offsets[partition]
                log.debug(
                    'Commit offset %d in SimpleConsumer: '
                    'group=%s, topic=%s, partition=%s', offset, self.group,
                    self.topic, partition)

                reqs.append(
                    OffsetCommitRequest(self.topic, partition, offset, None))

            try:
                if self.offset_storage in ['zookeeper', 'dual']:
                    self.client.send_offset_commit_request(self.group, reqs)
                if self.offset_storage in ['kafka', 'dual']:
                    self.client.send_offset_commit_request_kafka(
                        self.group, reqs)
            except KafkaError as e:
                log.error('%s saving offsets: %s', e.__class__.__name__, e)
                return False
            else:
                self.count_since_commit = 0
                return True
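The 'dual' mode commits to both ZooKeeper- and Kafka-backed storage, a
pattern typically used while migrating consumer offsets from ZooKeeper to
Kafka so that either backend can serve as the source of truth during the
transition. Note that a KafkaError from either send aborts the commit and
returns False without resetting count_since_commit, so the next call retries.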
Example #12
    def test_commit_message_dual(self, config):
        if getattr(KafkaClient, 'send_offset_commit_request_kafka',
                   None) is None:
            return

        with mock_kafka() as (mock_client, mock_consumer):
            config._config['offset_storage'] = 'dual'
            consumer = KafkaSimpleConsumer('test_topic', config)
            consumer.connect()

            actual = consumer.commit_message(
                Message(0, 100, 'mykey', 'myvalue'), )

            assert actual is True
            mock_client.return_value.send_offset_commit_request \
                .assert_called_once_with(
                    'test_group'.encode(),
                    [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
                )
            mock_client.return_value.send_offset_commit_request_kafka \
                .assert_called_once_with(
                    'test_group'.encode(),
                    [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
                )
Example #13
    def commit_partition_offsets(self, partition_offsets):
        """
        Commit explicit partition/offset pairs.
        """
        self.logger.debug("Committing partition offsets: %s", partition_offsets)

        commit_requests = [
            OffsetCommitRequest(self.consumer.topic, partition, offset, None)
            for partition, offset in partition_offsets.items()
        ]
        commit_responses = self.consumer.client.send_offset_commit_request(
            self.consumer.group,
            commit_requests,
        )
        for commit_response in commit_responses:
            check_error(commit_response)
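A minimal call sketch, assuming `handler` is an instance of the class above;
the mapping pairs partition numbers with the offsets to commit:

handler.commit_partition_offsets({0: 83854, 1: 8943892})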
Example #14
    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to commit
                all of them
        """

        # short circuit if nothing happened. This check is kept outside
        # to prevent un-necessarily acquiring a lock for checking the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state has changed
            # during the lock acquiring timeout
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))

                reqs.append(
                    OffsetCommitRequest(self.topic, partition, offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                kafka.common.check_error(resp)

            self.count_since_commit = 0
Example #15
def set_consumer_offsets(
    kafka_client,
    group,
    new_offsets,
    raise_on_error=True,
    offset_storage='zookeeper',
):
    """Set consumer offsets to the specified offsets.

    This method does not validate the specified offsets, it is up to
    the caller to specify valid offsets within a topic partition.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in a
    single batch, which saves on both request overhead and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param new_offsets: dict {<topic>: {<partition>: <offset>}}
    :param raise_on_error: if False the method does not raise exceptions
      on errors encountered. It may still fail on the request send.
    :param offset_storage: String, one of {zookeeper, kafka}.
    :returns: a list of errors for each partition offset update that failed.
    :rtype: list [OffsetCommitError]
    :raises:
      :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      :py:class:`exceptions.TypeError`: upon badly formatted input
      new_offsets

      :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon unknown
      offset_storage choice.

      FailedPayloadsError: upon send request error.
    """
    valid_new_offsets = _verify_commit_offsets_requests(
        kafka_client, new_offsets, raise_on_error)

    group_offset_reqs = [
        OffsetCommitRequest(kafka_bytestring(topic), partition, offset, None)
        for topic, new_partition_offsets in valid_new_offsets.iteritems()
        for partition, offset in new_partition_offsets.iteritems()
    ]

    if offset_storage == 'zookeeper' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_commit_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(kafka_bytestring(group),
                          group_offset_reqs,
                          raise_on_error,
                          callback=_check_commit_response_error)

    return filter(None, status)
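A minimal usage sketch, assuming a connected KafkaToolClient; the group and
topic names are illustrative:

new_offsets = {'topic1': {0: 100, 1: 200}}
errors = set_consumer_offsets(
    kafka_client,
    'my-group',
    new_offsets,
    raise_on_error=False,
    offset_storage='kafka',
)
for error in errors:
    print(error)  # each entry is an OffsetCommitError for a failed partition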