def _commit_offsets_to_watermark(
    kafka_client,
    group,
    topics,
    watermark,
    raise_on_error,
    offset_storage,
):
    """Commit a watermark as the group's offset for every given partition.

    The topics are first run through ``_verify_topics_and_partitions`` and
    their watermarks are fetched with ``get_topics_watermarks``. One
    ``OffsetCommitRequest`` is built per partition and all of them are sent
    in a single call.

    :param kafka_client: client used for the watermark lookup and the commit
    :param group: consumer group id, converted with ``kafka_bytestring``
    :param topics: topics/partitions to commit; the verified result is
        iterated as a ``{topic: partitions}`` mapping
    :param watermark: ``HIGH_WATERMARK`` or ``LOW_WATERMARK``
    :param raise_on_error: forwarded to the verification, the watermark
        lookup and the send call
    :param offset_storage: ``'zookeeper'`` (also selected by any falsy
        value) or ``'kafka'``
    :returns: ``filter(None, status)`` over the send call's result;
        presumably the commit errors, given the
        ``_check_commit_response_error`` callback -- confirm
    :raises ValueError: if ``watermark`` is neither known constant
    :raises InvalidOffsetStorageError: if ``offset_storage`` is unknown
    """
    topics = _verify_topics_and_partitions(kafka_client, topics, raise_on_error)
    watermark_offsets = get_topics_watermarks(kafka_client, topics, raise_on_error)

    # Pick which field of the per-partition watermark entry gets committed.
    if watermark == HIGH_WATERMARK:
        mark_field = 'highmark'
    elif watermark == LOW_WATERMARK:
        mark_field = 'lowmark'
    else:
        raise ValueError(
            "Unknown watermark: {watermark}".format(watermark=watermark))

    group_offset_reqs = []
    for topic, partitions in topics.iteritems():
        for partition in partitions:
            target_offset = getattr(watermark_offsets[topic][partition], mark_field)
            group_offset_reqs.append(
                OffsetCommitRequest(
                    kafka_bytestring(topic),
                    partition,
                    target_offset,
                    None,
                )
            )

    # The storage backend is validated even when there is nothing to send.
    if not offset_storage or offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_commit_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_commit_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    if group_offset_reqs:
        status = send_api(
            kafka_bytestring(group),
            group_offset_reqs,
            raise_on_error,
            callback=_check_commit_response_error,
        )
    else:
        status = []

    return filter(None, status)
def commit_offsets(self, topic_to_partition_offset_map):
    """Commit explicit per-partition offsets to kafka.

    This is the lower-level entry point for committing offsets. In general
    :meth:`commit_message` or :meth:`commit_messages` should be preferred,
    but this method is useful when paired with
    :meth:`data_pipeline.position_data.PositionData.topic_to_last_position_info_map`.

    **Example**::

        {
            'topic1': {0: 83854, 1: 8943892},
            'topic2': {0: 190898}
        }

    The given map is first passed through
    ``self._get_offsets_map_to_be_committed``; one ``OffsetCommitRequest``
    is then built for every (topic, partition, offset) triple of the result.

    Args:
        topic_to_partition_offset_map (Dict[str, Dict[int, int]]): Maps
            each topic to a partition -> offset map for that topic.

    Returns:
        Whatever ``self._send_offset_commit_requests`` returns.
    """
    offsets_to_commit = self._get_offsets_map_to_be_committed(
        topic_to_partition_offset_map)

    commit_requests = []
    for topic, partition_map in offsets_to_commit.iteritems():
        for partition, offset in partition_map.iteritems():
            commit_requests.append(
                OffsetCommitRequest(
                    topic=kafka_bytestring(topic),
                    partition=partition,
                    offset=offset,
                    metadata=None,
                )
            )

    return self._send_offset_commit_requests(
        offset_commit_request_list=commit_requests)
def commit_message(self, message):
    """Commit the offset of ``message`` for this consumer group.

    This function does not take care of the consumer offset tracking. It
    should only be used if auto_commit is disabled and the commit function
    is never called.

    .. note:: all the messages received before ``message`` itself will be
       committed as a consequence.

    The commit goes to zookeeper when ``offset_storage`` is ``None``,
    ``'zookeeper'`` or ``'dual'``, and to kafka when it is ``'kafka'`` or
    ``'dual'``.

    :param message: message to commit.
    :type message: Message namedtuple, which consists of:
        partition number, offset, key, and message value
    :return: True on success, False on failure.
    """
    commit_requests = [
        OffsetCommitRequest(self.topic, message.partition, message.offset, None),
    ]

    try:
        storage = self.config.offset_storage
        if storage in (None, 'zookeeper', 'dual'):
            self.client.send_offset_commit_request(
                self.config.group_id, commit_requests)
        if storage in ('kafka', 'dual'):
            self.client.send_offset_commit_request_kafka(
                self.config.group_id, commit_requests)
    except KafkaError as e:
        self.log.error("%s saving offsets: %s", e.__class__.__name__, e)
        return False

    return True
def commit(self, partitions=None):
    """Commit the tracked offsets of this consumer for its group.

    This is a generator-based coroutine (it uses ``yield from``), so the
    caller has to drive it accordingly.

    Keyword Arguments:
        partitions (list): partitions to commit. When omitted or empty,
            every partition in ``self._offsets`` is committed.

    Every response is passed to ``check_error``; presumably that raises on
    a failed commit, in which case the commit counter is left untouched.
    """
    # Short circuit if nothing happened. This check is kept outside the
    # lock to avoid acquiring it just to inspect the state.
    if self._count_since_commit == 0:
        return

    # NOTE(review): ``with (yield from lock)`` is the legacy asyncio locking
    # idiom; presumably ``_commit_lock`` is an asyncio lock -- confirm it
    # still supports this form on the targeted Python version.
    with (yield from self._commit_lock):
        # Check again: the state may have changed while waiting for the lock.
        if self._count_since_commit == 0:
            return

        if not partitions:  # commit all partitions
            partitions = self._offsets.keys()

        reqs = []
        for partition in partitions:
            offset = self._offsets[partition]
            # Pass the values as logger arguments so the message is only
            # formatted when debug logging is actually enabled.
            log.debug("Commit offset %d in SimpleConsumer: "
                      "group=%s, topic=%s, partition=%s",
                      offset, self._group, self._topic, partition)
            reqs.append(
                OffsetCommitRequest(self._topic, partition, offset, None))

        resps = yield from self._client.send_offset_commit_request(
            self._group, reqs)
        for resp in resps:
            check_error(resp)

        # Only reached when every response passed check_error.
        self._count_since_commit = 0
def commit(self):
    """Store consumed message offsets (marked via task_done())
    to kafka cluster for this consumer_group.

    Only partitions whose next-to-fetch offset differs from the last
    committed one are included in the request.

    Returns:
        True on success, or False if no offsets were found for commit

    Raises:
        KafkaConfigurationError: if no ``group_id`` is configured

    Note:
        this functionality requires server version >=0.8.1.1
        https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
    """
    if not self._config['group_id']:
        logger.warning('Cannot commit without a group_id!')
        raise KafkaConfigurationError(
            'Attempted to commit offsets without a configured consumer group (group_id)'
        )

    # The API supports storing metadata with each commit,
    # but for now it is unused.
    metadata = b''

    pending = []
    for topic_partition, done_offset in six.iteritems(self._offsets.task_done):
        # Nothing has been marked done for this partition yet.
        if done_offset is None:
            continue

        # task_done tracks the last consumed message; the committed value is
        # the next offset to fetch, consistent with the Java client.
        next_offset = done_offset + 1

        # Unchanged since the previous commit: skip.
        if next_offset == self._offsets.commit[topic_partition]:
            continue

        pending.append(
            OffsetCommitRequest(
                topic_partition[0],
                topic_partition[1],
                next_offset,
                metadata,
            )
        )

    if not pending:
        logger.info('No new offsets found to commit in group %s', self._config['group_id'])
        return False

    logger.info('committing consumer offsets to group %s', self._config['group_id'])
    responses = self._client.send_offset_commit_request(
        kafka_bytestring(self._config['group_id']),
        pending,
        fail_on_error=False,
    )

    for response in responses:
        check_error(response)
        key = (response.topic, response.partition)
        # Record what was just committed for this partition.
        self._offsets.commit[key] = self._offsets.task_done[key] + 1

    if self._config['auto_commit_enable']:
        self._reset_auto_commit()

    return True
def test_encode_offset_commit_request_kafka(self):
    """Check the byte encoding of a kafka-stored offset commit request.

    Three commits spread over two topics are encoded; the result must match
    the expected bytes with the two topics in either order.
    """
    pack = struct.pack

    request_header = b"".join((
        pack('>i', 113),                 # Total message length
        pack('>h', 8),                   # Message type = offset commit
        pack('>h', 2),                   # API version
        pack('>i', 42),                  # Correlation ID
        pack('>h9s', 9, b"client_id"),   # The client ID
        pack('>h8s', 8, b"group_id"),    # The group to commit for
        pack('>i', -1),                  # Consumer group generation id
        pack(">h0s", 0, b""),            # Consumer id
        pack('>q', -1),                  # Retention time
        pack('>i', 2),                   # Num topics
    ))

    topic1_payload = b"".join((
        pack(">h6s", 6, b"topic1"),      # Topic for the request
        pack(">i", 2),                   # Two partitions
        pack(">i", 0),                   # Partition 0
        pack(">q", 123),                 # Offset 123
        pack(">h", -1),                  # Null metadata
        pack(">i", 1),                   # Partition 1
        pack(">q", 234),                 # Offset 234
        pack(">h", -1),                  # Null metadata
    ))

    topic2_payload = b"".join((
        pack(">h6s", 6, b"topic2"),      # Topic for the request
        pack(">i", 1),                   # One partition
        pack(">i", 2),                   # Partition 2
        pack(">q", 345),                 # Offset 345
        pack(">h", -1),                  # Null metadata
    ))

    acceptable = [
        b"".join([request_header, topic1_payload, topic2_payload]),
        b"".join([request_header, topic2_payload, topic1_payload]),
    ]

    commit_requests = [
        OffsetCommitRequest(b"topic1", 0, 123, None),
        OffsetCommitRequest(b"topic1", 1, 234, None),
        OffsetCommitRequest(b"topic2", 2, 345, None),
    ]
    encoded = KafkaToolProtocol.encode_offset_commit_request_kafka(
        b"client_id", 42, b"group_id", commit_requests)

    assert encoded in acceptable
def test_commit_fetch_offsets(self):
    # Round trip: commit offset 42 for partition 0 of the test topic, then
    # fetch it back for the same group and compare.
    group = b"group"

    commit_req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")
    (commit_resp,) = self.client.send_offset_commit_request(group, [commit_req])
    self.assertEqual(commit_resp.error, 0)

    fetch_req = OffsetFetchRequest(self.bytes_topic, 0)
    (fetch_resp,) = self.client.send_offset_fetch_request(group, [fetch_req])
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.offset, 42)
    # Metadata isn't stored for now
    self.assertEqual(fetch_resp.metadata, b"")
def test_commit_fetch_offsets_dual(self):
    # Round trip through the kafka-backed offset APIs: commit offset 42 with
    # metadata, then fetch both back for the same group.
    group = b"group"
    req = OffsetCommitRequest(self.bytes_topic, 0, 42, b"metadata")

    (commit_resp,) = self.client.send_offset_commit_request_kafka(group, [req])
    self.assertEqual(commit_resp.error, 0)

    # NOTE(review): the commit request object is reused as the fetch
    # payload; presumably only its topic/partition are read -- confirm.
    (fetch_resp,) = self.client.send_offset_fetch_request_kafka(group, [req])
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.offset, 42)
    # Metadata is stored in kafka
    self.assertEqual(fetch_resp.metadata, b"metadata")
def test_encode_offset_commit_request(self):
    # Check the byte encoding of a v0 offset commit request holding three
    # commits over two topics; either topic order is accepted.
    #
    # All literals are bytes: struct.pack() returns bytes and its 's' format
    # requires bytes on Python 3, so joining with a text "" separator or
    # packing text strings only works on Python 2. On Python 2, b"..." is
    # the same type as "...", so nothing changes there.
    header = b"".join([
        struct.pack('>i', 99),                 # Total message length

        struct.pack('>h', 8),                  # Message type = offset commit
        struct.pack('>h', 0),                  # API version
        struct.pack('>i', 42),                 # Correlation ID
        struct.pack('>h9s', 9, b"client_id"),  # The client ID
        struct.pack('>h8s', 8, b"group_id"),   # The group to commit for
        struct.pack('>i', 2),                  # Num topics
    ])

    topic1 = b"".join([
        struct.pack(">h6s", 6, b"topic1"),     # Topic for the request
        struct.pack(">i", 2),                  # Two partitions
        struct.pack(">i", 0),                  # Partition 0
        struct.pack(">q", 123),                # Offset 123
        struct.pack(">h", -1),                 # Null metadata
        struct.pack(">i", 1),                  # Partition 1
        struct.pack(">q", 234),                # Offset 234
        struct.pack(">h", -1),                 # Null metadata
    ])

    topic2 = b"".join([
        struct.pack(">h6s", 6, b"topic2"),     # Topic for the request
        struct.pack(">i", 1),                  # One partition
        struct.pack(">i", 2),                  # Partition 2
        struct.pack(">q", 345),                # Offset 345
        struct.pack(">h", -1),                 # Null metadata
    ])

    expected1 = b"".join([header, topic1, topic2])
    expected2 = b"".join([header, topic2, topic1])

    encoded = KafkaProtocol.encode_offset_commit_request(
        b"client_id", 42, b"group_id", [
            OffsetCommitRequest(b"topic1", 0, 123, None),
            OffsetCommitRequest(b"topic1", 1, 234, None),
            OffsetCommitRequest(b"topic2", 2, 345, None),
        ])

    self.assertIn(encoded, [expected1, expected2])
def test_commit_message_default(self, config):
    """With the given config, commit_message sends exactly one commit
    through ``send_offset_commit_request`` and reports success."""
    with mock_kafka() as (mock_client, mock_consumer):
        consumer = KafkaSimpleConsumer('test_topic', config)
        consumer.connect()

        message = Message(0, 100, 'mykey', 'myvalue')
        committed = consumer.commit_message(message)

        assert committed is True

        expected_requests = [
            OffsetCommitRequest('test_topic'.encode(), 0, 100, None),
        ]
        send_commit = mock_client.return_value.send_offset_commit_request
        send_commit.assert_called_once_with(
            'test_group'.encode(),
            expected_requests,
        )
def commit(self, partitions=None):
    """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

    The request goes to zookeeper when ``self.offset_storage`` is
    ``'zookeeper'`` or ``'dual'``, and to kafka when it is ``'kafka'`` or
    ``'dual'``.

    Keyword Arguments:
        partitions (list): list of partitions to commit, default is to
            commit all of them

    Returns:
        True on success, False on failure. None when there was nothing to
        commit (``count_since_commit`` is 0).
    """
    # Cheap check outside the lock, so the lock is not acquired just to
    # find out that nothing happened.
    if self.count_since_commit == 0:
        return None

    with self.commit_lock:
        # Re-check: the state may have changed while waiting for the lock.
        if self.count_since_commit == 0:
            return None

        if partitions is None:  # commit all partitions
            partitions = list(self.offsets.keys())

        log.debug('Committing new offsets for %s, partitions %s',
                  self.topic, partitions)

        commit_requests = []
        for partition in partitions:
            offset = self.offsets[partition]
            log.debug(
                'Commit offset %d in SimpleConsumer: '
                'group=%s, topic=%s, partition=%s',
                offset, self.group, self.topic, partition)
            commit_requests.append(
                OffsetCommitRequest(self.topic, partition, offset, None))

        try:
            if self.offset_storage in ('zookeeper', 'dual'):
                self.client.send_offset_commit_request(
                    self.group, commit_requests)
            if self.offset_storage in ('kafka', 'dual'):
                self.client.send_offset_commit_request_kafka(
                    self.group, commit_requests)
        except KafkaError as e:
            log.error('%s saving offsets: %s', e.__class__.__name__, e)
            return False

        # Everything was sent without a KafkaError: reset the counter.
        self.count_since_commit = 0
        return True
def test_commit_message_dual(self, config):
    """With 'dual' offset storage, commit_message sends the same commit
    once through each of the two commit APIs."""
    # Nothing to check when the client has no kafka-backed commit API.
    if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None:
        return

    with mock_kafka() as (mock_client, mock_consumer):
        config._config['offset_storage'] = 'dual'
        consumer = KafkaSimpleConsumer('test_topic', config)
        consumer.connect()

        committed = consumer.commit_message(
            Message(0, 100, 'mykey', 'myvalue'))

        assert committed is True

        client = mock_client.return_value
        client.send_offset_commit_request.assert_called_once_with(
            'test_group'.encode(),
            [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
        )
        client.send_offset_commit_request_kafka.assert_called_once_with(
            'test_group'.encode(),
            [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
        )
def commit_partition_offsets(self, partition_offsets):
    """
    Commit explicit partition/offset pairs.

    One ``OffsetCommitRequest`` is built per entry of ``partition_offsets``
    for the consumer's topic; all of them are sent in a single call for the
    consumer's group, and every response is passed to ``check_error``.
    """
    self.logger.debug("Committing partition offsets: %s", partition_offsets)

    commit_requests = []
    for partition, offset in partition_offsets.items():
        commit_requests.append(
            OffsetCommitRequest(self.consumer.topic, partition, offset, None)
        )

    send_commit = self.consumer.client.send_offset_commit_request
    for commit_response in send_commit(self.consumer.group, commit_requests):
        check_error(commit_response)
def commit(self, partitions=None):
    """
    Commit offsets for this consumer

    Keyword Arguments:
        partitions (list): list of partitions to commit, default is to
            commit all of them (an empty list is treated the same way)

    Returns None. Every response is passed to ``kafka.common.check_error``;
    presumably that raises on a failed commit, in which case the commit
    counter is left untouched.
    """
    # Short circuit if nothing happened. This check is kept outside the
    # lock to avoid acquiring it just to inspect the state.
    if self.count_since_commit == 0:
        return

    with self.commit_lock:
        # Do this check again, just in case the state has changed
        # while waiting for the lock.
        if self.count_since_commit == 0:
            return

        if not partitions:  # commit all partitions
            partitions = self.offsets.keys()

        reqs = []
        for partition in partitions:
            offset = self.offsets[partition]
            # Pass the values as logger arguments so the message is only
            # formatted when debug logging is actually enabled.
            log.debug("Commit offset %d in SimpleConsumer: "
                      "group=%s, topic=%s, partition=%s",
                      offset, self.group, self.topic, partition)
            reqs.append(
                OffsetCommitRequest(self.topic, partition, offset, None))

        resps = self.client.send_offset_commit_request(self.group, reqs)
        for resp in resps:
            kafka.common.check_error(resp)

        # Only reached when every response passed check_error.
        self.count_since_commit = 0
def set_consumer_offsets(
    kafka_client,
    group,
    new_offsets,
    raise_on_error=True,
    offset_storage='zookeeper',
):
    """Set consumer offsets to the specified offsets.

    This method does not validate the specified offsets; it is up to the
    caller to specify valid offsets within a topic partition.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in a
    batch, saving both in performance and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param new_offsets: dict {<topic>: {<partition>: <offset>}}
    :param raise_on_error: if False the method does not raise exceptions
        on errors encountered. It may still fail on the request send.
    :param offset_storage: String, one of {zookeeper, kafka}. A falsy value
        is treated as zookeeper.
    :returns: a list of errors for each partition offset update that failed.
    :rtype: list [OffsetCommitError]
    :raises:
        :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
        topics and raise_on_error=True

        :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
        partitions and raise_on_error=True

        :py:class:`exceptions.TypeError`: upon badly formatted input
        new_offsets

        :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon
        unknown offset_storage choice.

        FailedPayloadsError: upon send request error.
    """
    valid_new_offsets = _verify_commit_offsets_requests(
        kafka_client,
        new_offsets,
        raise_on_error,
    )

    group_offset_reqs = []
    for topic, new_partition_offsets in valid_new_offsets.iteritems():
        for partition, offset in new_partition_offsets.iteritems():
            group_offset_reqs.append(
                OffsetCommitRequest(
                    kafka_bytestring(topic),
                    partition,
                    offset,
                    None,
                )
            )

    # The storage backend is validated even when there is nothing to send.
    if not offset_storage or offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_commit_request
    elif offset_storage == 'kafka':
        send_api = kafka_client.send_offset_commit_request_kafka
    else:
        raise InvalidOffsetStorageError(offset_storage)

    if group_offset_reqs:
        status = send_api(
            kafka_bytestring(group),
            group_offset_reqs,
            raise_on_error,
            callback=_check_commit_response_error,
        )
    else:
        status = []

    return filter(None, status)