def _commit_offsets_to_watermark(
    kafka_client, group, topics,
    watermark, raise_on_error, offset_storage,
):
    """Commit the group's offsets for the given topics to the topic-partition
    high or low watermark, using the selected offset storage backend.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id whose offsets are rewound/forwarded
    :param topics: topics (and optionally partitions) to commit for
    :param watermark: HIGH_WATERMARK or LOW_WATERMARK
    :param raise_on_error: if False, tolerate missing topics/partitions
    :param offset_storage: one of {'kafka', 'zookeeper'}; falsy defaults to kafka
    :returns: list of non-empty statuses from the commit requests
    :raises ValueError: on an unknown watermark
    :raises InvalidOffsetStorageError: on an unknown offset_storage choice
    """
    topics = _verify_topics_and_partitions(kafka_client, topics, raise_on_error)
    watermark_offsets = get_topics_watermarks(kafka_client, topics, raise_on_error)

    # Pick which watermark attribute to commit to, then build the requests
    # once — avoids duplicating the comprehension per branch.
    if watermark == HIGH_WATERMARK:
        offset_attr = 'highmark'
    elif watermark == LOW_WATERMARK:
        offset_attr = 'lowmark'
    else:
        raise ValueError(
            "Unknown watermark: {watermark}".format(watermark=watermark)
        )

    # NOTE: was topics.iteritems() (Python 2 only); use six.iteritems for
    # consistency with the rest of this module.
    group_offset_reqs = [
        OffsetCommitRequestPayload(
            topic,
            partition,
            getattr(watermark_offsets[topic][partition], offset_attr),
            metadata='',
        )
        for topic, partitions in six.iteritems(topics)
        for partition in partitions
    ]

    if offset_storage == 'kafka' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request_kafka
    elif offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_commit_request
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(
            group,
            group_offset_reqs,
            raise_on_error,
            callback=_check_commit_response_error
        )
    # Return a concrete list (filter() is lazy on Python 3), matching the
    # sibling set_consumer_offsets implementation.
    return [resp for resp in status if resp]
def _commit_offsets_to_watermark(
    kafka_client, group, topics,
    watermark, raise_on_error,
):
    """Commit the group's offsets for the given topics to the partition high
    or low watermark (kafka-backed offset storage only).

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id whose offsets are moved
    :param topics: topics (and optionally partitions) to commit for
    :param watermark: HIGH_WATERMARK or LOW_WATERMARK
    :param raise_on_error: if False, tolerate missing topics/partitions
    :returns: list of non-empty statuses from the commit requests
    :raises ValueError: on an unknown watermark
    """
    topics = _verify_topics_and_partitions(kafka_client, topics, raise_on_error)
    watermark_offsets = get_topics_watermarks(kafka_client, topics, raise_on_error)

    # Resolve the watermark choice to an attribute name up front so a single
    # comprehension can serve both directions.
    if watermark == HIGH_WATERMARK:
        mark_attr = 'highmark'
    elif watermark == LOW_WATERMARK:
        mark_attr = 'lowmark'
    else:
        raise ValueError(
            "Unknown watermark: {watermark}".format(watermark=watermark)
        )

    group_offset_reqs = [
        OffsetCommitRequestPayload(
            topic,
            partition,
            getattr(watermark_offsets[topic][partition], mark_attr),
            metadata='',
        )
        for topic, partitions in six.iteritems(topics)
        for partition in partitions
    ]

    send_api = kafka_client.send_offset_commit_request_kafka

    status = []
    if group_offset_reqs:
        status = send_api(
            group,
            group_offset_reqs,
            raise_on_error,
            callback=_check_commit_response_error
        )
    return [resp for resp in status if resp]
def test_commit_fetch_offsets_dual(self):
    # Commit an offset with metadata through the kafka-backed API, then
    # fetch it back and verify both offset and metadata round-trip.
    commit_req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
    (commit_resp, ) = self.client.send_offset_commit_request_kafka(
        'group', [commit_req])
    self.assertEqual(commit_resp.error, 0)

    (fetch_resp, ) = self.client.send_offset_fetch_request_kafka(
        'group', [commit_req])
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.offset, 42)
    # Metadata is stored in kafka
    self.assertEqual(fetch_resp.metadata, 'metadata')
def test_commit_fetch_offsets(self):
    # Commit via the zookeeper-backed API, then fetch and verify the offset
    # comes back while metadata does not.
    commit_req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
    (commit_resp, ) = self.client.send_offset_commit_request('group', [commit_req])
    self.assertEqual(commit_resp.error, 0)

    fetch_req = OffsetFetchRequestPayload(self.topic, 0)
    (fetch_resp, ) = self.client.send_offset_fetch_request('group', [fetch_req])
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.offset, 42)
    self.assertEqual(fetch_resp.metadata, '')  # Metadata isn't stored for now
def commit(self, partitions=None):
    """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

    Keyword Arguments:
        partitions (list): list of partitions to commit, default is to commit
            all of them

    Returns: True on success, False on failure
    """
    # Fast path: nothing consumed since the last commit. Checked before
    # taking the lock so the common no-op case stays cheap.
    if self.count_since_commit == 0:
        return

    with self.commit_lock:
        # Re-check under the lock: another thread may have committed while
        # we were waiting to acquire it.
        if self.count_since_commit == 0:
            return

        if partitions is None:
            # Default: commit every partition we track offsets for.
            partitions = list(self.offsets.keys())

        log.debug("Committing new offsets for %s, partitions %s",
                  self.topic, partitions)

        payloads = []
        for partition in partitions:
            offset = self.offsets[partition]
            log.debug(
                "Commit offset %d in SimpleConsumer: "
                "group=%s, topic=%s, partition=%s",
                offset, self.group, self.topic, partition,
            )
            payloads.append(
                OffsetCommitRequestPayload(self.topic, partition, offset, None))

        try:
            self.client.send_offset_commit_request(self.group, payloads)
        except KafkaError as err:
            log.error("%s saving offsets: %s", err.__class__.__name__, err)
            return False
        else:
            # Successful commit resets the dirty counter.
            self.count_since_commit = 0
            return True
def commit_partition_offsets(self, partition_offsets):
    """ Commit explicit partition/offset pairs. """
    self.logger.debug("Committing partition offsets: %s", partition_offsets)

    # Build one commit payload per (partition, offset) pair.
    requests = []
    for partition, offset in partition_offsets.items():
        requests.append(
            OffsetCommitRequestPayload(self.consumer.topic, partition, offset, None)
        )

    responses = self.consumer.client.send_offset_commit_request(
        self.consumer.group,
        requests,
    )
    # Surface any per-partition commit failure as an exception.
    for response in responses:
        check_error(response)
def set_consumer_offsets(
    kafka_client,
    group,
    new_offsets,
    raise_on_error=True,
    offset_storage='kafka',
):
    """Set consumer offsets to the specified offsets.

    This method does not validate the specified offsets, it is up to
    the caller to specify valid offsets within a topic partition.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in batch
    and save both in performance and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param group: kafka group_id
    :param new_offsets: dict {<topic>: {<partition>: <offset>}}
    :param raise_on_error: if False the method does not raise exceptions
      on errors encountered. It may still fail on the request send.
    :param offset_storage: String, one of {zookeeper, kafka}.
    :returns: a list of errors for each partition offset update that failed.
    :rtype: list [OffsetCommitError]
    :raises:
      :py:class:`kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      :py:class:`exceptions.TypeError`: upon badly formatted input
      new_offsets

      :py:class:`kafka_utils.util.error.InvalidOffsetStorageError`: upon
      unknown offset_storage choice.

      FailedPayloadsError: upon send request error.
    """
    valid_new_offsets = _verify_commit_offsets_requests(
        kafka_client, new_offsets, raise_on_error)

    # One commit payload per (topic, partition) pair, flattened from the
    # nested {topic: {partition: offset}} mapping.
    group_offset_reqs = [
        OffsetCommitRequestPayload(
            topic,
            partition,
            offset,
            metadata='',
        )
        for topic, new_partition_offsets in six.iteritems(valid_new_offsets)
        for partition, offset in six.iteritems(new_partition_offsets)
    ]

    # Dispatch on the storage backend; a falsy offset_storage defaults to
    # the kafka-backed API.
    if offset_storage == 'kafka' or not offset_storage:
        send_api = kafka_client.send_offset_commit_request_kafka
    elif offset_storage == 'zookeeper':
        send_api = kafka_client.send_offset_commit_request
    else:
        raise InvalidOffsetStorageError(offset_storage)

    status = []
    if group_offset_reqs:
        status = send_api(group,
                          group_offset_reqs,
                          raise_on_error,
                          callback=_check_commit_response_error)

    # Keep only real failures: non-empty responses with a non-zero error code.
    return [_f for _f in status if _f and _f.error != 0]
def commit(self):
    """Store consumed message offsets (marked via task_done())
    to kafka cluster for this consumer_group.

    Returns:
        True on success, or False if no offsets were found for commit

    Note:
        this functionality requires server version >=0.8.1.1
        https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetCommit/FetchAPI
    """
    if not self._config['group_id']:
        logger.warning('Cannot commit without a group_id!')
        raise KafkaConfigurationError(
            'Attempted to commit offsets '
            'without a configured consumer group (group_id)')

    # API supports storing metadata with each commit
    # but for now it is unused
    metadata = b''

    offsets = self._offsets.task_done
    commits = []
    for topic_partition, task_done_offset in six.iteritems(offsets):
        # Skip if None
        if task_done_offset is None:
            continue

        # Commit offsets as the next offset to fetch
        # which is consistent with the Java Client
        # task_done is marked by messages consumed,
        # so add one to mark the next message for fetching
        commit_offset = (task_done_offset + 1)

        # Skip if no change from previous committed
        if commit_offset == self._offsets.commit[topic_partition]:
            continue

        commits.append(
            OffsetCommitRequestPayload(topic_partition[0],
                                       topic_partition[1],
                                       commit_offset,
                                       metadata))

    if commits:
        logger.info('committing consumer offsets to group %s',
                    self._config['group_id'])
        # 'dual' storage commits to both backends; zookeeper first, then
        # kafka. fail_on_error=False collects error responses for the
        # check_error pass below instead of raising mid-send.
        resps = []
        if self._config['offset_storage'] in ['zookeeper', 'dual']:
            resps += self._client.send_offset_commit_request(
                self._config['group_id'],
                commits,
                fail_on_error=False,
            )
        if self._config['offset_storage'] in ['kafka', 'dual']:
            resps += self._client.send_offset_commit_request_kafka(
                self._config['group_id'],
                commits,
                fail_on_error=False,
            )

        for r in resps:
            check_error(r)
            # Record the committed position only after the response is
            # known to be error-free.
            topic_partition = (r.topic, r.partition)
            task_done = self._offsets.task_done[topic_partition]
            self._offsets.commit[topic_partition] = (task_done + 1)

        if self._config['auto_commit_enable']:
            self._reset_auto_commit()

        return True

    else:
        logger.info('No new offsets found to commit in group %s',
                    self._config['group_id'])
        return False