class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.topic_partitions[topic]

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        def get_or_init_offset_callback(resp):
            if resp.error == ErrorMapping.NO_ERROR:
                return resp.offset
            elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
                return 0
            else:
                raise Exception("OffsetFetchRequest for topic=%s, "
                                "partition=%d failed with errorcode=%s" % (
                                    resp.topic, resp.partition, resp.error))

        # TODO: add a check for which Kafka version the broker is running
        try:
            for partition in partitions:
                req = OffsetFetchRequest(topic, partition)
                (offset,) = self.client.send_offset_fetch_request(
                    group, [req],
                    callback=get_or_init_offset_callback,
                    fail_on_error=False)
                self.offsets[partition] = offset
        except Exception:
            for partition in partitions:
                self.offsets[partition] = 0

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                assert resp.error == 0

            self.count_since_commit = 0

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit > self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        if not partitions:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset - (1 if offset > 0 else 0)

        return total
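# --- Illustrative sketch, not part of the original source ---
# A minimal sketch of how the class above is typically driven. Although it is
# documented as a base class, its commit/pending API can be exercised directly
# for illustration. Assumes a connected KafkaClient from the same era of
# kafka-python (the constructor signature varies across versions); the broker
# address, group and topic names below are hypothetical.
#
#     from kafka.client import KafkaClient
#
#     client = KafkaClient("localhost", 9092)
#     consumer = Consumer(client, "my-group", "my-topic")
#
#     print(consumer.pending())        # messages not yet consumed, all partitions
#     consumer.count_since_commit += 1 # subclasses bump this as they yield messages
#     consumer._auto_commit()          # commits only once the threshold is crossed
#     consumer.stop()                  # stops the timer and commits one last time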
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = kafka_bytestring(topic)
        self.group = None if group is None else kafka_bytestring(group)
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(topic)
        else:
            assert all(isinstance(x, numbers.Integral) for x in partitions)

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        # Set initial offsets
        if self.group is not None:
            self.fetch_last_known_offsets(partitions)
        else:
            for partition in partitions:
                self.offsets[partition] = 0

        # Register a cleanup handler
        def cleanup(obj):
            obj.stop()
        self._cleanup_func = cleanup
        atexit.register(cleanup, self)

        self.partition_info = False     # Do not return partition info in msgs

    def provide_partition_info(self):
        """
        Indicates that partition info must be returned by the consumer
        """
        self.partition_info = True

    def fetch_last_known_offsets(self, partitions=None):
        if self.group is None:
            raise ValueError('KafkaClient.group must not be None')

        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        responses = self.client.send_offset_fetch_request(
            self.group,
            [OffsetFetchRequest(self.topic, p) for p in partitions],
            fail_on_error=False
        )

        for resp in responses:
            try:
                check_error(resp)
            # API spec says the server won't set an error here,
            # but 0.8.1.1 actually does...
            except UnknownTopicOrPartitionError:
                pass

            # -1 offset signals that no commit is currently stored
            if resp.offset == -1:
                self.offsets[resp.partition] = 0
            # Otherwise we committed the stored offset
            # and need to fetch the next one
            else:
                self.offsets[resp.partition] = resp.offset

    def commit(self, partitions=None):
        """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to
                commit all of them

        Returns: True on success, False on failure
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if partitions is None:  # commit all partitions
                partitions = list(self.offsets.keys())

            log.debug('Committing new offsets for %s, partitions %s',
                      self.topic, partitions)
            for partition in partitions:
                offset = self.offsets[partition]
                log.debug('Commit offset %d in SimpleConsumer: '
                          'group=%s, topic=%s, partition=%s',
                          offset, self.group, self.topic, partition)

                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            try:
                self.client.send_offset_commit_request(self.group, reqs)
            except KafkaError as e:
                log.error('%s saving offsets: %s', e.__class__.__name__, e)
                return False
            else:
                self.count_since_commit = 0
                return True

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit >= self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

        if hasattr(self, '_cleanup_func'):
            # Remove cleanup handler now that we've stopped

            # py3 supports unregistering
            if hasattr(atexit, 'unregister'):
                atexit.unregister(self._cleanup_func)  # pylint: disable=no-member

            # py2 requires removing from private attribute...
            else:
                # ValueError on list.remove() if the exithandler no longer
                # exists is fine here
                try:
                    atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
                except ValueError:
                    pass

            del self._cleanup_func

    def pending(self, partitions=None):
        """
        Gets the pending message count

        Keyword Arguments:
            partitions (list): list of partitions to check for, default is to
                check all
        """
        if partitions is None:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset

        return total
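# --- Illustrative sketch, not part of the original source ---
# The atexit dance in __init__/stop() above distills to the following
# pattern: register a plain function with the instance as an argument (rather
# than a bound method), keep a handle to it, and unregister it on shutdown.
# Class and method names here are hypothetical; only the atexit calls are real.
import atexit

class Stoppable(object):
    def __init__(self):
        def cleanup(obj):
            obj.stop()
        self._cleanup_func = cleanup          # keep a handle for unregistering
        atexit.register(cleanup, self)

    def stop(self):
        if hasattr(self, '_cleanup_func'):
            if hasattr(atexit, 'unregister'):  # py3 has a public API
                atexit.unregister(self._cleanup_func)
            else:                              # py2: remove from private list
                try:
                    atexit._exithandlers.remove((self._cleanup_func, (self,), {}))
                except ValueError:
                    pass                       # already gone; that's fine
            del self._cleanup_func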
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.get_partition_ids_for_topic(topic)
        else:
            assert all(isinstance(x, numbers.Integral) for x in partitions)

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        if auto_commit:
            self.fetch_last_known_offsets(partitions)
        else:
            for partition in partitions:
                self.offsets[partition] = 0

    def fetch_last_known_offsets(self, partitions=None):
        if not partitions:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        def get_or_init_offset(resp):
            try:
                kafka.common.check_error(resp)
                return resp.offset
            except UnknownTopicOrPartitionError:
                return 0

        for partition in partitions:
            req = OffsetFetchRequest(self.topic, partition)
            (resp,) = self.client.send_offset_fetch_request(self.group,
                                                            [req],
                                                            fail_on_error=False)
            self.offsets[partition] = get_or_init_offset(resp)
        self.fetch_offsets = self.offsets.copy()

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to
                commit all of them
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                kafka.common.check_error(resp)

            self.count_since_commit = 0

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit >= self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

    def pending(self, partitions=None):
        """
        Gets the pending message count

        Keyword Arguments:
            partitions (list): list of partitions to check for, default is to
                check all
        """
        if not partitions:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset

        return total
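# --- Illustrative sketch, not part of the original source ---
# pending() above asks each partition for its log-end offset (OffsetRequest
# with time=-1, max_offsets=1) and subtracts the consumer's stored offset.
# A worked example with hypothetical numbers, summing across two partitions:
offsets = {0: 40, 1: 10}   # consumer's stored next-offset per partition
log_end = {0: 42, 1: 10}   # broker's log-end offset per partition
total = sum(log_end[p] - offsets[p] for p in offsets)
assert total == 2          # partition 0 has 2 unread messages; 1 is caught up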
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = topic
        self.group = group
        self.client._load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.topic_partitions[topic]

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        def get_or_init_offset_callback(resp):
            if resp.error == ErrorMapping.NO_ERROR:
                return resp.offset
            elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
                return 0
            else:
                raise Exception("OffsetFetchRequest for topic=%s, "
                                "partition=%d failed with errorcode=%s" % (
                                    resp.topic, resp.partition, resp.error))

        # Uncomment for 0.8.1
        #
        # for partition in partitions:
        #     req = OffsetFetchRequest(topic, partition)
        #     (offset,) = self.client.send_offset_fetch_request(
        #         group, [req],
        #         callback=get_or_init_offset_callback,
        #         fail_on_error=False)
        #     self.offsets[partition] = offset

        for partition in partitions:
            self.offsets[partition] = 0

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                assert resp.error == 0

            self.count_since_commit = 0

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit > self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        if not partitions:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset - (1 if offset > 0 else 0)

        return total
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):
        warnings.warn('deprecated -- this class will be removed in a future'
                      ' release. Use KafkaConsumer instead.',
                      DeprecationWarning)
        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic,
                                             ignore_leadernotavailable=True)
        self.offsets = {}

        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(topic)
        else:
            assert all(isinstance(x, numbers.Integral) for x in partitions)

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        # Set initial offsets
        if self.group is not None:
            self.fetch_last_known_offsets(partitions)
        else:
            for partition in partitions:
                self.offsets[partition] = 0

        # Register a cleanup handler
        def cleanup(obj):
            obj.stop()
        self._cleanup_func = cleanup
        atexit.register(cleanup, self)

        self.partition_info = False     # Do not return partition info in msgs

    def provide_partition_info(self):
        """
        Indicates that partition info must be returned by the consumer
        """
        self.partition_info = True

    def fetch_last_known_offsets(self, partitions=None):
        if self.group is None:
            raise ValueError('SimpleClient.group must not be None')

        if partitions is None:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        responses = self.client.send_offset_fetch_request(
            self.group,
            [OffsetFetchRequestPayload(self.topic, p) for p in partitions],
            fail_on_error=False)

        for resp in responses:
            try:
                check_error(resp)
            # API spec says the server won't set an error here,
            # but 0.8.1.1 actually does...
            except UnknownTopicOrPartitionError:
                pass

            # -1 offset signals that no commit is currently stored
            if resp.offset == -1:
                self.offsets[resp.partition] = 0
            # Otherwise we committed the stored offset
            # and need to fetch the next one
            else:
                self.offsets[resp.partition] = resp.offset

    def commit(self, partitions=None):
        """Commit stored offsets to Kafka via OffsetCommitRequest (v0)

        Keyword Arguments:
            partitions (list): list of partitions to commit, default is to
                commit all of them

        Returns: True on success, False on failure
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if partitions is None:  # commit all partitions
                partitions = list(self.offsets.keys())

            log.debug('Committing new offsets for %s, partitions %s',
                      self.topic, partitions)
            for partition in partitions:
                offset = self.offsets[partition]
                log.debug('Commit offset %d in SimpleConsumer: '
                          'group=%s, topic=%s, partition=%s',
                          offset, self.group, self.topic, partition)

                reqs.append(OffsetCommitRequestPayload(self.topic, partition,
                                                       offset, None))

            try:
                self.client.send_offset_commit_request(self.group, reqs)
            except KafkaError as e:
                log.error('%s saving offsets: %s', e.__class__.__name__, e)
                return False
            else:
                self.count_since_commit = 0
                return True

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit >= self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

        if hasattr(self, '_cleanup_func'):
            # Remove cleanup handler now that we've stopped

            # py3 supports unregistering
            if hasattr(atexit, 'unregister'):
                atexit.unregister(self._cleanup_func)  # pylint: disable=no-member

            # py2 requires removing from private attribute...
            else:
                # ValueError on list.remove() if the exithandler no longer
                # exists is fine here
                try:
                    atexit._exithandlers.remove(  # pylint: disable=no-member
                        (self._cleanup_func, (self, ), {}))
                except ValueError:
                    pass

            del self._cleanup_func

    def pending(self, partitions=None):
        """
        Gets the pending message count

        Keyword Arguments:
            partitions (list): list of partitions to check for, default is to
                check all
        """
        if partitions is None:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequestPayload(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset

        return total
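# --- Illustrative sketch, not part of the original source ---
# The deprecation warning above points at KafkaConsumer. A rough equivalent of
# this class's auto-committing behaviour under that API might look like the
# following; the server address, group and topic names are hypothetical, and
# parameter defaults should be checked against the installed version.
#
#     from kafka import KafkaConsumer
#
#     consumer = KafkaConsumer('my-topic',
#                              group_id='my-group',
#                              bootstrap_servers='localhost:9092',
#                              enable_auto_commit=True,
#                              auto_commit_interval_ms=5000)
#     for message in consumer:
#         print(message.partition, message.offset, message.value)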
class SimpleConsumer(object):
    """
    A simple consumer implementation that consumes all partitions for a topic

    client: a connected KafkaClient
    group: a name for this consumer, used for offset storage and must be unique
    topic: the topic to consume
    auto_commit: default True. Whether or not to auto commit the offsets
    auto_commit_every_n: default 100. How many messages to consume
                         before a commit
    auto_commit_every_t: default 5000. How much time (in milliseconds) to wait
                         before commit

    Auto commit details:
    If both auto_commit_every_n and auto_commit_every_t are set, they will
    reset one another when one is triggered. These triggers simply call the
    commit method on this class. A manual call to commit will also reset
    these triggers
    """
    def __init__(self, client, group, topic, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):
        self.client = client
        self.topic = topic
        self.group = group
        self.client._load_metadata_for_topics(topic)
        self.offsets = {}

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self._timed_commit)
            self.commit_timer.start()

        def get_or_init_offset_callback(resp):
            if resp.error == ErrorMapping.NO_ERROR:
                return resp.offset
            elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
                return 0
            else:
                raise Exception("OffsetFetchRequest for topic=%s, "
                                "partition=%d failed with errorcode=%s" % (
                                    resp.topic, resp.partition, resp.error))

        # Uncomment for 0.8.1
        #
        # for partition in self.client.topic_partitions[topic]:
        #     req = OffsetFetchRequest(topic, partition)
        #     (offset,) = self.client.send_offset_fetch_request(
        #         group, [req],
        #         callback=get_or_init_offset_callback,
        #         fail_on_error=False)
        #     self.offsets[partition] = offset

        for partition in self.client.topic_partitions[topic]:
            self.offsets[partition] = 0

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

    def seek(self, offset, whence):
        """
        Alter the current offset in the consumer, similar to fseek

        offset: how much to modify the offset
        whence: where to modify it from
                0 is relative to the earliest available offset (head)
                1 is relative to the current offset
                2 is relative to the latest known offset (tail)
        """
        if whence == 1:  # relative to current position
            for partition, _offset in self.offsets.items():
                self.offsets[partition] = _offset + offset
        elif whence in (0, 2):  # relative to beginning or end
            # divide the requested offset by the number of partitions,
            # distribute the remainder evenly
            (delta, rem) = divmod(offset, len(self.offsets))
            deltas = {}
            for partition, r in izip_longest(self.offsets.keys(),
                                             repeat(1, rem), fillvalue=0):
                deltas[partition] = delta + r

            reqs = []
            for partition in self.offsets.keys():
                if whence == 0:
                    reqs.append(OffsetRequest(self.topic, partition, -2, 1))
                elif whence == 2:
                    reqs.append(OffsetRequest(self.topic, partition, -1, 1))
                else:
                    pass

            resps = self.client.send_offset_request(reqs)
            for resp in resps:
                self.offsets[resp.partition] = resp.offsets[0] + \
                    deltas[resp.partition]
        else:
            raise ValueError("Unexpected value for `whence`, %d" % whence)

    def pending(self, partitions=[]):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        if len(partitions) == 0:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset - (1 if offset > 0 else 0)

        return total

    def _timed_commit(self):
        """
        Commit offsets as part of timer
        """
        self.commit()

        # Once the commit is done, start the timer again
        self.commit_timer.start()

    def commit(self, partitions=[]):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        # short circuit if nothing happened
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            reqs = []
            if len(partitions) == 0:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                assert resp.error == 0

            self.count_since_commit = 0

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit > self.auto_commit_every_n:
            if self.commit_timer is not None:
                self.commit_timer.stop()
                self.commit()
                self.commit_timer.start()
            else:
                self.commit()

    def __iter__(self):
        """
        Create an iterator per partition. Iterate through them calling next()
        until they are all exhausted.
        """
        iters = {}
        for partition, offset in self.offsets.items():
            iters[partition] = self.__iter_partition__(partition, offset)

        if len(iters) == 0:
            return

        while True:
            if len(iters) == 0:
                break

            for partition, it in iters.items():
                try:
                    yield it.next()
                except StopIteration:
                    log.debug("Done iterating over partition %s" % partition)
                    del iters[partition]
                    # skip auto-commit since we didn't yield anything
                    continue

                # Count, check and commit messages if necessary
                self.count_since_commit += 1
                self._auto_commit()

    def __iter_partition__(self, partition, offset):
        """
        Iterate over the messages in a partition. Create a FetchRequest to get
        back a batch of messages, yield them one at a time. After a batch is
        exhausted, start a new batch unless we've reached the end of this
        partition.
        """
        while True:
            # TODO: configure fetch size
            req = FetchRequest(self.topic, partition, offset, 1024)
            (resp,) = self.client.send_fetch_request([req])

            assert resp.topic == self.topic
            assert resp.partition == partition

            next_offset = None
            for message in resp.messages:
                next_offset = message.offset
                yield message
                # update the internal state _after_ we yield the message
                self.offsets[partition] = message.offset

            if next_offset is None:
                break
            else:
                offset = next_offset + 1
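# --- Illustrative sketch, not part of the original source ---
# Driving the iterator above, assuming a connected KafkaClient from this era
# of the API (constructor signature varies by version); broker address, group
# and topic names are hypothetical. seek(0, 2) jumps every partition to its
# tail before reading, so only new messages are yielded.
#
#     client = KafkaClient("localhost", 9092)
#     consumer = SimpleConsumer(client, "my-group", "my-topic")
#     consumer.seek(0, 2)       # whence=2: relative to the latest known offset
#     for message in consumer:  # fetches 1024-byte batches per partition
#         print(message.offset)
#     consumer.stop()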
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.get_partition_ids_for_topic(topic)
        else:
            assert all(isinstance(x, numbers.Integral) for x in partitions)

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        if auto_commit:
            self.fetch_last_known_offsets(partitions)
        else:
            for partition in partitions:
                self.offsets[partition] = 0

    def fetch_last_known_offsets(self, partitions=None):
        if not partitions:
            partitions = self.client.get_partition_ids_for_topic(self.topic)

        def get_or_init_offset(resp):
            try:
                kafka.common.check_error(resp)
                return resp.offset
            except UnknownTopicOrPartitionError:
                return 0

        for partition in partitions:
            req = OffsetFetchRequest(self.topic, partition)
            (resp, ) = self.client.send_offset_fetch_request(
                self.group, [req], fail_on_error=False)
            self.offsets[partition] = get_or_init_offset(resp)
        self.fetch_offsets = self.offsets.copy()

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))
                reqs.append(
                    OffsetCommitRequest(self.topic, partition, offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                kafka.common.check_error(resp)

            self.count_since_commit = 0

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit >= self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        if not partitions:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset - (1 if offset > 0 else 0)

        return total
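# --- Illustrative sketch, not part of the original source ---
# ReentrantTimer is used throughout but not defined in this file. The real
# implementation in kafka-python differs; this is only a minimal stand-in for
# the contract the code above relies on: call fn every t milliseconds until
# stopped, and allow restarting after a stop.
import threading

class ReentrantTimer(object):
    def __init__(self, t_ms, fn):
        self.t = t_ms / 1000.0
        self.fn = fn
        self.timer = None

    def _run(self):
        self.fn()
        self.start()                  # re-arm after each firing

    def start(self):
        self.timer = threading.Timer(self.t, self._run)
        self.timer.daemon = True      # don't block interpreter exit
        self.timer.start()

    def stop(self):
        if self.timer is not None:
            self.timer.cancel()
            self.timer = None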
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count
    """
    def __init__(self, client, group, topic, partitions=None, auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.topic_partitions[topic]
        else:
            assert all(isinstance(x, numbers.Integral) for x in partitions)

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # add callback to be called when stopping consumer -
        # used in conjunction with ZSimpleConsumer to perform an action
        # prior to a repartition event
        self.on_stop_callback = None

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        def get_or_init_offset_callback(resp):
            try:
                kafka.common.check_error(resp)
                return resp.offset
            except kafka.common.UnknownTopicOrPartitionError:
                return 0

        for partition in partitions:
            req = OffsetFetchRequest(topic, partition)
            (offset,) = self.client.send_offset_fetch_request(
                group, [req],
                callback=get_or_init_offset_callback,
                fail_on_error=False)
            self.offsets[partition] = offset

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.info("Commit offset %d in SimpleConsumer: "
                         "group=%s, topic=%s, partition=%s" %
                         (offset, self.group, self.topic, partition))
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                kafka.common.check_error(resp)

            self.count_since_commit = 0

    def commit_offsets(self, offsets):
        assert not self.auto_commit, \
            'cannot manually commit offsets if autocommit is True'
        with self.commit_lock:
            reqs = []
            for partition, offset in offsets.iteritems():
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))
            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                kafka.common.check_error(resp)
            self.count_since_commit = 0

    def register_on_stop_callback(self, fn):
        if self.on_stop_callback is None:
            self.on_stop_callback = fn

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit >= self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()
        if not self.auto_commit and self.on_stop_callback:
            try:
                log.info('executing "on_stop_callback"')
                self.on_stop_callback()
            except Exception:
                log.exception('There was an error executing '
                              '"on_stop_callback"')

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        if not partitions:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset - (1 if offset > 0 else 0)

        return total
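# --- Illustrative sketch, not part of the original source ---
# With auto_commit=False, this variant expects offsets to be committed
# manually via commit_offsets() and lets a coordinator (e.g. the
# ZSimpleConsumer mentioned above) hook in before shutdown. `client` and the
# group/topic names below are hypothetical.
#
#     consumer = Consumer(client, "my-group", "my-topic", auto_commit=False)
#     consumer.register_on_stop_callback(
#         lambda: log.info("releasing partitions before repartition"))
#     consumer.commit_offsets({0: 42, 1: 17})  # explicit partition -> offset map
#     consumer.stop()                          # runs the callback on shutdown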
class Consumer(object):
    """
    Base class to be used by other consumers. Not to be used directly

    This base class provides logic for
    * initialization and fetching metadata of partitions
    * Auto-commit logic
    * APIs for fetching pending message count

    Offset:
        ClientOffset.ZERO or 0;
        ClientOffset.PREVIOUS or -1;
        ClientOffset.CURRENT_BEGINNING or -2;
        ClientOffset.PREVIOUS_OR_CURRENT_BEGINNING or -3 (default);
        ClientOffset.LATEST or -4;
        any other value >= 0
    """
    def __init__(self, client, group, topic, partitions=None,
                 offset=ClientOffset.PREVIOUS_OR_CURRENT_BEGINNING,
                 auto_commit=True,
                 auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
                 auto_commit_every_t=AUTO_COMMIT_INTERVAL):

        self.client = client
        self.topic = topic
        self.group = group
        self.client.load_metadata_for_topics(topic)
        self.offsets = {}

        if not partitions:
            partitions = self.client.topic_partitions[topic]

        # Variables for handling offset commits
        self.commit_lock = Lock()
        self.commit_timer = None
        self.count_since_commit = 0
        self.auto_commit = auto_commit
        self.auto_commit_every_n = auto_commit_every_n
        self.auto_commit_every_t = auto_commit_every_t

        # Set up the auto-commit timer
        if auto_commit is True and auto_commit_every_t is not None:
            self.commit_timer = ReentrantTimer(auto_commit_every_t,
                                               self.commit)
            self.commit_timer.start()

        def get_current_offsets_callback(resp):
            if resp.error == ErrorMapping.NO_ERROR:
                return resp.offsets
            elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
                return 0
            else:
                raise BrokerResponseError(
                    "OffsetRequest for topic=%s, "
                    "partition=%d failed with errorcode=%s" % (
                        resp.topic, resp.partition, resp.error))

        # callback for fetching from ZooKeeper
        def get_or_init_previous_offset_callback(resp):
            if resp.error == ErrorMapping.NO_ERROR:
                return resp.offset
            elif resp.error == ErrorMapping.UNKNOWN_TOPIC_OR_PARTITON:
                return 0
            else:
                raise BrokerResponseError(
                    "OffsetFetchRequest for topic=%s, "
                    "partition=%d failed with errorcode=%s" % (
                        resp.topic, resp.partition, resp.error))

        currTimeMs = int(time.time() * 1000)
        PAYLOAD_MAX_OFFSET = 2147483647

        for partition in partitions:
            # current stream
            req = OffsetRequest(topic, partition, currTimeMs,
                                PAYLOAD_MAX_OFFSET)
            (raw_offsets,) = self.client.send_offset_request(
                [req],
                fail_on_error=False,
                callback=get_current_offsets_callback)
            offset_start = raw_offsets[-1]
            offset_end = raw_offsets[0]

            # ZooKeeper
            req = OffsetFetchRequest(topic, partition)
            (last_offset,) = self.client.send_offset_fetch_request(
                group, [req],
                callback=get_or_init_previous_offset_callback,
                fail_on_error=False)

            if offset == ClientOffset.PREVIOUS_OR_CURRENT_BEGINNING:
                if offset_start <= last_offset <= offset_end:
                    self.offsets[partition] = last_offset
                else:
                    self.offsets[partition] = offset_start
            elif offset == ClientOffset.PREVIOUS:
                self.offsets[partition] = last_offset
            elif offset == ClientOffset.CURRENT_BEGINNING:
                self.offsets[partition] = offset_start
            elif offset == ClientOffset.LATEST:
                self.offsets[partition] = offset_end
            elif offset >= 0:
                if offset_start <= offset <= offset_end:
                    for partition in partitions:
                        self.offsets[partition] = offset
                else:
                    raise ValueError("Invalid parameter value offset=%d, "
                                     "allowed range %d to %d" %
                                     (offset, offset_start, offset_end))

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        # short circuit if nothing happened. This check is kept outside
        # to prevent unnecessarily acquiring a lock just to check the state
        if self.count_since_commit == 0:
            return

        with self.commit_lock:
            # Do this check again, just in case the state changed
            # while we were acquiring the lock
            if self.count_since_commit == 0:
                return

            reqs = []
            if not partitions:  # commit all partitions
                partitions = self.offsets.keys()

            for partition in partitions:
                offset = self.offsets[partition]
                log.debug("Commit offset %d in SimpleConsumer: "
                          "group=%s, topic=%s, partition=%s" %
                          (offset, self.group, self.topic, partition))
                reqs.append(OffsetCommitRequest(self.topic, partition,
                                                offset, None))

            resps = self.client.send_offset_commit_request(self.group, reqs)
            for resp in resps:
                assert resp.error == 0

            self.count_since_commit = 0

    def _auto_commit(self):
        """
        Check if we have to commit based on number of messages and commit
        """
        # Check if we are supposed to do an auto-commit
        if not self.auto_commit or self.auto_commit_every_n is None:
            return

        if self.count_since_commit > self.auto_commit_every_n:
            self.commit()

    def stop(self):
        if self.commit_timer is not None:
            self.commit_timer.stop()
            self.commit()

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        if not partitions:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequest(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset - (1 if offset > 0 else 0)

        return total
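# --- Illustrative sketch, not part of the original source ---
# How the constructor above chooses the initial offset, with hypothetical
# numbers: the partition currently retains offsets 100..150 and ZooKeeper has
# 120 stored for this group.
offset_start, offset_end, last_offset = 100, 150, 120

# PREVIOUS_OR_CURRENT_BEGINNING (the default): resume from the stored offset
# if it still lies inside the retained range, else fall back to the start.
chosen = last_offset if offset_start <= last_offset <= offset_end else offset_start
assert chosen == 120

# If the stored offset had already been aged out of the log:
last_offset = 30
chosen = last_offset if offset_start <= last_offset <= offset_end else offset_start
assert chosen == 100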