def test_any_method_after_close_throws_exception():
    """Calling any consumer method after close() should throw a RuntimeError.

    A consumer (no ``bootstrap.servers`` configured) is subscribed,
    unsubscribed and closed. Every API call made afterwards must raise a
    ``RuntimeError`` whose message is exactly ``'Consumer closed'``.
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # (method name, call) pairs. Each call is wrapped in a lambda so that its
    # arguments are only built immediately before the call, exactly as in a
    # hand-written sequence of calls.
    calls_after_close = [
        ('subscribe', lambda: c.subscribe(['test'])),
        ('unsubscribe', lambda: c.unsubscribe()),
        ('poll', lambda: c.poll()),
        ('consume', lambda: c.consume()),
        ('assign', lambda: c.assign([TopicPartition('test', 0)])),
        ('unassign', lambda: c.unassign()),
        ('assignment', lambda: c.assignment()),
        ('commit', lambda: c.commit()),
        ('committed', lambda: c.committed([TopicPartition("test", 0)])),
        ('position', lambda: c.position([TopicPartition("test", 0)])),
        ('seek', lambda: c.seek([TopicPartition("test", 0, 0)])),
        ('get_watermark_offsets',
         lambda: c.get_watermark_offsets(TopicPartition("test", 0))),
    ]

    for method_name, call in calls_after_close:
        with pytest.raises(RuntimeError) as ex:
            call()
        # The method name is attached so a wrong message points at the culprit.
        assert 'Consumer closed' == str(ex.value), method_name
def test_any_method_after_close_throws_exception():
    """Calling any consumer method after close() should throw a RuntimeError.

    A consumer (no ``bootstrap.servers`` configured) is subscribed,
    unsubscribed and closed. Every API call made afterwards must raise a
    ``RuntimeError`` whose message matches ``'Consumer closed'``.
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # ``match=`` performs the same regex search as ``ExceptionInfo.match`` but
    # keeps the expectation on the same line as the ``raises`` check.
    closed = 'Consumer closed'

    with pytest.raises(RuntimeError, match=closed):
        c.subscribe(['test'])

    with pytest.raises(RuntimeError, match=closed):
        c.unsubscribe()

    with pytest.raises(RuntimeError, match=closed):
        c.poll()

    with pytest.raises(RuntimeError, match=closed):
        c.consume()

    with pytest.raises(RuntimeError, match=closed):
        c.assign([TopicPartition('test', 0)])

    with pytest.raises(RuntimeError, match=closed):
        c.unassign()

    with pytest.raises(RuntimeError, match=closed):
        c.assignment()

    with pytest.raises(RuntimeError, match=closed):
        c.commit()

    with pytest.raises(RuntimeError, match=closed):
        c.committed([TopicPartition("test", 0)])

    with pytest.raises(RuntimeError, match=closed):
        c.position([TopicPartition("test", 0)])

    with pytest.raises(RuntimeError, match=closed):
        c.seek([TopicPartition("test", 0, 0)])

    # The call must raise, so there is no return value to unpack.
    with pytest.raises(RuntimeError, match=closed):
        c.get_watermark_offsets(TopicPartition("test", 0))
class KafkaConsumer(object):
    """Small wrapper around a ``Consumer`` client bound to a single topic."""

    def __init__(self, group_id, topic):
        """Build the underlying client for ``group_id`` and remember ``topic``."""
        client_config = {
            'bootstrap.servers': KAFKA_SERVER_HOSTS,
            'group.id': group_id,
            'session.timeout.ms': 6000,
            'default.topic.config': {'auto.offset.reset': 'smallest'},
        }
        self.client = Consumer(client_config)
        self.topic = topic

    def query_kafka(self, max_part):
        """Print offset information for partitions ``0 .. max_part - 1``.

        One line is printed per partition with five space-separated integers:
        partition id, low watermark, committed offset, high watermark and the
        difference between high watermark and committed offset.
        """
        for partition_id in range(max_part):
            topic_partition = TopicPartition(self.topic, partition_id)
            committed_list = self.client.committed([topic_partition])
            watermarks = self.client.get_watermark_offsets(topic_partition)

            committed_entry = committed_list[0]
            low_mark, high_mark = watermarks[0], watermarks[1]
            lag = high_mark - committed_entry.offset

            row = (committed_entry.partition, low_mark,
                   committed_entry.offset, high_mark, lag)
            print("%d %d %d %d %d" % row)

    def reset_kafka(self, tps):
        """Assign each partition in ``tps`` in turn, print it, then poll once."""
        for topic_partition in tps:
            self.client.assign([topic_partition])
            print(topic_partition)
            self.client.poll()

    def close(self):
        """Close the underlying client."""
        self.client.close()
def test_basic_api():
    """Basic API tests.

    These won't really do anything since there is no broker configured; they
    only exercise the Python bindings and their argument handling.
    """
    try:
        Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test',
                   'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # ``async`` is a reserved word since Python 3.7, so ``commit(async=...)``
    # no longer parses; ``asynchronous`` is the supported keyword.
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid (-1001).
    kc.position(partitions)
    assert all(p.offset == -1001 for p in partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
def test_send_offsets_committed_transaction(kafka_cluster):
    """Offsets sent as part of a committed transaction must become committed.

    Flow: seed and fully read the input topic, run ``transactional_produce``
    against the output topic, attach the consumer's current positions to the
    transaction and commit it. A second producer sharing the same
    ``transactional.id`` then initialises transactions before the committed
    offsets are fetched and compared against ``[100]``.
    """
    test_name = 'test_send_offsets_committed_transaction'
    transactional_id = 'example_transactional_id'

    input_topic = kafka_cluster.create_topic("input_topic")
    output_topic = kafka_cluster.create_topic("output_topic")
    error_cb = prefixed_error_cb(test_name)

    first_producer_conf = {
        'client.id': 'producer1',
        'transactional.id': transactional_id,
        'error_cb': error_cb,
    }
    producer = kafka_cluster.producer(first_producer_conf)

    # Test-specific settings first; the cluster's client settings are applied
    # on top and therefore win on any overlapping key.
    consumer_conf = {
        'group.id': str(uuid1()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': False,
        'enable.partition.eof': True,
        'error_cb': error_cb,
    }
    cluster_conf = kafka_cluster.client_conf()
    consumer_conf.update(cluster_conf)
    consumer = Consumer(consumer_conf)

    kafka_cluster.seed_topic(input_topic)
    consumer.subscribe([input_topic])
    read_all_msgs(consumer)

    producer.init_transactions()
    transactional_produce(producer, output_topic, 100)

    assigned_before_commit = consumer.assignment()
    consumer_position = consumer.position(assigned_before_commit)
    group_metadata = consumer.consumer_group_metadata()

    banner = "=== Sending offsets {} to transaction ===".format(consumer_position)
    print(banner)

    producer.send_offsets_to_transaction(consumer_position, group_metadata)
    producer.commit_transaction()

    second_producer_conf = {
        'client.id': 'producer2',
        'transactional.id': transactional_id,
        'error_cb': error_cb,
    }
    producer2 = kafka_cluster.producer(second_producer_conf)

    # Ensure offset commits are visible prior to sending the FetchOffsets
    # request.
    producer2.init_transactions()

    assigned_after_commit = consumer.assignment()
    committed_offsets = consumer.committed(assigned_after_commit)
    print("=== Committed offsets for {} ===".format(committed_offsets))

    observed_offsets = [tp.offset for tp in committed_offsets]
    assert observed_offsets == [100]

    consumer.close()
def get_consumer_offsets(
    self, topics: List[str], ignore_group_regex: str = IGNORE_GROUP_REGEX
) -> List[Offset]:
    """Return the committed offsets of every consumer group for ``topics``.

    Args:
        topics: Topic names to inspect. Each must be present in the metadata
            returned by ``self.client.list_topics()``; otherwise the lookup
            raises ``KeyError``.
        ignore_group_regex: Consumer groups for which this pattern matches
            anywhere in the group name are skipped.

    Returns:
        One ``Offset`` per (consumer group, topic, partition) that has a
        committed offset. Partitions reporting ``-1001`` are omitted.
    """
    broker_topics = self.client.list_topics().topics
    partitions = [
        TopicPartition(topic_name, partition_id)
        for topic_name in topics
        for partition_id in broker_topics[topic_name].partitions
    ]

    offsets = []
    for consumer_group in self.get_consumer_groups():
        # Only a yes/no answer is needed, so a search is enough.
        if re.search(ignore_group_regex, consumer_group):
            logger.debug(f'Ignoring consumer group: {consumer_group}')
            continue

        # A throw-away consumer is created per group purely to read that
        # group's committed offsets. It must always be closed, otherwise
        # every inspected group leaks a client and its connections.
        consumer = Consumer({**self.config, 'group.id': consumer_group})
        try:
            committed = consumer.committed(partitions, timeout=10)
        finally:
            consumer.close()

        offsets.extend(
            Offset(consumer_group, tp.topic, tp.partition, tp.offset)
            for tp in committed
            if tp.offset != -1001  # -1001: no valid committed offset
        )
    return offsets
def test_basic_api():
    """Basic API tests.

    These won't really do anything since there is no broker configured; they
    exercise argument validation and the error paths of the consumer API.
    """
    # Constructing a consumer without a configuration must fail; using
    # ``pytest.raises`` makes the test fail if no exception is raised at all.
    with pytest.raises(TypeError) as ex:
        Consumer()
    assert str(ex.value) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = [TopicPartition("test", part) for part in range(0, 100, 3)]
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        # ``e.args`` is a tuple: index it for the failure message rather than
        # calling it, which would raise TypeError and hide the real failure.
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), \
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
class SynchronizedConsumer(object):
    """
    This class implements the framework for a consumer that is intended to only
    consume messages that have already been consumed and committed by members
    of another consumer group.

    This works similarly to the Kafka built-in ``__consumer_offsets`` topic.
    The consumer group that is being "followed" (the one that must make
    progress for our consumer here to make progress, identified by the
    ``synchronize_commit_group`` constructor parameter/instance attribute) must
    report its offsets to a topic (identified by the ``commit_log_topic``
    constructor parameter/instance attribute). This consumer subscribes to both
    the commit log topic and the topic(s) that we are actually interested in
    consuming messages from. The messages received from the commit log topic
    control whether or not consumption from partitions belonging to the main
    topic is paused, resumed, or allowed to continue in its current state
    without changes.

    The furthest point in any partition that this consumer should ever consume
    to is the maximum offset that has been recorded to the commit log topic for
    that partition. If the offsets recorded to that topic move
    non-monotonically (due to an intentional offset rollback, for instance)
    this consumer *may* consume up to the highest watermark point. (The
    implementation here tries to pause consuming from the partition as soon as
    possible, but this makes no explicit guarantees about that behavior.)
    """

    # Maps the ``initial_offset_reset`` constructor argument to the function
    # used to pick a starting offset when a partition has no committed offset.
    initial_offset_reset_strategies = {
        'earliest': get_earliest_offset,
        'latest': get_latest_offset,
    }

    def __init__(self, bootstrap_servers, consumer_group, commit_log_topic,
                 synchronize_commit_group, initial_offset_reset='latest', on_commit=None):
        """
        Starts the commit log consumer and creates the main consumer.

        :param bootstrap_servers: value used for ``bootstrap.servers``.
        :param consumer_group: value used for ``group.id`` of the main consumer.
        :param commit_log_topic: topic passed to the commit log consumer.
        :param synchronize_commit_group: group passed to the commit log consumer.
        :param initial_offset_reset: key into
            ``initial_offset_reset_strategies``; any other value raises
            ``KeyError``.
        :param on_commit: optional callable invoked as
            ``on_commit(error, partitions)`` from the commit callback.
        """
        self.bootstrap_servers = bootstrap_servers
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[initial_offset_reset]

        # The state manager must exist before the commit log consumer is
        # started, since it is handed to that consumer.
        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        self.__commit_log_consumer, self.__commit_log_consumer_stop_request = self.__start_commit_log_consumer()

        # (topic, partition) -> next offset to consume, as tracked by
        # ``poll`` and the assignment callback.
        self.__positions = {}

        def commit_callback(error, partitions):
            # Forward to the user supplied callback, if there is one.
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = {
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.consumer_group,
            'enable.auto.commit': 'false',
            'enable.auto.offset.store': 'true',
            'enable.partition.eof': 'false',
            'default.topic.config': {
                'auto.offset.reset': 'error',
            },
            'on_commit': commit_callback,
        }

        self.__consumer = Consumer(consumer_configuration)

    def __start_commit_log_consumer(self, timeout=None):
        """
        Starts running the commit log consumer.

        ``run_commit_log_consumer`` is submitted through ``execute`` and this
        method then waits up to ``timeout`` (forever when ``None``) for the
        start event to be set.

        Returns a ``(result, stop_request_event)`` tuple, where ``result`` is
        whatever ``execute`` returned (used elsewhere in this class through
        ``running()`` and ``result()``) and ``stop_request_event`` is the
        event set by ``close`` to request a stop.
        """
        stop_request_event = threading.Event()
        start_event = threading.Event()
        result = execute(
            functools.partial(
                run_commit_log_consumer,
                bootstrap_servers=self.bootstrap_servers,
                # A unique group per instance: the uuid suffix differs on
                # every call.
                consumer_group='{}:sync:{}'.format(self.consumer_group, uuid.uuid1().hex),
                commit_log_topic=self.commit_log_topic,
                synchronize_commit_group=self.synchronize_commit_group,
                partition_state_manager=self.__partition_state_manager,
                start_event=start_event,
                stop_request_event=stop_request_event,
            ),
        )
        start_event.wait(timeout)
        return result, stop_request_event

    def __check_commit_log_consumer_running(self):
        """
        Raises if the commit log consumer is no longer running.

        When it is not running, its result is requested without waiting; a
        ``TimeoutError`` from that request is ignored and a generic
        ``Exception`` is raised afterwards.
        """
        if not self.__commit_log_consumer.running():
            try:
                # NOTE(review): presumably this re-raises the exception that
                # stopped the commit log consumer, if any -- confirm against
                # the object returned by ``execute``.
                result = self.__commit_log_consumer.result(timeout=0)  # noqa
            except TimeoutError:
                pass  # not helpful

            raise Exception('Commit log consumer unexpectedly exit!')

    def __on_partition_state_change(
            self, topic, partition, previous_state_and_offsets, current_state_and_offsets):
        """
        Callback that is invoked when a partition state changes.

        Pauses the partition on the main consumer for the ``UNKNOWN``,
        ``SYNCHRONIZED`` and ``REMOTE_BEHIND`` states, resumes it for
        ``LOCAL_BEHIND`` and raises ``NotImplementedError`` for anything else.
        Nothing is done while no local offset is known.
        """
        logger.debug('State change for %r: %r to %r', (topic, partition),
                     previous_state_and_offsets, current_state_and_offsets)

        current_state, current_offsets = current_state_and_offsets
        if current_offsets.local is None:
            # It only makes sense to manipulate the consumer if we've got an
            # assignment. (This block should only be entered at startup if the
            # remote offsets are retrieved from the commit log before the local
            # consumer has received its assignment.)
            return

        # TODO: This will be called from the commit log consumer thread, so need
        # to verify that calling the ``consumer.{pause,resume}`` methods is
        # thread safe!
        if current_state in (SynchronizedPartitionState.UNKNOWN,
                             SynchronizedPartitionState.SYNCHRONIZED,
                             SynchronizedPartitionState.REMOTE_BEHIND):
            self.__consumer.pause([TopicPartition(topic, partition, current_offsets.local)])
        elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
            self.__consumer.resume([TopicPartition(topic, partition, current_offsets.local)])
        else:
            raise NotImplementedError('Unexpected partition state: %s' % (current_state,))

    def subscribe(self, topics, on_assign=None, on_revoke=None):
        """
        Subscribe to a topic.

        ``on_assign`` and ``on_revoke`` are optional callables; they are
        invoked with this instance (not the wrapped consumer) as the first
        argument, after this class has done its own bookkeeping.
        """
        self.__check_commit_log_consumer_running()

        def assignment_callback(consumer, assignment):
            # Since ``auto.offset.reset`` is set to ``error`` to force human
            # interaction on an offset reset, we have to explicitly specify the
            # starting offset if no offset has been committed for this topic during
            # the ``__consumer_offsets`` topic retention period.

            # Start from the positions already tracked locally; partitions
            # without one map to ``None`` and are resolved below.
            assignment = {
                (i.topic, i.partition): self.__positions.get((i.topic, i.partition)) for i in assignment
            }

            # For partitions without a tracked position, use the committed
            # offset when there is one, else the configured reset strategy.
            for i in self.__consumer.committed([TopicPartition(topic, partition) for (
                    topic, partition), offset in assignment.items() if offset is None]):
                k = (i.topic, i.partition)
                if i.offset > -1:
                    assignment[k] = i.offset
                else:
                    assignment[k] = self.initial_offset_reset(consumer, i.topic, i.partition)

            self.__consumer.assign([TopicPartition(topic, partition, offset)
                                    for (topic, partition), offset in assignment.items()])

            for (topic, partition), offset in assignment.items():
                # Setting the local offsets will either cause the partition to be
                # paused (if the remote offset is unknown or the local offset is
                # not trailing the remote offset) or resumed.
                self.__partition_state_manager.set_local_offset(topic, partition, offset)
                self.__positions[(topic, partition)] = offset

            if on_assign is not None:
                on_assign(self, [TopicPartition(topic, partition)
                                 for topic, partition in assignment.keys()])

        def revocation_callback(consumer, assignment):
            for item in assignment:
                # TODO: This should probably also be removed from the state manager.
                # NOTE: ``pop`` has no default here, so a partition without a
                # tracked position raises ``KeyError``.
                self.__positions.pop((item.topic, item.partition))

            if on_revoke is not None:
                on_revoke(self, assignment)

        self.__consumer.subscribe(
            topics,
            on_assign=assignment_callback,
            on_revoke=revocation_callback)

    def poll(self, timeout):
        """
        Poll the main consumer once.

        Returns ``None`` when nothing was received. A message whose
        ``error()`` is not ``None`` is returned untouched. Any other message
        is validated against the partition state manager, and the local
        offset and tracked position are advanced to ``offset + 1`` before the
        message is returned.
        """
        self.__check_commit_log_consumer_running()
        message = self.__consumer.poll(timeout)
        if message is None:
            return

        if message.error() is not None:
            return message

        self.__partition_state_manager.validate_local_message(
            message.topic(), message.partition(), message.offset())
        self.__partition_state_manager.set_local_offset(
            message.topic(), message.partition(), message.offset() + 1)
        self.__positions[(message.topic(), message.partition())] = message.offset() + 1

        return message

    def commit(self, *args, **kwargs):
        """
        Commit through the main consumer, forwarding all arguments unchanged.
        """
        self.__check_commit_log_consumer_running()
        return self.__consumer.commit(*args, **kwargs)

    def close(self):
        """
        Request the commit log consumer to stop and close the main consumer.

        The commit log consumer's result is awaited even if closing the main
        consumer raises.
        """
        self.__check_commit_log_consumer_running()
        self.__commit_log_consumer_stop_request.set()
        try:
            self.__consumer.close()
        finally:
            self.__commit_log_consumer.result()
class SynchronizedConsumer:
    """
    This class implements the framework for a consumer that is intended to only
    consume messages that have already been consumed and committed by members
    of another consumer group.

    This works similarly to the Kafka built-in ``__consumer_offsets`` topic.
    The consumer group that is being "followed" (the one that must make
    progress for our consumer here to make progress, identified by the
    ``synchronize_commit_group`` constructor parameter/instance attribute) must
    report its offsets to a topic (identified by the ``commit_log_topic``
    constructor parameter/instance attribute). This consumer subscribes to both
    the commit log topic and the topic(s) that we are actually interested in
    consuming messages from. The messages received from the commit log topic
    control whether or not consumption from partitions belonging to the main
    topic is paused, resumed, or allowed to continue in its current state
    without changes.

    The furthest point in any partition that this consumer should ever consume
    to is the maximum offset that has been recorded to the commit log topic for
    that partition. If the offsets recorded to that topic move
    non-monotonically (due to an intentional offset rollback, for instance)
    this consumer *may* consume up to the highest watermark point. (The
    implementation here tries to pause consuming from the partition as soon as
    possible, but this makes no explicit guarantees about that behavior.)
    """

    # Maps the ``initial_offset_reset`` constructor argument to the function
    # used to pick a starting offset when a partition has no committed offset.
    initial_offset_reset_strategies = {
        "earliest": get_earliest_offset,
        "latest": get_latest_offset
    }

    def __init__(
        self,
        cluster_name,
        consumer_group,
        commit_log_topic,
        synchronize_commit_group,
        initial_offset_reset="latest",
        on_commit=None,
    ):
        """
        Starts the commit log consumer and creates the main consumer.

        :param cluster_name: cluster whose consumer options are looked up via
            ``kafka_config.get_kafka_consumer_cluster_options``.
        :param consumer_group: value used for ``group.id`` of the main consumer.
        :param commit_log_topic: topic passed to the commit log consumer.
        :param synchronize_commit_group: group passed to the commit log consumer.
        :param initial_offset_reset: key into
            ``initial_offset_reset_strategies``; any other value raises
            ``KeyError``.
        :param on_commit: optional callable invoked as
            ``on_commit(error, partitions)`` from the commit callback.
        """
        self.cluster_name = cluster_name
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[
            initial_offset_reset]

        # The state manager must exist before the commit log consumer is
        # started, since it is handed to that consumer.
        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        (
            self.__commit_log_consumer,
            self.__commit_log_consumer_stop_request,
        ) = self.__start_commit_log_consumer()

        # (topic, partition) -> next offset to consume, as tracked by
        # ``poll`` and the assignment callback.
        self.__positions = {}

        def commit_callback(error, partitions):
            # Forward to the user supplied callback, if there is one.
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = kafka_config.get_kafka_consumer_cluster_options(
            cluster_name,
            override_params={
                "group.id": self.consumer_group,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "true",
                "enable.partition.eof": "false",
                "default.topic.config": {
                    "auto.offset.reset": "error"
                },
                "on_commit": commit_callback,
            },
        )

        self.__consumer = Consumer(consumer_configuration)

    def __start_commit_log_consumer(self, timeout=None):
        """
        Starts running the commit log consumer.

        ``run_commit_log_consumer`` is submitted through ``execute`` and this
        method then waits up to ``timeout`` (forever when ``None``) for the
        start event to be set.

        Returns a ``(result, stop_request_event)`` tuple, where ``result`` is
        whatever ``execute`` returned (used elsewhere in this class through
        ``running()`` and ``result()``) and ``stop_request_event`` is the
        event set by ``close`` to request a stop.
        """
        stop_request_event = threading.Event()
        start_event = threading.Event()
        result = execute(
            functools.partial(
                run_commit_log_consumer,
                cluster_name=self.cluster_name,
                # A unique group per instance: the uuid suffix differs on
                # every call.
                consumer_group=f"{self.consumer_group}:sync:{uuid.uuid1().hex}",
                commit_log_topic=self.commit_log_topic,
                synchronize_commit_group=self.synchronize_commit_group,
                partition_state_manager=self.__partition_state_manager,
                start_event=start_event,
                stop_request_event=stop_request_event,
            ))
        start_event.wait(timeout)
        return result, stop_request_event

    def __check_commit_log_consumer_running(self):
        """
        Raises if the commit log consumer is no longer running.

        When it is not running, its result is requested without waiting; a
        ``TimeoutError`` from that request is ignored and a generic
        ``Exception`` is raised afterwards.
        """
        if not self.__commit_log_consumer.running():
            try:
                # NOTE(review): presumably this re-raises the exception that
                # stopped the commit log consumer, if any -- confirm against
                # the object returned by ``execute``.
                result = self.__commit_log_consumer.result(timeout=0)  # noqa
            except TimeoutError:
                pass  # not helpful

            raise Exception("Commit log consumer unexpectedly exit!")

    def __on_partition_state_change(self, topic, partition,
                                    previous_state_and_offsets,
                                    current_state_and_offsets):
        """
        Callback that is invoked when a partition state changes.

        Pauses the partition on the main consumer for the ``UNKNOWN``,
        ``SYNCHRONIZED`` and ``REMOTE_BEHIND`` states, resumes it for
        ``LOCAL_BEHIND`` and raises ``NotImplementedError`` for anything else.
        Nothing is done while no local offset is known.
        """
        logger.debug(
            "State change for %r: %r to %r",
            (topic, partition),
            previous_state_and_offsets,
            current_state_and_offsets,
        )

        current_state, current_offsets = current_state_and_offsets
        if current_offsets.local is None:
            # It only makes sense to manipulate the consumer if we've got an
            # assignment. (This block should only be entered at startup if the
            # remote offsets are retrieved from the commit log before the local
            # consumer has received its assignment.)
            return

        # TODO: This will be called from the commit log consumer thread, so need
        # to verify that calling the ``consumer.{pause,resume}`` methods is
        # thread safe!
        if current_state in (
                SynchronizedPartitionState.UNKNOWN,
                SynchronizedPartitionState.SYNCHRONIZED,
                SynchronizedPartitionState.REMOTE_BEHIND,
        ):
            self.__consumer.pause(
                [TopicPartition(topic, partition, current_offsets.local)])
        elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
            self.__consumer.resume(
                [TopicPartition(topic, partition, current_offsets.local)])
        else:
            raise NotImplementedError(
                f"Unexpected partition state: {current_state}")

    def subscribe(self, topics, on_assign=None, on_revoke=None):
        """
        Subscribe to a topic.

        ``on_assign`` and ``on_revoke`` are optional callables; they are
        invoked with this instance (not the wrapped consumer) as the first
        argument, after this class has done its own bookkeeping.
        """
        self.__check_commit_log_consumer_running()

        def assignment_callback(consumer, assignment):
            # Since ``auto.offset.reset`` is set to ``error`` to force human
            # interaction on an offset reset, we have to explicitly specify the
            # starting offset if no offset has been committed for this topic during
            # the ``__consumer_offsets`` topic retention period.

            # Start from the positions already tracked locally; partitions
            # without one map to ``None`` and are resolved below.
            assignment = {(i.topic, i.partition): self.__positions.get(
                (i.topic, i.partition)) for i in assignment}

            # For partitions without a tracked position, use the committed
            # offset when there is one, else the configured reset strategy.
            for i in self.__consumer.committed([
                    TopicPartition(topic, partition)
                    for (topic, partition), offset in assignment.items()
                    if offset is None
            ]):
                k = (i.topic, i.partition)
                if i.offset > -1:
                    assignment[k] = i.offset
                else:
                    assignment[k] = self.initial_offset_reset(
                        consumer, i.topic, i.partition)

            self.__consumer.assign([
                TopicPartition(topic, partition, offset)
                for (topic, partition), offset in assignment.items()
            ])

            for (topic, partition), offset in assignment.items():
                # Setting the local offsets will either cause the partition to be
                # paused (if the remote offset is unknown or the local offset is
                # not trailing the remote offset) or resumed.
                self.__partition_state_manager.set_local_offset(
                    topic, partition, offset)
                self.__positions[(topic, partition)] = offset

            if on_assign is not None:
                on_assign(
                    self,
                    [
                        TopicPartition(topic, partition)
                        for topic, partition in assignment.keys()
                    ],
                )

        def revocation_callback(consumer, assignment):
            for item in assignment:
                # TODO: This should probably also be removed from the state manager.
                # NOTE: ``pop`` has no default here, so a partition without a
                # tracked position raises ``KeyError``.
                self.__positions.pop((item.topic, item.partition))

            if on_revoke is not None:
                on_revoke(self, assignment)

        self.__consumer.subscribe(topics,
                                  on_assign=assignment_callback,
                                  on_revoke=revocation_callback)

    def poll(self, timeout):
        """
        Poll the main consumer once.

        Returns ``None`` when nothing was received. A message whose
        ``error()`` is not ``None`` is returned untouched. Any other message
        is validated against the partition state manager, and the local
        offset and tracked position are advanced to ``offset + 1`` before the
        message is returned.
        """
        self.__check_commit_log_consumer_running()
        message = self.__consumer.poll(timeout)
        if message is None:
            return

        if message.error() is not None:
            return message

        self.__partition_state_manager.validate_local_message(
            message.topic(), message.partition(), message.offset())
        self.__partition_state_manager.set_local_offset(
            message.topic(), message.partition(), message.offset() + 1)
        self.__positions[(message.topic(),
                          message.partition())] = message.offset() + 1

        return message

    def commit(self, *args, **kwargs):
        """
        Commit through the main consumer, forwarding all arguments unchanged.
        """
        self.__check_commit_log_consumer_running()
        return self.__consumer.commit(*args, **kwargs)

    def close(self):
        """
        Request the commit log consumer to stop and close the main consumer.

        The commit log consumer's result is awaited even if closing the main
        consumer raises.
        """
        self.__check_commit_log_consumer_running()
        self.__commit_log_consumer_stop_request.set()
        try:
            self.__consumer.close()
        finally:
            self.__commit_log_consumer.result()
class KafkaConsumer(Consumer[TPayload]):
    """
    The behavior of this consumer differs slightly from the Confluent
    consumer during rebalancing operations. Whenever a partition is assigned
    to this consumer, offsets are *always* automatically reset to the
    committed offset for that partition (or if no offsets have been committed
    for that partition, the offset is reset in accordance with the
    ``auto.offset.reset`` configuration value.) This causes partitions that
    are maintained across a rebalance to have the same offset management
    behavior as a partition that is moved from one consumer to another. To
    prevent uncommitted messages from being consumed multiple times,
    ``commit`` should be called in the partition revocation callback.

    The behavior of ``auto.offset.reset`` also differs slightly from the
    Confluent consumer: offsets are only reset during initial assignment or
    subsequent rebalancing operations. Any other circumstances that would
    otherwise lead to preemptive offset reset (e.g. the consumer tries to read
    a message that is before the earliest offset, or the consumer attempts to
    read a message that is after the latest offset) will cause an exception to
    be thrown, rather than resetting the offset, as this could lead to chunks
    of messages being replayed or skipped, depending on the circumstances.
    This also means that if the committed offset is no longer available (such
    as when reading older messages from the log and those messages expire, or
    reading newer messages from the log and the leader crashes and partition
    ownership fails over to an out-of-date replica), the consumer will
    fail-stop rather than reset to the value of ``auto.offset.reset``.
    """

    # Set of logical offsets that do not correspond to actual log positions.
    # These offsets should be considered an implementation detail of the Kafka
    # consumer and not used publicly.
    # https://github.com/confluentinc/confluent-kafka-python/blob/443177e1c83d9b66ce30f5eb8775e062453a738b/tests/test_enums.py#L22-L25
    LOGICAL_OFFSETS = frozenset(
        [OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID])

    def __init__(
        self,
        configuration: Mapping[str, Any],
        codec: Codec[KafkaPayload, TPayload],
        *,
        commit_retry_policy: Optional[RetryPolicy] = None,
    ) -> None:
        """
        Create the wrapped Confluent consumer from ``configuration``.

        ``auto.offset.reset`` (default ``"largest"``) selects how a starting
        offset is resolved for partitions without a committed offset. Both
        ``enable.auto.commit`` and ``enable.auto.offset.store`` must be
        explicitly disabled in ``configuration``; otherwise a ``ValueError``
        is raised. A ``ValueError`` is also raised for an unrecognized
        ``auto.offset.reset`` value.

        ``codec`` decodes the raw payload of each polled message. When
        ``commit_retry_policy`` is omitted, a ``NoRetryPolicy`` is used for
        ``commit_offsets``.
        """
        if commit_retry_policy is None:
            commit_retry_policy = NoRetryPolicy()

        # Pick the strategy used by the assignment callback to resolve a
        # starting offset when the group has no committed offset.
        auto_offset_reset = configuration.get("auto.offset.reset", "largest")
        if auto_offset_reset in {"smallest", "earliest", "beginning"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_earliest)
        elif auto_offset_reset in {"largest", "latest", "end"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_latest)
        elif auto_offset_reset == "error":
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_error)
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")

        # The missing-key default is "true", so a configuration that omits
        # either of the following settings is rejected as well.
        if (as_kafka_configuration_bool(
                configuration.get("enable.auto.commit", "true")) is not False):
            raise ValueError(
                "invalid value for 'enable.auto.commit' configuration")

        if (as_kafka_configuration_bool(
                configuration.get("enable.auto.offset.store", "true"))
                is not False):
            raise ValueError(
                "invalid value for 'enable.auto.offset.store' configuration")

        # NOTE: Offsets are explicitly managed as part of the assignment
        # callback, so preemptively resetting offsets is not enabled.
        self.__consumer = ConfluentConsumer({
            **configuration,
            "auto.offset.reset": "error"
        })

        self.__codec = codec

        # Next offset to read for each assigned partition.
        self.__offsets: MutableMapping[Partition, int] = {}
        # Offsets recorded by ``stage_offsets`` and not yet committed.
        self.__staged_offsets: MutableMapping[Partition, int] = {}
        # Partitions currently paused through ``pause``.
        self.__paused: Set[Partition] = set()

        self.__commit_retry_policy = commit_retry_policy

        self.__state = KafkaConsumerState.CONSUMING

    def __resolve_partition_offset_earliest(
            self, partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        """Return ``partition`` positioned at its low watermark offset."""
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       low)

    def __resolve_partition_offset_latest(
            self, partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        """Return ``partition`` positioned at its high watermark offset."""
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       high)

    def __resolve_partition_offset_error(
            self, partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        """Always raise ``ConsumerError``; used for ``auto.offset.reset=error``."""
        raise ConsumerError("unable to resolve partition offsets")

    def subscribe(
        self,
        topics: Sequence[Topic],
        on_assign: Optional[Callable[[Mapping[Partition, int]], None]] = None,
        on_revoke: Optional[Callable[[Sequence[Partition]], None]] = None,
    ) -> None:
        """
        Subscribe to topics. This replaces a previous subscription.

        This method does not block. The subscription may not be fulfilled
        immediately: instead, the ``on_assign`` and ``on_revoke`` callbacks
        are called when the subscription state changes with the updated
        assignment for this consumer.

        If provided, the ``on_assign`` callback is called with a mapping of
        partitions to their offsets (at this point, the working offset and
        the committed offset are the same for each partition) on each
        subscription change. Similarly, the ``on_revoke`` callback (if
        provided) is called with a sequence of partitions that are being
        removed from this consumer's assignment. (This callback does not
        include the offsets, as the working offset and committed offset may
        differ, in some cases by substantial margin.)

        Raises an ``InvalidState`` exception if called on a closed consumer.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        def assignment_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            # The ASSIGNING state makes ``__seek`` use ``assign`` rather than
            # ``seek`` on the underlying consumer.
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                assignment: MutableSequence[ConfluentTopicPartition] = []

                for partition in self.__consumer.committed(partitions):
                    if partition.offset >= 0:
                        assignment.append(partition)
                    elif partition.offset == OFFSET_INVALID:
                        # No committed offset: fall back to the strategy
                        # selected from ``auto.offset.reset``.
                        assignment.append(
                            self.__resolve_partition_starting_offset(
                                partition))
                    else:
                        raise ValueError("received unexpected offset")

                offsets: MutableMapping[Partition, int] = {
                    Partition(Topic(i.topic), i.partition): i.offset
                    for i in assignment
                }
                self.__seek(offsets)

                # Ensure that all partitions are resumed on assignment to avoid
                # carrying over state from a previous assignment.
                self.__consumer.resume([
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset)
                    for partition, offset in offsets.items()
                ])

                for partition in offsets:
                    self.__paused.discard(partition)
            except Exception:
                # Any failure while establishing the assignment leaves the
                # consumer in the ERROR state.
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(offsets)
            finally:
                self.__state = KafkaConsumerState.CONSUMING

        def revocation_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            partitions = [
                Partition(Topic(i.topic), i.partition) for i in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(partitions)
            finally:
                for partition in partitions:
                    # Staged offsets are deleted during partition revocation to
                    # prevent later committing offsets for partitions that are
                    # no longer owned by this consumer.
                    if partition in self.__staged_offsets:
                        logger.warning(
                            "Dropping staged offset for revoked partition (%r)!",
                            partition,
                        )
                        del self.__staged_offsets[partition]

                    try:
                        self.__offsets.pop(partition)
                    except KeyError:
                        # If there was an error during assignment, this
                        # partition may have never been added to the offsets
                        # mapping.
                        logger.warning(
                            "failed to delete offset for unknown partition: %r",
                            partition,
                        )

                    self.__paused.discard(partition)

                self.__state = KafkaConsumerState.CONSUMING

        self.__consumer.subscribe(
            [topic.name for topic in topics],
            on_assign=assignment_callback,
            on_revoke=revocation_callback,
        )

    def unsubscribe(self) -> None:
        """
        Unsubscribe from topics.

        Raises an ``InvalidState`` exception if called on a closed consumer.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        self.__consumer.unsubscribe()

    def poll(self,
             timeout: Optional[float] = None) -> Optional[Message[TPayload]]:
        """
        Return the next message available to be consumed, if one is
        available. If no message is available, this method will block up to
        the ``timeout`` value before returning ``None``. A timeout of
        ``0.0`` represents "do not block", while a timeout of ``None``
        represents "block until a message is available (or forever)".

        Calling this method may also invoke subscription state change
        callbacks.

        This method may also raise an ``EndOfPartition`` error (a subtype of
        ``ConsumerError``) when the consumer has reached the end of a
        partition that it is subscribed to and no additional messages are
        available. The ``partition`` attribute of the raised exception
        specifies which partition's end has been reached. (Since this
        consumer is multiplexing a set of partitions, this exception does not
        mean that *all* of the partitions that the consumer is subscribed to
        do not have any messages, just that it has reached the end of one of
        them. This also does not mean that additional messages won't be
        available in future poll calls.)

        Not every backend implementation supports this feature or is
        configured to raise in this scenario.

        Raises an ``InvalidState`` exception if called on a closed consumer.

        Raises a ``TransportError`` for various other consumption-related
        errors.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        # ``timeout`` is only forwarded when it was provided, so ``None``
        # means "call the underlying poll() without a timeout argument".
        message: Optional[ConfluentMessage] = self.__consumer.poll(
            *[timeout] if timeout is not None else [])
        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfPartition(
                    Partition(Topic(message.topic()), message.partition()),
                    message.offset(),
                )
            elif code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            else:
                raise ConsumerError(str(error))

        headers: Optional[Headers] = message.headers()
        result = Message(
            Partition(Topic(message.topic()), message.partition()),
            message.offset(),
            self.__codec.decode(
                KafkaPayload(
                    message.key(),
                    message.value(),
                    headers if headers is not None else [],
                )),
            # The second element of ``timestamp()`` is divided by 1000 before
            # conversion, i.e. it is treated as milliseconds.
            datetime.utcfromtimestamp(message.timestamp()[1] / 1000.0),
        )

        # Track the position of the next message to read for this partition.
        self.__offsets[result.partition] = result.get_next_offset()

        return result

    def tell(self) -> Mapping[Partition, int]:
        """
        Return the read offsets for all assigned partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        # NOTE(review): this returns the internal mapping itself, not a copy.
        return self.__offsets

    def __validate_offsets(self, offsets: Mapping[Partition, int]) -> None:
        """Raise ``ConsumerError`` if any offset in ``offsets`` is negative."""
        invalid_offsets: Mapping[Partition, int] = {
            partition: offset
            for partition, offset in offsets.items() if offset < 0
        }

        if invalid_offsets:
            raise ConsumerError(f"invalid offsets: {invalid_offsets!r}")

    def __seek(self, offsets: Mapping[Partition, int]) -> None:
        """
        Validate ``offsets``, apply them to the underlying consumer, and
        record them as the current read offsets.
        """
        self.__validate_offsets(offsets)

        if self.__state is KafkaConsumerState.ASSIGNING:
            # Calling ``seek`` on the Confluent consumer from an assignment
            # callback will throw an "Erroneous state" error. Instead,
            # partition offsets have to be initialized by calling ``assign``.
            self.__consumer.assign([
                ConfluentTopicPartition(partition.topic.name, partition.index,
                                        offset)
                for partition, offset in offsets.items()
            ])
        else:
            for partition, offset in offsets.items():
                self.__consumer.seek(
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset))

        self.__offsets.update(offsets)

    def seek(self, offsets: Mapping[Partition, int]) -> None:
        """
        Change the read offsets for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer. Raises a
        ``ConsumerError`` if any of the partitions is not currently assigned.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if offsets.keys() - self.__offsets.keys():
            raise ConsumerError("cannot seek on unassigned partitions")

        self.__seek(offsets)

    def pause(self, partitions: Sequence[Partition]) -> None:
        """
        Pause the consumption of messages for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer. Raises a
        ``ConsumerError`` if any of the partitions is not currently assigned.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if set(partitions) - self.__offsets.keys():
            raise ConsumerError("cannot pause unassigned partitions")

        self.__consumer.pause([
            ConfluentTopicPartition(partition.topic.name, partition.index)
            for partition in partitions
        ])

        self.__paused.update(partitions)

        # XXX: Seeking to a specific partition offset and immediately pausing
        # that partition causes the seek to be ignored for some reason.
        self.seek({
            partition: offset
            for partition, offset in self.__offsets.items()
            if partition in partitions
        })

    def resume(self, partitions: Sequence[Partition]) -> None:
        """
        Resume the consumption of messages for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer. Raises a
        ``ConsumerError`` if any of the partitions is not currently assigned.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if set(partitions) - self.__offsets.keys():
            raise ConsumerError("cannot resume unassigned partitions")

        self.__consumer.resume([
            ConfluentTopicPartition(partition.topic.name, partition.index)
            for partition in partitions
        ])

        for partition in partitions:
            self.__paused.discard(partition)

    def paused(self) -> Sequence[Partition]:
        """
        Return a new list of the partitions currently paused.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        return [*self.__paused]

    def stage_offsets(self, offsets: Mapping[Partition, int]) -> None:
        """
        Record offsets to be committed by the next ``commit_offsets`` call.

        Raises an ``InvalidState`` if called on a closed consumer. Raises a
        ``ConsumerError`` if any partition is not currently assigned or if
        any offset is negative.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if offsets.keys() - self.__offsets.keys():
            raise ConsumerError(
                "cannot stage offsets for unassigned partitions")

        self.__validate_offsets(offsets)

        # TODO: Maybe log a warning if these offsets exceed the current
        # offsets, since that's probably a side effect of an incorrect usage
        # pattern?
        self.__staged_offsets.update(offsets)

    def __commit(self) -> Mapping[Partition, int]:
        """
        Synchronously commit the staged offsets and clear them.

        Returns the committed offsets keyed by partition, omitting any
        logical offsets reported by the underlying consumer. When nothing is
        staged, no commit is issued and an empty mapping is returned.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        result: Optional[Sequence[ConfluentTopicPartition]]

        if self.__staged_offsets:
            result = self.__consumer.commit(
                offsets=[
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset)
                    for partition, offset in self.__staged_offsets.items()
                ],
                asynchronous=False,
            )
        else:
            result = []

        assert result is not None  # synchronous commit should return result immediately

        # Only reached when the commit did not raise, so staged offsets are
        # retained if the commit fails.
        self.__staged_offsets.clear()

        offsets: MutableMapping[Partition, int] = {}

        for value in result:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``Partition`` objects returned by ``commit``. These
            # are an implementation detail of the Kafka Consumer, so we don't
            # expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if value.offset in self.LOGICAL_OFFSETS:
                continue

            assert value.offset >= 0, "expected non-negative offset"
            offsets[Partition(Topic(value.topic),
                              value.partition)] = value.offset

        return offsets

    def commit_offsets(self) -> Mapping[Partition, int]:
        """
        Commit staged offsets for all partitions that this consumer is
        assigned to. The return value of this method is a mapping of
        partitions with their committed offsets as values.

        The commit is executed through the configured commit retry policy.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        return self.__commit_retry_policy.call(self.__commit)

    def close(self, timeout: Optional[float] = None) -> None:
        """
        Close the consumer. This stops consuming messages, *may* commit
        staged offsets (depending on the configuration), and ends its
        subscription.

        NOTE(review): ``timeout`` is accepted but not used by this
        implementation, and a ``RuntimeError`` raised by the underlying
        consumer's ``close`` is suppressed, so no exception is raised here
        for a failed or slow close. The state always becomes ``CLOSED``.
        """
        try:
            self.__consumer.close()
        except RuntimeError:
            pass

        self.__state = KafkaConsumerState.CLOSED

    @property
    def closed(self) -> bool:
        """Whether ``close`` has been called on this consumer."""
        return self.__state is KafkaConsumerState.CLOSED
def test_basic_api():
    """
    Basic API tests; these won't really do anything since there is no
    broker configured.

    The test only verifies that each Consumer call can be made and that the
    broker-dependent calls fail with one of the expected error codes.
    """
    # Constructing a Consumer without a configuration dict must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # ``async`` is a reserved keyword since Python 3.7, so the commit mode
    # has to be selected with the ``asynchronous`` keyword argument.
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions
                if p.offset == -1001]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
# 获取一个 partition 的最小、最大 offset consumer.get_watermark_offsets(TopicPartition('test', 4)) # (0, 19) # 如果是一个新的 group.id 必须先消费一条消息,这样后面的重置 offset 才有效, 如果不消费,重置 offset 前后获取到的 offset 值都是-1001 # 获取当前 offset 位置 consumer.position([TopicPartition('test', 3)]) # 重置 offset 到任意位置,committed 决定了下一次连接后的 offset 位置(以 group 为维度),本次连接无效。本次连接的 offset 位置由 position 决定。 # 重置 offset 后,要 close 重新连才有效。position 决定本次连接的 offset 位置,用 seek() 修改。 consumer.seek(TopicPartition('test', 3, 1)) consumer.commit(offsets=[TopicPartition('test', 3, 7)]) # 检查重置的位置 msg = consumer.committed([TopicPartition('test', 3)]) print(msg) # offset:Either an absolute offset (>=0) or a logical offset: OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID while True: msg = consumer.poll(3.0) if msg is None: continue if msg.error(): if msg.error().code() == KafkaError._PARTITION_EOF: continue else: print(msg.error()) break print('Received message: {}'.format(msg.value().decode('utf-8')))
# Consumer settings: local broker, auto-commit enabled, and the topic-level
# ``auto.offset.reset`` set to 'smallest'.
config = {
    'bootstrap.servers': 'localhost',
    'group.id': 'my-group2',
    'enable.auto.commit': True,
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    }
}

consumer = Consumer(config)
tp = TopicPartition(topic, 0)
consumer.subscribe([topic])

# High watermark of partition 0 and the group's committed offset for it.
_, offset_max = consumer.get_watermark_offsets(tp)
offset_min = consumer.committed([tp])[0].offset
print(offset_min, offset_max)
sleep(2)

# A negative committed offset is clamped to 0 before computing how many
# messages are pending.
number = offset_max - max(offset_min, 0)
print(f"Debería leer un total de {number} mensajes")

messages = consumer.consume(num_messages=number, timeout=10)
# consume() returns a list that is empty on timeout rather than None, so the
# "nothing could be read" failure also has to cover an empty result when
# messages were expected.
if messages is None or (number > 0 and not messages):
    raise ValueError('No he podido leer nada')
print("Hay un total de " + str(len(messages)) + " mensajes.")
def test_basic_api():
    """
    Basic API tests; these won't really do anything since there is no
    broker configured.

    Each Consumer method is called once; broker-dependent calls are expected
    either to succeed trivially or to fail with one of the listed error codes.
    """
    # Constructing a Consumer without a configuration dict must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    # Out-of-range batch sizes are rejected.
    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = [TopicPartition("test", part) for part in range(0, 100, 3)]
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        # The failure message indexes ``e.args``; calling it would raise a
        # TypeError and hide the actual Kafka error when the assertion fails.
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), \
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
class KafkaConsumer(Consumer[TopicPartition, int, bytes]):
    """
    The behavior of this consumer differs slightly from the Confluent
    consumer during rebalancing operations. Whenever a partition is assigned
    to this consumer, offsets are *always* automatically reset to the
    committed offset for that partition (or if no offsets have been committed
    for that partition, the offset is reset in accordance with the
    ``auto.offset.reset`` configuration value.) This causes partitions that
    are maintained across a rebalance to have the same offset management
    behavior as a partition that is moved from one consumer to another. To
    prevent uncommitted messages from being consumed multiple times,
    ``commit`` should be called in the partition revocation callback.

    The behavior of ``auto.offset.reset`` also differs slightly from the
    Confluent consumer: offsets are only reset during initial assignment or
    subsequent rebalancing operations. Any other circumstances that would
    otherwise lead to preemptive offset reset (e.g. the consumer tries to read
    a message that is before the earliest offset, or the consumer attempts to
    read a message that is after the latest offset) will cause an exception to
    be thrown, rather than resetting the offset, as this could lead to chunks
    of messages being replayed or skipped, depending on the circumstances.
    This also means that if the committed offset is no longer available (such
    as when reading older messages from the log and those messages expire, or
    reading newer messages from the log and the leader crashes and partition
    ownership fails over to an out-of-date replica), the consumer will
    fail-stop rather than reset to the value of ``auto.offset.reset``.
    """

    # Set of logical offsets that do not correspond to actual log positions.
    # These offsets should be considered an implementation detail of the Kafka
    # consumer and not used publicly.
    # https://github.com/confluentinc/confluent-kafka-python/blob/443177e1c83d9b66ce30f5eb8775e062453a738b/tests/test_enums.py#L22-L25
    LOGICAL_OFFSETS = frozenset(
        [OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID])

    def __init__(self, configuration: Mapping[str, Any]) -> None:
        """
        Create the wrapped Confluent consumer from ``configuration``.

        ``auto.offset.reset`` (default ``"largest"``) selects how a starting
        offset is resolved for partitions without a committed offset; an
        unrecognized value raises ``ValueError``.
        """
        # Pick the strategy used by the assignment callback to resolve a
        # starting offset when the group has no committed offset.
        auto_offset_reset = configuration.get("auto.offset.reset", "largest")
        if auto_offset_reset in {"smallest", "earliest", "beginning"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_earliest)
        elif auto_offset_reset in {"largest", "latest", "end"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_latest)
        elif auto_offset_reset == "error":
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_error)
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")

        # NOTE: Offsets are explicitly managed as part of the assignment
        # callback, so preemptively resetting offsets is not enabled.
        self.__consumer = ConfluentConsumer({
            **configuration,
            "auto.offset.reset": "error"
        })

        # Next offset to read for each assigned stream.
        self.__offsets: MutableMapping[TopicPartition, int] = {}

        self.__state = KafkaConsumerState.CONSUMING

    def __resolve_partition_offset_earliest(
            self, partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        """Return ``partition`` positioned at its low watermark offset."""
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       low)

    def __resolve_partition_offset_latest(
            self, partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        """Return ``partition`` positioned at its high watermark offset."""
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       high)

    def __resolve_partition_offset_error(
            self, partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        """Always raise ``ConsumerError``; used for ``auto.offset.reset=error``."""
        raise ConsumerError("unable to resolve partition offsets")

    def subscribe(
        self,
        topics: Sequence[str],
        on_assign: Optional[Callable[[Sequence[TopicPartition]], None]] = None,
        on_revoke: Optional[Callable[[Sequence[TopicPartition]], None]] = None,
    ) -> None:
        """
        Subscribe to ``topics``.

        ``on_assign`` (if provided) is called with the list of newly assigned
        streams after their starting offsets have been set; ``on_revoke`` (if
        provided) is called with the streams being revoked.

        Raises ``InvalidState`` unless the consumer is in the ``CONSUMING``
        state.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        def assignment_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            # The ASSIGNING state makes ``__seek`` use ``assign`` rather than
            # ``seek`` on the underlying consumer.
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                assignment: MutableSequence[ConfluentTopicPartition] = []

                for partition in self.__consumer.committed(partitions):
                    if partition.offset >= 0:
                        assignment.append(partition)
                    elif partition.offset == OFFSET_INVALID:
                        # No committed offset: fall back to the strategy
                        # selected from ``auto.offset.reset``.
                        assignment.append(
                            self.__resolve_partition_starting_offset(
                                partition))
                    else:
                        raise ValueError("received unexpected offset")

                offsets: MutableMapping[TopicPartition, int] = {
                    TopicPartition(i.topic, i.partition): i.offset
                    for i in assignment
                }
                self.__seek(offsets)
            except Exception:
                # Any failure while establishing the assignment leaves the
                # consumer in the ERROR state.
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(list(offsets.keys()))
            finally:
                self.__state = KafkaConsumerState.CONSUMING

        def revocation_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            streams = [
                TopicPartition(i.topic, i.partition) for i in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(streams)
            finally:
                for stream in streams:
                    try:
                        self.__offsets.pop(stream)
                    except KeyError:
                        # If there was an error during assignment, this stream
                        # may have never been added to the offsets mapping.
                        logger.warning(
                            "failed to delete offset for unknown stream: %r",
                            stream)

                self.__state = KafkaConsumerState.CONSUMING

        self.__consumer.subscribe(topics,
                                  on_assign=assignment_callback,
                                  on_revoke=revocation_callback)

    def unsubscribe(self) -> None:
        """
        Unsubscribe from all topics.

        Raises ``InvalidState`` unless the consumer is in the ``CONSUMING``
        state.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        self.__consumer.unsubscribe()

    def poll(self, timeout: Optional[float] = None) -> Optional[KafkaMessage]:
        """
        Return the next available message, or ``None`` if the underlying
        poll returned nothing.

        Raises ``InvalidState`` unless the consumer is in the ``CONSUMING``
        state, ``EndOfStream`` when the end of a partition is reported,
        ``TransportError`` for transport errors, and ``ConsumerError`` for
        any other error attached to the polled message.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        # ``timeout`` is only forwarded when it was provided, so ``None``
        # means "call the underlying poll() without a timeout argument".
        message: Optional[ConfluentMessage] = self.__consumer.poll(
            *[timeout] if timeout is not None else [])
        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfStream(
                    TopicPartition(message.topic(), message.partition()),
                    message.offset(),
                )
            elif code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            else:
                raise ConsumerError(str(error))

        result = KafkaMessage(
            TopicPartition(message.topic(), message.partition()),
            message.offset(),
            message.value(),
        )

        # Track the position of the next message to read for this stream.
        self.__offsets[result.stream] = result.get_next_offset()

        return result

    def tell(self) -> Mapping[TopicPartition, int]:
        """
        Return the read offsets for all assigned streams.

        Raises ``InvalidState`` if the consumer is closed or in the error
        state.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        # NOTE(review): this returns the internal mapping itself, not a copy.
        return self.__offsets

    def __seek(self, offsets: Mapping[TopicPartition, int]) -> None:
        """
        Apply ``offsets`` to the underlying consumer and record them as the
        current read offsets.
        """
        if self.__state is KafkaConsumerState.ASSIGNING:
            # Calling ``seek`` on the Confluent consumer from an assignment
            # callback will throw an "Erroneous state" error. Instead,
            # partition offsets have to be initialized by calling ``assign``.
            self.__consumer.assign([
                ConfluentTopicPartition(stream.topic, stream.partition, offset)
                for stream, offset in offsets.items()
            ])
        else:
            for stream, offset in offsets.items():
                self.__consumer.seek(
                    ConfluentTopicPartition(stream.topic, stream.partition,
                                            offset))

        self.__offsets.update(offsets)

    def seek(self, offsets: Mapping[TopicPartition, int]) -> None:
        """
        Change the read offsets for the provided streams.

        Raises ``InvalidState`` if the consumer is closed or in the error
        state, and ``ConsumerError`` if any stream is not currently assigned.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if offsets.keys() - self.__offsets.keys():
            raise ConsumerError("cannot seek on unassigned streams")

        self.__seek(offsets)

    def commit(self) -> Mapping[TopicPartition, int]:
        """
        Synchronously commit offsets and return the committed offsets keyed
        by stream, omitting logical offsets.

        A commit that fails with a request timeout, "not coordinator for
        group" or "waiting for coordinator" error is retried up to three
        times with a one second pause between attempts; any other
        ``KafkaException``, or exhaustion of the retries, is re-raised.

        Raises ``InvalidState`` if the consumer is closed or in the error
        state.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        result: Optional[Sequence[ConfluentTopicPartition]] = None

        retries_remaining = 3
        while result is None:
            try:
                result = self.__consumer.commit(asynchronous=False)
                assert result is not None
            except KafkaException as e:
                # Only the error codes listed here are treated as retryable.
                if not e.args[0].code() in (
                        KafkaError.REQUEST_TIMED_OUT,
                        KafkaError.NOT_COORDINATOR_FOR_GROUP,
                        KafkaError._WAIT_COORD,
                ):
                    raise

                if not retries_remaining:
                    raise

                # The logged count includes the retry about to be attempted.
                logger.warning(
                    "Commit failed: %s (%d retries remaining)",
                    str(e),
                    retries_remaining,
                )
                retries_remaining -= 1
                time.sleep(1)

        offsets: MutableMapping[TopicPartition, int] = {}

        for value in result:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``TopicPartition`` objects returned by ``commit``.
            # These are an implementation detail of the Kafka Consumer, so we
            # don't expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if value.offset in self.LOGICAL_OFFSETS:
                continue

            assert value.offset >= 0, "expected non-negative offset"
            offsets[TopicPartition(value.topic,
                                   value.partition)] = value.offset

        return offsets

    def close(self, timeout: Optional[float] = None) -> None:
        """
        Close the underlying consumer and mark this consumer as closed.

        NOTE(review): ``timeout`` is accepted but not used by this
        implementation, and a ``RuntimeError`` raised by the underlying
        consumer's ``close`` is suppressed.
        """
        try:
            self.__consumer.close()
        except RuntimeError:
            pass

        self.__state = KafkaConsumerState.CLOSED
class Base_Consumer:
    """Small wrapper around a confluent-kafka ``Consumer`` bound to one topic.

    The wrapper subscribes to ``topic`` and collects polled messages into a
    ``pandas.DataFrame`` (see ``receive_msgs``).
    """

    def __init__(self, topic: str, bootstrap_server: str, sess_timeout: int,
                 retries: int, group_id: str, assign: bool) -> None:
        """Configure the underlying consumer.

        Args:
            topic (str): topic of the messages to consume.
            bootstrap_server (str): broker connection ``host:port``.
            sess_timeout (int): value used for ``session.timeout.ms``.
            retries (int): value used for the ``retries`` property.
            group_id (str): consumer group id.
            assign (bool): when true, ``receive_msgs`` subscribes with an
                ``on_assign`` callback.
        """
        self.topic = topic
        self.need_assign_ = assign
        self.consumer = Consumer({
            "bootstrap.servers": bootstrap_server,
            "group.id": group_id,
            "default.topic.config": {
                "auto.offset.reset": "earliest",
                "acks": 1
            },  # EOS
            "api.version.request": True,
            "session.timeout.ms": sess_timeout,  # heartbeat
            "max.poll.interval.ms": 20000,  # processing thread
            "enable.auto.commit": False,
            "auto.commit.interval.ms": 10000,
            "enable.auto.offset.store": True,
            'topic.metadata.refresh.interval.ms': 20000,
            "partition.assignment.strategy": "range",  # default
            "retries": retries,
            "debug": "all"
        })

    def get_partitions_(self, partition_id: int) -> list:
        """Return the committed offsets for one partition of the topic.

        The value is whatever ``Consumer.committed`` returns for the single
        requested ``TopicPartition``.
        """
        part = TopicPartition(self.topic, partition_id)
        partitions = self.consumer.committed([part])
        pprint(f"Current Partition: {partition_id} - {partitions}")
        return partitions

    def get_topics(self):
        """Return the result of ``Consumer.list_topics`` for this topic."""
        return self.consumer.list_topics(self.topic)

    def on_assign(self, consumer, partitions: list) -> None:
        """Assignment callback: force every assigned partition to offset 100.

        Args:
            consumer: the consumer that received the assignment.
            partitions: the assigned partition objects; their ``offset``
                attribute is overwritten in place before ``assign`` is called.
        """
        for p in partitions:
            p.offset = 100
        pprint(f"Assign: {partitions}")
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consuming.

        Repeatedly drains ``receive_msgs`` while it yields non-empty batches,
        then sleeps for one second. Note that ``receive_msgs`` itself is a
        blocking call.
        """
        while True:
            batch_size = 1
            while batch_size > 0:
                # The original compared the DataFrame itself to 0 and called
                # receive_msgs() without its argument; use the row count.
                batch_size = len(self.receive_msgs())
            await sleep(1)

    def receive_msgs(
            self,
            func_assign: Union[Callable, None] = None
    ) -> Union[Text, pd.DataFrame]:
        """Subscribe to the topic and collect polled messages.

        The poll loop only stops when polling or decoding raises (or on
        ``KeyboardInterrupt``); whatever was collected up to that point is
        returned.

        Args:
            func_assign: ``on_assign`` callback used when the instance was
                created with ``assign=True``. Defaults to ``self.on_assign``.

        Returns:
            A DataFrame with the columns ``keys``, ``lon_val``, ``lat_val``,
            ``partitions`` and ``offsets``. ``lon_val`` / ``lat_val`` are the
            first and second pieces of the payload split on ``"\\t-"``;
            ``lat_val`` is ``None`` when the payload has no separator.
        """
        c = self.consumer
        # subscribe() expects a list of topic names, not a bare string.
        topics = [self.topic] if isinstance(self.topic, str) else list(
            self.topic)
        if self.need_assign_:
            callback = func_assign if func_assign is not None else self.on_assign
            try:
                c.subscribe(topics, on_assign=callback)
            except KafkaException as e:
                pprint(e)
        else:
            try:
                c.subscribe(topics)
            except Exception as e:
                pprint(e)

        message_values = []
        offsets = []
        keys = []
        partitions = []
        try:
            while True:
                msg = c.poll(10)
                if msg is None:
                    continue
                if msg.error():
                    print("Consumer error: {}".format(msg.error()))
                    continue
                # ==== processing ====
                payload_ = msg.value().decode("utf-8")
                raw_key = msg.key()
                # Keyless messages are legal; do not abort the loop on them.
                key_ = raw_key.decode("utf-8") if raw_key is not None else None
                partition_ = msg.partition()
                offset_ = msg.offset()
                pprint(f"Receive messages: {payload_}: {offset_}")
                message_values.append(payload_)
                keys.append(key_)
                partitions.append(partition_)
                offsets.append(offset_)
        except KeyboardInterrupt:
            print("Error pooling messages and exit...")
        except Exception as e:
            pprint(f"Error: {str(e)}")

        lon_values = []
        lat_values = []
        for value in message_values:
            pieces = value.split("\t-")
            lon_values.append(pieces[0])
            lat_values.append(pieces[1] if len(pieces) > 1 else None)
        # The consumer is intentionally left open so the method can be called
        # again; use close() when finished.
        return pd.DataFrame({
            "keys": keys,
            "lon_val": lon_values,
            "lat_val": lat_values,
            "partitions": partitions,
            "offsets": offsets
        })

    def close(self) -> None:
        """Close the underlying consumer."""
        self.consumer.close()
def replicate(topic, rerun, delete, source, src_groupid, target, trg_groupid,
              trg_partitions):
    """Replicate a topic from the source cluster to the target cluster.

    Steps, in order:
      1. connect to both clusters and optionally delete the target topic;
      2. resolve the topic to replicate via ``determine_topic``;
      3. create the topic on the target with a partition count that is an
         integer multiple of the source partition count;
      4. log end offset / committed / position for every source partition;
      5. start one ``proc_replicate`` process per source partition and wait
         for all of them.

    Args:
        topic: requested topic name.
        rerun: forwarded to ``determine_topic`` and ``proc_replicate``.
        delete: when true, delete ``topic`` on the target cluster first.
        source: bootstrap servers of the source cluster.
        src_groupid: consumer group id used on the source cluster.
        target: bootstrap servers of the target cluster.
        trg_groupid: consumer group id used on the target cluster.
        trg_partitions: desired number of target partitions (rounded down to
            a multiple of the source partition count).

    Exits the process with status 1 when the resolved topic does not exist on
    the source cluster.
    """

    def _close_quietly(client):
        """Close a Kafka client, ignoring an 'already closed' RuntimeError."""
        if client is None:
            return
        try:
            client.close()
        except RuntimeError:
            pass

    # Connect to source kafka cluster
    src = Consumer({
        'bootstrap.servers': source,
        'group.id': src_groupid,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': False
    })

    # Connect to target kafka cluster
    trg = Consumer({
        'bootstrap.servers': target,
        'group.id': trg_groupid,
    })

    admin_client = None
    try:
        admin_client = KafkaAdminClient(
            bootstrap_servers=TRG_BOOTSTRAP_SERVERS, client_id=TRG_GROUP_ID)

        if delete:
            logger.warning(
                f"DELETING topic {topic} on {TRG_BOOTSTRAP_SERVERS} as requested")
            admin_client.delete_topics([topic])
            logger.warning(f"DELETION of {topic} completed.")

        logger.info(f"source cluster: {source} source group_id: {src_groupid}")
        logger.info(f"target cluster: {target} target group_id: {trg_groupid}")

        # Determine if latest source topic is at least partially loaded to target
        trg_topics, the_topic, _ = determine_topic(topic, src, trg, rerun)

        src_cm = src.list_topics()  # returns ClusterMetadata
        if the_topic not in src_cm.topics:
            logger.error(
                f"Current topics in {source} with group id {src_groupid} are:")
            logger.error(f"{src_cm.topics}")
            logger.error(
                f"Topic {topic} not in cluster {source} with group id {src_groupid}"
            )
            # The finally clause below still closes every client.
            sys.exit(1)

        src_partition_count = len(src_cm.topics[the_topic].partitions)
        logger.info(
            f"topic: {the_topic} has # of partitions: {src_partition_count}")

        # Calculate multiplier for demuxing
        # Example:
        #    source = 4 target = 9 then multiplier is 9/4=2.25
        #    int(2.25) = 2
        multiplier = int(trg_partitions / src_partition_count)
        trg_partition_count = src_partition_count * multiplier
        logger.info(
            f"multiplier={multiplier} target_partition_count={trg_partition_count}"
        )

        # Add the new topic in target cluster
        if the_topic not in trg_topics:
            logger.info(
                f"replicate {the_topic} to {TRG_BOOTSTRAP_SERVERS} with source group id: {src_groupid}"
            )
            topic_list = [
                NewTopic(name=the_topic,
                         num_partitions=trg_partition_count,
                         replication_factor=1)
            ]
            try:
                logger.info(
                    f"Creating topic {the_topic} with {trg_partition_count} partitions"
                )
                admin_client.create_topics(new_topics=topic_list,
                                           validate_only=False)
            except kafka.errors.TopicAlreadyExistsError:
                logger.info(f"Topic already exists in {TRG_BOOTSTRAP_SERVERS} ")

        part_map = create_part_map(src_partition_count, multiplier)

        # Get offset status for each partition
        logger.info(f"Source broker partitions for topic {the_topic}")
        logger.info(
            "-------------------------------------------------------------------------"
        )
        parts = {}
        total_committed = 0
        total_offsets = 0
        for part in src_cm.topics[the_topic].partitions:
            tp = TopicPartition(the_topic, part)
            tp.offset = confluent_kafka.OFFSET_BEGINNING
            src.assign([tp])
            committed = src.committed([tp])[0].offset
            if committed < 0:
                # Negative values are sentinels (no commit stored yet); they
                # must not be summed into the totals or the lag.
                committed = 0
            total_committed += committed
            end_offset = src.get_watermark_offsets(tp, cached=False)[1]
            position = src.position([tp])[0].offset
            if position == confluent_kafka.OFFSET_BEGINNING:
                position = 0
            elif position == confluent_kafka.OFFSET_END:
                position = end_offset
            elif position == confluent_kafka.OFFSET_INVALID:
                position = 0

            parts[str(part)] = end_offset
            total_offsets += end_offset
            logger.info(
                "Source topic: %s partition: %s end offset: %s committed: %s position: %s lag: %s",
                the_topic, part, end_offset, committed, position,
                (position - committed))
    finally:
        # Release every client before spawning worker processes, and also on
        # any error / sys.exit path.
        _close_quietly(src)
        _close_quietly(trg)
        _close_quietly(admin_client)

    logger.info(
        f"Source: total_committed={total_committed} total_offsets={total_offsets}"
    )
    logger.info(
        "========================================================================="
    )
    logger.info(
        f"Starting multi-process: the_topic={the_topic} rerun={rerun} src_partition_count={src_partition_count}"
    )
    procs = [
        mp.Process(target=proc_replicate,
                   args=(the_topic, part, parts[str(part)], part_map, rerun))
        for part in range(0, src_partition_count)
    ]

    for proc in procs:
        proc.start()

    for proc in procs:
        proc.join()

    logger.info("END")
class OffsetTranslator():
    """Translates consumer group offsets as part of a migration to a new cluster.

    Given a consumer group, source and destination cluster, it will find the
    topics involved in the consumer group and the committed offsets. From
    there it uses offsets_for_times() to find the offset for a message with an
    equal or greater time in the destination cluster and compares a hash of
    the message value to confirm if the offset relates to the same message.
    If not, it advances the timestamp by one millisecond and finds the next
    offset - this becomes the range of offsets it will traverse over to find a
    matching hash.

    If there were no more recent timestamps on the topic partition, it will
    call get_watermark_offsets() to get the last offset and traverse
    accordingly.

    If the number of messages to traverse is stupidly large (currently set at
    500) it logs a warning.

    There is every possibility that the message simply doesn't exist, in
    which case the offset stays unmatched.
    """

    def __init__(self, src_bootstrap_server, src_group_id, src_topic,
                 dest_bootstrap_server, dest_group_id):
        """Create the admin client and the source/destination consumers.

        Args:
            src_bootstrap_server: bootstrap servers of the source cluster.
            src_group_id: consumer group whose offsets are translated.
            src_topic: optional single topic to restrict the run to
                (``None`` means every topic of the consumer group).
            dest_bootstrap_server: bootstrap servers of the destination.
            dest_group_id: consumer group id used on the destination.
        """
        self._admin = AdminClient({"bootstrap.servers": src_bootstrap_server})

        # For reading offsets/messages in the source cluster
        self._consumer = Consumer({
            "bootstrap.servers": src_bootstrap_server,
            "group.id": src_group_id,
            "enable.auto.commit": "false"
        })

        # For reading offsets/messages in the destination cluster
        self._dest_consumer = Consumer({
            "bootstrap.servers": dest_bootstrap_server,
            "group.id": dest_group_id,
            "enable.auto.commit": "false"
        })

        # Handy instance variables
        self._src_group_id = src_group_id
        self._src_topic = src_topic
        self._src_bootstrap_servers = src_bootstrap_server
        self._dest_group_id = dest_group_id
        self._dest_bootstrap_servers = dest_bootstrap_server
        self._metadata = defaultdict(dict)

        self.logger = logging.getLogger('translator')
        self.logger.info("Offset Translator object instantiated.")
        self.logger.info(
            f" Source bootstrap servers: {self._src_bootstrap_servers}")
        # Fixed: this line used to log the *source* servers again.
        self.logger.info(
            f" Destination bootstrap servers: {self._dest_bootstrap_servers}")
        self.logger.info(f" Consumer group: {self._src_group_id}")

    def metadataKeyFromTPO(self, tpo):
        """Return a string key from TopicPartition object for use in metadata hash"""
        return f"{tpo.topic}::{tpo.partition}"

    def buildMetadataMap(self, tpos):
        """Use TopicPartition data to build internal metadata hash for
        comparing offsets, timestamps etc between source and destination
        clusters.

        Returns the internal metadata mapping.
        """
        self.logger.info("Building metadata map...")

        for tpo in tpos:
            key = self.metadataKeyFromTPO(tpo)
            self._metadata[key] = {
                "src_offset": tpo.offset,
                "src_timestamp": 0,
                "src_hash": None,
                "src_tpo": tpo,
                "src_message": None,
                "dest_offset": None,
                "dest_timestamp": None,
                "dest_hash": None,
                "dest_tpo": None,
                "dest_message": None
            }

        self.logger.info(f"Built metadata for {len(tpos)} TPOs")
        return self._metadata

    def getTPOs(self, topics):
        """Use the AdminAPI to return a list of TopicPartition objects for a
        list of topics
        """
        self.logger.info(
            f"Getting TPOs for {len(topics)} topics via admin API...")

        tpos = []
        for t in topics:
            for p in self._admin.list_topics(t).topics[t].partitions:
                tpos.append(TopicPartition(t, p))

        self.logger.info(f"Found {len(tpos)} TPOs for {len(topics)} topics.")
        return tpos

    def updateMetadata(self, metadata):
        """Takes output of inspectTPOMessages() and updates metadata.

        We don't do this automatically within inspectTPOMessages, as we may
        want to use inspectTPOMessages on the destination cluster and compare
        to the source, so updating the object's metadata would render that
        useless.

        Raises:
            Exception: if the first record has neither a ``src_offset`` nor a
                ``dest_offset`` key.
        """
        self.logger.info("Updating metadata...")

        if not metadata:
            # Nothing to merge; the original raised StopIteration here.
            self.logger.info("0 updates to metadata: nothing to merge.")
            return self._metadata

        for key, fields in metadata.items():
            for inner_key, value in fields.items():
                self._metadata[key][inner_key] = value

        # Grab the first key and check if it relates to src_ or dest_ data..
        sample = metadata[next(iter(metadata))]
        if 'src_offset' in sample:
            cluster = "source"
        elif 'dest_offset' in sample:
            cluster = "destination"
        else:
            raise Exception(
                "Metadata doesn't clearly indicate which cluster it is from.. no src_offset or dest_offset key present..."
            )

        self.logger.info(
            f"{len(metadata)} updates to metadata from {cluster} cluster.")
        return self._metadata

    def inspectTPOMessages(self, tpos, cluster="source"):
        """Given a list of TopicPartition objects, for each partition read the
        message at the required offset and extract the timestamp, hash the
        message value.

        Args:
            tpos: partitions (with offsets) to read one message from.
            cluster: ``"source"`` or ``"destination"``; selects the consumer
                and the ``src_`` / ``dest_`` key prefix of the result.

        Returns:
            Mapping of metadata key -> dict with ``<prefix>_offset``,
            ``<prefix>_timestamp``, ``<prefix>_hash``, ``<prefix>_tpo`` and
            ``<prefix>_message``. Partitions with a negative offset, or that
            yield nothing within the poll budget, are absent.

        Raises:
            Exception: on an unknown ``cluster`` or a message-level error.
        """
        self.logger.info(f"Inspecting {len(tpos)} TPOs in {cluster} cluster.")

        if cluster == "source":
            consumer = self._consumer
            prefix = "src"
        elif cluster == "destination":
            consumer = self._dest_consumer
            prefix = "dest"
        else:
            raise Exception(
                "cluster argument to inspectTPOMessages must be one of 'source' or 'destination'"
            )

        metadata = defaultdict(dict)

        # This seems a slow way to just read one message at a time from a
        # partition, but I'm not aware of a better way of reading a single
        # message for each partition when there may be further messages on
        # the partition.
        for tpo in tpos:
            # If the tpo.offset is < 0, then the consumer hasn't read anything
            # from the topic partition, so skip it.
            if tpo.offset < 0:
                continue

            consumer.assign([tpo])

            # Fixed: the counter used to be "reset" through a misspelled
            # variable, so empty polls accumulated across partitions. Each
            # partition now gets its own budget of empty polls.
            empty_polls = 0
            while True:
                # Poll for data on this specific TopicPartition
                m = consumer.poll(1)
                if m is None:
                    empty_polls += 1
                    if empty_polls > 10:
                        print(
                            "Too many iterations polling for data and getting nothing."
                        )
                        break
                    continue

                if m.error() is not None:
                    raise Exception(
                        f"Error reading offset {tpo.offset} from {tpo.topic}/{tpo.partition}: {m.error()}"
                    )

                # We'll build a local copy of metadata
                metadata[self.metadataKeyFromTPO(tpo)] = {
                    f"{prefix}_offset": m.offset(),
                    f"{prefix}_timestamp": m.timestamp()[1],
                    f"{prefix}_hash": self.sha256Object(m.value()),
                    f"{prefix}_tpo": tpo,
                    f"{prefix}_message": m,
                }
                # We've got our data for this topic/partition
                break

        self.logger.info(f"Returning metadata for {len(metadata)} TPOs")
        return metadata

    def sha256Object(self, obj):
        """Return the sha256 digest for a supplied object"""
        return hashlib.sha256(bytes(obj)).hexdigest()

    def getTPOsByTime(self, metadata=None):
        """Build a list of TopicPartitions using message timestamps instead of
        offsets.

        Args:
            metadata: metadata mapping to read ``src_timestamp`` / ``src_tpo``
                from; defaults to the object's own metadata.

        Returns:
            The result of ``offsets_for_times`` on the destination consumer.

        Raises:
            Exception: if any returned partition carries an error.
        """
        if metadata is None:
            metadata = self._metadata

        self.logger.info(
            f"Getting offsets from timestamps for {len(metadata)} metadata entries.."
        )

        tpos_by_time = []
        for md in metadata.values():
            # Fixed: records used to be read from self._metadata even when a
            # different mapping was passed in.
            if md['src_timestamp'] > 0:
                tpo = md['src_tpo']
                tpos_by_time.append(
                    TopicPartition(tpo.topic, tpo.partition,
                                   md['src_timestamp']))

        # This returns the earliest offset for a given timestamp
        tpos = self._dest_consumer.offsets_for_times(tpos_by_time)

        # Check for errors
        for t in tpos:
            if t.error is not None:
                raise Exception(
                    f"Error getting offset from timestamp: Topic {t.topic}, Partition {t.partition}, Offset {t.offset}: Error {t.error}"
                )

        self.logger.info(
            f"Returning {len(tpos)} offsets from destination cluster.")
        return tpos

    def findMatchingMessages(self):
        """Iterate over metadata and find matching source/destination messages
        and separate into matched / unmatched buckets, returning a tuple
        ``(translated_offsets, unmatched_offsets)``.

        Each record's ``matched`` flag is set as a side effect.
        """
        self.logger.info(
            "Searching for destination messages that match via message hash..."
        )

        # Iterate over the source cluster metadata and compare to destination cluster
        translated_offsets = []
        unmatched_offsets = []
        for key in self._metadata.keys():
            metadata = self._metadata[key]
            src_tpo = metadata['src_tpo']
            dest_timestamp = metadata['dest_timestamp']
            dest_tpo = metadata['dest_tpo']

            self.logger.info(
                f" Working with TopicPartition({src_tpo.topic},{src_tpo.partition},{src_tpo.offset}) @ {metadata['src_timestamp']}"
            )

            # We found the destination cluster message by offsets_for_times
            # and compared hashes. If they match, the destination offset is
            # the translated offset.
            if metadata['src_hash'] == metadata['dest_hash']:
                self.logger.info(
                    f" FOUND: TopicPartition({dest_tpo.topic},{dest_tpo.partition},{dest_tpo.offset}) @ {dest_timestamp} in destination cluster"
                )
                self._metadata[key]['matched'] = True
                translated_offsets.append(dest_tpo)
            else:
                self.logger.info(
                    f" NOT FOUND: TopicPartition({dest_tpo.topic},{dest_tpo.partition},{dest_tpo.offset}) @ {dest_timestamp} does not have same hash."
                )
                self.logger.info(
                    " will traverse messages and attempt to find a match.")
                self._metadata[key]['matched'] = False
                unmatched_offsets.append(metadata)

        self.logger.info(
            f"Found {len(translated_offsets)} matching offsets and {len(unmatched_offsets)} that don't match."
        )
        return (translated_offsets, unmatched_offsets)

    def findOffsetRangeToScan(self, md):
        """Using a metadata record as a base, identify how many records
        (maximum) to scan through to find a match.

        We are here because we didn't find a match for source cluster
        timestamp, which means it is either not there, or multiple messages
        were produced during that millisecond and our offsets_for_times() call
        provided the lowest offset for that millisecond. We will add 1 ms to
        the timestamp and get the offset (if possible) and then iterate over
        each message and compare hashes to determine what the exact offset
        should be.

        Returns:
            ``(starting_offset, target_offset)`` - start inclusive, end
            exclusive.
        """
        self.logger.info(
            "Find the start/end offsets to iterate over to find a match based on message value hash."
        )

        timestamp_end = md['src_timestamp'] + 1  # add one millisecond
        tpo = md['dest_tpo']
        starting_offset = md['dest_offset']

        end_offset = self._dest_consumer.offsets_for_times(
            [TopicPartition(tpo.topic, tpo.partition, timestamp_end)])
        self.logger.info(
            f"Shifting timestamp by 1ms, from {md['src_timestamp']} to {timestamp_end}"
        )
        self.logger.info(
            f" yields an offset of {end_offset[0]}")

        if end_offset[0].offset == -1:
            # There are no more recent timestamps for the topic/partition
            # Set the ending offset at the end of partition
            _low, high = self._dest_consumer.get_watermark_offsets(
                TopicPartition(tpo.topic, tpo.partition))
            target_offset = high
            self.logger.info(
                f"Reading to end of the partition... {target_offset}")
            if target_offset - tpo.offset > 500:
                # Fixed: `tpo.offet` raised AttributeError on this path.
                self.logger.warning(
                    f" Note: that involves reading and hashing {target_offset - tpo.offset} messages.. might take some time."
                )
        else:
            # There was a more recent timestamped message, so we'll use that
            # as our target offset
            target_offset = end_offset[0].offset

        self.logger.info(
            f"Starting offset for scan is {starting_offset} (inclusive)")
        self.logger.info(
            f"Ending offset for scan is {target_offset} (exclusive)")
        return (starting_offset, target_offset)

    def compareOffsets(self):
        """For the list of tpos in the source cluster, look them up in the
        destination and compare value hashes; if they match all good; if not,
        iterate over records until a match is found (where duration is one
        millisecond, based on the assumption that multiple messages have been
        produced during the same millisecond).

        Returns:
            The translated destination TopicPartitions sorted by partition.

        Raises:
            Exception: if any record lacks destination data or a source hash,
                or if a destination read yields no result or several.
        """
        self.logger.info(
            "Comparing offsets between source and destination cluster...")

        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(self._metadata)

        # Check that we have destination cluster offsets and hashes before
        # proceeding - if not, we have incomplete data and must stop.
        counter = sum(
            1 for record in self._metadata.values()
            if record['dest_hash'] is None or record['dest_offset'] is None
            or record['src_hash'] is None)
        if counter > 0:
            raise Exception(
                f"{counter} out of {len(self._metadata)} topic partitions have insufficient data. Exiting."
            )

        translated_offsets, unmatched_offsets = self.findMatchingMessages()

        self.logger.info("Working on unmatched offsets...")

        messages_found = 0
        for md in unmatched_offsets:
            tpo = md['dest_tpo']
            (starting_offset, target_offset) = self.findOffsetRangeToScan(md)

            for offset in range(starting_offset, target_offset):
                self.logger.info(
                    f"Inspecting destination cluster message at offset {offset}..."
                )
                results = self.inspectTPOMessages(
                    [TopicPartition(tpo.topic, tpo.partition, offset)],
                    cluster="destination")
                if len(results) == 0:
                    raise Exception(
                        "Didn't get any metadata from call to inspectTPOMessages(). "
                        "This implies we read data from the source cluster, but couldn't inspect any messages in the destination cluster. Stopping."
                    )
                elif len(results) > 1:
                    raise Exception(
                        f"Expecting only one result from call to inspectTPOMessages, but got {len(results)}. Stopping"
                    )

                # Get the (only) key from the dict
                key = next(iter(results))
                dest_hash = results[key]['dest_hash']
                dest_tpo = results[key]['dest_tpo']
                dest_message = results[key]['dest_message']

                if dest_hash == md['src_hash']:
                    self.logger.info(" FOUND matching record: ")
                    self.logger.info(
                        f" source hash was {md['src_hash']}, and"
                    )
                    self.logger.info(
                        f" dest_hash is {dest_hash}"
                    )
                    self.logger.info(
                        f". destination {dest_tpo}"
                    )

                    record = self._metadata[key]
                    record['matched'] = True
                    # Update our metadata to accurately reflect the correct
                    # destination message
                    record['dest_offset'] = dest_message.offset()
                    record['dest_hash'] = dest_hash
                    record['dest_timestamp'] = dest_message.timestamp()[1]
                    record['dest_tpo'] = dest_tpo
                    record['dest_message'] = dest_message

                    translated_offsets.append(dest_tpo)
                    messages_found += 1

                    # Found it so stop iterating
                    break

        self.logger.info(
            f"Found {messages_found} out of {len(unmatched_offsets)} unmatched objects."
        )

        # Sort the offset map by partition number, which may have become out
        # of order if we needed to read and hash messages to find a hash match
        return sorted(translated_offsets, key=lambda k: k.partition)

    def getMetadata(self):
        """Return our offset metadata object"""
        return self._metadata

    def getMessage(self, consumer, tpo):
        """Read a message at a tpo, return it (or None if exactly one message
        was not received within the timeout)."""
        consumer.assign([tpo])
        res = consumer.consume(num_messages=1, timeout=3)
        if len(res) == 1:
            return res[0]
        return None

    def commitTranslatedOffsets(self, tpos):
        """Given a list of TopicPartition objects, set the consumer group
        offsets.

        The ``offset`` of every element of ``tpos`` is incremented in place
        before committing. Partitions that come back with an error are
        retried, up to three commit attempts in total.

        Returns:
            The result of the last ``commit()`` call.
        """
        self.logger.info("Committing offsets for supplied TPOs...")

        # Our offsets have been the last message consumed; need to set all
        # offsets to +1 so that they represent the next message to consume.
        for t in tpos:
            t.offset += 1
        self.logger.info(
            " TPO offsets are incremented by one so that next message consumed is correct."
        )

        max_attempts = 3
        pending = tpos
        committed = []
        errored_commits = []
        for attempt in range(1, max_attempts + 1):
            self.logger.info(
                f" Calling commit() for {len(pending)} topic/partitions to destination cluster."
            )
            committed = self._dest_consumer.commit(offsets=pending,
                                                   asynchronous=False)
            errored_commits = [t for t in committed if t.error is not None]
            if not errored_commits:
                self.logger.info(
                    "Offsets committed successfully to destination cluster")
                break

            self.logger.warning(" Errors commiting offsets:")
            for t in errored_commits:
                self.logger.info(
                    f" Partition({t.partition}), Offset({t.offset}): {t.error}"
                )
            if attempt < max_attempts:
                self.logger.info(" Trying again in 2 seconds...")
                time.sleep(2)
                pending = errored_commits

        # Fixed: the error list used to be cleared inside the retry loop, so
        # this final report could never be emitted.
        if errored_commits:
            self.logger.warning("Still had errors after 3 tries:")
            for t in errored_commits:
                self.logger.info(
                    f" Partition({t.partition}), Offset({t.offset}): {t.error}"
                )
            self.logger.info("Returning with a job not finished!!")

        return committed

    def printMetadata(self, metadata=None):
        """Log source and destination offset/timestamp/hash per partition.

        Args:
            metadata: mapping to print; defaults to the object's metadata.
                Records may omit the ``dest_*`` fields.
        """
        if metadata is None:
            metadata = self._metadata

        topic = None
        for md in metadata.values():
            tpo = md['src_tpo']
            if tpo.topic != topic:
                topic = tpo.topic
                self.logger.info(f"topic: {tpo.topic}:")

            src_offset = md['src_offset']
            src_timestamp = md['src_timestamp']
            src_hash = md['src_hash']

            # We might be passed a metadata object that doesn't set dest_* fields
            dest_tpo = md.get('dest_tpo')
            dest_offset = dest_tpo.offset if dest_tpo is not None else ''

            dest_message = md.get('dest_message')
            if dest_message is not None:
                dest_timestamp = dest_message.timestamp()[1]
            else:
                dest_timestamp = ''

            dest_hash = md.get('dest_hash', '')

            self.logger.info(f" p[{tpo.partition:1}]")
            self.logger.info(
                f" source last message offset ({src_offset:1}), timestamp({src_timestamp:12}), hash({src_hash})"
            )
            self.logger.info(
                f" destination last message offset ({dest_offset:1}), timestamp({dest_timestamp:12}), hash({dest_hash})"
            )

    def getConsumerGroupOffsets(self, topics):
        """Return the latest offset for the consumer group defined at object
        initialisation time. Moves offset by -1 so that we can re-read the
        last message consumed.
        """
        self.logger.info(
            f"Getting consumer group offsets for {len(topics)} topics...")

        tpos = self.getTPOs(topics)
        tpos = self._consumer.committed(tpos)

        self.logger.info(
            " Decrementing offsets so that we can inspect the last message consumed (for hashing, timestamps, etc)"
        )
        # Wind back one offset so that we can re-read the messages
        for t in tpos:
            t.offset -= 1

        self.logger.info(f"Found offsets for {len(tpos)} topic partitions.")
        return tpos

    def allOffsetsMatched(self):
        """Test that all metadata has a matched == True value.

        A record that has no ``matched`` flag yet counts as unmatched.
        """
        self.logger.info(
            "Checking that all metadata records were matched in the destination cluster..."
        )
        for record in self._metadata.values():
            if not record.get('matched'):
                self.logger.info("Unmatched metadata records found.")
                return False

        self.logger.info("All metadata was matched.")
        return True

    def findTopicsForConsumerGroup(self, cg=None):
        """Given a consumer group name, find the topics associated with the
        consumer group.

        We use the ``kafka-consumer-groups`` command line tool because the
        confluent_kafka_python package doesn't yet provide this, see:
        https://github.com/confluentinc/confluent-kafka-python/issues/223

        Args:
            cg: consumer group name; defaults to the source group id.

        Returns:
            Sorted list of unique topic names; only the configured source
            topic when one was supplied at construction time.

        Raises:
            OSError: if ``kafka-consumer-groups`` is not on ``$PATH``.
            Exception: if no topics are found, or the configured source topic
                is not part of the group.
        """
        # Local import: only this method needs it.
        import shutil

        if cg is None:
            cg = self._src_group_id

        self.logger.info(f"Finding topics associated with {cg}...")

        # Test that we have a kafka-consumer-groups handy...
        if shutil.which('kafka-consumer-groups') is None:
            raise OSError("No 'kafka-consumer-groups' command found in $PATH")

        # Run the tool without a shell so that the group name and the
        # bootstrap servers can never be interpreted as shell syntax; the
        # former grep/awk/sort pipeline is done in Python below.
        cmd = [
            'kafka-consumer-groups', '--bootstrap-server',
            self._src_bootstrap_servers, '--describe', '--group', cg
        ]
        self.logger.info(f"Running {' '.join(cmd)}")
        res = subprocess.run(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.DEVNULL)

        found = set()
        for line in str(res.stdout, 'utf-8').split('\n'):
            if cg not in line or 'Error: Consumer group ' in line:
                continue
            fields = line.split()
            # The topic is the second whitespace-separated column.
            if len(fields) > 1:
                found.add(fields[1])
        cg_topics = sorted(found)

        print(f">>>>>>{cg_topics}>>>>>")
        if len(cg_topics) == 0:
            raise Exception(
                f"No topics found for consumer group {cg}. Nothing to do. Stopping."
            )

        # If we were configured to run for just one topic in a CG; then return
        # just that topic, but only if it exists in the CG
        if self._src_topic is not None:
            if self._src_topic in cg_topics:
                self.logger.info(
                    "Overriding topic list from CG tool with supplied topic.")
                cg_topics = [self._src_topic]
            else:
                raise Exception(
                    f"{self._src_topic} is not associated with {cg}. Stopping."
                )

        self.logger.info(f"Returning {cg_topics}...")
        return (cg_topics)
def get_metrics_for_topic(consumer: Consumer, topic_name: str) -> list:
    """Collect the metrics for every partition of one topic.

    The topic metadata is fetched from the broker, turned into a partition
    list by ``get_partitions_for_topics``, resolved to committed offsets and
    finally handed to ``get_metrics_for_partitions``. Both broker calls use a
    10 second timeout.
    """
    topic_metadata = consumer.list_topics(topic=topic_name, timeout=10)
    topic_partitions = get_partitions_for_topics(topic_metadata)
    committed = consumer.committed(topic_partitions, timeout=10)
    return get_metrics_for_partitions(consumer, committed)
class KafkaConsumer(object):
    """Consumer wrapper.

    It can consume several topics, but cannot reset the offsets of several
    topics at the same time.
    """

    def __init__(self,
                 topic,
                 cfg,
                 cid=None,
                 logger=None,
                 normal=True,
                 debug=False,
                 **kwargs):
        """Build the underlying consumer and subscribe to ``topic``.

        :param topic: list of topics; it is passed to ``subscribe`` and to
            ``total_offset``
        :param cfg: shared configuration; the entries set here override it
        :param cid: id of this consumer
        :param logger: logger instance supplied by the caller
        :param normal: consumer mode; when true the watermark and committed
            offsets are logged at start-up and a summary is logged on stop
        :param debug: when ``False`` the ``debug`` entry is removed from the
            configuration
        :param kwargs: mainly used to pass internal parameters
            auto_commit: whether to commit automatically, True/False
            block: whether ``get`` blocks, defaults to False
        """
        # Join only the non-empty parts so that a missing user ``debug``
        # entry does not produce a leading comma.
        debug_contexts = ','.join(
            part for part in (cfg.get('debug', ''), 'cgrp,topic,fetch')
            if part)
        self._cfg = dict(
            cfg, **{
                'enable.auto.commit': True,
                'auto.commit.interval.ms': 1000,
                'fetch.min.bytes': 1024 * 1024,  # how much data to fetch at once
                'fetch.wait.max.ms': 1000,  # how long to wait to fill a fetch
                'fetch.message.max.bytes': 1048576,  # max size of a batch
                'on_commit': self._on_commit,
                # 'offset.store.method': 'broker',
                # 'enable.auto.offset.store': True,
                'default.topic.config': {
                    'auto.offset.reset': 'earliest',
                },
                'debug': debug_contexts,
            })
        if debug is False:
            del self._cfg['debug']
        self._cfg['enable.auto.commit'] = kwargs.get('auto_commit', True)

        self._id = cid  # id of this consumer
        self._topic = topic  # topics being listened to
        self._create_time = time.time()  # creation time
        self._block = kwargs.get('block', False)  # whether get() blocks
        self._logger = logger
        self._normal = normal
        self._start_offset = defaultdict(dict)  # first offset fetched
        self._end_offset = defaultdict(dict)  # last offset fetched
        self._total_offset = None  # offset range when the run started
        self._ori_offset = None  # committed offsets when the run started

        # Only flagged as started once the consumer exists, so that stop()
        # (also reached through __del__) never touches a missing consumer.
        self._start = False
        self._consumer = Consumer(**self._cfg)
        self._start = True

        if self._normal:
            self._total_offset = self.total_offset(self._topic)
            self._logger.info("total offset >>> \n{}".format(
                KafkaConsumer._convert_to_show(self._total_offset)))
            self._ori_offset = self.current_offset()
            self._logger.info("current offset >>> \n{}".format(
                KafkaConsumer._convert_to_show(self._ori_offset)))
        # NOTE(review): the source's indentation was lost; the subscription
        # is assumed to be unconditional (not limited to normal mode) --
        # confirm.
        self._consumer.subscribe(self._topic,
                                 on_assign=self._on_assign,
                                 on_revoke=self._on_revoke)

    # FIXME: not yet clear when this gets called
    def _on_assign(self, c, ps):
        """Assignment callback handed to ``subscribe``; currently a no-op."""
        pass

    # FIXME: not yet clear when this gets called
    def _on_revoke(self, c, ps):
        """Revocation callback handed to ``subscribe``; currently a no-op."""
        pass

    def _on_commit(self, err, partitions):
        """``on_commit`` callback placed in the configuration; a no-op."""
        pass

    def reset_offset(self, offsets):
        """Commit new offsets for the given topics.

        :param offsets: either a dict ``{topic: offset}`` (applied to
            partition 0 of each topic) or a list of
            ``(topic, partition, offset)`` items.  ``offset`` is ``'min'``,
            ``'max'`` or a non-negative int.
        :return: the result of ``commit`` when at least one offset could be
            resolved; ``False`` for an unsupported ``offsets`` type;
            otherwise ``True`` only when ``offsets`` is empty.
        """
        watermarks = dict()  # topic -> {partition: (low, high)}

        def resolve(topic, partition, offset):
            """Turn a requested offset into a concrete value, or None."""
            # FIXME committing OFFSET_BEGINNING / OFFSET_END is not officially
            # supported, the watermarks from total_offset are used instead
            if offset in ('min', 'max'):
                if partition not in watermarks.get(topic, {}):
                    watermarks.update(self.total_offset([topic]))
                bounds = watermarks.get(topic, {}).get(partition)
                if bounds is None:
                    self._logger.warning(
                        'no watermark offsets for {0}({1}), '
                        'will not reset offset'.format(topic, partition))
                    return None
                return bounds[0] if offset == 'min' else bounds[1]
            if isinstance(offset, int) and offset >= 0:
                return offset
            self._logger.warning(
                'unknown reset value: {}, will not reset offset'.format(
                    offset))
            return None

        if isinstance(offsets, dict):
            requests = [(_topic, 0, _offset)
                        for _topic, _offset in offsets.items()]
        elif isinstance(offsets, list):
            requests = offsets
        else:
            self._logger.warning(
                'unknown type: {} for param[offsets], will not reset offset'.
                format(type(offsets).__name__))
            return False

        # Resolve each request exactly once so warnings are not duplicated.
        assigns = []
        for _topic, _partition, _offset in requests:
            value = resolve(_topic, _partition, _offset)
            if value is not None:
                assigns.append(TopicPartition(_topic, _partition, value))

        if assigns:
            for _t in assigns:
                self._logger.warning(
                    'RESET offset to {0} for topic {1}({2})'.format(
                        _t.offset, _t.topic, _t.partition))
            return self.commit(assigns, if_format=True)
        return not offsets

    def get(self):
        """Poll the consumer for one message.

        :return: a ``KafkaMessage`` (topic, partition, offset, timestamp,
            value) when a message was received; ``None`` when nothing was
            available or the polled event carried an error.
        """
        if self._block:
            result = self._consumer.poll()
        else:
            result = self._consumer.poll(timeout=0.00001)
        # Compare against None explicitly: a message with an empty value has
        # length 0 and would otherwise be treated as "nothing received".
        if result is None:
            return None

        error = result.error()
        if error:
            if error.code() == KafkaError._PARTITION_EOF:
                self._logger.warning(
                    '{} [{}] reached end at offset {}\n'.format(
                        result.topic(), result.partition(), result.offset()))
            else:
                self._logger.error('encourage error:\n{}'.format(
                    json.dumps(
                        {
                            'name': error.name(),
                            'code': error.code(),
                            'description': error.str()
                        },
                        indent=1)))
            return None

        # keep track of what has been processed
        self._restore_offset_info(result)
        stamp = result.timestamp()
        return KafkaMessage(
            topic=result.topic(),
            partition=result.partition(),
            offset=result.offset(),
            timestamp=stamp[1]
            if stamp[0] != confluent_kafka.TIMESTAMP_NOT_AVAILABLE else None,
            value=result.value())

    @classmethod
    def _convert_to_show(cls, data, show=True):
        """Flatten ``{topic: {partition: value}}`` into a displayable form.

        :param data: nested mapping; ``None`` is treated as empty
        :param show: when true return a JSON string, otherwise the flat dict
        :return: ``{'topic(partition)': str(value)}`` or its JSON dump
        """
        result = {
            '{}({})'.format(topic, partition): str(value)
            for topic, partitions in (data or {}).items()
            for partition, value in partitions.items()
        }
        return json.dumps(result, indent=1) if show else result

    def _restore_offset_info(self, msg):
        """Record the first and the latest offset seen per topic/partition.

        :param msg: object exposing ``topic()``, ``partition()``, ``offset()``
        :return: None
        """
        topic, partition, offset = msg.topic(), msg.partition(), msg.offset()
        self._start_offset[topic].setdefault(partition, offset)
        self._end_offset[topic][partition] = offset

    def commit(self, offsets=None, if_format=False):
        """Commit manually, retrying up to three times during a rebalance.

        :param offsets: ``{topic: {partition: offset}}`` or, when
            ``if_format`` is true, a ready-made list that is passed through
            unchanged; defaults to the last consumed offsets
        :param if_format: whether ``offsets`` is already in commit format
        :return: True on success, False otherwise
        """
        if if_format:
            topics = offsets
        else:
            src = self._end_offset if offsets is None else offsets
            topics = [
                TopicPartition(topic, partition, offset)
                for topic, partitions in src.items()
                for partition, offset in partitions.items()
            ]

        for _ in range(3):
            try:
                # ``async`` is a reserved word since Python 3.7; the client
                # takes ``asynchronous`` instead.
                self._consumer.commit(offsets=topics, asynchronous=False)
                return True
            except (confluent_kafka.KafkaException, ) as e:
                error = e.args[0]
                if int(error.code()) == 27:  # REBALANCE_IN_PROGRESS
                    self._logger.warning(
                        'COMMIT: kafka server is now in rebalancing, will retry...'
                    )
                    time.sleep(1)
                    continue
                self._logger.error(
                    'COMMIT: commit offset failed with message: {0}({1}) >> {2}'
                    .format(error.name(), error.code(), error.str()))
                return False
        self._logger.error('COMMIT: commit failed after 3 times retry')
        return False

    def current_offset(self):
        """Fetch the committed position for every known topic partition.

        :return: ``{topic: {partition: offset}}``, or None when the lookup
            raised a ``KafkaException``
        """
        result = defaultdict(dict)
        if not self._total_offset:
            self._total_offset = self.total_offset(self._topic)
        _p_topics = [
            TopicPartition(topic, partition, -1)
            for topic, partitions in self._total_offset.items()
            for partition in partitions
        ]
        try:
            for _d in self._consumer.committed(_p_topics):
                result[_d.topic][_d.partition] = _d.offset
            return result
        except (confluent_kafka.KafkaException, ) as e:
            error = e.args[0]
            self._logger.error(
                'get total_offset failed with message: {0}({1}) >> {2}'.format(
                    error.name(), error.code(), error.str()))
            return None

    def total_offset(self, topics=None):
        """Get the smallest and biggest offset for the specified topics.

        Partitions 0..99 are probed in order; probing a topic stops at the
        first ``_UNKNOWN_PARTITION`` error.

        :param topics: should be a list, exp: [topic,topic,topic]; defaults
            to the topics this consumer was created with
        :return: ``{topic: {partition: (low, high)}}``
        """
        if topics is None:
            topics = self._topic
        result = defaultdict(dict)
        for _topic in topics:
            try:
                for _n in range(100):
                    r = self._consumer.get_watermark_offsets(
                        TopicPartition(_topic, _n, -1),
                        timeout=30,
                        cached=False)
                    result[_topic][_n] = tuple(int(i) for i in r)
            except (confluent_kafka.KafkaException, ) as e:
                # Running past the last partition is the expected way out of
                # the probe loop; anything else is a real failure.
                if e.args[0].name() != '_UNKNOWN_PARTITION':
                    raise
        return result

    # FIXME not work
    def _commit_cb(self, err, reqs):
        """Log the arguments of a commit callback."""
        self._logger.info('commit callback')
        self._logger.info(err)
        self._logger.info(reqs)

    def stop(self):
        """Close the consumer once and, in normal mode, log a summary."""
        # getattr: __del__ may run on an instance whose __init__ failed early.
        if not getattr(self, '_start', False):
            return
        self._logger.debug('consumer stopped')
        self._consumer.close()
        self._start = False
        if not self._normal:
            return

        first = KafkaConsumer._convert_to_show(self._start_offset, show=False)
        last = KafkaConsumer._convert_to_show(self._end_offset, show=False)
        _out = {key: ' - '.join((first[key], last[key])) for key in last}
        self._logger.info(
            'CONSUMER SUM UP:\n'
            '-create time: {0}[{1}]\n'
            '-consume offsets: \n{2}\n'.format(
                self._create_time,
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(self._create_time)),
                json.dumps(_out, indent=1),
            ))

    def __del__(self):
        self.stop()