def get_consumer_for_topic(self, topic_name, group_id, partition, offset=None):
    """
    Method to instantiate the kafka consumer for the given topic,
    consumer group and partition
    :param topic_name: topic name
    :param group_id: consumer group id
    :param partition: partition id
    :param offset: optional offset to start consuming from
    :return: consumer instance
    """
    try:
        log.info("Fetching consumer for topic: " + topic_name)
        if topic_name + "_" + str(partition) in self.consumer_dict:
            return self.consumer_dict[topic_name + "_" + str(partition)]
        conf = {'bootstrap.servers': self.bootstrap_servers,
                'group.id': group_id,
                # 'session.timeout.ms': 1000,
                'default.topic.config': {
                    'auto.offset.reset': 'earliest'
                }}
        consumer = confluent_kafka.Consumer(**conf)
        if offset is None:
            tp = confluent_kafka.TopicPartition(topic_name, partition)
        else:
            tp = confluent_kafka.TopicPartition(topic_name, partition, offset)
        consumer.assign([tp])
        self.consumer_dict[topic_name + "_" + str(partition)] = consumer
    except Exception as e:
        log.error("Error while setting up the consumer for topic: %s: %s",
                  topic_name, e)
        raise
    return consumer
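# Hedged usage sketch for get_consumer_for_topic above: `client` stands in for
# an instance of the (unnamed) owning class, and the topic/group names are
# placeholders, not part of the original snippet.
def read_one(client, topic="events", group_id="etl-group", partition=0):
    consumer = client.get_consumer_for_topic(topic, group_id, partition, offset=0)
    msg = consumer.poll(timeout=1.0)  # returns None if nothing is available
    if msg is not None and msg.error() is None:
        return msg.value()
    return None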
def poll_kafka(self):
    import confluent_kafka as ck

    def commit(_part):
        topic, part_no, _, _, offset = _part[1:]
        _tp = ck.TopicPartition(topic, part_no, offset + 1)
        self.consumer.commit(offsets=[_tp], asynchronous=True)

    @gen.coroutine
    def checkpoint_emit(_part):
        ref = RefCounter(cb=lambda: commit(_part))
        yield self._emit(_part, metadata=[{'ref': ref}])

    tps = []
    for partition in range(self.npartitions):
        tps.append(ck.TopicPartition(self.topic, partition))
    while True:
        try:
            committed = self.consumer.committed(tps, timeout=1)
        except ck.KafkaException:
            pass
        else:
            for tp in committed:
                self.positions[tp.partition] = tp.offset
            break

    try:
        while not self.stopped:
            out = []
            for partition in range(self.npartitions):
                tp = ck.TopicPartition(self.topic, partition, 0)
                try:
                    low, high = self.consumer.get_watermark_offsets(
                        tp, timeout=0.1)
                except (RuntimeError, ck.KafkaException):
                    continue
                if 'auto.offset.reset' in self.consumer_params.keys():
                    if self.consumer_params['auto.offset.reset'] == 'latest':
                        self.positions[partition] = high
                current_position = self.positions[partition]
                lowest = max(current_position, low)
                if high > lowest + self.max_batch_size:
                    high = lowest + self.max_batch_size
                if high > lowest:
                    out.append((self.consumer_params, self.topic, partition,
                                self.keys, lowest, high - 1))
                    self.positions[partition] = high
            self.consumer_params['auto.offset.reset'] = 'earliest'
            for part in out:
                yield self.loop.add_callback(checkpoint_emit, part)
            else:
                yield gen.sleep(self.poll_interval)
    finally:
        self.consumer.unsubscribe()
        self.consumer.close()
def cb(self):
    while True:
        started = self.get_source_consumer()
        if started:
            delete_from_db = []
            for topic_partition in self.db.keys():
                topic, partition = str_topic_partition(topic_partition)
                if len(self.db[topic_partition]):
                    while True:
                        try:
                            low_offset, high_offset = self.consumer.get_watermark_offsets(
                                ck.TopicPartition(topic, partition))
                            current_offset = self.consumer.committed(
                                [ck.TopicPartition(topic, partition)])[0].offset
                            break
                        except Exception as e:
                            logger.warning(e)
                    for offset in sorted(self.db[topic_partition].keys()):
                        if self.db[topic_partition][offset]:
                            if offset < current_offset:
                                logger.warning(
                                    f'topic partition: {topic_partition}, '
                                    f'offset: {offset}, '
                                    f'current offset: {current_offset}, '
                                    'current offset higher than message offset')
                                delete_from_db.append(
                                    ck.TopicPartition(topic, partition,
                                                      offset + 1))
                            else:
                                self.partitions.append(
                                    ck.TopicPartition(topic, partition,
                                                      offset + 1))
                        else:
                            break
            if self.commit():
                L = []
                for p in self.partitions:
                    self.db[topic_partition_str(
                        p.topic, p.partition)].pop(p.offset - 1)
                    L.append({
                        'topic': p.topic,
                        'partition': p.partition,
                        'offset': p.offset - 1
                    })
                self.last = self._emit(L, emit_id=self.last_emit_id)
                self.partitions = []
                self.last_emit_id = None
            for p in delete_from_db:
                self.db[topic_partition_str(p.topic, p.partition)].pop(p.offset - 1)
        yield self.last
        yield gen.sleep(self.interval)
def inner(topic: str, options=None):
    topic_name = get_topic_name(topic)
    topics = [topic_name]
    options = processing_config(options)
    # look for the servers (it is the only config we are interested in)
    servers = [
        elm["value"]
        for elm in options["processing"]["kafka_config"]
        if elm["name"] == "bootstrap.servers"
    ]
    if len(servers) < 1:
        raise ValueError(
            "Bad kafka_config, could not find 'bootstrap.servers'.\n"
            "The configuration should have an entry of the format \n"
            "{name:'bootstrap.servers', value:'127.0.0.1'} at path 'processing.kafka_config'"
        )
    servers = servers[0]
    settings = {
        "bootstrap.servers": servers,
        "group.id": "test-consumer-%s" % uuid.uuid4().hex,
        "enable.auto.commit": True,
        "auto.offset.reset": "earliest",
    }
    consumer = kafka.Consumer(settings)
    consumer.assign([kafka.TopicPartition(t, 0) for t in topics])

    def die():
        consumer.close()

    request.addfinalizer(die)
    return consumer, options, topic_name
def committed(self, partitions, timeout=10000):
    """
    Retrieves the last successfully committed Kafka offset of the
    underlying KafkaDatasource connection.

    Parameters
    ----------
    partitions : list
        Topic/Partition instances that specify the TOPPAR instances the
        offsets should be retrieved for
    timeout : int, default 10000
        Max time to wait on the response from the Kafka broker in milliseconds

    Returns
    -------
    list
        List of ck.TopicPartition objects
    """
    toppars = [
        ck.TopicPartition(
            part.topic,
            part.partition,
            self.kafka_meta_client.get_committed_offset(
                part.topic.encode(), part.partition
            ),
        )
        for part in partitions
    ]
    return toppars
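# Possible call pattern for the committed() wrapper above (a sketch: `consumer`
# is assumed to be the object exposing it, and "my-topic" is a placeholder):
import confluent_kafka as ck

parts = [ck.TopicPartition("my-topic", p) for p in range(2)]
for tp in consumer.committed(parts):
    print(tp.topic, tp.partition, tp.offset)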
def verify_consumer_seek(c, seek_to_msg):
    """ Seek to message and verify the next consumed message matches.
        Must only be performed on an actively consuming consumer. """

    tp = confluent_kafka.TopicPartition(seek_to_msg.topic(),
                                        seek_to_msg.partition(),
                                        seek_to_msg.offset())
    print('seek: Seeking to %s' % tp)
    c.seek(tp)

    while True:
        msg = c.poll()
        assert msg is not None
        if msg.error():
            print('seek: Ignoring non-message: %s' % msg.error())
            continue

        if msg.topic() != seek_to_msg.topic() or \
                msg.partition() != seek_to_msg.partition():
            continue

        print('seek: message at offset %d' % msg.offset())
        assert msg.offset() == seek_to_msg.offset(), \
            'expected message at offset %d, not %d' % (seek_to_msg.offset(),
                                                       msg.offset())
        break
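# One way verify_consumer_seek might be driven (a sketch, assuming `c` is an
# actively consuming confluent_kafka.Consumer): consume one message, then seek
# back to it and confirm it is redelivered.
msg = c.poll(timeout=10.0)
if msg is not None and msg.error() is None:
    verify_consumer_seek(c, msg)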
def subscribe(self, topic, timeout=10.0):
    """Subscribes to a topic for consuming. This method doesn't use Kafka's
    Consumer Groups; it assigns all partitions manually to this process.

    Parameters
    ----------
    topic : `str`
        The name of the topic to subscribe to.
    timeout : `float`
        How long, in seconds, to block when fetching topic metadata
    """
    logger.debug(f"subscribing to topic {topic}")
    topic_meta = self.describe_topic(topic, timeout)
    assignment = []
    for partition_id in topic_meta.partitions.keys():
        logger.debug(f"adding subscription to topic partition={partition_id}")
        tp = confluent_kafka.TopicPartition(
            topic=topic,
            partition=partition_id,
            offset=confluent_kafka.OFFSET_BEGINNING,
        )
        assignment.append(tp)
    logger.debug("registering topic assignment")
    self.consumer.assign(assignment)
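# Standalone sketch of the same manual-assignment idea without the wrapper
# class; the broker address and topic name are placeholders. librdkafka still
# requires a group.id even though no group coordination happens here.
import confluent_kafka

consumer = confluent_kafka.Consumer({
    "bootstrap.servers": "localhost:9092",
    "group.id": "manual-assign-demo",
})
meta = consumer.list_topics("my-topic", timeout=10.0).topics["my-topic"]
consumer.assign([
    confluent_kafka.TopicPartition("my-topic", p, confluent_kafka.OFFSET_BEGINNING)
    for p in meta.partitions
])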
def poll_kafka(self):
    import confluent_kafka as ck

    try:
        while not self.stopped:
            out = []
            for partition in range(self.npartitions):
                tp = ck.TopicPartition(self.topic, partition, 0)
                try:
                    low, high = self.consumer.get_watermark_offsets(
                        tp, timeout=0.1)
                except (RuntimeError, ck.KafkaException):
                    continue
                current_position = self.positions[partition]
                lowest = max(current_position, low)
                if high > lowest:
                    out.append((self.consumer_params, self.topic,
                                partition, lowest, high - 1))
                    self.positions[partition] = high
            for part in out:
                yield self._emit(part)
            else:
                yield gen.sleep(self.poll_interval)
    finally:
        self.consumer.unsubscribe()
        self.consumer.close()
def subscribe(self, topic: str, timeout: timedelta = timedelta(seconds=10)):
    """Subscribes to a topic for consuming. This method doesn't use Kafka's
    Consumer Groups; it assigns all partitions manually to this process.

    The topic must already exist for the subscription to succeed.
    """
    self.logger.debug(f"subscribing to topic {topic}")
    try:
        topic_meta = self.describe_topic(topic, timeout)
    except KeyError:
        raise ValueError(f"topic {topic} does not exist on the broker, "
                         "so can't subscribe")
    assignment = []
    for partition_id in topic_meta.partitions.keys():
        self.logger.debug(f"adding subscription to topic partition={partition_id}")
        tp = confluent_kafka.TopicPartition(
            topic=topic,
            partition=partition_id,
        )
        assignment.append(tp)
    self.logger.debug("registering topic assignment")
    self._consumer.assign(assignment)
def get_consumer_offsets(
        self, topics: List[str],
        ignore_group_regex: str = IGNORE_GROUP_REGEX,
        no_of_threads: int = 1) -> List[Offset]:
    broker_topics = self.client.list_topics().topics
    partitions = []
    for topic_name in topics:
        partitions.extend([ck.TopicPartition(topic_name, k)
                           for k in broker_topics[topic_name].partitions])
    consumer_groups = []
    logger.info('Fetch consumer groups from broker')
    for consumer_group in self.get_consumer_groups():
        if re.findall(ignore_group_regex, consumer_group):
            logger.debug(f'Ignoring consumer group: {consumer_group}')
            continue
        consumer_groups.append(consumer_group)
    logger.info(f'Fetch consumer offsets for {len(consumer_groups)} '
                'consumer groups')
    if no_of_threads == 1:
        offsets: List[Offset] = []
        for cg in consumer_groups:
            _offsets = ConfluentAdminClient._get_offsets(
                cg, partitions, self.consumer_config)
            offsets.extend(_offsets)
        return offsets
    return self._threaded_get_offsets(partitions, consumer_groups,
                                      no_of_threads)
def on_assign(consumer, partitions):
    ready = True
    current = {a.topic for a in self._consumer.assignment()}
    current.update({p.topic for p in partitions})
    if ensure_topics and set(ensure_topics) - current:
        ready = False
    seeks = []
    for p in partitions:
        if p.topic in seek_topics:
            seeks.append(
                confluent_kafka.TopicPartition(
                    p.topic, p.partition, timestamp))
    if seeks:
        consumer.assign(consumer.offsets_for_times(seeks))
        seek_topics.difference_update({p.topic for p in seeks})
    if seek_topics:
        ready = False
    if ready:
        ret.set()
    else:
        raise NotReady()
def get_message_batch(kafka_params, topic, partition, keys, low, high,
                      timeout=None):
    """Fetch a batch of kafka messages (keys & values) in given topic/partition

    This will block until messages are available, or timeout is reached.
    """
    import confluent_kafka as ck
    t0 = time.time()
    consumer = ck.Consumer(kafka_params)
    tp = ck.TopicPartition(topic, partition, low)
    consumer.assign([tp])
    out = []
    try:
        while True:
            msg = consumer.poll(0)
            if msg and msg.value() and msg.error() is None:
                if high >= msg.offset():
                    if keys:
                        out.append({'key': msg.key(), 'value': msg.value()})
                    else:
                        out.append(msg.value())
                if high <= msg.offset():
                    break
            else:
                time.sleep(0.1)
                if timeout is not None and time.time() - t0 > timeout:
                    break
    finally:
        consumer.close()
    return out
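# Example invocation of get_message_batch, assuming a local broker and a topic
# whose partition 0 holds at least ten messages (all names are placeholders):
params = {"bootstrap.servers": "localhost:9092", "group.id": "batch-demo"}
batch = get_message_batch(params, "my-topic", partition=0, keys=True,
                          low=0, high=9, timeout=30)
for record in batch:
    print(record["key"], record["value"])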
def start(self):
    if self.stopped:
        self.stopped = False
        self.consumer = ck.Consumer(self.cpars)
        self.consumer.subscribe(self.topics)
        tp = ck.TopicPartition(self.topics[0], 0, 0)
        # blocks for consumer thread to come up
        self.consumer.get_watermark_offsets(tp)
        self.loop.add_callback(self.poll_kafka)
def start(self):
    import confluent_kafka as ck
    if self.stopped:
        self.stopped = False
        self.consumer = ck.Consumer(self.cpars)
        tp = ck.TopicPartition(self.topics[0], 0, 0)
        self.consumer.get_watermark_offsets(tp)
        self.loop.add_callback(self.poll_kafka)
def start(self):
    import confluent_kafka as ck
    if self.stopped:
        self.consumer = ck.Consumer(self.consumer_params)
        self.stopped = False
        tp = ck.TopicPartition(self.topic, 0, 0)
        # blocks for consumer thread to come up
        self.consumer.get_watermark_offsets(tp)
        self.loop.add_callback(self.poll_kafka)
def set_consumer_offsets(self, offsets: List[Offset]):
    grouped_offsets = ConfluentAdminClient._group_offsets(offsets)
    for consumer_group, _offsets in grouped_offsets.items():
        consumer = ck.Consumer({**self.consumer_config,
                                'group.id': consumer_group})
        tps = [ck.TopicPartition(o.topic, o.partition, o.value)
               for o in _offsets]
        logger.info(f'Set {len(tps)} offsets for consumer '
                    f'group: {consumer_group}')
        consumer.commit(offsets=tps, asynchronous=False)
def test_kafka_offset(kafka_client, topic, commit_offset):
    offsets = [ck.TopicPartition(topic, 0, commit_offset)]
    kafka_client.commit(offsets=offsets)

    # Get the offsets that were just committed to Kafka
    retrieved_offsets = kafka_client.committed(offsets)

    for off in retrieved_offsets:
        assert_eq(off.topic, offsets[0].topic)
        assert_eq(off.partition, offsets[0].partition)
        assert_eq(off.offset, offsets[0].offset)
def fetch_udf(params):
    """
    PySpark UDF to fetch available messages from the Kafka cluster, starting
    at the most recent commit point according to Kafka's own internal log.

    Joins a Kafka group and lets Kafka handle partition assignments.
    Does NOT commit offsets to Kafka.

    NOTE: Because PySpark doesn't deal well with the concept of modules,
    your Spark application will need to wrap this function itself.
    The process to follow is:
    1. Start up a SparkSession
    2. `spark.sparkContext.addPyFile(etl_lib.__file__)`
    3. ```
       fetch_udf = pandas_udf(etl_lib.fetch_udf,
                              "partition_id long, offset long, value string",
                              PandasUDFType.GROUPED_MAP)
       ```

    Arguments:
        params: One-line dataframe containing information on how to connect
            to Kafka. Columns of this dataframe:
                partition_id: Which Kafka partition to fetch from
                bootstrap_servers: String to pass for the eponymous argument
                    when connecting to Kafka
                topic_name: Name of Kafka topic to subscribe to

    Returns a Pandas dataframe with the schema (partition_id, offset, value)
    """
    partition_ix = params["partition_id"][0]
    bootstrap_servers = params["bootstrap_servers"][0]
    topic_name = params["topic_name"][0]
    max_messages = 100000
    # NOTE: Timeouts MUST be several seconds, or Kafka won't reliably return
    # any data, even when running locally.
    timeout_sec = 10.0

    c = _make_consumer(bootstrap_servers)
    c.assign([kafka.TopicPartition(topic_name, partition_ix)])
    msgs = c.consume(num_messages=max_messages, timeout=timeout_sec)
    c.close()

    # Convert the list of message objects to a list of tuples, then to a
    # Pandas dataframe.
    msg_tuples = [(partition_ix, m.offset(), m.value()) for m in msgs]
    return pd.DataFrame.from_records(
        msg_tuples, columns=["partition_id", "offset", "value"])
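# Sketch of the wrapping procedure the fetch_udf docstring describes; the
# `etl_lib` module, broker address, topic name, and partition count below are
# assumptions for illustration only.
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf, PandasUDFType
import etl_lib

spark = SparkSession.builder.getOrCreate()
spark.sparkContext.addPyFile(etl_lib.__file__)
fetch_udf = pandas_udf(etl_lib.fetch_udf,
                       "partition_id long, offset long, value string",
                       PandasUDFType.GROUPED_MAP)
# one row per partition, so each Spark task fetches one Kafka partition
params_df = spark.createDataFrame(
    [(p, "localhost:9092", "my-topic") for p in range(4)],
    ["partition_id", "bootstrap_servers", "topic_name"])
result = params_df.groupby("partition_id").apply(fetch_udf)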
def start(self):
    import confluent_kafka as ck
    if self.stopped:
        self.stopped = False
        self.consumer = ck.Consumer(self.cpars)
        self.consumer.subscribe(self.topics)
        weakref.finalize(
            self, lambda consumer=self.consumer: _close_consumer(consumer))
        tp = ck.TopicPartition(self.topics[0], 0, 0)
        # blocks for consumer thread to come up
        self.consumer.get_watermark_offsets(tp)
        self.loop.add_callback(self.poll_kafka)
def _read_partition(part_metadata, topic, conf, batch_size):
    """Read up to `batch_size` messages from one partition of `topic` and
    commit the offset after the last message read. Runs on a Dask worker;
    the consumer is created once and cached in the worker state."""
    part_no, low, high = part_metadata
    tp = ck.TopicPartition(topic, part_no, low)

    # create the consumer only the first time this runs on the worker,
    # then store it in the worker state dict
    worker_state = get_worker()
    if not hasattr(worker_state, 'consumer'):
        worker_state.consumer = None
    if worker_state.consumer is None:
        worker_state.consumer = ck.Consumer(conf)
    # use the consumer stored in the worker state
    c = worker_state.consumer

    last_offset = low
    c.assign([tp])

    # get a batch of messages
    messages = c.consume(min(batch_size, high - last_offset))
    values = []
    for m in messages:
        last_offset = m.offset()
        values.append(json.loads(m.value().decode('utf-8')))

    # commit the offset following the last message read
    _tp = ck.TopicPartition(topic, part_no, last_offset + 1)
    c.commit(offsets=[_tp], asynchronous=True)
    return values
def start(self):
    import confluent_kafka as ck
    if self.stopped:
        self.stopped = False
        self.consumer = ck.Consumer(self.cpars)
        self.consumer.subscribe(self.topics)
        weakref.finalize(
            self, lambda consumer=self.consumer: _close_consumer(consumer)
        )
        tp = ck.TopicPartition(self.topics[0], 0, 0)
        # blocks for consumer thread to come up and invoke poll to
        # establish connection with broker to fetch oauth token for kafka
        self.consumer.poll(timeout=1)
        self.consumer.get_watermark_offsets(tp)
        self.loop.add_callback(self.poll_kafka)
def commit_offsets(partition_offset_pairs: List[Tuple[int, int]],
                   kafka_bootstrap_servers: str,
                   kafka_topic: str):
    """
    Tell Kafka that we have consumed all messages up to a set of offsets.

    Args:
        partition_offset_pairs: List of commit offsets by partition.
            Must have exactly one offset per partition.
        kafka_bootstrap_servers: Kafka connection string
        kafka_topic: Name of the topic whose offsets are being committed
    """
    c = _make_consumer(kafka_bootstrap_servers)
    c.commit(offsets=[
        kafka.TopicPartition(kafka_topic, partition, offset)
        for partition, offset in partition_offset_pairs
    ])
    c.close()
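# Example call, assuming the same `_make_consumer` helper is importable and a
# broker runs locally: mark partitions 0 and 1 as consumed up to these offsets.
commit_offsets([(0, 100), (1, 250)], "localhost:9092", "my-topic")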
def start(self): import confluent_kafka as ck if self.engine == "cudf": # pragma: no cover from custreamz import kafka if self.stopped: if self.engine == "cudf": # pragma: no cover self.consumer = kafka.Consumer(self.consumer_params) else: self.consumer = ck.Consumer(self.consumer_params) self.stopped = False tp = ck.TopicPartition(self.topic, 0, 0) # blocks for consumer thread to come up self.consumer.get_watermark_offsets(tp) self.loop.add_callback(self.poll_kafka)
def read_batch(self):
    """Build the list of partitions that still have unread messages and
    submit one read task per partition to the Dask workers. Returns a list
    of futures (one per partition), or an empty list if nothing is new."""
    # get partitions metadata
    parts_metadata = self.master_consumer \
        .list_topics(self.topic) \
        .topics[self.topic].partitions

    # create a list of partitions to be processed
    partitions_list = []
    for part_num in range(len(parts_metadata)):
        # create a TopicPartition object for the current partition
        tp = ck.TopicPartition(self.topic, part_num)
        # get first and last offsets for the current partition
        low, high = self.master_consumer.get_watermark_offsets(tp)
        # get the last committed position (take the first element)
        committed_offset = self.master_consumer.committed([tp], timeout=1)[0].offset
        if committed_offset == -1001:
            committed_offset = 0
        # check if there are new messages
        if low + committed_offset == high:
            # no new messages in this partition
            continue
        else:
            partitions_list.append((part_num, low + committed_offset, high))

    if len(partitions_list) == 0:
        # all partitions have been processed, nothing to do
        return []
    else:
        # send the partitions to the workers
        clients_fut = self.client.scatter(partitions_list, broadcast=True)
        # read the partition in each worker
        partitions_fut = [
            self.client.submit(
                _read_partition, fut, self.topic, self.conf, self.batch_size
            )
            for fut in clients_fut
        ]
        # return the list of futures
        return partitions_fut
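# Possible driver-side loop around read_batch; `reader` stands in for an
# instance of the owning class, with its Dask client on `reader.client`.
futures = reader.read_batch()
if futures:
    batches = reader.client.gather(futures)  # one list of records per partition
    records = [rec for batch in batches for rec in batch]
    print(f"consumed {len(records)} records")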
def get_message_batch(kafka_params, topic, partition, low, high, done=set(),
                      timeout=None):
    import confluent_kafka as ck
    t0 = time.time()
    consumer = ck.Consumer(kafka_params)
    tp = ck.TopicPartition(topic, partition, low)
    consumer.assign([tp])
    out = []
    try:
        while True:
            msg = consumer.poll(0)
            if msg and msg.value() and msg.error() is None:
                partition = msg.partition()
                if partition in done:
                    continue
                offset = msg.offset()
                topic = msg.topic()
                val = msg.value()
                id_val = {
                    'partition': partition,
                    'offset': offset,
                    'topic': topic,
                }
                if high >= msg.offset():
                    out.append((id_val, val))
                if high <= msg.offset():
                    break
            else:
                time.sleep(0.1)
                if timeout is not None and time.time() - t0 > timeout:
                    break
    finally:
        consumer.close()
    return out
def start(self): import confluent_kafka as ck if self.engine == "cudf": # pragma: no cover from custreamz import kafka if self.stopped: if self.engine == "cudf": # pragma: no cover self.consumer = kafka.Consumer(self.consumer_params) else: self.consumer = ck.Consumer(self.consumer_params) weakref.finalize(self, lambda consumer=self.consumer: _close_consumer(consumer)) self.stopped = False tp = ck.TopicPartition(self.topic, 0, 0) # blocks for consumer thread to come up and invoke poll to establish # connection with broker to fetch oauth token for kafka self.consumer.poll(timeout=1) self.consumer.get_watermark_offsets(tp) self.loop.add_callback(self.poll_kafka)
def __init__(self, topic_prefix, channel, consumer_group, brokers,
             partition=None, reset_offsets=False, commit_offsets=True):
    if sys.version_info[0] == 2:
        self.channel = channel
    else:
        self.channel = bytes(channel, 'ascii')

    # connect to kafka
    self.topic_name = ".".join([topic_prefix, channel])
    self.consumer_group = ".".join([consumer_group, self.topic_name])
    self.partition = partition
    conf = {
        'bootstrap.servers': brokers,
        'group.id': self.consumer_group,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        },
        'heartbeat.interval.ms': 60000,
        'api.version.request': True,
        'enable.auto.commit': commit_offsets,
    }
    self.kc = confluent_kafka.Consumer(conf)

    # compare against None so that an explicit partition 0 is also assigned
    if self.partition is not None:
        topic_list = [
            confluent_kafka.TopicPartition(self.topic_name, self.partition)
        ]
        self.kc.assign(topic_list)
    else:
        self.kc.subscribe([self.topic_name])

    if reset_offsets:
        logging.info("Resetting committed offsets")
        raise NotImplementedError
def read_messages(topic, timeout=10):
    availableTopics = adminClient.list_topics().topics
    if topic not in availableTopics:
        raise Exception("Topic {} not found".format(topic))

    topicPartitions = []
    for partition in availableTopics[topic].partitions.keys():
        topicPartitions.append(
            ck.TopicPartition(topic, partition, ck.OFFSET_BEGINNING))
    consumer.assign(topicPartitions)

    messages = []
    while True:
        msg = consumer.poll(timeout=timeout)
        if not msg or msg.error():
            break
        messages.append(msg.value())
    return messages
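# Example, assuming the module-level `adminClient` and `consumer` objects used
# by read_messages are already configured against the same broker:
values = read_messages("my-topic", timeout=5)
print("read {} messages".format(len(values)))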
def verify_batch_consumer():
    """ Verify basic batch Consumer functionality """

    # Consumer config
    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': 'test.py',
            'session.timeout.ms': 6000,
            'enable.auto.commit': False,
            'api.version.request': api_version_request,
            'on_commit': print_commit_result,
            'error_cb': error_cb,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }}

    # Create consumer
    c = confluent_kafka.Consumer(**conf)

    # Subscribe to a list of topics
    c.subscribe([topic])

    max_msgcnt = 1000
    batch_cnt = 100
    msgcnt = 0

    while msgcnt < max_msgcnt:
        # Consume until we hit max_msgcnt

        # Consume messages (error()==0) or events (error()!=0)
        msglist = c.consume(batch_cnt, 10.0)
        assert len(msglist) == batch_cnt, \
            'expected %d messages, not %d' % (batch_cnt, len(msglist))

        for msg in msglist:
            if msg.error():
                print('Consumer error: %s: ignoring' % msg.error())
                continue

            tstype, timestamp = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), tstype, timestamp))

            # `asynchronous` (not the reserved word `async`) is the commit
            # keyword in Python-3-compatible confluent-kafka
            if (msg.offset() % 5) == 0:
                # Async commit
                c.commit(msg, asynchronous=True)
            elif (msg.offset() % 4) == 0:
                offsets = c.commit(msg, asynchronous=False)
                assert len(offsets) == 1, 'expected 1 offset, not %s' % (offsets)
                assert offsets[0].offset == msg.offset() + 1, \
                    'expected offset %d to be committed, not %s' % \
                    (msg.offset(), offsets)
                print('Sync committed offset: %s' % offsets)

            msgcnt += 1

    print('max_msgcnt %d reached' % msgcnt)

    # Get current assignment
    assignment = c.assignment()

    # Get cached watermark offsets
    # Since we're not making use of statistics the low offset is not known so ignore it.
    lo, hi = c.get_watermark_offsets(assignment[0], cached=True)
    print('Cached offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query broker for offsets
    lo, hi = c.get_watermark_offsets(assignment[0], timeout=1.0)
    print('Queried offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Close consumer
    c.close()

    # Start a new client and get the committed offsets
    c = confluent_kafka.Consumer(**conf)
    offsets = c.committed(list(map(
        lambda p: confluent_kafka.TopicPartition(topic, p), range(0, 3))))
    for tp in offsets:
        print(tp)

    c.close()
def verify_consumer():
    """ Verify basic Consumer functionality """

    # Consumer config
    conf = {'bootstrap.servers': bootstrap_servers,
            'group.id': 'test.py',
            'session.timeout.ms': 6000,
            'enable.auto.commit': False,
            'api.version.request': api_version_request,
            'on_commit': print_commit_result,
            'error_cb': error_cb,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }}

    # Create consumer
    c = confluent_kafka.Consumer(**conf)

    def print_wmark(consumer, parts):
        # Verify #294: get_watermark_offsets() should not fail on the first call
        # This is really a librdkafka issue.
        for p in parts:
            wmarks = consumer.get_watermark_offsets(parts[0])
            print('Watermarks for %s: %s' % (p, wmarks))

    # Subscribe to a list of topics
    c.subscribe([topic], on_assign=print_wmark)

    max_msgcnt = 100
    msgcnt = 0

    while True:
        # Consume until EOF or error

        # Consume message (error()==0) or event (error()!=0)
        msg = c.poll()
        if msg is None:
            raise Exception('Got timeout from poll() without a timeout set: %s' % msg)

        if msg.error():
            if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                print('Reached end of %s [%d] at offset %d' %
                      (msg.topic(), msg.partition(), msg.offset()))
                break
            else:
                print('Consumer error: %s: ignoring' % msg.error())
                break

        tstype, timestamp = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), tstype, timestamp))

        if (msgcnt == 11):
            parts = c.assignment()
            print('Pausing partitions briefly')
            c.pause(parts)
            exp_None = c.poll(timeout=2.0)
            assert exp_None is None, \
                "expected no messages during pause, got %s" % exp_None
            print('Resuming partitions')
            c.resume(parts)

        # `asynchronous` (not the reserved word `async`) is the commit
        # keyword in Python-3-compatible confluent-kafka
        if (msg.offset() % 5) == 0:
            # Async commit
            c.commit(msg, asynchronous=True)
        elif (msg.offset() % 4) == 0:
            offsets = c.commit(msg, asynchronous=False)
            assert len(offsets) == 1, 'expected 1 offset, not %s' % (offsets)
            assert offsets[0].offset == msg.offset() + 1, \
                'expected offset %d to be committed, not %s' % \
                (msg.offset(), offsets)
            print('Sync committed offset: %s' % offsets)

        msgcnt += 1
        if msgcnt >= max_msgcnt:
            print('max_msgcnt %d reached' % msgcnt)
            break

    # Get current assignment
    assignment = c.assignment()

    # Get cached watermark offsets
    # Since we're not making use of statistics the low offset is not known so ignore it.
    lo, hi = c.get_watermark_offsets(assignment[0], cached=True)
    print('Cached offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query broker for offsets
    lo, hi = c.get_watermark_offsets(assignment[0], timeout=1.0)
    print('Queried offsets for %s: %d - %d' % (assignment[0], lo, hi))

    # Query offsets for timestamps by setting the topic partition offset to a
    # timestamp (123456789000 + 1)
    topic_partitions_to_search = list(map(
        lambda p: confluent_kafka.TopicPartition(topic, p, 123456789001),
        range(0, 3)))
    print("Searching for offsets with %s" % topic_partitions_to_search)
    offsets = c.offsets_for_times(topic_partitions_to_search, timeout=1.0)
    print("offsets_for_times results: %s" % offsets)

    # Close consumer
    c.close()

    # Start a new client and get the committed offsets
    c = confluent_kafka.Consumer(**conf)
    offsets = c.committed(list(map(
        lambda p: confluent_kafka.TopicPartition(topic, p), range(0, 3))))
    for tp in offsets:
        print(tp)

    c.close()