def get_kafka_old_offset(topic, kafka_broker, partition_count):
    '''Get the oldest Kafka offsets, used to compare against the offsets read later by batch_loader.'''
    kafka_old_offset = {}
    kafka_new_offset = {}
    try:
        # Approach using the kafka library:
        '''
        from kafka import SimpleClient
        from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
        from kafka.common import OffsetRequestPayload

        client = SimpleClient(broker_list)
        partitions = client.topic_partitions[topic]
        offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]
        offsets_responses = client.send_offset_request(offset_requests)
        for r in offsets_responses:
            #print("partition = %s, offset = %s"%(r.partition, r.offsets[0]))
            kafka_old_offset[r.partition] = r.offsets[0]
        '''
        from confluent_kafka import TopicPartition, Consumer, KafkaException
        from confluent_kafka.admin import AdminClient

        conf = {'bootstrap.servers': kafka_broker, 'session.timeout.ms': 6000}
        try:
            admin_client = AdminClient(conf)
            consumer_client = Consumer(conf)
            md = admin_client.list_topics(timeout=10)
            for t in iter(md.topics.values()):
                if str(t) == topic:
                    for p in iter(t.partitions.values()):
                        td = TopicPartition(str(t), p.id)
                        oldest_offset, newest_offset = consumer_client.get_watermark_offsets(td)
                        kafka_old_offset[p.id] = oldest_offset
                        kafka_new_offset[p.id] = newest_offset
        except KafkaException as e:
            logger.error("Please check whether Kafka is alive: %s" % e)
    except ImportError:
        # Fall back to the GetOffsetShell CLI when confluent_kafka is unavailable.
        for partition_id in range(partition_count):
            command = 'kafka-run-class kafka.tools.GetOffsetShell --topic %s --broker-list %s --time -2 --partition %d' % (
                topic, kafka_broker, partition_id)
            args = shlex.split(command)
            process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            output = '{}'.format(process.stdout.read().decode(encoding='UTF-8'))
            offset = output.split(':')[2]
            kafka_old_offset[partition_id] = int(offset)
    return kafka_old_offset

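# A minimal, hypothetical use of get_kafka_old_offset(): comparing Kafka's oldest
# retained offsets against offsets saved elsewhere. The dict name
# `batch_loader_offsets` is an assumption, not from the original code. A saved
# offset smaller than Kafka's oldest offset means retention has already deleted
# messages that were never loaded.
def find_expired_partitions(topic, kafka_broker, partition_count, batch_loader_offsets):
    oldest = get_kafka_old_offset(topic, kafka_broker, partition_count)
    expired = {}
    for partition_id, loader_offset in batch_loader_offsets.items():
        if loader_offset < oldest.get(partition_id, 0):
            expired[partition_id] = (loader_offset, oldest[partition_id])
    return expired
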
def test_delivery_report_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized

    Raises:
        AssertionError on test failure
    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()

    schema_str = load_avsc(avsc)
    value_serializer = AvroSerializer(sr, schema_str)
    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        actual = value_deserializer(msg.value(), SerializationContext(topic, MessageField.VALUE))

        if record_type == "record":
            assert [v == actual[k] for k, v in data.items()]
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # schema may include default which need not exist in the original
    if record_type == 'record':
        assert [v == actual[k] for k, v in data.items()]
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data

def listen_for_messages(msg, consumer, application_source_id):  # noqa: C901
    """
    Listen for Platform-Sources kafka messages.

    Args:
        consumer (Consumer): Kafka consumer object
        application_source_id (Integer): Cost Management's current Application Source ID.
            Used for kafka message filtering.

    Returns:
        None
    """
    try:
        try:
            msg = get_sources_msg_data(msg, application_source_id)
            offset = msg.get("offset")
            partition = msg.get("partition")
        except SourcesMessageError:
            return
        if msg:
            LOG.info(f"Processing message offset: {offset} partition: {partition}")
            topic_partition = TopicPartition(topic=Config.SOURCES_TOPIC, partition=partition, offset=offset)
            LOG.info(f"Cost Management Message to process: {str(msg)}")
            try:
                with transaction.atomic():
                    process_message(application_source_id, msg)
                    consumer.commit()
            except (InterfaceError, OperationalError) as err:
                close_and_set_db_connection()
                LOG.error(f"{type(err).__name__}: {err}")
                rewind_consumer_to_retry(consumer, topic_partition)
            except (IntegrityError, SourcesHTTPClientError) as err:
                LOG.error(f"{type(err).__name__}: {err}")
                rewind_consumer_to_retry(consumer, topic_partition)
            except SourceNotFoundError:
                LOG.warning(f"Source not found in platform sources. Skipping msg: {msg}")
                consumer.commit()
    except KafkaError as error:
        LOG.error(f"[listen_for_messages] Kafka error encountered: {type(error).__name__}: {error}", exc_info=True)
    except Exception as error:
        LOG.error(f"[listen_for_messages] UNKNOWN error encountered: {type(error).__name__}: {error}", exc_info=True)

def read_from_offset(self, offset=1000):
    c = AvroConsumer(
        dict(
            self.base_config, **{
                'group.id': 'groupid-1',
                'default.topic.config': {
                    'auto.offset.reset': 'beginning',
                    'auto.commit.enable': 'false'
                }
            }))
    c.assign([TopicPartition(self.topic, partition=0, offset=offset)])
    return self.run_loop(c, return_message=True, file_object=False)

def _subscribe(self):
    """
    Subscribe to Kafka topics.

    A workaround for missing Zookeeper support in confluent-python is required here.
    Automatic partition rebalancing is not working with Kafka versions < 0.9.0.
    Therefore we manually assign the partitions to the consumer for legacy Kafka versions.
    """
    if self.broker_version < self.KAFKA_VERSION_ZOOKEEPER_OPTIONAL:
        self.consumer.assign([TopicPartition(self.topic, p) for p in range(0, 10)])
    else:
        self.consumer.subscribe([self.topic])

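# The legacy branch above assumes exactly 10 partitions. A hedged alternative
# sketch (not from the original code) that derives the partition list from
# broker metadata instead, using Consumer.list_topics() from confluent_kafka:
def _assign_all_partitions(self):
    metadata = self.consumer.list_topics(self.topic, timeout=10)
    partition_ids = metadata.topics[self.topic].partitions.keys()
    self.consumer.assign([TopicPartition(self.topic, p) for p in partition_ids])
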
def __reset_pos(self):
    logger = logging.getLogger()
    if self.type == "None":
        return
    parts = [TopicPartition(self.topic, 0)]
    (start, end) = self.cons.get_watermark_offsets(parts[0])
    logger.debug("Currently at {}/{} offset <{}, {}>".format(
        parts[0].topic, parts[0].partition, start, end))
    if end > 0:
        parts[0].offset = end - 1
        self.cons.seek(parts[0])

def getTPOs(self, topics):
    """Use the AdminAPI to return a list of TopicPartition objects for a list of topics"""
    self.logger.info(f"Getting TPOs for {len(topics)} topics via admin API...")
    tpos = []
    for t in topics:
        for p in self._admin.list_topics(t).topics[t].partitions:
            tpos.append(TopicPartition(t, p))
    self.logger.info(f"Found {len(tpos)} TPOs for {len(topics)} topics.")
    return tpos

def prepareCommit(self, message):
    topic = message.topic()
    partition = message.partition()
    offset = message.offset()
    key = message.key()
    cacheKey = topic + '_' + str(partition)
    if cacheKey in self.commitCache:
        self.commitCache[cacheKey].offset = offset
    else:
        self.commitCache[cacheKey] = TopicPartition(topic, partition, offset)

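# prepareCommit() only caches the latest offset per topic/partition. A hedged
# sketch of a possible companion flush step (not from the original code),
# assuming the same self.commitCache dict and a confluent_kafka Consumer on
# self.consumer. Kafka expects the *next* offset to be committed, hence the +1.
def flushCommit(self):
    if not self.commitCache:
        return
    offsets = [
        TopicPartition(tp.topic, tp.partition, tp.offset + 1)
        for tp in self.commitCache.values()
    ]
    self.consumer.commit(offsets=offsets, asynchronous=False)
    self.commitCache.clear()
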
def commit_offsets(self, partitions=None):
    if self.offsets and self.consumer:
        if partitions is None:
            partitions = self.offsets.keys()
        to_commit = []
        for partition in partitions:
            offset = self.offsets.get(partition)
            if offset is None:
                # Skip partitions that have no offset
                continue
            to_commit.append(TopicPartition(self.topic, partition, offset))
        self.consumer.commit(offsets=to_commit)

def morning_notice():
    #(rise_ratio_list_smallest, rise_ratio_list_largest) = consumer.get_watermark_offsets(TopicPartition('eastmoney', 0))
    (volume_list_smallest, volume_list_largest) = consumer.get_watermark_offsets(TopicPartition('eastmoney', 0))
    try:
        #consumer.assign([TopicPartition('eastmoney', 0, rise_ratio_list_largest-1)])
        #consumer.seek(TopicPartition('eastmoney', 0, rise_ratio_list_largest-1))
        #latest_rise_ratio = json.loads(consumer.poll(1.0).value())["data"]
        #latest_rise_ratio = pd.read_json(json.loads(consumer.poll(1.0).value())["data"]).sort_index()
        #latest_rise_ratio["涨幅%"] = latest_rise_ratio["涨幅%"].map(lambda x: float(x.replace('----', '0.00')))
        #print(latest_rise_ratio.head(10))
        last_data_point = volume_list_largest - 1
        consumer.assign([TopicPartition('eastmoney', 0, last_data_point)])
        consumer.seek(TopicPartition('eastmoney', 0, last_data_point))
        all_data = json.loads(consumer.poll(3.0).value())
        latest_volume = pd.read_json(all_data["data"]).sort_index()
        # "涨幅%" is the percentage-change column in the upstream data; '----' marks missing values.
        latest_volume["涨幅%"] = latest_volume["涨幅%"].map(lambda x: float(x.replace('----', '0.00')))
        result = latest_volume.head(100).sort_values("涨幅%", ascending=False).head(20)
        print("| Current time: " + time.strftime("%Y-%m-%d %H:%M:%S") + " | "
              + "Data updated at: " + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(all_data["timestamp"] / 1000000000)) + " |")
        print(tabulate(result, headers='keys', tablefmt='psql'))
    finally:
        consumer.close()

def assignment_callback(consumer, assignment):
    # Since ``auto.offset.reset`` is set to ``error`` to force human
    # interaction on an offset reset, we have to explicitly specify the
    # starting offset if no offset has been committed for this topic during
    # the ``__consumer_offsets`` topic retention period.
    assignment = {
        (i.topic, i.partition): self.__positions.get((i.topic, i.partition))
        for i in assignment
    }

    for i in self.__consumer.committed([
        TopicPartition(topic, partition)
        for (topic, partition), offset in assignment.items()
        if offset is None
    ]):
        k = (i.topic, i.partition)
        if i.offset > -1:
            assignment[k] = i.offset
        else:
            assignment[k] = self.initial_offset_reset(consumer, i.topic, i.partition)

    self.__consumer.assign([
        TopicPartition(topic, partition, offset)
        for (topic, partition), offset in assignment.items()
    ])

    for (topic, partition), offset in assignment.items():
        # Setting the local offsets will either cause the partition to be
        # paused (if the remote offset is unknown or the local offset is
        # not trailing the remote offset) or resumed.
        self.__partition_state_manager.set_local_offset(topic, partition, offset)
        self.__positions[(topic, partition)] = offset

    if on_assign is not None:
        on_assign(self, [
            TopicPartition(topic, partition)
            for topic, partition in assignment.keys()
        ])

def test_calling_store_offsets_after_close_throws_erro():
    """ calling store_offset after close should throw RuntimeError """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.offsets_for_times([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=[], request_topic='', inference_topic='', group_id='my_grp'):
    """Constructor

    :type interval: int
    :param interval: Check interval, in seconds
    """
    self.model = None  # Create the model instance here
    self.my_id = my_id
    self.t = request_topic
    self.result_t = inference_topic
    self.my_grp_id = group_id
    self.result_t_p = 8
    self.bootstrap_servers = bootstrap_servers
    self.tls = []
    x = 0
    for i in list_of_partitions:
        self.tls.insert(x, TopicPartition(self.t, i))
        x = x + 1
    #self.tls=list_of_partitions
    print(self.tls)
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'sasl.mechanism': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'ssl.ca.location': '/tmp/cacert.pem',
        'sasl.username': '******',
        'sasl.password': '******',
        # 'key.serializer': StringSerializer('utf_8'),
        # 'value.serializer': StringSerializer('utf_8'),
        'client.id': 'test-sw-1'
    }
    self.producer = Producer(conf)
    conf = {
        'bootstrap.servers': bootstrap_servers,
        #'sasl.mechanism': 'PLAIN',
        'sasl.username': '******',
        'sasl.password': '******',
        'ssl.ca.location': '/tmp/cacert.pem',
        'group.id': group_id,
        'auto.offset.reset': 'smallest'
    }
    self.consumer = consumer = Consumer(conf)
    self.consumer.assign(self.tls)

def test_sort():
    """ TopicPartition sorting (rich comparator) """

    # sorting uses the comparator
    correct = [TopicPartition('topic1', 3),
               TopicPartition('topic3', 0),
               TopicPartition('topicA', 5),
               TopicPartition('topicA', 5)]
    tps = sorted([TopicPartition('topicA', 5),
                  TopicPartition('topic3', 0),
                  TopicPartition('topicA', 5),
                  TopicPartition('topic1', 3)])
    assert correct == tps

def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=[], request_topic='', inference_topic='', group_id='my_grp'):
    """Constructor

    :type interval: int
    :param interval: Check interval, in seconds
    """
    self.model = tree.HoeffdingTreeClassifier(max_depth=10)
    # compose.Pipeline(
    #     preprocessing.MinMaxScaler(),
    #     anomaly.HalfSpaceTrees(seed=42))
    self.metric = metrics.ROCAUC()  # metrics.Accuracy()
    self.my_id = my_id
    self.t = request_topic
    self.result_t = inference_topic
    self.my_grp_id = group_id
    self.result_t_p = 8
    self.bootstrap_servers = bootstrap_servers
    # self.list_of_partitions = list_of_partitions
    self.tls = []
    x = 0
    for i in list_of_partitions:
        self.tls.insert(x, TopicPartition(self.t, i))
        x = x + 1
    #self.tls=list_of_partitions
    print(self.tls)
    conf = {'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
            # 'sasl.username': '******',
            # 'sasl.password': '******',
            # 'key.serializer': StringSerializer('utf_8'),
            # 'value.serializer': StringSerializer('utf_8'),
            'client.id': 'test-sw-1'}
    self.producer = Producer(conf)
    conf = {'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'sasl.username': '******',
            'sasl.password': '******',
            'ssl.ca.location': '/tmp/cacert.pem',
            'group.id': group_id,
            'auto.offset.reset': 'latest'}
    self.consumer = consumer = Consumer(conf)
    self.consumer.assign(self.tls)

def main():
    # parse and check command line args
    parser = argparse.ArgumentParser(
        epilog="""Description:
           Plays and optionally dumps video from a jpeg topic (a topic that ends with Image.jpg).""",
        formatter_class=RawTextHelpFormatter
    )
    parser.add_argument("broker", help="The name of the kafka broker.", type=str)
    parser.add_argument("topic", help="The name of topic (*.Image.jpg).", type=str)
    parser.add_argument('-f', "--full_screen", action='store_true')
    parser.add_argument('-d', "--dump", help="if set images are stored in jpg files", action='store_true')
    parser.add_argument('-o', "--offset", type=int, default=-1)
    args = parser.parse_args()
    if not args.topic.endswith(".Image.jpg"):
        raise argparse.ArgumentTypeError('The topic must be a jpeg image topic (should end with .Image.jpg)')

    # handle full screen
    window_name = args.topic
    if args.full_screen:
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # calc start time and create consumer
    c = Consumer({'bootstrap.servers': args.broker, 'group.id': 'display', 'auto.offset.reset': 'latest'})
    c.assign([TopicPartition(topic=args.topic, partition=0, offset=args.offset)])

    # read frames and show (or dump) them
    while True:
        msg = c.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        time = msg.timestamp()[1]
        img = decode_image_message(msg)
        if type(img) == np.ndarray:
            if args.dump:
                cv2.imwrite(args.topic + "_" + str(time) + ".jpg", img)
            cv2.imshow(window_name, img)
        k = cv2.waitKey(33)
        if k == 113:  # The 'q' key to stop
            break
        elif k == -1:  # normally -1 returned, so don't print it
            continue
        else:
            print(f"Press 'q' key for EXIT!")

def __init__(self, topicname):
    # ensure_topic(topicname)
    super().__init__({
        "group.id": "pyrandall-pytest",
        "bootstrap.servers": bootstrap_servers
    })
    self.waiting_assignment = True
    self.topic = topicname
    self.topic_partition = TopicPartition(topic=self.topic, partition=0)
    # open connection with kafka and do not rely on latest offset.
    # because latest offset is retrieved on partition assignment
    # that would happen after the messages in our functional tests are produced
    self.goto_largest_offset()
    self.subscribe([topicname], self.on_assign)

def __on_partition_state_change(self, topic, partition, previous_state_and_offsets, current_state_and_offsets):
    """
    Callback that is invoked when a partition state changes.
    """
    logger.debug(
        "State change for %r: %r to %r",
        (topic, partition),
        previous_state_and_offsets,
        current_state_and_offsets,
    )

    current_state, current_offsets = current_state_and_offsets
    if current_offsets.local is None:
        # It only makes sense to manipulate the consumer if we've got an
        # assignment. (This block should only be entered at startup if the
        # remote offsets are retrieved from the commit log before the local
        # consumer has received its assignment.)
        return

    # TODO: This will be called from the commit log consumer thread, so need
    # to verify that calling the ``consumer.{pause,resume}`` methods is
    # thread safe!
    if current_state in (
        SynchronizedPartitionState.UNKNOWN,
        SynchronizedPartitionState.SYNCHRONIZED,
        SynchronizedPartitionState.REMOTE_BEHIND,
    ):
        self.__consumer.pause([TopicPartition(topic, partition, current_offsets.local)])
    elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
        self.__consumer.resume([TopicPartition(topic, partition, current_offsets.local)])
    else:
        raise NotImplementedError(f"Unexpected partition state: {current_state}")

def listen_for_messages(msg, consumer):
    """
    Listen for messages on the hccm topic.

    Once a message from one of these topics arrives, we extract the payload
    and line-item process the report files.

    Once all files from the manifest are complete a celery job is dispatched
    to the worker to complete summary processing for the manifest.

    Several exceptions can occur while listening for messages:
    Database Errors - Re-processing attempts will be made until successful.
    Internal Errors - Re-processing attempts will be made until successful.
    Report Processing Errors - Kafka message will be committed with an error.
                               Errors of this type would require a report processor
                               fix and we do not want to block the message queue.

    Upon successful processing the kafka message is manually committed. Manual
    commits are used so we can use the message queue to store unprocessed messages
    to make the service more tolerant of SIGTERM events.

    Args:
        consumer (Consumer): kafka consumer for HCCM ingress topic.

    Returns:
        None
    """
    offset = msg.offset()
    partition = msg.partition()
    topic_partition = TopicPartition(topic=Config.HCCM_TOPIC, partition=partition, offset=offset)
    try:
        LOG.info(f"Processing message offset: {offset} partition: {partition}")
        process_messages(msg)
        LOG.debug(f"COMMITTING: message offset: {offset} partition: {partition}")
        consumer.commit()
    except (InterfaceError, OperationalError, ReportProcessorDBError) as error:
        close_and_set_db_connection()
        LOG.error(f"[listen_for_messages] Database error. Error: {type(error).__name__}: {error}. Retrying...")
        rewind_consumer_to_retry(consumer, topic_partition)
    except (KafkaMsgHandlerError, RabbitOperationalError) as error:
        LOG.error(f"[listen_for_messages] Internal error. {type(error).__name__}: {error}. Retrying...")
        rewind_consumer_to_retry(consumer, topic_partition)
    except ReportProcessorError as error:
        LOG.error(f"[listen_for_messages] Report processing error: {str(error)}")
        LOG.debug(f"COMMITTING: message offset: {offset} partition: {partition}")
        consumer.commit()
    except Exception as error:
        LOG.error(f"[listen_for_messages] UNKNOWN error encountered: {type(error).__name__}: {error}", exc_info=True)

def test_json_record_serialization_custom(kafka_cluster, load_file):
    """
    Ensures to_dict and from_dict hooks are properly applied by the serializer.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): JSON Schema file reader
    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(schema_str, sr, to_dict=_testProduct_to_dict)
    value_deserializer = JSONDeserializer(schema_str, from_dict=_testProduct_from_dict)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = _TestProduct(product_id=1,
                          name="The ice sculpture",
                          price=12.50,
                          tags=["cold", "ice"],
                          dimensions={"length": 7.0,
                                      "width": 12.0,
                                      "height": 9.5},
                          location={"latitude": -78.75,
                                    "longitude": 20.4})

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([getattr(actual, attribute) == getattr(record, attribute)
                for attribute in vars(record)])

def get_messages(self, timestamp):
    ret = []
    while len(self.queue) > 0 and self.queue[0].message.timestamp()[1] <= timestamp:
        ret.append(self.queue.popleft().message)
    if len(self.queue) < self.min_limit and self.paused and not self.stopped:
        logging.debug('Resume reading on topic: {}'.format(self.topic_name))
        self.paused = False
        self.consumer_ref.resume([TopicPartition(topic=self.topic_name, partition=self.partition)])
    self.last_message_ts = timestamp
    return ret

def _size(self, queue):
    """Get the number of pending messages in the topic/queue."""
    queue = self.sanitize_queue_name(queue)

    consumer = self._kafka_consumers.get(queue, None)
    if consumer is None:
        return 0

    size = 0
    for assignment in consumer.assignment():
        topic_partition = TopicPartition(queue, assignment.partition)
        (_, end_offset) = consumer.get_watermark_offsets(topic_partition)
        [committed_offset] = consumer.committed([topic_partition])
        size += end_offset - committed_offset.offset
    return size

def count_messages(bootstrap_servers):
    c = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'group2',
        'enable.auto.commit': False,
        'auto.offset.reset': 'beginning'
    })

    metadata = c.list_topics()
    topics = metadata.topics
    for topic, topicMetadata in topics.items():
        for partition in topicMetadata.partitions:
            (low, high) = c.get_watermark_offsets(TopicPartition(topic, partition))
            print(f"{topic} {partition}: {high}")

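# count_messages() above prints the high watermark per partition. A hedged
# variant (not from the original) that reports the number of retained messages
# per partition as high minus low watermark and returns the total:
def count_retained_messages(bootstrap_servers):
    c = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'group2',
        'enable.auto.commit': False
    })
    total = 0
    for topic, topic_metadata in c.list_topics().topics.items():
        for partition in topic_metadata.partitions:
            (low, high) = c.get_watermark_offsets(TopicPartition(topic, partition))
            total += high - low
            print(f"{topic} {partition}: {high - low}")
    c.close()
    return total
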
def commit_offsets():
    offsets_to_commit = []
    for (topic, partition), offset in owned_partition_offsets.items():
        if offset is None:
            logger.debug('Skipping commit of unprocessed partition: %r', (topic, partition))
            continue

        offsets_to_commit.append(TopicPartition(topic, partition, offset))

    if offsets_to_commit:
        logger.debug(
            'Committing offset(s) for %s owned partition(s): %r',
            len(offsets_to_commit),
            offsets_to_commit)
        commit(offsets_to_commit)

def commitOffsets(self):
    """ Commit consumed offsets if needed """

    # may be asked to commit on rebalance or shutdown but
    # should only commit if the processor has requested.
    if self.commitOffsetNeeded:
        offsetsToCommit = [TopicPartition(t, p, o + 1)
                           for ((t, p), o) in self.consumedOffsets.items()]
        # confluent_kafka's Consumer.commit() takes the `asynchronous` keyword;
        # `async` is a reserved word in Python 3.7+.
        self.consumer.commit(offsets=offsetsToCommit, asynchronous=False)
        self.consumedOffsets.clear()
        self.commitOffsetNeeded = False

    self.commitRequested = False

def __init__(self, topic=None, ip='localhost'):
    self.topic = topic
    self.ip = ip  # os.environ['KAFKA_SERVER_IP']
    self.base_config = {
        'bootstrap.servers': self.ip + ':9092',
        'schema.registry.url': 'http://' + self.ip + ':8081'
    }
    self.avro_consumer = AvroConsumer(dict(self.base_config, **{'group.id': 'groupid'}))
    self.avro_consumer.assign([TopicPartition(self.topic, 0)])
    self.key_schema = avro.load(os.path.join(SCHEMAS, 'keyschema.avsc'))
    self.value_schema = avro.load(os.path.join(SCHEMAS, self.topic + '.avsc'))

def test_json_record_serialization(kafka_cluster, load_file):
    """
    Tests basic JsonSerializer and JsonDeserializer functionality.

    product.json from:
        https://json-schema.org/learn/getting-started-step-by-step.html

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): JSON Schema file reader
    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(schema_str, sr)
    value_deserializer = JSONDeserializer(schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {
        "productId": 1,
        "productName": "An ice sculpture",
        "price": 12.50,
        "tags": ["cold", "ice"],
        "dimensions": {
            "length": 7.0,
            "width": 12.0,
            "height": 9.5
        },
        "warehouseLocation": {
            "latitude": -78.75,
            "longitude": 20.4
        }
    }

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([actual[k] == v for k, v in record.items()])

def get_kafka_old_offset(topic, kafka_broker, partition_count):
    kafka_old_offset = {}
    #kafka_new_offset = {}
    try:
        # Get kafka offset through the kafka module:
        '''
        from kafka import SimpleClient
        from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
        from kafka.common import OffsetRequestPayload

        client = SimpleClient(broker_list)
        partitions = client.topic_partitions[topic]
        offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]
        offsets_responses = client.send_offset_request(offset_requests)
        for r in offsets_responses:
            #print("partition = %s, offset = %s"%(r.partition, r.offsets[0]))
            kafka_old_offset[r.partition] = r.offsets[0]
        '''
        # Get kafka offset through the confluent_kafka module
        from confluent_kafka import TopicPartition, Consumer, KafkaException
        from confluent_kafka.admin import AdminClient

        conf = {'bootstrap.servers': kafka_broker, 'session.timeout.ms': 6000}
        admin_client = AdminClient(conf)
        consumer_client = Consumer(conf)
        md = admin_client.list_topics(timeout=10)
        for t in iter(md.topics.values()):
            if str(t) == topic:
                for p in iter(t.partitions.values()):
                    td = TopicPartition(str(t), p.id)
                    oldest_offset, newest_offset = consumer_client.get_watermark_offsets(td)
                    kafka_old_offset[p.id] = oldest_offset
                    #kafka_new_offset[p.id] = newest_offset
    except ImportError:
        for partition_id in range(partition_count):
            command = 'kafka-run-class kafka.tools.GetOffsetShell --topic {} --broker-list {} --time -2 --partition {}'.format(
                topic, kafka_broker, partition_id)
            #args = shlex.split(command)
            #process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            #output = '{}'.format(process.stdout.read().decode(encoding='UTF-8'))
            output = utils.shell_wrapper.check_output(command)
            offset = output.split(':')[2]
            kafka_old_offset[partition_id] = int(offset)
    return kafka_old_offset

def test_offsets_for_times():
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    # Query broker for timestamps for partition
    try:
        test_topic_partition = TopicPartition("test", 0, 100)
        c.offsets_for_times([test_topic_partition], timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), \
            str(e.args[0])
    c.close()

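# The test above only exercises error propagation. A hedged sketch (topic and
# partition are caller-supplied assumptions) of the intended use of
# offsets_for_times(): the TopicPartition passed in carries a timestamp in
# milliseconds in its offset field, and the result carries the earliest offset
# whose message timestamp is >= that value (or -1 if none).
def seek_to_timestamp(consumer, topic, partition, timestamp_ms):
    query = TopicPartition(topic, partition, timestamp_ms)
    [result] = consumer.offsets_for_times([query], timeout=10.0)
    if result.offset >= 0:
        consumer.assign([TopicPartition(topic, partition, result.offset)])
    return result.offset
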
def get_last_available_status_message(cons: Consumer, status_topic: str):
    """
    :param cons:
    :param status_topic:
    :return: The last status message.
    """
    partitions = cons.assignment()
    _, hi = cons.get_watermark_offsets(partitions[0], cached=False, timeout=2.0)
    last_msg_offset = hi - 1
    cons.assign([TopicPartition(status_topic, partition=0, offset=last_msg_offset)])
    status_msg, _ = poll_for_valid_message(cons, expected_file_identifier=None)
    return status_msg

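# get_last_available_status_message() expects the consumer to already be
# assigned to the status topic. A hedged usage sketch: the broker address,
# group id, and topic name are assumptions, and poll_for_valid_message comes
# from the surrounding test helpers.
def read_latest_status(broker="localhost:9092", status_topic="status"):
    cons = Consumer({
        'bootstrap.servers': broker,
        'group.id': 'status-reader',
        'auto.offset.reset': 'latest'
    })
    cons.assign([TopicPartition(status_topic, partition=0)])
    try:
        return get_last_available_status_message(cons, status_topic)
    finally:
        cons.close()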