def commit_offsets_in_kafka(broker, group_name, group_dict):
    cons = KafkaConsumer(bootstrap_servers=broker, group_id=group_name)
    for topic_name, topic_dict in group_dict.items():
        for partition, offset in topic_dict.items():
            logging.info(
                "Committing {} {} to topic {} and partition number {}".format(
                    group_name, offset, topic_name, partition))
            tp = TopicPartition(topic_name, int(partition))
            cons.assign([tp])
            cons.seek(tp, int(offset))
            # Commit the position we just sought to for this group.
            cons.commit()
            time.sleep(8)
    cons.close()
    time.sleep(1)
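A minimal usage sketch for the function above, showing the nested `{topic: {partition: offset}}` shape that `group_dict` is iterated as; the broker address, group name, topic, and offsets here are placeholders, not values from the original code:

    # Hypothetical input: maps topic -> partition -> offset to commit.
    group_dict = {
        "events": {"0": 120, "1": 87},
    }
    commit_offsets_in_kafka("localhost:9092", "analytics-group", group_dict)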
def __init__(self, broker: str, topic: str, partition: int = -1,
             start: Union[int, datetime, PartitionOffset] = PartitionOffset.END,
             stop: Union[int, datetime, PartitionOffset] = PartitionOffset.NEVER):
    consumer = KafkaConsumer(bootstrap_servers=broker,
                             fetch_max_bytes=52428800 * 6,
                             consumer_timeout_ms=100)
    existing_topics = consumer.topics()
    self.current_msg = None
    self.current_offset_limits = HighLowOffset(-1, -1)
    if topic not in existing_topics:
        raise RuntimeError(f"Topic \"{topic}\" does not exist.")
    existing_partitions = consumer.partitions_for_topic(topic)
    if partition == -1:
        partition = existing_partitions.pop()
    elif partition not in existing_partitions:
        raise RuntimeError(f"Partition {partition} for topic \"{topic}\" does not exist.")
    topic_partition = TopicPartition(topic, partition)
    consumer.assign([topic_partition])
    if start == PartitionOffset.BEGINNING:
        consumer.seek_to_beginning()
    elif start == PartitionOffset.END or start == PartitionOffset.NEVER:
        consumer.seek_to_end()
    elif type(start) is int:
        # Clamp to the first available offset if the requested one has expired.
        first_offset = consumer.beginning_offsets([topic_partition])
        if first_offset[topic_partition] > start:
            consumer.seek_to_beginning()
        else:
            consumer.seek(partition=topic_partition, offset=start)
    elif type(start) is datetime:
        # offsets_for_times() expects timestamps in milliseconds.
        found_offsets = consumer.offsets_for_times(
            {topic_partition: int(start.timestamp() * 1000)})
        consumer.seek(partition=topic_partition,
                      offset=found_offsets[topic_partition].offset)
    self.to_thread = Queue()
    self.from_thread = Queue(maxsize=100)
    # The original kwargs dict passed "stop" twice; once is enough.
    self.thread = Thread(target=thread_function, daemon=True,
                         kwargs={"consumer": consumer,
                                 "stop": stop,
                                 "in_queue": self.to_thread,
                                 "out_queue": self.from_thread,
                                 "topic_partition": topic_partition})
    self.thread.start()
def worker(kafka_settings, mongo_settings, partition):
    # Create a consumer and manually assign the given partition
    # (instead of joining a consumer group for dynamic assignment).
    consumer = KafkaConsumer(auto_offset_reset='earliest')
    consumer.assign([TopicPartition(kafka_settings["topic"], partition)])
    print("Connected to Kafka")

    # Create connection to Mongo,
    # e.g. MongoClient('mongodb://localhost:27017/') also works
    client = MongoClient(mongo_settings["ip"], mongo_settings["port"])
    # Connect to a database; dictionary-style access like client["test-db"]
    # works for names that aren't valid attribute names
    db = client[mongo_settings["db"]]
    # Connect to the collection you want,
    # e.g. collection = db['test-collection']
    collection = db[mongo_settings["collection"]]

    first = True
    # Iterate through messages
    for msg in consumer:
        # Convert the kafka message back into a dict
        tweet_in = bytes_to_dict(msg.value)
        # Check if there is a place field
        if ("place" in tweet_in) or ("location" in tweet_in):
            # Dict that will be pushed into Mongo
            tweet_out = {}
            # Keep pertinent information that doesn't need processing
            tweet_out["track_id"] = tweet_in["track_id"]
            tweet_out["tweet_id"] = tweet_in["tweet_id"]
            tweet_out["created_at"] = tweet_in["created_at"]
            # Take in the coordinate box and average it to a single
            # set of coordinates
            tweet_out["location"] = location_strip(tweet_in)
            # Insert/Update the tweet into Mongo
            post_id = collection.update({"_id": tweet_out["tweet_id"]},
                                        tweet_out, upsert=True)
            if first:
                print(tweet_in)
                print("----------Processed----------------")
                print("Wrote:", tweet_out)
                print("Type:", type(tweet_out))
                first = False
    client.close()
def init_consumer():
    c = KafkaConsumer(bootstrap_servers=brokers,
                      group_id=group,
                      value_deserializer=msgpack.unpackb,
                      enable_auto_commit=False)
    t_p = TopicPartition(topic, partition)
    c.assign([t_p])
    return c, t_p
def func(topic, partition):
    i = 0
    consumer = KafkaConsumer(bootstrap_servers='104.154.53.184:6667',
                             group_id='grp-5327',
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=10000)
    consumer.assign([TopicPartition(topic, partition)])
    for msg in consumer:
        i = i + 1
    print(i)
def start(self):
    kafka_brokers = '{0}:{1}'.format(self._server, self._port)
    consumer = KC(bootstrap_servers=[kafka_brokers], group_id=self._topic)
    partition = [TopicPartition(self._topic, int(self._id))]
    consumer.assign(partitions=partition)
    consumer.poll()
    return consumer
def create_kafka_consumer(topics, server, consumer_timeout=12000):
    consumer = KafkaConsumer(bootstrap_servers=server,
                             consumer_timeout_ms=consumer_timeout)
    tp = []
    for topic in topics:
        tp.append(TopicPartition(topic, 0))
    consumer.assign(tp)
    return consumer
class KafkaHandler:
    def __init__(self, kafka_endpoint: str, socketio: SocketIO):
        self.consumer = KafkaConsumer(bootstrap_servers=kafka_endpoint)
        self.socketio = socketio
        self.dumps = {}
        end_offset = {}
        # Find the end offset of each topic's partition 0 so we can
        # rewind to at most the last 100 messages below.
        for topic in topics:
            self.dumps[topic] = collections.deque(maxlen=100)
            current_partition = TopicPartition(topic, 0)
            self.consumer.assign([current_partition])
            self.consumer.seek_to_end()
            end_offset[topic] = self.consumer.position(current_partition)

        topic_partitions = [TopicPartition(topic, 0) for topic in topics]
        self.consumer.assign(topic_partitions)
        for topic in topics:
            self.consumer.seek(TopicPartition(topic, 0),
                               max(0, end_offset[topic] - 100))

        self.thread = threading.Thread(target=self.run)
        self.thread.daemon = True  # Daemonize thread
        self.thread.start()  # Start the execution

    def run(self):
        for msg in self.consumer:
            try:
                msg_json = json.loads(msg.value.decode('utf-8'))
                if 'http_code' in msg_json and msg_json['http_code'] != 200:
                    continue
                output = {
                    "topic": msg.topic,
                    "timestamp": msg.timestamp,
                    "value": msg_json
                }
                output_json = json.dumps(output)
                self.dumps[str(msg.topic)].append(output)
                self.socketio.emit(str(msg.topic), output_json, namespace='/')
            except Exception as e:
                print('error emit msg', e)
        self.consumer.close()

    def on_connect(self):
        if self.dumps:
            for msg_topic in self.dumps:
                messages = list(self.dumps[msg_topic])
                emit(msg_topic, messages, namespace='/')

    def status(self):
        status_dict = {}
        for topic in self.dumps:
            status_dict[topic] = {
                'messages': len(self.dumps[topic]),
                'last_message': self.dumps[topic][-1] if self.dumps[topic] else ''
            }
        return json.dumps(status_dict)
def export_csv_for_topic(self, topic):
    auth_header = request.headers.get('Authorization')
    merchant_token = auth_header.split(' ')[-1] if auth_header else None
    merchant_id = calculate_id(merchant_token) if merchant_token else None

    if topic not in topics:
        return json.dumps({'error': 'unknown topic'})

    try:
        consumer = KafkaConsumer(consumer_timeout_ms=1000,
                                 bootstrap_servers=self.kafka_endpoint)
        topic_partition = TopicPartition(topic, 0)
        consumer.assign([topic_partition])
        consumer.seek_to_beginning()
        start_offset = consumer.position(topic_partition)
        consumer.seek_to_end()
        end_offset = consumer.position(topic_partition)

        msgs = []
        '''
        Assumption: message offsets are continuous.
        Start and end can be anywhere, end - start needs to match
        the amount of messages.
        TODO: when deletion of some individual messages is possible and used, refactor!
        '''
        max_messages = 10 ** 5
        offset = max(start_offset, end_offset - max_messages)
        consumer.seek(topic_partition, offset)
        for msg in consumer:
            '''
            Don't handle steadily incoming new messages,
            only iterate to the last message present when requested.
            '''
            if offset >= end_offset:
                break
            offset += 1
            try:
                msg_json = json.loads(msg.value.decode('utf-8'))
                # filter on merchant_id for messages that carry one
                if 'merchant_id' not in msg_json or msg_json['merchant_id'] == merchant_id:
                    msgs.append(msg_json)
            except ValueError as e:
                print('ValueError', e, 'in message:\n', msg.value)
        consumer.close()

        if topic == 'marketSituation':
            df = market_situation_shaper(msgs)
        else:
            df = pd.DataFrame(msgs)

        filename = topic + '_' + str(int(time.time()))
        filepath = 'data/' + filename + '.csv'
        df.to_csv(filepath, index=False)
        response = {'url': filepath}
    except Exception as e:
        response = {'error': 'failed with: ' + str(e)}
    return json.dumps(response)
def main():
    """ Main predictor function """
    args = init_parser()
    config = init_config(args)
    logger = get_logger(f'hawkes-{config["partition"]}',
                        broker_list=config["bootstrap_servers"], debug=True)
    consumer = KafkaConsumer(bootstrap_servers=config["bootstrap_servers"])
    consumer.assign(
        [TopicPartition(config["consumer_topic"], config["partition"])])
    producer = KafkaProducer(
        bootstrap_servers=config["bootstrap_servers"],
        value_serializer=lambda v: json.dumps(v).encode("utf-8"),
        key_serializer=lambda v: json.dumps(v).encode("utf-8"))
    alpha = config["alpha"]
    mu = config["mu"]
    for message in consumer:
        # Normalize the payload into JSON-ish syntax before eval'ing it.
        mess = message.value.decode().replace("'", '"').replace('(', '[').replace(')', ']')
        mess = eval(mess)
        cascade = np.array(mess["tweets"])
        tweet_id = mess["cid"]
        text = mess["msg"]
        T_obs = mess["T_obs"]
        p, beta = 0.02, 1 / 3600
        t = cascade[-1, 0]
        LL = loglikelihood((p, beta), cascade, t)
        LL_MLE, MLE = compute_MLE(cascade, t, alpha, mu)
        p_est, beta_est = MLE
        N, G1, n_star = prediction([p_est, beta_est], cascade, alpha, mu, t)
        messfinal = {
            "type": "parameters",
            "cid": tweet_id,
            "msg": text,
            "n_obs": len(cascade),
            "n_supp": N,
            "params": list(MLE),
            "G1": G1,
            "n_star": n_star
        }
        producer.send(config["producer_topic"], key=T_obs, value=messfinal,
                      partition=config["partition"])
        logger.info(
            "Predicted params p = {: .3f} and beta = {: .3f} for tweet {} at time {} on partition: {}"
            .format(p_est, beta_est, tweet_id, T_obs, config["partition"]))
def get_kafka_consumer(group_id, host, port, topic):
    """Return consumer for Kafka topic"""
    consumer = KafkaConsumer(group_id=group_id,
                             auto_offset_reset="earliest",
                             bootstrap_servers=[f"{host}:{port}"],
                             value_deserializer=lambda x: x.decode('utf-8'))
    partition = TopicPartition(topic, 0)
    consumer.assign([partition])
    return consumer, partition
class PubSubInterface:
    EVTYPE = "PubSubEvent"

    def __init__(self, topic_list):
        self.logger = logging.getLogger('Mercury.PubSubInterface')
        self.producer = None
        self.consumer_thread = None
        self.topiclist = []
        self.topiclock = threading.Lock()
        for topic in topic_list:
            self._add_topic(topic)
        self.msglist = []
        self.msglock = threading.Lock()
        self.evhandler = eventhandler.EventHandler()

    def configure(self, config):
        self.psconfig = config['PubSub']

    def _add_topic(self, new_topic):
        self.topiclock.acquire()
        self.topiclist.append(TopicPartition(new_topic, 0))
        self.topiclock.release()

    def connect(self):
        self.logger.info("Connecting to Kafka pubsub")
        self.consumer_thread = threading.Thread(target=self.run_consumer)
        self.consumer_thread.daemon = True
        self.consumer_thread.start()
        self.producer = KafkaProducer(
            bootstrap_servers=[self.psconfig['bootstrap_server']])

    def _add_msg(self, msg):
        self.msglock.acquire()
        self.msglist.append(msg)
        self.msglock.release()

    def run_consumer(self):
        self.consumer = KafkaConsumer(
            bootstrap_servers=[self.psconfig['bootstrap_server']])
        self.consumer.assign(self.topiclist)
        for msg in self.consumer:
            self.logger.debug("Received message from pubsub!")
            self._add_msg(msg)
            ev = eventhandler.MercuryEvent(PubSubInterface.EVTYPE)
            self.evhandler.fire(ev)

    def get_msg(self):
        msg = None
        self.msglock.acquire()
        if len(self.msglist):
            msg = self.msglist.pop(0)
        self.msglock.release()
        return msg

    def send_msg(self, topic, msg):
        self.logger.debug("psubi send_msg")
        self.producer.send(topic, msg.encode())
def consumer_message3():
    consumer = KafkaConsumer(bootstrap_servers=servers,
                             consumer_timeout_ms=1000,
                             group_id="kafka-group-id",
                             enable_auto_commit=False)
    consumer.assign([TopicPartition('kafka-topic', 0)])
    for msg in consumer:
        print(msg)
        consumer.commit()
class KConsumer:
    def __init__(self, settings):
        self.kafka_host = settings.BROKER['HOST']
        self.kafka_port = settings.BROKER['PORT']
        self.kafka_topics = settings.BROKER['TOPICS']
        self.kafka_groups = settings.BROKER['GROUPS']
        self.kprocessor = KProcessor(settings)
        self.consumer = KafkaConsumer(
            bootstrap_servers=[self.kafka_host + ':' + self.kafka_port])

    def startThread(self):
        executor = ThreadPoolExecutor(max_workers=1)
        # Pass the callable itself; submit(self.start()) would call start()
        # synchronously and submit its return value instead.
        future = executor.submit(self.start)
        print(future.result())

    def start_old(self):
        # TODO here only the first topic is selected
        partition = TopicPartition(self.kafka_topics[0], 0)
        self.consumer.assign([partition])
        self.consumer.seek_to_beginning(partition)
        for msg in self.consumer:
            # index 6 of a ConsumerRecord is its value
            self.kprocessor.process(msg[6])

    def start(self):
        partitions = [TopicPartition(x, 0) for x in self.kafka_topics]
        self.consumer.assign(partitions)
        # TODO do that instead of the loop below:
        # self.consumer.seek_to_beginning(*partitions)
        for partition in partitions:
            self.consumer.seek_to_beginning(partition)
        while True:
            msg = next(self.consumer)
            try:
                print('good format :', msg.value)
                json_string = msg.value.decode("utf-8")
                self.kprocessor.process(json_string)
            except Exception:
                print('event string not in the good format :', msg.value)
                # use KProducer here

    def close(self):
        self.consumer.close()

    def add_topics(self, topics):
        self.kafka_topics.append(topics)

    def remove_topics(self, topics):
        self.kafka_topics.remove(topics)
class AKConsumer(Thread):
    def __init__(self):
        self.stopConsuming = False
        self.brokerconf = None
        self.userconf = None
        self.callback = None
        self.topic_partitions = None

    def get_name(self):
        return self.userconf['topics'][0]

    def get_partition(self):
        return self.userconf['partition'][0]

    def configure(self, brokerconf, userconf):
        self.brokerconf = brokerconf
        self.userconf = userconf
        self.consumer = KafkaConsumer(**self.brokerconf)
        Thread.__init__(self)
        self.topic_partitions = [TopicPartition(self.userconf['topics'][0],
                                                self.userconf['partition'][0])]
        self.consumer.assign(self.topic_partitions)
        partitions = self.consumer.partitions_for_topic(self.userconf['topics'][0])
        if partitions and self.userconf['partition'][0] in partitions:
            self._user_wants_old_messages(self.topic_partitions[0])

    def _user_wants_old_messages(self, tp):
        current_offset = self.consumer.position(tp)
        user_shift_offset = self.userconf.get('resendnumber', 0)
        if user_shift_offset > 0:
            user_shift_offset -= 1
            final_offset = current_offset - user_shift_offset
            if final_offset > 0:
                self.consumer.seek(tp, final_offset)
                print(' User selected to go from ', current_offset,
                      ' to offset ', final_offset)

    def subscribe(self, callback):
        self.callback = callback

    def run(self):
        self.stopConsuming = False
        print('Consuming thread from ', self.userconf['topics'],
              ' in partition ', self.userconf['partition'], ' in.')
        while not self.stopConsuming:
            partitions = self.consumer.poll(timeout_ms=300, max_records=1)
            for p in partitions:
                for response in partitions[p]:
                    self.consumer.commit()
                    self._receive(response)
        print('Consuming thread from ', self.userconf['topics'],
              ' in partition ', self.userconf['partition'], ' out.')

    def _receive(self, msg):
        self.callback(msg)

    def stop(self):
        self.stopConsuming = True
        self.join()
        self.consumer.close()
        print('Consumer to ', self.userconf['topics'],
              ' in partition ', self.userconf['partition'], ' stopped.')
def get_current_offsets():
    cons = KafkaConsumer(bootstrap_servers=bootstrap_servers)
    tps = [
        TopicPartition(topic_src, x)
        for x in sorted(cons.partitions_for_topic(topic_src))
    ]
    cons.assign(tps)
    ret = [cons.position(tp) for tp in tps]
    cons.close(autocommit=False)
    return ret
def _init_consumer(self):
    consumer = KafkaConsumer(client_id=self.task_name(),
                             bootstrap_servers=self._brokers,
                             request_timeout_ms=1000,
                             enable_auto_commit=False,
                             auto_offset_reset="latest")
    consumer.assign(self._topic_partitions)
    for tps in self._topic_partitions:
        consumer.seek_to_beginning(tps)
    return consumer
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """

    def __init__(self, location, enable_ssl, cert_path, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        kwargs = _prepare_kafka_ssl_kwargs(cert_path) if enable_ssl else {}
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
            heartbeat_interval_ms=10000,
            **kwargs)

        # explicitly causing consumer to bootstrap the cluster metadata
        self._consumer.topics()

        if partition_id is not None:
            self._partitions = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partitions)
        else:
            self._partitions = [
                TopicPartition(self._topic, pid)
                for pid in self._consumer.partitions_for_topic(self._topic)
            ]
            self._consumer.subscribe(topics=[self._topic])

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partitions:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._consumer.commit()
        self._consumer.close()
def run(self):
    """
    Extract topic logsize data from the specified Kafka cluster and
    hand each extracted message to the transfer for processing.
    """
    brokers = base.config["collector"]["clusters"][self.cluster]["brokers"]
    consumer = KafkaConsumer(bootstrap_servers=brokers,
                             enable_auto_commit=False,
                             group_id="kafka_extract")
    consumer.assign([
        TopicPartition(self.topic, int(partition))
        for partition in self.stopPosition
    ])
    finish = {}
    for partition, stopLogsize in self.stopPosition.items():
        tp = TopicPartition(self.topic, int(partition))
        finish[partition] = False
        try:
            startLogsize = self.startPosition[partition]
            consumer.seek(tp, startLogsize)
            self.progress[partition] = [startLogsize, stopLogsize]
        except KeyError:
            consumer.seek_to_beginning(tp)
            self.progress[partition] = [0, stopLogsize]
    if self.startPosition == self.stopPosition:
        return
    with DataTransfer(output=self.output, cluster=self.cluster,
                      topic=self.topic, diskPath=self.diskPath,
                      avroSchema=self.avroSchema,
                      targetBrokers=self.targetBrokers,
                      targetTopic=self.targetTopic,
                      compressType=self.compressType) as dt:
        for message in consumer:
            partition = str(message.partition)
            offset = message.offset + 1
            if offset <= self.stopPosition[partition]:
                dt.transfer(message)
                self.progress[partition][0] = offset
                self.get_progress()
            if offset >= self.stopPosition[partition]:
                finish[partition] = True
                if False not in finish.values():
                    sys.stdout.write("\n" * len(self.stopPosition))
                    sys.stdout.flush()
                    break
def __init__(self, branch_id):
    consumer = KafkaConsumer(
        bootstrap_servers=['localhost:9092'],
        value_deserializer=lambda m: loads(m.decode('ascii')))
    topic_customer = TopicPartition('bank-customer-test', branch_id)
    # topic_transaction = TopicPartition('bank-customer-new', branch_id)
    partitions = list()
    partitions.append(topic_customer)
    consumer.assign(partitions)
    self.consumer = consumer
    self.branch_id = branch_id
def get_consumer(topic, partition=-1):
    # With no explicit partition, return a group-managed consumer; otherwise
    # assign the given partition and rewind to the beginning. (The original
    # named this parameter `offset`, but it is used as a partition number.)
    if partition == -1:
        consumer = KafkaConsumer(topic, group_id='MovieLog1',
                                 consumer_timeout_ms=KAFKA_TIMEOUT)
    else:
        consumer = KafkaConsumer(consumer_timeout_ms=KAFKA_TIMEOUT)
        tp = TopicPartition(topic, partition)
        consumer.assign([tp])
        consumer.seek_to_beginning(tp)
    return consumer
def read_from_offset(self, offset=0, lang='json', schema=None):
    '''
    Kafka read message
    Read json and avro messages from consumer
    '''
    log.debug("[KafkaDriver][read_from_offset] lang: " + str(lang))
    log.debug("[KafkaDriver][read_from_offset] offset: " + str(offset))

    def outputJSON(obj):
        '''
        Default JSON serializer.
        '''
        if isinstance(obj, datetime.datetime):
            return int(obj.strftime("%s%f")[:-3])
        return obj

    ret = None
    log.debug("[KafkaDriver][read_from_offset] read start: " + str(self.server))
    consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=1000)
    partition = TopicPartition(self.topic, 0)
    consumer.assign([partition])
    consumer.seek(partition, offset)
    for msg in consumer:
        if lang == 'avro':
            schema_registry = CachedSchemaRegistryClient(
                url='http://' + self.schema_registry + ':8081')
            self._serializer = MessageSerializer(schema_registry)
            message = self._serializer.decode_message(msg.value)
            message = json.dumps(message, indent=4, sort_keys=True,
                                 default=outputJSON)
            ret = message
        else:
            message = msg.value
            ret = msg.value
        log.debug("[KafkaDriver][read_from_offset] msg: " + str(message) +
                  " msg.offset: " + str(msg.offset))
    consumer.close()
    log.debug("[KafkaDriver][read_from_offset] read end")
    return ret
def cam(cam_num):
    """
    This is the heart of our video display. Notice we set the mimetype to
    multipart/x-mixed-replace. This tells Flask to replace any old images
    with new values streaming through the pipeline.
    """
    consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
    consumer.assign([TopicPartition(topic=topic, partition=int(cam_num))])
    return Response(get_video_stream(consumer),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
def consume(self):
    """setup consumer"""
    consumer = KafkaConsumer(
        bootstrap_servers=[self.kafka_host],
        enable_auto_commit=False,
        value_deserializer=lambda x: json.loads(x.decode('utf-8')))
    partition = TopicPartition(self.kafka_topic, 0)
    consumer.assign([partition])
    consumer.seek_to_end()
    last_offset = consumer.position(partition)
    print(last_offset)
    consumer.seek(partition=partition,
                  offset=last_offset - self.offset_decrement)
    for message in consumer:
        # message value and key are raw bytes -- decode if necessary!
        # e.g., for unicode: `message.value.decode('utf-8')`
        print("%s:%d:%d: key=%s value=" % (message.topic, message.partition,
                                           message.offset, message.key))
        val = message.value
        if "cmd" in val:
            cmd = val["cmd"]
            print(cmd)
            if cmd == "FileWriter_new":
                self.previous_command = cmd
                if "file_attributes" in val:
                    if "file_name" in val["file_attributes"]:
                        self.attrib = val["file_attributes"]
                        print(self.attrib["file_name"])
            elif cmd == "FileWriter_stop":
                if self.previous_command == "FileWriter_new":
                    self.previous_command = cmd
                    time.sleep(5)
                    bot = ScicatBot()
                    bot.login()
                    room_alias = "#" + self.proposal_id + ":ess"
                    room_id = bot.get_room_id(room_alias)
                    filename = self.attrib["file_name"]
                    bot.post(room_id, filename)
                    image_name = "im.png"
                    try:
                        with h5py.File(filename, "r", libver="latest",
                                       swmr=True) as file:
                            pass  # print(file["/entry/title"])
                        bot.upload_image(image_name)
                        bot.post_image(room_id)
                    except OSError as err:
                        print("OS error: {0}".format(err))
                        print("Error reading hdf5 file")
def _test_produce_and_consume_kafka_message(bootstrap_server: str):
    topic = 'test-topics'
    producer = KafkaProducer(bootstrap_servers=[bootstrap_server])
    producer.send(topic, b'producer message')
    producer.flush()
    producer.close()

    consumer = KafkaConsumer(bootstrap_servers=[bootstrap_server])
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])
    consumer.seek_to_beginning()
    assert next(consumer).value.decode("utf-8") == 'producer message'
def get_consumer_kafkaConsumer():
    consumer = KafkaConsumer(
        group_id='my-group1',
        bootstrap_servers=[
            'ip-172-31-15-110.us-west-2.compute.internal:6667',
            'ip-172-31-15-237.us-west-2.compute.internal:6667',
            'ip-172-31-5-184.us-west-2.compute.internal:6667'
        ])
    # assign(), position(), and seek_to_end() take TopicPartition objects,
    # not bare topic names (the original passed `topic` directly).
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])
    position = consumer.position(tp)
    consumer.seek_to_end(tp)
    return consumer
def Kafka_Consumer():
    try:
        consumer = KafkaConsumer(group_id="black",
                                 bootstrap_servers=config.BOOTSTRAP_SERVERS,
                                 consumer_timeout_ms=1000)
        consumer.assign([TopicPartition(topic=KAFKA_TOPIC, partition=0)])
        # consumer.subscribe(topics=['my_topic', 'topic_1'])  # subscribe to multiple topics
        for msg in consumer:
            print("%s:%d:%d: key=%s value=%s" %
                  (msg.topic, msg.partition, msg.offset, msg.key, msg.value))
    except KafkaError as e:
        print(e)
def create_consumer_with_partition(partition_number):
    partition = TopicPartition("final", partition_number)
    consumer = KafkaConsumer(
        bootstrap_servers=KAFKA_BROKER,
        auto_offset_reset="earliest",
        enable_auto_commit=True,
        group_id="my-group"
        # value_deserializer=lambda m: json.loads(m.decode("utf-8"))
    )
    consumer.assign([partition])
    return consumer
def create_consumer(topic):
    consumer = KafkaConsumer(
        bootstrap_servers="localhost:9092",
        value_deserializer=lambda x: json.loads(x.decode("utf-8")))
    # Manually assign partitions
    # https://github.com/dpkp/kafka-python/issues/601#issuecomment-331419097
    assignments = []
    partitions = consumer.partitions_for_topic(topic)
    for p in partitions:
        print(f"topic {topic} - partition {p}")
        assignments.append(TopicPartition(topic, p))
    consumer.assign(assignments)
    return consumer
def consumer():
    # Kafka topic to read data from
    kafka_topic = "test"
    # Kafka nodes
    bootstrap_servers = ["localhost:9092"]
    # Group name used for Kafka's dynamic partition assignment
    # group_id = "test_group"
    # Method used to deserialize the data
    # value_deserializer = lambda v: json.dumps(v)
    # Minimum amount of data Kafka returns when reading
    # fetch_min_bytes = 1
    # Common usage: specify the topic up front
    # consumer = KafkaConsumer(kafka_topic, bootstrap_servers=bootstrap_servers)
    # Size of the data buffer
    cache_data = 10
    # Set the topic/partition afterwards instead
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers)
    # TODO: read the offset from redis/mysql
    kafka_offset_key = "kafka:offset"
    kafka_offset = redis.get(kafka_offset_key)
    tp = TopicPartition(kafka_topic, 0)
    consumer.assign([tp])
    consumer.seek_to_end(tp)
    lastOffset = consumer.position(tp)
    # To consume from the newest messages, call the assignment method
    # consumer.assignment()
    # To consume from the oldest data:
    # consumer.seek_to_beginning(tp)
    consumer.seek(tp, int(kafka_offset))
    if int(kafka_offset) < int(lastOffset):
        data_list = []
        for msg in consumer:
            logger.info("message topic: %s" % msg.topic)
            logger.info("message partition: %s" % msg.partition)
            logger.info("message offset: %s" % msg.offset)
            data_list.append(parseData(msg.value))
            if len(data_list) > cache_data:
                toHbase(data_list)
                data_list = []
            if msg.offset == lastOffset - 1:
                if len(data_list) > 0:
                    toHbase(data_list)
                redis.set(kafka_offset_key, lastOffset)
                break
    else:
        logger.info("no new data")
def run(self):
    consumer = KafkaConsumer(
        bootstrap_servers=self.server,
        auto_offset_reset='earliest',
        group_id=self.groupid)
    if consumer.partitions_for_topic(self.topic) is None:
        print("Topic %s does not exist!" % self.topic)
        sys.exit(2)
    if self.partition is None:
        partitions = [TopicPartition(self.topic, partition)
                      for partition in consumer.partitions_for_topic(self.topic)]
    else:
        partitions = [TopicPartition(self.topic, int(self.partition))]
    consumer.assign(partitions)
    if self.offset is None:
        if self.inicio:
            for partition in partitions:
                consumer.seek_to_beginning(partition)
    else:
        for partition in partitions:
            consumer.seek(partition, int(self.offset))
    while not self.stop_event.is_set():
        try:
            for message in consumer:
                logging.info(message)
                try:
                    valor = json.loads(message.value)
                    if self.words:
                        valor = valor['words']
                except ValueError:
                    valor = message.value.decode('utf-8')
                print("Receiving message (%s/%d/%d) %s" %
                      (message.topic, message.partition, message.offset,
                       # message.key,
                       valor))
                if self.stop_event.is_set():
                    break
        except IndexError:
            pass
    consumer.close()
def consume_from_beginning(host='192.168.11.137:9092', topic='first_topic'):
    consumer = KafkaConsumer(group_id='1',
                             bootstrap_servers=host,
                             auto_offset_reset='earliest',
                             enable_auto_commit=True,
                             auto_commit_interval_ms=3000)
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])
    # poll() once so fetch positions are initialized before seeking
    consumer.poll()
    consumer.seek_to_beginning(tp)
    for msg in consumer:
        print(msg)
def poll(topic, partition=0, offset=0, hostname=None, port_num=None, max_timeout=100):
    # `partition` was previously an undefined name; default to partition 0.
    hostname, port_num = insure_host_port(hostname, port_num)
    server = hostname + ':' + str(port_num)
    topic_partition = TopicPartition(topic, partition)
    consumer = KafkaConsumer(bootstrap_servers=server, group_id=None)
    consumer.assign([topic_partition])
    consumer.seek(topic_partition, offset)
    # poll() returns {TopicPartition: [records]}; dict views aren't indexable,
    # so materialize them into a list first.
    msgs = list(consumer.poll(max_timeout).values())
    consumer.close()
    if len(msgs) > 0:
        return msgs[0]
    else:
        return {}
def run(self):
    global useavro, useextra, schema_id, sslEnable
    print("start Consumer")
    if useavro:
        topic = "avro.log.localtest"
    else:
        topic = "raw.log.localtest"
    print("on topic %s" % topic)
    if sslEnable:
        print("setting up SSL to PROTOCOL_TLSv1")
        ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        ctx.load_cert_chain(certfile="../ca-cert", keyfile="../ca-key",
                            password="******")
        consumer = KafkaConsumer(bootstrap_servers=["ip6-localhost:9093"],
                                 security_protocol="SASL_SSL",
                                 ssl_context=ctx,
                                 sasl_mechanism="PLAIN",
                                 sasl_plain_username="******",
                                 sasl_plain_password="******",
                                 group_id="test")
    else:
        consumer = KafkaConsumer(bootstrap_servers=["ip6-localhost:9092"])
    consumer.assign([TopicPartition(topic, 0)])
    ## Skip the consumer to the head of the log - this is a personal choice.
    ## It means we are losing the messages sent while the py consumer was off.
    ## Not a problem for testing purposes.
    # consumer.seek(0, 2)
    for message in consumer:
        print('-' * 60)
        try:
            consume_message(message)
        except Exception:
            print('error')
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
def kafka_consumer_test():
    topic_name = 'topic_test'
    bootstrap_servers = ['localhost:9092']
    # consumer = KafkaConsumer(topic_name, bootstrap_servers=bootstrap_servers,
    #                          group_id='test_group', auto_offset_reset='earliest')
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers,
                             group_id='test_group',
                             auto_offset_reset='earliest')
    # With enable_auto_commit=True (the default) consumption resumes from where
    # it left off, since the broker stores this group_id's offsets.
    # auto_offset_reset defaults to 'latest' and only takes effect when the
    # stored offset is missing or out of range.
    partition_set = consumer.partitions_for_topic(topic_name)
    partitions = [
        TopicPartition(topic_name, partition_idx)
        for partition_idx in partition_set
    ]
    consumer.assign(partitions)
    topic_partition_set = consumer.assignment()
    # consumer.seek_to_beginning()  # seek to the first offset the cluster still
    #                               # holds (not necessarily 0); with no argument
    #                               # it applies to every assigned partition
    # consumer.seek_to_end()        # seek to the first unconsumed offset; with
    #                               # no argument it applies to every partition
    for topic_partition in topic_partition_set:
        offset = consumer.position(topic_partition)
        print("partition: %d, offset: %d" % (topic_partition.partition, offset))
        # consumer.seek(topic_partition, offset)  # avoid setting this by hand
    for msg in consumer:
        print("topic:%s, partition:%d, offset:%d: key=%s value=%s" %
              (msg.topic, msg.partition, msg.offset, msg.key,
               msg.value.decode("utf-8")))
from kafka import KafkaConsumer, TopicPartition

topic_name = "test"
consumer = KafkaConsumer(bootstrap_servers=['u1401.ambari.apache.org:6667',
                                            'u1402.ambari.apache.org:6667',
                                            'u1403.ambari.apache.org:6667'])
partitions = [TopicPartition(topic_name, partition)
              for partition in consumer.partitions_for_topic(topic_name)
              if partition < 5]
consumer.assign(partitions)
consumer.seek_to_beginning()

for message in consumer:
    print(message)
import json

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer()
consumer.assign([TopicPartition('flight_delay_classification_request', 0)])
consumer.seek_to_beginning()

for message in consumer:
    message_bytes = message.value
    message_string = message_bytes.decode()
    message_object = json.loads(message_string)
    print(message_object)
class KafkaChangeFeed(ChangeFeed):
    """
    Kafka-based implementation of a ChangeFeed
    """
    sequence_format = 'json'

    def __init__(self, topics, client_id, strict=False, num_processes=1, process_num=0):
        """
        Create a change feed listener for a list of kafka topics, a client ID,
        and partition.

        See http://kafka.apache.org/documentation.html#introduction for a
        description of what these are.
        """
        self._topics = topics
        self._client_id = client_id
        self._processed_topic_offsets = {}
        self.strict = strict
        self.num_processes = num_processes
        self.process_num = process_num
        self._consumer = None

    def __str__(self):
        return 'KafkaChangeFeed: topics: {}, client: {}'.format(self._topics, self._client_id)

    @property
    def topics(self):
        return self._topics

    def _get_single_topic_or_fail(self):
        if len(self._topics) != 1:
            raise ValueError("This function requires a single topic but found {}!".format(self._topics))
        return self._topics[0]

    def iter_changes(self, since, forever):
        """
        Since must be a dictionary of topic partition offsets.
        """
        timeout = float('inf') if forever else MIN_TIMEOUT
        start_from_latest = since is None
        reset = 'largest' if start_from_latest else 'smallest'
        self._init_consumer(timeout, auto_offset_reset=reset)

        since = self._filter_offsets(since)
        # a special value of since=None will start from the end of the change stream
        if since is not None and (not isinstance(since, dict) or not since):
            raise ValueError("'since' must be None or a topic offset dictionary")

        if not start_from_latest:
            if self.strict:
                validate_offsets(since)

            checkpoint_topics = {tp[0] for tp in since}
            extra_topics = checkpoint_topics - set(self._topics)
            if extra_topics:
                raise ValueError("'since' contains extra topics: {}".format(list(extra_topics)))

            self._processed_topic_offsets = copy(since)

            # Tell the consumer to start from offsets that were passed in
            for topic_partition, offset in since.items():
                self.consumer.seek(TopicPartition(topic_partition[0], topic_partition[1]), int(offset))

        try:
            for message in self.consumer:
                self._processed_topic_offsets[(message.topic, message.partition)] = message.offset
                yield change_from_kafka_message(message)
        except StopIteration:
            assert not forever, 'Kafka pillow should not timeout when waiting forever!'
            # no need to do anything since this is just telling us we've
            # reached the end of the feed

    def get_current_checkpoint_offsets(self):
        # the way kafka works, the checkpoint should increment by 1 because
        # querying the feed is inclusive of the value passed in.
        latest_offsets = self.get_latest_offsets()
        ret = {}
        for topic_partition, sequence in self.get_processed_offsets().items():
            if sequence == latest_offsets[topic_partition]:
                # this topic and partition is totally up to date and if we add 1
                # then kafka will give us an offset out of range error.
                # not adding 1 to the partition means that we may process this
                # change again later, but that should be OK
                sequence = latest_offsets[topic_partition]
            else:
                sequence += 1
            ret[topic_partition] = sequence
        return self._filter_offsets(ret)

    def get_processed_offsets(self):
        return copy(self._processed_topic_offsets)

    def get_latest_offsets(self):
        return self.consumer.end_offsets(self.consumer.assignment())

    def get_latest_offsets_json(self):
        return json.loads(kafka_seq_to_str(self.get_latest_offsets()))

    def get_latest_offsets_as_checkpoint_value(self):
        return self.get_latest_offsets()

    @property
    def consumer(self):
        if self._consumer is None:
            return self._init_consumer()
        return self._consumer

    def _init_consumer(self, timeout=MIN_TIMEOUT, auto_offset_reset='smallest'):
        """Allow re-initing the consumer if necessary
        """
        config = {
            'client_id': self._client_id,
            'bootstrap_servers': settings.KAFKA_BROKERS,
            'consumer_timeout_ms': timeout,
            'auto_offset_reset': auto_offset_reset,
            'enable_auto_commit': False,
            'api_version': settings.KAFKA_API_VERSION,
        }
        self._consumer = KafkaConsumer(**config)

        topic_partitions = []
        for topic in self.topics:
            for partition in self._consumer.partitions_for_topic(topic):
                topic_partitions.append(TopicPartition(topic, partition))

        self._consumer.assign(self._filter_partitions(topic_partitions))
        return self._consumer

    def _filter_offsets(self, offsets):
        if offsets is None:
            return offsets

        return {
            tp: offsets[tp]
            for tp in self.consumer.assignment()
            if tp in offsets
        }

    def _filter_partitions(self, topic_partitions):
        topic_partitions.sort()
        return [
            topic_partitions[num::self.num_processes]
            for num in range(self.num_processes)
        ][self.process_num]
import json

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer()
consumer.assign([TopicPartition('test', 0)])
consumer.seek_to_beginning()

for message in consumer:
    message_bytes = message.value
    message_string = message_bytes.decode()
    message_object = json.loads(message_string)
    print(message_object)
class Consumer(BaseStreamConsumer):
    """
    Used in DB and SW worker. SW consumes per partition.
    """

    def __init__(self, location, topic, group, partition_id):
        self._location = location
        self._group = group
        self._topic = topic
        self._consumer = KafkaConsumer(
            bootstrap_servers=self._location,
            group_id=self._group,
            max_partition_fetch_bytes=10485760,
            consumer_timeout_ms=100,
            client_id="%s-%s" % (self._topic, str(partition_id) if partition_id is not None else "all"),
            request_timeout_ms=120 * 1000,
        )

        if partition_id is not None:
            self._partition_ids = [TopicPartition(self._topic, partition_id)]
            self._consumer.assign(self._partition_ids)
        else:
            self._partition_ids = [TopicPartition(self._topic, pid)
                                   for pid in self._consumer.partitions_for_topic(self._topic)]
            self._consumer.subscribe(topics=[self._topic])
            if self._consumer._use_consumer_group():
                self._consumer._coordinator.ensure_coordinator_known()
                self._consumer._coordinator.ensure_active_group()

        self._consumer._update_fetch_positions(self._partition_ids)
        self._start_looping_call()

    def _start_looping_call(self, interval=60):
        def errback(failure):
            logger.exception(failure.value)
            if failure.frames:
                logger.critical(str("").join(format_tb(failure.getTracebackObject())))

        # Create the task before starting it; the original started the
        # attribute before it had been assigned.
        self._poll_task = LoopingCall(self._poll_client)
        self._poll_task.start(interval).addErrback(errback)

    def _poll_client(self):
        self._consumer._client.poll()

    def get_messages(self, timeout=0.1, count=1):
        result = []
        while count > 0:
            try:
                m = next(self._consumer)
                result.append(m.value)
                count -= 1
            except StopIteration:
                break
        return result

    def get_offset(self, partition_id):
        for tp in self._partition_ids:
            if tp.partition == partition_id:
                return self._consumer.position(tp)
        raise KeyError("Can't find partition %d" % partition_id)

    def close(self):
        self._poll_task.stop()
        self._consumer.commit()
        # keep the kafka client event loop running some more so the
        # commit actually gets executed
        tries = 3
        while tries:
            self.get_messages()
            sleep(2.0)
            tries -= 1
        self._consumer.close()
class CheckKafka(PubSubNagiosPlugin):

    def __init__(self):
        # Python 2.x
        super(CheckKafka, self).__init__()
        # Python 3.x
        # super().__init__()
        self.name = 'Kafka'
        self.default_port = 9092
        self.producer = None
        self.consumer = None
        self.topic = None
        self.client_id = 'Hari Sekhon ' + os.path.basename(get_topfile()) + ' ' + __version__
        self.group_id = self.client_id + ' ' + str(os.getpid()) + ' ' + random_alnum(10)
        self.acks = '1'
        self.retries = 0
        self.partition = None
        self.topic_partition = None
        self.brokers = None
        self.timeout_ms = None
        self.start_offset = None

    def add_options(self):
        # super(CheckKafka, self).add_options()
        # TODO: (host_envs, default_host) = getenvs2('HOST', default_host, name)
        # TODO: env support for Kafka brokers
        self.add_opt('-H', '--host',
                     '-B', '--brokers',
                     dest='brokers', metavar='broker_list', default='localhost:9092',
                     help='Kafka Broker seed list in form host[:port],host2[:port2]... ' +
                     '(default: localhost:9092)')
        self.add_opt('-T', '--topic', help='Kafka Topic')
        self.add_opt('-p', '--partition', type=int, help='Kafka Partition (default: 0)', default=0)
        self.add_opt('-a', '--acks', default=1, choices=['1', 'all'],
                     help='Acks to require from Kafka. Valid options are \'1\' for Kafka ' +
                     'partition leader, or \'all\' for all In-Sync Replicas (may block causing ' +
                     'timeout if replicas aren\'t available, default: 1)')
        self.add_opt('-s', '--sleep', metavar='secs',
                     help='Sleep in seconds between producing and consuming from given topic (default: 0.5)')
        self.add_opt('--list-topics', action='store_true', help='List Kafka topics from broker(s) and exit')
        self.add_opt('--list-partitions', action='store_true',
                     help='List Kafka topic partitions from broker(s) and exit')
        self.add_thresholds(default_warning=1, default_critical=2)

    def run(self):
        try:
            super(CheckKafka, self).run()
        #except KafkaError as _:
            #raise CriticalError(_)
        except KafkaError:
            err = self.exception_msg()
            if 'NoBrokersAvailable' in err:
                err += ' ({0})'.format(self.brokers)
            raise CriticalError(err)

    @staticmethod
    def exception_msg():
        return traceback.format_exc().split('\n')[-2]

    def get_topics(self):
        self.consumer = KafkaConsumer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            request_timeout_ms=self.timeout_ms
        )
        return self.consumer.topics()

    def print_topics(self):
        print('Kafka Topics:\n')
        for topic in self.get_topics():
            print(topic)

    def get_topic_partitions(self, topic):
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            request_timeout_ms=self.timeout_ms
        )
        if topic not in self.get_topics():
            raise CriticalError("topic '{0}' does not exist on Kafka broker".format(topic))
        partitions = self.consumer.partitions_for_topic(topic)
        assert isSet(partitions)
        return partitions

    def print_topic_partitions(self, topic):
        print('Kafka topic \'{0}\' partitions:\n'.format(topic))
        #for partition in self.get_topic_partitions(topic):
        #    print(partition)
        print(list(self.get_topic_partitions(topic)))
        print()

    def process_args(self):
        self.brokers = self.get_opt('brokers')
        # TODO: add broker list validation back in
        # validate_hostport(self.brokers)
        log_option('brokers', self.brokers)
        self.timeout_ms = max((self.timeout * 1000 - 1000) / 2, 1000)

        try:
            list_topics = self.get_opt('list_topics')
            list_partitions = self.get_opt('list_partitions')
            if list_topics:
                self.print_topics()
                sys.exit(ERRORS['UNKNOWN'])
            self.topic = self.get_opt('topic')
        except KafkaError:
            raise CriticalError(self.exception_msg())

        if self.topic:
            validate_chars(self.topic, 'topic', 'A-Za-z-')
        elif list_topics or list_partitions:
            pass
        else:
            self.usage('--topic not specified')

        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())

        self.partition = self.get_opt('partition')
        # technically optional as messages will hash to a random partition,
        # but we need to know which partition to get the offset from
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        try:
            self.acks = int(self.acks)
        except ValueError:
            pass
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        # this is only a guess as Kafka doesn't expose its API version
        #log.debug('kafka api version: %s', self.consumer.config['api_version'])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen a scenario where None is returned and
            # all messages are read again - better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            acks=self.acks,
            batch_size=0,
            max_block_ms=self.timeout_ms,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        log.debug('producer.send()')
        self.producer.send(
            self.topic,
            key=self.key,
            partition=self.partition,
            value=self.publish_message
        )
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError("failed to find matching consumer record with key '{0}'".format(self.key))
        return msg
class ClusterZookeeper(object):
    def __init__(self, zookeeper_hosts, kafka_hosts):
        self.groups_dict = {}
        self.topics_dict = {}
        self.brokers_list = []
        self.consumer = KafkaConsumer(bootstrap_servers=kafka_hosts.split(','))
        self.zk = KazooClient(hosts=zookeeper_hosts)
        self.zk.add_listener(self.keep_start)
        self.zk.start()
        if self.zk.exists('/consumers') is None or self.zk.exists('/brokers') is None:
            raise ValueError(zookeeper_hosts + ' is not a zookeeper of kafka')
        ChildrenWatch(self.zk, '/consumers', self.groups_watch)
        ChildrenWatch(self.zk, '/brokers/topics', self.topics_watch)
        ChildrenWatch(self.zk, '/brokers/ids/', self.brokers_watch)
        t = threading.Thread(target=self.latest, name=kafka_hosts)
        t.setDaemon(True)
        t.start()

    # Make sure the connection stays usable
    def keep_start(self, client_status):
        if client_status != 'CONNECTED':
            try:
                self.zk.start()
            except Exception:
                pass

    # Watch the /consumers node
    def groups_watch(self, children):
        for group in [group for group in self.groups_dict.keys() if group not in children]:
            self.groups_dict.pop(group)
        for group in [group for group in children if group not in self.groups_dict.keys()]:
            owners_p = '/consumers/' + group + '/owners'
            if self.zk.exists(owners_p) is None:
                continue
            g_o_t = GroupOwnersTopic()
            self.groups_dict[group] = g_o_t
            ChildrenWatch(self.zk, owners_p, g_o_t.g_topic_watch)

    # Watch the topic nodes
    def topics_watch(self, children):
        for topic in [topic for topic in self.topics_dict.keys() if topic not in children]:
            self.topics_dict.pop(topic)
        for topic in [topic for topic in children if topic not in self.topics_dict.keys()]:
            t_v = TopicValue()
            self.topics_dict[topic] = t_v
            DataWatch(self.zk, '/brokers/topics/' + topic, t_v.topic_watch)
            t_v.topic_partition = [TopicPartition(topic, p)
                                   for p in self.consumer.partitions_for_topic(topic)]

    # Watch the broker nodes
    def brokers_watch(self, children):
        self.brokers_list = children

    def close_zk(self):
        try:
            self.zk.remove_listener(self.keep_start)
            self.zk.stop()
            self.zk.close()
        except Exception:
            pass

    def latest(self):
        while True:
            # time.sleep(0.1)
            time.sleep(0.001)
            for k, v in self.topics_dict.items():
                try:
                    partitions = v.topic_partition
                    self.consumer.assign(partitions)
                    self.consumer.seek_to_end(*partitions)
                    log_offset = reduce(lambda x, y: x + y,
                                        [self.consumer.position(p) for p in partitions])
                    now_timestamp = int(time.mktime(time.localtime()))
                    if 'timestamp' in v.__dict__ and v.timestamp is not None:
                        v.speed = (log_offset - v.off_set) / (now_timestamp - v.timestamp)
                    v.timestamp = now_timestamp
                    v.off_set = log_offset
                except Exception:
                    pass