def main():
    consumer = KafkaConsumer(bootstrap_servers=['localhost:9092',
                                                'localhost:9093',
                                                'localhost:9094'],
                             auto_offset_reset='earliest',
                             client_id='test_fetcher',
                             consumer_timeout_ms=1000)
    consumer.subscribe(['Person'])
    print(consumer.topics())
    print(consumer.assignment())
    print(consumer.end_offsets(consumer.assignment()))
    for msg in consumer:
        print(msg.topic)
        print(msg.partition)
        print(msg.offset)
        print(msg.key)
        print(msg.value)
    # resp = consumer.poll(max_records=1000)
    # print(resp)
    # for message in consumer:
    #     print(message)
    #     print(str(int.from_bytes(message, byteorder='little')))
    consumer.close()
def create_consumer(args, policy):
    """
    Refer to the Python package kafka-python, a high-level message consumer for
    Kafka brokers. The consumer iterator returns consumer records, which expose
    basic message attributes: topic, partition, offset, key, and value.
    :param args: Input arguments
    :param policy: Object to store Network Policy for processing
    :return: KafkaConsumer object, messages from the message bus for processing
    """
    consumer = KafkaConsumer(args.get('topic'),
                             api_version=API_VERSION,
                             bootstrap_servers=args.get('broker'),
                             client_id=CLIENT_ID,                      # name passed to servers for identification
                             auto_offset_reset=args.get('start_at'),   # consume earliest or latest available msgs
                             enable_auto_commit=AUTOCOMMIT,            # autocommit offsets?
                             consumer_timeout_ms=args.get('timeout'),  # StopIteration if no message after 'n' ms
                             security_protocol=SSL,
                             ssl_context=create_ssl_context(args))

    # Returned values are of type Set
    msg = ["All the topics available :{}".format(consumer.topics()),
           "Subscription:{}".format(consumer.subscription()),
           "Partitions for topic:{}".format(consumer.partitions_for_topic(args.get('topic'))),
           "TopicPartitions:{}".format(consumer.assignment())]
    policy.add_fact('consumer_debug', msg)

    # Offsets are of type Int
    policy.add_fact('beginning_offsets', str(consumer.beginning_offsets(consumer.assignment())))
    policy.add_fact('end_offsets', str(consumer.end_offsets(consumer.assignment())))
    policy.start_at_offset = args.get('start_at_offset')
    policy.add_fact('start_at_offset', policy.start_at_offset)
    return consumer
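# A minimal sketch (not from the original source) of how create_consumer() above might
# be driven. The 'args' keys are inferred from the args.get(...) calls, and _PolicyStub
# is a hypothetical stand-in for the project's real policy object.
class _PolicyStub:
    def __init__(self):
        self.start_at_offset = None
        self.facts = {}

    def add_fact(self, name, value):
        # record debug facts keyed by name
        self.facts[name] = value


def _example_create_consumer():
    args = {'topic': 'network-policy',    # hypothetical topic name
            'broker': 'localhost:9092',   # hypothetical broker
            'start_at': 'earliest',
            'timeout': 5000,
            'start_at_offset': 0}
    policy = _PolicyStub()
    consumer = create_consumer(args, policy)
    for record in consumer:               # iterates until consumer_timeout_ms expires
        print(record.topic, record.partition, record.offset, record.value)
    consumer.close()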
def consume(self):
    consumer = KafkaConsumer(self.topic, bootstrap_servers=self.bootstrap_servers)
    print(consumer.partitions_for_topic(self.topic))           # partition info for the topic
    print(consumer.topics())                                   # list of topics on the cluster
    print(consumer.subscription())                             # topics this consumer is subscribed to
    print(consumer.assignment())                               # TopicPartitions assigned to this consumer
    print(consumer.beginning_offsets(consumer.assignment()))   # earliest offsets available to this consumer
    # Reset the position and consume from offset 1 of partition 0
    consumer.seek(TopicPartition(topic=self.topic, partition=0), 1)
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key, message.value))
def test():
    from kafka import KafkaConsumer
    consumer = KafkaConsumer(
        TOPIC,
        bootstrap_servers="localhost:9092",
        auto_offset_reset="earliest",
        enable_auto_commit=True,
    )
    consumer.assignment()
    print(get_record_by_offset(consumer, TOPIC, 0))
    print(get_record_by_offset(consumer, TOPIC, 1))
    print(get_record_by_offset(consumer, TOPIC, 0))
    print(get_record_by_offset(consumer, TOPIC, -1))
def main():
    consumer = KafkaConsumer('topic_test_cluster',
                             bootstrap_servers=['master:9092'])

    print(consumer.partitions_for_topic('topic_test_cluster'))
    print(consumer.topics())
    print(consumer.subscription())
    print(consumer.assignment())
    print(consumer.beginning_offsets(consumer.assignment()))

    # Read data from partition 2, starting at offset 5
    consumer.seek(TopicPartition(topic=u'topic_test_cluster', partition=2), 5)

    for msg in consumer:
        print('%s:%d:%d: key=%s value=%s' % (msg.topic, msg.partition, msg.offset, msg.key, msg.value))
def main():
    consumer = KafkaConsumer(bootstrap_servers=["worker2.hengan.shop:9092"],
                             group_id='me',
                             auto_offset_reset='earliest',
                             # value_deserializer=lambda m: json.loads(m.decode('utf-8')),
                             enable_auto_commit=False)
    # consumer.assign([TopicPartition('foobar2', 0)])
    consumer.subscribe(['foobar2'])
    # consumer.seek(TopicPartition('foobar2', 0), 100)
    print(consumer.topics())
    print(consumer.subscription())
    ret = consumer.poll()
    print(ret)
    print(consumer.assignment())
    # consumer.seek_to_beginning()
    try:
        for message in consumer:
            print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                                 message.offset, message.key, message.value))
            print(consumer.partitions_for_topic('foobar2'))
            print("offset is %d" % message.offset)
            tp1 = TopicPartition(topic="foobar2", partition=0)
            om = OffsetAndMetadata(offset=message.offset + 1, metadata=1)
            consumer.commit({tp1: om})
            break
    except KeyboardInterrupt:
        sys.exit()
class XactionConsumer:
    def __init__(self, partition_id):
        self.consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                                      value_deserializer=lambda m: loads(m.decode('ascii')))
        partitions = self.consumer.partitions_for_topic('bank-customer-new')
        print(partitions)
        partition = TopicPartition('bank-customer-new', partition_id)
        print(partition)
        self.ledger = {}
        self.custBalances = {}
        self.consumer.assign([partition])

    def handleMessages(self):
        print(self.consumer.assignment())
        for msg in self.consumer:
            message = msg.value
            print('{} received'.format(message))
            if message['custid'] not in self.custBalances:
                self.custBalances[message['custid']] = 0
            if message['type'] == 'dep':
                self.custBalances[message['custid']] += message['amt']
            else:
                self.custBalances[message['custid']] -= message['amt']
            print(self.custBalances)
def all_messages_consumed(consumer: KafkaConsumer):
    partitions = list(consumer.assignment())
    if len(partitions) < 1:
        return False
    last_commit = consumer.committed(partitions[0])
    last_offset = consumer.end_offsets(partitions=partitions)
    last_offset = list(last_offset.values())[0]
    return last_commit == last_offset
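# The helper above only compares the first assigned partition. A minimal sketch
# (an assumption, not part of the original source) that applies the same
# committed-offset vs end-offset check to every assigned partition:
def all_partitions_consumed(consumer: KafkaConsumer) -> bool:
    partitions = list(consumer.assignment())
    if not partitions:
        return False
    end_offsets = consumer.end_offsets(partitions)
    # fully consumed only if every partition's committed offset has reached its end offset
    return all(consumer.committed(tp) == end_offsets[tp] for tp in partitions)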
def offset_manage_manually_consume():
    """
    Manually set the offset.
    :return:
    """
    consumer = KafkaConsumer(TOPIC, bootstrap_servers=BOOTSTRAP_SERVERS)
    print(consumer.partitions_for_topic(TOPIC))                # partition info for the topic
    print(consumer.topics())                                   # topics available on the Kafka server
    print(consumer.subscription())                             # topics this consumer is subscribed to
    print(consumer.assignment())                               # TopicPartitions assigned to this consumer
    print(consumer.beginning_offsets(consumer.assignment()))   # earliest offsets available to this consumer
    print(consumer.assignment())                               # TopicPartitions assigned to this consumer
    # Reset the position and consume from offset 235000 of partition 0
    consumer.seek(TopicPartition(topic=u'%s' % TOPIC, partition=0), 235000)
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key, message.value))
def doTest(self):
    print(self.className + " - " + pyUtils.getCurrentRunningFunctionName() + "------------------")
    _topicName = "pro_bilog"
    local_url = ['hostname:port']
    real_url = ['ip:port']
    _consumer = KafkaConsumer(_topicName,
                              bootstrap_servers=local_url,
                              group_id='test',
                              request_timeout_ms=3000,
                              session_timeout_ms=5000)
    # Partition info for the topic
    print(_consumer.partitions_for_topic(_topicName))
    # List of topics on the cluster
    print(_consumer.topics())
    # Topics this consumer is subscribed to
    print(_consumer.subscription())
    # TopicPartitions assigned to this consumer
    print(_consumer.assignment())
    # Earliest offsets available to this consumer
    print(_consumer.beginning_offsets(_consumer.assignment()))
def test_kafka_consumer(i):
    consumer = KafkaConsumer('kkll3',
                             bootstrap_servers='localhost:9092',
                             auto_offset_reset='earliest',
                             group_id='123',
                             partition_assignment_strategy=[RoundRobinPartitionAssignor])
    # consumer2 = KafkaConsumer('kkll3', bootstrap_servers='localhost:9092',
    #                           auto_offset_reset='earliest', group_id='123',
    #                           partition_assignment_strategy=[RoundRobinPartitionAssignor])
    time.sleep(3)
    for msg in consumer:
        recv = "%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value)
        consumer.commit_async()
        print(recv)
        ps = consumer.assignment()
        print(ps)
        print(i)
        time.sleep(5)
class TestMetricsProducer(unittest.TestCase):
    def setUp(self):
        self.settings = Settings()
        self.test_consumer = KafkaConsumer(
            self.settings.metrics_topic,
            bootstrap_servers=self.settings.kafka_addr,
            client_id="test-metrics-consumer-1",
            group_id="test-metrics-group",
        )
        # make sure the consumer has partition assignments
        self.test_consumer.poll()
        # and that all messages have been consumed
        self.test_consumer.poll()
        self.test_consumer.commit()

    def tearDown(self):
        self.test_consumer.commit()

    def test_initialization(self):
        init_success = False
        try:
            producer = MetricsProducer(self.settings)
            init_success = True
        except:
            self.assertFalse(True)  # should never happen
        self.assertTrue(init_success)

    def test_writing_to_kafka(self):
        assignment = self.test_consumer.assignment()
        partition = assignment.pop()
        # get the offset before publishing
        old_offset = self.test_consumer.position(partition)
        # produce a message
        producer = MetricsProducer(self.settings)
        msg = producer.collect_metrics()
        producer.publish_metrics(msg)
        raw_result = self.test_consumer.poll()
        for _, items in raw_result.items():
            self.assertEqual(len(items), 1)
            item = items.pop()
            # check that the consumed item is the same we published
            self.assertEqual(item.value.decode('utf-8'), json.dumps(msg))
        self.test_consumer.commit()
        # get the offset after consuming and check it's +1
        new_offset = self.test_consumer.position(partition)
        self.assertEqual(new_offset - old_offset, 1)
def consumer_from_offset(self, process_msg, offset):
    consumer = KafkaConsumer(group_id=self.group_name,
                             bootstrap_servers=self.broker_list,
                             enable_auto_commit=self.enable_auto_commit,
                             auto_offset_reset=self.auto_offset_reset)
    consumer.subscribe(self.topic)
    consumer.poll(0)
    for topic_partition in consumer.assignment():
        consumer.seek(topic_partition, offset)
    while True:
        consumer_records = consumer.poll(100)
        for partition_info, records in consumer_records.items():
            for record in records:
                process_msg(record)
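# The poll(0) above is what triggers the group join, so that assignment() is non-empty
# before seeking. A standalone sketch of the same subscribe -> poll(0) -> seek pattern
# (the broker, topic, and group names below are hypothetical, not from the source):
def consume_from_offset(process_msg, offset,
                        topic='demo-topic', group_id='demo-group',
                        brokers='localhost:9092'):
    consumer = KafkaConsumer(group_id=group_id, bootstrap_servers=brokers)
    consumer.subscribe([topic])
    consumer.poll(0)                  # join the group and receive a partition assignment
    for tp in consumer.assignment():
        consumer.seek(tp, offset)     # rewind every assigned partition to the given offset
    while True:
        for records in consumer.poll(timeout_ms=100).values():
            for record in records:
                process_msg(record)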
def main(args):
    kafka_brokers_sasl = ["kafka02-prod01.messagehub.services.us-south.bluemix.net:9093"]
    sasl_plain_username = "******"
    sasl_plain_password = "******"
    sasl_mechanism = 'PLAIN'  # <-- changed from 'SASL_PLAINTEXT'
    security_protocol = 'SASL_SSL'

    oldoffset = args.get("offset", "0")
    oldoffset = int(oldoffset)

    # Create a new context using system defaults, disable all but TLS1.2
    context = ssl.create_default_context()
    context.options |= ssl.OP_NO_TLSv1
    context.options |= ssl.OP_NO_TLSv1_1

    try:
        consumer = KafkaConsumer('kar2',
                                 bootstrap_servers=kafka_brokers_sasl,
                                 sasl_plain_username=sasl_plain_username,
                                 sasl_plain_password=sasl_plain_password,
                                 security_protocol=security_protocol,
                                 ssl_context=context,
                                 sasl_mechanism=sasl_mechanism)
        consumer.topics()
        part = consumer.assignment()
        # print(part)
        part = part.pop()
        offset = consumer.position(partition=part)
        # print(offset)
        consumer.seek(part, oldoffset)
        # print(consumer.position(partition=part))
        message_queue = consumer.poll(timeout_ms=2000, max_records=20)
        message_queue = message_queue[list(message_queue.keys())[0]]
        message_dict = {}
        for i in message_queue:
            message_dict[i.offset] = i.value
        print(message_dict)
        return message_dict
    except:
        return {"-1": oldoffset}
def cusumer():
    start = time.time()
    n = 0
    _consumer = KafkaConsumer('4.1.1.1.python-test',
                              group_id='test1',
                              bootstrap_servers='192.168.18.134:9092',
                              consumer_timeout_ms=1000)
    print(_consumer.partitions_for_topic('4.1.1.1.python-test'))
    # TopicPartition('4.1.1.1.python-test', '0')
    # a = namedtuple("_TopicPartition", ["_4.1.1.1.python-test", "_0"])
    offset = _consumer.committed(TopicPartition(topic='4.1.1.1.python-test', partition=0))
    # _consumer.seek_to_beginning()
    # _consumer.seek_to_beginning(TopicPartition(topic='4.1.1.1.python-test', partition=0))
    # _consumer.assign(TopicPartition(topic='4.1.1.1.python-test', partition=0))
    print(_consumer.assignment())
    print(_consumer.subscription())
    # beginning_offsets() expects a list of TopicPartitions
    print(_consumer.beginning_offsets([TopicPartition(topic='4.1.1.1.python-test', partition=0)]))
    # _consumer.seek(TopicPartition(topic='4.1.1.1.python-test', partition=0), offset - 1)
    # print(_consumer.position(TopicPartition(topic='4.1.1.1.python-test', partition=0)))
    # _consumer.commit()
    return
    while 1:
        try:
            for message in _consumer:
                # yield message
                print(message.value)
                n = n + 1
                stop = time.time()
                if stop - start > 1:
                    print(n / (stop - start))
                    start = time.time()
                    n = 0
            # print('time out')
        except KafkaTimeoutError as e:
            print(e)
        except KafkaError as e:
            print(e)
        finally:
            pass
def process_sec_filings():
    consumer = KafkaConsumer(bootstrap_servers=kafka_url,
                             enable_auto_commit=False,
                             group_id='sec-processor')
    topics = consumer.topics()
    assignments = consumer.assignment()
    metrics = consumer.metrics()
    print(metrics)
    print(assignments)
    print(topics)
    consumer.subscribe(topics)
    while True:
        i = 0
        a = consumer.poll(100, 5)
        for msg in a:
            i = i + 1
            print(msg)
        print(i)
        print("---------------------")
class KafkaPythonConsumer(BaseConsumer, ConsumerRebalanceListener):
    '''KafkaPythonConsumer'''

    def __init__(self, consumer_id, config, logger):
        BaseConsumer.__init__(self, consumer_id, config, logger)

    def run_consumer(self):
        '''core consumer code'''
        bootstrap_server = self.config.get('consumer', 'kafka_bootstrap')
        consumer_group = self.config.get('consumer', 'kafka_consumer_group')
        offset_reset = self.config.get('consumer', 'kafka_auto_offset_reset')
        self.consumer = KafkaConsumer(bootstrap_servers=bootstrap_server,
                                      consumer_timeout_ms=60000,
                                      group_id=consumer_group,
                                      auto_offset_reset=offset_reset)
        topic_whitelist = self.config.get('consumer', 'topic_whitelist')
        self.logger.info("Topic list is " + topic_whitelist)
        self.consumer.subscribe(topic_whitelist.split(","), None, self)
        self.logger.info("Consumer " + self.consumer_id + " starting.... " + str(self.consumer.assignment()))
        signal.signal(signal.SIGINT, self.exit_gracefully)
        signal.signal(signal.SIGTERM, self.exit_gracefully)
        while not self.shutting_down:
            for message in self.consumer:
                consumer_message = MessageInfo(message.topic, message.partition, message.key,
                                               message.value, message.offset)
                self.process_message(consumer_message)
                if self.shutting_down:
                    break
                self.check_for_rotation()
        for part in self.partitions:
            self.partitions[part].writer.close()
        self.logger.info("Graceful shutdown of consumer " + str(self.consumer_id) + " successful")
def consume_exact_once(host='192.168.11.137:9092', topic='first_topic', only_new=True):
    consumer = KafkaConsumer(group_id='1', bootstrap_servers=host)
    consumer.subscribe([topic], listener=CRL(consumer))
    consumer.poll()
    for tp in consumer.assignment():
        offset = 0
        if tp.partition in CRL.mem_db:
            offset = CRL.mem_db[tp.partition]
            consumer.seek(tp, offset)
        else:
            consumer.seek_to_end(tp)
    while True:
        message_batch = consumer.poll()
        for topic_partition, partition_batch in message_batch.items():
            for message in partition_batch:
                print(message)
                CRL.mem_db[topic_partition.partition] = message.offset
def run(self):
    group_id = "device-manager.monitor#" + str(uuid.uuid4())
    start = time.time()
    LOGGER.debug(f' will create consumer {CONFIG.get_kafka_url()} {group_id} {self.topic}')
    consumer = KafkaConsumer(bootstrap_servers=CONFIG.get_kafka_url(),
                             group_id=group_id)
    consumer.subscribe(topics=[self.topic], listener=Listener(self))
    StatusMonitor.wait_init(consumer)
    LOGGER.debug(f' kafka consumer created {self.topic} - {time.time() - start}')
    LOGGER.debug(consumer.assignment())
    for message in consumer:
        LOGGER.debug(f" Got kafka event [{self.topic}] {message}")
        data = None
        try:
            data = json.loads(message.value)
        except Exception as error:
            LOGGER.error(f" Received message is not valid json {error}")
            continue
        metadata = data.get('metadata', None)
        if metadata is None:
            LOGGER.error(f' Invalid kafka event detected - no metadata included')
            continue
        reason = metadata.get('reason', None)
        if reason == 'statusUpdate':
            continue
        deviceid = metadata.get('deviceid', None)
        tenant = metadata.get('tenant', None)
        if (deviceid is None) or (tenant is None):
            LOGGER.warning(f" Missing device identification from event")
            continue
        self.set_online(tenant, deviceid, message.partition, metadata.get('exp', None))
def kafka_load_dau(seconds=1800):
    dau = {}
    consumer = KafkaConsumer('posthistory',
                             bootstrap_servers=['54.189.125.21:9092'],
                             auto_offset_reset='earliest',
                             enable_auto_commit=True,
                             auto_commit_interval_ms=1000)
    # Finding the end offset so that we can stop the loop.
    next(consumer)
    partition = consumer.assignment().pop()
    end_offset = consumer.end_offsets([partition])[partition] - 1
    for raw_msg in consumer:
        msg = raw_msg.value.decode('utf-8')
        msg_data = json.loads(msg)
        ts = msg_data['_CreationDate'][:16]
        dau[ts] = dau.get(ts, 0) + 1
        if raw_msg.offset == end_offset:
            break
    return dau
def __run_listen_for_config_changes_forever(self):
    """
    Polls for config update messages. When a new message is received,
    locks instance state and applies the update.
    """
    # c = KafkaConsumer(topics=[self.scores_config_update_topic_name],
    configs = {
        'bootstrap_servers': [
            'ec2-100-20-18-195.us-west-2.compute.amazonaws.com:9092',
            'ec2-100-20-8-59.us-west-2.compute.amazonaws.com:9092',
            'ec2-100-20-75-14.us-west-2.compute.amazonaws.com:9092'
        ],
        'group_id': 1,
        'auto_offset_reset': 'latest',
        'enable_auto_commit': True,
        'value_deserializer': lambda x: json.loads(x.decode('utf-8'))
    }
    c = KafkaConsumer(**configs)
    c.subscribe(self.scores_config_update_topic_name)
    while True:
        msgs = c.poll(float("Inf"))
        if len(msgs) == 0:
            continue
        p = list(c.assignment())[-1]
        m = list(msgs.values())[-1][-1]
        print(dir(m))
        print("===================================")
        print("Config change received: %s" % m.value)
        self.__update_scoring_function(m.value)
        print("Config change affected")
        print("===================================")
def get_kafka_data(self):
    result = []
    try:
        consumer = KafkaConsumer(KAFKA_TOPICS,
                                 bootstrap_servers=KAFKA_HOSTS,
                                 consumer_timeout_ms=1000)
        topics = consumer.topics()           # list of topics on the cluster
        assignment = consumer.assignment()   # TopicPartitions assigned to this consumer
        # Last offsets available for the assigned partitions
        end_offsets = consumer.end_offsets(assignment)
        offset = end_offsets[list(end_offsets.keys())[0]]
        offset = offset - 5 if offset >= 5 else offset
        # Reset the position and start consuming five offsets before the end
        consumer.seek(TopicPartition(topic=KAFKA_TOPICS, partition=0), offset)
        for message in consumer:
            content = json.loads(message.value)
            for item in content:
                result.append({
                    "name": item['event_top_type'],
                    "app": item['event_type'],
                    "ip": item['dev_ip'],
                    "time": datetime.strptime(item['event_time'],
                                              "%Y%m%d%H%M%S").strftime("%Y-%m-%d %H:%M:%S")
                })
            if message.offset == offset - 1:
                break
        consumer.close()
    except Exception as e:
        logger.error(e)
    return result
def kafka_consumer_test():
    topic_name = 'topic_test'
    bootstrap_servers = ['localhost:9092']
    # consumer = KafkaConsumer(topic_name, bootstrap_servers=bootstrap_servers,
    #                          group_id='test_group', auto_offset_reset='earliest')
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers,
                             group_id='test_group',
                             auto_offset_reset='earliest')
    # enable_auto_commit=True (the default) is required to resume from the last committed
    # position; the broker then stores the offset for this group_id.
    # auto_offset_reset='earliest' (default is 'latest') only takes effect when the stored
    # offset is missing or out of range.
    partition_set = consumer.partitions_for_topic(topic_name)
    partitions = [TopicPartition(topic_name, partition_idx) for partition_idx in partition_set]
    consumer.assign(partitions)
    topic_partition_set = consumer.assignment()
    # consumer.seek_to_beginning()  # move the offset to the first value kept by the cluster
    #                               # (not necessarily 0); with no arguments it applies to
    #                               # every partition assigned to the consumer
    # consumer.seek_to_end()        # move the offset to the first not-yet-consumed value;
    #                               # with no arguments it applies to every assigned partition
    for topic_partition in topic_partition_set:
        offset = consumer.position(topic_partition)
        print("partition: %d, offset: %d" % (topic_partition.partition, offset))
        # consumer.seek(topic_partition, offset)  # avoid setting this manually if possible
    for msg in consumer:
        print("topic:%s, partition:%d, offset:%d: key=%s value=%s" % (
            msg.topic, msg.partition, msg.offset, msg.key, msg.value.decode("utf-8")))
            print(i)
            print(message.offset)
            Tag = False
            break
        if i % 1000 == 0:
            print(i, message.offset, message.partition)
    consumer.close()
    lock.release()


if __name__ == '__main__':
    client = InsecureClient('http://lg-11-152.ko.cn:50070', user='******')
    consumer = KafkaConsumer('kzmg_hunter_login', bootstrap_servers=['172.23.11.150:9092'])
    consumer.topics()
    for key, value in consumer.end_offsets(consumer.assignment()).items():
        print(key, value)
        name_mid_num = str(key)[53:54]
        # name_mid_num = str(key)[51:52]
        print(name_mid_num)
        locals()['consumer' + name_mid_num] = KafkaConsumer('kzmg_hunter_login',
                                                            bootstrap_servers=['172.23.11.150:9092'])
        locals()['consumer' + name_mid_num].topics()
        locals()['consumer' + name_mid_num].seek(key, 0)
        datanum = value
        try:
            lock = thread.allocate_lock()
            lock.acquire()
            locks.append(lock)
            thread.start_new_thread(save_jsondata,
                                    (client, locals()['consumer' + name_mid_num],
                                     datanum, name_mid_num, lock))
        except:
class CheckKafka(PubSubNagiosPlugin):

    def __init__(self):
        # Python 2.x
        super(CheckKafka, self).__init__()
        # Python 3.x
        # super().__init__()
        self.name = 'Kafka'
        self.default_port = 9092
        self.producer = None
        self.consumer = None
        self.topic = None
        self.client_id = 'Hari Sekhon ' + os.path.basename(get_topfile()) + ' ' + __version__
        self.group_id = self.client_id + ' ' + str(os.getpid()) + ' ' + random_alnum(10)
        self.acks = '1'
        self.retries = 0
        self.partition = None
        self.topic_partition = None
        self.brokers = None
        self.timeout_ms = None
        self.start_offset = None

    def add_options(self):
        # super(CheckKafka, self).add_options()
        # TODO: (host_envs, default_host) = getenvs2('HOST', default_host, name)
        # TODO: env support for Kafka brokers
        self.add_opt('-H', '--host',
                     '-B', '--brokers',
                     dest='brokers', metavar='broker_list', default='localhost:9092',
                     help='Kafka Broker seed list in form host[:port],host2[:port2]... (default: localhost:9092)')
        self.add_opt('-T', '--topic', help='Kafka Topic')
        self.add_opt('-p', '--partition', type=int, help='Kafka Partition (default: 0)', default=0)
        self.add_opt('-a', '--acks', default=1, choices=[1, 'all'],
                     help='Acks to require from Kafka. Valid options are \'1\' for Kafka ' +
                          'partition leader, or \'all\' for all In-Sync Replicas (may block causing ' +
                          'timeout if replicas aren\'t available, default: 1)')
        self.add_opt('-s', '--sleep', metavar='secs',
                     help='Sleep in seconds between producing and consuming from given topic (default: 0.5)')
        self.add_opt('--list-topics', action='store_true', help='List Kafka topics from broker(s) and exit')
        self.add_opt('--list-partitions', action='store_true',
                     help='List Kafka topic partitions from broker(s) and exit')
        self.add_thresholds(default_warning=1, default_critical=2)

    def run(self):
        try:
            super(CheckKafka, self).run()
        #except KafkaError as _:
            #raise CriticalError(_)
        except KafkaError:
            raise CriticalError(self.exception_msg())

    @staticmethod
    def exception_msg():
        return traceback.format_exc().split('\n')[-2]

    def get_topics(self):
        self.consumer = KafkaConsumer(bootstrap_servers=self.brokers,
                                      client_id=self.client_id,
                                      request_timeout_ms=self.timeout_ms)
        return self.consumer.topics()

    def print_topics(self):
        print('Kafka Topics:\n')
        for topic in self.get_topics():
            print(topic)

    def get_topic_partitions(self, topic):
        self.consumer = KafkaConsumer(topic,
                                      bootstrap_servers=self.brokers,
                                      client_id=self.client_id,
                                      request_timeout_ms=self.timeout_ms)
        if topic not in self.get_topics():
            raise CriticalError("topic '{0}' does not exist on Kafka broker".format(topic))
        partitions = self.consumer.partitions_for_topic(topic)
        assert isSet(partitions)
        return partitions

    def print_topic_partitions(self, topic):
        print('Kafka topic \'{0}\' partitions:\n'.format(topic))
        #for partition in self.get_topic_partitions(topic):
        #    print(partition)
        print(list(self.get_topic_partitions(topic)))
        print()

    def process_args(self):
        self.brokers = self.get_opt('brokers')
        # TODO: add broker list validation back in
        # validate_hostport(self.brokers)
        log_option('brokers', self.brokers)
        self.timeout_ms = max((self.timeout * 1000 - 1000) / 2, 1000)
        try:
            list_topics = self.get_opt('list_topics')
            list_partitions = self.get_opt('list_partitions')
            if list_topics:
                self.print_topics()
                sys.exit(ERRORS['UNKNOWN'])
            self.topic = self.get_opt('topic')
        except KafkaError:
            raise CriticalError(self.exception_msg())
        if self.topic:
            validate_chars(self.topic, 'topic', 'A-Za-z-')
        elif list_topics or list_partitions:
            pass
        else:
            self.usage('--topic not specified')
        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())
        self.partition = self.get_opt('partition')
        # technically optional, will hash to a random partition, but need to know which partition to get offset
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms)
        #key_serializer
        #value_serializer
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen a scenario where None is returned and all messages are read again, better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(bootstrap_servers=self.brokers,
                                      client_id=self.client_id,
                                      acks=self.acks,
                                      batch_size=0,
                                      max_block_ms=self.timeout_ms,
                                      request_timeout_ms=self.timeout_ms)
        #key_serializer
        #value_serializer
        log.debug('producer.send()')
        self.producer.send(self.topic,
                           key=self.key,
                           partition=self.partition,
                           value=self.publish_message)
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError("failed to find matching consumer record with key '{0}'".format(self.key))
        return msg
class WarriorKafkaConsumer():
    """
    This class contains all kafka consumer methods
    """
    def __init__(self, *topics, **configs):
        """
        Create Kafka Consumer object
        """
        print_info("creating kafka consumer")
        try:
            self.kafka_consumer = KafkaConsumer(*topics, **configs)
        except KafkaError as exc:
            print_error("Kafka consumer - Exception during connecting to broker - {}".format(exc))

    def subscribe_to_topics(self, topics, **kwargs):
        """
        Subscribe to a list of specified topics.
        Arguments:
          topics(list): list of topic names to subscribe to
          pattern(list): list of topic name patterns to subscribe to
          listener(func): callback function
        Returns:
          result(bool): False if an exception occurs, True otherwise
        """
        pattern = kwargs.get("pattern", None)
        listener = kwargs.get("listener", None)
        print_info("subscribe to topics {}".format(topics))
        try:
            self.kafka_consumer.subscribe(topics=topics, pattern=pattern, listener=listener)
            result = True
        except KafkaError as exc:
            print_error("Exception during subscribing to topics - {}".format(exc))
            result = False
        return result

    def unsubscribe_to_topics(self):
        """
        Unsubscribe from all topics.
        Arguments:
          None.
        Returns:
          result(bool): False if an exception occurs, True otherwise
        """
        print_info("unsubscribe from all topics")
        try:
            self.kafka_consumer.unsubscribe()
            result = True
        except KafkaError as exc:
            print_error("Exception during unsubscribing from topics - {}".format(exc))
            result = False
        return result

    def assign_partitions(self, partitions):
        """
        Assign partitions to the consumer.
        Arguments:
          partitions(list): list of [topic, partition] lists
            example: [[topic1, 1], [topic2, 1]]
        Returns:
          result(bool): False if an exception occurs, True otherwise
        """
        print_info("assigning partitions to consumer {}".format(partitions))
        topic_partitions = [TopicPartition(topic=tup[0], partition=tup[1]) for tup in partitions]
        try:
            self.kafka_consumer.assign(topic_partitions)
            result = True
        except KafkaError as exc:
            print_error("Exception during assigning partitions - {}".format(exc))
            result = False
        return result

    def seek_to_position(self, topic, partition, offset):
        """
        Seek to the given offset.
        Arguments:
          topic(str): topic name
          partition(int): partition number
          offset(int): offset number
        Returns:
          result(bool): False if an exception occurs, True otherwise
        """
        print_info("seeking to position {}:{}:{}".format(topic, partition, offset))
        topic_partition = TopicPartition(topic=topic, partition=partition)
        try:
            self.kafka_consumer.seek(partition=topic_partition, offset=offset)
            result = True
        except KafkaError as exc:
            print_error("Exception during seek - {}".format(exc))
            result = False
        return result

    def get_messages(self, get_all_messages=False, **kwargs):
        """
        Get messages from the consumer.
        Arguments:
          get_all_messages(bool): set to True to get all the messages;
            seeks to the beginning. Defaults to False.
          timeout(int): timeout in milliseconds
          max_records(int): maximum messages to fetch
        Returns:
          messages(list): messages from the consumer
        """
        timeout_ms = kwargs.get("timeout", 0)
        max_records = kwargs.get("max_records", None)
        messages = []
        msg_pack = {}
        print_info("get messages published to subscribed topics")
        try:
            if get_all_messages:
                self.kafka_consumer.seek_to_beginning()
            msg_pack = self.kafka_consumer.poll(timeout_ms, max_records)
        except KafkaError as exc:
            print_error("Exception occurred in get_messages - {}".format(exc))
        for topic, message_list in msg_pack.items():
            for message in message_list:
                messages.append(message.value)
        return messages

    def get_topics(self):
        """
        Get subscribed topics of the consumer.
        Arguments:
          None.
        Returns:
          topic_list(list of lists): list of [topic, partition] lists
            example: [[topic1, 1], [topic2, 2]]
        """
        print_info("get all the topics consumer is subscribed to")
        try:
            topic_partitions = self.kafka_consumer.assignment()
            topic_list = [[topic_partition.topic, topic_partition.partition]
                          for topic_partition in topic_partitions]
        except KafkaError as exc:
            print_error("Exception during getting assigned partitions - {}".format(exc))
            topic_list = None
        return topic_list
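# A minimal usage sketch for the wrapper class above (not from the original source);
# the broker address, group id, and topic name are hypothetical.
def _example_warrior_consumer():
    wkc = WarriorKafkaConsumer(bootstrap_servers='localhost:9092',
                               auto_offset_reset='earliest',
                               group_id='warrior-demo')
    wkc.subscribe_to_topics(['demo-topic'])
    for value in wkc.get_messages(timeout=1000):
        print(value)
    print(wkc.get_topics())  # [[topic, partition], ...] once partitions are assigned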
class ConsumerTimeStampWindow:
    def __init__(self, broker_list, group_name, topic,
                 enable_auto_commit=True, auto_offset_reset='latest'):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name,
                                      bootstrap_servers=broker_list,
                                      enable_auto_commit=enable_auto_commit,
                                      auto_offset_reset=auto_offset_reset)

    def consumer_from_offset_window(self, process_msg, begin_time, end_time):
        begin_offset_dic, end_offset_dic = self.get_offset_time_window(begin_time, end_time)
        for topic_partition, offset_and_timestamp in begin_offset_dic.items():
            self.consumer.seek(topic_partition, offset_and_timestamp[0])
        self.consumer.subscribe(self.topic)
        self.consumer.poll(0)
        topic_partition_info = self.consumer.assignment()
        partition_consumer_finish_flag = dict(zip(topic_partition_info,
                                                  [False] * len(topic_partition_info)))
        while True:
            if False not in partition_consumer_finish_flag.values():
                return
            consumer_records = self.consumer.poll(100)
            for partition_info, records in consumer_records.items():
                if partition_consumer_finish_flag[partition_info]:
                    print('-------------- {0} consumer finish --------------'.format(partition_info))
                    break
                for record in records:
                    if record.offset <= end_offset_dic[partition_info][0]:
                        process_msg(record)
                    else:
                        partition_consumer_finish_flag[partition_info] = True

    def get_offset_time_window(self, begin_time, end_time):
        partitions_structs = []
        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        begin_search = {}
        for partition in partitions_structs:
            begin_search[partition] = begin_time if isinstance(begin_time, int) \
                else self.__str_to_timestamp(begin_time)
        begin_offset = self.consumer.offsets_for_times(begin_search)

        end_search = {}
        for partition in partitions_structs:
            end_search[partition] = end_time if isinstance(end_time, int) \
                else self.__str_to_timestamp(end_time)
        end_offset = self.consumer.offsets_for_times(end_search)

        for topic_partition, offset_and_timestamp in begin_offset.items():
            b_offset = 'null' if offset_and_timestamp is None else offset_and_timestamp[0]
            e_offset = 'null' if end_offset[topic_partition] is None else end_offset[topic_partition][0]
            print('Between {0} and {1}, {2} offset range = [{3}, {4}]'.format(
                begin_time, end_time, topic_partition, b_offset, e_offset))
        return begin_offset, end_offset

    @staticmethod
    def __str_to_timestamp(str_time, format_type='%Y-%m-%d %H:%M:%S'):
        time_array = time.strptime(str_time, format_type)
        return int(time.mktime(time_array)) * 1000
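# A minimal usage sketch for the time-window consumer above (not from the original
# source); the broker, group, topic, and time range below are hypothetical.
def _example_time_window():
    window = ConsumerTimeStampWindow(broker_list='localhost:9092',
                                     group_name='window-demo',
                                     topic='demo-topic',
                                     auto_offset_reset='earliest')
    # print every record whose timestamp falls between the two wall-clock times
    window.consumer_from_offset_window(process_msg=print,
                                       begin_time='2018-05-10 00:00:00',
                                       end_time='2018-05-10 12:00:00')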
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("topic")
    parser.add_argument("-H", "--host", type=str, help="Kafka server and port.", default="localhost:9092")
    parser.add_argument("-r", "--replay", action="store_true", help="Display all available log entries.", default=False)
    parser.add_argument("-m", "--match", type=str, help="Initial match pattern.", default=None)
    args = parser.parse_args()
    pattern = args.match
    if args.replay:
        auto_offset_reset = 'earliest'
    else:
        auto_offset_reset = 'latest'
    if args.topic[-5:] == '.json':
        value_deserializer = json_value_deserializer
    else:
        value_deserializer = None
    consumer = KafkaConsumer(args.topic,
                             group_id=None,
                             bootstrap_servers=args.host,
                             value_deserializer=value_deserializer,
                             auto_offset_reset=auto_offset_reset)
    while True:
        messages = consumer.poll(250)
        for tp in six.itervalues(messages):
            for message in tp:
                if isinstance(message.value, dict):
                    if message.value['klog_level'] in colors:
                        c = colors[message.value['klog_level']]
                    else:
                        c = attr(0)
                    params = {'topic': message.topic,
                              'offset': message.offset,
                              'level': message.value['klog_level'].upper()}
                    params['time'] = str(datetime.datetime.fromtimestamp(float(message.value['klog_time'])))
                    params['msg'] = message.value['klog_message']
                    if pattern and re.search(pattern, params['msg']) is not None:
                        c += match_color
                    msg = msg_format.format(**params)
                else:
                    c = attr(0)
                    msg = message.value
                print(c + msg + attr(0))
        po = select.poll()
        po.register(sys.stdin, select.POLLIN)
        if po.poll(0):
            ch = sys.stdin.read(1)
            if ch == 'm':
                pattern = sys.stdin.readline().rstrip('\n').encode('utf-8')
                pattern = pattern.rstrip('\n').encode('utf-8')
            elif ch == 'r':
                offset = sys.stdin.readline().rstrip('\n').encode('utf-8')
                offset = int(offset)
                for tp in consumer.assignment():
                    position = consumer.position(tp)
                    consumer.seek(tp, max(0, position - offset))
            elif ch == 'R':
                for tp in consumer.assignment():
                    consumer.seek_to_beginning(tp)
            elif ch == 'p':
                for tp in consumer.assignment():
                    consumer.pause(tp)
            elif ch == 'P':
                for tp in consumer.assignment():
                    consumer.resume(tp)
            elif ch == 'q':
                # FIXME: kafka currently (1.0.1) raises an exception on close
                #consumer.close()
                exit()
class LiveTable:
    """
    LiveTable uses event sourcing on a "KTable" topic to reconstitute a full table
    for taking "snapshots" as reports. The constructor requires a KafkaConsumer to
    read "table updates" from. The windowing on the table is configurable. A user
    defined function can be supplied/updated that uses attributes of the post to
    create a new, derived column.
    """

    def __init__(self, input_topic_name: str, bootstrap_servers: Sequence[str],
                 time_window_size=datetime.timedelta(days=3),
                 scoring_function=ScoringFunction()):
        """
        :param input_topic_name:
        :param bootstrap_servers:
        :param time_window_size:
        :param scoring_function:
        """
        self.producer = KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            value_serializer=lambda x: json.dumps(x).encode('utf-8'))
        self.consumer = KafkaConsumer(
            input_topic_name,
            bootstrap_servers=list(bootstrap_servers),
            auto_offset_reset='earliest',
            enable_auto_commit=True,
            group_id=random.randint(0, 999999),
            value_deserializer=lambda x: json.loads(x.decode('utf-8')))
        self.scoring_function = scoring_function
        self.time_window_size = time_window_size
        self.rolling_events_processed = 0
        self.rolling_sum_ingest_latency = 0
        self.rolling_sum_click_latency = 0
        self.time_window_start = None
        self.time_window_start_epoch = None
        self.topic_partition = None
        # initializes time_window_start, time_window_start_epoch, and topic_partition
        self.__seek_to_window_start()
        self.posts = {}
        self.__bulk_consume_new_events()
        self.scoring_function_lock = threading.Lock()

    def update(self):
        """
        Plays updates to the table from the kafka topic. Purges table entries that are
        past their expiration date. Enriches all entries by applying the scoring function.
        :return:
        """
        self.__garbage_collect_old()
        self.__bulk_consume_new_events()

    def get_snapshot(self):
        """
        :return: a copy of the current state of the table
        """
        self.__apply_score()
        return self.posts.copy()

    def update_scoring_function(self, scoring_function: ScoringFunction):
        """
        Updates the scoring function for this table. Applies the scoring function to
        all current entries in the table.
        :param scoring_function:
        :return:
        """
        print("updating scoring fn")
        self.scoring_function_lock.acquire()
        self.scoring_function = scoring_function
        self.scoring_function_lock.release()
        print("updated")
        # self.__apply_score()

    def __apply_score(self):
        """
        Applies the scoring function to all entries in the table.
        """
        self.scoring_function_lock.acquire()
        for key, json_dict in self.posts.items():
            json_dict['score'] = self.scoring_function.score(
                json_dict['PREVIEW'], json_dict['FULL_VIEW'])
            json_dict['coldness_score'] = self.scoring_function.coldness_score(
                json_dict['PREVIEW'])
            json_dict['hotness_score'] = self.scoring_function.hotness_score(
                json_dict['PREVIEW'], json_dict['FULL_VIEW'])
        self.scoring_function_lock.release()

    def __bulk_consume_new_events(self):
        """
        Reads the kafka topic as an event source to reconstitute a "snapshot" of scores
        for all posts by replaying them into a dictionary.
        """
        # end_offset = self.consumer.end_offsets([self.topic_partition])[self.topic_partition] - 1
        end_offsets = {}
        partitions = {}
        topics_consumed = 0
        for p in self.assignments:
            partitions[p.partition] = p
            end_offsets[p.partition] = self.consumer.end_offsets([p])[p] - 1
            if self.consumer.committed(p) is not None and self.consumer.committed(p) >= end_offsets[p.partition]:
                topics_consumed += 1
            else:
                self.consumer.resume(p)
        if topics_consumed >= len(partitions):
            print("no new data")
            return
        print("updates in prog")
        for m in self.consumer:
            if m is not None and m.value['POST_TIMESTAMP'] > self.time_window_start_epoch:
                self.posts[m.value['PROPERTIES_SHOPPABLE_POST_ID']] = m.value
                # self.__track_latency(m)
            if m.offset >= end_offsets[m.partition]:
                self.consumer.pause(partitions[m.partition])
                topics_consumed += 1
                print(topics_consumed)
                if topics_consumed >= len(partitions):
                    break
        self.consumer.commit()

    def __track_latency(self, m):
        if 'LAST_CLICK_TIMESTAMP' not in m.value or 'INGEST_TIMESTAMP' not in m.value:
            return
        click_timestamp = m.value['LAST_CLICK_TIMESTAMP']
        ingest_timestamp = m.value['INGEST_TIMESTAMP']
        if click_timestamp is None or ingest_timestamp is None:
            return
        now = round(time.time() * 1000)
        self.rolling_events_processed += 1
        self.rolling_sum_ingest_latency += now - ingest_timestamp
        self.rolling_sum_click_latency += now - click_timestamp
        if self.rolling_events_processed >= 1000:
            metrics = {
                'average_latency_ingest': self.rolling_sum_ingest_latency / self.rolling_events_processed,
                'average_latency_click': self.rolling_sum_click_latency / self.rolling_events_processed
            }
            self.producer.send(topic="average_latency", value=metrics)
            # self.producer.flush()
            self.rolling_events_processed = 0
            self.rolling_sum_ingest_latency = 0
            self.rolling_sum_click_latency = 0
            # print("===================================")
            # print("       PUSH LATENCY METRICS        ")
            # print(metrics)
            # print("===================================")

    def __garbage_collect_old(self):
        """
        Removes all expired table entries.
        """
        for post_id in list(self.posts.keys()):
            if self.posts[post_id]['POST_TIMESTAMP'] < self.time_window_start_epoch:
                self.posts.pop(post_id)

    def __seek_to_window_start(self):
        """
        Mutates the consumer to "seek" the kafka topic offset to that of the earliest
        event that is inside the time window.
        """
        self.__update_time_window_start()
        if len(self.consumer.assignment()) == 0:
            # poll the consumer to generate a topic partition assignment
            message = self.consumer.poll(1, 1)
            while len(message) == 0:
                message = self.consumer.poll(1, 1)
        self.topic_partition = self.consumer.assignment().pop()
        self.assignments = self.consumer.assignment()
        time_window_start_epoch = int(self.time_window_start.timestamp() * 1000)
        # get the first offset that is inside the time window
        start_offset = self.consumer.offsets_for_times(
            {self.topic_partition: time_window_start_epoch})[self.topic_partition].offset
        # set the consumer to consume from this offset
        self.consumer.seek(self.topic_partition, start_offset)

    def __update_time_window_start(self):
        """
        Sets the start of the time window to now - self.time_window_size.
        """
        self.time_window_start = datetime.datetime.now() - self.time_window_size
        self.time_window_start_epoch = int(self.time_window_start.timestamp() * 1000)
import re
import time
import pandas as pd
import json
from kafka import KafkaConsumer, TopicPartition

datalist = []
i = 0

# Consumer (manually setting the offset)
consumer = KafkaConsumer('phone-game-userinfo',
                         bootstrap_servers=['172.23.11.150:9092'])
print(consumer.partitions_for_topic("phone-game-userinfo"))  # partition info for the phone-game-userinfo topic
print(consumer.topics())                                     # list of topics on the cluster
print(consumer.subscription())                               # topics this consumer is subscribed to
print(consumer.assignment())                                 # TopicPartitions assigned to this consumer
print(consumer.beginning_offsets(consumer.assignment()))     # earliest offsets available to this consumer

# Reset the position and start consuming from offset 202025
consumer.seek(TopicPartition(topic=u'phone-game-userinfo', partition=0), 202025)

# Get the last offset for the given partitions
print(consumer.end_offsets(consumer.assignment()))
print(consumer.end_offsets([TopicPartition(topic='phone-game-userinfo', partition=0)]))  # equivalent to the line above

t = '2018-05-10'
timeArray = time.strptime(t, '%Y-%m-%d')
timeStamp = int(time.mktime(timeArray)) * 1000  # offsets_for_times() expects milliseconds
print(consumer.offsets_for_times({TopicPartition(topic='phone-game-userinfo', partition=0): timeStamp}))

for message in consumer:
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, message.offset,
                                         message.key, message.value.decode('utf-8')))
    # print(message.value.decode('utf-8'))
    # print(message.offset)
    # data = message.value.split(',')
class CheckKafka(PubSubNagiosPlugin):

    def __init__(self):
        # Python 2.x
        super(CheckKafka, self).__init__()
        # Python 3.x
        # super().__init__()
        self.name = 'Kafka'
        self.default_port = 9092
        self.producer = None
        self.consumer = None
        self.topic = None
        self.client_id = 'Hari Sekhon ' + os.path.basename(get_topfile()) + ' ' + __version__
        self.group_id = self.client_id + ' ' + str(os.getpid()) + ' ' + random_alnum(10)
        self.acks = '1'
        self.retries = 0
        self.partition = None
        self.topic_partition = None
        self.brokers = None
        self.timeout_ms = None
        self.start_offset = None

    def add_options(self):
        # super(CheckKafka, self).add_options()
        # TODO: (host_envs, default_host) = getenvs2('HOST', default_host, name)
        # TODO: env support for Kafka brokers
        self.add_opt('-H', '--host',
                     '-B', '--brokers',
                     dest='brokers', metavar='broker_list', default='localhost:9092',
                     help='Kafka Broker seed list in form host[:port],host2[:port2]... (default: localhost:9092)')
        self.add_opt('-T', '--topic', help='Kafka Topic')
        self.add_opt('-p', '--partition', type=int, help='Kafka Partition (default: 0)', default=0)
        self.add_opt('-a', '--acks', default=1, choices=['1', 'all'],
                     help='Acks to require from Kafka. Valid options are \'1\' for Kafka ' +
                          'partition leader, or \'all\' for all In-Sync Replicas (may block causing ' +
                          'timeout if replicas aren\'t available, default: 1)')
        self.add_opt('-s', '--sleep', metavar='secs',
                     help='Sleep in seconds between producing and consuming from given topic (default: 0.5)')
        self.add_opt('--list-topics', action='store_true', help='List Kafka topics from broker(s) and exit')
        self.add_opt('--list-partitions', action='store_true',
                     help='List Kafka topic partitions from broker(s) and exit')
        self.add_thresholds(default_warning=1, default_critical=2)

    def run(self):
        try:
            super(CheckKafka, self).run()
        #except KafkaError as _:
            #raise CriticalError(_)
        except KafkaError:
            err = self.exception_msg()
            if 'NoBrokersAvailable' in err:
                err += ' ({0})'.format(self.brokers)
            raise CriticalError(err)

    @staticmethod
    def exception_msg():
        return traceback.format_exc().split('\n')[-2]

    def get_topics(self):
        self.consumer = KafkaConsumer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            request_timeout_ms=self.timeout_ms
        )
        return self.consumer.topics()

    def print_topics(self):
        print('Kafka Topics:\n')
        for topic in self.get_topics():
            print(topic)

    def get_topic_partitions(self, topic):
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            request_timeout_ms=self.timeout_ms
        )
        if topic not in self.get_topics():
            raise CriticalError("topic '{0}' does not exist on Kafka broker".format(topic))
        partitions = self.consumer.partitions_for_topic(topic)
        assert isSet(partitions)
        return partitions

    def print_topic_partitions(self, topic):
        print('Kafka topic \'{0}\' partitions:\n'.format(topic))
        #for partition in self.get_topic_partitions(topic):
        #    print(partition)
        print(list(self.get_topic_partitions(topic)))
        print()

    def process_args(self):
        self.brokers = self.get_opt('brokers')
        # TODO: add broker list validation back in
        # validate_hostport(self.brokers)
        log_option('brokers', self.brokers)
        self.timeout_ms = max((self.timeout * 1000 - 1000) / 2, 1000)
        try:
            list_topics = self.get_opt('list_topics')
            list_partitions = self.get_opt('list_partitions')
            if list_topics:
                self.print_topics()
                sys.exit(ERRORS['UNKNOWN'])
            self.topic = self.get_opt('topic')
        except KafkaError:
            raise CriticalError(self.exception_msg())
        if self.topic:
            validate_chars(self.topic, 'topic', 'A-Za-z-')
        elif list_topics or list_partitions:
            pass
        else:
            self.usage('--topic not specified')
        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())
        self.partition = self.get_opt('partition')
        # technically optional, will hash to a random partition, but need to know which partition to get offset
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        try:
            self.acks = int(self.acks)
        except ValueError:
            pass
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        # this is only a guess as Kafka doesn't expose its API version
        #log.debug('kafka api version: %s', self.consumer.config['api_version'])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen a scenario where None is returned and all messages are read again, better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            acks=self.acks,
            batch_size=0,
            max_block_ms=self.timeout_ms,
            request_timeout_ms=self.timeout_ms
        )
        #key_serializer
        #value_serializer
        log.debug('producer.send()')
        self.producer.send(
            self.topic,
            key=self.key,
            partition=self.partition,
            value=self.publish_message
        )
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError("failed to find matching consumer record with key '{0}'".format(self.key))
        return msg