def _get_offsets_from_kafka(brokers, topic, offset_time):
    """Get a dict mapping "<topic>_<partition>" keys to Kafka offset responses."""
    # get client
    client = KafkaClient(brokers)
    # get partitions for a topic
    partitions = client.topic_partitions[topic]
    # https://cwiki.apache.org/confluence/display/KAFKA/
    # A+Guide+To+The+Kafka+Protocol#
    # AGuideToTheKafkaProtocol-OffsetRequest
    MAX_OFFSETS = 1
    offset_requests = [OffsetRequestPayload(topic, part_name, offset_time,
                                            MAX_OFFSETS)
                       for part_name in partitions.keys()]
    offsets_responses = client.send_offset_request(offset_requests)
    offset_dict = {}
    for response in offsets_responses:
        key = "_".join((response.topic, str(response.partition)))
        offset_dict[key] = response
    return offset_dict
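A minimal usage sketch, assuming a reachable broker at localhost:9092 and a topic named my-topic (both placeholders, not from the source); per the protocol guide linked above, an offset_time of -1 requests the latest offset and -2 the earliest:

# Hypothetical usage of _get_offsets_from_kafka; broker address and topic
# name are placeholders. offset_time: -1 = latest offset, -2 = earliest.
latest = _get_offsets_from_kafka("localhost:9092", "my-topic", -1)
earliest = _get_offsets_from_kafka("localhost:9092", "my-topic", -2)
for key, response in latest.items():
    # key looks like "<topic>_<partition>"; response.offsets is a list of
    # length MAX_OFFSETS (1 here)
    print key, response.offsets[0]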
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    bytes_topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic
            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host,
                                                 self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([
                OffsetRequest(kafka_bytestring(topic), partition, -1, 1)
            ])
        except:
            # XXX: We've seen some UnknownErrors here and can't debug
            # without server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
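A sketch of how this base class might be subclassed, assuming some harness assigns the zk and server fixtures before setUp runs (the subclass and its test are illustrative, not from the source):

# Hypothetical subclass; assumes self.zk and self.server are populated by
# the surrounding test harness before setUp is called.
class TestProduceConsume(KafkaIntegrationTestCase):
    def test_round_trip(self):
        start_offset = self.current_offset(self.topic, 0)
        # ... produce self.msgs(range(10)) to partition 0 here
        # (producer setup elided) ...
        self.assertEqual(self.current_offset(self.topic, 0),
                         start_offset + 10)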
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10).decode('utf-8'))
            self.topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host,
                                                 self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        offsets, = self.client.send_offset_request(
            [OffsetRequest(topic, partition, -1, 1)])
        return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
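A small illustration of the message helpers in this variant, assuming tc is an already set-up instance of a concrete subclass (tc is a placeholder name): msg() memoizes per key, so repeat calls return the same unique bytes payload.

# Hypothetical illustration of the msg()/msgs() helpers; `tc` stands in
# for a set-up test case instance.
payload_a = tc.msg("a")          # e.g. "a-<test id>-<uuid4>" encoded to bytes
assert tc.msg("a") == payload_a  # memoized: repeat calls return the same bytes
batch = tc.msgs(["a", "b"])      # list of encoded payloads; "a" is reused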
class Kafka(object):
    executor = ThreadPoolExecutor(20)

    def __init__(self, broker):
        self.broker = broker
        self.client = KafkaClient(broker, timeout=3)

    @run_on_executor
    def getPartition(self, topic):
        """Given a topic, return its list of partition ids."""
        return self.client.get_partition_ids_for_topic(topic)

    @run_on_executor
    def getLogsize(self, topic, partitions):
        """Given a topic and a list of partitions, return logsize data."""
        tp = self.client.send_offset_request(
            [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
        return {p.partition: p.offsets[0] for p in tp}

    @run_on_executor
    def getOffsets(self, topic, partitions, group):
        """Given a topic, partitions, and a group, return offsets data."""
        try:
            # Try the zookeeper-storage API first; it raises
            # UnknownTopicOrPartitionError when no offsets are stored
            # for the given group.
            tp = self.client.send_offset_fetch_request(
                group,
                [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
            offsets = {p.partition: p.offset for p in tp}
        except UnknownTopicOrPartitionError:
            # On that exception, fall back to the kafka-storage API to
            # fetch the offsets.
            consumer = KafkaConsumer(group_id=group,
                                     bootstrap_servers=self.broker,
                                     enable_auto_commit=False)
            tp = [TopicPartition(topic, p) for p in partitions]
            consumer.assign(tp)
            offsets = {p.partition: consumer.position(p) for p in tp}
        return offsets
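A sketch of calling these executor-backed methods from a Tornado coroutine, with placeholder broker, topic, and group names; each @run_on_executor method returns a future that must be yielded:

# Hypothetical Tornado usage; broker/topic/group names are placeholders.
from tornado import gen, ioloop

@gen.coroutine
def show_lag():
    kafka = Kafka("localhost:9092")
    partitions = yield kafka.getPartition("my-topic")
    logsize = yield kafka.getLogsize("my-topic", partitions)
    offsets = yield kafka.getOffsets("my-topic", partitions, "my-group")
    # Lag per partition = logsize (log head offset) - committed offset
    raise gen.Return({p: logsize[p] - offsets[p] for p in partitions})

lag = ioloop.IOLoop.current().run_sync(show_lag)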
def handler(self):
    """Query the current logsize of every partition for each topic in the
    configured Kafka clusters and write it to LevelDB. After each collection
    run, the retention_hour setting is checked and expired data is deleted.
    """
    clusters = base.config["collector"]["clusters"]
    for cluster, metric in clusters.items():
        client = KafkaClient(metric["brokers"], timeout=3)
        for topic in metric["topics"]:
            partitions = client.get_partition_ids_for_topic(topic)
            payload = [OffsetRequestPayload(topic, p, -1, 1)
                       for p in partitions]
            logsize = {p.partition: p.offsets[0]
                       for p in client.send_offset_request(payload)}
            if logsize:
                key = str(int(time.time())).encode("utf-8")
                value = json.dumps(logsize).encode("utf-8")
                db = base.init_leveldb(cluster=cluster, topic=topic)
                db.Put(key, value)
                deadline = base.config["collector"]["clusters"][cluster][
                    "retention_hour"] * 3600
                for key, _ in db.RangeIter():
                    if time.time() - int(key) > deadline:
                        db.Delete(key)
                    else:
                        break
        client.close()
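A sketch of reading back what handler() stores, assuming the same base.init_leveldb helper from the surrounding project; keys are Unix timestamps and values are JSON dicts mapping partition to logsize (cluster and topic names are placeholders):

# Hypothetical read-back of the data handler() writes; base.init_leveldb
# comes from the surrounding project, names are placeholders.
db = base.init_leveldb(cluster="my-cluster", topic="my-topic")
for key, value in db.RangeIter():
    ts = int(key)                # Unix timestamp (seconds) of the sample
    logsize = json.loads(value)  # {"<partition>": <logsize>, ...}
    print ts, logsize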
def check(self, instance):
    consumer_groups = self.read_config(instance, 'consumer_groups',
                                       cast=self._validate_consumer_groups)
    zk_connect_str = self.read_config(instance, 'zk_connect_str')
    kafka_host_ports = self.read_config(instance, 'kafka_connect_str')

    # Construct the Zookeeper path pattern
    zk_prefix = instance.get('zk_prefix', '')
    zk_path_tmpl = zk_prefix + '/consumers/%s/offsets/%s/%s'

    # Connect to Zookeeper
    zk_conn = KazooClient(zk_connect_str, timeout=self.zk_timeout)
    zk_conn.start()

    try:
        # Query Zookeeper for consumer offsets
        consumer_offsets = {}
        topics = defaultdict(set)
        for consumer_group, topic_partitions in consumer_groups.iteritems():
            for topic, partitions in topic_partitions.iteritems():
                # Remember the topic partitions that we've seen so that we
                # can look up their broker offsets later
                topics[topic].update(set(partitions))
                for partition in partitions:
                    zk_path = zk_path_tmpl % (consumer_group, topic,
                                              partition)
                    try:
                        consumer_offset = int(zk_conn.get(zk_path)[0])
                        key = (consumer_group, topic, partition)
                        consumer_offsets[key] = consumer_offset
                    except NoNodeError:
                        self.log.warn('No zookeeper node at %s' % zk_path)
                    except Exception:
                        self.log.exception(
                            'Could not read consumer offset from %s' % zk_path)
    finally:
        try:
            zk_conn.stop()
            zk_conn.close()
        except Exception:
            self.log.exception('Error cleaning up Zookeeper connection')

    # Connect to Kafka
    kafka_conn = KafkaClient(kafka_host_ports, timeout=self.kafka_timeout)

    try:
        # Query Kafka for the broker offsets
        broker_offsets = {}
        for topic, partitions in topics.items():
            offset_responses = kafka_conn.send_offset_request([
                OffsetRequest(topic, p, -1, 1) for p in partitions])

            for resp in offset_responses:
                broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
    finally:
        try:
            kafka_conn.close()
        except Exception:
            self.log.exception('Error cleaning up Kafka connection')

    # Report the broker data
    for (topic, partition), broker_offset in broker_offsets.items():
        broker_tags = ['topic:%s' % topic, 'partition:%s' % partition]
        self.gauge('kafka.broker_offset', broker_offset, tags=broker_tags)

    # Report the consumer data
    for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():
        # Get the broker offset
        broker_offset = broker_offsets.get((topic, partition))

        # Report the consumer offset and lag
        tags = ['topic:%s' % topic, 'partition:%s' % partition,
                'consumer_group:%s' % consumer_group]
        self.gauge('kafka.consumer_offset', consumer_offset, tags=tags)
        self.gauge('kafka.consumer_lag', broker_offset - consumer_offset,
                   tags=tags)
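A sketch of the instance configuration this check reads, with placeholder hosts and names; consumer_groups maps consumer group to topic to the list of partitions to monitor:

# Hypothetical instance config for check(); hosts and names are placeholders.
instance = {
    'kafka_connect_str': 'localhost:9092',
    'zk_connect_str': 'localhost:2181',
    'zk_prefix': '',  # optional chroot prefix for the consumer offset paths
    'consumer_groups': {
        'my-group': {            # consumer group
            'my-topic': [0, 1],  # topic -> partitions to monitor
        },
    },
}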
def monitor(self):
    try:
        kafka_client = KafkaClient(KAFKA_HOSTS, timeout=self.timeout)
    except Exception:
        print "Error, cannot connect to kafka broker."
        sys.exit(1)

    try:
        zookeeper_client = KazooClient(hosts=ZOO_HOSTS, read_only=True,
                                       timeout=self.timeout)
        zookeeper_client.start()
    except Exception:
        print "Error, cannot connect to zookeeper server."
        sys.exit(1)

    for group in CONSUMER_GROUPS:
        for topic in TOPIC_LIST:
            try:
                partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                partitions = map(int, zookeeper_client.get_children(
                    self.zookeeper_url + partition_path))
                for partition in partitions:
                    offset_path = 'consumers/%s/offsets/%s/%s' % (
                        group, topic, partition)
                    offset = zookeeper_client.get(
                        self.zookeeper_url + offset_path)[0]
                    if offset is None:
                        continue
                    obj = {
                        'timestamp': self.timestamp,
                        'group': group,
                        'topic': topic,
                        'partition': int(partition),
                        'metric': 'consumerlag:%s' % group,
                        'tags': 'topic=%s,partition=%s' % (topic, partition),
                        'offset': int(offset)
                    }
                    self.result.append(obj)
            except NoNodeError:
                print "Error, failed to get offset for group[%s], topic[%s]" % (
                    group, topic)
                continue
    zookeeper_client.stop()

    for kafka_topic in TOPIC_LIST:
        self.kafka_logsize[kafka_topic] = {}
        try:
            partitions = kafka_client.topic_partitions[kafka_topic]
            logsize_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1)
                                for p in partitions.keys()]
            logsize_responses = kafka_client.send_offset_request(
                logsize_requests)
            for r in logsize_responses:
                self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]
        except Exception:
            print "Error, failed to get logsize for topic: %s" % kafka_topic
    kafka_client.close()

    payload = []
    for obj in self.result:
        try:
            logsize = self.kafka_logsize[obj['topic']][obj['partition']]
            lag = int(logsize) - int(obj['offset'])
            item = {
                'endpoint': ENDPOINT,
                'metric': obj['metric'],
                'tags': obj['tags'],
                'timestamp': obj['timestamp'],
                'step': STEP,
                'value': lag,
                'counterType': 'GAUGE',
            }
            payload.append(item)
        except Exception:
            print "Error, failed to compute (%s/%s/%s) lag value" % (
                obj['group'], obj['topic'], obj['partition'])

    # 1. Print the lag details
    print "log-lag details:"
    print payload

    # 2. Report to falcon-agent
    if len(payload) > 0:
        requests.post(FALCON_AGENT_URL, data=json.dumps(payload), timeout=10)
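A sketch of the module-level constants monitor() relies on, with placeholder values (the real values would come from the deployment); the push URL follows the usual Open-Falcon agent convention, but treat it as an assumption:

# Hypothetical placeholder values for the globals monitor() references.
KAFKA_HOSTS = 'localhost:9092'
ZOO_HOSTS = 'localhost:2181'
CONSUMER_GROUPS = ['my-group']
TOPIC_LIST = ['my-topic']
ENDPOINT = 'kafka-lag-monitor'
STEP = 60                                      # reporting period in seconds
FALCON_AGENT_URL = 'http://127.0.0.1:1988/v1/push'  # assumed agent endpoint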