Example #1
def _get_topic_offsets(topics, latest):
    """
    :param topics: list of topics
    :param latest: True to fetch latest offsets, False to fetch earliest available
    :return: dict: { (topic, partition): offset, ... }
    """

    # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetRequest
    # https://cfchou.github.io/blog/2015/04/23/a-closer-look-at-kafka-offsetrequest/
    assert set(topics) <= set(ALL)
    with get_simple_kafka_client() as client:
        partition_meta = client.topic_partitions

        # request a single offset per partition: the latest message's
        # offset if time_value is -1, the earliest available if it is -2
        num_offsets = 1
        time_value = -1 if latest else -2

        offsets = {}
        offset_requests = []
        for topic in topics:
            partitions = list(partition_meta.get(topic, {}))
            for partition in partitions:
                offsets[(topic, partition)] = None
                offset_requests.append(
                    OffsetRequestPayload(topic, partition, time_value,
                                         num_offsets))

        responses = client.send_offset_request(offset_requests)
        for r in responses:
            offsets[(r.topic, r.partition)] = r.offsets[0]

        return offsets
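
A minimal usage sketch for the helper above, assuming the module-level ALL topic list and get_simple_kafka_client helper it references; the topic name 'events' is hypothetical:

earliest = _get_topic_offsets(['events'], latest=False)
latest = _get_topic_offsets(['events'], latest=True)
for (topic, partition), tail in latest.items():
    # tail minus head is the number of messages still retained
    print('%s[%d]: %d messages' % (topic, partition,
                                   tail - earliest[(topic, partition)]))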
Example #2
 def get_tail_offsets(self):
     request = [
         OffsetRequestPayload(self.topic, p, -1, 1)
         for p in self.partitions.keys()
     ]
     response = self.client.send_offset_request(request)
     offsets = {r.partition: r.offsets[0]
                for r in response}  # map partition -> latest offset
     return offsets
Example #3
 def current_offset(self, topic, partition):
     try:
         offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)])
     except:
        # XXX: We've seen some UnknownErrors here and can't debug without server logs
         self.zk.child.dump_logs()
         self.server.child.dump_logs()
         raise
     else:
         return offsets.offsets[0]
Example #4
def process(spouts):
    '''
    Returns a named tuple of type PartitionsSummary.
    '''
    results = []
    total_depth = 0
    total_delta = 0
    brokers = []
    for s in spouts:
        for p in s.partitions:
            try:
                k = SimpleClient([p['broker']['host'] + ":" + str(p['broker']['port'])])
            except socket.gaierror as e:
                raise ProcessorError('Failed to contact Kafka broker %s (%s)' % (p['broker']['host'], str(e)))

            earliest_off = OffsetRequestPayload(p['topic'], p['partition'], -2, 1)
            latest_off = OffsetRequestPayload(p['topic'], p['partition'], -1, 1)

            earliest = k.send_offset_request([earliest_off])[0].offsets[0]
            latest = k.send_offset_request([latest_off])[0].offsets[0]
            current = p['offset']

            brokers.append(p['broker']['host'])
            total_depth = total_depth + (latest - earliest)
            total_delta = total_delta + (latest - current)

            results.append(PartitionState._make([
                p['broker']['host'],
                p['topic'],
                p['partition'],
                earliest,
                latest,
                latest - earliest,
                s.id,
                current,
                latest - current]))
    return PartitionsSummary(total_depth=total_depth,
                             total_delta=total_delta,
                             num_partitions=len(results),
                             num_brokers=len(set(brokers)),
                             partitions=tuple(results))
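
Example #4 assumes PartitionState and PartitionsSummary named tuples defined elsewhere; a sketch of definitions consistent with the fields used above (the field names themselves are assumptions):

from collections import namedtuple

# Field order matches the PartitionState._make(...) call above.
PartitionState = namedtuple('PartitionState', [
    'broker', 'topic', 'partition', 'earliest', 'latest',
    'depth', 'spout_id', 'current', 'delta'])
PartitionsSummary = namedtuple('PartitionsSummary', [
    'total_depth', 'total_delta', 'num_partitions',
    'num_brokers', 'partitions'])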
Example #5
 def _python_kafka_partitionoffset(self, topic):
     """
         Print the latest offset of each partition of the topic
     """
     topic = self.topic  # note: the topic argument is shadowed by self.topic
     client = SimpleClient(self.brokers)
     partitions = client.topic_partitions[topic]
     offset_requests = [
         OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
     ]
     offsets_responses = client.send_offset_request(offset_requests)
     for r in offsets_responses:
         print("partition = %s, offset = %s" % (r.partition, r.offsets[0]))
Example #6
    def reset_partition_offset(self, partition):
        """Update offsets using auto_offset_reset policy (smallest|largest)

        Arguments:
            partition (int): the partition for which offsets should be updated

        Returns: Updated offset on success, None on failure
        """
        LATEST = -1
        EARLIEST = -2
        if self.auto_offset_reset == 'largest':
            reqs = [OffsetRequestPayload(self.topic, partition, LATEST, 1)]
        elif self.auto_offset_reset == 'smallest':
            reqs = [OffsetRequestPayload(self.topic, partition, EARLIEST, 1)]
        else:
            # Let's raise a reasonable exception type if the user calls
            # this outside of an exception context
            if sys.exc_info() == (None, None, None):
                raise OffsetOutOfRangeError(
                    'Cannot reset partition offsets without a '
                    'valid auto_offset_reset setting '
                    '(largest|smallest)')
            # Otherwise we should re-raise the upstream exception
            # b/c it typically includes additional data about
            # the request that triggered it, and we do not want to drop that
            raise  # pylint: disable=E0704

        # send_offset_request
        log.info('Resetting topic-partition offset to %s for %s:%d',
                 self.auto_offset_reset, self.topic, partition)
        try:
            (resp, ) = self.client.send_offset_request(reqs)
        except KafkaError as e:
            log.error('%s sending offset request for %s:%d',
                      e.__class__.__name__, self.topic, partition)
        else:
            self.offsets[partition] = resp.offsets[0]
            self.fetch_offsets[partition] = resp.offsets[0]
            return resp.offsets[0]
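
A hedged sketch of how such a reset is typically triggered, assuming a SimpleConsumer-style object that exposes get_messages and the reset_partition_offset method above; inside the except block the method's bare raise re-raises the original error:

try:
    messages = consumer.get_messages(count=100)
except OffsetOutOfRangeError:
    # an exception context is active, so if auto_offset_reset is not
    # set, reset_partition_offset re-raises this original error
    for partition in list(consumer.offsets):
        consumer.reset_partition_offset(partition)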
Example #7
 def getoffset(self, topic):
     '''currently unused'''
     from kafka import SimpleClient
     from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
     from kafka.common import OffsetRequestPayload
     client = SimpleClient(self.server)
     partitions = client.topic_partitions[topic]
     offset_requests = [
         OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
     ]
     offsets_responses = client.send_offset_request(offset_requests)
     for r in offsets_responses:
         print "partition = %s, offset = %s" % (r.partition, r.offsets[0])
Example #8
 def _python_kafka_offsetcount(self, topic):
     """
         Sum the latest offsets across all partitions of the topic
     """
     client = SimpleClient(self.brokers)
     self.topic = topic
     partitions = client.topic_partitions[self.topic]
     offset_requests = [
         OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
     ]
     offsets_responses = client.send_offset_request(offset_requests)
     totaloffset = 0
     for r in offsets_responses:
         totaloffset = totaloffset + r.offsets[0]
     return totaloffset
Example #9
def get_topic_max(topic, k_client):
    """Return the max offset of a kafka topic
    
    Args:
        topic (str): Name of kafka topic
        k_client (obj): Kafka client object
    
    Returns:
        int: Max offset
    """

    partitions = k_client.topic_partitions[topic]
    offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]
    offsets_responses = k_client.send_offset_request(offset_requests)

    for r in offsets_responses:
        if r.partition == 0:
            return r.offsets[0]
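
As written, get_topic_max returns the latest offset of partition 0 only; a hedged variant (hypothetical name) that takes the maximum across all partitions with the same kafka-python payload API:

def get_topic_max_all_partitions(topic, k_client):
    """Return the highest latest offset across all partitions of a topic."""
    partitions = k_client.topic_partitions[topic]
    reqs = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]
    return max(r.offsets[0] for r in k_client.send_offset_request(reqs))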
Example #10
    def _update_produced_offsets(self):
        """
        Arguments:
            request_time_ms (int): Used to ask for all messages before a
                certain time (ms). There are two special values. Specify -1 to receive the latest
                offset (i.e. the offset of the next coming message) and -2 to receive the earliest
                available offset. Note that because offsets are pulled in descending order, asking for
                the earliest offset will always return you a single element.
        """
        for partition in self._client.get_partition_ids_for_topic(self._topic):
            reqs = [OffsetRequestPayload(self._topic, partition, -1, 1)]

            (resp, ) = self._client.send_offset_request(reqs)

            check_error(resp)
            assert resp.topic == self._topic
            assert resp.partition == partition
            self._offsets.produced[partition] = resp.offsets[0]
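
A standalone sketch of the two special time values described above (-1 for latest, -2 for earliest), assuming a connected SimpleClient named client and a hypothetical topic 'my_topic':

LATEST, EARLIEST = -1, -2
for partition in client.get_partition_ids_for_topic('my_topic'):
    (head,) = client.send_offset_request(
        [OffsetRequestPayload('my_topic', partition, EARLIEST, 1)])
    (tail,) = client.send_offset_request(
        [OffsetRequestPayload('my_topic', partition, LATEST, 1)])
    # tail minus head is the number of messages still retained
    print(partition, tail.offsets[0] - head.offsets[0])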
Example #11
    def getOffsets(self, topic, partitions, group):
        """ 指定topic、partition和group, 返回offsets数据 """

        try:
            # try fetching offsets via the zookeeper-storage API first;
            # if no offsets exist for the given group, an
            # UnknownTopicOrPartitionError is raised
            tp = self.client.send_offset_fetch_request(
                group,
                [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
            offsets = {p.partition: p.offset for p in tp}

        except UnknownTopicOrPartitionError:
            # on that exception, fall back to the kafka-storage API
            consumer = KafkaConsumer(group_id=group,
                                     bootstrap_servers=self.broker,
                                     enable_auto_commit=False)
            tp = [TopicPartition(topic, p) for p in partitions]
            consumer.assign(tp)
            offsets = {p.partition: consumer.position(p) for p in tp}

        return offsets
Example #12
def count_kafka_mssg(topic, server):
    """Returns the total number of messages (sum of all partitions) in given kafka topic

    """
    client = SimpleClient(server)

    partitions = client.topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]

    offsets_responses = client.send_offset_request(offset_requests)

    total_mssg = 0

    for r in offsets_responses:
        logging.info("partition = {}, offset = {}".format(
            r.partition, r.offsets[0]))
        total_mssg += int(r.offsets[0])

    return total_mssg
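
Because summing only the latest offsets counts messages that retention may already have deleted, a hedged variant (hypothetical name) that subtracts the earliest offsets could look like this:

def count_kafka_mssg_retained(topic, server):
    """Sum of (latest - earliest) offsets, i.e. messages still on disk."""
    client = SimpleClient(server)
    partitions = client.topic_partitions[topic]
    latest = client.send_offset_request(
        [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
    earliest = client.send_offset_request(
        [OffsetRequestPayload(topic, p, -2, 1) for p in partitions])
    head = {r.partition: r.offsets[0] for r in earliest}
    return sum(r.offsets[0] - head[r.partition] for r in latest)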
Example #13
    def handler(self):
        """ 查询指定Kafka集群Topic中每个Partition当前Logsize, 将Logsize写入LevelDB
            每次收集Logsize数据后会检测retention_day参数,删除过期数据
        """

        clusters = base.config["collector"]["clusters"]

        for cluster, metric in clusters.items():
            client = KafkaClient(metric["brokers"], timeout=3)

            for topic in metric["topics"]:
                partitions = client.get_partition_ids_for_topic(topic)
                payload = [
                    OffsetRequestPayload(topic, p, -1, 1) for p in partitions
                ]
                logsize = {
                    p.partition: p.offsets[0]
                    for p in client.send_offset_request(payload)
                }

                if logsize:
                    key = str(int(time.time())).encode("utf-8")
                    value = json.dumps(logsize).encode("utf-8")

                    db = base.init_leveldb(cluster=cluster, topic=topic)
                    db.Put(key, value)
                    deadline = base.config["collector"]["clusters"][cluster][
                        "retention_hour"] * 3600

                    for key, _ in db.RangeIter():
                        if time.time() - int(key) > deadline:
                            db.Delete(key)
                        else:
                            break

            client.close()
Example #14
    def pending(self, partitions=None):
        """
        Gets the pending message count

        Keyword Arguments:
            partitions (list): list of partitions to check for, default is to check all
        """
        if partitions is None:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequestPayload(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset

        return total
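
A usage sketch, assuming a consumer object exposing the pending method above:

total_lag = consumer.pending()   # across all partitions
p0_lag = consumer.pending([0])   # a single partition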
Example #15
    def spoorer(self):  # connect to Kafka and fetch the topics
        try:
            kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
            # print kafka_client.topics
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)
        else:
            kafka_topics = kafka_client.topics
        finally:
            kafka_client.close()

        # connect to ZooKeeper and fetch the current consumption progress (current offset)
        try:
            zookeeper_client = KazooClient(hosts=self.zookeeper_hosts, read_only=True, timeout=self.timeout)
            zookeeper_client.start()
        except Exception as e:
            print "Error, cannot connect zookeeper server."
            sys.exit(1)

        try:
            groups = map(str,zookeeper_client.get_children(self.zookeeper_url + 'consumers'))
        except NoNodeError as e:
            print "Error, invalid zookeeper url."
            zookeeper_client.stop()
            sys.exit(2)
        else:
            for group in groups:
                if 'offsets' not in zookeeper_client.get_children(self.zookeeper_url + 'consumers/%s' % group):continue
                topic_path = 'consumers/%s/offsets' % (group)
                topics = map(str,zookeeper_client.get_children(self.zookeeper_url + topic_path))
                if len(topics) == 0: continue
                for topic in topics:
                    # print topic
                    # print self.white_topic_group.keys()
                    if topic not in self.white_topic_group.keys():
                        continue
                    # elif group not in self.white_topic_group[topic].replace(' ','').split(','):
                    #     continue
                    partition_path = 'consumers/%s/offsets/%s' % (group,topic)
                    partitions = map(int,zookeeper_client.get_children(self.zookeeper_url + partition_path))

                    for partition in partitions:
                        base_path = 'consumers/%s/%s/%s/%s' % (group, '%s', topic, partition)
                        owner_path, offset_path = base_path % 'owners', base_path % 'offsets'
                        offset = zookeeper_client.get(self.zookeeper_url + offset_path)[0]

                        try:
                            owner = zookeeper_client.get(self.zookeeper_url + owner_path)[0]
                        except NoNodeError as e:
                            owner = 'null'
                        # store the consumption progress in the metric dict
                        metric = {'datetime':time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), 'topic':topic, 'group':group, 'partition':int(partition), 'logsize':None, 'offset':int(offset), 'lag':None, 'owner':owner}
                        self.result.append(metric)
        finally:
            zookeeper_client.stop()
        # fetch the logsize of every partition
        try:
            client = SimpleClient(self.kafka_hosts)
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)
        else:
            for kafka_topic in kafka_topics:
                self.kafka_logsize[kafka_topic] = {}
                partitions = client.topic_partitions[kafka_topic]
                offset_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1) for p in partitions.keys()]
                offsets_responses = client.send_offset_request(offset_requests)
                for r in offsets_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]

            # lag = logsize minus current offset
        f1 = open(self.log_file,'w')
        # f2 = open(self.log_day_file,'a')
        # print self.result
        for metric in self.result:
            logsize = self.kafka_logsize[metric['topic']][metric['partition']]
            metric['logsize'] = int(logsize)
            metric['lag'] = int(logsize) - int(metric['offset'])
            f1.write(json.dumps(metric,sort_keys=True) + '\n')
            f1.flush()
            # f2.write(json.dumps(metric,sort_keys=True) + '\n')
            # f2.flush()
        # finally:
        f1.close()
        client.close()
Example #16
    def getLogsize(self, topic, partitions):
        """ 指定topic与partition列表, 返回logsize数据 """

        tp = self.client.send_offset_request(
            [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
        return {p.partition: p.offsets[0] for p in tp}
Example #17
    def monitor(self):
        try:
            kafka_client = KafkaClient(KAFKA_HOSTS, timeout=self.timeout)
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)

        try:
            zookeeper_client = KazooClient(hosts=ZOO_HOSTS,
                                           read_only=True,
                                           timeout=self.timeout)
            zookeeper_client.start()
        except Exception as e:
            print "Error, cannot connect zookeeper server."
            sys.exit(1)

        for group in CONSUMER_GROUPS:
            for topic in TOPIC_LIST:
                try:
                    partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                    partitions = map(
                        int,
                        zookeeper_client.get_children(self.zookeeper_url +
                                                      partition_path))

                    for partition in partitions:
                        offset_path = 'consumers/%s/offsets/%s/%s' % (
                            group, topic, partition)
                        offset = zookeeper_client.get(self.zookeeper_url +
                                                      offset_path)[0]
                        if offset is None:
                            continue

                        obj = {
                            'timestamp': self.timestamp,
                            'group': group,
                            'topic': topic,
                            'partition': int(partition),
                            'metric': 'consumerlag:%s' % group,
                            'tags':
                            'topic=%s,partition=%s' % (topic, partition),
                            'offset': int(offset)
                        }

                        self.result.append(obj)
                except NoNodeError as e:
                    print "Error, fail to get offset for group[%s], topic[%s]" % (
                        group, topic)
                    continue

        zookeeper_client.stop()

        for kafka_topic in TOPIC_LIST:
            self.kafka_logsize[kafka_topic] = {}
            try:
                partitions = kafka_client.topic_partitions[kafka_topic]
                logsize_requests = [
                    OffsetRequestPayload(kafka_topic, p, -1, 1)
                    for p in partitions.keys()
                ]

                logsize_responses = kafka_client.send_offset_request(
                    logsize_requests)

                for r in logsize_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]
            except Exception as e:
                print "error to get logsize for topic: %s" % kafka_topic

        kafka_client.close()

        payload = []
        for obj in self.result:
            try:
                logsize = self.kafka_logsize[obj['topic']][obj['partition']]
                lag = int(logsize) - int(obj['offset'])
                item = {}
                item['endpoint'] = ENDPOINT
                item['metric'] = obj['metric']
                item['tags'] = obj['tags']
                item['timestamp'] = obj['timestamp']
                item['step'] = STEP
                item['value'] = lag
                item['counterType'] = 'GAUGE'

                payload.append(item)

            except Exception as e:
                print "error to compute (%s/%s/%s) lag-value" % (
                    obj['group'], obj['topic'], obj['partition'])

        # 1. Print
        print "log-lag details:"
        print payload

        # 2. report to falcon-agent
        if len(payload) > 0:
            requests.post(FALCON_AGENT_URL,
                          data=json.dumps(payload),
                          timeout=10)
Example #18
    def seek(self, offset, whence=None, partition=None):
        """
        Alter the current offset in the consumer, similar to fseek

        Arguments:
            offset: how much to modify the offset
            whence: where to modify it from, default is None

                * None is an absolute offset
                * 0    is relative to the earliest available offset (head)
                * 1    is relative to the current offset
                * 2    is relative to the latest known offset (tail)

            partition: modify which partition, default is None.
                If partition is None, would modify all partitions.
        """

        if whence is None:  # set an absolute offset
            if partition is None:
                for tmp_partition in self.offsets:
                    self.offsets[tmp_partition] = offset
            else:
                self.offsets[partition] = offset
        elif whence == 1:  # relative to current position
            if partition is None:
                for tmp_partition, _offset in self.offsets.items():
                    self.offsets[tmp_partition] = _offset + offset
            else:
                self.offsets[partition] += offset
        elif whence in (0, 2):  # relative to beginning or end
            reqs = []
            deltas = {}
            if partition is None:
                # divide the requested offset by the number of partitions,
                # distribute the remainder evenly
                (delta, rem) = divmod(offset, len(self.offsets))
                for tmp_partition, r in izip_longest(self.offsets.keys(),
                                                     repeat(1, rem),
                                                     fillvalue=0):
                    deltas[tmp_partition] = delta + r

                for tmp_partition in self.offsets.keys():
                    if whence == 0:
                        reqs.append(
                            OffsetRequestPayload(self.topic, tmp_partition, -2,
                                                 1))
                    elif whence == 2:
                        reqs.append(
                            OffsetRequestPayload(self.topic, tmp_partition, -1,
                                                 1))
                    else:
                        pass
            else:
                deltas[partition] = offset
                if whence == 0:
                    reqs.append(
                        OffsetRequestPayload(self.topic, partition, -2, 1))
                elif whence == 2:
                    reqs.append(
                        OffsetRequestPayload(self.topic, partition, -1, 1))
                else:
                    pass

            resps = self.client.send_offset_request(reqs)
            for resp in resps:
                self.offsets[resp.partition] = \
                    resp.offsets[0] + deltas[resp.partition]
        else:
            raise ValueError('Unexpected value for `whence`, %d' % whence)

        # Reset queue and fetch offsets since they are invalid
        self.fetch_offsets = self.offsets.copy()
        self.count_since_commit += 1
        if self.auto_commit:
            self.commit()

        self.queue = queue.Queue()
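
Usage sketches for the whence semantics documented above, assuming a consumer object exposing this seek method:

consumer.seek(0, whence=0)    # rewind to the earliest available offset
consumer.seek(0, whence=2)    # jump to the tail (latest known offset)
consumer.seek(-10, whence=2)  # replay the last 10 messages
consumer.seek(5, whence=1)    # skip forward 5 messages
consumer.seek(100)            # absolute offset 100 on all partitions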
Example #19
DBNAME='kafka_monitor'
host=''
port=8086

## kafka cluster name, measurement in influxdb
kafka_name=""
zk_path='/consumers/'
consumer_group=''
kafka_brokers=sys.argv[2]
zk_clusters=sys.argv[4]
topic=sys.argv[6]

client = SimpleClient(kafka_brokers)

partitions = client.topic_partitions[topic]
offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]

offsets_responses = client.send_offset_request(offset_requests)

zk = KazooClient(hosts=zk_clusters,read_only=True)
zk.start()

zk_path=zk_path+consumer_group
if zk.exists(zk_path):
	data, stat = zk.get(zk_path+"/offsets/"+topic+"/1")
	sum_lag=0
	sum_offset=0
	for r in offsets_responses:
		consumer_offset, stat = zk.get(zk_path+"/offsets/"+topic+"/"+str(r.partition))
		producer_offset=r.offsets[0]
		lag_partition=producer_offset - int(consumer_offset)
Example #20
import kafka
from kafka import SimpleClient
from kafka.protocol.offset import OffsetRequest, OffsetResetStrategy
from kafka.common import OffsetRequestPayload

topic = sys.argv[1]
variables.setVariables()
topic_prefix = 'my_topic_'
table = []
sum_total = []

broker = "%s:%s" % (str(
    os.environ['KAFKA_BROKER_ADDR']), str(os.environ['KAFKA_BROKER_PORT']))

consumer = kafka.KafkaConsumer(group_id='count_check',
                               bootstrap_servers=[broker])
client = SimpleClient(broker)

for tpc in table:
    partitions = client.topic_partitions[tpc]
    offset_requests = [
        OffsetRequestPayload(tpc, p, -1, 1) for p in partitions.keys()
    ]
    offset_responses = client.send_offset_request(offset_requests)
    my_list = []
    for r in offset_responses:
        my_list.append(r.offsets[0])
    sum_total.append(sum(my_list))
    my_list = []
print("%s, %s" % (topic, sum_total))