Example 1
import json

from kafka.protocol.group import MemberAssignment


def discovery_info_from_group(connect, group):
    """Discover the topics and partitions consumed by `group` and print them as
    discovery JSON (Zabbix-style {#CGRP}/{#TOP}/{#PRT} macros).
    """
    entries = []
    # describe_consumer_groups() returns one description per group; index 5 is the member list.
    for member in connect.describe_consumer_groups([group])[0][5]:
        # member[4] holds the raw member_assignment bytes; decode it with the protocol struct.
        for topic, parts in MemberAssignment.decode(member[4]).assignment:
            for part in parts:
                entries.append({"{#CGRP}": group, "{#PRT}": part, "{#TOP}": topic})
    print(json.dumps(entries))
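A hypothetical call site for the helper above; the admin client construction, the broker address and the group name are assumptions of mine, not part of the example:

from kafka.admin import KafkaAdminClient

admin = KafkaAdminClient(bootstrap_servers="localhost:9092")  # placeholder address
discovery_info_from_group(admin, "my-consumer-group")         # prints the discovery JSON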
Example 2
from kafka import KafkaConsumer, TopicPartition
from kafka.protocol.group import MemberAssignment


def get_lag_from_group(connect, group):
    """Print the lag of every partition currently assigned to the members of `group`.

    CONNECT_PARAMS is presumably a module-level dict of connection settings
    (e.g. bootstrap_servers) defined elsewhere in the script.
    """
    consumer = KafkaConsumer(**CONNECT_PARAMS, request_timeout_ms=10001, group_id=group)
    for member in connect.describe_consumer_groups([group])[0][5]:
        # member[4] holds the raw member_assignment bytes.
        for topic, parts in MemberAssignment.decode(member[4]).assignment:
            for part in parts:
                topart = TopicPartition(topic, part)
                last_offsets = consumer.end_offsets([topart])  # {TopicPartition: end offset}
                committed = consumer.committed(topart)         # committed offset, or None
                if last_offsets is not None and committed is not None:
                    lag = last_offsets.get(topart) - committed
                    print("group: {} topic: {} partition: {} lag: {}".format(
                        group, topic, part, lag))
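The same figures can also be obtained without decoding member assignments, by asking for the group's committed offsets directly. The following is only a sketch under assumptions of mine (the helper name and connection arguments are not from the example); it uses kafka-python's KafkaAdminClient.list_consumer_group_offsets together with KafkaConsumer.end_offsets:

from kafka import KafkaConsumer
from kafka.admin import KafkaAdminClient


def lag_by_committed_offsets(bootstrap_servers, group):
    """Sketch: per-partition lag derived from committed offsets, no MemberAssignment decoding."""
    admin = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers, group_id=group)
    committed = admin.list_consumer_group_offsets(group)  # {TopicPartition: OffsetAndMetadata}
    end_offsets = consumer.end_offsets(list(committed))   # {TopicPartition: latest offset}
    return {tp: end_offsets[tp] - meta.offset for tp, meta in committed.items()}

Example 3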
from kafka.protocol.group import MemberAssignment, ProtocolMetadata


def generate_consumer_groups_for_broker(broker, response):
    """
    From a `broker` and a DescribeGroups `response`, build a dict of consumer groups
    keyed by group id.
    """
    consumer_groups = {}
    for err, gid, gstate, prot_type, prot, _members in response.groups:
        members = {}
        for mid, cid, chost, mdata, assign in _members:
            # Decode the opaque metadata/assignment bytes with kafka-python's protocol structs.
            mdata = ProtocolMetadata.decode(mdata)
            assign = MemberAssignment.decode(assign)
            assignment = {}
            for t, p in assign.assignment:
                assignment[t] = p
            members[mid] = {
                'client_id': cid,
                'client_host': chost,
                'member_metadata': {
                    'version': mdata.version,
                    'subscription': mdata.subscription,
                    'user_data': mdata.user_data.decode('utf-8')
                },
                'member_assignment': {
                    'version': assign.version,
                    'assignment': assignment,
                    'user_data': assign.user_data.decode('utf-8')
                }
            }
        group = {
            'error_code': err,
            'group_state': gstate,
            'members': members,
            'protocol_type': prot_type,
            'protocol': prot,
            'coordinator': {
                'host': broker.host,
                'nodeId': broker.nodeId,
                'port': broker.port,
                'rack': broker.rack
            }
        }
        consumer_groups[gid] = group
    return consumer_groups
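A plausible way to obtain the `broker` and `response` arguments, sketched under assumptions of mine (the bootstrap address and group name are placeholders), using kafka-python's low-level async client and the DescribeGroups protocol class:

import json

from kafka.client_async import KafkaClient
from kafka.protocol.admin import DescribeGroupsRequest_v0

client = KafkaClient(bootstrap_servers="localhost:9092")
client.poll(future=client.cluster.request_update())   # make sure broker metadata is loaded
broker = next(iter(client.cluster.brokers()))
while not client.is_ready(broker.nodeId):              # same readiness loop as Example 4
    client.ready(broker.nodeId)
    client.poll()

future = client.send(broker.nodeId, DescribeGroupsRequest_v0(groups=["my-consumer-group"]))
client.poll(future=future)
print(json.dumps(generate_consumer_groups_for_broker(broker, future.value), indent=2))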
Example 4
    def check(self, group_topics=None, discovery=None):
        """
        {
            "<group>": {
                "state": <str>,
                "topics": {
                    "<topic>": {
                        "consumer_lag": <int>,
                        "partitions": {
                            "<partition>": {
                                "offset_first": <int>,
                                "offset_consumed": <int>,
                                "offset_last": <int>,
                                "lag": <int>
                            }
                        }
                    }
                }
            }
        }
        :param group_topics: optional mapping of consumer group ID -> list of topics
        :param discovery: when truthy, also discover active groups and the topics their
            members are assigned (defaults to True only when group_topics is None)
        :return: consumer statistics
        """
        cluster = self.client.cluster
        brokers = cluster.brokers()

        # Consumer group ID -> list(topics)
        if group_topics is None:
            group_topics = {}

            if discovery is None:
                discovery = True
        else:
            group_topics = copy.deepcopy(group_topics)

        # Set of consumer group IDs
        consumer_groups = set(group_topics.iterkeys())

        # Set of all known topics
        topics = set(itertools.chain(*group_topics.itervalues()))

        # Consumer group ID -> coordinating broker
        consumer_coordinator = {}

        # Coordinating broker -> list(consumer group IDs)
        coordinator_consumers = {}

        results = {}

        for consumer_group in group_topics.iterkeys():
            results[consumer_group] = {'state': None, 'topics': {}}

        # Ensure connections to all brokers
        for broker in brokers:
            while not self.client.is_ready(broker.nodeId):
                self.client.ready(broker.nodeId)

        # Collect all active consumer groups
        if discovery:
            for broker in brokers:
                response = self._send(broker.nodeId, _ListGroupsRequest(), _ListGroupsResponse)

                if response:
                    for group in response.groups:
                        consumer_groups.add(group[0])

        # Identify which broker is coordinating each consumer group
        for group in consumer_groups:

            response = self._send(next(iter(brokers)).nodeId, _GroupCoordinatorRequest(group), _GroupCoordinatorResponse)

            if response:
                consumer_coordinator[group] = response.coordinator_id

                if response.coordinator_id not in coordinator_consumers:
                    coordinator_consumers[response.coordinator_id] = []

                coordinator_consumers[response.coordinator_id].append(group)

        # Populate consumer groups into dict
        for group in consumer_groups:
            if group not in group_topics:
                group_topics[group] = []

        # Add groups to results dict
        for group, topic_list in group_topics.iteritems():
            results[group] = {'state': None, 'topics': {}}

        # Identify group information and topics read by each consumer group
        for coordinator, consumers in coordinator_consumers.iteritems():

            response = self._send(coordinator, _DescribeGroupsRequest(consumers), _DescribeGroupsResponse)

            for group in response.groups:

                if group[1] in results:
                    results[group[1]]['state'] = group[2]
                    # TODO Also include member data?

                if discovery:
                    members = group[5]
                    for member in members:
                        try:
                            assignment = MemberAssignment.decode(member[4])
                            if assignment:
                                for partition in assignment.partition_assignment:
                                    topic = partition[0]

                                    # Add topic to topic set
                                    topics.add(topic)

                                    # Add topic to group
                                    group_topics[group[1]].append(topic)
                        except Exception:
                            # Assignment was missing or not consumer-protocol data; skip this member.
                            pass

        # Add topics to groups in results dict
        for group, topic_list in group_topics.iteritems():
            for topic in topic_list:
                results[group]['topics'][topic] = {'consumer_lag': 0, 'partitions': {}}

        # For storing the latest offset for all partitions of all topics
        # topic -> partition -> offset
        start_offsets = {}
        end_offsets = {}

        # Identify all the topic partitions that each broker is leader for
        # and request next new offset for each partition
        for broker, partitions in cluster._broker_partitions.iteritems():

            # topic -> List(partition, time, max_offsets)
            request_partitions = {}

            for tp in partitions:
                if tp.topic in topics:
                    if tp.topic not in request_partitions:
                        request_partitions[tp.topic] = []

                    # Time value '-2' is to get the offset for first available message
                    request_partitions[tp.topic].append((tp.partition, -2, 1))

            # List(topic, List(partition, time, max_offsets))
            topic_partitions = []

            for tp in request_partitions.iteritems():
                topic_partitions.append(tp)

            # Request partition start offsets
            response = self._send(broker, _OffsetRequest(-1, topic_partitions), _OffsetResponse)

            if response:
                for offset in response.topics:
                    topic = offset[0]
                    if topic not in start_offsets:
                        start_offsets[topic] = {}

                    for p in offset[1]:
                        start_offsets[topic][p[0]] = p[2][0]

            for tp in topic_partitions:
                for i, ptm in enumerate(tp[1]):
                    # Time value '-1' is to get the offset for next new message
                    tp[1][i] = (ptm[0], -1, 1)

            # Request partition end offsets
            response = self._send(broker, _OffsetRequest(-1, topic_partitions), _OffsetResponse)

            if response:
                for offset in response.topics:
                    topic = offset[0]
                    if topic not in end_offsets:
                        end_offsets[topic] = {}

                    for p in offset[1]:
                        end_offsets[topic][p[0]] = p[2][0]

        # Populate with offset values
        for group, topics in group_topics.iteritems():

            coordinator = consumer_coordinator[group]

            # topic -> list(partition)
            request_partitions = {}

            for topic in topics:
                results[group]['topics'][topic]['consumer_lag'] = 0
                results[group]['topics'][topic]['partitions'] = {}

                if topic in start_offsets:
                    for p in start_offsets[topic]:
                        results[group]['topics'][topic]['partitions'][p] = {
                            'offset_first': start_offsets[topic][p],
                            'offset_last': end_offsets[topic][p],
                            'offset_consumed': 0,
                            'lag' : 0}

                        if topic not in request_partitions:
                            request_partitions[topic] = []
                        request_partitions[topic].append(p)

            # List(topic -> list(partition))
            topic_partitions = []

            for tp in request_partitions.iteritems():
                topic_partitions.append(tp)

            response = self._send(coordinator, _OffsetFetchRequest(group, topic_partitions), _OffsetFetchResponse)

            if response:
                for offset in response.topics:
                    topic = offset[0]
                    offsets = offset[1]

                    if topic not in results[group]['topics']:
                        continue

                    for p_offset in offsets:
                        partition = p_offset[0]
                        offset_consumed = p_offset[1]
                        p_results = results[group]['topics'][topic]['partitions'][partition]

                        if offset_consumed != -1:
                            p_results['offset_consumed'] = offset_consumed
                            p_results['lag'] = p_results['offset_last'] - offset_consumed
                        else:
                            p_results['offset_consumed'] = 0
                            p_results['lag'] = p_results['offset_last'] - p_results['offset_first']

                        results[group]['topics'][topic]['consumer_lag'] += p_results['lag']

        return results
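A small post-processing helper one might add (the name and function are mine, not part of the example) to collapse the structure returned by check() into one lag figure per group:

def total_lag_per_group(results):
    """Sum the per-topic consumer_lag values from the dict returned by check()."""
    return {
        group: sum(topic['consumer_lag'] for topic in info['topics'].values())
        for group, info in results.items()
    }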
Example 6
    def describe(self, node_id, group_name):
        # `admin` is presumably kafka.protocol.admin from kafka-python.
        describe_groups_request = admin.DescribeGroupsRequest_v0(groups=[group_name])
        future = self.client.send(node_id, describe_groups_request)
        self.client.poll(timeout_ms=self.timeout, future=future)

        (error_code, group_id, state, protocol_type, protocol,
         members) = future.value.groups[0]

        if error_code != 0:
            print(
                "Kafka API - RET admin.DescribeGroupsRequest, error_code={}, group_id={}, state={}, protocol_type={}, protocol={}, members_count={}"
                .format(error_code, group_id, state, protocol_type, protocol,
                        len(members)))
            exit(1)

        metadata_consumer_group = {
            'id': group_name,
            'state': state,
            'topics': [],
            'lag': 0,
            'members': []
        }

        if len(members) != 0:
            for member in members:
                (member_id, client_id, client_host, member_metadata,
                 member_assignment) = member
                member_topics_assignment = []
                for (topic, partitions) in MemberAssignment.decode(
                        member_assignment).assignment:
                    member_topics_assignment.append(topic)

                metadata_consumer_group['members'].append({
                    'member_id': member_id,
                    'client_id': client_id,
                    'client_host': client_host,
                    'topic': member_topics_assignment
                })

                metadata_consumer_group['topics'] += member_topics_assignment
                (lag_total, topics_found) = self.get_lag_by_topic_list(
                    group_name, topics=metadata_consumer_group['topics'])
                metadata_consumer_group['lag'] += lag_total
        else:
            all_topics = self.client.cluster.topics()

            while '__consumer_offsets' in all_topics:
                all_topics.remove('__consumer_offsets')
            (lag_total, topics_found) = self.get_lag_by_topic_list(
                group_name, topics=all_topics)

            metadata_consumer_group['lag'] += lag_total
            metadata_consumer_group['topics'] = topics_found

        return metadata_consumer_group
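describe() expects the node id of the broker coordinating the group. Below is a hedged sketch of how that id might be looked up first; `checker` stands for an instance of the (unnamed) class above, and the wrapper function is my own addition, not part of the example:

from kafka.protocol.commit import GroupCoordinatorRequest_v0


def describe_on_coordinator(checker, group_name):
    """Sketch: resolve the group's coordinator, then run describe() against it."""
    any_node = next(iter(checker.client.cluster.brokers())).nodeId
    future = checker.client.send(any_node, GroupCoordinatorRequest_v0(group_name))
    checker.client.poll(timeout_ms=checker.timeout, future=future)
    return checker.describe(future.value.coordinator_id, group_name)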