def test_is_ready(mocker, conn):
    """is_ready() must be gated by metadata state and connection health.

    ``conn`` is a fixture providing a mocked connection object shared by
    all node IDs, so flipping its state/return values affects every node.
    The assertions are order-dependent: each section mutates client or
    connection state and then checks readiness before restoring it.
    """
    cli = KafkaClient()
    cli._maybe_connect(0)
    cli._maybe_connect(1)

    # metadata refresh blocks ready nodes
    assert cli.is_ready(0)
    assert cli.is_ready(1)
    cli._metadata_refresh_in_progress = True
    assert not cli.is_ready(0)
    assert not cli.is_ready(1)

    # requesting metadata update also blocks ready nodes
    cli._metadata_refresh_in_progress = False
    assert cli.is_ready(0)
    assert cli.is_ready(1)
    cli.cluster.request_update()
    # zero backoff so the pending update is considered due immediately
    cli.cluster.config['retry_backoff_ms'] = 0
    assert not cli._metadata_refresh_in_progress
    assert not cli.is_ready(0)
    assert not cli.is_ready(1)
    cli.cluster._need_update = False

    # if connection can't send more, not ready
    assert cli.is_ready(0)
    conn.can_send_more.return_value = False
    assert not cli.is_ready(0)
    conn.can_send_more.return_value = True

    # disconnected nodes, not ready
    assert cli.is_ready(0)
    conn.state = ConnectionStates.DISCONNECTED
    assert not cli.is_ready(0)
# NOTE(review): this is a byte-identical redefinition of test_is_ready above;
# at import time it shadows the first definition, so only one copy ever runs.
# Presumably a copy/paste artifact — consider deleting one copy.
def test_is_ready(mocker, conn):
    """is_ready() must be gated by metadata state and connection health.

    ``conn`` is a fixture providing a mocked connection object shared by
    all node IDs. The assertions are order-dependent: each section mutates
    client or connection state, checks readiness, then restores it.
    """
    cli = KafkaClient()
    cli._maybe_connect(0)
    cli._maybe_connect(1)

    # metadata refresh blocks ready nodes
    assert cli.is_ready(0)
    assert cli.is_ready(1)
    cli._metadata_refresh_in_progress = True
    assert not cli.is_ready(0)
    assert not cli.is_ready(1)

    # requesting metadata update also blocks ready nodes
    cli._metadata_refresh_in_progress = False
    assert cli.is_ready(0)
    assert cli.is_ready(1)
    cli.cluster.request_update()
    # zero backoff so the pending update is considered due immediately
    cli.cluster.config['retry_backoff_ms'] = 0
    assert not cli._metadata_refresh_in_progress
    assert not cli.is_ready(0)
    assert not cli.is_ready(1)
    cli.cluster._need_update = False

    # if connection can't send more, not ready
    assert cli.is_ready(0)
    conn.can_send_more.return_value = False
    assert not cli.is_ready(0)
    conn.can_send_more.return_value = True

    # disconnected nodes, not ready
    assert cli.is_ready(0)
    conn.state = ConnectionStates.DISCONNECTED
    assert not cli.is_ready(0)
class KafkaConsumerLag:
    """Compute consumer-group lag statistics by querying brokers directly.

    Uses a :class:`KafkaClient` to list/describe consumer groups, fetch
    committed offsets from each group's coordinator, and fetch log
    start/end offsets from each partition leader.
    """

    def __init__(self, bootstrap_servers):
        """Connect a client to *bootstrap_servers* and probe the broker version."""
        self.client = KafkaClient(bootstrap_servers=bootstrap_servers)
        self.client.check_version()

    def _send(self, broker_id, request, response_type=None):
        """Send *request* to *broker_id* and block until a response arrives.

        :param broker_id: node id of the target broker
        :param request: the request object to send
        :param response_type: when given, return the first response of this
            type; otherwise return the first response of any type
        :return: a response object, or None when no (matching) response arrived
        """
        future = self.client.send(broker_id, request)
        responses = self.client.poll(future=future)

        if not responses:
            return None

        if response_type is None:
            return responses[0]

        for response in responses:
            if isinstance(response, response_type):
                return response

        return None

    def check(self, group_topics=None, discovery=None):
        """Collect per-group, per-topic, per-partition lag statistics.

        Result shape::

            {
                "<group>": {
                    "state": <str>,
                    "topics": {
                        "<topic>": {
                            "consumer_lag": <int>,
                            "partitions": {
                                "<partition>": {
                                    "offset_first": <int>,
                                    "offset_consumed": <int>,
                                    "offset_last": <int>,
                                    "lag": <int>
                                }
                            }
                        }
                    }
                }
            }

        :param group_topics: optional mapping of consumer group ID -> list of
            topics to restrict the check to; a deep copy is taken so the
            caller's mapping is never mutated
        :param discovery: when true, also discover groups and their subscribed
            topics from the brokers; defaults to True only when *group_topics*
            is omitted
        :return: consumer statistics dict as described above
        """
        cluster = self.client.cluster
        brokers = cluster.brokers()

        # Consumer group ID -> list(topics)
        if group_topics is None:
            group_topics = {}

            if discovery is None:
                discovery = True
        else:
            # Work on a copy: the discovery phase appends topics below.
            group_topics = copy.deepcopy(group_topics)

        # Set of consumer group IDs
        consumer_groups = set(group_topics)

        # Set of all known topics
        topics = set(itertools.chain.from_iterable(group_topics.values()))

        # Consumer group ID -> coordinating broker
        consumer_coordinator = {}

        # Coordinating broker -> list(consumer group IDs)
        coordinator_consumers = {}

        results = {}

        for consumer_group in group_topics:
            results[consumer_group] = {'state': None, 'topics': {}}

        # Ensure connections to all brokers
        for broker in brokers:
            while not self.client.is_ready(broker.nodeId):
                self.client.ready(broker.nodeId)

        # Collect all active consumer groups
        if discovery:
            for broker in brokers:
                response = self._send(broker.nodeId, _ListGroupsRequest(), _ListGroupsResponse)

                if response:
                    for group in response.groups:
                        consumer_groups.add(group[0])

        # Identify which broker is coordinating each consumer group
        for group in consumer_groups:

            response = self._send(next(iter(brokers)).nodeId, _GroupCoordinatorRequest(group), _GroupCoordinatorResponse)

            if response:
                consumer_coordinator[group] = response.coordinator_id
                coordinator_consumers.setdefault(response.coordinator_id, []).append(group)

        # Ensure every discovered group has a (possibly empty) topic list
        for group in consumer_groups:
            if group not in group_topics:
                group_topics[group] = []

        # Add groups to results dict
        for group in group_topics:
            results[group] = {'state': None, 'topics': {}}

        # Identify group information and topics read by each consumer group
        for coordinator, consumers in coordinator_consumers.items():

            response = self._send(coordinator, _DescribeGroupsRequest(consumers), _DescribeGroupsResponse)

            for group in response.groups:

                if group[1] in results:
                    results[group[1]]['state'] = group[2]
                    # TODO Also include member data?

                if discovery:
                    members = group[5]
                    for member in members:
                        try:
                            assignment = MemberAssignment.decode(member[4])
                            if assignment:
                                for partition in assignment.partition_assignment:
                                    topic = partition[0]

                                    # Add topic to topic set
                                    topics.add(topic)

                                    # Add topic to group
                                    group_topics[group[1]].append(topic)
                        except Exception:
                            # Best effort: skip members whose assignment
                            # metadata cannot be decoded.
                            pass

        # Add topics to groups in results dict
        for group, topic_list in group_topics.items():
            for topic in topic_list:
                results[group]['topics'][topic] = {'consumer_lag': 0, 'partitions': {}}

        # topic -> partition -> offset, for first and next-new offsets
        start_offsets = {}
        end_offsets = {}

        # Identify all the topic partitions that each broker is leader for
        # and request first / next-new offsets for each partition
        for broker, partitions in cluster._broker_partitions.items():

            # topic -> List(partition, time, max_offsets)
            request_partitions = {}

            for tp in partitions:
                if tp.topic in topics:
                    # Time value '-2' requests the offset of the first available message
                    request_partitions.setdefault(tp.topic, []).append((tp.partition, -2, 1))

            # List(topic, List(partition, time, max_offsets))
            topic_partitions = list(request_partitions.items())

            # Request partition start offsets
            response = self._send(broker, _OffsetRequest(-1, topic_partitions), _OffsetResponse)

            if response:
                for offset in response.topics:
                    partition_offsets = start_offsets.setdefault(offset[0], {})
                    for p in offset[1]:
                        partition_offsets[p[0]] = p[2][0]

            # Rewrite the request in place: time value '-1' requests the
            # offset of the next new message (log end offset)
            for tp in topic_partitions:
                for i, ptm in enumerate(tp[1]):
                    tp[1][i] = (ptm[0], -1, 1)

            # Request partition end offsets
            response = self._send(broker, _OffsetRequest(-1, topic_partitions), _OffsetResponse)

            if response:
                for offset in response.topics:
                    partition_offsets = end_offsets.setdefault(offset[0], {})
                    for p in offset[1]:
                        partition_offsets[p[0]] = p[2][0]

        # Populate committed offsets and compute lag per partition
        for group, topic_list in group_topics.items():

            if group not in consumer_coordinator:
                # Coordinator lookup failed earlier; committed offsets cannot
                # be fetched, so leave this group's topics with zeroed stats.
                continue

            coordinator = consumer_coordinator[group]

            # topic -> list(partition)
            request_partitions = {}

            for topic in topic_list:
                results[group]['topics'][topic]['consumer_lag'] = 0
                results[group]['topics'][topic]['partitions'] = {}

                if topic in start_offsets:
                    for p in start_offsets[topic]:
                        results[group]['topics'][topic]['partitions'][p] = {
                            'offset_first': start_offsets[topic][p],
                            'offset_last': end_offsets[topic][p],
                            'offset_consumed': 0,
                            'lag': 0}

                        request_partitions.setdefault(topic, []).append(p)

            # List(topic, list(partition))
            topic_partitions = list(request_partitions.items())

            response = self._send(coordinator, _OffsetFetchRequest(group, topic_partitions), _OffsetFetchResponse)

            if response:
                for offset in response.topics:
                    topic = offset[0]
                    offsets = offset[1]

                    if topic not in results[group]['topics']:
                        continue

                    for p_offset in offsets:
                        partition = p_offset[0]
                        offset_consumed = p_offset[1]
                        p_results = results[group]['topics'][topic]['partitions'][partition]

                        if offset_consumed != -1:
                            p_results['offset_consumed'] = offset_consumed
                            p_results['lag'] = p_results['offset_last'] - offset_consumed
                        else:
                            # No committed offset: count the whole log as lag
                            p_results['offset_consumed'] = 0
                            p_results['lag'] = p_results['offset_last'] - p_results['offset_first']

                        results[group]['topics'][topic]['consumer_lag'] += p_results['lag']

        return results

    def close(self):
        """Close the underlying Kafka client (safe to call when unset)."""
        if self.client:
            self.client.close()
# Beispiel #4 ("Example #4" — stray dataset separator text, commented out so the file parses)
# 0
class KafkaConsumerLag:
    """Compute consumer-group lag statistics by querying brokers directly.

    NOTE(review): this class duplicates the KafkaConsumerLag defined earlier
    in the file and shadows it at import time — consider deleting one copy.

    Uses a :class:`KafkaClient` to list/describe consumer groups, fetch
    committed offsets from each group's coordinator, and fetch log
    start/end offsets from each partition leader.
    """

    def __init__(self, bootstrap_servers):
        """Connect a client to *bootstrap_servers* and probe the broker version."""
        self.client = KafkaClient(bootstrap_servers=bootstrap_servers)
        self.client.check_version()

    def _send(self, broker_id, request, response_type=None):
        """Send *request* to *broker_id* and block until a response arrives.

        :param broker_id: node id of the target broker
        :param request: the request object to send
        :param response_type: when given, return the first response of this
            type; otherwise return the first response of any type
        :return: a response object, or None when no (matching) response arrived
        """
        future = self.client.send(broker_id, request)
        responses = self.client.poll(future=future)

        if not responses:
            return None

        if response_type is None:
            return responses[0]

        for response in responses:
            if isinstance(response, response_type):
                return response

        return None

    def check(self, group_topics=None, discovery=None):
        """Collect per-group, per-topic, per-partition lag statistics.

        Result shape::

            {
                "<group>": {
                    "state": <str>,
                    "topics": {
                        "<topic>": {
                            "consumer_lag": <int>,
                            "partitions": {
                                "<partition>": {
                                    "offset_first": <int>,
                                    "offset_consumed": <int>,
                                    "offset_last": <int>,
                                    "lag": <int>
                                }
                            }
                        }
                    }
                }
            }

        :param group_topics: optional mapping of consumer group ID -> list of
            topics to restrict the check to; a deep copy is taken so the
            caller's mapping is never mutated
        :param discovery: when true, also discover groups and their subscribed
            topics from the brokers; defaults to True only when *group_topics*
            is omitted
        :return: consumer statistics dict as described above
        """
        cluster = self.client.cluster
        brokers = cluster.brokers()

        # Consumer group ID -> list(topics)
        if group_topics is None:
            group_topics = {}

            if discovery is None:
                discovery = True
        else:
            # Work on a copy: the discovery phase appends topics below.
            group_topics = copy.deepcopy(group_topics)

        # Set of consumer group IDs
        consumer_groups = set(group_topics)

        # Set of all known topics
        topics = set(itertools.chain.from_iterable(group_topics.values()))

        # Consumer group ID -> coordinating broker
        consumer_coordinator = {}

        # Coordinating broker -> list(consumer group IDs)
        coordinator_consumers = {}

        results = {}

        for consumer_group in group_topics:
            results[consumer_group] = {'state': None, 'topics': {}}

        # Ensure connections to all brokers
        for broker in brokers:
            while not self.client.is_ready(broker.nodeId):
                self.client.ready(broker.nodeId)

        # Collect all active consumer groups
        if discovery:
            for broker in brokers:
                response = self._send(broker.nodeId, _ListGroupsRequest(),
                                      _ListGroupsResponse)

                if response:
                    for group in response.groups:
                        consumer_groups.add(group[0])

        # Identify which broker is coordinating each consumer group
        for group in consumer_groups:

            response = self._send(
                next(iter(brokers)).nodeId, _GroupCoordinatorRequest(group),
                _GroupCoordinatorResponse)

            if response:
                consumer_coordinator[group] = response.coordinator_id
                coordinator_consumers.setdefault(
                    response.coordinator_id, []).append(group)

        # Ensure every discovered group has a (possibly empty) topic list
        for group in consumer_groups:
            if group not in group_topics:
                group_topics[group] = []

        # Add groups to results dict
        for group in group_topics:
            results[group] = {'state': None, 'topics': {}}

        # Identify group information and topics read by each consumer group
        for coordinator, consumers in coordinator_consumers.items():

            response = self._send(coordinator,
                                  _DescribeGroupsRequest(consumers),
                                  _DescribeGroupsResponse)

            for group in response.groups:

                if group[1] in results:
                    results[group[1]]['state'] = group[2]
                    # TODO Also include member data?

                if discovery:
                    members = group[5]
                    for member in members:
                        try:
                            assignment = MemberAssignment.decode(member[4])
                            if assignment:
                                for partition in assignment.partition_assignment:
                                    topic = partition[0]

                                    # Add topic to topic set
                                    topics.add(topic)

                                    # Add topic to group
                                    group_topics[group[1]].append(topic)
                        except Exception:
                            # Best effort: skip members whose assignment
                            # metadata cannot be decoded.
                            pass

        # Add topics to groups in results dict
        for group, topic_list in group_topics.items():
            for topic in topic_list:
                results[group]['topics'][topic] = {
                    'consumer_lag': 0,
                    'partitions': {}
                }

        # topic -> partition -> offset, for first and next-new offsets
        start_offsets = {}
        end_offsets = {}

        # Identify all the topic partitions that each broker is leader for
        # and request first / next-new offsets for each partition
        for broker, partitions in cluster._broker_partitions.items():

            # topic -> List(partition, time, max_offsets)
            request_partitions = {}

            for tp in partitions:
                if tp.topic in topics:
                    # Time value '-2' requests the offset of the first available message
                    request_partitions.setdefault(tp.topic, []).append(
                        (tp.partition, -2, 1))

            # List(topic, List(partition, time, max_offsets))
            topic_partitions = list(request_partitions.items())

            # Request partition start offsets
            response = self._send(broker, _OffsetRequest(-1, topic_partitions),
                                  _OffsetResponse)

            if response:
                for offset in response.topics:
                    partition_offsets = start_offsets.setdefault(offset[0], {})
                    for p in offset[1]:
                        partition_offsets[p[0]] = p[2][0]

            # Rewrite the request in place: time value '-1' requests the
            # offset of the next new message (log end offset)
            for tp in topic_partitions:
                for i, ptm in enumerate(tp[1]):
                    tp[1][i] = (ptm[0], -1, 1)

            # Request partition end offsets
            response = self._send(broker, _OffsetRequest(-1, topic_partitions),
                                  _OffsetResponse)

            if response:
                for offset in response.topics:
                    partition_offsets = end_offsets.setdefault(offset[0], {})
                    for p in offset[1]:
                        partition_offsets[p[0]] = p[2][0]

        # Populate committed offsets and compute lag per partition
        for group, topic_list in group_topics.items():

            if group not in consumer_coordinator:
                # Coordinator lookup failed earlier; committed offsets cannot
                # be fetched, so leave this group's topics with zeroed stats.
                continue

            coordinator = consumer_coordinator[group]

            # topic -> list(partition)
            request_partitions = {}

            for topic in topic_list:
                results[group]['topics'][topic]['consumer_lag'] = 0
                results[group]['topics'][topic]['partitions'] = {}

                if topic in start_offsets:
                    for p in start_offsets[topic]:
                        results[group]['topics'][topic]['partitions'][p] = {
                            'offset_first': start_offsets[topic][p],
                            'offset_last': end_offsets[topic][p],
                            'offset_consumed': 0,
                            'lag': 0
                        }

                        request_partitions.setdefault(topic, []).append(p)

            # List(topic, list(partition))
            topic_partitions = list(request_partitions.items())

            response = self._send(coordinator,
                                  _OffsetFetchRequest(group, topic_partitions),
                                  _OffsetFetchResponse)

            if response:
                for offset in response.topics:
                    topic = offset[0]
                    offsets = offset[1]

                    if topic not in results[group]['topics']:
                        continue

                    for p_offset in offsets:
                        partition = p_offset[0]
                        offset_consumed = p_offset[1]
                        p_results = results[group]['topics'][topic][
                            'partitions'][partition]

                        if offset_consumed != -1:
                            p_results['offset_consumed'] = offset_consumed
                            p_results['lag'] = p_results[
                                'offset_last'] - offset_consumed
                        else:
                            # No committed offset: count the whole log as lag
                            p_results['offset_consumed'] = 0
                            p_results['lag'] = p_results[
                                'offset_last'] - p_results['offset_first']

                        results[group]['topics'][topic][
                            'consumer_lag'] += p_results['lag']

        return results

    def close(self):
        """Close the underlying Kafka client (safe to call when unset)."""
        if self.client:
            self.client.close()