Example #1
    def _list_consumer_group_offsets_process_response(self, response):
        """Process an OffsetFetchResponse.

        :param response: an OffsetFetchResponse.
        :return: A dictionary composed of TopicPartition keys and
            OffsetAndMetadata values.
        """
        if response.API_VERSION <= 3:

            # OffsetFetchResponse_v1 lacks a top-level error_code
            if response.API_VERSION > 1:
                error_type = Errors.for_code(response.error_code)
                if error_type is not Errors.NoError:
                    # optionally we could retry if error_type.retriable
                    raise error_type(
                        "OffsetFetchResponse failed with response '{}'."
                        .format(response))

            # transform response into a dictionary with TopicPartition keys and
            # OffsetAndMetadata values--this is what the Java AdminClient returns
            offsets = {}
            for topic, partitions in response.topics:
                for partition, offset, metadata, error_code in partitions:
                    error_type = Errors.for_code(error_code)
                    if error_type is not Errors.NoError:
                        raise error_type(
                            "Unable to fetch consumer group offsets for topic {}, partition {}"
                            .format(topic, partition))
                    offsets[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata)
        else:
            raise NotImplementedError(
                "Support for OffsetFetchResponse_v{} has not yet been added to KafkaAdminClient."
                .format(response.API_VERSION))
        return offsets
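All of the examples on this page share one pattern: kafka.errors.for_code(error_code) maps the numeric error code carried in a broker response to an exception class, which the caller then compares against Errors.NoError, raises, or hands to a Future. A minimal, self-contained sketch of that pattern (illustrative only, not taken from any of the projects above):

    import kafka.errors as Errors

    def check_error_code(error_code):
        """Raise the mapped kafka exception class if error_code is non-zero."""
        error_type = Errors.for_code(error_code)
        if error_type is not Errors.NoError:
            # for_code returns a class, so instantiate it with a message before raising
            raise error_type("Kafka request failed with error code {}".format(error_code))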
Example #2
    def update_topics(api_version, metadata):
        logging.info('Received topics and partition assignments')

        global topics

        if api_version == 0:
            TOPIC_ERROR = 0
            TOPIC_NAME = 1
            TOPIC_PARTITIONS = 2
            PARTITION_ERROR = 0
            PARTITION_NUMBER = 1
            PARTITION_LEADER = 2
        else:
            TOPIC_ERROR = 0
            TOPIC_NAME = 1
            TOPIC_PARTITIONS = 3
            PARTITION_ERROR = 0
            PARTITION_NUMBER = 1
            PARTITION_LEADER = 2

        new_topics = {}
        for t in metadata.topics:
            error_code = t[TOPIC_ERROR]
            if error_code:
                error = Errors.for_code(error_code)(t)
                logging.warning(
                    'Received error in metadata response at topic level: %s',
                    error)
            else:
                topic = t[TOPIC_NAME]
                partitions = t[TOPIC_PARTITIONS]

                new_partitions = {}
                for p in partitions:
                    error_code = p[PARTITION_ERROR]
                    if error_code:
                        error = Errors.for_code(error_code)(p)
                        logging.warning(
                            'Received error in metadata response at partition level for topic %(topic)s: %(error)s',
                            {
                                'topic': topic,
                                'error': error
                            })
                    else:
                        partition = p[PARTITION_NUMBER]
                        leader = p[PARTITION_LEADER]
                        logging.debug(
                            'Received partition assignment for partition %(partition)s of topic %(topic)s',
                            {
                                'partition': partition,
                                'topic': topic
                            })

                        new_partitions[partition] = leader

                new_topics[topic] = new_partitions

        topics = new_topics
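Once update_topics has run, the module-level topics dict maps each topic name to a {partition number: leader node id} dict. A hypothetical illustration of consuming that mapping (the sample data below is assumed, not taken from the project above):

    topics = {'my-topic': {0: 1001, 1: 1002}}  # assumed sample data

    for topic_name, partition_leaders in topics.items():
        for partition_number, leader_id in sorted(partition_leaders.items()):
            print('{}[{}] is led by broker {}'.format(topic_name, partition_number, leader_id))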
Example #3
    def _handle_offset_response(self, partition, future, response):
        """Callback for the response of the list offset call above.

        Arguments:
            partition (TopicPartition): The partition that was fetched
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response does not match partition
        """
        topic, partition_info = response.topics[0]
        assert len(response.topics) == 1 and len(partition_info) == 1, (
            'OffsetResponse should only be for a single topic-partition')

        part, error_code, offsets = partition_info[0]
        assert topic == partition.topic and part == partition.partition, (
            'OffsetResponse partition does not match OffsetRequest partition')

        error_type = Errors.for_code(error_code)
        if error_type is Errors.NoError:
            assert len(offsets) == 1, 'Expected OffsetResponse with one offset'
            offset = offsets[0]
            log.debug("Fetched offset %d for partition %s", offset, partition)
            future.success(offset)
        elif error_type in (Errors.NotLeaderForPartitionError,
                            Errors.UnknownTopicOrPartitionError):
            log.debug("Attempt to fetch offsets for partition %s failed due"
                      " to obsolete leadership information, retrying.",
                      partition)
            future.failure(error_type(partition))
        else:
            log.warning("Attempt to fetch offsets for partition %s failed due to:"
                        " %s", partition, error_type)
            future.failure(error_type(partition))
Example #4
 def _get_group_coordinator(self, group):
     """Determine which broker is the Group Coordinator for a specific consumer group."""
     request = GroupCoordinatorRequest[0](group)
     response = self._make_blocking_req(request)
     error_type = kafka_errors.for_code(response.error_code)
     if error_type is kafka_errors.NoError:
         return response.coordinator_id
Example #5
    def _handle_offset_response(self, future, response):
        """Callback for the response of the list offset call above.

        Arguments:
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response does not match partition
        """
        timestamp_offset_map = {}
        for topic, part_data in response.topics:
            for partition_info in part_data:
                partition, error_code = partition_info[:2]
                partition = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    if response.API_VERSION == 0:
                        offsets = partition_info[2]
                        assert len(offsets) <= 1, 'Expected OffsetResponse with one offset'
                        if not offsets:
                            offset = UNKNOWN_OFFSET
                        else:
                            offset = offsets[0]
                        log.debug("Handling v0 ListOffsetResponse response for %s. "
                                  "Fetched offset %s", partition, offset)
                        if offset != UNKNOWN_OFFSET:
                            timestamp_offset_map[partition] = (offset, None)
                    else:
                        timestamp, offset = partition_info[2:]
                        log.debug("Handling ListOffsetResponse response for %s. "
                                  "Fetched offset %s, timestamp %s",
                                  partition, offset, timestamp)
                        if offset != UNKNOWN_OFFSET:
                            timestamp_offset_map[partition] = (offset, timestamp)
                elif error_type is Errors.UnsupportedForMessageFormatError:
                    # The message format on the broker side is before 0.10.0,
                    # we simply put None in the response.
                    log.debug("Cannot search by timestamp for partition %s because the"
                              " message format version is before 0.10.0", partition)
                elif error_type is Errors.NotLeaderForPartitionError:
                    log.debug("Attempt to fetch offsets for partition %s failed due"
                              " to obsolete leadership information, retrying.",
                              partition)
                    future.failure(error_type(partition))
                    return
                elif error_type is Errors.UnknownTopicOrPartitionError:
                    log.warning("Received unknown topic or partition error in ListOffset "
                                "request for partition %s. The topic/partition "
                                "may not exist or the user may not have Describe access "
                                "to it.", partition)
                    future.failure(error_type(partition))
                    return
                else:
                    log.warning("Attempt to fetch offsets for partition %s failed due to:"
                                " %s", partition, error_type)
                    future.failure(error_type(partition))
                    return
        if not future.is_done:
            future.success(timestamp_offset_map)
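As the branches above show, the per-partition tuple layout differs between ListOffsets response versions: v0 carries a (possibly empty) list of offsets at index 2, while v1 and later carry a (timestamp, offset) pair at indices 2 and 3. A small helper illustrating how one might normalize both shapes (illustrative only; the field order is taken from the example above):

    UNKNOWN_OFFSET = -1  # assumed sentinel, mirroring the constant used above

    def extract_offset(api_version, partition_info):
        """Return an (offset, timestamp) pair from a per-partition response tuple."""
        if api_version == 0:
            offsets = partition_info[2]              # v0: list of offsets, possibly empty
            offset = offsets[0] if offsets else UNKNOWN_OFFSET
            return offset, None
        timestamp, offset = partition_info[2:4]      # v1+: a single timestamp/offset pair
        return offset, timestamp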
Example #6
    def _send_request_to_controller(self, request):
        """Send a Kafka protocol message to the cluster controller.

        Will block until the message result is received.

        :param request: The message to send.
        :return: The Kafka protocol response for the message.
        """
        tries = 2  # in case our cached self._controller_id is outdated
        while tries:
            tries -= 1
            response = self._send_request_to_node(self._controller_id, request)
            # DeleteTopicsResponse returns topic_error_codes rather than topic_errors
            for topic, error_code in getattr(response, "topic_errors", response.topic_error_codes):
                error_type = Errors.for_code(error_code)
                if tries and error_type is NotControllerError:
                    # No need to inspect the rest of the errors for
                    # non-retriable errors because NotControllerError should
                    # either be thrown for all errors or no errors.
                    self._refresh_controller_id()
                    break
                elif error_type is not Errors.NoError:
                    raise error_type(
                        "Request '{}' failed with response '{}'."
                        .format(request, response))
            else:
                return response
        raise RuntimeError("This should never happen, please file a bug with full stacktrace if encountered")
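The retry loop above leans on Python's for ... else idiom: the else block runs only when the for loop finishes without hitting break, so return response is reached only after every error code has been inspected, while a break on NotControllerError falls back out to the while loop for one more attempt. A tiny standalone demonstration of that control flow (unrelated to Kafka):

    def first_batch_without(batches, stop_value):
        """Return the first batch that does not contain stop_value."""
        for batch in batches:
            for item in batch:
                if item == stop_value:
                    break        # abandon this batch and try the next one
            else:
                return batch     # the else runs only if the inner loop never broke
        return None

    # first_batch_without([[1, 2, 0], [3, 4]], 0) returns [3, 4]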
Example #7
    def _handle_join_group_response(self, future, send_time, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.debug("Received successful JoinGroup response for group %s: %s",
                      self.group_id, response)
            self.sensors.join_latency.record((time.time() - send_time) * 1000)
            with self._lock:
                if self.state is not MemberState.REBALANCING:
                    # if the consumer was woken up before a rebalance completes,
                    # we may have already left the group. In this case, we do
                    # not want to continue with the sync group.
                    future.failure(UnjoinedGroupException())
                else:
                    self._generation = Generation(response.generation_id,
                                                  response.member_id,
                                                  response.group_protocol)
                    self.rejoin_needed = False

                if response.leader_id == response.member_id:
                    log.info("Elected group leader -- performing partition"
                             " assignments using %s", self._generation.protocol)
                    self._on_join_leader(response).chain(future)
                else:
                    self._on_join_follower().chain(future)

        elif error_type is Errors.GroupLoadInProgressError:
            log.debug("Attempt to join group %s rejected since coordinator %s"
                      " is loading the group.", self.group_id, self.coordinator_id)
            # backoff and retry
            future.failure(error_type(response))
        elif error_type is Errors.UnknownMemberIdError:
            # reset the member id and retry immediately
            error = error_type(self._generation.member_id)
            self.reset_generation()
            log.debug("Attempt to join group %s failed due to unknown member id",
                      self.group_id)
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            # re-discover the coordinator and retry with backoff
            self.coordinator_dead(error_type())
            log.debug("Attempt to join group %s failed due to obsolete "
                      "coordinator information: %s", self.group_id,
                      error_type.__name__)
            future.failure(error_type())
        elif error_type in (Errors.InconsistentGroupProtocolError,
                            Errors.InvalidSessionTimeoutError,
                            Errors.InvalidGroupIdError):
            # log the error and re-throw the exception
            error = error_type(response)
            log.error("Attempt to join group %s failed due to fatal error: %s",
                      self.group_id, error)
            future.failure(error)
        elif error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        else:
            # unexpected error, throw the exception
            error = error_type()
            log.error("Unexpected error in join group response: %s", error)
            future.failure(error)
Example #8
    def _handle_produce_response(self, node_id, send_time, batches, response):
        """Handle a produce response."""
        # if we have a response, parse it
        log.debug('Parsing produce response: %r', response)
        if response:
            batches_by_partition = dict([(batch.topic_partition, batch)
                                         for batch in batches])

            for topic, partitions in response.topics:
                for partition_info in partitions:
                    if response.API_VERSION < 2:
                        partition, error_code, offset = partition_info
                        ts = None
                    else:
                        partition, error_code, offset, ts = partition_info
                    tp = TopicPartition(topic, partition)
                    error = Errors.for_code(error_code)
                    batch = batches_by_partition[tp]
                    self._complete_batch(batch, error, offset, ts)

            if response.API_VERSION > 0:
                self._sensors.record_throttle_time(response.throttle_time_ms, node=node_id)

        else:
            # this is the acks = 0 case, just complete all requests
            for batch in batches:
                self._complete_batch(batch, None, -1, None)
Example #9
    def _handle_sync_group_response(self, future, send_time, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            self.sensors.sync_latency.record((time.time() - send_time) * 1000)
            future.success(response.member_assignment)
            return

        # Always rejoin on error
        self.request_rejoin()
        if error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        elif error_type is Errors.RebalanceInProgressError:
            log.debug(
                "SyncGroup for group %s failed due to coordinator"
                " rebalance", self.group_id)
            future.failure(error_type(self.group_id))
        elif error_type in (Errors.UnknownMemberIdError,
                            Errors.IllegalGenerationError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id,
                      error)
            self.reset_generation()
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id,
                      error)
            self.coordinator_dead(error)
            future.failure(error)
        else:
            error = error_type()
            log.error("Unexpected error from SyncGroup: %s", error)
            future.failure(error)
Example #10
def update_lowwater(node, offsets):
    logging.info('Received low-water marks from node {}'.format(node))

    lowwaters = {}
    for topic, partitions in offsets.topics:
        for partition, error_code, offsets in partitions:
            if error_code:
                error = Errors.for_code(error_code)(
                    (partition, error_code, offsets))
                logging.warning(
                    'Received error in offset response for topic %(topic)s: %(error)s',
                    {
                        'topic': topic,
                        'error': error
                    })
            else:
                logging.debug(
                    'Received low-water marks for partition %(partition)s of topic %(topic)s',
                    {
                        'partition': partition,
                        'topic': topic
                    })

                lowwater = offsets[0]

                if topic not in lowwaters:
                    lowwaters[topic] = {}
                lowwaters[topic][partition] = lowwater

    global node_lowwaters
    node_lowwaters[node] = lowwaters
Example #11
 def _handle_offset_fetch_response(self, future, response):
     offsets = {}
     for topic, partitions in response.topics:
         for partition, offset, metadata, error_code in partitions:
             tp = TopicPartition(topic, partition)
             error_type = Errors.for_code(error_code)
             if error_type is not Errors.NoError:
                 error = error_type()
                 log.debug("Group %s failed to fetch offset for partition"
                           " %s: %s", self.group_id, tp, error)
                 if error_type is Errors.GroupLoadInProgressError:
                     # just retry
                     future.failure(error)
                 elif error_type is Errors.NotCoordinatorForGroupError:
                     # re-discover the coordinator and retry
                     self.coordinator_dead(error_type())
                     future.failure(error)
                 elif error_type is Errors.UnknownTopicOrPartitionError:
                     log.warning("OffsetFetchRequest -- unknown topic %s"
                                 " (have you committed any offsets yet?)",
                                 topic)
                     continue
                 else:
                     log.error("Unknown error fetching offsets for %s: %s",
                               tp, error)
                     future.failure(error)
                 return
             elif offset >= 0:
                 # record the position with the offset
                 # (-1 indicates no committed offset to fetch)
                 offsets[tp] = OffsetAndMetadata(offset, metadata)
             else:
                 log.debug("Group %s has no committed offset for partition"
                           " %s", self.group_id, tp)
     future.success(offsets)
Example #12
    def _handle_group_coordinator_response(self, future, response):
        log.debug("Received group coordinator response %s", response)
        if not self._coordinator_unknown():
            # We already found the coordinator, so ignore the request
            log.debug("Coordinator already known -- ignoring metadata response")
            future.success(self._coordinator_id)
            return

        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            ok = self._client.cluster.add_group_coordinator(self.group_id, response)
            if not ok:
                # This could happen if coordinator metadata is different
                # than broker metadata
                future.failure(Errors.IllegalStateError())
                return

            self._coordinator_id = response.coordinator_id
            log.info("Discovered coordinator %s for group %s",
                     self._coordinator_id, self.group_id)
            self._client.ready(self._coordinator_id)
            future.success(self._coordinator_id)
        elif error_type is Errors.GroupCoordinatorNotAvailableError:
            log.debug("Group Coordinator Not Available; retry")
            future.failure(error_type())
        elif error_type is Errors.GroupAuthorizationFailedError:
            error = error_type(self.group_id)
            log.error("Group Coordinator Request failed: %s", error)
            future.failure(error)
        else:
            error = error_type()
            log.error("Unrecognized failure in Group Coordinator Request: %s",
                      error)
            future.failure(error)
Example #14
    def _make_metadata_response(metadata):
        resp_brokers = metadata.brokers
        brokers = set()
        for b in resp_brokers:
            node_id, _, _, _ = b
            brokers.add(node_id)
        resp = {"topics": {}, "brokers": list(brokers)}
        if not metadata.topics:
            return resp

        for tup in metadata.topics:
            err, topic, _, partitions = tup
            if err:
                raise for_code(err)
            topic_data = []
            for part in partitions:
                _, partition_index, leader_id, replica_nodes, isr_nodes = part
                isr_nodes = set(isr_nodes)
                topic_response = {
                    "partition": partition_index,
                    "leader": leader_id,
                    "replicas": []
                }
                for node in replica_nodes:
                    topic_response["replicas"].append({
                        "broker":
                        node,
                        "leader":
                        node == leader_id,
                        "in_sync":
                        node in isr_nodes
                    })
                topic_data.append(topic_response)
            resp["topics"][topic] = {"partitions": topic_data}
        return resp
Example #15
    def get_offsets(self, topic: str, partition_id: int) -> dict:
        beginning_f = self.make_offsets_request(topic, partition_id,
                                                OffsetResetStrategy.EARLIEST)
        end_f = self.make_offsets_request(topic, partition_id,
                                          OffsetResetStrategy.LATEST)
        self._wait_for_futures([beginning_f, end_f])
        beginning_resp = beginning_f.value
        end_resp = end_f.value
        v = self._matching_api_version(OffsetRequest)
        assert len(beginning_resp.topics) == 1
        assert len(end_resp.topics) == 1
        _, beginning_partitions = beginning_resp.topics[0]
        _, end_partitions = end_resp.topics[0]

        assert len(beginning_partitions) == 1
        assert len(end_partitions) == 1
        if v == 0:
            assert len(beginning_partitions[0][2]) == 1
            assert partition_id == beginning_partitions[0][0]
            assert partition_id == end_partitions[0][0]
            start_err = beginning_partitions[0][1]
            end_err = end_partitions[0][1]
            for e in [start_err, end_err]:
                if e != 0:
                    raise for_code(e)
            rv = {
                "beginning_offset": beginning_partitions[0][2][0],
                "end_offset": end_partitions[0][2][0],
            }
        else:
            rv = {
                "beginning_offset": beginning_partitions[0][3],
                "end_offset": end_partitions[0][3],
            }
        return rv
Example #16
    def _get_single_group_offsets_from_kafka(self, consumer_group,
                                             topic_partitions):
        """Get offsets for a single consumer group from Kafka"""
        consumer_offsets = {}
        tps = defaultdict(set)
        for topic, partitions in topic_partitions.items():
            if len(partitions) == 0:
                # If partitions omitted, then we assume the group is consuming all partitions for the topic.
                # Fetch consumer offsets even for unavailable partitions because those will be valid once the partition
                # finishes leader failover.
                partitions = self._kafka_client.cluster.partitions_for_topic(
                    topic)
            tps[topic].update(partitions)

        coordinator_id = self._get_group_coordinator(consumer_group)
        if coordinator_id is not None:
            # Kafka protocol uses OffsetFetchRequests to retrieve consumer offsets:
            # https://kafka.apache.org/protocol#The_Messages_OffsetFetch
            # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetFetchRequest
            request = OffsetFetchRequest[1](consumer_group, list(tps.items()))
            response = self._make_blocking_req(request, node_id=coordinator_id)
            for (topic, partition_offsets) in response.topics:
                for partition, offset, _, error_code in partition_offsets:
                    error_type = kafka_errors.for_code(error_code)
                    if error_type is not kafka_errors.NoError:
                        continue
                    consumer_offsets[(topic, partition)] = offset
        else:
            self.log.info("unable to find group coordinator for %s",
                          consumer_group)

        return consumer_offsets
Example #17
    def _handle_sync_group_response(self, future, send_time, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            self.sensors.sync_latency.record((time.time() - send_time) * 1000)
            future.success(response.member_assignment)
            return

        # Always rejoin on error
        self.request_rejoin()
        if error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        elif error_type is Errors.RebalanceInProgressError:
            log.debug("SyncGroup for group %s failed due to coordinator"
                      " rebalance", self.group_id)
            future.failure(error_type(self.group_id))
        elif error_type in (Errors.UnknownMemberIdError,
                            Errors.IllegalGenerationError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
            self.reset_generation()
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
            self.coordinator_dead(error)
            future.failure(error)
        else:
            error = error_type()
            log.error("Unexpected error from SyncGroup: %s", error)
            future.failure(error)
Example #18
 def _describe_consumer_groups_process_response(self, response):
     """Process a DescribeGroupsResponse into a group description."""
     if response.API_VERSION <= 1:
         assert len(response.groups) == 1
         # TODO need to implement converting the response tuple into
         # a more accessible interface like a namedtuple and then stop
         # hardcoding tuple indices here. Several Java examples,
         # including KafkaAdminClient.java
         group_description = response.groups[0]
         error_code = group_description[0]
         error_type = Errors.for_code(error_code)
         # Java has the note: KAFKA-6789, we can retry based on the error code
         if error_type is not Errors.NoError:
             raise error_type(
                 "DescribeGroupsResponse failed with response '{}'.".format(
                     response))
         # TODO Java checks the group protocol type, and if consumer
         # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes
         # the members' partition assignments... that hasn't yet been
         # implemented here so just return the raw struct results
     else:
         raise NotImplementedError(
             "Support for DescribeGroupsResponse_v{} has not yet been added to KafkaAdminClient."
             .format(response.API_VERSION))
     return group_description
Example #19
    def update_highwater(offsets):
        logging.info('Received high-water marks')

        for topic, partitions in offsets.topics:
            for partition, error_code, offsets in partitions:
                if error_code:
                    error = Errors.for_code(error_code)(
                        (partition, error_code, offsets))
                    logging.warning(
                        'Received error in offset response for topic %(topic)s: %(error)s',
                        {
                            'topic': topic,
                            'error': error
                        })
                else:
                    logging.debug(
                        'Received high-water marks for partition %(partition)s of topic %(topic)s',
                        {
                            'partition': partition,
                            'topic': topic
                        })

                    update_gauge(
                        metric_name='kafka_topic_highwater',
                        label_dict={
                            'topic': topic,
                            'partition': partition
                        },
                        value=offsets[0],
                        doc='The offset of the head of a partition in a topic.'
                    )
Example #20
 def _handle_offset_fetch_response(self, future, response):
     offsets = {}
     for topic, partitions in response.topics:
         for partition, offset, metadata, error_code in partitions:
             tp = TopicPartition(topic, partition)
             error_type = Errors.for_code(error_code)
             if error_type is not Errors.NoError:
                 error = error_type()
                 log.debug("Group %s failed to fetch offset for partition"
                           " %s: %s", self.group_id, tp, error)
                 if error_type is Errors.GroupLoadInProgressError:
                     # just retry
                     future.failure(error)
                 elif error_type is Errors.NotCoordinatorForGroupError:
                     # re-discover the coordinator and retry
                     self.coordinator_dead(error_type())
                     future.failure(error)
                 elif error_type is Errors.UnknownTopicOrPartitionError:
                     log.warning("OffsetFetchRequest -- unknown topic %s"
                                 " (have you committed any offsets yet?)",
                                 topic)
                     continue
                 else:
                     log.error("Unknown error fetching offsets for %s: %s",
                               tp, error)
                     future.failure(error)
                 return
             elif offset >= 0:
                 # record the position with the offset
                 # (-1 indicates no committed offset to fetch)
                 offsets[tp] = OffsetAndMetadata(offset, metadata)
             else:
                 log.debug("Group %s has no committed offset for partition"
                           " %s", self.group_id, tp)
     future.success(offsets)
Example #21
    def _handle_group_coordinator_response(self, future, response):
        log.debug("Received group coordinator response %s", response)

        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            with self._lock:
                ok = self._client.cluster.add_group_coordinator(
                    self.group_id, response)
                if not ok:
                    # This could happen if coordinator metadata is different
                    # than broker metadata
                    future.failure(Errors.IllegalStateError())
                    return

                self.coordinator_id = response.coordinator_id
                log.info("Discovered coordinator %s for group %s",
                         self.coordinator_id, self.group_id)
                self._client.ready(self.coordinator_id)
                self.heartbeat.reset_timeouts()
            future.success(self.coordinator_id)

        elif error_type is Errors.GroupCoordinatorNotAvailableError:
            log.debug("Group Coordinator Not Available; retry")
            future.failure(error_type())
        elif error_type is Errors.GroupAuthorizationFailedError:
            error = error_type(self.group_id)
            log.error("Group Coordinator Request failed: %s", error)
            future.failure(error)
        else:
            error = error_type()
            log.error("Group coordinator lookup for group %s failed: %s",
                      self.group_id, error)
            future.failure(error)
Example #22
    def _handle_offset_response(self, future, response):
        """Callback for the response of the list offset call above.

        Arguments:
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response does not match partition
        """
        timestamp_offset_map = {}
        for topic, part_data in response.topics:
            for partition_info in part_data:
                partition, error_code = partition_info[:2]
                partition = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    if response.API_VERSION == 0:
                        offsets = partition_info[2]
                        assert len(offsets) <= 1, 'Expected OffsetResponse with one offset'
                        if not offsets:
                            offset = UNKNOWN_OFFSET
                        else:
                            offset = offsets[0]
                        log.debug("Handling v0 ListOffsetResponse response for %s. "
                                  "Fetched offset %s", partition, offset)
                        if offset != UNKNOWN_OFFSET:
                            timestamp_offset_map[partition] = (offset, None)
                    else:
                        timestamp, offset = partition_info[2:]
                        log.debug("Handling ListOffsetResponse response for %s. "
                                  "Fetched offset %s, timestamp %s",
                                  partition, offset, timestamp)
                        if offset != UNKNOWN_OFFSET:
                            timestamp_offset_map[partition] = (offset, timestamp)
                elif error_type is Errors.UnsupportedForMessageFormatError:
                    # The message format on the broker side is before 0.10.0,
                    # we simply put None in the response.
                    log.debug("Cannot search by timestamp for partition %s because the"
                              " message format version is before 0.10.0", partition)
                elif error_type is Errors.NotLeaderForPartitionError:
                    log.debug("Attempt to fetch offsets for partition %s failed due"
                              " to obsolete leadership information, retrying.",
                              partition)
                    future.failure(error_type(partition))
                    return
                elif error_type is Errors.UnknownTopicOrPartitionError:
                    log.warning("Received unknown topic or partition error in ListOffset "
                             "request for partition %s. The topic/partition " +
                             "may not exist or the user may not have Describe access "
                             "to it.", partition)
                    future.failure(error_type(partition))
                    return
                else:
                    log.warning("Attempt to fetch offsets for partition %s failed due to:"
                                " %s", partition, error_type)
                    future.failure(error_type(partition))
                    return
        if not future.is_done:
            future.success(timestamp_offset_map)
Example #23
    def _handle_sync_group_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.info("Successfully joined group %s with generation %s",
                      self.group_id, self.generation)
            #self.sensors.syncLatency.record(response.requestLatencyMs())
            future.success(response.member_assignment)
            return

        # Always rejoin on error
        self.rejoin_needed = True
        if error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        elif error_type is Errors.RebalanceInProgressError:
            log.debug("SyncGroup for group %s failed due to coordinator"
                      " rebalance", self.group_id)
            future.failure(error_type(self.group_id))
        elif error_type in (Errors.UnknownMemberIdError,
                            Errors.IllegalGenerationError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
            self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
            self.coordinator_dead()
            future.failure(error)
        else:
            error = error_type()
            log.error("Unexpected error from SyncGroup: %s", error)
            future.failure(error)
Example #24
 def _process_highwater_offsets(self, response):
     """Parse an OffsetFetchResponse and save it to the highwater_offsets dict."""
     if type(response) not in OffsetResponse:
         raise RuntimeError("response type should be OffsetResponse, but instead was %s." % type(response))
     for topic, partitions_data in response.topics:
         for partition, error_code, offsets in partitions_data:
             error_type = kafka_errors.for_code(error_code)
             if error_type is kafka_errors.NoError:
                 self._highwater_offsets[(topic, partition)] = offsets[0]
             elif error_type is kafka_errors.NotLeaderForPartitionError:
                 self.log.warning(
                     "Kafka broker returned %s (error_code %s) for topic %s, partition: %s. This should only happen "
                     "if the broker that was the partition leader when kafka_admin_client last fetched metadata is "
                     "no longer the leader.",
                     error_type.message,
                     error_type.errno,
                     topic,
                     partition,
                 )
                 self._kafka_client.cluster.request_update()  # force metadata update on next poll()
             elif error_type is kafka_errors.UnknownTopicOrPartitionError:
                 self.log.warning(
                     "Kafka broker returned %s (error_code %s) for topic: %s, partition: %s. This should only "
                     "happen if the topic is currently being deleted or the check configuration lists non-existent "
                     "topic partitions.",
                     error_type.message,
                     error_type.errno,
                     topic,
                     partition,
                 )
             else:
                 raise error_type(
                     "Unexpected error encountered while attempting to fetch the highwater offsets for topic: %s, "
                     "partition: %s." % (topic, partition)
                 )
Example #25
    def _handle_sync_group_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.info("Successfully joined group %s with generation %s",
                      self.group_id, self.generation)
            #self.sensors.syncLatency.record(response.requestLatencyMs())
            future.success(response.member_assignment)
            return

        # Always rejoin on error
        self.rejoin_needed = True
        if error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        elif error_type is Errors.RebalanceInProgressError:
            log.debug("SyncGroup for group %s failed due to coordinator"
                      " rebalance", self.group_id)
            future.failure(error_type(self.group_id))
        elif error_type in (Errors.UnknownMemberIdError,
                            Errors.IllegalGenerationError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
            self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            error = error_type()
            log.debug("SyncGroup for group %s failed due to %s", self.group_id, error)
            self.coordinator_dead()
            future.failure(error)
        else:
            error = error_type()
            log.error("Unexpected error from SyncGroup: %s", error)
            future.failure(error)
Example #26
    def _handle_group_coordinator_response(self, future, response):
        log.debug("Received group coordinator response %s", response)

        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            with self._client._lock, self._lock:
                ok = self._client.cluster.add_group_coordinator(self.group_id, response)
                if not ok:
                    # This could happen if coordinator metadata is different
                    # than broker metadata
                    future.failure(Errors.IllegalStateError())
                    return

                self.coordinator_id = response.coordinator_id
                log.info("Discovered coordinator %s for group %s",
                         self.coordinator_id, self.group_id)
                self._client.ready(self.coordinator_id)
                self.heartbeat.reset_timeouts()
            future.success(self.coordinator_id)

        elif error_type is Errors.GroupCoordinatorNotAvailableError:
            log.debug("Group Coordinator Not Available; retry")
            future.failure(error_type())
        elif error_type is Errors.GroupAuthorizationFailedError:
            error = error_type(self.group_id)
            log.error("Group Coordinator Request failed: %s", error)
            future.failure(error)
        else:
            error = error_type()
            log.error("Group coordinator lookup for group %s failed: %s",
                      self.group_id, error)
            future.failure(error)
Example #27
    def add_group_coordinator(self, group, response):
        """Update with metadata for a group coordinator

        Arguments:
            group (str): name of group from GroupCoordinatorRequest
            response (GroupCoordinatorResponse): broker response

        Returns:
            string: coordinator node_id if metadata is updated, None on error
        """
        log.debug("Updating coordinator for %s: %s", group, response)
        error_type = Errors.for_code(response.error_code)
        if error_type is not Errors.NoError:
            log.error("GroupCoordinatorResponse error: %s", error_type)
            self._groups[group] = -1
            return

        # Use a coordinator-specific node id so that group requests
        # get a dedicated connection
        node_id = "coordinator-{}".format(response.coordinator_id)
        coordinator = BrokerMetadata(node_id, response.host, response.port,
                                     None)

        log.info("Group coordinator for %s is %s", group, coordinator)
        self._coordinator_brokers[node_id] = coordinator
        self._groups[group] = node_id
        return node_id
Example #28
    def _handle_produce_response(self, node_id, send_time, batches, response):
        """Handle a produce response."""
        # if we have a response, parse it
        log.debug('Parsing produce response: %r', response)
        if response:
            batches_by_partition = dict([(batch.topic_partition, batch)
                                         for batch in batches])

            for topic, partitions in response.topics:
                for partition_info in partitions:
                    if response.API_VERSION < 2:
                        partition, error_code, offset = partition_info
                        ts = None
                    else:
                        partition, error_code, offset, ts = partition_info
                    tp = TopicPartition(topic, partition)
                    error = Errors.for_code(error_code)
                    batch = batches_by_partition[tp]
                    self._complete_batch(batch, error, offset, ts)

            if response.API_VERSION > 0:
                self._sensors.record_throttle_time(response.throttle_time_ms,
                                                   node=node_id)

        else:
            # this is the acks = 0 case, just complete all requests
            for batch in batches:
                self._complete_batch(batch, None, -1, None)
Example #29
    def _handle_join_group_response(self, future, send_time, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.debug("Received successful JoinGroup response for group %s: %s",
                      self.group_id, response)
            self.sensors.join_latency.record((time.time() - send_time) * 1000)
            with self._client._lock, self._lock:
                if self.state is not MemberState.REBALANCING:
                    # if the consumer was woken up before a rebalance completes,
                    # we may have already left the group. In this case, we do
                    # not want to continue with the sync group.
                    future.failure(UnjoinedGroupException())
                else:
                    self._generation = Generation(response.generation_id,
                                                  response.member_id,
                                                  response.group_protocol)
                    self.rejoin_needed = False

                if response.leader_id == response.member_id:
                    log.info("Elected group leader -- performing partition"
                             " assignments using %s", self._generation.protocol)
                    self._on_join_leader(response).chain(future)
                else:
                    self._on_join_follower().chain(future)

        elif error_type is Errors.GroupLoadInProgressError:
            log.debug("Attempt to join group %s rejected since coordinator %s"
                      " is loading the group.", self.group_id, self.coordinator_id)
            # backoff and retry
            future.failure(error_type(response))
        elif error_type is Errors.UnknownMemberIdError:
            # reset the member id and retry immediately
            error = error_type(self._generation.member_id)
            self.reset_generation()
            log.debug("Attempt to join group %s failed due to unknown member id",
                      self.group_id)
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            # re-discover the coordinator and retry with backoff
            self.coordinator_dead(error_type())
            log.debug("Attempt to join group %s failed due to obsolete "
                      "coordinator information: %s", self.group_id,
                      error_type.__name__)
            future.failure(error_type())
        elif error_type in (Errors.InconsistentGroupProtocolError,
                            Errors.InvalidSessionTimeoutError,
                            Errors.InvalidGroupIdError):
            # log the error and re-throw the exception
            error = error_type(response)
            log.error("Attempt to join group %s failed due to fatal error: %s",
                      self.group_id, error)
            future.failure(error)
        elif error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        else:
            # unexpected error, throw the exception
            error = error_type()
            log.error("Unexpected error in join group response: %s", error)
            future.failure(error)
Example #30
    def _handle_offset_response(self, partition, future, response):
        """Callback for the response of the list offset call above.

        Arguments:
            partition (TopicPartition): The partition that was fetched
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response does not match partition
        """
        topic, partition_info = response.topics[0]
        assert len(response.topics) == 1 and len(partition_info) == 1, (
            'OffsetResponse should only be for a single topic-partition')

        part, error_code, offsets = partition_info[0]
        assert topic == partition.topic and part == partition.partition, (
            'OffsetResponse partition does not match OffsetRequest partition')

        error_type = Errors.for_code(error_code)
        if error_type is Errors.NoError:
            assert len(offsets) == 1, 'Expected OffsetResponse with one offset'
            offset = offsets[0]
            log.debug("Fetched offset %d for partition %s", offset, partition)
            future.success(offset)
        elif error_type in (Errors.NotLeaderForPartitionError,
                            Errors.UnknownTopicOrPartitionError):
            log.debug("Attempt to fetch offsets for partition %s failed due"
                      " to obsolete leadership information, retrying.",
                      partition)
            future.failure(error_type(partition))
        else:
            log.warning("Attempt to fetch offsets for partition %s failed due to:"
                        " %s", partition, error_type)
            future.failure(error_type(partition))
Example #31
 def _process_highwater_offsets(self, response):
     """Parse an OffsetFetchResponse and save it to the highwater_offsets dict."""
     for topic, partitions_data in response.topics:
         for partition, error_code, offsets in partitions_data:
             error_type = kafka_errors.for_code(error_code)
             if error_type is kafka_errors.NoError:
                 self._highwater_offsets[(topic, partition)] = offsets[0]
                 # Valid error codes:
                 # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-PossibleErrorCodes.2
             elif error_type is kafka_errors.NotLeaderForPartitionError:
                 self.log.warning(
                     "Kafka broker returned %s (error_code %s) for topic %s, partition: %s. This should only happen "
                     "if the broker that was the partition leader when kafka_admin_client last fetched metadata is "
                     "no longer the leader.",
                     error_type.message,
                     error_type.errno,
                     topic,
                     partition,
                 )
                 self._kafka_client.cluster.request_update()  # force metadata update on next poll()
             elif error_type is kafka_errors.UnknownTopicOrPartitionError:
                 self.log.warning(
                     "Kafka broker returned %s (error_code %s) for topic: %s, partition: %s. This should only "
                     "happen if the topic is currently being deleted or the check configuration lists non-existent "
                     "topic partitions.",
                     error_type.message,
                     error_type.errno,
                     topic,
                     partition,
                 )
             else:
                 raise error_type(
                     "Unexpected error encountered while attempting to fetch the highwater offsets for topic: %s, "
                     "partition: %s." % (topic, partition))
Example #32
    def _get_single_group_offsets_from_kafka(self, consumer_group,
                                             topic_partitions):
        """Get offsets for a single consumer group from Kafka"""
        consumer_offsets = {}
        tps = defaultdict(set)
        for topic, partitions in iteritems(topic_partitions):
            if len(partitions) == 0:
                partitions = self._kafka_client.cluster.available_partitions_for_topic(
                    topic)
            tps[topic] = tps[text_type(topic)].union(set(partitions))

        coordinator_id = self._get_group_coordinator(consumer_group)
        if coordinator_id is not None:
            # Kafka protocol uses OffsetFetchRequests to retrieve consumer offsets:
            # https://kafka.apache.org/protocol#The_Messages_OffsetFetch
            # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetFetchRequest
            request = OffsetFetchRequest[1](consumer_group,
                                            list(iteritems(tps)))
            response = self._make_blocking_req(request, node_id=coordinator_id)
            for (topic, partition_offsets) in response.topics:
                for partition, offset, _, error_code in partition_offsets:
                    error_type = kafka_errors.for_code(error_code)
                    if error_type is not kafka_errors.NoError:
                        continue
                    consumer_offsets[(topic, partition)] = offset
        else:
            self.log.info("unable to find group coordinator for %s",
                          consumer_group)

        return consumer_offsets
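
A standalone sketch of the topic-to-partitions aggregation that feeds the OffsetFetchRequest above; the input dict, the stand-in "available" metadata, and the topic names are hypothetical.

from collections import defaultdict

topic_partitions = {"orders": [0, 1], "payments": []}  # empty list means "all partitions"
available_partitions = {"payments": {0, 1, 2}}          # stand-in for cluster metadata

tps = defaultdict(set)
for topic, partitions in topic_partitions.items():
    if not partitions:
        partitions = available_partitions[topic]
    tps[topic] = tps[topic].union(set(partitions))

request_payload = list(tps.items())  # [("orders", {0, 1}), ("payments", {0, 1, 2})]
print(request_payload)
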
Example #33
0
    def _handle_join_group_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.debug(
                "Received successful JoinGroup response for group %s: %s",
                self.group_id, response)
            self.member_id = response.member_id
            self.generation = response.generation_id
            self.rejoin_needed = False
            self.protocol = response.group_protocol
            log.info("Joined group '%s' (generation %s) with member_id %s",
                     self.group_id, self.generation, self.member_id)
            #self.sensors.join_latency.record(response.requestLatencyMs())
            if response.leader_id == response.member_id:
                log.info(
                    "Elected group leader -- performing partition"
                    " assignments using %s", self.protocol)
                self._on_join_leader(response).chain(future)
            else:
                self._on_join_follower().chain(future)

        elif error_type is Errors.GroupLoadInProgressError:
            log.debug(
                "Attempt to join group %s rejected since coordinator %s"
                " is loading the group.", self.group_id, self.coordinator_id)
            # backoff and retry
            future.failure(error_type(response))
        elif error_type is Errors.UnknownMemberIdError:
            # reset the member id and retry immediately
            error = error_type(self.member_id)
            self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
            log.debug(
                "Attempt to join group %s failed due to unknown member id",
                self.group_id)
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            # re-discover the coordinator and retry with backoff
            self.coordinator_dead()
            log.debug(
                "Attempt to join group %s failed due to obsolete "
                "coordinator information: %s", self.group_id,
                error_type.__name__)
            future.failure(error_type())
        elif error_type in (Errors.InconsistentGroupProtocolError,
                            Errors.InvalidSessionTimeoutError,
                            Errors.InvalidGroupIdError):
            # log the error and re-throw the exception
            error = error_type(response)
            log.error("Attempt to join group %s failed due to fatal error: %s",
                      self.group_id, error)
            future.failure(error)
        elif error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        else:
            # unexpected error, throw the exception
            error = error_type()
            log.error("Unexpected error in join group response: %s", error)
            future.failure(error)
Example #34
0
 def _handle_leave_group_response(self, response):
     error_type = Errors.for_code(response.error_code)
     if error_type is Errors.NoError:
         log.debug("LeaveGroup request for group %s returned successfully",
                   self.group_id)
     else:
         log.error("LeaveGroup request for group %s failed with error: %s",
                   self.group_id, error_type())
Example #35
0
 def _handle_api_version_response(self, response):
     error_type = Errors.for_code(response.error_code)
     assert error_type is Errors.NoError, "API version check failed"
     self._api_versions = dict([
         (api_key, (min_version, max_version))
         for api_key, min_version, max_version in response.api_versions
     ])
     return self._api_versions
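
A small sketch, with made-up version ranges, of how an api_versions mapping like the one built above is typically consumed: pick the newest request version supported by both sides, and fail if the ranges do not overlap (this mirrors the idea behind the _matching_api_version calls elsewhere in these examples).

api_versions = {9: (0, 3), 18: (0, 1)}  # hypothetical: api_key -> (min_version, max_version)

def pick_request_version(api_key, client_max_version):
    broker_min, broker_max = api_versions[api_key]
    version = min(client_max_version, broker_max)
    if version < broker_min:
        raise RuntimeError("no mutually supported version for API key %d" % api_key)
    return version

print(pick_request_version(9, client_max_version=5))  # -> 3
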
Example #37
0
    def _get_kafka_consumer_offsets(self, contexts_limit):
        """
        Fetch Consumer Group offsets from Kafka.

        These offsets are stored in the __consumer_offsets topic rather than in Zookeeper.
        """
        for consumer_group, topic_partitions in self._consumer_groups.items():
            if not topic_partitions:
                raise ConfigurationError(
                    'Invalid configuration - if you are collecting consumer offsets from Kafka, and your brokers are '
                    'older than 0.10.2, then you _must_ specify consumer groups and their topics. Older brokers lack '
                    'the necessary protocol support to determine which topics a consumer is consuming. See KIP-88 for '
                    'details.')
            try:  # catch exceptions on a group-by-group basis so that if one fails we still fetch the other groups
                for topic, partitions in topic_partitions.items():
                    if not partitions:
                        # If partitions omitted, then we assume the group is consuming all partitions for the topic.
                        # Fetch consumer offsets even for unavailable partitions because those will be valid once the
                        # partition finishes leader failover.
                        topic_partitions[
                            topic] = self._kafka_client.cluster.partitions_for_topic(
                                topic)

                coordinator_id = self._get_group_coordinator(consumer_group)
                if coordinator_id is not None:
                    # Kafka protocol uses OffsetFetchRequests to retrieve consumer offsets:
                    # https://kafka.apache.org/protocol#The_Messages_OffsetFetch
                    # https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetFetchRequest
                    request = OffsetFetchRequest[1](
                        consumer_group, list(topic_partitions.items()))
                    response = self._make_blocking_req(request,
                                                       node_id=coordinator_id)
                    for (topic, partition_offsets) in response.topics:
                        for partition, offset, _metadata, error_code in partition_offsets:
                            error_type = kafka_errors.for_code(error_code)
                            # If the OffsetFetchRequest explicitly specified partitions, the offset could be returned as
                            # -1, meaning there is no recorded offset for that partition... for example, if the
                            # partition doesn't exist in the cluster. So ignore it.
                            if offset == -1 or error_type is not kafka_errors.NoError:
                                self._kafka_client.cluster.request_update(
                                )  # force metadata update on next poll()
                                continue
                            key = (consumer_group, topic, partition)
                            self._kafka_consumer_offsets[key] = offset

                            if len(self._kafka_consumer_offsets
                                   ) >= contexts_limit:
                                self.warning(
                                    "Context limit reached. Skipping kafka consumer offsets collection."
                                )
                                return
                else:
                    self.log.info("unable to find group coordinator for %s",
                                  consumer_group)
            except Exception:
                self.log.exception(
                    'Could not read consumer offsets from Kafka for group: %s',
                    consumer_group)
Example #38
0
    def describe_consumer_groups(self, group_ids, group_coordinator_id=None):
        """Describe a set of consumer groups.

        Any errors are immediately raised.

        :param group_ids: A list of consumer group IDs. These are typically the
            group names as strings.
        :param group_coordinator_id: The node_id of the groups' coordinator
            broker. If set to None, it will query the cluster for each group to
            find that group's coordinator. Explicitly specifying this can be
            useful for avoiding extra network round trips if you already know
            the group coordinator. This is only useful when all the group_ids
            have the same coordinator, otherwise it will error. Default: None.
        :return: A list of group descriptions. For now the group descriptions
            are the raw results from the DescribeGroupsResponse. Long-term, we
            plan to change this to return namedtuples as well as decoding the
            partition assignments.
        """
        group_descriptions = []
        version = self._matching_api_version(DescribeGroupsRequest)
        for group_id in group_ids:
            if group_coordinator_id is not None:
                this_groups_coordinator_id = group_coordinator_id
            else:
                this_groups_coordinator_id = self._find_group_coordinator_id(
                    group_id)
            if version <= 1:
                # Note: KAFKA-6788 A potential optimization is to group the
                # request per coordinator and send one request with a list of
                # all consumer groups. Java still hasn't implemented this
                # because the error checking is hard to get right when some
                # groups error and others don't.
                request = DescribeGroupsRequest[version](groups=(group_id, ))
                response = self._send_request_to_node(
                    this_groups_coordinator_id, request)
                assert len(response.groups) == 1
                # TODO need to implement converting the response tuple into
                # a more accessible interface like a namedtuple and then stop
                # hardcoding tuple indices here. Several Java examples,
                # including KafkaAdminClient.java
                group_description = response.groups[0]
                error_code = group_description[0]
                error_type = Errors.for_code(error_code)
                # Java has the note: KAFKA-6789, we can retry based on the error code
                if error_type is not Errors.NoError:
                    raise error_type(
                        "Request '{}' failed with response '{}'.".format(
                            request, response))
                # TODO Java checks the group protocol type, and if consumer
                # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes
                # the members' partition assignments... that hasn't yet been
                # implemented here so just return the raw struct results
                group_descriptions.append(group_description)
            else:
                raise NotImplementedError(
                    "Support for DescribeGroups v{} has not yet been added to KafkaAdminClient."
                    .format(version))
        return group_descriptions
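
A hedged usage sketch for the method above, assuming kafka-python's KafkaAdminClient, a reachable broker at localhost:9092, and a hypothetical group named "my-group".

from kafka import KafkaAdminClient

admin = KafkaAdminClient(bootstrap_servers="localhost:9092")
for description in admin.describe_consumer_groups(["my-group"]):
    # for now each description is the raw DescribeGroupsResponse group tuple,
    # roughly (error_code, group_id, state, protocol_type, protocol, members)
    print(description)
admin.close()
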
Example #39
0
    def list_consumer_groups(self, broker_ids=None):
        """List all consumer groups known to the cluster.

        This returns a list of Consumer Group tuples. The tuples are
        composed of the consumer group name and the consumer group protocol
        type.

        Only consumer groups that store their offsets in Kafka are returned.
        The protocol type will be an empty string for groups created using
        Kafka < 0.9 APIs because, although they store their offsets in Kafka,
        they don't use Kafka for group coordination. For groups created using
        Kafka >= 0.9, the protocol type will typically be "consumer".

        As soon as any error is encountered, it is immediately raised.

        :param broker_ids: A list of broker node_ids to query for consumer
            groups. If set to None, will query all brokers in the cluster.
            Explicitly specifying broker(s) can be useful for determining which
            consumer groups are coordinated by those broker(s). Default: None
        :return list: List of tuples of Consumer Groups.
        :exception GroupCoordinatorNotAvailableError: The coordinator is not
            available, so cannot process requests.
        :exception GroupLoadInProgressError: The coordinator is loading and
            hence can't process requests.
        """
        # While we return a list, internally use a set to prevent duplicates
        # because if a group coordinator fails after being queried, and its
        # consumer groups move to new brokers that haven't yet been queried,
        # then the same group could be returned by multiple brokers.
        consumer_groups = set()
        futures = []
        if broker_ids is None:
            broker_ids = [
                broker.nodeId for broker in self._client.cluster.brokers()
            ]
        version = self._matching_api_version(ListGroupsRequest)
        if version <= 2:
            request = ListGroupsRequest[version]()
            for broker_id in broker_ids:
                futures.append(self._send_request_to_node(broker_id, request))

            self._wait_for_futures(futures)

            for future in futures:
                response = future.value
                error_type = Errors.for_code(response.error_code)
                if error_type is not Errors.NoError:
                    raise error_type(
                        "Request '{}' failed with response '{}'.".format(
                            request, response))
                consumer_groups.update(response.groups)
        else:
            raise NotImplementedError(
                "Support for ListGroups v{} has not yet been added to KafkaAdminClient."
                .format(version))
        return list(consumer_groups)
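
A hedged usage sketch, assuming kafka-python's KafkaAdminClient and a broker at localhost:9092; per the docstring above, each entry is a (group name, protocol type) tuple.

from kafka import KafkaAdminClient

admin = KafkaAdminClient(bootstrap_servers="localhost:9092")
for group in admin.list_consumer_groups():
    print(group)  # e.g. ("my-group", "consumer")
admin.close()
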
Example #40
0
    def describe_consumer_groups(self, group_ids, group_coordinator_id=None):
        """Describe a set of consumer groups.

        Any errors are immediately raised.

        :param group_ids: A list of consumer group IDs. These are typically the
            group names as strings.
        :param group_coordinator_id: The node_id of the groups' coordinator
            broker. If set to None, it will query the cluster for each group to
            find that group's coordinator. Explicitly specifying this can be
            useful for avoiding extra network round trips if you already know
            the group coordinator. This is only useful when all the group_ids
            have the same coordinator, otherwise it will error. Default: None.
        :return: A list of group descriptions. For now the group descriptions
            are the raw results from the DescribeGroupsResponse. Long-term, we
            plan to change this to return namedtuples as well as decoding the
            partition assignments.
        """
        group_descriptions = []
        version = self._matching_api_version(DescribeGroupsRequest)
        for group_id in group_ids:
            if group_coordinator_id is not None:
                this_groups_coordinator_id = group_coordinator_id
            else:
                this_groups_coordinator_id = self._find_group_coordinator_id(group_id)
            if version <= 1:
                # Note: KAFKA-6788 A potential optimization is to group the
                # request per coordinator and send one request with a list of
                # all consumer groups. Java still hasn't implemented this
                # because the error checking is hard to get right when some
                # groups error and others don't.
                request = DescribeGroupsRequest[version](groups=(group_id,))
                response = self._send_request_to_node(this_groups_coordinator_id, request)
                assert len(response.groups) == 1
                # TODO need to implement converting the response tuple into
                # a more accessible interface like a namedtuple and then stop
                # hardcoding tuple indices here. Several Java examples,
                # including KafkaAdminClient.java
                group_description = response.groups[0]
                error_code = group_description[0]
                error_type = Errors.for_code(error_code)
                # Java has the note: KAFKA-6789, we can retry based on the error code
                if error_type is not Errors.NoError:
                    raise error_type(
                        "Request '{}' failed with response '{}'."
                        .format(request, response))
                # TODO Java checks the group protocol type, and if consumer
                # (ConsumerProtocol.PROTOCOL_TYPE) or empty string, it decodes
                # the members' partition assignments... that hasn't yet been
                # implemented here so just return the raw struct results
                group_descriptions.append(group_description)
            else:
                raise NotImplementedError(
                    "Support for DescribeGroups v{} has not yet been added to KafkaAdminClient."
                    .format(version))
        return group_descriptions
Example #41
0
    def _handle_join_group_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.debug("Received successful JoinGroup response for group %s: %s",
                      self.group_id, response)
            self.member_id = response.member_id
            self.generation = response.generation_id
            self.rejoin_needed = False
            self.protocol = response.group_protocol
            log.info("Joined group '%s' (generation %s) with member_id %s",
                     self.group_id, self.generation, self.member_id)
            #self.sensors.join_latency.record(response.requestLatencyMs())
            if response.leader_id == response.member_id:
                log.info("Elected group leader -- performing partition"
                         " assignments using %s", self.protocol)
                self._on_join_leader(response).chain(future)
            else:
                self._on_join_follower().chain(future)

        elif error_type is Errors.GroupLoadInProgressError:
            log.debug("Attempt to join group %s rejected since coordinator %s"
                      " is loading the group.", self.group_id, self.coordinator_id)
            # backoff and retry
            future.failure(error_type(response))
        elif error_type is Errors.UnknownMemberIdError:
            # reset the member id and retry immediately
            error = error_type(self.member_id)
            self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
            log.debug("Attempt to join group %s failed due to unknown member id",
                      self.group_id)
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            # re-discover the coordinator and retry with backoff
            self.coordinator_dead()
            log.debug("Attempt to join group %s failed due to obsolete "
                      "coordinator information: %s", self.group_id,
                      error_type.__name__)
            future.failure(error_type())
        elif error_type in (Errors.InconsistentGroupProtocolError,
                            Errors.InvalidSessionTimeoutError,
                            Errors.InvalidGroupIdError):
            # log the error and re-throw the exception
            error = error_type(response)
            log.error("Attempt to join group %s failed due to fatal error: %s",
                      self.group_id, error)
            future.failure(error)
        elif error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        else:
            # unexpected error, throw the exception
            error = error_type()
            log.error("Unexpected error in join group response: %s", error)
            future.failure(error)
Example #42
0
    def _create_topic(self,
                      topic_name,
                      num_partitions,
                      replication_factor,
                      timeout_ms=10000):
        if num_partitions is None:
            num_partitions = self.partitions
        if replication_factor is None:
            replication_factor = self.replicas

        # Try different methods to create a topic, from the fastest to the slowest
        if self.auto_create_topic and \
           num_partitions == self.partitions and \
           replication_factor == self.replicas:
            self._send_request(MetadataRequest[0]([topic_name]))
        elif version() >= (0, 10, 1, 0):
            request = CreateTopicsRequest[0](
                [(topic_name, num_partitions, replication_factor, [], [])],
                timeout_ms)
            result = self._send_request(request, timeout=timeout_ms)
            for topic_result in result[0].topic_error_codes:
                error_code = topic_result[1]
                if error_code != 0:
                    raise errors.for_code(error_code)
        else:
            args = self.kafka_run_class_args('kafka.admin.TopicCommand',
                                             '--zookeeper', '%s:%s/%s' % (self.zookeeper.host,
                                                                          self.zookeeper.port,
                                                                          self.zk_chroot),
                                             '--create',
                                             '--topic', topic_name,
                                             '--partitions', self.partitions \
                                                 if num_partitions is None else num_partitions,
                                             '--replication-factor', self.replicas \
                                                 if replication_factor is None \
                                                 else replication_factor)
            if version() >= (0, 10):
                args.append('--if-not-exists')
            env = self.kafka_run_class_env()
            proc = subprocess.Popen(args,
                                    env=env,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            ret = proc.wait()
            if ret != 0 or proc.returncode != 0:
                output = proc.stdout.read()
                if 'kafka.common.TopicExistsException' not in output:
                    self.out("Failed to create topic %s" % (topic_name, ))
                    self.out(output)
                    self.out(proc.stderr.read())
                    raise RuntimeError("Failed to create topic %s" %
                                       (topic_name, ))
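
For comparison, a sketch of creating a topic through kafka-python's public admin API instead of the test fixture above; the broker address and topic name are hypothetical, and the broker must allow topic creation.

from kafka.admin import KafkaAdminClient, NewTopic

admin = KafkaAdminClient(bootstrap_servers="localhost:9092")
admin.create_topics([NewTopic(name="demo-topic", num_partitions=3, replication_factor=1)])
admin.close()
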
Example #43
0
    def _handle_sasl_handshake_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is not Errors.NoError:
            error = error_type(self)
            self.close(error=error)
            return future.failure(error)

        if self.config['sasl_mechanism'] == 'PLAIN':
            return self._try_authenticate_plain(future)
        else:
            return future.failure(
                Errors.UnsupportedSaslMechanismError(
                    'kafka-python does not support SASL mechanism %s' %
                    self.config['sasl_mechanism']))
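
A client configuration sketch matching the PLAIN-only handshake above: these are kafka-python consumer settings, with a hypothetical broker address and credentials.

from kafka import KafkaConsumer

consumer = KafkaConsumer(
    "demo-topic",
    bootstrap_servers="localhost:9093",
    security_protocol="SASL_PLAINTEXT",
    sasl_mechanism="PLAIN",
    sasl_plain_username="alice",
    sasl_plain_password="secret",
)
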
Example #44
0
    def list_consumer_groups(self, broker_ids=None):
        """List all consumer groups known to the cluster.

        This returns a list of Consumer Group tuples. The tuples are
        composed of the consumer group name and the consumer group protocol
        type.

        Only consumer groups that store their offsets in Kafka are returned.
        The protocol type will be an empty string for groups created using
        Kafka < 0.9 APIs because, although they store their offsets in Kafka,
        they don't use Kafka for group coordination. For groups created using
        Kafka >= 0.9, the protocol type will typically be "consumer".

        As soon as any error is encountered, it is immediately raised.

        :param broker_ids: A list of broker node_ids to query for consumer
            groups. If set to None, will query all brokers in the cluster.
            Explicitly specifying broker(s) can be useful for determining which
            consumer groups are coordinated by those broker(s). Default: None
        :return list: List of tuples of Consumer Groups.
        :exception GroupCoordinatorNotAvailableError: The coordinator is not
            available, so cannot process requests.
        :exception GroupLoadInProgressError: The coordinator is loading and
            hence can't process requests.
        """
        # While we return a list, internally use a set to prevent duplicates
        # because if a group coordinator fails after being queried, and its
        # consumer groups move to new brokers that haven't yet been queried,
        # then the same group could be returned by multiple brokers.
        consumer_groups = set()
        if broker_ids is None:
            broker_ids = [broker.nodeId for broker in self._client.cluster.brokers()]
        version = self._matching_api_version(ListGroupsRequest)
        if version <= 2:
            request = ListGroupsRequest[version]()
            for broker_id in broker_ids:
                response = self._send_request_to_node(broker_id, request)
                error_type = Errors.for_code(response.error_code)
                if error_type is not Errors.NoError:
                    raise error_type(
                        "Request '{}' failed with response '{}'."
                        .format(request, response))
                consumer_groups.update(response.groups)
        else:
            raise NotImplementedError(
                "Support for ListGroups v{} has not yet been added to KafkaAdminClient."
                .format(version))
        return list(consumer_groups)
Example #45
0
    def add_group_coordinator(self, group, response):
        """Update with metadata for a group coordinator

        Arguments:
            group (str): name of group from GroupCoordinatorRequest
            response (GroupCoordinatorResponse): broker response

        Returns:
            bool: True if metadata is updated, False on error
        """
        log.debug("Updating coordinator for %s: %s", group, response)
        error_type = Errors.for_code(response.error_code)
        if error_type is not Errors.NoError:
            log.error("GroupCoordinatorResponse error: %s", error_type)
            self._groups[group] = -1
            return False

        node_id = response.coordinator_id
        coordinator = BrokerMetadata(
            response.coordinator_id,
            response.host,
            response.port,
            None)

        # Assume that group coordinators are just brokers
        # (this is true now, but could diverge in future)
        if node_id not in self._brokers:
            self._brokers[node_id] = coordinator

        # If this happens, either brokers have moved without
        # changing IDs, or our assumption above is wrong
        else:
            node = self._brokers[node_id]
            if coordinator.host != node.host or coordinator.port != node.port:
                log.error("GroupCoordinator metadata conflicts with existing"
                          " broker metadata. Coordinator: %s, Broker: %s",
                          coordinator, node)
                self._groups[group] = node_id
                return False

        log.info("Group coordinator for %s is %s", group, coordinator)
        self._groups[group] = node_id
        return True
Example #46
0
    def _create_topic(self, topic_name, num_partitions, replication_factor, timeout_ms=10000):
        if num_partitions is None:
            num_partitions = self.partitions
        if replication_factor is None:
            replication_factor = self.replicas

        # Try different methods to create a topic, from the fastest to the slowest
        if self.auto_create_topic and \
           num_partitions == self.partitions and \
           replication_factor == self.replicas:
            self._send_request(MetadataRequest[0]([topic_name]))
        elif version() >= (0, 10, 1, 0):
            request = CreateTopicsRequest[0]([(topic_name, num_partitions,
                                               replication_factor, [], [])], timeout_ms)
            result = self._send_request(request, timeout=timeout_ms)
            for topic_result in result[0].topic_error_codes:
                error_code = topic_result[1]
                if error_code != 0:
                    raise errors.for_code(error_code)
        else:
            args = self.kafka_run_class_args('kafka.admin.TopicCommand',
                                             '--zookeeper', '%s:%s/%s' % (self.zookeeper.host,
                                                                          self.zookeeper.port,
                                                                          self.zk_chroot),
                                             '--create',
                                             '--topic', topic_name,
                                             '--partitions', self.partitions \
                                                 if num_partitions is None else num_partitions,
                                             '--replication-factor', self.replicas \
                                                 if replication_factor is None \
                                                 else replication_factor)
            if version() >= (0, 10):
                args.append('--if-not-exists')
            env = self.kafka_run_class_env()
            proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ret = proc.wait()
            if ret != 0 or proc.returncode != 0:
                output = proc.stdout.read()
                if 'kafka.common.TopicExistsException' not in output:
                    self.out("Failed to create topic %s" % (topic_name,))
                    self.out(output)
                    self.out(proc.stderr.read())
                    raise RuntimeError("Failed to create topic %s" % (topic_name,))
Example #47
0
    def _send_request_to_controller(self, request):
        """Send a Kafka protocol message to the cluster controller.

        Will block until the message result is received.

        :param request: The message to send.
        :return: The Kafka protocol response for the message.
        """
        tries = 2  # in case our cached self._controller_id is outdated
        while tries:
            tries -= 1
            response = self._send_request_to_node(self._controller_id, request)
            # In Java, the error fieldname is inconsistent:
            #  - CreateTopicsResponse / CreatePartitionsResponse uses topic_errors
            #  - DeleteTopicsResponse uses topic_error_codes
            # So this is a little brittle in that it assumes all responses have
            # one of these attributes and that they always unpack into
            # (topic, error_code) tuples.
            topic_error_tuples = (response.topic_errors if hasattr(response, 'topic_errors')
                else response.topic_error_codes)
            # Also small py2/py3 compatibility -- py3 can ignore extra values
            # during unpack via: for x, y, *rest in list_of_values. py2 cannot.
            # So for now we have to map across the list and explicitly drop any
            # extra values (usually the error_message)
            for topic, error_code in map(lambda e: e[:2], topic_error_tuples):
                error_type = Errors.for_code(error_code)
                if tries and error_type is Errors.NotControllerError:
                    # No need to inspect the rest of the errors for
                    # non-retriable errors because NotControllerError should
                    # either be thrown for all errors or no errors.
                    self._refresh_controller_id()
                    break
                elif error_type is not Errors.NoError:
                    raise error_type(
                        "Request '{}' failed with response '{}'."
                        .format(request, response))
            else:
                return response
        raise RuntimeError("This should never happen, please file a bug with full stacktrace if encountered")
Example #48
0
 def _handle_heartbeat_response(self, future, response):
     #self.sensors.heartbeat_latency.record(response.requestLatencyMs())
     error_type = Errors.for_code(response.error_code)
     if error_type is Errors.NoError:
         log.debug("Received successful heartbeat response for group %s",
                   self.group_id)
         future.success(None)
     elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                         Errors.NotCoordinatorForGroupError):
         log.warning("Heartbeat failed for group %s: coordinator (node %s)"
                     " is either not started or not valid", self.group_id,
                     self.coordinator_id)
         self.coordinator_dead()
         future.failure(error_type())
     elif error_type is Errors.RebalanceInProgressError:
         log.warning("Heartbeat failed for group %s because it is"
                     " rebalancing", self.group_id)
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.IllegalGenerationError:
         log.warning("Heartbeat failed for group %s: generation id is not "
                     " current.", self.group_id)
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.UnknownMemberIdError:
         log.warning("Heartbeat: local member_id was not recognized;"
                     " this consumer needs to re-join")
         self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
         self.rejoin_needed = True
         future.failure(error_type)
     elif error_type is Errors.GroupAuthorizationFailedError:
         error = error_type(self.group_id)
         log.error("Heartbeat failed: authorization error: %s", error)
         future.failure(error)
     else:
         error = error_type()
         log.error("Heartbeat failed: Unhandled error: %s", error)
         future.failure(error)
Example #49
0
    def _find_group_coordinator_id(self, group_id):
        """Find the broker node_id of the coordinator of the given group.

        Sends a FindCoordinatorRequest message to the cluster. Will block until
        the FindCoordinatorResponse is received. Any errors are immediately
        raised.

        :param group_id: The consumer group ID. This is typically the group
            name as a string.
        :return: The node_id of the broker that is the coordinator.
        """
        # Note: Java may change how this is implemented in KAFKA-6791.
        #
        # TODO add support for dynamically picking version of
        # GroupCoordinatorRequest which was renamed to FindCoordinatorRequest.
        # When I experimented with this, GroupCoordinatorResponse_v1 didn't
        # match GroupCoordinatorResponse_v0 and I couldn't figure out why.
        gc_request = GroupCoordinatorRequest[0](group_id)
        gc_response = self._send_request_to_node(self._client.least_loaded_node(), gc_request)
        # use the extra error checking in add_group_coordinator() rather than
        # immediately returning the group coordinator.
        success = self._client.cluster.add_group_coordinator(group_id, gc_response)
        if not success:
            error_type = Errors.for_code(gc_response.error_code)
            assert error_type is not Errors.NoError
            # Note: When error_type.retriable, Java will retry... see
            # KafkaAdminClient's handleFindCoordinatorError method
            raise error_type(
                "Could not identify group coordinator for group_id '{}' from response '{}'."
                .format(group_id, gc_response))
        group_coordinator = self._client.cluster.coordinator_for_group(group_id)
        # will be None if the coordinator was never populated, which should never happen here
        assert group_coordinator is not None
        # will be -1 if add_group_coordinator() failed... but by this point the
        # error should have been raised.
        assert group_coordinator != -1
        return group_coordinator
Example #50
0
    def list_consumer_group_offsets(self, group_id, group_coordinator_id=None,
                                    partitions=None):
        """Fetch Consumer Group Offsets.

        Note:
        This does not verify that the group_id or partitions actually exist
        in the cluster.

        As soon as any error is encountered, it is immediately raised.

        :param group_id: The consumer group id name for which to fetch offsets.
        :param group_coordinator_id: The node_id of the group's coordinator
            broker. If set to None, will query the cluster to find the group
            coordinator. Explicitly specifying this can be useful to prevent
            that extra network round trip if you already know the group
            coordinator. Default: None.
        :param partitions: A list of TopicPartitions for which to fetch
            offsets. On brokers >= 0.10.2, this can be set to None to fetch all
            known offsets for the consumer group. Default: None.
        :return dictionary: A dictionary with TopicPartition keys and
            OffsetAndMetadata values. Partitions that are not specified and for
            which the group_id does not have a recorded offset are omitted. An
            offset value of `-1` indicates the group_id has no offset for that
            TopicPartition. A `-1` can only happen for partitions that are
            explicitly specified.
        """
        group_offsets_listing = {}
        if group_coordinator_id is None:
            group_coordinator_id = self._find_group_coordinator_id(group_id)
        version = self._matching_api_version(OffsetFetchRequest)
        if version <= 3:
            if partitions is None:
                if version <= 1:
                    raise ValueError(
                        """OffsetFetchRequest_v{} requires specifying the
                        partitions for which to fetch offsets. Omitting the
                        partitions is only supported on brokers >= 0.10.2.
                        For details, see KIP-88.""".format(version))
                topics_partitions = None
            else:
                # transform from [TopicPartition("t1", 1), TopicPartition("t1", 2)] to [("t1", [1, 2])]
                topics_partitions_dict = defaultdict(set)
                for topic, partition in partitions:
                    topics_partitions_dict[topic].add(partition)
                topics_partitions = list(six.iteritems(topics_partitions_dict))
            request = OffsetFetchRequest[version](group_id, topics_partitions)
            response = self._send_request_to_node(group_coordinator_id, request)
            if version > 1:  # OffsetFetchResponse_v1 lacks a top-level error_code
                error_type = Errors.for_code(response.error_code)
                if error_type is not Errors.NoError:
                    # optionally we could retry if error_type.retriable
                    raise error_type(
                        "Request '{}' failed with response '{}'."
                        .format(request, response))
            # transform response into a dictionary with TopicPartition keys and
            # OffsetAndMetadata values--this is what the Java AdminClient returns
            for topic, partitions in response.topics:
                for partition, offset, metadata, error_code in partitions:
                    error_type = Errors.for_code(error_code)
                    if error_type is not Errors.NoError:
                        raise error_type(
                            "Unable to fetch offsets for group_id {}, topic {}, partition {}"
                            .format(group_id, topic, partition))
                    group_offsets_listing[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata)
        else:
            raise NotImplementedError(
                "Support for OffsetFetch v{} has not yet been added to KafkaAdminClient."
                .format(version))
        return group_offsets_listing
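
A hedged usage sketch for the method above, assuming kafka-python's KafkaAdminClient, a hypothetical group "my-group", and a broker at localhost:9092 new enough (>= 0.10.2) to return all offsets when partitions is None.

from kafka import KafkaAdminClient

admin = KafkaAdminClient(bootstrap_servers="localhost:9092")
for tp, meta in admin.list_consumer_group_offsets("my-group").items():
    print(tp.topic, tp.partition, meta.offset)
admin.close()
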
Example #51
0
    def _handle_offset_commit_response(self, offsets, future, send_time, response):
        # TODO look at adding request_latency_ms to response (like java kafka)
        self.consumer_sensors.commit_latency.record((time.time() - send_time) * 1000)
        unauthorized_topics = set()

        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    log.debug("Group %s committed offset %s for partition %s",
                              self.group_id, offset, tp)
                    if self._subscription.is_assigned(tp):
                        self._subscription.assignment[tp].committed = offset.offset
                elif error_type is Errors.GroupAuthorizationFailedError:
                    log.error("Not authorized to commit offsets for group %s",
                              self.group_id)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type is Errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (Errors.OffsetMetadataTooLargeError,
                                    Errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.debug("OffsetCommit for group %s failed on partition %s"
                              " %s", self.group_id, tp, error_type.__name__)
                    future.failure(error_type())
                    return
                elif error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error_type.__name__)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                    Errors.NotCoordinatorForGroupError,
                                    Errors.RequestTimedOutError):
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error_type.__name__)
                    self.coordinator_dead(error_type())
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError,
                                    Errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error)
                    self.reset_generation()
                    future.failure(Errors.CommitFailedError())
                    return
                else:
                    log.error("Group %s failed to commit partition %s at offset"
                              " %s: %s", self.group_id, tp, offset,
                              error_type.__name__)
                    future.failure(error_type())
                    return

        if unauthorized_topics:
            log.error("Not authorized to commit to topics %s for group %s",
                      unauthorized_topics, self.group_id)
            future.failure(Errors.TopicAuthorizationFailedError(unauthorized_topics))
        else:
            future.success(None)
Example #52
0
    def update_metadata(self, metadata):
        """Update cluster state given a MetadataResponse.

        Arguments:
            metadata (MetadataResponse): broker response to a metadata request

        Returns: None
        """
        # In the common case where we ask for a single topic and get back an
        # error, we should fail the future
        if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
            error_code, topic = metadata.topics[0][:2]
            error = Errors.for_code(error_code)(topic)
            return self.failed_update(error)

        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse -- ignoring.")
            return self.failed_update(Errors.MetadataEmptyBrokerList(metadata))

        _new_brokers = {}
        for broker in metadata.brokers:
            if metadata.API_VERSION == 0:
                node_id, host, port = broker
                rack = None
            else:
                node_id, host, port, rack = broker
            _new_brokers.update({
                node_id: BrokerMetadata(node_id, host, port, rack)
            })

        if metadata.API_VERSION == 0:
            _new_controller = None
        else:
            _new_controller = _new_brokers.get(metadata.controller_id)

        _new_partitions = {}
        _new_broker_partitions = collections.defaultdict(set)
        _new_unauthorized_topics = set()
        _new_internal_topics = set()

        for topic_data in metadata.topics:
            if metadata.API_VERSION == 0:
                error_code, topic, partitions = topic_data
                is_internal = False
            else:
                error_code, topic, is_internal, partitions = topic_data
            if is_internal:
                _new_internal_topics.add(topic)
            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                _new_partitions[topic] = {}
                for p_error, partition, leader, replicas, isr in partitions:
                    _new_partitions[topic][partition] = PartitionMetadata(
                        topic=topic, partition=partition, leader=leader,
                        replicas=replicas, isr=isr, error=p_error)
                    if leader != -1:
                        _new_broker_partitions[leader].add(
                            TopicPartition(topic, partition))

            elif error_type is Errors.LeaderNotAvailableError:
                log.warning("Topic %s is not available during auto-create"
                            " initialization", topic)
            elif error_type is Errors.UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
                _new_unauthorized_topics.add(topic)
            elif error_type is Errors.InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s",
                          topic, error_type)

        with self._lock:
            self._brokers = _new_brokers
            self.controller = _new_controller
            self._partitions = _new_partitions
            self._broker_partitions = _new_broker_partitions
            self.unauthorized_topics = _new_unauthorized_topics
            self.internal_topics = _new_internal_topics
            f = None
            if self._future:
                f = self._future
            self._future = None
            self._need_update = False

        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now

        if f:
            f.success(self)
        log.debug("Updated cluster metadata to %s", self)

        for listener in self._listeners:
            listener(self)

        if self.need_all_topic_metadata:
            # the listener may change the interested topics,
            # which could cause another metadata refresh.
            # If we have already fetched all topics, however,
            # another fetch should be unnecessary.
            self._need_update = False
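
The listener loop near the end of update_metadata() is the hook these clients use for metadata-change callbacks. A sketch of registering one such callback, assuming a reachable broker at localhost:9092 and kafka-python's ClusterMetadata.add_listener; note that _client is an internal attribute and is used here purely for illustration.

from kafka import KafkaConsumer

def on_metadata_update(cluster):
    print("metadata refreshed; known topics:", sorted(cluster.topics()))

consumer = KafkaConsumer(bootstrap_servers="localhost:9092")
consumer._client.cluster.add_listener(on_metadata_update)
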
Example #53
0
    def _parse_fetched_data(self, completed_fetch):
        tp = completed_fetch.topic_partition
        fetch_offset = completed_fetch.fetched_offset
        num_bytes = 0
        records_count = 0
        parsed_records = None

        error_code, highwater = completed_fetch.partition_data[:2]
        error_type = Errors.for_code(error_code)

        try:
            if not self._subscriptions.is_fetchable(tp):
                # this can happen when a rebalance happened or a partition
                # consumption paused while fetch is still in-flight
                log.debug("Ignoring fetched records for partition %s"
                          " since it is no longer fetchable", tp)

            elif error_type is Errors.NoError:
                self._subscriptions.assignment[tp].highwater = highwater

                # we are interested in this fetch only if the beginning
                # offset (of the *request*) matches the current consumed position
                # Note that the *response* may return a messageset that starts
                # earlier (e.g., compressed messages) or later (e.g., compacted topic)
                position = self._subscriptions.assignment[tp].position
                if position is None or position != fetch_offset:
                    log.debug("Discarding fetch response for partition %s"
                              " since its offset %d does not match the"
                              " expected offset %d", tp, fetch_offset,
                              position)
                    return None

                records = MemoryRecords(completed_fetch.partition_data[-1])
                if records.has_next():
                    log.debug("Adding fetched record for partition %s with"
                              " offset %d to buffered record list", tp,
                              position)
                    unpacked = list(self._unpack_message_set(tp, records))
                    parsed_records = self.PartitionRecords(fetch_offset, tp, unpacked)
                    last_offset = unpacked[-1].offset
                    self._sensors.records_fetch_lag.record(highwater - last_offset)
                    num_bytes = records.valid_bytes()
                    records_count = len(unpacked)
                elif records.size_in_bytes() > 0:
                    # we did not read a single message from a non-empty
                    # buffer because that message's size is larger than
                    # fetch size, in this case record this exception
                    record_too_large_partitions = {tp: fetch_offset}
                    raise RecordTooLargeError(
                        "There are some messages at [Partition=Offset]: %s "
                        " whose size is larger than the fetch size %s"
                        " and hence cannot be ever returned."
                        " Increase the fetch size, or decrease the maximum message"
                        " size the broker will allow." % (
                            record_too_large_partitions,
                            self.config['max_partition_fetch_bytes']),
                        record_too_large_partitions)
                self._sensors.record_topic_fetch_metrics(tp.topic, num_bytes, records_count)

            elif error_type in (Errors.NotLeaderForPartitionError,
                                Errors.UnknownTopicOrPartitionError):
                self._client.cluster.request_update()
            elif error_type is Errors.OffsetOutOfRangeError:
                position = self._subscriptions.assignment[tp].position
                if position is None or position != fetch_offset:
                    log.debug("Discarding stale fetch response for partition %s"
                              " since the fetched offset %d does not match the"
                              " current offset %d", tp, fetch_offset, position)
                elif self._subscriptions.has_default_offset_reset_policy():
                    log.info("Fetch offset %s is out of range for topic-partition %s", fetch_offset, tp)
                    self._subscriptions.need_offset_reset(tp)
                else:
                    raise Errors.OffsetOutOfRangeError({tp: fetch_offset})

            elif error_type is Errors.TopicAuthorizationFailedError:
                log.warning("Not authorized to read from topic %s.", tp.topic)
                raise Errors.TopicAuthorizationFailedError(set(tp.topic))
            elif error_type is Errors.UnknownError:
                log.warning("Unknown error fetching data for topic-partition %s", tp)
            else:
                raise error_type('Unexpected error while fetching data')

        finally:
            completed_fetch.metric_aggregator.record(tp, num_bytes, records_count)

        return parsed_records
Example #54
0
    def _handle_fetch_response(self, request, send_time, response):
        """The callback for fetch completion"""
        total_bytes = 0
        total_count = 0
        recv_time = time.time()

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, messages in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance happened or a partition
                    # consumption paused while fetch is still in-flight
                    log.debug("Ignoring fetched records for partition %s"
                              " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    self._subscriptions.assignment[tp].highwater = highwater

                    # we are interested in this fetch only if the beginning
                    # offset matches the current consumed position
                    fetch_offset = fetch_offsets[tp]
                    position = self._subscriptions.assignment[tp].position
                    if position is None or position != fetch_offset:
                        log.debug("Discarding fetch response for partition %s"
                                  " since its offset %d does not match the"
                                  " expected offset %d", tp, fetch_offset,
                                  position)
                        continue

                    num_bytes = 0
                    partial = None
                    if messages and isinstance(messages[-1][-1], PartialMessage):
                        partial = messages.pop()

                    if messages:
                        log.debug("Adding fetched record for partition %s with"
                                  " offset %d to buffered record list", tp,
                                  position)
                        self._records.append((fetch_offset, tp, messages))
                        last_offset, _, _ = messages[-1]
                        self._sensors.records_fetch_lag.record(highwater - last_offset)
                        num_bytes = sum(msg[1] for msg in messages)
                    elif partial:
                        # we did not read a single message from a non-empty
                        # buffer because that message's size is larger than
                        # fetch size, in this case record this exception
                        self._record_too_large_partitions[tp] = fetch_offset

                    self._sensors.record_topic_fetch_metrics(topic, num_bytes, len(messages))
                    total_bytes += num_bytes
                    total_count += len(messages)
                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.cluster.request_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                    else:
                        self._offset_out_of_range_partitions[tp] = fetch_offset
                    log.info("Fetch offset %s is out of range, resetting offset",
                             fetch_offset)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    self._unauthorized_topics.add(tp.topic)
                elif error_type is Errors.UnknownError:
                    log.warn("Unknown error fetching data for topic-partition %s", tp)
                else:
                    raise error_type('Unexpected error while fetching data')

        self._sensors.bytes_fetched.record(total_bytes)
        self._sensors.records_fetched.record(total_count)
        if response.API_VERSION >= 1:
            self._sensors.fetch_throttle_time_sensor.record(response.throttle_time_ms)
        self._sensors.fetch_latency.record((recv_time - send_time) * 1000)
Example #55
0
 def _handle_leave_group_response(self, response):
     error_type = Errors.for_code(response.error_code)
     if error_type is Errors.NoError:
         log.info("LeaveGroup request succeeded")
     else:
         log.error("LeaveGroup request failed: %s", error_type())