Example #1
0
    def _handle_sync_group_response(self, future, response):
        """Resolve ``future`` based on the outcome of a SyncGroup request.

        On ``NoError`` the future succeeds with ``response.member_assignment``.
        For any other error code ``rejoin_needed`` is set and the future fails
        with an instance of the matching error class.

        Arguments:
            future: the future to complete from this response
            response: SyncGroup response; ``error_code`` and
                ``member_assignment`` are read from it
        """
        err_cls = Errors.for_code(response.error_code)

        if err_cls is Errors.NoError:
            log.info("Successfully joined group %s with generation %s",
                     self.group_id, self.generation)
            # NOTE: sync latency metric recording is currently disabled
            future.success(response.member_assignment)
            return

        # Every failure below means this member has to rejoin the group
        self.rejoin_needed = True

        if err_cls is Errors.GroupAuthorizationFailedError:
            future.failure(err_cls(self.group_id))
            return

        if err_cls is Errors.RebalanceInProgressError:
            log.debug("SyncGroup for group %s failed due to coordinator"
                      " rebalance", self.group_id)
            future.failure(err_cls(self.group_id))
            return

        if err_cls in (Errors.UnknownMemberIdError,
                       Errors.IllegalGenerationError):
            failure = err_cls()
            log.debug("SyncGroup for group %s failed due to %s",
                      self.group_id, failure)
            # drop the current member id back to the "unknown" placeholder
            self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
        elif err_cls in (Errors.GroupCoordinatorNotAvailableError,
                         Errors.NotCoordinatorForGroupError):
            failure = err_cls()
            log.debug("SyncGroup for group %s failed due to %s",
                      self.group_id, failure)
            self.coordinator_dead()
        else:
            failure = err_cls()
            log.error("Unexpected error from SyncGroup: %s", failure)

        future.failure(failure)
Example #2
0
    def _handle_join_group_response(self, future, response):
        """Resolve ``future`` based on the outcome of a JoinGroup request.

        On ``NoError`` the member id, generation and protocol from the
        response are stored, ``rejoin_needed`` is cleared, and the result of
        the leader or follower follow-up step is chained onto ``future``.
        Every other error code fails ``future`` with an instance of the
        matching error class.

        Arguments:
            future: the future to complete from this response
            response: JoinGroup response; ``error_code``, ``member_id``,
                ``generation_id``, ``group_protocol`` and ``leader_id`` are
                read from it
        """
        err_cls = Errors.for_code(response.error_code)

        if err_cls is Errors.NoError:
            log.debug("Received successful JoinGroup response for group %s: %s",
                      self.group_id, response)
            self.member_id = response.member_id
            self.generation = response.generation_id
            self.rejoin_needed = False
            self.protocol = response.group_protocol
            log.info("Joined group '%s' (generation %s) with member_id %s",
                     self.group_id, self.generation, self.member_id)
            # NOTE: join latency metric recording is currently disabled
            elected_leader = response.leader_id == response.member_id
            if elected_leader:
                log.info("Elected group leader -- performing partition"
                         " assignments using %s", self.protocol)
                follow_up = self._on_join_leader(response)
            else:
                follow_up = self._on_join_follower()
            follow_up.chain(future)
            return

        if err_cls is Errors.GroupLoadInProgressError:
            log.debug("Attempt to join group %s rejected since coordinator %s"
                      " is loading the group.", self.group_id, self.coordinator_id)
            # caller is expected to back off and retry
            future.failure(err_cls(response))
            return

        if err_cls is Errors.UnknownMemberIdError:
            # build the error from the old member id before resetting it
            failure = err_cls(self.member_id)
            self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
            log.debug("Attempt to join group %s failed due to unknown member id",
                      self.group_id)
            future.failure(failure)
            return

        if err_cls in (Errors.GroupCoordinatorNotAvailableError,
                       Errors.NotCoordinatorForGroupError):
            # coordinator info is stale: mark it dead so it is re-discovered
            self.coordinator_dead()
            log.debug("Attempt to join group %s failed due to obsolete "
                      "coordinator information: %s", self.group_id,
                      err_cls.__name__)
            future.failure(err_cls())
            return

        if err_cls in (Errors.InconsistentGroupProtocolError,
                       Errors.InvalidSessionTimeoutError,
                       Errors.InvalidGroupIdError):
            # fatal errors: log and hand the error to the future
            failure = err_cls(response)
            log.error("Attempt to join group %s failed due to fatal error: %s",
                      self.group_id, failure)
            future.failure(failure)
            return

        if err_cls is Errors.GroupAuthorizationFailedError:
            future.failure(err_cls(self.group_id))
            return

        # anything else is unexpected
        failure = err_cls()
        log.error("Unexpected error in join group response: %s", failure)
        future.failure(failure)
Example #3
0
 def _handle_heartbeat_response(self, future, response):
     """Resolve ``future`` based on the outcome of a Heartbeat request.

     On ``NoError`` the future succeeds with ``None``. Every other error
     code fails the future with an *instance* of the matching error class;
     some codes additionally mark the coordinator dead, request a rejoin,
     or reset the member id.

     Arguments:
         future: the future to complete from this response
         response: Heartbeat response; only ``error_code`` is read
     """
     # NOTE: heartbeat latency metric recording is currently disabled
     error_type = Errors.for_code(response.error_code)
     if error_type is Errors.NoError:
         log.debug("Received successful heartbeat response for group %s",
                   self.group_id)
         future.success(None)
     elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                         Errors.NotCoordinatorForGroupError):
         log.warning("Heartbeat failed for group %s: coordinator (node %s)"
                     " is either not started or not valid", self.group_id,
                     self.coordinator_id)
         self.coordinator_dead()
         future.failure(error_type())
     elif error_type is Errors.RebalanceInProgressError:
         log.warning("Heartbeat failed for group %s because it is"
                     " rebalancing", self.group_id)
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.IllegalGenerationError:
         log.warning("Heartbeat failed for group %s: generation id is not "
                     " current.", self.group_id)
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.UnknownMemberIdError:
         log.warning("Heartbeat: local member_id was not recognized;"
                     " this consumer needs to re-join")
         self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID
         self.rejoin_needed = True
         # Fail with an instance, like every other branch; passing the bare
         # class would hand callers a type instead of an exception object.
         future.failure(error_type())
     elif error_type is Errors.GroupAuthorizationFailedError:
         error = error_type(self.group_id)
         log.error("Heartbeat failed: authorization error: %s", error)
         future.failure(error)
     else:
         error = error_type()
         log.error("Heartbeat failed: Unhandled error: %s", error)
         future.failure(error)
Example #4
0
    def _handle_group_coordinator_response(self, future, response):
        """Resolve ``future`` from a group coordinator lookup response.

        If a coordinator is already known the response is ignored and the
        future succeeds with the current ``coordinator_id``. Otherwise, on
        ``NoError`` the coordinator is registered with the cluster metadata,
        stored on ``self`` and the future succeeds with its id; on any error
        the future fails with an instance of the matching error class.

        Arguments:
            future: the future to complete from this response
            response: coordinator lookup response; ``error_code`` and
                ``coordinator_id`` are read from it
        """
        log.debug("Received group coordinator response %s", response)

        already_known = not self.coordinator_unknown()
        if already_known:
            # a coordinator was found in the meantime; nothing to do
            log.debug("Coordinator already known -- ignoring metadata response")
            future.success(self.coordinator_id)
            return

        err_cls = Errors.for_code(response.error_code)

        if err_cls is not Errors.NoError:
            if err_cls is Errors.GroupCoordinatorNotAvailableError:
                log.debug("Group Coordinator Not Available; retry")
                future.failure(err_cls())
            elif err_cls is Errors.GroupAuthorizationFailedError:
                failure = err_cls(self.group_id)
                log.error("Group Coordinator Request failed: %s", failure)
                future.failure(failure)
            else:
                failure = err_cls()
                log.error("Unrecognized failure in Group Coordinator Request: %s",
                          failure)
                future.failure(failure)
            return

        registered = self._client.cluster.add_group_coordinator(
            self.group_id, response)
        if not registered:
            # can happen when coordinator metadata differs from broker
            # metadata
            future.failure(Errors.IllegalStateError())
            return

        self.coordinator_id = response.coordinator_id
        log.info("Discovered coordinator %s for group %s",
                 self.coordinator_id, self.group_id)
        self._client.ready(self.coordinator_id)

        # heartbeats only make sense once we hold a valid generation
        if self.generation > 0:
            self.heartbeat_task.reset()
        future.success(self.coordinator_id)
Example #5
0
    def _handle_offset_response(self, partition, future, response):
        """Callback for the response of a list offset request.

        Succeeds ``future`` with the single returned offset on ``NoError``;
        otherwise fails it with an instance of the matching error class
        constructed from ``partition``.

        Arguments:
            partition (TopicPartition): The partition that was fetched
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response is not for exactly one
                topic-partition, does not match partition, or a successful
                response does not carry exactly one offset
        """
        single_tp_msg = (
            'OffsetResponse should only be for a single topic-partition')

        # Check the shape *before* indexing so that an empty or oversized
        # response surfaces as the documented AssertionError rather than an
        # IndexError from response.topics[0].
        assert len(response.topics) == 1, single_tp_msg
        topic, partition_info = response.topics[0]
        assert len(partition_info) == 1, single_tp_msg

        part, error_code, offsets = partition_info[0]
        assert topic == partition.topic and part == partition.partition, (
            'OffsetResponse partition does not match OffsetRequest partition')

        error_type = Errors.for_code(error_code)
        if error_type is Errors.NoError:
            assert len(offsets) == 1, 'Expected OffsetResponse with one offset'
            offset = offsets[0]
            log.debug("Fetched offset %d for partition %s", offset, partition)
            future.success(offset)
        elif error_type in (Errors.NotLeaderForPartitionError,
                            Errors.UnknownTopicOrPartitionError):
            # stale leadership info: logged quietly, failure is still reported
            log.debug(
                "Attempt to fetch offsets for partition %s failed due"
                " to obsolete leadership information, retrying.", partition)
            future.failure(error_type(partition))
        else:
            log.warning(
                "Attempt to fetch offsets for partition %s failed due to:"
                " %s", partition, error_type)
            future.failure(error_type(partition))
Example #6
0
    def _handle_fetch_response(self, request, send_time, response):
        """The callback for fetch completion.

        Walks every partition in ``response``, buffers fetched messages whose
        fetch offset still matches the current consumed position, reacts to
        per-partition error codes, and records fetch metrics.

        Arguments:
            request: the fetch request that produced ``response``; its
                ``topics`` supply the offset requested for each partition
            send_time (float): timestamp of when the request was sent, on
                the same clock as ``time.time()`` (used to compute latency)
            response: the fetch response; ``topics``, ``API_VERSION`` and,
                when ``API_VERSION >= 1``, ``throttle_time_ms`` are read

        Raises:
            An instance of the error class mapped from a partition's error
            code, for any error code that is not handled explicitly below.
        """
        total_bytes = 0
        total_count = 0
        recv_time = time.time()

        # Map each requested partition to the offset we asked for
        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, messages in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance happened or a partition
                    # consumption paused while fetch is still in-flight
                    log.debug(
                        "Ignoring fetched records for partition %s"
                        " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    self._subscriptions.assignment[tp].highwater = highwater

                    # we are interested in this fetch only if the beginning
                    # offset matches the current consumed position
                    fetch_offset = fetch_offsets[tp]
                    position = self._subscriptions.assignment[tp].position
                    if position is None or position != fetch_offset:
                        # position may be None here, so it is formatted with
                        # %s; %d would make the logging call fail to format
                        log.debug(
                            "Discarding fetch response for partition %s"
                            " since its offset %d does not match the"
                            " expected offset %s", tp, fetch_offset, position)
                        continue

                    num_bytes = 0
                    partial = None
                    # a trailing PartialMessage is split off so it is not
                    # buffered or counted with the complete messages
                    if messages and isinstance(messages[-1][-1],
                                               PartialMessage):
                        partial = messages.pop()

                    if messages:
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp, position)
                        self._records.append((fetch_offset, tp, messages))
                        # assumes each entry is (offset, size, message)
                        # -- TODO confirm against the message set type
                        last_offset, _, _ = messages[-1]
                        self._sensors.records_fetch_lag.record(highwater -
                                                               last_offset)
                        num_bytes = sum(msg[1] for msg in messages)
                    elif partial:
                        # we did not read a single message from a non-empty
                        # buffer because that message's size is larger than
                        # fetch size, in this case record this exception
                        self._record_too_large_partitions[tp] = fetch_offset

                    self._sensors.record_topic_fetch_metrics(
                        topic, num_bytes, len(messages))
                    total_bytes += num_bytes
                    total_count += len(messages)
                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    # metadata is stale; ask for a refresh
                    self._client.cluster.request_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                    else:
                        self._offset_out_of_range_partitions[tp] = fetch_offset
                    log.info(
                        "Fetch offset %s is out of range, resetting offset",
                        fetch_offset)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warning("Not authorized to read from topic %s.",
                                tp.topic)
                    self._unauthorized_topics.add(tp.topic)
                elif error_type is Errors.UnknownError:
                    log.warning(
                        "Unknown error fetching data for topic-partition %s",
                        tp)
                else:
                    raise error_type('Unexpected error while fetching data')

        self._sensors.bytes_fetched.record(total_bytes)
        self._sensors.records_fetched.record(total_count)
        if response.API_VERSION >= 1:
            self._sensors.fetch_throttle_time_sensor.record(
                response.throttle_time_ms)
        # seconds -> milliseconds
        self._sensors.fetch_latency.record((recv_time - send_time) * 1000)
Example #7
0
 def _handle_leave_group_response(self, response):
     """Log the outcome of a LeaveGroup request.

     Arguments:
         response: LeaveGroup response; only ``error_code`` is read
     """
     err_cls = Errors.for_code(response.error_code)
     if err_cls is not Errors.NoError:
         log.error("LeaveGroup request failed: %s", err_cls())
         return
     log.info("LeaveGroup request succeeded")