Exemple #1
0
    def _decode_message_set_iter(cls, data):
        """
        Iteratively decode a MessageSet

        Reads repeated elements of (offset, message), calling decode_message
        to decode a single message. Since compressed messages contain futher
        MessageSets, these two methods have been decoupled so that they may
        recurse easily.
        """
        cur = 0
        read_message = False
        while cur < len(data):
            try:
                ((offset, ), cur) = relative_unpack('>q', data, cur)
                (msg, cur) = read_int_string(data, cur)
                for (offset,
                     message) in KafkaProtocol._decode_message(msg, offset):
                    read_message = True
                    yield OffsetAndMessage(offset, message)
            except BufferUnderflowError:
                # NOTE: Not sure this is correct error handling:
                # Is it possible to get a BUE if the message set is somewhere
                # in the middle of the fetch response? If so, we probably have
                # an issue that's not fetch size too small.
                # Aren't we ignoring errors if we fail to unpack data by
                # raising StopIteration()?
                # If _decode_message() raises a ChecksumError, couldn't that
                # also be due to the fetch size being too small?
                if read_message is False:
                    # If we get a partial read of a message, but haven't
                    # yielded anything there's a problem
                    raise ConsumerFetchSizeTooSmall()
                else:
                    raise StopIteration()
Exemple #2
0
    def _decode_message_set_iter(cls, data):
        """
        Iteratively decode a MessageSet

        Reads repeated elements of (offset, message), calling decode_message
        to decode a single message. Since compressed messages contain futher
        MessageSets, these two methods have been decoupled so that they may
        recurse easily.
        """
        cur = 0
        read_message = False
        while cur < len(data):
            try:
                ((offset, ), cur) = relative_unpack('>q', data, cur)
                (msg, cur) = read_int_string(data, cur)
                for (offset,
                     message) in KafkaProtocol._decode_message(msg, offset):
                    read_message = True
                    yield OffsetAndMessage(offset, message)
            except BufferUnderflowError:
                if read_message is False:
                    # If we get a partial read of a message, but haven't yielded anyhting
                    # there's a problem
                    raise ConsumerFetchSizeTooSmall()
                else:
                    raise StopIteration()
    def _fetch(self):
        # Create fetch request payloads for all the partitions
        partitions = dict(
            (p, self.buffer_size) for p in self.fetch_offsets.keys())
        while partitions:
            requests = []
            for partition, buffer_size in six.iteritems(partitions):
                requests.append(
                    FetchRequestPayload(self.topic, partition,
                                        self.fetch_offsets[partition],
                                        buffer_size))
            # Send request
            responses = self.client.send_fetch_request(
                requests,
                max_wait_time=int(self.fetch_max_wait_time),
                min_bytes=self.fetch_min_bytes,
                fail_on_error=False)

            retry_partitions = {}
            for resp in responses:

                try:
                    check_error(resp)
                except UnknownTopicOrPartitionError:
                    log.error('UnknownTopicOrPartitionError for %s:%d',
                              resp.topic, resp.partition)
                    self.client.reset_topic_metadata(resp.topic)
                    raise
                except NotLeaderForPartitionError:
                    log.error('NotLeaderForPartitionError for %s:%d',
                              resp.topic, resp.partition)
                    self.client.reset_topic_metadata(resp.topic)
                    continue
                except OffsetOutOfRangeError:
                    log.warning(
                        'OffsetOutOfRangeError for %s:%d. '
                        'Resetting partition offset...', resp.topic,
                        resp.partition)
                    self.reset_partition_offset(resp.partition)
                    # Retry this partition
                    retry_partitions[resp.partition] = partitions[
                        resp.partition]
                    continue
                except FailedPayloadsError as e:
                    log.warning('FailedPayloadsError for %s:%d',
                                e.payload.topic, e.payload.partition)
                    # Retry this partition
                    retry_partitions[e.payload.partition] = partitions[
                        e.payload.partition]
                    continue

                partition = resp.partition
                buffer_size = partitions[partition]

                # Check for partial message
                if resp.messages and isinstance(resp.messages[-1].message,
                                                PartialMessage):

                    # If buffer is at max and all we got was a partial message
                    # raise ConsumerFetchSizeTooSmall
                    if (self.max_buffer_size is not None
                            and buffer_size == self.max_buffer_size
                            and len(resp.messages) == 1):

                        log.error('Max fetch size %d too small',
                                  self.max_buffer_size)
                        raise ConsumerFetchSizeTooSmall()

                    if self.max_buffer_size is None:
                        buffer_size *= 2
                    else:
                        buffer_size = min(buffer_size * 2,
                                          self.max_buffer_size)
                    log.warning(
                        'Fetch size too small, increase to %d (2x) '
                        'and retry', buffer_size)
                    retry_partitions[partition] = buffer_size
                    resp.messages.pop()

                for message in resp.messages:
                    if message.offset < self.fetch_offsets[partition]:
                        log.debug(
                            'Skipping message %s because its offset is less than the consumer offset',
                            message)
                        continue
                    # Put the message in our queue
                    self.queue.put((partition, message))
                    self.fetch_offsets[partition] = message.offset + 1
            partitions = retry_partitions