Example #1
def func(loops: int, magic: int):
    # The JIT can optimize out the whole function if the result is the same
    # each time, so we need some randomized input data.
    precomputed_samples = prepare(magic)
    results = []

    # Main benchmark code.
    batch_data = next(precomputed_samples)
    t0 = perf.perf_counter()
    for _ in range(loops):
        records = MemoryRecords(batch_data)
        while records.has_next():
            batch = records.next_batch()
            batch.validate_crc()
            for record in batch:
                results.append(record.value)

    res = perf.perf_counter() - t0
    finalize(results)

    return res
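
For context, below is a minimal sketch of what the `prepare(magic)` helper could look like: an iterator of pre-serialized record batches with randomized payloads, so the JIT cannot fold the benchmark loop into a constant. The builder arguments, sample counts, and the restriction to the v2 record format are assumptions made for illustration, not taken from the original benchmark; `finalize(results)` presumably just consumes the accumulated values so they cannot be optimized away.

# Hypothetical sketch of `prepare(magic)`; builder parameters are illustrative.
import itertools
import os

from aiokafka.record.default_records import DefaultRecordBatchBuilder


def prepare(magic: int):
    assert magic == 2, "this sketch only covers the v2 record format"
    samples = []
    for _ in range(10):
        builder = DefaultRecordBatchBuilder(
            magic=magic, compression_type=0, is_transactional=False,
            producer_id=-1, producer_epoch=-1, base_sequence=-1,
            batch_size=1024 * 1024)
        for offset in range(100):
            builder.append(offset=offset, timestamp=1631276519572, key=None,
                           value=os.urandom(100), headers=[])
        samples.append(bytes(builder.build()))
    # Each next() call hands the benchmark one precomputed serialized batch.
    return itertools.cycle(samples)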
Example #2
    async def test_solitary_abort_marker(self):
        # An abort marker might not be preceded by any aborted messages.

        # Setup: Create a record batch (control batch) containing
        # a single transaction abort marker.
        builder = DefaultRecordBatchBuilder(magic=2,
                                            compression_type=0,
                                            is_transactional=True,
                                            producer_id=3,
                                            producer_epoch=1,
                                            base_sequence=-1,
                                            batch_size=999)
        orig_get_attributes = builder._get_attributes
        builder._get_attributes = lambda *args, **kwargs: (
            # Make batch a control batch
            orig_get_attributes(*args, **kwargs)
            | DefaultRecordBatchBuilder.CONTROL_MASK)
        builder.append(
            offset=0,
            timestamp=1631276519572,
            # transaction abort marker
            key=b'\x00\x00\x00\x00',
            value=b'\x00\x00\x00\x00\x00\x00',
            headers=[])
        buffer = builder.build()
        records = MemoryRecords(bytes(buffer))

        # Test: In aiokafka>=0.7.2, the following line would result in an
        # exception, because the implementation assumed that any transaction
        # abort marker would be preceded by at least one aborted message
        # originating from the same producer_id. However, this appears to
        # not always be the case, as reported in
        # https://github.com/aio-libs/aiokafka/issues/781 .
        partition_recs = PartitionRecords(tp=TopicPartition('test-topic', 0),
                                          records=records,
                                          aborted_transactions=[],
                                          fetch_offset=0,
                                          key_deserializer=None,
                                          value_deserializer=None,
                                          check_crcs=True,
                                          isolation_level=READ_COMMITTED)

        # Since isolation_level is READ_COMMITTED, no consumer records are
        # expected to be returned here.
        self.assertEqual(len(list(partition_recs)), 0)
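
As an aside (not part of the original test), the 4-byte key appended above follows the Kafka control-record key layout: a big-endian version (int16) followed by a type (int16), where type 0 marks a transaction abort and type 1 a commit. A quick check of the bytes used in the test:

import struct

# Decode the control-record key from the test: version=0, type=0 (abort).
version, marker_type = struct.unpack(">hh", b"\x00\x00\x00\x00")
assert (version, marker_type) == (0, 0)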
Example #3
    def _proc_fetch_request(self, assignment, node_id, request):
        needs_wakeup = False
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        except asyncio.CancelledError:
            # Either `close()` or partition unassigned. Either way the result
            # is no longer of interest.
            return False

        if not assignment.active:
            log.debug(
                "Discarding fetch response since the assignment changed during"
                " fetch")
            return False

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, *part_data in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                fetch_offset = fetch_offsets[tp]
                tp_state = assignment.state_value(tp)
                if not tp_state.has_valid_position or \
                        tp_state.position != fetch_offset:
                    log.debug(
                        "Discarding fetch response for partition %s "
                        "since its offset %s does not match the current "
                        "position", tp, fetch_offset)
                    continue

                if error_type is Errors.NoError:
                    tp_state.highwater = highwater

                    # part_data also contains lso, aborted_transactions.
                    # message_set is last
                    records = MemoryRecords(part_data[-1])
                    if records.has_next():
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp,
                            fetch_offset)

                        message_iterator = self._unpack_records(tp, records)
                        self._records[tp] = FetchResult(
                            tp,
                            message_iterator=message_iterator,
                            assignment=assignment,
                            backoff=self._prefetch_backoff,
                            fetch_offset=fetch_offset,
                            loop=self._loop)

                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif records.size_in_bytes() > 0:
                        # We did not read a single message from a non-empty
                        # buffer because that message's size is larger than
                        # the fetch size; in this case, record this exception.
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.", tp,
                            fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        tp_state.consumed_to(tp_state.position + 1)
                        needs_wakeup = True

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    if self._default_reset_strategy != \
                            OffsetResetStrategy.NONE:
                        tp_state.await_reset(self._default_reset_strategy)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range for partition %s,"
                        " resetting offset", fetch_offset, tp)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warning("Not authorized to read from topic %s.",
                                tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)
        return needs_wakeup
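
The `records.size_in_bytes() > 0` branch above can be illustrated with a buffer that is cut off mid-batch: it still reports a non-zero size, yet exposes no complete batch to iterate. A hedged sketch, assuming the record-module import paths used by recent aiokafka versions:

# Illustrative only: a fetch buffer truncated mid-batch has bytes in it,
# but `has_next()` finds no complete batch to return.
from aiokafka.record.default_records import DefaultRecordBatchBuilder
from aiokafka.record.memory_records import MemoryRecords

builder = DefaultRecordBatchBuilder(
    magic=2, compression_type=0, is_transactional=False,
    producer_id=-1, producer_epoch=-1, base_sequence=-1, batch_size=999)
builder.append(offset=0, timestamp=1631276519572, key=None,
               value=b"x" * 100, headers=[])
truncated = bytes(builder.build())[:-10]

records = MemoryRecords(truncated)
assert records.size_in_bytes() > 0   # the buffer is not empty...
assert not records.has_next()        # ...but no full batch can be returned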
Example #4
    def _proc_fetch_request(self, node_id, request):
        needs_wakeup = False
        needs_position_update = []
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        finally:
            self._in_flight.remove(node_id)

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, raw_batch in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance happened
                    log.debug(
                        "Ignoring fetched records for partition %s"
                        " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    tp_assignment = self._subscriptions.assignment[tp]
                    tp_assignment.highwater = highwater

                    # `drop_pending_message_set` is set after a seek to another
                    # position. If we request the *new* position we have to
                    # drop this flag, so we catch future seeks.
                    fetch_offset = fetch_offsets[tp]
                    if fetch_offset == tp_assignment.position:
                        tp_assignment.drop_pending_message_set = False

                    records = MemoryRecords(raw_batch)
                    if records.has_next():
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp,
                            fetch_offset)

                        message_iterator = self._unpack_records(tp, records)
                        self._records[tp] = FetchResult(
                            tp,
                            records=message_iterator,
                            subscriptions=self._subscriptions,
                            backoff=self._prefetch_backoff,
                            loop=self._loop)

                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif records.size_in_bytes() > 0:
                        # We did not read a single message from a non-empty
                        # buffer because that message's size is larger than
                        # the fetch size; in this case, record this exception.
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.", tp,
                            fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        needs_wakeup = True
                        self._subscriptions.assignment[tp].position += 1

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                        needs_position_update.append(tp)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range for partition %s,"
                        " resetting offset", fetch_offset, tp)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)

        if needs_position_update:
            try:
                yield from self.update_fetch_positions(needs_position_update)
            except Exception:  # pragma: no cover
                log.error("Unexpected error updating fetch positions",
                          exc_info=True)

        return needs_wakeup
Example #5
    def _proc_fetch_request(self, assignment, node_id, request):
        needs_wakeup = False
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        except asyncio.CancelledError:
            # Either `close()` or partition unassigned. Either way the result
            # is no longer of interest.
            return False

        if not assignment.active:
            log.debug(
                "Discarding fetch response since the assignment changed during"
                " fetch")
            return False

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, *part_data in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                fetch_offset = fetch_offsets[tp]
                tp_state = assignment.state_value(tp)
                if not tp_state.has_valid_position or \
                        tp_state.position != fetch_offset:
                    log.debug(
                        "Discarding fetch response for partition %s "
                        "since its offset %s does not match the current "
                        "position", tp, fetch_offset)
                    continue

                if error_type is Errors.NoError:
                    if request.API_VERSION >= 4:
                        aborted_transactions = part_data[-2]
                        lso = part_data[-3]
                    else:
                        aborted_transactions = None
                        lso = None
                    tp_state.highwater = highwater
                    tp_state.lso = lso

                    # part_data also contains lso, aborted_transactions.
                    # message_set is last
                    records = MemoryRecords(part_data[-1])
                    if records.has_next():
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list",
                            tp, fetch_offset)

                        partition_records = PartitionRecords(
                            tp, records, aborted_transactions, fetch_offset,
                            self._key_deserializer, self._value_deserializer,
                            self._check_crcs, self._isolation_level)

                        self._records[tp] = FetchResult(
                            tp, partition_records=partition_records,
                            assignment=assignment,
                            backoff=self._prefetch_backoff,
                            loop=self._loop)

                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif records.size_in_bytes() > 0:
                        # We did not read a single message from a non-empty
                        # buffer because that message's size is larger than
                        # the fetch size; in this case, record this exception.
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.",
                            tp, fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        tp_state.consumed_to(tp_state.position + 1)
                        needs_wakeup = True

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    if self._default_reset_strategy != \
                            OffsetResetStrategy.NONE:
                        tp_state.await_reset(self._default_reset_strategy)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range for partition %s,"
                        " resetting offset", fetch_offset, tp)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warning(
                        "Not authorized to read from topic %s.", tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)
        return needs_wakeup
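
To make the `part_data` indexing above concrete, here is an illustrative tuple shaped like a single FetchResponse v4 partition entry (the values are invented; only the layout matters):

# Illustrative only: layout of a FetchResponse v4 partition entry as unpacked
# in the loop above. part_data[-3] is the lso, [-2] the aborted transactions,
# and [-1] the raw message set handed to MemoryRecords.
partition_entry = (
    0,        # partition
    0,        # error_code
    105,      # highwater
    100,      # last stable offset (lso)
    [],       # aborted_transactions
    b"...",   # message_set (raw record batches)
)
partition, error_code, highwater, *part_data = partition_entry
assert part_data[-3] == 100
assert part_data[-2] == []
assert part_data[-1] == b"..."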