def func(loops: int, magic: int):
    """Time ``loops`` complete read passes over one prepared batch buffer.

    Args:
        loops: How many times the buffer is parsed and iterated.
        magic: Forwarded to ``prepare`` to build the sample data.

    Returns:
        The ``perf.perf_counter()`` time that elapsed inside the read loop.
    """
    # A JIT can optimize out the whole function if the result is the same
    # each time, so the input has to be randomized data.
    samples = prepare(magic)
    collected = []

    # Main benchmark code.
    payload = next(samples)
    started = perf.perf_counter()
    for _ in range(loops):
        reader = MemoryRecords(payload)
        while reader.has_next():
            current = reader.next_batch()
            current.validate_crc()
            for rec in current:
                collected.append(rec.value)
    elapsed = perf.perf_counter() - started

    finalize(collected)
    return elapsed
async def test_solitary_abort_marker(self):
    # An abort marker is not necessarily preceded by any aborted messages.

    # Setup: build a record batch (a control batch) that contains nothing
    # but a single transaction abort marker.
    batch_builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=True,
        producer_id=3,
        producer_epoch=1,
        base_sequence=-1,
        batch_size=999)
    plain_get_attributes = batch_builder._get_attributes

    def control_attributes(*args, **kwargs):
        # OR the control mask into the attributes so that the batch
        # becomes a control batch.
        attrs = plain_get_attributes(*args, **kwargs)
        return attrs | DefaultRecordBatchBuilder.CONTROL_MASK

    batch_builder._get_attributes = control_attributes
    batch_builder.append(
        offset=0,
        timestamp=1631276519572,
        # transaction abort marker
        key=b'\x00\x00\x00\x00',
        value=b'\x00\x00\x00\x00\x00\x00',
        headers=[])
    raw = batch_builder.build()
    memory_records = MemoryRecords(bytes(raw))

    # Test: in aiokafka>=0.7.2 constructing/iterating the partition records
    # below would raise an exception, because the implementation assumed
    # that any transaction abort marker is preceded by at least one aborted
    # message originating from the same producer_id. This appears not to
    # always be the case, as reported in
    # https://github.com/aio-libs/aiokafka/issues/781 .
    partition_recs = PartitionRecords(
        tp=TopicPartition('test-topic', 0),
        records=memory_records,
        aborted_transactions=[],
        fetch_offset=0,
        key_deserializer=None,
        value_deserializer=None,
        check_crcs=True,
        isolation_level=READ_COMMITTED)

    # With isolation_level set to READ_COMMITTED no consumer records are
    # expected to come back.
    consumed = list(partition_recs)
    self.assertEqual(len(consumed), 0)
def _proc_fetch_request(self, assignment, node_id, request):
    """Send one fetch request and buffer what the response carries.

    Generator-based coroutine (it uses ``yield from``).

    Args:
        assignment: Assignment the request belongs to; the response is
            dropped when it is no longer ``active`` after the send.
        node_id: Node the request is sent to through ``self._client``.
        request: Fetch request; its ``topics`` supply the offset that was
            requested for every partition.

    Returns:
        ``True`` when records or an error were stored for at least one
        partition, ``False`` otherwise (including a failed or cancelled
        send).
    """
    try:
        response = yield from self._client.send(node_id, request)
    except Errors.KafkaError as err:
        log.error("Failed fetch messages from %s: %s", node_id, err)
        return False
    except asyncio.CancelledError:
        # Either `close()` was called or the partition was unassigned. In
        # both cases the result is no longer of interest.
        return False

    if not assignment.active:
        log.debug(
            "Discarding fetch response since the assignment changed during"
            " fetch")
        return False

    # Offset that was requested, keyed by partition.
    requested_offsets = {
        TopicPartition(topic, partition): offset
        for topic, partitions in request.topics
        for partition, offset, _ in partitions
    }

    wakeup = False
    for topic, partitions in response.topics:
        for partition, error_code, highwater, *part_data in partitions:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)
            fetch_offset = requested_offsets[tp]
            state = assignment.state_value(tp)

            # `position` is only read once the position is known valid.
            position_matches = (
                state.has_valid_position
                and state.position == fetch_offset)
            if not position_matches:
                log.debug(
                    "Discarding fetch response for partition %s "
                    "since its offset %s does not match the current "
                    "position", tp, fetch_offset)
                continue

            if error_type is Errors.NoError:
                state.highwater = highwater

                # part_data also contains lso, aborted_transactions.
                # message_set is last
                parsed = MemoryRecords(part_data[-1])
                if parsed.has_next():
                    log.debug(
                        "Adding fetched record for partition %s with"
                        " offset %d to buffered record list",
                        tp, fetch_offset)
                    unpacked = self._unpack_records(tp, parsed)
                    self._records[tp] = FetchResult(
                        tp,
                        message_iterator=unpacked,
                        assignment=assignment,
                        backoff=self._prefetch_backoff,
                        fetch_offset=fetch_offset,
                        loop=self._loop)

                    # At least one record was added successfully.
                    wakeup = True
                elif parsed.size_in_bytes() > 0:
                    # Not a single message could be read from a non-empty
                    # buffer because that message is larger than the fetch
                    # size; record this as an error.
                    too_large = RecordTooLargeError(
                        "There are some messages at [Partition=Offset]: "
                        "%s=%s whose size is larger than the fetch size %s"
                        " and hence cannot be ever returned. "
                        "Increase the fetch size, or decrease the maximum "
                        "message size the broker will allow.",
                        tp, fetch_offset, self._max_partition_fetch_bytes)
                    self._set_error(tp, too_large)
                    state.consumed_to(state.position + 1)
                    wakeup = True

            elif error_type in (Errors.NotLeaderForPartitionError,
                                Errors.UnknownTopicOrPartitionError):
                self._client.force_metadata_update()
            elif error_type is Errors.OffsetOutOfRangeError:
                if self._default_reset_strategy != OffsetResetStrategy.NONE:
                    state.await_reset(self._default_reset_strategy)
                else:
                    out_of_range = Errors.OffsetOutOfRangeError(
                        {tp: fetch_offset})
                    self._set_error(tp, out_of_range)
                    wakeup = True
                log.info(
                    "Fetch offset %s is out of range for partition %s,"
                    " resetting offset", fetch_offset, tp)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.warning(
                    "Not authorized to read from topic %s.", tp.topic)
                denied = Errors.TopicAuthorizationFailedError(tp.topic)
                self._set_error(tp, denied)
                wakeup = True
            else:
                log.warning(
                    'Unexpected error while fetching data: %s',
                    error_type.__name__)
    return wakeup
def _proc_fetch_request(self, node_id, request):
    """Send one fetch request and buffer what the response carries.

    Generator-based coroutine (it uses ``yield from``).

    Args:
        node_id: Node the request is sent to through ``self._client``; it
            is removed from ``self._in_flight`` once the send finishes,
            whether it succeeded or not.
        request: Fetch request; its ``topics`` supply the offset that was
            requested for every partition.

    Returns:
        ``True`` when records or an error were stored for at least one
        partition, ``False`` otherwise (including a failed send).

    Partitions that report an out-of-range offset while a default reset
    policy exists are collected and handed to
    ``self.update_fetch_positions`` at the end; a failure there is logged
    and not raised.
    """
    needs_wakeup = False
    needs_position_update = []
    try:
        response = yield from self._client.send(node_id, request)
    except Errors.KafkaError as err:
        log.error("Failed fetch messages from %s: %s", node_id, err)
        return False
    finally:
        self._in_flight.remove(node_id)

    # Offset that was requested, keyed by partition.
    fetch_offsets = {}
    for topic, partitions in request.topics:
        for partition, offset, _ in partitions:
            fetch_offsets[TopicPartition(topic, partition)] = offset

    for topic, partitions in response.topics:
        for partition, error_code, highwater, raw_batch in partitions:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)
            if not self._subscriptions.is_fetchable(tp):
                # this can happen when a rebalance happened
                log.debug(
                    "Ignoring fetched records for partition %s"
                    " since it is no longer fetchable", tp)

            elif error_type is Errors.NoError:
                tp_assignment = self._subscriptions.assignment[tp]
                tp_assignment.highwater = highwater

                # `drop_pending_message_set` is set after a seek to another
                # position. If we requested the *new* position we have to
                # drop this flag, so that future seeks are caught.
                fetch_offset = fetch_offsets[tp]
                if fetch_offset == tp_assignment.position:
                    tp_assignment.drop_pending_message_set = False

                records = MemoryRecords(raw_batch)
                if records.has_next():
                    log.debug(
                        "Adding fetched record for partition %s with"
                        " offset %d to buffered record list",
                        tp, fetch_offset)
                    message_iterator = self._unpack_records(tp, records)
                    self._records[tp] = FetchResult(
                        tp, records=message_iterator,
                        subscriptions=self._subscriptions,
                        backoff=self._prefetch_backoff,
                        loop=self._loop)

                    # We added at least 1 successful record
                    needs_wakeup = True
                elif records.size_in_bytes() > 0:
                    # we did not read a single message from a non-empty
                    # buffer because that message's size is larger than
                    # fetch size, in this case record this exception
                    err = RecordTooLargeError(
                        "There are some messages at [Partition=Offset]: "
                        "%s=%s whose size is larger than the fetch size %s"
                        " and hence cannot be ever returned. "
                        "Increase the fetch size, or decrease the maximum "
                        "message size the broker will allow.",
                        tp, fetch_offset, self._max_partition_fetch_bytes)
                    self._set_error(tp, err)
                    needs_wakeup = True
                    self._subscriptions.assignment[tp].position += 1

            elif error_type in (Errors.NotLeaderForPartitionError,
                                Errors.UnknownTopicOrPartitionError):
                self._client.force_metadata_update()
            elif error_type is Errors.OffsetOutOfRangeError:
                fetch_offset = fetch_offsets[tp]
                if self._subscriptions.has_default_offset_reset_policy():
                    self._subscriptions.need_offset_reset(tp)
                    needs_position_update.append(tp)
                else:
                    err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                    self._set_error(tp, err)
                    needs_wakeup = True
                log.info(
                    "Fetch offset %s is out of range for partition %s,"
                    " resetting offset", fetch_offset, tp)
            elif error_type is Errors.TopicAuthorizationFailedError:
                # `warning` replaces the deprecated `warn` alias.
                log.warning(
                    "Not authorized to read from topic %s.", tp.topic)
                err = Errors.TopicAuthorizationFailedError(tp.topic)
                self._set_error(tp, err)
                needs_wakeup = True
            else:
                log.warning('Unexpected error while fetching data: %s',
                            error_type.__name__)

    if needs_position_update:
        try:
            yield from self.update_fetch_positions(needs_position_update)
        except Exception:  # pragma: no cover
            log.error("Unexpected error updating fetch positions",
                      exc_info=True)

    return needs_wakeup
def _proc_fetch_request(self, assignment, node_id, request):
    """Send one fetch request and buffer what the response carries.

    Generator-based coroutine (it uses ``yield from``).

    Args:
        assignment: Assignment the request belongs to; the response is
            dropped when it is no longer ``active`` after the send.
        node_id: Node the request is sent to through ``self._client``.
        request: Fetch request; its ``topics`` supply the offset that was
            requested for every partition, and its ``API_VERSION`` decides
            whether lso and aborted transactions are read from the
            response.

    Returns:
        ``True`` when records or an error were stored for at least one
        partition, ``False`` otherwise (including a failed or cancelled
        send).
    """
    try:
        response = yield from self._client.send(node_id, request)
    except Errors.KafkaError as err:
        log.error("Failed fetch messages from %s: %s", node_id, err)
        return False
    except asyncio.CancelledError:
        # Either `close()` was called or the partition was unassigned. In
        # both cases the result is no longer of interest.
        return False

    if not assignment.active:
        log.debug(
            "Discarding fetch response since the assignment changed during"
            " fetch")
        return False

    # Offset that was requested, keyed by partition.
    requested_offsets = {
        TopicPartition(topic, partition): offset
        for topic, partitions in request.topics
        for partition, offset, _ in partitions
    }

    wakeup = False
    for topic, partitions in response.topics:
        for partition, error_code, highwater, *part_data in partitions:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)
            fetch_offset = requested_offsets[tp]
            state = assignment.state_value(tp)

            # `position` is only read once the position is known valid.
            position_matches = (
                state.has_valid_position
                and state.position == fetch_offset)
            if not position_matches:
                log.debug(
                    "Discarding fetch response for partition %s "
                    "since its offset %s does not match the current "
                    "position", tp, fetch_offset)
                continue

            if error_type is Errors.NoError:
                # lso and aborted_transactions are only taken from
                # part_data for API version 4 and above.
                if request.API_VERSION >= 4:
                    aborted_transactions = part_data[-2]
                    lso = part_data[-3]
                else:
                    aborted_transactions = lso = None
                state.highwater = highwater
                state.lso = lso

                # part_data also contains lso, aborted_transactions.
                # message_set is last
                parsed = MemoryRecords(part_data[-1])
                if parsed.has_next():
                    log.debug(
                        "Adding fetched record for partition %s with"
                        " offset %d to buffered record list",
                        tp, fetch_offset)
                    batch_view = PartitionRecords(
                        tp, parsed, aborted_transactions, fetch_offset,
                        self._key_deserializer, self._value_deserializer,
                        self._check_crcs, self._isolation_level)
                    self._records[tp] = FetchResult(
                        tp,
                        partition_records=batch_view,
                        assignment=assignment,
                        backoff=self._prefetch_backoff,
                        loop=self._loop)

                    # At least one record was added successfully.
                    wakeup = True
                elif parsed.size_in_bytes() > 0:
                    # Not a single message could be read from a non-empty
                    # buffer because that message is larger than the fetch
                    # size; record this as an error.
                    too_large = RecordTooLargeError(
                        "There are some messages at [Partition=Offset]: "
                        "%s=%s whose size is larger than the fetch size %s"
                        " and hence cannot be ever returned. "
                        "Increase the fetch size, or decrease the maximum "
                        "message size the broker will allow.",
                        tp, fetch_offset, self._max_partition_fetch_bytes)
                    self._set_error(tp, too_large)
                    state.consumed_to(state.position + 1)
                    wakeup = True

            elif error_type in (Errors.NotLeaderForPartitionError,
                                Errors.UnknownTopicOrPartitionError):
                self._client.force_metadata_update()
            elif error_type is Errors.OffsetOutOfRangeError:
                if self._default_reset_strategy != OffsetResetStrategy.NONE:
                    state.await_reset(self._default_reset_strategy)
                else:
                    out_of_range = Errors.OffsetOutOfRangeError(
                        {tp: fetch_offset})
                    self._set_error(tp, out_of_range)
                    wakeup = True
                log.info(
                    "Fetch offset %s is out of range for partition %s,"
                    " resetting offset", fetch_offset, tp)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.warning(
                    "Not authorized to read from topic %s.", tp.topic)
                denied = Errors.TopicAuthorizationFailedError(tp.topic)
                self._set_error(tp, denied)
                wakeup = True
            else:
                log.warning(
                    'Unexpected error while fetching data: %s',
                    error_type.__name__)
    return wakeup