def test_default_batch_size_limit():
    # First message can be added even if it's too big
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)
    meta = builder.append(
        0, timestamp=None, key=None, value=b"M" * 2000, headers=[])
    assert meta.size > 0
    assert meta.crc is None
    assert meta.offset == 0
    assert meta.timestamp is not None
    assert len(builder.build()) > 2000

    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)
    meta = builder.append(
        0, timestamp=None, key=None, value=b"M" * 700, headers=[])
    assert meta is not None
    meta = builder.append(
        1, timestamp=None, key=None, value=b"M" * 700, headers=[])
    assert meta is None
    meta = builder.append(
        2, timestamp=None, key=None, value=b"M" * 700, headers=[])
    assert meta is None
    assert len(builder.build()) < 1000
def test_read_write_serde_v2(compression_type, crc):
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=compression_type, is_transactional=1,
        producer_id=123456, producer_epoch=123, base_sequence=9999,
        batch_size=999999)
    headers = [("header1", b"aaa"), ("header2", b"bbb")]
    for offset in range(10):
        builder.append(
            offset, timestamp=9999999 + offset, key=b"test", value=b"Super",
            headers=headers)
    buffer = builder.build()
    reader = DefaultRecordBatch(bytes(buffer))
    assert reader.validate_crc()
    msgs = list(reader)

    assert reader.is_transactional is True
    assert reader.is_control_batch is False
    assert reader.compression_type == compression_type
    assert reader.magic == 2
    assert reader.timestamp_type == 0
    assert reader.base_offset == 0
    assert reader.last_offset_delta == 9
    assert reader.next_offset == 10
    assert reader.first_timestamp == 9999999
    assert reader.max_timestamp == 10000008
    if crc is not None:
        assert reader.crc == crc

    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == 9999999 + offset
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.headers == headers
def test_build_without_append():
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=1,
        producer_id=123456, producer_epoch=123, base_sequence=9999,
        batch_size=999999)
    buffer = builder.build()
    reader = DefaultRecordBatch(bytes(buffer))
    msgs = list(reader)
    assert not msgs
def test_set_producer_state():
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)
    builder.set_producer_state(
        producer_id=700, producer_epoch=5, base_sequence=17)
    assert builder.producer_id == 700
    buffer = builder.build()
    reader = DefaultRecordBatch(bytes(buffer))
    assert reader.producer_id == 700
    assert reader.producer_epoch == 5
    assert reader.base_sequence == 17
def test_default_correct_metadata_response():
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024 * 1024)
    meta = builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super", headers=[])

    assert meta.offset == 0
    assert meta.timestamp == 9999999
    assert meta.crc is None
    assert meta.size == 16
    assert repr(meta) == (
        "DefaultRecordMetadata(offset=0, size={}, timestamp={})"
        .format(meta.size, meta.timestamp)
    )
def test_written_bytes_equals_size_in_bytes_v2():
    key = b"test"
    value = b"Super"
    headers = [("header1", b"aaa"), ("header2", b"bbb"), ("xx", None)]
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    size_in_bytes = builder.size_in_bytes(
        0, timestamp=9999999, key=key, value=value, headers=headers)

    pos = builder.size()
    meta = builder.append(
        0, timestamp=9999999, key=key, value=value, headers=headers)

    assert builder.size() - pos == size_in_bytes
    assert meta.size == size_in_bytes
def test_unsupported_yet_codec():
    compression_type = DefaultRecordBatch.CODEC_MASK  # It doesn't exist
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=compression_type, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)
    with pytest.raises(UnsupportedCodecError):
        builder.append(0, timestamp=None, key=None, value=b"M", headers=[])
        builder.build()
async def test_solitary_abort_marker(self):
    # An abort marker may not be preceded by any aborted messages.

    # Setup: Create a record batch (control batch) containing
    # a single transaction abort marker.
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=True,
        producer_id=3, producer_epoch=1, base_sequence=-1,
        batch_size=999)
    orig_get_attributes = builder._get_attributes
    builder._get_attributes = lambda *args, **kwargs: (
        # Make batch a control batch
        orig_get_attributes(*args, **kwargs)
        | DefaultRecordBatchBuilder.CONTROL_MASK)
    builder.append(
        offset=0, timestamp=1631276519572,
        # transaction abort marker
        key=b'\x00\x00\x00\x00', value=b'\x00\x00\x00\x00\x00\x00',
        headers=[])
    buffer = builder.build()
    records = MemoryRecords(bytes(buffer))

    # Test: In aiokafka>=0.7.2, the following line would result in an
    # exception, because the implementation assumed that any transaction
    # abort marker would be preceded by at least one aborted message
    # originating from the same producer_id. However, this appears to
    # not always be the case, as reported in
    # https://github.com/aio-libs/aiokafka/issues/781 .
    partition_recs = PartitionRecords(
        tp=TopicPartition('test-topic', 0),
        records=records,
        aborted_transactions=[],
        fetch_offset=0,
        key_deserializer=None,
        value_deserializer=None,
        check_crcs=True,
        isolation_level=READ_COMMITTED)

    # Since isolation_level is READ_COMMITTED, no consumer records are
    # expected to be returned here.
    self.assertEqual(len(list(partition_recs)), 0)
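# The reader side exposes the control-batch flag directly, so a small
# illustrative helper (not part of aiokafka) can distinguish an abort-marker
# batch from ordinary records. It only relies on the is_control_batch and
# is_transactional properties already asserted in test_read_write_serde_v2
# above; the helper name and the idea of checking a raw buffer this way are
# assumptions for illustration.
def is_abort_marker_batch(buffer):
    # A v2 control batch carries transaction markers rather than user records.
    batch = DefaultRecordBatch(bytes(buffer))
    return batch.is_control_batch and batch.is_transactional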
def test_estimate_size_in_bytes_bigger_than_batch_v2():
    key = b"Super Key"
    value = b"1" * 100
    headers = [("header1", b"aaa"), ("header2", b"bbb")]
    estimate_size = DefaultRecordBatchBuilder.estimate_size_in_bytes(
        key, value, headers)

    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)
    builder.append(
        0, timestamp=9999999, key=key, value=value, headers=headers)
    buf = builder.build()
    assert len(buf) <= estimate_size, \
        "Estimate should always be upper bound"
class BatchBuilder:

    def __init__(self, magic, batch_size, compression_type, *,
                 is_transactional):
        if magic < 2:
            assert not is_transactional
            self._builder = LegacyRecordBatchBuilder(
                magic, compression_type, batch_size)
        else:
            self._builder = DefaultRecordBatchBuilder(
                magic, compression_type, is_transactional=is_transactional,
                producer_id=-1, producer_epoch=-1, base_sequence=0,
                batch_size=batch_size)
        self._relative_offset = 0
        self._buffer = None
        self._closed = False

    def append(self, *, timestamp, key, value, headers=[]):
        """Add a message to the batch.

        Arguments:
            timestamp (float or None): epoch timestamp in seconds. If None,
                the timestamp will be set to the current time. If submitting
                to an 0.8.x or 0.9.x broker, the timestamp will be ignored.
            key (bytes or None): the message key. `key` and `value` may not
                both be None.
            value (bytes or None): the message value. `key` and `value` may
                not both be None.

        Returns:
            If the message was successfully added, returns a metadata object
            with crc, offset, size, and timestamp fields. If the batch is
            full or closed, returns None.
        """
        if self._closed:
            return None

        metadata = self._builder.append(
            self._relative_offset, timestamp, key, value, headers=headers)

        # Check if we could add the message
        if metadata is None:
            return None

        self._relative_offset += 1
        return metadata

    def close(self):
        """Close the batch to further updates.

        Closing the batch before submitting to the producer ensures that no
        messages are added via the ``producer.send()`` interface. To
        gracefully support both the batch and individual message interfaces,
        leave the batch open. For complete control over the batch's contents,
        close before submission. Closing a batch has no effect on when it's
        sent to the broker.

        A batch may not be reopened after it's closed.
        """
        if self._closed:
            return
        self._closed = True

    def _set_producer_state(self, producer_id, producer_epoch, base_sequence):
        assert type(self._builder) is DefaultRecordBatchBuilder
        self._builder.set_producer_state(
            producer_id, producer_epoch, base_sequence)

    def _build(self):
        self.close()
        if self._buffer is None:
            self._buffer = self._builder.build()
            del self._builder  # We may only call self._builder.build() once!
        return self._buffer

    def size(self):
        """Get the size of batch in bytes."""
        if self._buffer is not None:
            return len(self._buffer)
        else:
            return self._builder.size()

    def record_count(self):
        """Get the number of records in the batch."""
        return self._relative_offset
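# A minimal usage sketch for the BatchBuilder class above, for orientation
# only. It exercises append(), close(), the private _build() helper, and
# record_count() exactly as defined above, and assumes DefaultRecordBatch is
# importable as in the tests above (the exact import path may vary between
# aiokafka versions); the function name is hypothetical.
from aiokafka.record.default_records import DefaultRecordBatch


def build_example_batch():
    # magic=2 selects the v2 record format (DefaultRecordBatchBuilder).
    builder = BatchBuilder(
        magic=2, batch_size=16 * 1024, compression_type=0,
        is_transactional=False)

    # append() returns a metadata object on success and None once the batch
    # is full or closed, so a caller can fall back to starting a new batch.
    meta = builder.append(timestamp=None, key=b"key", value=b"value",
                          headers=[])
    assert meta is not None

    builder.close()  # further append() calls now return None
    assert builder.append(timestamp=None, key=b"k2", value=b"v2") is None

    buffer = builder._build()  # may only be called once per builder
    reader = DefaultRecordBatch(bytes(buffer))
    return [(msg.key, msg.value) for msg in reader], builder.record_count()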
def test_unavailable_codec(compression_type, name, checker_name):
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=compression_type, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)
    builder.append(0, timestamp=None, key=None, value=b"M" * 2000, headers=[])
    correct_buffer = builder.build()

    with mock.patch.object(kafka.codec, checker_name, return_value=False):
        # Check that builder raises error
        builder = DefaultRecordBatchBuilder(
            magic=2, compression_type=compression_type, is_transactional=0,
            producer_id=-1, producer_epoch=-1, base_sequence=-1,
            batch_size=1024)
        error_msg = "Libraries for {} compression codec not found".format(name)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            builder.append(0, timestamp=None, key=None, value=b"M", headers=[])
            builder.build()

        # Check that reader raises same error
        batch = DefaultRecordBatch(bytes(correct_buffer))
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            list(batch)
def test_default_batch_builder_validates_arguments():
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    # Key should not be str
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp=9999999, key="some string", value=None, headers=[])

    # Value should not be str
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp=9999999, key=None, value="some string", headers=[])

    # Timestamp should be of proper type
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp="1243812793", key=None, value=b"some string",
            headers=[])

    # Offset of invalid type
    with pytest.raises(TypeError):
        builder.append(
            "0", timestamp=9999999, key=None, value=b"some string",
            headers=[])

    # Ok to pass value as None
    builder.append(
        0, timestamp=9999999, key=b"123", value=None, headers=[])

    # Timestamp can be None
    builder.append(
        1, timestamp=None, key=None, value=b"some string", headers=[])

    # Ok to pass offsets in non-incremental order. This should not happen,
    # though.
    builder.append(
        5, timestamp=9999999, key=b"123", value=None, headers=[])

    # In case error handling code fails to fix the inner buffer in the builder
    assert len(builder.build()) == 104