예제 #1
0
def test_default_batch_size_limit():
    """Check how the v2 builder applies its ``batch_size`` limit.

    The first record of a batch is accepted even when it alone is larger
    than the limit; later records that do not fit are refused, which
    ``append`` signals by returning ``None``.
    """
    def new_builder():
        # Uncompressed, non-transactional v2 builder capped at 1024 bytes.
        return DefaultRecordBatchBuilder(
            magic=2, compression_type=0, is_transactional=0,
            producer_id=-1, producer_epoch=-1, base_sequence=-1,
            batch_size=1024)

    # Case 1: a single 2000-byte record goes into an empty batch although
    # it exceeds the limit on its own.
    builder = new_builder()
    oversized = b"M" * 2000
    meta = builder.append(
        0, timestamp=None, key=None, value=oversized, headers=[])
    assert meta.size > 0
    assert meta.crc is None
    assert meta.offset == 0
    assert meta.timestamp is not None
    assert len(builder.build()) > 2000

    # Case 2: of three 700-byte records only the first is accepted, so the
    # built batch stays below 1000 bytes.
    builder = new_builder()
    payload = b"M" * 700
    for offset, accepted in enumerate((True, False, False)):
        meta = builder.append(
            offset, timestamp=None, key=None, value=payload, headers=[])
        if accepted:
            assert meta is not None
        else:
            assert meta is None
    assert len(builder.build()) < 1000
예제 #2
0
def test_read_write_serde_v2(compression_type, crc):
    """Round-trip ten records through the v2 builder and reader.

    Arguments:
        compression_type: codec id handed to the builder and expected back
            from the reader.
        crc: expected batch CRC, or None to skip the CRC value check.
    """
    headers = [("header1", b"aaa"), ("header2", b"bbb")]
    first_timestamp = 9999999

    # Write: a transactional batch with explicit producer state.
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=compression_type,
        is_transactional=1,
        producer_id=123456,
        producer_epoch=123,
        base_sequence=9999,
        batch_size=999999)
    for offset in range(10):
        builder.append(
            offset,
            timestamp=first_timestamp + offset,
            key=b"test",
            value=b"Super",
            headers=headers)
    serialized = bytes(builder.build())

    # Read: parse the bytes back and materialise all records.
    reader = DefaultRecordBatch(serialized)
    assert reader.validate_crc()
    msgs = list(reader)

    # Batch-level fields.
    assert reader.is_transactional is True
    assert reader.is_control_batch is False
    assert reader.compression_type == compression_type
    assert reader.magic == 2
    assert reader.timestamp_type == 0
    assert reader.base_offset == 0
    assert reader.last_offset_delta == 9
    assert reader.next_offset == 10
    assert reader.first_timestamp == 9999999
    assert reader.max_timestamp == 10000008
    if crc is not None:
        assert reader.crc == crc

    # Record-level fields.
    for expected_offset, msg in enumerate(msgs):
        assert msg.offset == expected_offset
        assert msg.timestamp == first_timestamp + expected_offset
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.headers == headers
예제 #3
0
def test_build_without_append():
    """A batch built without any appended record reads back as empty."""
    empty_builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=1,
        producer_id=123456,
        producer_epoch=123,
        base_sequence=9999,
        batch_size=999999)
    serialized = bytes(empty_builder.build())

    # Iterating the parsed batch must yield nothing.
    records = list(DefaultRecordBatch(serialized))
    assert not records
예제 #4
0
 def __init__(self, magic, batch_size, compression_type,
              *, is_transactional):
     """Select the underlying record-batch builder for ``magic``.

     ``magic`` of 2 or higher uses ``DefaultRecordBatchBuilder``; lower
     values use ``LegacyRecordBatchBuilder`` and must not be
     transactional.
     """
     if magic >= 2:
         # Producer id/epoch start out unset (-1), base sequence at 0.
         builder = DefaultRecordBatchBuilder(
             magic,
             compression_type,
             is_transactional=is_transactional,
             producer_id=-1,
             producer_epoch=-1,
             base_sequence=0,
             batch_size=batch_size)
     else:
         # The legacy builder is constructed without a transactional flag.
         assert not is_transactional
         builder = LegacyRecordBatchBuilder(
             magic, compression_type, batch_size)

     self._builder = builder
     self._relative_offset = 0
     self._buffer = None
     self._closed = False
예제 #5
0
def test_set_producer_state():
    """Producer state set after construction ends up in the built batch."""
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=999999)

    # Override the placeholder (-1) producer fields.
    builder.set_producer_state(
        producer_id=700, producer_epoch=5, base_sequence=17)
    assert builder.producer_id == 700

    # The serialized batch must carry the updated values.
    parsed = DefaultRecordBatch(bytes(builder.build()))
    assert parsed.producer_id == 700
    assert parsed.producer_epoch == 5
    assert parsed.base_sequence == 17
예제 #6
0
def test_default_correct_metadata_response():
    """Verify the metadata object returned by a single v2 ``append``."""
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=1024 * 1024)

    meta = builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super", headers=[])

    assert meta.offset == 0
    assert meta.timestamp == 9999999
    assert meta.crc is None
    assert meta.size == 16

    # The repr is expected to expose offset, size and timestamp.
    expected_repr = (
        "DefaultRecordMetadata(offset=0, size={}, timestamp={})"
        .format(meta.size, meta.timestamp)
    )
    assert repr(meta) == expected_repr
예제 #7
0
def test_written_bytes_equals_size_in_bytes_v2():
    """``size_in_bytes`` must match the bytes ``append`` actually writes."""
    # One record, including a header whose value is None.
    record = dict(
        timestamp=9999999,
        key=b"test",
        value=b"Super",
        headers=[("header1", b"aaa"), ("header2", b"bbb"), ("xx", None)],
    )
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    predicted = builder.size_in_bytes(0, **record)

    size_before = builder.size()
    meta = builder.append(0, **record)
    size_after = builder.size()

    # Both the builder growth and the reported record size must agree
    # with the prediction.
    assert size_after - size_before == predicted
    assert meta.size == predicted
예제 #8
0
def test_written_bytes_equals_size_in_bytes_v2():
    """Compare the predicted record size with what ``append`` writes."""
    record_key, record_value = b"test", b"Super"
    # The last header has a None value.
    record_headers = [
        ("header1", b"aaa"),
        ("header2", b"bbb"),
        ("xx", None),
    ]
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    expected_size = builder.size_in_bytes(
        0, timestamp=9999999, key=record_key, value=record_value,
        headers=record_headers)

    start = builder.size()
    meta = builder.append(
        0, timestamp=9999999, key=record_key, value=record_value,
        headers=record_headers)
    written = builder.size() - start

    assert written == expected_size
    assert meta.size == expected_size
예제 #9
0
def test_unsupported_yet_codec():
    """An unknown compression type must raise ``UnsupportedCodecError``."""
    # CODEC_MASK itself is used as a codec id that does not exist.
    unknown_codec = DefaultRecordBatch.CODEC_MASK
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=unknown_codec, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)

    # The error may come from either call, so both sit inside the
    # ``raises`` block.
    with pytest.raises(UnsupportedCodecError):
        builder.append(
            0, timestamp=None, key=None, value=b"M", headers=[])
        builder.build()
예제 #10
0
    async def test_solitary_abort_marker(self):
        """An abort marker without preceding aborted messages is tolerated.

        Regression test for
        https://github.com/aio-libs/aiokafka/issues/781 .
        """
        # Setup: build a control batch that holds nothing but a single
        # transaction abort marker.
        builder = DefaultRecordBatchBuilder(
            magic=2, compression_type=0, is_transactional=True,
            producer_id=3, producer_epoch=1, base_sequence=-1,
            batch_size=999)
        plain_get_attributes = builder._get_attributes

        def control_batch_attributes(*args, **kwargs):
            # OR the control flag into the attributes the builder would
            # normally produce, turning the batch into a control batch.
            return (plain_get_attributes(*args, **kwargs)
                    | DefaultRecordBatchBuilder.CONTROL_MASK)

        builder._get_attributes = control_batch_attributes

        # Key/value bytes of a transaction abort marker.
        builder.append(
            offset=0,
            timestamp=1631276519572,
            key=b'\x00\x00\x00\x00',
            value=b'\x00\x00\x00\x00\x00\x00',
            headers=[])
        records = MemoryRecords(bytes(builder.build()))

        # Test: in aiokafka>=0.7.2 constructing/iterating the partition
        # records below raised an exception, because the implementation
        # assumed every abort marker is preceded by at least one aborted
        # message from the same producer_id. The linked issue shows that
        # this does not always hold, hence the empty aborted_transactions.
        partition_recs = PartitionRecords(
            tp=TopicPartition('test-topic', 0),
            records=records,
            aborted_transactions=[],
            fetch_offset=0,
            key_deserializer=None,
            value_deserializer=None,
            check_crcs=True,
            isolation_level=READ_COMMITTED)

        # With READ_COMMITTED isolation no consumer records are expected
        # to come out of a batch that only contains the marker.
        consumed = list(partition_recs)
        self.assertEqual(len(consumed), 0)
예제 #11
0
def test_estimate_size_in_bytes_bigger_than_batch_v2():
    """The size estimate must be an upper bound for the built batch."""
    record = dict(
        key=b"Super Key",
        value=b"1" * 100,
        headers=[("header1", b"aaa"), ("header2", b"bbb")],
    )
    # The estimate is computed on the class, without a builder instance.
    estimate_size = DefaultRecordBatchBuilder.estimate_size_in_bytes(
        record["key"], record["value"], record["headers"])

    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)
    builder.append(0, timestamp=9999999, **record)
    actual_size = len(builder.build())

    assert actual_size <= estimate_size, \
        "Estimate should always be upper bound"
예제 #12
0
class BatchBuilder:
    """Collect records into one batch and serialize it once.

    Wraps a ``LegacyRecordBatchBuilder`` (``magic < 2``) or a
    ``DefaultRecordBatchBuilder`` (``magic >= 2``) and hands each appended
    record the next relative offset, starting at 0.
    """

    def __init__(self, magic, batch_size, compression_type,
                 *, is_transactional):
        """Create the underlying builder that matches ``magic``.

        Arguments:
            magic: record format version; values below 2 select the legacy
                builder, everything else the default (v2) builder.
            batch_size: size limit forwarded to the underlying builder.
            compression_type: codec id forwarded to the underlying builder.
            is_transactional: forwarded to the default builder; must be
                falsy when ``magic < 2``.
        """
        if magic < 2:
            # The legacy builder is constructed without a transactional
            # flag, so a transactional request cannot be honoured here.
            assert not is_transactional
            self._builder = LegacyRecordBatchBuilder(
                magic, compression_type, batch_size)
        else:
            # Producer id/epoch start as -1 and may be replaced later via
            # ``_set_producer_state``.
            self._builder = DefaultRecordBatchBuilder(
                magic, compression_type, is_transactional=is_transactional,
                producer_id=-1, producer_epoch=-1, base_sequence=0,
                batch_size=batch_size)
        self._relative_offset = 0
        self._buffer = None
        self._closed = False

    def append(self, *, timestamp, key, value, headers=None):
        """Add a message to the batch.

        Arguments:
            timestamp (float or None): epoch timestamp in seconds. If None,
                the timestamp will be set to the current time. If submitting to
                an 0.8.x or 0.9.x broker, the timestamp will be ignored.
                NOTE(review): the value is forwarded unchanged to the
                underlying builder; confirm the unit that builder expects.
            key (bytes or None): the message key. `key` and `value` may not
                both be None.
            value (bytes or None): the message value. `key` and `value` may not
                both be None.
            headers (list or None): record headers forwarded to the
                underlying builder. When omitted or None, a new empty list
                is used for this call.

        Returns:
            If the message was successfully added, returns a metadata object
            with crc, offset, size, and timestamp fields. If the batch is full
            or closed, returns None.
        """
        if self._closed:
            return None

        # A fresh list per call: a ``[]`` default in the signature would be
        # one object shared by every call that omits ``headers``.
        if headers is None:
            headers = []

        metadata = self._builder.append(
            self._relative_offset, timestamp, key, value,
            headers=headers)

        # The underlying builder returns None when it did not take the
        # record; the relative offset must not advance in that case.
        if metadata is None:
            return None

        self._relative_offset += 1
        return metadata

    def close(self):
        """Close the batch to further updates.

        Closing the batch before submitting to the producer ensures that no
        messages are added via the ``producer.send()`` interface. To gracefully
        support both the batch and individual message interfaces, leave the
        batch open. For complete control over the batch's contents, close
        before submission. Closing a batch has no effect on when it's sent to
        the broker.

        A batch may not be reopened after it's closed.
        """
        # Setting the flag again on an already closed batch changes nothing.
        self._closed = True

    def _set_producer_state(self, producer_id, producer_epoch, base_sequence):
        """Forward producer state to the default (v2) builder.

        Only valid while the underlying builder is exactly a
        ``DefaultRecordBatchBuilder``.
        """
        assert type(self._builder) is DefaultRecordBatchBuilder
        self._builder.set_producer_state(
            producer_id, producer_epoch, base_sequence)

    def _build(self):
        """Close the batch and return its serialized buffer.

        The underlying builder is built once; later calls return the
        cached buffer.
        """
        self.close()
        if self._buffer is None:
            self._buffer = self._builder.build()
            del self._builder  # We may only call self._builder.build() once!
        return self._buffer

    def size(self):
        """Get the size of batch in bytes."""
        # After ``_build`` the builder is gone, so measure the cached buffer.
        if self._buffer is not None:
            return len(self._buffer)
        return self._builder.size()

    def record_count(self):
        """Get the number of records in the batch."""
        return self._relative_offset
예제 #13
0
def test_unavailable_codec(compression_type, name, checker_name):
    """Builder and reader both fail when a codec's library is missing.

    Arguments:
        compression_type: codec id passed to the builder.
        name: codec name expected inside the error message.
        checker_name: attribute of ``kafka.codec`` that is patched to
            return False for the duration of the check.
    """
    def new_builder():
        return DefaultRecordBatchBuilder(
            magic=2, compression_type=compression_type, is_transactional=0,
            producer_id=-1, producer_epoch=-1, base_sequence=-1,
            batch_size=1024)

    # Produce a valid batch before the checker is patched.
    builder = new_builder()
    builder.append(
        0, timestamp=None, key=None, value=b"M" * 2000, headers=[])
    correct_buffer = builder.build()

    error_msg = "Libraries for {} compression codec not found".format(name)
    with mock.patch.object(kafka.codec, checker_name, return_value=False):
        # Writing: a new builder must raise the codec error.
        builder = new_builder()
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            builder.append(
                0, timestamp=None, key=None, value=b"M", headers=[])
            builder.build()

        # Reading: iterating the previously built batch raises the same
        # error.
        batch = DefaultRecordBatch(bytes(correct_buffer))
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            list(batch)
예제 #14
0
def test_default_batch_builder_validates_arguments():
    """``append`` rejects wrongly typed arguments with ``TypeError``."""
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    # Each row is (offset, timestamp, key, value) with one invalid field.
    rejected = (
        (0, 9999999, "some string", None),         # key must not be str
        (0, 9999999, None, "some string"),         # value must not be str
        (0, "1243812793", None, b"some string"),   # timestamp of wrong type
        ("0", 9999999, None, b"some string"),      # offset of wrong type
    )
    for offset, timestamp, key, value in rejected:
        with pytest.raises(TypeError):
            builder.append(offset,
                           timestamp=timestamp,
                           key=key,
                           value=value,
                           headers=[])

    # Each row is (offset, timestamp, key, value) that must be accepted.
    accepted = (
        (0, 9999999, b"123", None),       # a None value is fine
        (1, None, None, b"some string"),  # a None timestamp is fine
        # Offsets need not be incremental, though this should not happen
        # in practice.
        (5, 9999999, b"123", None),
    )
    for offset, timestamp, key, value in accepted:
        builder.append(offset,
                       timestamp=timestamp,
                       key=key,
                       value=value,
                       headers=[])

    # The exact size catches the case where a rejected append left data
    # behind in the builder's inner buffer.
    assert len(builder.build()) == 104
예제 #15
0
def test_default_batch_builder_validates_arguments():
    """Check argument type validation of the v2 builder's ``append``."""
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=999999)

    def assert_rejected(offset, **fields):
        # The append must fail with TypeError for the given arguments.
        with pytest.raises(TypeError):
            builder.append(offset, headers=[], **fields)

    # Key should not be str
    assert_rejected(0, timestamp=9999999, key="some string", value=None)
    # Value should not be str
    assert_rejected(0, timestamp=9999999, key=None, value="some string")
    # Timestamp should be of proper type
    assert_rejected(
        0, timestamp="1243812793", key=None, value=b"some string")
    # Offset of invalid type
    assert_rejected(
        "0", timestamp=9999999, key=None, value=b"some string")

    # Ok to pass value as None
    builder.append(0, timestamp=9999999, key=b"123", value=None, headers=[])
    # Timestamp can be None
    builder.append(
        1, timestamp=None, key=None, value=b"some string", headers=[])
    # Non-incremental offsets are accepted, though this should not happen
    builder.append(5, timestamp=9999999, key=b"123", value=None, headers=[])

    # The exact size shows that the rejected appends left nothing behind
    # in the builder's inner buffer.
    assert len(builder.build()) == 104