Esempio n. 1
0
def test_read_write_serde_v2(compression_type):
    """Round-trip ten v2 records through the builder and the batch reader.

    A transactional batch is built with ``DefaultRecordBatchBuilder``, the
    resulting bytes are parsed by ``DefaultRecordBatch``, and both the
    batch-level attributes and every record's fields are compared with
    what was written.

    Args:
        compression_type: codec id handed to the builder and expected back
            from the reader; supplied from outside this function.
    """
    record_count = 10
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=compression_type,
        is_transactional=1,
        producer_id=123456,
        producer_epoch=123,
        base_sequence=9999,
        batch_size=999999,
    )
    headers = [("header1", b"aaa"), ("header2", b"bbb")]
    for offset in range(record_count):
        builder.append(
            offset,
            timestamp=9999999,
            key=b"test",
            value=b"Super",
            headers=headers,
        )
    buffer = builder.build()
    reader = DefaultRecordBatch(bytes(buffer))
    msgs = list(reader)

    # Batch-level attributes.
    assert reader.is_transactional is True
    assert reader.compression_type == compression_type
    assert reader.magic == 2
    assert reader.timestamp_type == 0
    assert reader.base_offset == 0

    # Without this check the per-record loop below would pass vacuously if
    # the reader yielded no records at all.
    assert len(msgs) == record_count
    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == 9999999
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.headers == headers
def test_read_write_serde_v2(compression_type):
    """Write ten records into a v2 batch and read them back unchanged.

    The batch is built as transactional with the given codec, serialized
    with ``build()``, and re-parsed through ``DefaultRecordBatch``. The test
    then checks the batch attributes, the number of records, and each
    record's offset, timestamp, key, value and headers.

    Args:
        compression_type: codec id passed to the builder; provided by the
            caller of this test (not defined in this function).
    """
    expected_records = 10
    headers = [("header1", b"aaa"), ("header2", b"bbb")]

    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=compression_type, is_transactional=1,
        producer_id=123456, producer_epoch=123, base_sequence=9999,
        batch_size=999999)
    for offset in range(expected_records):
        builder.append(
            offset, timestamp=9999999, key=b"test", value=b"Super",
            headers=headers)

    reader = DefaultRecordBatch(bytes(builder.build()))
    msgs = list(reader)

    assert reader.is_transactional is True
    assert reader.compression_type == compression_type
    assert reader.magic == 2
    assert reader.timestamp_type == 0
    assert reader.base_offset == 0

    # Guard against an empty iteration: the loop below asserts nothing when
    # the reader produces no records.
    assert len(msgs) == expected_records
    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == 9999999
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.headers == headers
def test_default_batch_size_limit():
    """Check how the v2 builder applies its ``batch_size`` of 1024 bytes.

    Two scenarios are exercised: one oversized record appended to an empty
    builder, and three 700-byte records of which only the first is expected
    to be accepted.
    """
    builder_kwargs = dict(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)

    # First message can be added even if it's too big
    oversized_builder = DefaultRecordBatchBuilder(**builder_kwargs)
    oversized_meta = oversized_builder.append(
        0, timestamp=None, key=None, value=b"M" * 2000, headers=[])
    assert oversized_meta.size > 0
    assert oversized_meta.crc is None
    assert oversized_meta.offset == 0
    assert oversized_meta.timestamp is not None
    assert len(oversized_builder.build()) > 2000

    # With one record already in the batch, further appends that do not fit
    # are expected to return None instead of metadata.
    limited_builder = DefaultRecordBatchBuilder(**builder_kwargs)
    accepted = limited_builder.append(
        0, timestamp=None, key=None, value=b"M" * 700, headers=[])
    assert accepted is not None
    for offset in (1, 2):
        rejected = limited_builder.append(
            offset, timestamp=None, key=None, value=b"M" * 700, headers=[])
        assert rejected is None
    assert len(limited_builder.build()) < 1000
    def __init__(self, magic, compression_type, batch_size):
        """Create the underlying batch builder and reset bookkeeping state.

        Args:
            magic: message format version; asserted to be 0, 1 or 2.
                Version 2 and above uses ``DefaultRecordBatchBuilder``,
                lower versions use ``LegacyRecordBatchBuilder``.
            compression_type: codec id; asserted to be one of 0 through 4.
            batch_size: size limit forwarded to the underlying builder and
                kept on the instance.
        """
        assert magic in (0, 1, 2), "Not supported magic"
        assert compression_type in (0, 1, 2, 3, 4), \
            "Not valid compression type"

        use_default_format = magic >= 2
        if use_default_format:
            # The default builder is created non-transactional, with the
            # producer id, epoch and base sequence all set to -1.
            builder = DefaultRecordBatchBuilder(
                magic=magic, compression_type=compression_type,
                is_transactional=False, producer_id=-1, producer_epoch=-1,
                base_sequence=-1, batch_size=batch_size)
        else:
            builder = LegacyRecordBatchBuilder(
                magic=magic, compression_type=compression_type,
                batch_size=batch_size)

        self._builder = builder
        self._batch_size = batch_size
        self._buffer = None

        self._next_offset = 0
        self._closed = False
        self._bytes_written = 0
def test_estimate_size_in_bytes_bigger_than_batch_v2():
    """The static size estimate must not be smaller than the built batch."""
    record_key = b"Super Key"
    record_value = b"1" * 100
    record_headers = [("header1", b"aaa"), ("header2", b"bbb")]

    # The estimate is computed from the record fields alone, before any
    # builder instance exists.
    upper_bound = DefaultRecordBatchBuilder.estimate_size_in_bytes(
        record_key, record_value, record_headers)

    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=999999,
    )
    builder.append(
        0,
        timestamp=9999999,
        key=record_key,
        value=record_value,
        headers=record_headers,
    )
    built = builder.build()
    assert len(built) <= upper_bound, "Estimate should always be upper bound"
def test_default_correct_metadata_response():
    """``append`` returns metadata describing the record it just wrote."""
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=1024 * 1024,
    )
    metadata = builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super", headers=[])

    assert metadata.offset == 0
    assert metadata.timestamp == 9999999
    assert metadata.crc is None
    assert metadata.size == 16

    # The repr is expected to echo the offset, size and timestamp.
    expected_repr = (
        "DefaultRecordMetadata(offset=0, size={}, timestamp={})"
        .format(metadata.size, metadata.timestamp)
    )
    assert repr(metadata) == expected_repr
Esempio n. 7
0
def test_default_batch_size_limit():
    """Verify the 1024-byte ``batch_size`` limit of the v2 builder.

    An empty builder is expected to take a single record larger than the
    limit, while a builder that already holds a record is expected to
    refuse (return ``None`` for) records that would not fit.
    """
    def new_builder():
        # Every scenario starts from an identically configured builder.
        return DefaultRecordBatchBuilder(
            magic=2, compression_type=0, is_transactional=0,
            producer_id=-1, producer_epoch=-1, base_sequence=-1,
            batch_size=1024)

    def append_filler(target, offset, length):
        # Append a key-less record whose value is `length` bytes of b"M".
        return target.append(
            offset, timestamp=None, key=None, value=b"M" * length,
            headers=[])

    # First message can be added even if it's too big
    builder = new_builder()
    meta = append_filler(builder, 0, 2000)
    assert meta.size > 0
    assert meta.crc is None
    assert meta.offset == 0
    assert meta.timestamp is not None
    assert len(builder.build()) > 2000

    # Only the first of three 700-byte records fits under the limit.
    builder = new_builder()
    assert append_filler(builder, 0, 700) is not None
    assert append_filler(builder, 1, 700) is None
    assert append_filler(builder, 2, 700) is None
    assert len(builder.build()) < 1000
Esempio n. 8
0
def random_record_batch():
    """Build a v2 record batch containing one record with random fields.

    Returns:
        The result of ``DefaultRecordBatchBuilder.build()`` for a
        non-transactional batch created with ``CODEC_NONE``.
    """
    # Random values are drawn in the same order in which the builder
    # arguments and then the record arguments are listed.
    epoch = random_int16()
    sequence = random_int32()
    batch_builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=DefaultRecordBatch.CODEC_NONE,
        is_transactional=False,
        # Fixed at -1 rather than random_int64() to disable idempotence.
        producer_id=-1,
        producer_epoch=epoch,
        base_sequence=sequence,
        batch_size=9999999999)

    record_offset = random_int32()
    record_timestamp = random_int64()
    record_key = random_bytes(True)
    record_value = random_bytes(True)
    batch_builder.append(
        offset=record_offset,
        timestamp=record_timestamp,
        key=record_key,
        value=record_value,
        headers=())

    return batch_builder.build()
Esempio n. 9
0
 def _estimate_size_in_bytes(self, key, value, headers=None):
     """Estimate the serialized size of one record for the usable format.

     The format version comes from ``self._max_usable_produce_magic()``.
     Version 2 delegates to ``DefaultRecordBatchBuilder``; any other
     version delegates to ``LegacyRecordBatchBuilder`` together with the
     configured ``compression_type``.

     Args:
         key: record key forwarded to the selected estimator.
         value: record value forwarded to the selected estimator.
         headers: optional record headers; ``None`` (the default) is
             treated as an empty list. Only forwarded for version 2.

     Returns:
         Whatever the selected builder's ``estimate_size_in_bytes`` returns.
     """
     # Use a fresh list per call instead of one mutable default object
     # shared by every call.
     if headers is None:
         headers = []
     magic = self._max_usable_produce_magic()
     if magic == 2:
         return DefaultRecordBatchBuilder.estimate_size_in_bytes(
             key, value, headers)
     return LegacyRecordBatchBuilder.estimate_size_in_bytes(
         magic, self.config['compression_type'], key, value)
Esempio n. 10
0
 def _estimate_size_in_bytes(self, key, value, headers=None):
     """Return a size estimate for a single record in the usable format.

     ``self._max_usable_produce_magic()`` selects the format version: for
     version 2 the estimate comes from ``DefaultRecordBatchBuilder``,
     otherwise from ``LegacyRecordBatchBuilder`` using
     ``self.config['compression_type']``.

     Args:
         key: record key passed through to the estimator.
         value: record value passed through to the estimator.
         headers: optional record headers, used only for version 2;
             ``None`` (the default) stands for "no headers".

     Returns:
         The value returned by the chosen ``estimate_size_in_bytes``.
     """
     # A None default avoids a single list object being shared between
     # calls; it is normalised to an empty list here.
     headers = [] if headers is None else headers
     magic = self._max_usable_produce_magic()
     if magic != 2:
         return LegacyRecordBatchBuilder.estimate_size_in_bytes(
             magic, self.config['compression_type'], key, value)
     return DefaultRecordBatchBuilder.estimate_size_in_bytes(
         key, value, headers)
Esempio n. 11
0
def test_written_bytes_equals_size_in_bytes_v2():
    """``size_in_bytes`` must predict exactly how much ``append`` writes."""
    # The same field values (including a header with a None value) are used
    # for the prediction and for the real append.
    record_fields = dict(
        timestamp=9999999,
        key=b"test",
        value=b"Super",
        headers=[("header1", b"aaa"), ("header2", b"bbb"), ("xx", None)],
    )
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=999999,
    )

    predicted = builder.size_in_bytes(0, **record_fields)

    size_before = builder.size()
    metadata = builder.append(0, **record_fields)
    bytes_written = builder.size() - size_before

    assert bytes_written == predicted
    assert metadata.size == predicted
Esempio n. 12
0
def test_default_correct_metadata_response():
    """Check the metadata object returned by a single ``append`` call."""
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024 * 1024)
    record_meta = builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super", headers=[])

    assert record_meta.offset == 0
    assert record_meta.timestamp == 9999999
    assert record_meta.crc is None
    assert record_meta.size == 16

    # The repr is built from the same size and timestamp checked above.
    repr_template = "DefaultRecordMetadata(offset=0, size={}, timestamp={})"
    expected = repr_template.format(record_meta.size, record_meta.timestamp)
    assert repr(record_meta) == expected
Esempio n. 13
0
def test_written_bytes_equals_size_in_bytes_v2():
    """The predicted record size equals the growth caused by ``append``."""
    record_key = b"test"
    record_value = b"Super"
    record_headers = [("header1", b"aaa"), ("header2", b"bbb"), ("xx", None)]
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    # Ask for the prediction first, then measure the builder around the
    # real append.
    expected_size = builder.size_in_bytes(
        0, timestamp=9999999, key=record_key, value=record_value,
        headers=record_headers)

    start = builder.size()
    meta = builder.append(
        0, timestamp=9999999, key=record_key, value=record_value,
        headers=record_headers)
    growth = builder.size() - start

    assert growth == expected_size
    assert meta.size == expected_size
Esempio n. 14
0
def test_unavailable_codec(magic, compression_type, name, checker_name):
    """Builder and reader both raise when the codec is reported missing.

    Args:
        magic: supplied by the caller; not used in the body, which always
            builds magic-2 batches.
        compression_type: codec id under test.
        name: codec name interpolated into the expected error message.
        checker_name: attribute of ``kafka.codec`` that is patched so that
            calling it returns ``False``.
    """
    builder_kwargs = dict(
        magic=2, compression_type=compression_type, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=1024)

    # Build a valid batch before the codec checker is patched.
    healthy_builder = DefaultRecordBatchBuilder(**builder_kwargs)
    healthy_builder.append(
        0, timestamp=None, key=None, value=b"M" * 2000, headers=[])
    correct_buffer = healthy_builder.build()

    expected_error = "Libraries for {} compression codec not found".format(
        name)
    with patch.object(kafka.codec, checker_name) as checker:
        checker.return_value = False

        # Check that builder raises error
        failing_builder = DefaultRecordBatchBuilder(**builder_kwargs)
        with pytest.raises(UnsupportedCodecError, match=expected_error):
            failing_builder.append(
                0, timestamp=None, key=None, value=b"M", headers=[])
            failing_builder.build()

        # Check that reader raises same error
        reader = DefaultRecordBatch(bytes(correct_buffer))
        with pytest.raises(UnsupportedCodecError, match=expected_error):
            list(reader)
Esempio n. 15
0
def test_estimate_size_in_bytes_bigger_than_batch_v2():
    """A one-record batch must not be larger than its size estimate."""
    record_fields = dict(
        key=b"Super Key",
        value=b"1" * 100,
        headers=[("header1", b"aaa"), ("header2", b"bbb")],
    )
    # The estimator takes the fields positionally: key, value, headers.
    size_estimate = DefaultRecordBatchBuilder.estimate_size_in_bytes(
        record_fields["key"], record_fields["value"],
        record_fields["headers"])

    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)
    builder.append(0, timestamp=9999999, **record_fields)
    serialized = builder.build()
    assert len(serialized) <= size_estimate, \
        "Estimate should always be upper bound"
Esempio n. 16
0
def test_default_batch_builder_validates_arguments():
    """``append`` rejects wrongly typed arguments and stays usable after."""
    builder = DefaultRecordBatchBuilder(
        magic=2, compression_type=0, is_transactional=0,
        producer_id=-1, producer_epoch=-1, base_sequence=-1,
        batch_size=999999)

    # Each entry is (offset, remaining fields) for a call that is expected
    # to raise TypeError.
    rejected_calls = [
        # Key should not be str
        (0, dict(timestamp=9999999, key="some string", value=None)),
        # Value should not be str
        (0, dict(timestamp=9999999, key=None, value="some string")),
        # Timestamp should be of proper type
        (0, dict(timestamp="1243812793", key=None, value=b"some string")),
        # Offset of invalid type
        ("0", dict(timestamp=9999999, key=None, value=b"some string")),
    ]
    for bad_offset, bad_fields in rejected_calls:
        with pytest.raises(TypeError):
            builder.append(bad_offset, headers=[], **bad_fields)

    accepted_calls = [
        # Ok to pass value as None
        (0, dict(timestamp=9999999, key=b"123", value=None)),
        # Timestamp can be None
        (1, dict(timestamp=None, key=None, value=b"some string")),
        # Offsets need not be incremental, though this should not happen
        (5, dict(timestamp=9999999, key=b"123", value=None)),
    ]
    for offset, fields in accepted_calls:
        builder.append(offset, headers=[], **fields)

    # Guards against the error handling of the rejected appends failing to
    # restore the builder's inner buffer.
    assert len(builder.build()) == 104
Esempio n. 17
0
def test_unavailable_codec(magic, compression_type, name, checker_name):
    """Expect ``UnsupportedCodecError`` when the codec checker says no.

    A batch is first built normally. Then the ``kafka.codec`` attribute
    named by ``checker_name`` is patched to return ``False``, after which
    both building a new batch and reading the earlier one are expected to
    raise with a message naming the codec.

    Args:
        magic: provided by the caller but unused; batches use magic 2.
        compression_type: codec id passed to every builder.
        name: codec name that appears in the expected error message.
        checker_name: name of the ``kafka.codec`` attribute to patch.
    """
    def new_builder():
        return DefaultRecordBatchBuilder(
            magic=2, compression_type=compression_type, is_transactional=0,
            producer_id=-1, producer_epoch=-1, base_sequence=-1,
            batch_size=1024)

    reference = new_builder()
    reference.append(
        0, timestamp=None, key=None, value=b"M" * 2000, headers=[])
    correct_buffer = reference.build()

    with patch.object(kafka.codec, checker_name) as fake_checker:
        fake_checker.return_value = False
        error_msg = "Libraries for {} compression codec not found".format(name)

        # Check that builder raises error
        broken = new_builder()
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            broken.append(0, timestamp=None, key=None, value=b"M", headers=[])
            broken.build()

        # Check that reader raises same error
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            list(DefaultRecordBatch(bytes(correct_buffer)))
Esempio n. 18
0
def test_default_batch_builder_validates_arguments():
    """Type errors from ``append`` must not corrupt the builder's buffer."""
    builder = DefaultRecordBatchBuilder(
        magic=2,
        compression_type=0,
        is_transactional=0,
        producer_id=-1,
        producer_epoch=-1,
        base_sequence=-1,
        batch_size=999999,
    )

    def expect_type_error(offset, **fields):
        # The append is the only statement inside the raises block.
        with pytest.raises(TypeError):
            builder.append(offset, headers=[], **fields)

    # Key should not be str
    expect_type_error(0, timestamp=9999999, key="some string", value=None)

    # Value should not be str
    expect_type_error(0, timestamp=9999999, key=None, value="some string")

    # Timestamp should be of proper type
    expect_type_error(
        0, timestamp="1243812793", key=None, value=b"some string")

    # Offset of invalid type
    expect_type_error(
        "0", timestamp=9999999, key=None, value=b"some string")

    # Ok to pass value as None
    builder.append(0, timestamp=9999999, key=b"123", value=None, headers=[])

    # Timestamp can be None
    builder.append(
        1, timestamp=None, key=None, value=b"some string", headers=[])

    # Offsets may be passed out of incremental order, though this should
    # not happen in practice.
    builder.append(5, timestamp=9999999, key=b"123", value=None, headers=[])

    # Exact size check, in case the error handling code fails to fix the
    # inner buffer in the builder.
    assert len(builder.build()) == 104