def __init__(self, magic, compression_type, batch_size):
        assert magic in [0, 1, 2], "Not supported magic"
        assert compression_type in [0, 1, 2, 3,
                                    4], "Not valid compression type"
        if magic >= 2:
            self._builder = DefaultRecordBatchBuilder(
                magic=magic,
                compression_type=compression_type,
                is_transactional=False,
                producer_id=-1,
                producer_epoch=-1,
                base_sequence=-1,
                batch_size=batch_size,
            )
        else:
            self._builder = LegacyRecordBatchBuilder(
                magic=magic,
                compression_type=compression_type,
                batch_size=batch_size)
        self._batch_size = batch_size
        self._buffer = None

        self._next_offset = 0
        self._closed = False
        self._bytes_written = 0
Esempio n. 2
0
def test_legacy_correct_metadata_response(magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024 * 1024)
    meta = builder.append(0, timestamp=9999999, key=b"test", value=b"Super")

    assert meta.offset == 0
    assert meta.timestamp == (9999999 if magic else -1)
    assert meta.crc == (-2095076219 if magic else 278251978) & 0xffffffff
    assert repr(meta) == ("LegacyRecordMetadata(offset=0, crc={!r}, size={}, "
                          "timestamp={})".format(meta.crc, meta.size,
                                                 meta.timestamp))
Esempio n. 3
0
def test_legacy_correct_metadata_response(magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024 * 1024)
    meta = builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super")

    assert meta.offset == 0
    assert meta.timestamp == (9999999 if magic else -1)
    assert meta.crc == (-2095076219 if magic else 278251978) & 0xffffffff
    assert repr(meta) == (
        "LegacyRecordMetadata(offset=0, crc={!r}, size={}, "
        "timestamp={})".format(meta.crc, meta.size, meta.timestamp)
    )
Esempio n. 4
0
def test_estimate_size_in_bytes_bigger_than_batch(magic):
    key = b"Super Key"
    value = b"1" * 100
    estimate_size = LegacyRecordBatchBuilder.estimate_size_in_bytes(
        magic, compression_type=0, key=key, value=value)

    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=9999999)
    builder.append(
        0, timestamp=9999999, key=key, value=value)
    buf = builder.build()
    assert len(buf) <= estimate_size, \
        "Estimate should always be upper bound"
Esempio n. 5
0
 def _estimate_size_in_bytes(self, key, value, headers=[]):
     magic = self._max_usable_produce_magic()
     if magic == 2:
         return DefaultRecordBatchBuilder.estimate_size_in_bytes(
             key, value, headers)
     else:
         return LegacyRecordBatchBuilder.estimate_size_in_bytes(
             magic, self.config['compression_type'], key, value)
Esempio n. 6
0
 def _estimate_size_in_bytes(self, key, value, headers=[]):
     magic = self._max_usable_produce_magic()
     if magic == 2:
         return DefaultRecordBatchBuilder.estimate_size_in_bytes(
             key, value, headers)
     else:
         return LegacyRecordBatchBuilder.estimate_size_in_bytes(
             magic, self.config['compression_type'], key, value)
Esempio n. 7
0
def test_read_write_serde_v0_v1_no_compression(magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=9999999)
    builder.append(
        0, timestamp=9999999, key=b"test", value=b"Super")
    buffer = builder.build()

    batch = LegacyRecordBatch(bytes(buffer), magic)
    msgs = list(batch)
    assert len(msgs) == 1
    msg = msgs[0]

    assert msg.offset == 0
    assert msg.timestamp == (9999999 if magic else None)
    assert msg.timestamp_type == (0 if magic else None)
    assert msg.key == b"test"
    assert msg.value == b"Super"
    assert msg.checksum == (-2095076219 if magic else 278251978) & 0xffffffff
Esempio n. 8
0
def test_read_write_serde_v0_v1_no_compression(magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=9999999)
    builder.append(0, timestamp=9999999, key=b"test", value=b"Super")
    buffer = builder.build()

    batch = LegacyRecordBatch(bytes(buffer), magic)
    msgs = list(batch)
    assert len(msgs) == 1
    msg = msgs[0]

    assert msg.offset == 0
    assert msg.timestamp == (9999999 if magic else None)
    assert msg.timestamp_type == (0 if magic else None)
    assert msg.key == b"test"
    assert msg.value == b"Super"
    assert msg.checksum == (-2095076219 if magic else 278251978) & 0xffffffff
Esempio n. 9
0
    def __init__(self, magic, compression_type, batch_size):
        assert magic in [0, 1, 2], "Not supported magic"
        assert compression_type in [0, 1, 2, 3], "Not valid compression type"
        if magic >= 2:
            self._builder = DefaultRecordBatchBuilder(
                magic=magic, compression_type=compression_type,
                is_transactional=False, producer_id=-1, producer_epoch=-1,
                base_sequence=-1, batch_size=batch_size)
        else:
            self._builder = LegacyRecordBatchBuilder(
                magic=magic, compression_type=compression_type,
                batch_size=batch_size)
        self._batch_size = batch_size
        self._buffer = None

        self._next_offset = 0
        self._closed = False
        self._bytes_written = 0
Esempio n. 10
0
def test_read_write_serde_v0_v1_with_compression(compression_type, magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=compression_type,
                                       batch_size=9999999)
    for offset in range(10):
        builder.append(offset, timestamp=9999999, key=b"test", value=b"Super")
    buffer = builder.build()

    batch = LegacyRecordBatch(bytes(buffer), magic)
    msgs = list(batch)

    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == (9999999 if magic else None)
        assert msg.timestamp_type == (0 if magic else None)
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.checksum == (-2095076219 if magic else 278251978) & \
            0xffffffff
Esempio n. 11
0
def test_read_write_serde_v0_v1_with_compression(compression_type, magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=compression_type, batch_size=9999999)
    for offset in range(10):
        builder.append(
            offset, timestamp=9999999, key=b"test", value=b"Super")
    buffer = builder.build()

    batch = LegacyRecordBatch(bytes(buffer), magic)
    msgs = list(batch)

    for offset, msg in enumerate(msgs):
        assert msg.offset == offset
        assert msg.timestamp == (9999999 if magic else None)
        assert msg.timestamp_type == (0 if magic else None)
        assert msg.key == b"test"
        assert msg.value == b"Super"
        assert msg.checksum == (-2095076219 if magic else 278251978) & \
            0xffffffff
Esempio n. 12
0
def test_legacy_batch_size_limit(magic):
    # First message can be added even if it's too big
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024)
    meta = builder.append(0, timestamp=None, key=None, value=b"M" * 2000)
    assert meta.size > 0
    assert meta.crc is not None
    assert meta.offset == 0
    assert meta.timestamp is not None
    assert len(builder.build()) > 2000

    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024)
    meta = builder.append(0, timestamp=None, key=None, value=b"M" * 700)
    assert meta is not None
    meta = builder.append(1, timestamp=None, key=None, value=b"M" * 700)
    assert meta is None
    meta = builder.append(2, timestamp=None, key=None, value=b"M" * 700)
    assert meta is None
    assert len(builder.build()) < 1000
Esempio n. 13
0
def test_unavailable_codec(magic, compression_type, name, checker_name):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=compression_type, batch_size=1024)
    builder.append(0, timestamp=None, key=None, value=b"M")
    correct_buffer = builder.build()

    with patch.object(kafka.codec, checker_name) as mocked:
        mocked.return_value = False
        # Check that builder raises error
        builder = LegacyRecordBatchBuilder(
            magic=magic, compression_type=compression_type, batch_size=1024)
        error_msg = "Libraries for {} compression codec not found".format(name)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            builder.append(0, timestamp=None, key=None, value=b"M")
            builder.build()

        # Check that reader raises same error
        batch = LegacyRecordBatch(bytes(correct_buffer), magic)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            list(batch)
Esempio n. 14
0
def test_written_bytes_equals_size_in_bytes(magic):
    key = b"test"
    value = b"Super"
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=9999999)

    size_in_bytes = builder.size_in_bytes(0,
                                          timestamp=9999999,
                                          key=key,
                                          value=value)

    pos = builder.size()
    builder.append(0, timestamp=9999999, key=key, value=value)

    assert builder.size() - pos == size_in_bytes
Esempio n. 15
0
def test_legacy_batch_size_limit(magic):
    # First message can be added even if it's too big
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024)
    meta = builder.append(0, timestamp=None, key=None, value=b"M" * 2000)
    assert meta.size > 0
    assert meta.crc is not None
    assert meta.offset == 0
    assert meta.timestamp is not None
    assert len(builder.build()) > 2000

    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024)
    meta = builder.append(0, timestamp=None, key=None, value=b"M" * 700)
    assert meta is not None
    meta = builder.append(1, timestamp=None, key=None, value=b"M" * 700)
    assert meta is None
    meta = builder.append(2, timestamp=None, key=None, value=b"M" * 700)
    assert meta is None
    assert len(builder.build()) < 1000
Esempio n. 16
0
def test_estimate_size_in_bytes_bigger_than_batch(magic):
    key = b"Super Key"
    value = b"1" * 100
    estimate_size = LegacyRecordBatchBuilder.estimate_size_in_bytes(
        magic, compression_type=0, key=key, value=value)

    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=9999999)
    builder.append(0, timestamp=9999999, key=key, value=value)
    buf = builder.build()
    assert len(buf) <= estimate_size, \
        "Estimate should always be upper bound"
Esempio n. 17
0
def test_written_bytes_equals_size_in_bytes(magic):
    key = b"test"
    value = b"Super"
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=9999999)

    size_in_bytes = builder.size_in_bytes(
        0, timestamp=9999999, key=key, value=value)

    pos = builder.size()
    builder.append(0, timestamp=9999999, key=key, value=value)

    assert builder.size() - pos == size_in_bytes
Esempio n. 18
0
class MemoryRecordsBuilder(object):

    __slots__ = ("_builder", "_batch_size", "_buffer", "_next_offset",
                 "_closed", "_bytes_written")

    def __init__(self, magic, compression_type, batch_size):
        assert magic in [0, 1, 2], "Not supported magic"
        assert compression_type in [0, 1, 2, 3,
                                    4], "Not valid compression type"
        if magic >= 2:
            self._builder = DefaultRecordBatchBuilder(
                magic=magic,
                compression_type=compression_type,
                is_transactional=False,
                producer_id=-1,
                producer_epoch=-1,
                base_sequence=-1,
                batch_size=batch_size)
        else:
            self._builder = LegacyRecordBatchBuilder(
                magic=magic,
                compression_type=compression_type,
                batch_size=batch_size)
        self._batch_size = batch_size
        self._buffer = None

        self._next_offset = 0
        self._closed = False
        self._bytes_written = 0

    def append(self, timestamp, key, value, headers=[]):
        """ Append a message to the buffer.

        Returns: RecordMetadata or None if unable to append
        """
        if self._closed:
            return None

        offset = self._next_offset
        metadata = self._builder.append(offset, timestamp, key, value, headers)
        # Return of None means there's no space to add a new message
        if metadata is None:
            return None

        self._next_offset += 1
        return metadata

    def close(self):
        # This method may be called multiple times on the same batch
        # i.e., on retries
        # we need to make sure we only close it out once
        # otherwise compressed messages may be double-compressed
        # see Issue 718
        if not self._closed:
            self._bytes_written = self._builder.size()
            self._buffer = bytes(self._builder.build())
            self._builder = None
        self._closed = True

    def size_in_bytes(self):
        if not self._closed:
            return self._builder.size()
        else:
            return len(self._buffer)

    def compression_rate(self):
        assert self._closed
        return self.size_in_bytes() / self._bytes_written

    def is_full(self):
        if self._closed:
            return True
        else:
            return self._builder.size() >= self._batch_size

    def next_offset(self):
        return self._next_offset

    def buffer(self):
        assert self._closed
        return self._buffer
Esempio n. 19
0
def test_legacy_batch_builder_validates_arguments(magic):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=0,
                                       batch_size=1024 * 1024)

    # Key should not be str
    with pytest.raises(TypeError):
        builder.append(0, timestamp=9999999, key="some string", value=None)

    # Value should not be str
    with pytest.raises(TypeError):
        builder.append(0, timestamp=9999999, key=None, value="some string")

    # Timestamp should be of proper type
    if magic != 0:
        with pytest.raises(TypeError):
            builder.append(0,
                           timestamp="1243812793",
                           key=None,
                           value=b"some string")

    # Offset of invalid type
    with pytest.raises(TypeError):
        builder.append("0", timestamp=9999999, key=None, value=b"some string")

    # Ok to pass value as None
    builder.append(0, timestamp=9999999, key=b"123", value=None)

    # Timestamp can be None
    builder.append(1, timestamp=None, key=None, value=b"some string")

    # Ok to pass offsets in not incremental order. This should not happen thou
    builder.append(5, timestamp=9999999, key=b"123", value=None)

    # in case error handling code fails to fix inner buffer in builder
    assert len(builder.build()) == 119 if magic else 95
Esempio n. 20
0
def test_unavailable_codec(magic, compression_type, name, checker_name):
    builder = LegacyRecordBatchBuilder(magic=magic,
                                       compression_type=compression_type,
                                       batch_size=1024)
    builder.append(0, timestamp=None, key=None, value=b"M")
    correct_buffer = builder.build()

    with patch.object(kafka.codec, checker_name) as mocked:
        mocked.return_value = False
        # Check that builder raises error
        builder = LegacyRecordBatchBuilder(magic=magic,
                                           compression_type=compression_type,
                                           batch_size=1024)
        error_msg = "Libraries for {} compression codec not found".format(name)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            builder.append(0, timestamp=None, key=None, value=b"M")
            builder.build()

        # Check that reader raises same error
        batch = LegacyRecordBatch(bytes(correct_buffer), magic)
        with pytest.raises(UnsupportedCodecError, match=error_msg):
            list(batch)
Esempio n. 21
0
class MemoryRecordsBuilder(object):

    def __init__(self, magic, compression_type, batch_size):
        assert magic in [0, 1, 2], "Not supported magic"
        assert compression_type in [0, 1, 2, 3], "Not valid compression type"
        if magic >= 2:
            self._builder = DefaultRecordBatchBuilder(
                magic=magic, compression_type=compression_type,
                is_transactional=False, producer_id=-1, producer_epoch=-1,
                base_sequence=-1, batch_size=batch_size)
        else:
            self._builder = LegacyRecordBatchBuilder(
                magic=magic, compression_type=compression_type,
                batch_size=batch_size)
        self._batch_size = batch_size
        self._buffer = None

        self._next_offset = 0
        self._closed = False
        self._bytes_written = 0

    def append(self, timestamp, key, value, headers=[]):
        """ Append a message to the buffer.

        Returns: RecordMetadata or None if unable to append
        """
        if self._closed:
            return None

        offset = self._next_offset
        metadata = self._builder.append(offset, timestamp, key, value, headers)
        # Return of None means there's no space to add a new message
        if metadata is None:
            return None

        self._next_offset += 1
        return metadata

    def close(self):
        # This method may be called multiple times on the same batch
        # i.e., on retries
        # we need to make sure we only close it out once
        # otherwise compressed messages may be double-compressed
        # see Issue 718
        if not self._closed:
            self._bytes_written = self._builder.size()
            self._buffer = bytes(self._builder.build())
            self._builder = None
        self._closed = True

    def size_in_bytes(self):
        if not self._closed:
            return self._builder.size()
        else:
            return len(self._buffer)

    def compression_rate(self):
        assert self._closed
        return self.size_in_bytes() / self._bytes_written

    def is_full(self):
        if self._closed:
            return True
        else:
            return self._builder.size() >= self._batch_size

    def next_offset(self):
        return self._next_offset

    def buffer(self):
        assert self._closed
        return self._buffer
Esempio n. 22
0
def test_legacy_batch_builder_validates_arguments(magic):
    builder = LegacyRecordBatchBuilder(
        magic=magic, compression_type=0, batch_size=1024 * 1024)

    # Key should not be str
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp=9999999, key="some string", value=None)

    # Value should not be str
    with pytest.raises(TypeError):
        builder.append(
            0, timestamp=9999999, key=None, value="some string")

    # Timestamp should be of proper type
    if magic != 0:
        with pytest.raises(TypeError):
            builder.append(
                0, timestamp="1243812793", key=None, value=b"some string")

    # Offset of invalid type
    with pytest.raises(TypeError):
        builder.append(
            "0", timestamp=9999999, key=None, value=b"some string")

    # Ok to pass value as None
    builder.append(
        0, timestamp=9999999, key=b"123", value=None)

    # Timestamp can be None
    builder.append(
        1, timestamp=None, key=None, value=b"some string")

    # Ok to pass offsets in not incremental order. This should not happen thou
    builder.append(
        5, timestamp=9999999, key=b"123", value=None)

    # in case error handling code fails to fix inner buffer in builder
    assert len(builder.build()) == 119 if magic else 95