예제 #1
0
def test__unpack_message_set_compressed_v1(fetcher):
    """Unpacking a gzip wrapper with magic=1 yields absolute offsets.

    Three inner messages carry relative offsets 0..2 and are wrapped in a
    single compressed message at offset 10; the resulting records are
    expected at offsets 8, 9 and 10.
    """
    fetcher.config['check_crcs'] = False
    tp = TopicPartition('foo', 0)

    # Build the inner (uncompressed) message set by hand:
    # 8-byte offset + 4-byte size + encoded message for every payload.
    payloads = [b'a', b'b', b'c']
    inner_chunks = []
    for relative_offset, payload in enumerate(payloads):
        inner_message = Message(payload)
        body = inner_message.encode()
        inner_chunks.append(
            Int64.encode(relative_offset) + Int32.encode(len(body)) + body)

    # Wrap the whole inner set into one gzip-compressed v1 message.
    wrapper_offset = 10
    wrapper = Message(gzip_encode(b''.join(inner_chunks)),
                      magic=1,
                      attributes=Message.CODEC_GZIP)
    wrapped_set = [(wrapper_offset, None, wrapper)]

    records = list(fetcher._unpack_message_set(tp, wrapped_set))

    assert len(records) == 3
    assert all(isinstance(record, ConsumerRecord) for record in records)
    assert [record.value for record in records] == payloads
    assert [record.offset for record in records] == [8, 9, 10]
예제 #2
0
    def decode(cls, data, bytes_to_read=None):
        """Decode a message set into a list of ``(offset, size, message)``.

        ``data`` may be ``bytes`` or a readable stream. Callers decoding a
        compressed message should pass ``bytes_to_read`` (the message size);
        when it is omitted the length is read from ``data`` as an Int32.

        If decoding an entry raises ``ValueError``, a single
        ``(None, None, PartialMessage())`` entry is appended and decoding
        stops.
        """
        stream = io.BytesIO(data) if isinstance(data, bytes) else data
        remaining = (Int32.decode(stream) if bytes_to_read is None
                     else bytes_to_read)

        # When FetchRequest max_bytes is smaller than the available message
        # set the server returns partial data for the final message, so work
        # on a private buffer to avoid reading past the declared length.
        window = io.BytesIO(stream.read(remaining))

        decoded = []
        while remaining:
            try:
                offset = Int64.decode(window)
                payload = Bytes.decode(window)
                payload_size = len(payload)
                remaining -= 8 + 4 + payload_size
                message = Message.decode(payload)
            except ValueError:
                # PartialMessage signals that max_bytes may be too small.
                decoded.append((None, None, PartialMessage()))
                break
            else:
                decoded.append((offset, payload_size, message))
        return decoded
예제 #3
0
    def drain_ready(self):
        """Finalize the batch buffer so it is ready to be sent.

        Resolves the drain waiter, then, when a compression type is
        configured, wraps everything after the 4-byte size prefix into one
        compressed ``Message``. The compressed form is written only if it,
        together with its 16-byte header, is smaller than the current
        buffer; otherwise only the size prefix is updated and the data stays
        uncompressed. Either way the buffer is rewound to position 0.
        """
        view = self._buffer.getbuffer()
        self._drain_waiter.set_result(None)

        codec = self._compression_type
        if codec:
            _, compress, attributes = self._COMPRESSORS[codec]
            wrapper = Message(
                compress(view[4:].tobytes()),
                attributes=attributes,
                magic=self._version_id,
            )
            packed = wrapper.encode()
            packed_len = len(packed)
            # 4 (total size) + 8 (offset) + 4 (compressed message size)
            prefix_len = 16
            # A compressed message that is not shorter than the original is
            # not worth sending; fall through and send the data as is.
            if packed_len + prefix_len < len(view):
                # Overwrite in place through the memoryview to avoid copies.
                view[:4] = Int32.encode(packed_len + 12)
                view[4:12] = Int64.encode(0)  # offset 0
                view[12:16] = Int32.encode(packed_len)
                view[16:16 + packed_len] = packed
                self._buffer.seek(0)
                return

        # Uncompressed: store the batch size in the first 4 bytes.
        view[:4] = Int32.encode(self._buffer.tell() - 4)
        self._buffer.seek(0)
예제 #4
0
    def _build(self):
        """Close the batch and return its buffer rewound to position 0.

        On the first call the batch is marked closed and, when a compression
        type is configured, the data after the 4-byte size prefix is wrapped
        into one compressed ``Message``. The compressed form replaces the
        buffer content (and the buffer is truncated to it) only if it,
        together with its 16-byte header, is smaller than the current
        buffer; otherwise just the size prefix is updated. Later calls only
        rewind and return the same buffer.
        """
        buf = self._buffer
        if self._closed:
            buf.seek(0)
            return buf

        self._closed = True
        view = buf.getbuffer()

        codec = self._compression_type
        if codec:
            _, compress, attributes = self._COMPRESSORS[codec]
            wrapper = Message(
                compress(view[4:].tobytes()),
                attributes=attributes,
                magic=self._magic,
            )
            packed = wrapper.encode()
            packed_len = len(packed)
            # 4 (total size) + 8 (offset) + 4 (compressed message size)
            prefix_len = 16
            # A compressed message that is not shorter than the original is
            # not worth sending; fall through and send the data as is.
            if packed_len + prefix_len < len(view):
                end = 16 + packed_len
                # Overwrite in place through the memoryview to avoid copies.
                view[:4] = Int32.encode(packed_len + 12)
                view[4:12] = Int64.encode(0)  # offset 0
                view[12:16] = Int32.encode(packed_len)
                view[16:end] = packed

                # The view is released before the buffer is truncated.
                view.release()
                buf.seek(end)
                buf.truncate()
                buf.seek(0)
                return buf

        # Uncompressed: store the batch size in the first 4 bytes.
        view[:4] = Int32.encode(buf.tell() - 4)
        buf.seek(0)
        return buf
예제 #5
0
    def decode(cls, data, bytes_to_read=None):
        """Parse a message set and return ``(offset, size, message)`` tuples.

        ``data`` is either ``bytes`` or a readable stream. For compressed
        messages the caller supplies ``bytes_to_read`` (the message size);
        without it the length is taken from an Int32 read from ``data``.

        A ``ValueError`` raised while decoding an entry ends the loop after
        appending ``(None, None, PartialMessage())``.
        """
        if isinstance(data, bytes):
            source = io.BytesIO(data)
        else:
            source = data

        left = bytes_to_read
        if left is None:
            left = Int32.decode(source)

        # The server may return partial data for the final message when
        # FetchRequest max_bytes is smaller than the available message set,
        # so copy the declared range into its own buffer to avoid
        # over-reading.
        bounded = io.BytesIO(source.read(left))

        result = []
        while left:
            try:
                msg_offset = Int64.decode(bounded)
                msg_bytes = Bytes.decode(bounded)
                msg_size = len(msg_bytes)
                left -= 8 + 4 + msg_size
                entry = (msg_offset, msg_size, Message.decode(msg_bytes))
            except ValueError:
                # PartialMessage signals that max_bytes may be too small.
                result.append((None, None, PartialMessage()))
                break
            result.append(entry)
        return result
예제 #6
0
    def append(self, key, value, timestamp_ms):
        """Add one message (key and value) to the batch.

        The timestamp is passed to ``Message`` only when the batch version
        is not 0.

        Returns:
            None if the batch is full
              or
            asyncio.Future stored in the batch's pending-futures list
            (per the original docstring, resolved when the message is
            delivered)
        """
        if self._is_full(key, value):
            return None

        version = self._version_id
        message_kwargs = {'key': key, 'magic': version}
        if version != 0:
            message_kwargs['timestamp'] = timestamp_ms

        # `.encode()` is a weak method for some reason, so a reference to
        # the message must be held before calling it.
        msg_inst = Message(value, **message_kwargs)
        payload = msg_inst.encode()

        # Entry layout: 8-byte relative offset, 4-byte size, then payload.
        header = (Int64.encode(self._relative_offset) +
                  Int32.encode(len(payload)))
        self._buffer.write(header + payload)

        delivery = asyncio.Future(loop=self._loop)
        self._msg_futures.append(delivery)
        self._relative_offset += 1
        return delivery
예제 #7
0
def test_decode_fetch_response_partial():
    """A fetch response ending in a cut-off message decodes to a partial.

    Both partitions carry the same message-set bytes: one complete message
    followed by one whose declared size exceeds the bytes present. Decoding
    the first partition's set must give two entries, the second being the
    ``PartialMessage`` marker.
    """
    complete_message = b''.join([
        Int64.encode(0),               # Msg Offset
        Int32.encode(18),              # Msg Size
        struct.pack('>i', 1474775406), # CRC
        struct.pack('>bb', 0, 0),      # Magic, flags
        struct.pack('>i', 2),          # Length of key
        b'k1',                         # Key
        struct.pack('>i', 2),          # Length of value
        b'v1',                         # Value
    ])
    truncated_message = b''.join([
        Int64.encode(1),               # Msg Offset
        struct.pack('>i', 24),         # Msg Size (larger than remaining MsgSet size)
        struct.pack('>i', -16383415),  # CRC
        struct.pack('>bb', 0, 0),      # Magic, flags
        struct.pack('>i', 2),          # Length of key
        b'k2',                         # Key
        struct.pack('>i', 8),          # Length of value
        b'ar',                         # Value (truncated)
    ])
    # NOTE(review): the declared size (52) looks smaller than the bytes that
    # follow (30 + 30, assuming Int64/Int32 encode to 8/4 bytes) -- confirm
    # this is intentional.
    message_set = b''.join([
        Int32.encode(52),              # MessageSet size
        complete_message,
        truncated_message,
    ])

    encoded = b''.join([
        Int32.encode(1),               # Num Topics (Array)
        String('utf-8').encode('foobar'),
        Int32.encode(2),               # Num Partitions (Array)
        Int32.encode(0),               # Partition id
        Int16.encode(0),               # Error Code
        Int64.encode(1234),            # Highwater offset
        message_set,
        Int32.encode(1),
        Int16.encode(0),
        Int64.encode(2345),
        message_set,
    ])

    resp = FetchResponse[0].decode(io.BytesIO(encoded))
    assert len(resp.topics) == 1
    topic, partitions = resp.topics[0]
    assert topic == 'foobar'
    assert len(partitions) == 2

    # Index 3 of a partition entry holds the still-encoded message set here.
    raw_message_set = partitions[0][3]
    m1 = MessageSet.decode(raw_message_set,
                           bytes_to_read=len(raw_message_set))
    assert len(m1) == 2
    assert m1[1] == (None, None, PartialMessage())
예제 #8
0
def test_decode_fetch_response_partial():
    """A fetch response ending in a cut-off message decodes to a partial.

    Each of the two partitions carries one complete message followed by one
    whose declared size exceeds the bytes present. The decoded message set
    of the first partition must have two entries, the second being the
    ``PartialMessage`` marker.
    """
    whole_msg = (
        Int64.encode(0)                   # Msg Offset
        + Int32.encode(18)                # Msg Size
        + struct.pack('>i', 1474775406)   # CRC
        + struct.pack('>bb', 0, 0)        # Magic, flags
        + struct.pack('>i', 2)            # Length of key
        + b'k1'                           # Key
        + struct.pack('>i', 2)            # Length of value
        + b'v1'                           # Value
    )
    cut_msg = (
        Int64.encode(1)                   # Msg Offset
        + struct.pack('>i', 24)           # Msg Size (larger than remaining MsgSet size)
        + struct.pack('>i', -16383415)    # CRC
        + struct.pack('>bb', 0, 0)        # Magic, flags
        + struct.pack('>i', 2)            # Length of key
        + b'k2'                           # Key
        + struct.pack('>i', 8)            # Length of value
        + b'ar'                           # Value (truncated)
    )
    # NOTE(review): the declared size (52) looks smaller than the bytes that
    # follow (30 + 30, assuming Int64/Int32 encode to 8/4 bytes) -- confirm
    # this is intentional.
    sized_set = Int32.encode(52) + whole_msg + cut_msg   # MessageSet size + data

    def partition(partition_id, highwater):
        # Partition id, error code (0), highwater offset, message set.
        return (Int32.encode(partition_id) + Int16.encode(0)
                + Int64.encode(highwater) + sized_set)

    encoded = b''.join([
        Int32.encode(1),                  # Num Topics (Array)
        String('utf-8').encode('foobar'),
        Int32.encode(2),                  # Num Partitions (Array)
        partition(0, 1234),
        partition(1, 2345),
    ])

    resp = FetchResponse[0].decode(io.BytesIO(encoded))
    assert len(resp.topics) == 1
    topic, partitions = resp.topics[0]
    assert topic == 'foobar'
    assert len(partitions) == 2

    # Here index 3 of a partition entry is used directly as the message set.
    first_partition = partitions[0]
    m1 = first_partition[3]
    assert len(m1) == 2
    assert m1[1] == (None, None, PartialMessage())
예제 #9
0
    def encode(cls, items, prepend_size=True):
        """Encode a message set to bytes.

        ``items`` is either an already-encoded buffer (``io.BytesIO`` or
        ``KafkaBytes``) that starts with an Int32 size, or an iterable of
        ``(offset, message)`` pairs. With ``prepend_size`` true the result
        includes the leading size field; otherwise only the body is
        returned.
        """
        # RecordAccumulator encodes messagesets internally
        if isinstance(items, (io.BytesIO, KafkaBytes)):
            body_size = Int32.decode(items)
            if not prepend_size:
                return items.read(body_size)
            # Rewind over the size field so it is part of the returned bytes.
            items.seek(items.tell() - 4)
            return items.read(body_size + 4)

        parts = []
        for offset, message in items:
            parts += [Int64.encode(offset), Bytes.encode(message)]
        body = b''.join(parts)
        return Bytes.encode(body) if prepend_size else body
예제 #10
0
    def encode(cls, items, prepend_size=True):
        """Serialize a message set.

        A pre-encoded buffer (``io.BytesIO`` or ``KafkaBytes``) is read
        using its leading Int32 size; any other ``items`` is treated as an
        iterable of ``(offset, message)`` pairs and encoded entry by entry.
        ``prepend_size`` controls whether the size field is part of the
        returned bytes.
        """
        # RecordAccumulator encodes messagesets internally
        already_encoded = isinstance(items, (io.BytesIO, KafkaBytes))
        if already_encoded:
            length = Int32.decode(items)
            if prepend_size:
                # Step back over the size field and return it with the body.
                start = items.tell() - 4
                items.seek(start)
                length += 4
            return items.read(length)

        body = b''.join(
            piece
            for (offset, message) in items
            for piece in (Int64.encode(offset), Bytes.encode(message))
        )
        if not prepend_size:
            return body
        return Bytes.encode(body)
예제 #11
0
    def append(self, key, value):
        """Add one message (key and value) to the batch.

        Returns:
            None if the batch is full
              or
            asyncio.Future stored in the batch's pending-futures list
            (per the original docstring, resolved when the message is
            delivered)
        """
        if self._is_full(key, value):
            return None

        message = Message(value, key=key)
        payload = message.encode()

        # Entry layout: 8-byte relative offset, 4-byte size, then payload.
        header = (Int64.encode(self._relative_offset) +
                  Int32.encode(len(payload)))
        self._buffer.write(header + payload)

        delivery = asyncio.Future(loop=self._loop)
        self._msg_futures.append(delivery)
        self._relative_offset += 1
        return delivery
예제 #12
0
    def append(self, key, value):
        """Write a message (key and value) into the batch buffer.

        Returns:
            None if the batch is full
              or
            asyncio.Future recorded for this message (per the original
            docstring, resolved when the message is delivered)
        """
        if self._is_full(key, value):
            return None

        body = Message(value, key=key).encode()
        # Offset (relative to the batch) and size precede the message body.
        record = b''.join((
            Int64.encode(self._relative_offset),
            Int32.encode(len(body)),
            body,
        ))
        self._buffer.write(record)

        pending = asyncio.Future(loop=self._loop)
        self._msg_futures.append(pending)
        self._relative_offset += 1
        return pending
예제 #13
0
    def append(self, *, timestamp, key, value):
        """Add one message to the batch buffer.

        The timestamp is passed to ``Message`` only when the batch magic is
        not 0.

        Returns:
            0 if there is no room for the message, otherwise the value
            returned by the buffer's ``write`` call for the new entry.
        """
        if not self._has_room_for(key, value):
            return 0

        magic = self._magic
        message_kwargs = {'key': key, 'magic': magic}
        if magic != 0:
            message_kwargs['timestamp'] = timestamp

        # `.encode()` is a weak method for some reason, so a reference to
        # the message must be held before calling it.
        msg_inst = Message(value, **message_kwargs)
        payload = msg_inst.encode()

        # Entry layout: 8-byte relative offset, 4-byte size, then payload.
        header = (Int64.encode(self._relative_offset) +
                  Int32.encode(len(payload)))
        written = self._buffer.write(header + payload)
        self._relative_offset += 1
        return written
예제 #14
0
    def drain_ready(self):
        """Finalize the batch buffer so it is ready to be sent.

        Resolves the drain waiter, then, when a compression type is
        configured, wraps everything after the 4-byte size prefix into one
        compressed ``Message``. The compressed form is written only if it,
        together with its 16-byte header, is smaller than the current
        buffer; otherwise only the size prefix is updated and the data stays
        uncompressed. Either way the buffer is rewound to position 0.
        """
        view = self._buffer.getbuffer()
        self._drain_waiter.set_result(None)

        codec = self._compression_type
        if codec:
            _, compress, attributes = self._COMPRESSORS[codec]
            wrapper = Message(compress(view[4:].tobytes()),
                              attributes=attributes)
            packed = wrapper.encode()
            packed_len = len(packed)
            # 4 (total size) + 8 (offset) + 4 (compressed message size)
            prefix_len = 16
            # A compressed message that is not shorter than the original is
            # not worth sending; fall through and send the data as is.
            if packed_len + prefix_len < len(view):
                # Overwrite in place through the memoryview to avoid copies.
                view[:4] = Int32.encode(packed_len + 12)
                view[4:12] = Int64.encode(0)  # offset 0
                view[12:16] = Int32.encode(packed_len)
                view[16:16 + packed_len] = packed
                self._buffer.seek(0)
                return

        # Uncompressed: store the batch size in the first 4 bytes.
        view[:4] = Int32.encode(self._buffer.tell() - 4)
        self._buffer.seek(0)