Example #1
    def _next_partition(self, topic):
        # Lazily build (and cache) a round-robin cycle over the topic's
        # partition ids the first time this topic is seen.
        if topic not in self.partition_cycles:
            if not self.client.has_metadata_for_topic(topic):
                self.client.ensure_topic_exists(topic)

            self.partition_cycles[topic] = cycle(self.client.get_partition_ids_for_topic(topic))

            # Randomize the initial partition that is returned
            if self.random_start:
                num_partitions = len(self.client.get_partition_ids_for_topic(topic))
                for _ in xrange(random.randint(0, num_partitions-1)):
                    next(self.partition_cycles[topic])

        return next(self.partition_cycles[topic])
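
The method above lazily caches an `itertools.cycle` over the topic's partition ids and, when `random_start` is set, advances it by a random offset so that concurrent producers do not all begin on the same partition. A minimal standalone sketch of that idea (`make_partition_cycle` and its arguments are illustrative, not part of the kafka-python API):

    import random
    from itertools import cycle

    def make_partition_cycle(partition_ids, random_start=True):
        """Return an iterator yielding partition ids round-robin, optionally
        starting at a random position so concurrent producers spread out."""
        it = cycle(partition_ids)
        if random_start and partition_ids:
            # Advance past a random number of ids so the first value returned
            # is not always partition_ids[0].
            for _ in range(random.randint(0, len(partition_ids) - 1)):
                next(it)
        return it

    # Example: three partitions, round-robin with a random starting offset
    parts = make_partition_cycle([0, 1, 2])
    print([next(parts) for _ in range(6)])  # e.g. [1, 2, 0, 1, 2, 0]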
Example #2
def snappy_encode(payload, xerial_compatible=True, xerial_blocksize=32*1024):
    """Encodes the given data with snappy compression.

    If xerial_compatible is set then the stream is encoded in a fashion
    compatible with the xerial snappy library.

    The block size (xerial_blocksize) controls how frequently blocking occurs;
    32k is the default in the xerial library.

    The format winds up being:


        +-------------+------------+--------------+------------+--------------+
        |   Header    | Block1 len | Block1 data  | Blockn len | Blockn data  |
        +-------------+------------+--------------+------------+--------------+
        |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
        +-------------+------------+--------------+------------+--------------+


    It is important to note that the blocksize is the amount of uncompressed
    data presented to snappy at each block, whereas the blocklen is the number
    of bytes that will be present in the stream; so the length will always be
    <= blocksize.

    """

    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if not xerial_compatible:
        return snappy.compress(payload)

    out = io.BytesIO()
    for fmt, dat in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER):
        out.write(struct.pack('!' + fmt, dat))

    # Chunk through buffers to avoid creating intermediate slice copies
    if PYPY:
        # on pypy, snappy.compress() on a sliced buffer consumes the entire
        # buffer... likely a python-snappy bug, so just use a slice copy
        chunker = lambda payload, i, size: payload[i:size+i]

    elif six.PY2:
        # Sliced buffer avoids additional copies
        # pylint: disable-msg=undefined-variable
        chunker = lambda payload, i, size: buffer(payload, i, size)
    else:
        # snappy.compress does not like raw memoryviews, so we have to convert
        # tobytes, which is a copy... oh well. it's the thought that counts.
        # pylint: disable-msg=undefined-variable
        chunker = lambda payload, i, size: memoryview(payload)[i:size+i].tobytes()

    for chunk in (chunker(payload, i, xerial_blocksize)
                  for i in xrange(0, len(payload), xerial_blocksize)):

        block = snappy.compress(chunk)
        block_size = len(block)
        out.write(struct.pack('!i', block_size))
        out.write(block)

    return out.getvalue()
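
To make the framing described in the docstring concrete, the stream can be read back by skipping the 16-byte header and then consuming alternating big-endian int32 length prefixes and snappy blocks. A minimal sketch under that assumption (`xerial_snappy_decode` is illustrative only and performs no header validation; kafka-python provides its own decoder):

    import io
    import struct

    import snappy  # python-snappy, the same dependency as above

    def xerial_snappy_decode(data):
        """Decode the framing documented above: a 16-byte header followed by
        repeated (big-endian int32 block length, snappy block) pairs."""
        buf = io.BytesIO(data)
        buf.read(16)  # skip the xerial header; no validation in this sketch
        out = bytearray()
        while True:
            size_bytes = buf.read(4)
            if len(size_bytes) < 4:
                break  # end of stream
            (block_len,) = struct.unpack('!i', size_bytes)
            out.extend(snappy.decompress(buf.read(block_len)))
        return bytes(out)

    # Round-trip check (assumes snappy_encode above is importable):
    # assert xerial_snappy_decode(snappy_encode(b'payload' * 1000)) == b'payload' * 1000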