def _next_partition(self, topic): if topic not in self.partition_cycles: if not self.client.has_metadata_for_topic(topic): self.client.ensure_topic_exists(topic) self.partition_cycles[topic] = cycle(self.client.get_partition_ids_for_topic(topic)) # Randomize the initial partition that is returned if self.random_start: num_partitions = len(self.client.get_partition_ids_for_topic(topic)) for _ in xrange(random.randint(0, num_partitions-1)): next(self.partition_cycles[topic]) return next(self.partition_cycles[topic])
def snappy_encode(payload, xerial_compatible=True, xerial_blocksize=32*1024): """Encodes the given data with snappy compression. If xerial_compatible is set then the stream is encoded in a fashion compatible with the xerial snappy library. The block size (xerial_blocksize) controls how frequent the blocking occurs 32k is the default in the xerial library. The format winds up being: +-------------+------------+--------------+------------+--------------+ | Header | Block1 len | Block1 data | Blockn len | Blockn data | +-------------+------------+--------------+------------+--------------+ | 16 bytes | BE int32 | snappy bytes | BE int32 | snappy bytes | +-------------+------------+--------------+------------+--------------+ It is important to note that the blocksize is the amount of uncompressed data presented to snappy at each block, whereas the blocklen is the number of bytes that will be present in the stream; so the length will always be <= blocksize. """ if not has_snappy(): raise NotImplementedError("Snappy codec is not available") if not xerial_compatible: return snappy.compress(payload) out = io.BytesIO() for fmt, dat in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER): out.write(struct.pack('!' + fmt, dat)) # Chunk through buffers to avoid creating intermediate slice copies if PYPY: # on pypy, snappy.compress() on a sliced buffer consumes the entire # buffer... likely a python-snappy bug, so just use a slice copy chunker = lambda payload, i, size: payload[i:size+i] elif six.PY2: # Sliced buffer avoids additional copies # pylint: disable-msg=undefined-variable chunker = lambda payload, i, size: buffer(payload, i, size) else: # snappy.compress does not like raw memoryviews, so we have to convert # tobytes, which is a copy... oh well. it's the thought that counts. # pylint: disable-msg=undefined-variable chunker = lambda payload, i, size: memoryview(payload)[i:size+i].tobytes() for chunk in (chunker(payload, i, xerial_blocksize) for i in xrange(0, len(payload), xerial_blocksize)): block = snappy.compress(chunk) block_size = len(block) out.write(struct.pack('!i', block_size)) out.write(block) return out.getvalue()