def startCompressMessage(self):
    if self._isServer:
        if self._compressor is None or self.server_no_context_takeover:
            self._compressor = snappy.StreamCompressor()
    else:
        if self._compressor is None or self.client_no_context_takeover:
            self._compressor = snappy.StreamCompressor()
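# Hedged aside: a fresh StreamCompressor restarts the framed stream, which is
# what the *_no_context_takeover flags above require -- the peer can then reset
# its decompressor between messages. Standalone sketch (not part of the class):
import snappy

c1 = snappy.StreamCompressor()
c2 = snappy.StreamCompressor()
# Each compressor prepends the stream identifier chunk to its first output.
assert c1.compress(b"hello")[:10] == b"\xff\x06\x00\x00sNaPpY"
assert c2.compress(b"hello")[:10] == b"\xff\x06\x00\x00sNaPpY"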
def test_concatenation(self):
    data1 = os.urandom(snappy.snappy._CHUNK_MAX * 2)
    data2 = os.urandom(4096)
    decompressor = snappy.StreamDecompressor()
    self.assertEqual(
        decompressor.decompress(snappy.StreamCompressor().compress(data1) +
                                snappy.StreamCompressor().compress(data2)),
        data1 + data2)
def test_compression(self):
    # test that we can add compressed chunks
    compressor = snappy.StreamCompressor()
    data = b"\0" * 50
    compressed_data = snappy.compress(data)
    crc = struct.pack("<L", snappy._masked_crc32c(data))
    self.assertEqual(crc, b"\x8f)H\xbd")
    self.assertEqual(len(compressed_data), 6)
    self.assertEqual(compressor.add_chunk(data, compress=True),
                     b"\xff\x06\x00\x00sNaPpY"
                     b"\x00\x0a\x00\x00" + crc + compressed_data)

    # test that we can add uncompressed chunks
    data = b"\x01" * 50
    crc = struct.pack("<L", snappy._masked_crc32c(data))
    self.assertEqual(crc, b"\xb2\x14)\x8a")
    self.assertEqual(compressor.add_chunk(data, compress=False),
                     b"\x01\x36\x00\x00" + crc + data)

    # test that we can add more data than will fit in one chunk
    data = b"\x01" * (snappy._CHUNK_MAX * 2 - 5)
    crc1 = struct.pack("<L",
                       snappy._masked_crc32c(data[:snappy._CHUNK_MAX]))
    self.assertEqual(crc1, b"h#6\x8e")
    crc2 = struct.pack("<L",
                       snappy._masked_crc32c(data[snappy._CHUNK_MAX:]))
    self.assertEqual(crc2, b"q\x8foE")
    self.assertEqual(compressor.add_chunk(data, compress=False),
                     b"\x01\x04\x00\x01" + crc1 + data[:snappy._CHUNK_MAX] +
                     b"\x01\xff\xff\x00" + crc2 + data[snappy._CHUNK_MAX:])
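# The byte-level expectations above follow the snappy framing format: a chunk
# is one type byte, a 3-byte little-endian length, then (for data chunks) a
# 4-byte masked CRC32C of the uncompressed payload. A small sketch to pull
# headers apart -- header_of is an illustrative helper, not python-snappy API:
import struct

def header_of(chunk):
    """Return (chunk_type, payload_length) from a framed chunk's first 4 bytes."""
    length, = struct.unpack("<L", chunk[1:4] + b"\x00")  # pad 3-byte length to 4
    return chunk[0], length

# The stream identifier is chunk type 0xff carrying the 6-byte payload b"sNaPpY".
assert header_of(b"\xff\x06\x00\x00sNaPpY") == (0xff, 6)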
def __init__(self, *args, **kwargs):
    try:
        import snappy
    except ImportError:
        raise errors.DependencyNotInstalledError(
            "python-snappy library is required for snappy support")
    self._compressobj = snappy.StreamCompressor()
    six.BytesIO.__init__(self, *args, **kwargs)
def __init__(self, output):
    try:
        import snappy
    except ImportError:
        raise errors.DependencyNotInstalledError(
            "python-snappy library is required for snappy support")
    self._compressor = snappy.StreamCompressor()
    self._output = output
def __init__(self, buffer=None):
    if not snappy:
        raise RuntimeError('python-snappy required for compression')
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    super().__init__(buffer)
def __init__(self, infile, mode, **kwargs):
    import snappy

    self.details = {"size": 999999999}  # not true, but OK if we don't seek
    super().__init__(fs=None, path="snappy", mode=mode.strip("b") + "b", **kwargs)
    self.infile = infile
    if "r" in mode:
        self.codec = snappy.StreamDecompressor()
    else:
        self.codec = snappy.StreamCompressor()
def __init__(self, src_fp, algorithm, level=0):
    super().__init__(src_fp, minimum_read_size=32 * 1024)
    if algorithm == "lzma":
        self._compressor = lzma.LZMACompressor(lzma.FORMAT_XZ, -1, level, None)
    elif algorithm == "snappy":
        self._compressor = snappy.StreamCompressor()
    elif algorithm == "zstd":
        self._compressor = zstd.ZstdCompressor(level=level).compressobj()
    else:
        raise InvalidConfigurationError(
            "invalid compression algorithm: {!r}".format(algorithm))
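# All three objects stored in self._compressor above share the same minimal
# duck-typed interface -- compress(data) and flush() -- which is presumably why
# they are interchangeable here. A hedged sketch of a caller driving any of
# them (compress_stream is illustrative, not part of this codebase):
def compress_stream(compressor, chunks):
    for chunk in chunks:
        out = compressor.compress(chunk)
        if out:
            yield out
    tail = compressor.flush()
    if tail:
        yield tail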
def test_decompression(self):
    # test that we check for the initial stream identifier
    data = b"\x01" * 50
    self.assertRaises(snappy.UncompressError,
                      snappy.StreamDecompressor().decompress,
                      b"\x01\x36\x00\x00" +
                      struct.pack("<L", snappy._masked_crc32c(data)) + data)
    self.assertEqual(
        snappy.StreamDecompressor().decompress(
            b"\xff\x06\x00\x00sNaPpY"
            b"\x01\x36\x00\x00" +
            struct.pack("<L", snappy._masked_crc32c(data)) + data),
        data)
    decompressor = snappy.StreamDecompressor()
    decompressor.decompress(b"\xff\x06\x00\x00sNaPpY")
    self.assertEqual(
        decompressor.copy().decompress(
            b"\x01\x36\x00\x00" +
            struct.pack("<L", snappy._masked_crc32c(data)) + data),
        data)

    # test that we throw errors for unknown unskippable chunks
    self.assertRaises(snappy.UncompressError,
                      decompressor.copy().decompress, b"\x03\x01\x00\x00")

    # test that we skip unknown skippable chunks
    self.assertEqual(b"",
                     decompressor.copy().decompress(b"\xfe\x01\x00\x00"))

    # test that we check CRCs
    compressed_data = snappy.compress(data)
    real_crc = struct.pack("<L", snappy._masked_crc32c(data))
    fake_crc = os.urandom(4)
    self.assertRaises(snappy.UncompressError,
                      decompressor.copy().decompress,
                      b"\x00\x0a\x00\x00" + fake_crc + compressed_data)
    self.assertEqual(
        decompressor.copy().decompress(
            b"\x00\x0a\x00\x00" + real_crc + compressed_data),
        data)

    # test that we buffer when we don't have enough
    uncompressed_data = os.urandom(100)
    compressor = snappy.StreamCompressor()
    compressed_data = (compressor.compress(uncompressed_data[:50]) +
                       compressor.compress(uncompressed_data[50:]))
    for split1 in range(len(compressed_data) - 1):
        for split2 in range(split1, len(compressed_data)):
            decompressor = snappy.StreamDecompressor()
            self.assertEqual(
                (decompressor.decompress(compressed_data[:split1]) +
                 decompressor.decompress(compressed_data[split1:split2]) +
                 decompressor.decompress(compressed_data[split2:])),
                uncompressed_data)
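# The split1/split2 loop above leans on StreamDecompressor's buffering:
# decompress() returns only what it can fully decode and holds the rest.
# Minimal standalone illustration (assumes python-snappy is installed):
import snappy

framed = snappy.StreamCompressor().compress(b"x" * 100)
d = snappy.StreamDecompressor()
assert d.decompress(framed[:3]) == b""          # partial header: buffered
assert d.decompress(framed[3:]) == b"x" * 100   # remainder completes the stream
d.flush()  # would raise UncompressError if data were still buffered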
def __init__(self, infile, mode, **kwargs):
    import snappy

    super().__init__(
        fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
    )
    self.infile = infile
    if "r" in mode:
        self.codec = snappy.StreamDecompressor()
    else:
        self.codec = snappy.StreamCompressor()
def compressor(self, compression_algorithm):
    if compression_algorithm == "lzma":
        return lzma.LZMACompressor(preset=0)
    elif compression_algorithm == "snappy":
        if not snappy:
            raise MissingLibraryError(
                "python-snappy is required when using snappy compression")
        return snappy.StreamCompressor()
    else:
        raise InvalidConfigurationError(
            "invalid compression algorithm: {!r}".format(
                compression_algorithm))
def __init__(self, infile, mode, **kwargs):
    import snappy

    self.details = {'size': 999999999}  # not true, but OK if we don't seek
    super().__init__(fs=None, path='snappy', mode=mode.strip('b') + 'b', **kwargs)
    self.infile = infile
    if 'r' in mode:
        self.codec = snappy.StreamDecompressor()
    else:
        self.codec = snappy.StreamCompressor()
def backupWorker(k, p):
    # Inside this function/process 'script' is global
    logger.info("Backup: Processing %s ..." % k)
    # We acquire a file lock using the same locks whisper uses.  flock()
    # exclusive locks are cleared when the file handle is closed.  This
    # is the same practice that the whisper code uses.
    logger.debug("Locking file...")
    try:
        with open(p, "rb") as fh:
            fcntl.flock(fh.fileno(), fcntl.LOCK_EX)  # May block
            blob = fh.read()
            timestamp = utc()
    except IOError as e:
        logger.warning("An IOError occurred locking %s: %s" % (k, str(e)))
        return
    except Exception as e:
        logger.error("An unknown exception occurred, skipping metric: %s"
                     % str(e))
        return

    # We're going to back up this file, compressing it as a normal .gz
    # file so that it can be restored manually if needed
    if not script.options.noop:
        logger.debug("Compressing data...")
        blobgz = StringIO()
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="wb")
            fd.write(blob)
            fd.close()
        elif script.options.algorithm == "sz":
            compressor = snappy.StreamCompressor()
            blobgz.write(compressor.compress(blob))
        else:
            raise StandardError("Unknown compression format requested")

    # Grab our timestamp and assemble final upstream key location
    logger.debug("Saving payload as: %s.wsp.%s" % (k, script.options.algorithm))
    try:
        if not script.options.noop:
            t = time.time()
            script.store.put("%s.wsp.%s" % (k, script.options.algorithm),
                             blobgz.getvalue())
            # script.store.put("%s/%s.sha1" % (k, timestamp), blobSHA)
            logger.debug("Saving of %s took %d seconds" % (k, time.time() - t))
    except Exception as e:
        logger.warning("Exception during saving: %s" % str(e))

    # Free memory (blobgz only exists outside noop mode)
    if not script.options.noop:
        blobgz.close()
    del blob
def __init__(self, socket, codec):
    # Note: the comparisons below must use the same attribute the codec is
    # stored in (the original compared self.codec, which is never assigned).
    self._codec = codec if six.PY3 else ord(codec)
    self._socketobj = ZmqConnection.SocketFile(socket)
    if self._codec == 0:
        self._compressor = self._socketobj
    elif self._codec == 1:
        self._compressor = gzip.GzipFile(fileobj=self._socketobj)
    elif self._codec == 2:
        self._compressor = ZmqConnection.CompressedFile(
            self._socketobj, snappy.StreamCompressor())
    elif self._codec == 3:
        self._compressor = ZmqConnection.CompressedFile(
            self._socketobj, lzma.LZMACompressor(lzma.FORMAT_XZ))
    else:
        raise ValueError("Unknown compression type")
def __init__(self, next_fp, mode):
    if snappy is None:
        raise io.UnsupportedOperation("Snappy is not available")

    if mode == "rb":
        self.decr = snappy.StreamDecompressor()
        self.encr = None
    elif mode == "wb":
        self.decr = None
        self.encr = snappy.StreamCompressor()
    else:
        raise io.UnsupportedOperation("unsupported mode for SnappyFile")

    super().__init__(next_fp)
    self.decr_done = False
def __init__(self, file_name_or_obj, file_mode, buffer_size=snappy._CHUNK_MAX):
    if isinstance(file_name_or_obj, str):
        self._file = open(file_name_or_obj, file_mode)
    else:
        self._file = file_name_or_obj
    self.buffer_pos = 0
    if file_mode == "wb":
        self.buffer = bytearray(buffer_size)
        self._compressor = snappy.StreamCompressor()
    else:
        self.buffer = None
        self._decompressor = snappy.StreamDecompressor()
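# Hedged note: buffer_size defaults to snappy._CHUNK_MAX because the framing
# format caps one chunk's uncompressed payload at that size (64 KiB in
# python-snappy); add_chunk() splits larger inputs, as test_compression shows:
import snappy

big = b"\x00" * (snappy._CHUNK_MAX + 1)
framed = snappy.StreamCompressor().add_chunk(big, compress=False)
# stream id (10) + header/CRC (8) + 64 KiB + header/CRC (8) + 1 leftover byte
assert len(framed) == 10 + 8 + snappy._CHUNK_MAX + 8 + 1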
def _get_compressobj(cls, compress_option):
    if compress_option is None or \
            compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_RAW:
        return None
    elif compress_option.algorithm == \
            CompressOption.CompressAlgorithm.ODPS_ZLIB:
        return zlib.compressobj(compress_option.level, zlib.DEFLATED,
                                zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL,
                                compress_option.strategy)
    elif compress_option.algorithm == \
            CompressOption.CompressAlgorithm.ODPS_SNAPPY:
        try:
            import snappy
        except ImportError:
            raise errors.DependencyNotInstalledError(
                "python-snappy library is required for snappy support")
        return snappy.StreamCompressor()
    else:
        raise IOError('Invalid compression option.')
def test_random(self):
    for _ in range(100):
        compressor = snappy.StreamCompressor()
        decompressor = snappy.StreamDecompressor()
        data = b""
        compressed = b""
        for _ in range(random.randint(0, 3)):
            chunk = os.urandom(random.randint(0, snappy.snappy._CHUNK_MAX * 2))
            data += chunk
            compressed += compressor.add_chunk(
                chunk, compress=random.choice([True, False, None]))

        upper_bound = random.choice([256, snappy.snappy._CHUNK_MAX * 2])
        while compressed:
            size = random.randint(0, upper_bound)
            chunk, compressed = compressed[:size], compressed[size:]
            chunk = decompressor.decompress(chunk)
            self.assertEqual(data[:len(chunk)], chunk)
            data = data[len(chunk):]

        decompressor.flush()
        self.assertEqual(len(data), 0)
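# compress=None (one of the random choices above) lets add_chunk() decide per
# chunk: the compressed form is kept only when it is actually smaller, so even
# incompressible urandom data never inflates beyond the fixed framing overhead.
# Hedged illustration:
import os
import snappy

noise = os.urandom(1000)
framed = snappy.StreamCompressor().add_chunk(noise, compress=None)
assert len(framed) <= 10 + 8 + len(noise)  # stream id + one chunk header/CRC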
def test_snappy_framed(benchmark, file, use_cramjam: bool):
    """
    Uses snappy compression framed
    """
    import snappy

    data = bytearray(file.read_bytes())
    if use_cramjam:
        benchmark(
            round_trip,
            compress=cramjam.snappy.compress,
            decompress=cramjam.snappy.decompress,
            data=data,
        )
    else:
        compressor = snappy.StreamCompressor()
        decompressor = snappy.StreamDecompressor()
        benchmark(
            round_trip,
            compress=compressor.compress,
            decompress=decompressor.decompress,
            data=data,
        )
def __init__(self, socket):
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    self._socket = socket
    self._bootstrapped = None
def __init__(self, socket):
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    super(SnappySocket, self).__init__(socket)
def __init__(self, buffer=None):
    self._parser = Reader()
    self._decompressor = snappy.StreamDecompressor()
    self._compressor = snappy.StreamCompressor()
    if buffer:
        self.feed(buffer)
def __init__(self):
    self.compressor = snappy.StreamCompressor()
def encode(data):
    return snappy.StreamCompressor().compress(data)
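# Because encode() builds a new StreamCompressor per call, each result is a
# self-contained framed stream (stream identifier included) and should be fed
# to a fresh StreamDecompressor. A hedged decode counterpart for symmetry
# (decode is illustrative, not part of the original module):
def decode(data):
    return snappy.StreamDecompressor().decompress(data)

assert decode(encode(b"payload")) == b"payload"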
def backupWorker(k, p):
    # Inside this function/process 'script' is global
    logger.info("Backup: Processing %s ..." % k)
    # We acquire a file lock using the same locks whisper uses.  flock()
    # exclusive locks are cleared when the file handle is closed.  This
    # is the same practice that the whisper code uses.
    logger.debug("Locking file...")
    try:
        with open(p, "rb") as fh:
            fcntl.flock(fh.fileno(), fcntl.LOCK_EX)  # May block
            blob = fh.read()
            timestamp = utc()
    except IOError as e:
        logger.warning("An IOError occurred locking %s: %s" % (k, str(e)))
        return
    except Exception as e:
        logger.error("An unknown exception occurred, skipping metric: %s"
                     % str(e))
        return

    # SHA1 hash...have we seen this metric DB file before?
    logger.debug("Calculating hash and searching data store...")
    blobSHA = hashlib.sha1(blob).hexdigest()
    knownBackups = []
    for i in script.store.list(k + "/"):
        if i.endswith(".sha1"):
            knownBackups.append(i)

    knownBackups.sort()
    if len(knownBackups) > 0:
        i = knownBackups[-1]  # The last known backup
        logger.debug("Examining %s from data store of %d backups"
                     % (i, len(knownBackups)))
        if script.store.get(i) == blobSHA:
            logger.info("Metric DB %s is unchanged from last backup, "
                        "skipping." % k)
            # We purposely do not check retention in this case
            return

    # We're going to back up this file, compressing it as a normal .gz
    # file so that it can be restored manually if needed
    if not script.options.noop:
        logger.debug("Compressing data...")
        blobgz = StringIO()
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="wb")
            fd.write(blob)
            fd.close()
        elif script.options.algorithm == "sz":
            compressor = snappy.StreamCompressor()
            blobgz.write(compressor.compress(blob))
        else:
            raise StandardError("Unknown compression format requested")

    # Grab our timestamp and assemble final upstream key location
    logger.debug("Uploading payload as: %s/%s.wsp.%s"
                 % (k, timestamp, script.options.algorithm))
    logger.debug("Uploading SHA1 as : %s/%s.sha1" % (k, timestamp))
    try:
        if not script.options.noop:
            t = time.time()
            script.store.put("%s/%s.wsp.%s"
                             % (k, timestamp, script.options.algorithm),
                             blobgz.getvalue())
            script.store.put("%s/%s.sha1" % (k, timestamp), blobSHA)
            logger.debug("Upload of %s @ %s took %d seconds"
                         % (k, timestamp, time.time() - t))
    except Exception as e:
        logger.warning("Exception during upload: %s" % str(e))

    # Free memory (blobgz only exists outside noop mode)
    if not script.options.noop:
        blobgz.close()
    del blob

    # Handle our retention policy, we keep at most X backups
    while len(knownBackups) + 1 > script.options.retention:
        # The oldest (and not current) backup
        i = knownBackups[0].replace(".sha1", "")
        logger.info("Removing old backup: %s.wsp.%s"
                    % (i, script.options.algorithm))
        logger.debug("Removing old SHA1: %s.sha1" % i)
        try:
            t = time.time()
            if not script.options.noop:
                script.store.delete("%s.wsp.%s" % (i, script.options.algorithm))
                script.store.delete("%s.sha1" % i)
            else:
                # Do a list, we want to log if there's a 404
                # (use a fresh loop variable so we don't clobber i)
                d = [f for f in script.store.list("%s.wsp.%s"
                                                  % (i, script.options.algorithm))]
                if len(d) == 0:
                    logger.warn("Missing file in store: %s.wsp.%s"
                                % (i, script.options.algorithm))
                d = [f for f in script.store.list("%s.sha1" % i)]
                if len(d) == 0:
                    logger.warn("Missing file in store: %s.sha1" % i)
            logger.debug("Retention removal of %s took %d seconds"
                         % (i, time.time() - t))
        except Exception as e:
            # On an error here we want to leave files alone
            logger.warning("Exception during delete: %s" % str(e))

        del knownBackups[0]
sample_record_count = 10
if len(sys.argv) > 1:
    try:
        sample_record_count = int(sys.argv[1])
    except ValueError:
        sys.stderr.write('Invalid number of records: %s\n\n' % (sys.argv[1]))
        show_usage()

# Generate sample record data.
sample_record_stream = BytesIO()
for size in range(100, 100 * (sample_record_count + 1), 100):
    write_record_v1(sample_record_stream, get_sample_data(size))
sample_records = sample_record_stream.getvalue()

# Generate uncompressed sample.
with open('uncompressed.v1.akaibu', 'wb') as f:
    write_header_v1(f, COMPRESSION_UNCOMPRESSED)
    f.write(sample_records)

# Generate zlib compressed sample.
with open('zlib.v1.akaibu', 'wb') as f:
    write_header_v1(f, COMPRESSION_ZLIB)
    f.write(zlib.compress(sample_records, 9))

# Generate Snappy compressed sample.
if snappy:
    with open('snappy.v1.akaibu', 'wb') as f:
        write_header_v1(f, COMPRESSION_SNAPPY)
        f.write(snappy.StreamCompressor().compress(sample_records))
def __init__(self):
    snappy = _try_import_snappy()
    self.compressor = snappy.StreamCompressor()
    self.decompressor = snappy.StreamDecompressor()
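# A paired codec like this typically round-trips by feeding the compressor's
# framed output straight into the decompressor. Hedged usage sketch --
# SnappyCodec is a hypothetical name for the class defined above:
codec = SnappyCodec()  # hypothetical class name
assert codec.decompressor.decompress(codec.compressor.compress(b"abc")) == b"abc"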