Example #1
def startCompressMessage(self):
     if self._isServer:
         if self._compressor is None or self.server_no_context_takeover:
             self._compressor = snappy.StreamCompressor()
     else:
         if self._compressor is None or self.client_no_context_takeover:
             self._compressor = snappy.StreamCompressor()
Example #2
 def test_concatenation(self):
     data1 = os.urandom(snappy.snappy._CHUNK_MAX * 2)
     data2 = os.urandom(4096)
     decompressor = snappy.StreamDecompressor()
     self.assertEqual(
         decompressor.decompress(snappy.StreamCompressor().compress(data1) +
                                 snappy.StreamCompressor().compress(data2)),
         data1 + data2)
Example #3
    def test_compression(self):
        # test that we can add compressed chunks
        compressor = snappy.StreamCompressor()
        data = b"\0" * 50
        compressed_data = snappy.compress(data)
        crc = struct.pack("<L", snappy._masked_crc32c(data))
        self.assertEqual(crc, b"\x8f)H\xbd")
        self.assertEqual(len(compressed_data), 6)
        self.assertEqual(compressor.add_chunk(data, compress=True),
                         b"\xff\x06\x00\x00sNaPpY"
                         b"\x00\x0a\x00\x00" + crc + compressed_data)

        # test that we can add uncompressed chunks
        data = b"\x01" * 50
        crc = struct.pack("<L", snappy._masked_crc32c(data))
        self.assertEqual(crc, b"\xb2\x14)\x8a")
        self.assertEqual(compressor.add_chunk(data, compress=False),
                         b"\x01\x36\x00\x00" + crc + data)

        # test that we can add more data than will fit in one chunk
        data = b"\x01" * (snappy._CHUNK_MAX * 2 - 5)
        crc1 = struct.pack("<L",
                snappy._masked_crc32c(data[:snappy._CHUNK_MAX]))
        self.assertEqual(crc1, b"h#6\x8e")
        crc2 = struct.pack("<L",
                snappy._masked_crc32c(data[snappy._CHUNK_MAX:]))
        self.assertEqual(crc2, b"q\x8foE")
        self.assertEqual(compressor.add_chunk(data, compress=False),
                b"\x01\x04\x00\x01" + crc1 + data[:snappy._CHUNK_MAX] +
                b"\x01\xff\xff\x00" + crc2 + data[snappy._CHUNK_MAX:])
Example #4
 def __init__(self, *args, **kwargs):
     try:
         import snappy
     except ImportError:
         raise errors.DependencyNotInstalledError(
             "python-snappy library is required for snappy support")
     self._compressobj = snappy.StreamCompressor()
     six.BytesIO.__init__(self, *args, **kwargs)
Example #5
 def __init__(self, output):
     try:
         import snappy
     except ImportError:
         raise errors.DependencyNotInstalledError(
             "python-snappy library is required for snappy support")
     self._compressor = snappy.StreamCompressor()
     self._output = output
Example #6
    def __init__(self, buffer=None):
        if not snappy:
            raise RuntimeError('python-snappy required for compression')

        self._decompressor = snappy.StreamDecompressor()
        self._compressor = snappy.StreamCompressor()

        super().__init__(buffer)
Example #7
    def __init__(self, infile, mode, **kwargs):
        import snappy

        self.details = {"size": 999999999}  # not true, but OK if we don't seek
        super().__init__(fs=None, path="snappy", mode=mode.strip("b") + "b", **kwargs)
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()
Example #8
 def __init__(self, src_fp, algorithm, level=0):
     super().__init__(src_fp, minimum_read_size=32 * 1024)
     if algorithm == "lzma":
         self._compressor = lzma.LZMACompressor(lzma.FORMAT_XZ, -1, level, None)
     elif algorithm == "snappy":
         self._compressor = snappy.StreamCompressor()
     elif algorithm == "zstd":
         self._compressor = zstd.ZstdCompressor(level=level).compressobj()
     else:
         raise InvalidConfigurationError("invalid compression algorithm: {!r}".format(algorithm))
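A plausible read-side counterpart to the selection above (hypothetical sketch: assumes zstd is the zstandard module and reuses the example's InvalidConfigurationError):

def make_decompressor(algorithm):
    if algorithm == "lzma":
        return lzma.LZMADecompressor(lzma.FORMAT_XZ)
    elif algorithm == "snappy":
        return snappy.StreamDecompressor()
    elif algorithm == "zstd":
        return zstd.ZstdDecompressor().decompressobj()
    raise InvalidConfigurationError("invalid compression algorithm: {!r}".format(algorithm))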
Example #9
    def test_decompression(self):
        # test that we check for the initial stream identifier
        data = b"\x01" * 50
        self.assertRaises(snappy.UncompressError,
                snappy.StreamDecompressor().decompress,
                    b"\x01\x36\x00\00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data)
        self.assertEqual(
                snappy.StreamDecompressor().decompress(
                    b"\xff\x06\x00\x00sNaPpY"
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data),
                data)
        decompressor = snappy.StreamDecompressor()
        decompressor.decompress(b"\xff\x06\x00\x00sNaPpY")
        self.assertEqual(
                decompressor.copy().decompress(
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data),
                data)

        # test that we throw errors for unknown unskippable chunks
        self.assertRaises(snappy.UncompressError,
                decompressor.copy().decompress, b"\x03\x01\x00\x00")

        # test that we skip unknown skippable chunks
        self.assertEqual(b"",
                         decompressor.copy().decompress(b"\xfe\x01\x00\x00"))

        # test that we check CRCs
        compressed_data = snappy.compress(data)
        real_crc = struct.pack("<L", snappy._masked_crc32c(data))
        fake_crc = os.urandom(4)
        self.assertRaises(snappy.UncompressError,
                decompressor.copy().decompress,
                    b"\x00\x0a\x00\x00" + fake_crc + compressed_data)
        self.assertEqual(
                decompressor.copy().decompress(
                    b"\x00\x0a\x00\x00" + real_crc + compressed_data),
                data)

        # test that we buffer when we don't have enough
        uncompressed_data = os.urandom(100)
        compressor = snappy.StreamCompressor()
        compressed_data = (compressor.compress(uncompressed_data[:50]) +
                           compressor.compress(uncompressed_data[50:]))
        for split1 in range(len(compressed_data) - 1):
            for split2 in range(split1, len(compressed_data)):
                decompressor = snappy.StreamDecompressor()
                self.assertEqual(
                    (decompressor.decompress(compressed_data[:split1]) +
                     decompressor.decompress(compressed_data[split1:split2]) +
                     decompressor.decompress(compressed_data[split2:])),
                    uncompressed_data)
Example #10
    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()
Example #11
 def compressor(self, compression_algorithm):
     if compression_algorithm == "lzma":
         return lzma.LZMACompressor(preset=0)
     elif compression_algorithm == "snappy":
         if not snappy:
             raise MissingLibraryError(
                 "python-snappy is required when using snappy compression")
         return snappy.StreamCompressor()
     else:
         raise InvalidConfigurationError(
             "invalid compression algorithm: {!r}".format(
                 compression_algorithm))
Example #12
 def __init__(self, infile, mode, **kwargs):
     import snappy
     self.details = {'size': 999999999}  # not true, but OK if we don't seek
     super().__init__(fs=None,
                      path='snappy',
                      mode=mode.strip('b') + 'b',
                      **kwargs)
     self.infile = infile
     if 'r' in mode:
         self.codec = snappy.StreamDecompressor()
     else:
         self.codec = snappy.StreamCompressor()
Example #13
def backupWorker(k, p):
    # Inside this function/process 'script' is global
    logger.info("Backup: Processing %s ..." % k)
    # We acquire a file lock using the same locks whisper uses.  flock()
    # exclusive locks are cleared when the file handle is closed.  This
    # is the same practice that the whisper code uses.
    logger.debug("Locking file...")
    try:
        with open(p, "rb") as fh:
            fcntl.flock(fh.fileno(), fcntl.LOCK_EX)  # May block
            blob = fh.read()
            timestamp = utc()
    except IOError as e:
        logger.warning("An IOError occured locking %s: %s" \
                % (k, str(e)))
        return
    except Exception as e:
        logger.error("An Unknown exception occurred, skipping metric: %s" %
                     str(e))
        return

    # We're going to backup this file, compress it as a normal .gz
    # file so that it can be restored manually if needed
    if not script.options.noop:
        logger.debug("Compressing data...")
        blobgz = StringIO()
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="wb")
            fd.write(blob)
            fd.close()
        elif script.options.algorithm == "sz":
            compressor = snappy.StreamCompressor()
            blobgz.write(compressor.compress(blob))
        else:
            raise StandardError("Unknown compression format requested")

    # Grab our timestamp and assemble final upstream key location
    logger.debug("Saving payload as: %s.wsp.%s" \
            % (k, script.options.algorithm))
    try:
        if not script.options.noop:
            t = time.time()
            script.store.put("%s.wsp.%s" \
                    % (k, script.options.algorithm), blobgz.getvalue())
            # script.store.put("%s/%s.sha1" % (k, timestamp), blobSHA)
            logger.debug("Saving of %s took %d seconds" % (k, time.time() - t))
    except Exception as e:
        logger.warning("Exception during saving: %s" % str(e))

    # Free Memory
    blobgz.close()
    del blob
Example #14
 def __init__(self, socket, codec):
     self.codec = codec if six.PY3 else ord(codec)
     self._socketobj = ZmqConnection.SocketFile(socket)
     if self.codec == 0:
         self._compressor = self._socketobj
     elif self.codec == 1:
         self._compressor = gzip.GzipFile(fileobj=self._socketobj)
     elif self.codec == 2:
         self._compressor = ZmqConnection.CompressedFile(
             self._socketobj, snappy.StreamCompressor())
     elif self.codec == 3:
         self._compressor = ZmqConnection.CompressedFile(
             self._socketobj, lzma.LZMACompressor(lzma.FORMAT_XZ))
     else:
         raise ValueError("Unknown compression type")
Example #15
    def __init__(self, next_fp, mode):
        if snappy is None:
            raise io.UnsupportedOperation("Snappy is not available")

        if mode == "rb":
            self.decr = snappy.StreamDecompressor()
            self.encr = None
        elif mode == "wb":
            self.decr = None
            self.encr = snappy.StreamCompressor()
        else:
            raise io.UnsupportedOperation("unsupported mode for SnappyFile")

        super().__init__(next_fp)
        self.decr_done = False
Example #16
 def __init__(self,
              file_name_or_obj,
              file_mode,
              buffer_size=snappy._CHUNK_MAX):
     if isinstance(file_name_or_obj, str):
         self._file = open(file_name_or_obj, file_mode)
     else:
         self._file = file_name_or_obj
     self.buffer_pos = 0
     if file_mode == "wb":
         self.buffer = bytearray(buffer_size)
         self._compressor = snappy.StreamCompressor()
     else:
         self.buffer = None
         self._decompressor = snappy.StreamDecompressor()
Example #17
 def _get_compressobj(cls, compress_option):
     if compress_option is None or \
                     compress_option.algorithm == CompressOption.CompressAlgorithm.ODPS_RAW:
         return None
     elif compress_option.algorithm == \
             CompressOption.CompressAlgorithm.ODPS_ZLIB:
         return zlib.compressobj(compress_option.level, zlib.DEFLATED,
                                 zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL,
                                 compress_option.strategy)
     elif compress_option.algorithm == \
             CompressOption.CompressAlgorithm.ODPS_SNAPPY:
         try:
             import snappy
         except ImportError:
             raise errors.DependencyNotInstalledError(
                 "python-snappy library is required for snappy support")
         return snappy.StreamCompressor()
     else:
         raise IOError('Invalid compression option.')
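StreamCompressor can be returned alongside zlib's compression objects here because it matches the compressobj interface: both expose compress() and flush(). A hypothetical caller that works with either (stream_out and write are assumptions, not part of the example):

def stream_out(compressobj, chunks, write):
    # Works for zlib.compressobj() and snappy.StreamCompressor() alike;
    # snappy emits framed chunks eagerly, so its flush() adds little or nothing.
    for chunk in chunks:
        write(compressobj.compress(chunk))
    write(compressobj.flush())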
Example #18
    def test_random(self):
        for _ in range(100):
            compressor = snappy.StreamCompressor()
            decompressor = snappy.StreamDecompressor()
            data = b""
            compressed = b""
            for _ in range(random.randint(0, 3)):
                chunk = os.urandom(random.randint(0, snappy.snappy._CHUNK_MAX * 2))
                data += chunk
                compressed += compressor.add_chunk(
                        chunk, compress=random.choice([True, False, None]))

            upper_bound = random.choice([256, snappy.snappy._CHUNK_MAX * 2])
            while compressed:
                size = random.randint(0, upper_bound)
                chunk, compressed = compressed[:size], compressed[size:]
                chunk = decompressor.decompress(chunk)
                self.assertEqual(data[:len(chunk)], chunk)
                data = data[len(chunk):]

            decompressor.flush()
            self.assertEqual(len(data), 0)
Example #19
def test_snappy_framed(benchmark, file, use_cramjam: bool):
    """
    Uses snappy compression framed
    """
    import snappy

    data = bytearray(file.read_bytes())
    if use_cramjam:
        benchmark(
            round_trip,
            compress=cramjam.snappy.compress,
            decompress=cramjam.snappy.decompress,
            data=data,
        )
    else:
        compressor = snappy.StreamCompressor()
        decompressor = snappy.StreamDecompressor()
        benchmark(
            round_trip,
            compress=compressor.compress,
            decompress=decompressor.decompress,
            data=data,
        )
Example #20
 def __init__(self, socket):
     self._decompressor = snappy.StreamDecompressor()
     self._compressor = snappy.StreamCompressor()
     self._socket = socket
     self._bootstrapped = None
Example #21
 def __init__(self, socket):
     self._decompressor = snappy.StreamDecompressor()
     self._compressor = snappy.StreamCompressor()
     super(SnappySocket, self).__init__(socket)
Example #22
 def __init__(self, buffer=None):
     self._parser = Reader()
     self._decompressor = snappy.StreamDecompressor()
     self._compressor = snappy.StreamCompressor()
     buffer and self.feed(buffer)
Example #23
 def __init__(self):
     self.compressor = snappy.StreamCompressor()
Example #24
 def encode(data):
     return snappy.StreamCompressor().compress(data)
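The matching one-shot decoder would be (hypothetical counterpart; StreamDecompressor buffers partial chunks, so this assumes the complete framed stream is passed at once):

 def decode(data):
     return snappy.StreamDecompressor().decompress(data)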
Example #25
def backupWorker(k, p):
    # Inside this function/process 'script' is global
    logger.info("Backup: Processing %s ..." % k)
    # We acquire a file lock using the same locks whisper uses.  flock()
    # exclusive locks are cleared when the file handle is closed.  This
    # is the same practice that the whisper code uses.
    logger.debug("Locking file...")
    try:
        with open(p, "rb") as fh:
            fcntl.flock(fh.fileno(), fcntl.LOCK_EX)  # May block
            blob = fh.read()
            timestamp = utc()
    except IOError as e:
        logger.warning("An IOError occured locking %s: %s" \
                % (k, str(e)))
        return
    except Exception as e:
        logger.error("An Unknown exception occurred, skipping metric: %s" %
                     str(e))
        return

    # SHA1 hash...have we seen this metric DB file before?
    logger.debug("Calculating hash and searching data store...")
    blobSHA = hashlib.sha1(blob).hexdigest()
    knownBackups = []
    for i in script.store.list(k + "/"):
        if i.endswith(".sha1"):
            knownBackups.append(i)

    knownBackups.sort()
    if len(knownBackups) > 0:
        i = knownBackups[-1]  # The last known backup
        logger.debug("Examining %s from data store of %d backups" %
                     (i, len(knownBackups)))
        if script.store.get(i) == blobSHA:
            logger.info("Metric DB %s is unchanged from last backup, " \
                        "skipping." % k)
            # We purposely do not check retention in this case
            return

    # We're going to backup this file, compress it as a normal .gz
    # file so that it can be restored manually if needed
    if not script.options.noop:
        logger.debug("Compressing data...")
        blobgz = StringIO()
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="wb")
            fd.write(blob)
            fd.close()
        elif script.options.algorithm == "sz":
            compressor = snappy.StreamCompressor()
            blobgz.write(compressor.compress(blob))
        else:
            raise StandardError("Unknown compression format requested")

    # Grab our timestamp and assemble final upstream key location
    logger.debug("Uploading payload as: %s/%s.wsp.%s" \
            % (k, timestamp, script.options.algorithm))
    logger.debug("Uploading SHA1 as   : %s/%s.sha1" % (k, timestamp))
    try:
        if not script.options.noop:
            t = time.time()
            script.store.put("%s/%s.wsp.%s" \
                    % (k, timestamp, script.options.algorithm), blobgz.getvalue())
            script.store.put("%s/%s.sha1" % (k, timestamp), blobSHA)
            logger.debug("Upload of %s @ %s took %d seconds" %
                         (k, timestamp, time.time() - t))
    except Exception as e:
        logger.warning("Exception during upload: %s" % str(e))

    # Free Memory
    blobgz.close()
    del blob

    # Handle our retention policy, we keep at most X backups
    while len(knownBackups) + 1 > script.options.retention:
        # The oldest (and not current) backup
        i = knownBackups[0].replace(".sha1", "")
        logger.info("Removing old backup: %s.wsp.%s" %
                    (i, script.options.algorithm))
        logger.debug("Removing old SHA1: %s.sha1" % i)
        try:
            t = time.time()
            if not script.options.noop:
                script.store.delete("%s.wsp.%s" %
                                    (i, script.options.algorithm))
                script.store.delete("%s.sha1" % i)
            else:
                # Do a list, we want to log if there's a 404
                d = [ i for i in script.store.list("%s.wsp.%s" \
                        % (i, script.options.algorithm)) ]
                if len(d) == 0:
                    logger.warn("Missing file in store: %s.wsp.%s" \
                            % (i, script.options.algorithm))
                d = [i for i in script.store.list("%s.sha1" % i)]
                if len(d) == 0:
                    logger.warn("Missing file in store: %s.sha1" % i)

            logger.debug("Retention removal of %s took %d seconds" %
                         (i, time.time() - t))
        except Exception as e:
            # On an error here we want to leave files alone
            logger.warning("Exception during delete: %s" % str(e))

        del knownBackups[0]
Example #26
sample_record_count = 10
if len(sys.argv) > 1:
    try:
        sample_record_count = int(sys.argv[1])
    except ValueError:
        sys.stderr.write('Invalid number of records: %s\n\n' % (sys.argv[1]))
        show_usage()

# Generate sample record data.
sample_record_stream = BytesIO()
for size in range(100, 100 * (sample_record_count + 1), 100):
    write_record_v1(sample_record_stream, get_sample_data(size))
sample_records = sample_record_stream.getvalue()

# Generate uncompressed sample.
with open('uncompressed.v1.akaibu', 'wb') as f:
    write_header_v1(f, COMPRESSION_UNCOMPRESSED)
    f.write(sample_records)

# Generate zlib compressed sample.
with open('zlib.v1.akaibu', 'wb') as f:
    write_header_v1(f, COMPRESSION_ZLIB)
    f.write(zlib.compress(sample_records, 9))

# Generate Snappy compressed sample.
if snappy:
    with open('snappy.v1.akaibu', 'wb') as f:
        write_header_v1(f, COMPRESSION_SNAPPY)
        f.write(snappy.StreamCompressor().compress(sample_records))
Example #27
 def __init__(self):
     snappy = _try_import_snappy()
     self.compressor = snappy.StreamCompressor()
     self.decompressor = snappy.StreamDecompressor()
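The _try_import_snappy() helper is not shown here; given the lazy-import pattern in Examples #4, #5, and #17, a plausible sketch is:

def _try_import_snappy():
    # Hypothetical: import lazily so python-snappy stays an optional dependency.
    try:
        import snappy
    except ImportError:
        raise ImportError(
            "python-snappy library is required for snappy support")
    return snappy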