Example #1
    def set(self, id, data, ttl=None):
        data = json_dumps(data)

        row = self.connection.row(id)
        # The call to delete() is just a local state mutation; here it
        # clears all columns so the entire row is replaced. Otherwise,
        # if an existing row that spanned more than one column were
        # mutated, we could overwrite the beginning columns while
        # stale trailing columns survived.
        row.delete()

        # If we are setting a TTL on this row, we set the timestamp of
        # the cells into the future. This lets our GC policy delete the
        # cells when the time comes, and lets reads filter out rows
        # whose timestamp has passed. We also write the TTL value itself
        # into a dedicated column, so a later write can bump the
        # timestamp and rewrite the row with a new TTL.
        ttl = ttl or self.default_ttl
        if ttl is None:
            ts = None
        else:
            ts = timezone.now() + ttl
            row.set_cell(
                self.column_family,
                self.ttl_column,
                struct.pack('<I', int(ttl.total_seconds())),
                timestamp=ts,
            )

        # Track flags for metadata about this row. The only flag we're
        # tracking right now is whether compression is enabled for the
        # data column.
        flags = 0
        if self.compression:
            flags |= self._FLAG_COMPRESSED
            data = zlib_compress(data)

        # We only need to write the flags column at all if any flags
        # are enabled, in which case we pack them into a single byte.
        if flags:
            row.set_cell(
                self.column_family,
                self.flags_column,
                struct.pack('B', flags),
                timestamp=ts,
            )

        assert len(data) <= self.max_size

        row.set_cell(
            self.column_family,
            self.data_column,
            data,
            timestamp=ts,
        )
        self.connection.mutate_rows([row])
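The compression flag only round-trips if the read path checks it before decoding. A minimal sketch of a matching get(), assuming self.connection exposes a Bigtable-style read_row() returning a row with the usual cells mapping, and that zlib_decompress/json_loads aliases mirror the imports used above; none of this is part of the original example:

    def get(self, id):
        # Hypothetical read-side counterpart to set() above.
        row = self.connection.read_row(id)
        if row is None:
            return None
        cells = row.cells[self.column_family]
        data = cells[self.data_column][0].value
        flags = 0
        if self.flags_column in cells:
            # The flags column is only written when at least one flag is set.
            flags = struct.unpack('B', cells[self.flags_column][0].value)[0]
        if flags & self._FLAG_COMPRESSED:
            data = zlib_decompress(data)
        return json_loads(data)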
Example #2
    def encode_row(self, id, data, ttl=None):
        data = json_dumps(data)

        row = self.connection.row(id)
        # The call to delete() is just a local state mutation; here it
        # clears all columns so the entire row is replaced. Otherwise,
        # if an existing row that spanned more than one column were
        # mutated, we could overwrite the beginning columns while
        # stale trailing columns survived.
        row.delete()

        # If we are setting a TTL on this row, we set the timestamp of
        # the cells into the future. This lets our GC policy delete the
        # cells when the time comes, and lets reads filter out rows
        # whose timestamp has passed. We also write the TTL value itself
        # into a dedicated column, so a later write can bump the
        # timestamp and rewrite the row with a new TTL.
        ttl = ttl or self.default_ttl
        if ttl is None:
            ts = None
        else:
            ts = timezone.now() + ttl
            row.set_cell(
                self.column_family,
                self.ttl_column,
                struct.pack('<I', int(ttl.total_seconds())),
                timestamp=ts,
            )

        # Track flags for metadata about this row. The only flag we're
        # tracking right now is whether compression is enabled for the
        # data column.
        flags = 0
        if self.compression:
            flags |= self._FLAG_COMPRESSED
            data = zlib_compress(data)

        # We only need to write the flags column at all if any flags
        # are enabled, in which case we pack them into a single byte.
        if flags:
            row.set_cell(
                self.column_family,
                self.flags_column,
                struct.pack('B', flags),
                timestamp=ts,
            )

        assert len(data) <= self.max_size

        row.set_cell(
            self.column_family,
            self.data_column,
            data,
            timestamp=ts,
        )
        return row
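The difference from Example #1 is that encode_row performs no I/O: it hands the prepared row back so the caller can batch several of them into a single mutate_rows call. A hedged usage sketch (store and items are hypothetical names):

    rows = [store.encode_row(key, value) for key, value in items]
    store.connection.mutate_rows(rows)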
Example #3
def analyse_json(filename):
    """Utility to return the ratio of key size, punctuation size, and leaf value size."""

    unique_keys = {}

    def __get_size(j):
        """Recurse to generate size."""
        (keys, punctuation, key_count) = (0, 0, 0)
        if isinstance(j, list):
            punctuation += 1  # [
            if len(j) > 1:
                punctuation += (len(j) - 1)  # ,
            for v in j:
                sub_k, sub_p, sub_count = __get_size(v)
                keys += sub_k
                punctuation += sub_p
                key_count += sub_count
            punctuation += 1  # ]
        elif isinstance(j, dict):
            punctuation += 1  # {
            if len(j.keys()) > 1:
                punctuation += (len(j.keys()) - 1)  # ,
            for k, v in j.items():
                if k not in unique_keys:
                    unique_keys[k] = True
                key_count += 1
                punctuation += 1  # "
                keys += len(k)
                punctuation += 1  # "
                punctuation += 1  # :
                sub_k, sub_p, sub_count = __get_size(v)
                keys += sub_k
                punctuation += sub_p
                key_count += sub_count
            punctuation += 1  # }
        elif isinstance(j, str):
            punctuation += 1  # "
            punctuation += 1  # "
        return (keys, punctuation, key_count)

    total_size = path_getsize(filename)
    with open(filename, 'rb') as f:  # read bytes so zlib_compress below gets a byte string
        data = f.read()
        j = json_loads(data)

        (keys, punctuation, key_count) = __get_size(j)
        values = total_size - (keys + punctuation)
        unique_count = len(unique_keys)
        compressed_size = len(zlib_compress(data, 6))

        return (keys, punctuation, values, key_count, unique_count, total_size,
                compressed_size)
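A small driver for analyse_json, useful for eyeballing how much of a file is structural overhead; the file name is hypothetical:

keys, punctuation, values, key_count, unique_count, total_size, compressed_size = \
    analyse_json('sample.json')
print('keys %.1f%% / punctuation %.1f%% / values %.1f%% of %d bytes' % (
    100.0 * keys / total_size,
    100.0 * punctuation / total_size,
    100.0 * values / total_size,
    total_size))
print('%d keys (%d unique); %d bytes after zlib' % (key_count, unique_count, compressed_size))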
Example #4
 def end_write(self):
     """finalizes the transaction and flushes all the write buffer to the 
     underlying stream. begin_write must have been called prior to this.
     must be called to finalize the transaction"""
     self._assert_wlock()
     data = "".join(self._wbuffer)
     del self._wbuffer[:]
     if data:
         packers.Int32.pack(self._wseq, self.outfile)
         if self.compression_threshold > 0 and len(data) > self.compression_threshold:
             uncompressed_length = len(data)
             data = zlib_compress(data)
         else:
             uncompressed_length = 0
         packers.Int32.pack(len(data), self.outfile)
         packers.Int32.pack(uncompressed_length, self.outfile)
         self.outfile.write(data)
         self.outfile.flush()
     self._wlock.release()
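The frame layout written by end_write is: sequence number, payload length, uncompressed length, then the payload, where a zero uncompressed length marks an uncompressed payload. A minimal sketch of the matching reader, assuming packers.Int32 offers an unpack(stream) counterpart and a zlib_decompress alias is in scope (neither appears in the original):

 def read_frame(self, infile):
     # Hypothetical counterpart to end_write: consume one frame.
     seq = packers.Int32.unpack(infile)
     length = packers.Int32.unpack(infile)
     uncompressed_length = packers.Int32.unpack(infile)
     data = infile.read(length)
     if uncompressed_length > 0:
         # Zero signals that the payload was stored uncompressed.
         data = zlib_decompress(data)
     return seq, data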
Example #5
 def _save(self, hashed_name, content_file):
     content = content_file.read()
     try:
         for regexp, comp_function in self.compressors.items():
             if regexp.search(hashed_name):
                 content = comp_function(content)
                 break
     except Exception as e:
         raise MinifiedStorageException("Could not compress file %s, error: %s" % (hashed_name, e))
     # save minified file
     saved_name = super(MinifiedManifestStaticFilesStorage, self)._save(hashed_name, ContentFile(content))
     if MINIFIED_GZIP:
         # save gzipped file as well; we overwrite the content variable to save a tiny bit of memory
         try:
             content = zlib_compress(content)
             super(MinifiedManifestStaticFilesStorage, self)._save("%s.gz" % saved_name, ContentFile(content))
         except Exception as e:
             raise MinifiedStorageException("Could not gzip file %s, error: %s" % (hashed_name, e))
     return saved_name
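One caveat: zlib_compress emits a raw zlib stream (RFC 1950), not a gzip container (RFC 1952), so the %s.gz file saved above may not be readable by tools or servers that expect real gzip framing. If that matters, a sketch of the swap using the standard library:

from gzip import compress as gzip_compress

# gzip.compress wraps the same deflate data in a gzip header and
# trailer, which command-line gzip and Content-Encoding: gzip expect.
content = gzip_compress(content)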
Example #6
 def end_write(self):
     """finalizes the transaction and flushes all the write buffer to the 
     underlying stream. begin_write must have been called prior to this.
     must be called to finalize the transaction"""
     self._assert_wlock()
     self.logger.info("end_write")
     data = "".join(self._wbuffer)
     del self._wbuffer[:]
     self.logger.info("    data = %r bytes", len(data))
     if data:
         packers.Int32.pack(self._wseq, self.outfile)
         if self.compression_threshold > 0 and len(data) > self.compression_threshold:
             uncompressed_length = len(data)
             data = zlib_compress(data)
         else:
             uncompressed_length = 0
         packers.Int32.pack(len(data), self.outfile)
         packers.Int32.pack(uncompressed_length, self.outfile)
         self.outfile.write(data)
         self.outfile.flush()
     self.logger.info("    ok")
     self._wlock.release()
Example #7
 def zcompress(packet, level):
     if isinstance(packet, memoryview):
         packet = packet.tobytes()
     elif not isinstance(packet, bytes):
         packet = bytes(packet, 'UTF-8')
     return level + ZLIB_FLAG, zlib_compress(packet, level)
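The flag travels with the payload so the receiver can dispatch on the codec; the level itself is not needed to invert zlib, since zlib streams are self-describing. A hedged sketch of the inverse (the helper name and zlib_decompress alias are assumptions):

 def zdecompress(flag, packet):
     # Hypothetical inverse of zcompress; the level folded into the
     # flag on the wire is irrelevant for decompression.
     return zlib_decompress(packet)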
Example #8
def compress(stream):
    """
    Compress the stream using zlib.
    """

    return zlib_compress(stream)
Example #9
def create_tinfoil_index(index_to_write: dict,
                         out_path: Path,
                         compression_flag: int,
                         rsa_pub_key_path: Path = None,
                         vm_path: Path = None):
    to_compress_buffer = b""

    if vm_path is not None and vm_path.is_file():
        to_compress_buffer += b"\x13\x37\xB0\x0B"
        vm_buffer = b""

        with open(vm_path, "rb") as vm_stream:
            vm_buffer += vm_stream.read()

        to_compress_buffer += len(vm_buffer).to_bytes(4, "little")
        to_compress_buffer += vm_buffer

    to_compress_buffer += bytes(json_serialize(index_to_write).encode())

    to_write_buffer = b""
    session_key = b""

    if compression_flag == CompressionFlag.ZSTD_COMPRESSION:
        to_write_buffer += ZstdCompressor(
            level=22).compress(to_compress_buffer)

    elif compression_flag == CompressionFlag.ZLIB_COMPRESSION:
        to_write_buffer += zlib_compress(to_compress_buffer, 9)

    elif compression_flag == CompressionFlag.NO_COMPRESSION:
        to_write_buffer += to_compress_buffer

    else:
        raise NotImplementedError(
            "Compression method supplied is not implemented yet.")

    data_size = len(to_write_buffer)
    flag = None
    to_write_buffer += (b"\x00" * (0x10 - (data_size % 0x10)))

    if rsa_pub_key_path is not None and rsa_pub_key_path.is_file():

        def rand_aes_key_generator() -> bytes:
            return randint(0, 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF).to_bytes(
                0x10, byteorder="big")

        rsa_pub_key = import_rsa_key(open(rsa_pub_key_path).read())
        rand_aes_key = rand_aes_key_generator()

        pkcs1_oaep_ctx = new_pkcs1_oaep_ctx(rsa_pub_key,
                                            hashAlgo=SHA256,
                                            label=b"")
        aes_ctx = new_aes_ctx(rand_aes_key, MODE_ECB)

        session_key += pkcs1_oaep_ctx.encrypt(rand_aes_key)
        to_write_buffer = aes_ctx.encrypt(to_write_buffer)
        flag = compression_flag | EncryptionFlag.ENCRYPT
    else:
        session_key += b"\x00" * 0x100
        flag = compression_flag | EncryptionFlag.NO_ENCRYPT

    Path(out_path.parent).mkdir(parents=True, exist_ok=True)

    with open(out_path, "wb") as out_stream:
        out_stream.write(b"TINFOIL")
        out_stream.write(flag.to_bytes(1, byteorder="little"))
        out_stream.write(session_key)
        out_stream.write(data_size.to_bytes(8, "little"))
        out_stream.write(to_write_buffer)
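The layout written above is fixed: a 7-byte TINFOIL magic, one flag byte, a 0x100-byte session-key block, an 8-byte little-endian data size, then the padded (and possibly encrypted) body. A minimal sketch that parses the header back under exactly those assumptions (the function name is hypothetical):

def read_tinfoil_header(index_path: Path):
    with open(index_path, "rb") as in_stream:
        magic = in_stream.read(7)
        if magic != b"TINFOIL":
            raise ValueError("not a tinfoil index")
        flag = int.from_bytes(in_stream.read(1), "little")
        session_key = in_stream.read(0x100)
        data_size = int.from_bytes(in_stream.read(8), "little")
        body = in_stream.read()
    return flag, session_key, data_size, body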
Example #10
 def zcompress(packet, level):
     if isinstance(packet, memoryview):
         packet = packet.tobytes()
     else:
         packet = str(packet)
     return level + ZLIB_FLAG, zlib_compress(packet, level)
Example #11
def compress_test(data: bytes,
                  output: bool = True) -> Dict[str, Tuple[int, int, int]]:
    """
    Compare compress modules.
    :param data: the data to compress.
    :param output: if this value is True, print the results to console.
    :return: {'module name': (<size of the compressed data>, <time to compress>, <time to decompress>)}
    """
    res: Dict[str, Tuple[int, int, int]] = {}
    try_print('+++++++++++++++++++++++++++++++++++++++++++++++++++++',
              flag=output)
    size = len(data)
    try_print(f'Original size: {round(size / 1024 / 1024, 4)} MB', flag=output)
    # gzip
    for i in range(10):
        tmp = gzip_compress(data, compresslevel=i)
        key = f'gzip(compress level {i})'
        res[key] = (len(tmp),
                    check_function_speed(gzip_compress, data, compresslevel=i),
                    check_function_speed(gzip_decompress, tmp))
        __print(res, size, key, output)
    # bz2
    for i in range(1, 10):
        tmp = bz2_compress(data, compresslevel=i)
        key = f'bz2(compress level {i})'
        res[key] = (len(tmp),
                    check_function_speed(bz2_compress, data, compresslevel=i),
                    check_function_speed(bz2_decompress, tmp))
        __print(res, size, key, output)
    # zlib
    for i in range(10):
        tmp = zlib_compress(data, level=i)
        key = f'zlib(compress level {i})'
        res[key] = (len(tmp), check_function_speed(zlib_compress,
                                                   data,
                                                   level=i),
                    check_function_speed(zlib_decompress, tmp))
        __print(res, size, key, output)
    # lzma
    for fmt_name, fmt, check in (('XZ - CRC64', FORMAT_XZ, CHECK_CRC64),
                                 ('XZ - CRC32', FORMAT_XZ, CHECK_CRC32),
                                 ('XZ - NONE', FORMAT_XZ, CHECK_NONE),
                                 ('ALONE - NONE', FORMAT_ALONE, CHECK_NONE)):
        tmp = lzma_compress(data, fmt, check)
        key = f'lzma({fmt_name})'
        res[key] = (len(tmp),
                    check_function_speed(lzma_compress, data, fmt, check),
                    check_function_speed(lzma_decompress, tmp, format=fmt))
        __print(res, size, key, output)
    # brotli
    tmp = brotli_compress(data)
    key = 'brotli'
    res[key] = (len(tmp), check_function_speed(brotli_compress, data),
                check_function_speed(brotli_decompress, tmp))
    __print(res, size, key, output)
    try_print('+++++++++++++++++++++++++++++++++++++++++++++++++++++',
              flag=output)
    return res
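A hedged usage example for compress_test, assuming the aliased compressor imports resolve in the surrounding module:

if __name__ == '__main__':
    sample = b'the quick brown fox jumps over the lazy dog ' * 50_000
    results = compress_test(sample, output=True)
    smallest = min(results, key=lambda name: results[name][0])
    print(f'best ratio: {smallest} -> {results[smallest][0]} bytes')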