def set(self, id, data, ttl=None):
    data = json_dumps(data)

    row = self.connection.row(id)

    # The call to delete is just a state mutation, and in this case is
    # used to clear all columns so the entire row will be replaced.
    # Otherwise, if an existing row were mutated and it took up more than
    # one column, it would be possible to overwrite the beginning columns
    # and still retain the trailing ones.
    row.delete()

    # If we are setting a TTL on this row, we want to set the timestamp
    # of the cells into the future. This allows our GC policy to delete
    # them when the time comes, and also lets us filter out rows on read
    # once we are past the timestamp. We additionally store the TTL value
    # in its own column, so a row can later be rewritten with a bumped
    # timestamp and a new TTL.
    ttl = ttl or self.default_ttl
    if ttl is None:
        ts = None
    else:
        ts = timezone.now() + ttl
        row.set_cell(
            self.column_family,
            self.ttl_column,
            struct.pack('<I', int(ttl.total_seconds())),
            timestamp=ts,
        )

    # Track flags for metadata about this row. The only flag we're
    # tracking now is whether compression is enabled for the data column.
    flags = 0
    if self.compression:
        flags |= self._FLAG_COMPRESSED
        data = zlib_compress(data)

    # The flags column only needs to be written at all if any flags are
    # enabled, in which case they are packed into a single byte.
    if flags:
        row.set_cell(
            self.column_family,
            self.flags_column,
            struct.pack('B', flags),
            timestamp=ts,
        )

    assert len(data) <= self.max_size

    row.set_cell(
        self.column_family,
        self.data_column,
        data,
        timestamp=ts,
    )

    self.connection.mutate_rows([row])
def encode_row(self, id, data, ttl=None):
    data = json_dumps(data)

    row = self.connection.row(id)

    # The call to delete is just a state mutation, and in this case is
    # used to clear all columns so the entire row will be replaced.
    # Otherwise, if an existing row were mutated and it took up more than
    # one column, it would be possible to overwrite the beginning columns
    # and still retain the trailing ones.
    row.delete()

    # If we are setting a TTL on this row, we want to set the timestamp
    # of the cells into the future. This allows our GC policy to delete
    # them when the time comes, and also lets us filter out rows on read
    # once we are past the timestamp. We additionally store the TTL value
    # in its own column, so a row can later be rewritten with a bumped
    # timestamp and a new TTL.
    ttl = ttl or self.default_ttl
    if ttl is None:
        ts = None
    else:
        ts = timezone.now() + ttl
        row.set_cell(
            self.column_family,
            self.ttl_column,
            struct.pack('<I', int(ttl.total_seconds())),
            timestamp=ts,
        )

    # Track flags for metadata about this row. The only flag we're
    # tracking now is whether compression is enabled for the data column.
    flags = 0
    if self.compression:
        flags |= self._FLAG_COMPRESSED
        data = zlib_compress(data)

    # The flags column only needs to be written at all if any flags are
    # enabled, in which case they are packed into a single byte.
    if flags:
        row.set_cell(
            self.column_family,
            self.flags_column,
            struct.pack('B', flags),
            timestamp=ts,
        )

    assert len(data) <= self.max_size

    row.set_cell(
        self.column_family,
        self.data_column,
        data,
        timestamp=ts,
    )

    return row
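# A minimal read-side sketch for the row layout produced by set/encode_row
# above: unpack the flags byte, inflate the data column if the compressed
# bit is set, then JSON-decode. The `cells` mapping and the zlib_decompress
# and json_loads aliases are assumptions for illustration, not part of the
# snippets above.
def decode_row_sketch(self, cells):
    flags = 0
    if self.flags_column in cells:
        flags = struct.unpack('B', cells[self.flags_column])[0]
    data = cells[self.data_column]
    if flags & self._FLAG_COMPRESSED:
        data = zlib_decompress(data)
    return json_loads(data)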
def analyse_json(filename):
    """Utility to return the ratio of key size, punctuation size, and
    leaf value size in a JSON file."""
    unique_keys = {}

    def __get_size(j):
        """Recurse to generate size."""
        keys, punctuation, key_count = 0, 0, 0
        if isinstance(j, list):
            punctuation += 1  # [
            if len(j) > 1:
                punctuation += len(j) - 1  # ,
            for v in j:
                sub_k, sub_p, sub_count = __get_size(v)
                keys += sub_k
                punctuation += sub_p
                key_count += sub_count
            punctuation += 1  # ]
        elif isinstance(j, dict):
            punctuation += 1  # {
            if len(j) > 1:
                punctuation += len(j) - 1  # ,
            for k, v in j.items():
                if k not in unique_keys:
                    unique_keys[k] = True
                key_count += 1
                punctuation += 1  # "
                keys += len(k)
                punctuation += 1  # "
                punctuation += 1  # :
                sub_k, sub_p, sub_count = __get_size(v)
                keys += sub_k
                punctuation += sub_p
                key_count += sub_count
            punctuation += 1  # }
        elif isinstance(j, str):
            punctuation += 1  # "
            punctuation += 1  # "
        return keys, punctuation, key_count

    total_size = path_getsize(filename)
    with open(filename, 'rb') as f:
        data = f.read()
    j = json_loads(data)
    keys, punctuation, key_count = __get_size(j)
    values = total_size - (keys + punctuation)
    unique_count = len(unique_keys)
    compressed_size = len(zlib_compress(data, 6))
    return (keys, punctuation, values, key_count, unique_count,
            total_size, compressed_size)
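# Hypothetical invocation of analyse_json; 'sample.json' is a placeholder
# path. The breakdown shows how much of the file is keys vs. punctuation
# vs. leaf values, and how well the raw text deflates with zlib.
keys, punct, values, key_count, unique, total, compressed = \
    analyse_json('sample.json')
print(f'keys={keys}B punctuation={punct}B values={values}B')
print(f'{key_count} keys ({unique} unique); {total}B raw -> {compressed}B zlib')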
def end_write(self):
    """Finalizes the transaction and flushes the write buffer to the
    underlying stream. begin_write must have been called prior to this,
    and this must be called to finalize the transaction."""
    self._assert_wlock()
    data = b"".join(self._wbuffer)
    del self._wbuffer[:]
    if data:
        packers.Int32.pack(self._wseq, self.outfile)
        # Only compress payloads above the threshold; an uncompressed
        # length of 0 tells the reader that the data is not compressed.
        if self.compression_threshold > 0 and len(data) > self.compression_threshold:
            uncompressed_length = len(data)
            data = zlib_compress(data)
        else:
            uncompressed_length = 0
        packers.Int32.pack(len(data), self.outfile)
        packers.Int32.pack(uncompressed_length, self.outfile)
        self.outfile.write(data)
        self.outfile.flush()
    self._wlock.release()
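# A hedged sketch of the matching read side for the frame end_write emits:
# int32 sequence, int32 payload length, int32 uncompressed length (0 means
# the payload was not compressed), then the payload itself. This assumes
# packers.Int32.unpack mirrors pack and that a zlib_decompress alias is in
# scope; it is illustrative, not the library's actual reader.
def read_frame_sketch(infile):
    seq = packers.Int32.unpack(infile)
    length = packers.Int32.unpack(infile)
    uncompressed_length = packers.Int32.unpack(infile)
    data = infile.read(length)
    if uncompressed_length:
        data = zlib_decompress(data)
    return seq, data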
def _save(self, hashed_name, content_file):
    content = content_file.read()
    try:
        for regexp, comp_function in self.compressors.items():
            if regexp.search(hashed_name):
                content = comp_function(content)
                break
    except Exception as e:
        raise MinifiedStorageException(
            "Could not compress file %s, error: %s" % (hashed_name, e))
    # Save the minified file.
    saved_name = super(MinifiedManifestStaticFilesStorage, self)._save(
        hashed_name, ContentFile(content))
    if MINIFIED_GZIP:
        # Save the compressed file as well; we overwrite the content
        # variable to save a tiny bit of memory. Note that zlib_compress
        # produces a raw zlib stream rather than a gzip container.
        try:
            content = zlib_compress(content)
            super(MinifiedManifestStaticFilesStorage, self)._save(
                "%s.gz" % saved_name, ContentFile(content))
        except Exception as e:
            raise MinifiedStorageException(
                "Could not gzip file %s, error: %s" % (hashed_name, e))
    return saved_name
def end_write(self):
    """Finalizes the transaction and flushes the write buffer to the
    underlying stream. begin_write must have been called prior to this,
    and this must be called to finalize the transaction."""
    self._assert_wlock()
    self.logger.info("end_write")
    data = b"".join(self._wbuffer)
    del self._wbuffer[:]
    self.logger.info("  data = %r bytes", len(data))
    if data:
        packers.Int32.pack(self._wseq, self.outfile)
        if self.compression_threshold > 0 and len(data) > self.compression_threshold:
            uncompressed_length = len(data)
            data = zlib_compress(data)
        else:
            uncompressed_length = 0
        packers.Int32.pack(len(data), self.outfile)
        packers.Int32.pack(uncompressed_length, self.outfile)
        self.outfile.write(data)
        self.outfile.flush()
        self.logger.info("  ok")
    self._wlock.release()
def zcompress(packet, level):
    if isinstance(packet, memoryview):
        packet = packet.tobytes()
    elif not isinstance(packet, bytes):
        packet = bytes(packet, 'UTF-8')
    return level + ZLIB_FLAG, zlib_compress(packet, level)
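# Hypothetical round trip for zcompress: the first element of the returned
# pair carries the compression level plus ZLIB_FLAG so a receiver knows how
# to inflate the payload. zlib_decompress is an assumed alias for
# zlib.decompress.
flag_byte, payload = zcompress(memoryview(b"example payload"), 3)
assert zlib_decompress(payload) == b"example payload"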
def compress(stream):
    """Compress stream using zlib."""
    return zlib_compress(stream)
def create_tinfoil_index(index_to_write: dict, out_path: Path,
                         compression_flag: int,
                         rsa_pub_key_path: Path = None,
                         vm_path: Path = None):
    to_compress_buffer = b""

    # Optionally prepend a VM blob, tagged with a magic and its length.
    if vm_path is not None and vm_path.is_file():
        to_compress_buffer += b"\x13\x37\xB0\x0B"
        vm_buffer = b""
        with open(vm_path, "rb") as vm_stream:
            vm_buffer += vm_stream.read()
        to_compress_buffer += len(vm_buffer).to_bytes(4, "little")
        to_compress_buffer += vm_buffer

    to_compress_buffer += bytes(json_serialize(index_to_write).encode())

    to_write_buffer = b""
    session_key = b""

    if compression_flag == CompressionFlag.ZSTD_COMPRESSION:
        to_write_buffer += ZstdCompressor(level=22).compress(to_compress_buffer)
    elif compression_flag == CompressionFlag.ZLIB_COMPRESSION:
        to_write_buffer += zlib_compress(to_compress_buffer, 9)
    elif compression_flag == CompressionFlag.NO_COMPRESSION:
        to_write_buffer += to_compress_buffer
    else:
        raise NotImplementedError(
            "Compression method supplied is not implemented yet.")

    data_size = len(to_write_buffer)
    flag = None

    # Pad to a 16-byte boundary for AES-ECB.
    to_write_buffer += b"\x00" * (0x10 - (data_size % 0x10))

    if rsa_pub_key_path is not None and rsa_pub_key_path.is_file():
        def rand_aes_key_generator() -> bytes:
            return randint(0, 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF).to_bytes(
                0x10, byteorder="big")

        rsa_pub_key = import_rsa_key(open(rsa_pub_key_path).read())
        rand_aes_key = rand_aes_key_generator()
        pkcs1_oaep_ctx = new_pkcs1_oaep_ctx(rsa_pub_key, hashAlgo=SHA256, label=b"")
        aes_ctx = new_aes_ctx(rand_aes_key, MODE_ECB)
        # The AES key is wrapped with RSA-OAEP and shipped in the header.
        session_key += pkcs1_oaep_ctx.encrypt(rand_aes_key)
        to_write_buffer = aes_ctx.encrypt(to_write_buffer)
        flag = compression_flag | EncryptionFlag.ENCRYPT
    else:
        session_key += b"\x00" * 0x100
        flag = compression_flag | EncryptionFlag.NO_ENCRYPT

    Path(out_path.parent).mkdir(parents=True, exist_ok=True)

    with open(out_path, "wb") as out_stream:
        out_stream.write(b"TINFOIL")
        out_stream.write(flag.to_bytes(1, byteorder="little"))
        out_stream.write(session_key)
        out_stream.write(data_size.to_bytes(8, "little"))
        out_stream.write(to_write_buffer)
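# A hedged sketch of splitting apart the container create_tinfoil_index
# writes: 7-byte magic, 1-byte flag, 0x100-byte session key, little-endian
# 8-byte data size, then the (possibly encrypted and compressed) body.
# Decryption and decompression are left out; only the framing is shown.
def read_tinfoil_header_sketch(path: Path):
    with open(path, "rb") as stream:
        assert stream.read(7) == b"TINFOIL"
        flag = int.from_bytes(stream.read(1), "little")
        session_key = stream.read(0x100)
        data_size = int.from_bytes(stream.read(8), "little")
        body = stream.read()
    return flag, session_key, data_size, body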
def zcompress(packet, level):
    if isinstance(packet, memoryview):
        packet = packet.tobytes()
    else:
        # Coerce anything else to a byte string before deflating
        # (Python 2 semantics; on Python 3 this would need an encode).
        packet = str(packet)
    return level + ZLIB_FLAG, zlib_compress(packet, level)
def compress_test(data: bytes, output: bool = True) -> Dict[str, Tuple[int, int, int]]:
    """
    Compare compression modules.

    :param data: the data to compress.
    :param output: if True, print the results to the console.
    :return: {'module name': (<size of the compressed data>,
                              <time to compress>, <time to decompress>)}
    """
    res: Dict[str, Tuple[int, int, int]] = {}
    try_print('+++++++++++++++++++++++++++++++++++++++++++++++++++++', flag=output)
    size = len(data)
    try_print(f'Original size: {round(size / 1024 / 1024, 4)} MB', flag=output)

    # gzip
    for i in range(10):
        tmp = gzip_compress(data, compresslevel=i)
        key = f'gzip(compress level {i})'
        res[key] = (len(tmp),
                    check_function_speed(gzip_compress, data, compresslevel=i),
                    check_function_speed(gzip_decompress, tmp))
        __print(res, size, key, output)

    # bz2 (valid compress levels are 1-9)
    for i in range(1, 10):
        tmp = bz2_compress(data, compresslevel=i)
        key = f'bz2(compress level {i})'
        res[key] = (len(tmp),
                    check_function_speed(bz2_compress, data, compresslevel=i),
                    check_function_speed(bz2_decompress, tmp))
        __print(res, size, key, output)

    # zlib
    for i in range(10):
        tmp = zlib_compress(data, level=i)
        key = f'zlib(compress level {i})'
        res[key] = (len(tmp),
                    check_function_speed(zlib_compress, data, level=i),
                    check_function_speed(zlib_decompress, tmp))
        __print(res, size, key, output)

    # lzma
    tmp = lzma_compress(data, FORMAT_XZ, CHECK_CRC64)
    key = 'lzma(XZ - CRC64)'
    res[key] = (len(tmp),
                check_function_speed(lzma_compress, data, FORMAT_XZ, CHECK_CRC64),
                check_function_speed(lzma_decompress, tmp, format=FORMAT_XZ))
    __print(res, size, key, output)

    tmp = lzma_compress(data, FORMAT_XZ, CHECK_CRC32)
    key = 'lzma(XZ - CRC32)'
    res[key] = (len(tmp),
                check_function_speed(lzma_compress, data, FORMAT_XZ, CHECK_CRC32),
                check_function_speed(lzma_decompress, tmp, format=FORMAT_XZ))
    __print(res, size, key, output)

    tmp = lzma_compress(data, FORMAT_XZ, CHECK_NONE)
    key = 'lzma(XZ - NONE)'
    res[key] = (len(tmp),
                check_function_speed(lzma_compress, data, FORMAT_XZ, CHECK_NONE),
                check_function_speed(lzma_decompress, tmp, format=FORMAT_XZ))
    __print(res, size, key, output)

    # FORMAT_ALONE supports no integrity check, so the check argument is omitted.
    tmp = lzma_compress(data, FORMAT_ALONE)
    key = 'lzma(ALONE - NONE)'
    res[key] = (len(tmp),
                check_function_speed(lzma_compress, data, FORMAT_ALONE),
                check_function_speed(lzma_decompress, tmp, format=FORMAT_ALONE))
    __print(res, size, key, output)

    # brotli
    tmp = brotli_compress(data)
    key = 'brotli'
    res[key] = (len(tmp),
                check_function_speed(brotli_compress, data),
                check_function_speed(brotli_decompress, tmp))
    __print(res, size, key, output)

    try_print('+++++++++++++++++++++++++++++++++++++++++++++++++++++', flag=output)
    return res
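# Hypothetical use of compress_test on a megabyte of repetitive input;
# real-world ratios depend heavily on the data being compressed.
results = compress_test(b"example data " * 80_000, output=False)
best = min(results.items(), key=lambda kv: kv[1][0])
print(f'smallest output: {best[0]} at {best[1][0]} bytes')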