def compress( in_path, out_path, crc_to_spoof ): with open( in_path, 'rb' ) as in_file, open( out_path, 'wb' ) as out_file: lzmac = LZMACompressor( FORMAT_ALONE, filters = [ { 'id': FILTER_LZMA1, 'dict_size': 64 * 1024 } ] ) size = 0 crc = None out_file.write( pack( '<I', crc_to_spoof ) ) _, file_name = split( in_path ) data = ''' do local f = file.Open( 'files.txt', 'ab', 'DATA' ) if f then f:Write( string.format( '{0} = %s\\n', debug.getinfo( 1 ).short_src ) ) f:Close() end end '''.format( file_name ).encode() data = in_file.read( 1024 ) while len( data ): size = size + len( data ) if crc == None: crc = crc32( data ) else: crc = crc32( data, crc ) out_file.write( lzmac.compress( data ) ) data = in_file.read( 1024 ) if crc != crc_to_spoof: fix = 0 working_crc = ~crc_to_spoof for i in range( 32 ): if fix & 1: fix = ( fix >> 1 ) ^ 0xedb88320 else: fix = fix >> 1 if working_crc & 1: fix = fix ^ 0x5b358fd3 working_crc = working_crc >> 1 fix = ( fix ^ ~crc ) & 0xffffffff fix = pack( '<I', fix ) #crc = crc32( fix, crc ) size = size + len( fix ) print( 'Fix: {0}'.format( fix ) ) out_file.write( lzmac.compress( fix ) ) out_file.write( lzmac.flush() ) out_file.seek( 9, SEEK_SET ) out_file.write( pack( '<q', size ) )
def bcompress(b):
    """One-shot LZMA (xz) compression of *b*; a None input passes through."""
    if b is None:
        return None
    compressor = LZMACompressor()
    return b"".join((compressor.compress(b), compressor.flush()))
def generate_lzma_stream(data: bytes, dict_size: int = 33554432, lc: int = 3, lp: int = 1, pb: int = 1) -> bytes:
    """Encode *data* as a legacy .lzma (LZMA_ALONE) stream.

    :param data: raw bytes to compress
    :param dict_size: LZMA dictionary size in bytes (default 32 MiB)
    :param lc: literal context bits
    :param lp: literal position bits
    :param pb: position bits
    :return: the complete compressed stream, header included
    """
    filter_chain = [
        {
            'id': FILTER_LZMA1,
            'dict_size': dict_size,
            'lc': lc,
            'lp': lp,
            'pb': pb,
        }
    ]
    encoder = LZMACompressor(format=FORMAT_ALONE, filters=filter_chain)
    head = encoder.compress(data)
    tail = encoder.flush()
    return head + tail
def urlstate(self, encryption_key):
    """
    Will return a url safe representation of the state.

    Pipeline: JSON-encode the state dict, LZMA-compress, AES-encrypt,
    LZMA-compress again, then urlsafe-base64 encode.

    :type encryption_key: Key used for encryption.
    :rtype: str

    :return: Url representation av of the state.
    """
    def _pack(payload):
        # One-shot LZMA compression of *payload*.
        compressor = LZMACompressor()
        return compressor.compress(payload) + compressor.flush()

    serialized = json.dumps(self._state_dict).encode("UTF-8")
    encrypted = _AESCipher(encryption_key).encrypt(_pack(serialized))
    encoded = base64.urlsafe_b64encode(_pack(encrypted))
    return encoded.decode("utf-8")
def urlstate(self, encryption_key):
    """
    Will return a url safe representation of the state.

    Pipeline: JSON-encode the state data, LZMA-compress, AES-encrypt,
    LZMA-compress again, then urlsafe-base64 encode.

    :type encryption_key: Key used for encryption.
    :rtype: str

    :return: Url representation av of the state.
    """
    def _pack(payload):
        # One-shot LZMA compression of *payload*.
        compressor = LZMACompressor()
        return compressor.compress(payload) + compressor.flush()

    serialized = json.dumps(self.data).encode("UTF-8")
    encrypted = _AESCipher(encryption_key).encrypt(_pack(serialized))
    encoded = base64.urlsafe_b64encode(_pack(encrypted))
    return encoded.decode("utf-8")
class BlockWrite:
    """Incrementally compresses one xz block into *fileobj*.

    Compressed bytes are written at a self-maintained offset (``self.pos``)
    so the caller can position ``fileobj`` elsewhere between calls.
    ``finish()`` strips the stream index and footer that the underlying
    compressor appends, keeping only the block data in the file.
    """

    def __init__(
        self,
        fileobj: IOAbstract,
        check: int,
        preset: _LZMAPresetType,
        filters: _LZMAFiltersType,
    ) -> None:
        self.fileobj = fileobj
        self.check = check
        self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters)
        self.pos = 0
        # Compressing b"" makes the compressor emit the stream header right
        # away; it must equal the header expected for this check type,
        # since finish() later separates header/index/footer by offset.
        if self.compressor.compress(b"") != create_xz_header(check):
            raise XZError("block: compressor header")

    def _write(self, data: bytes) -> None:
        # Append *data* at the running offset; empty chunks are skipped so
        # no needless seek is issued.
        if data:
            self.fileobj.seek(self.pos)
            self.fileobj.write(data)
            self.pos += len(data)

    def compress(self, data: bytes) -> None:
        """Feed *data* to the compressor and write whatever it produces."""
        self._write(self.compressor.compress(data))

    def finish(self) -> Tuple[int, int]:
        """Flush the compressor and return ``(unpadded_size, uncompressed_size)``.

        The flushed tail contains: remaining block data, the stream index,
        and the 12-byte stream footer.  Only the block data is written to
        the file; index and footer are parsed for validation.

        :raises XZError: if the footer's check type does not match, or the
            index does not describe exactly one block record.
        """
        data = self.compressor.flush()

        # footer: the last 12 bytes of the flushed tail
        check, backward_size = parse_xz_footer(data[-12:])
        if check != self.check:
            raise XZError("block: compressor footer check")

        # index: the backward_size bytes immediately before the footer
        records = parse_xz_index(data[-12 - backward_size:-12])
        if len(records) != 1:
            raise XZError("block: compressor index records length")

        # remaining block data: everything before index + footer
        self._write(data[:-12 - backward_size])

        return records[0]  # (unpadded_size, uncompressed_size)
def compress(srcfd, destfd, level=9, bufsize=DEFAULT_BUFSIZE):
    """Stream-compress everything readable on *srcfd* onto *destfd*.

    NOTE(review): ``LZMACompressor(options={'level': ...})`` is the
    pyliblzma (Python 2) API, not the py3 stdlib ``lzma`` module.

    :param srcfd: readable file descriptor (consumed to EOF)
    :param destfd: writable file descriptor receiving the compressed stream
    :param level: compression level passed to the compressor
    :param bufsize: read-chunk size in bytes

    Fix vs. original: the fd-wrapping file objects were never closed, so
    buffered compressed output could be lost and both descriptors leaked;
    a ``with`` block now guarantees flush + close on every path.
    """
    setproctitle('mob compression')
    log.debug("Starting compression in process %d" % os.getpid())
    compressor = LZMACompressor(options={'level': level})
    log.debug("Compression level %d" % level)
    with os.fdopen(srcfd, 'rb') as src, os.fdopen(destfd, 'wb') as dest:
        while True:
            log.debug('Reading into buffer for compression')
            buf = src.read(bufsize)
            log.debug('Read %d bytes' % len(buf))
            if not buf:
                break
            dest.write(compressor.compress(buf))
        # clean up: emit the compressor's trailing bytes before close/flush
        dest.write(compressor.flush())
    log.debug("Compression finished")
def _save_blob(start_index, delta_list, self_ref_dict, blob_name, blob_size, statistics=None):
    """Serialize and LZMA-compress delta items into one blob file.

    Python 2 code: ``comp_data`` is a byte string and the
    ``LZMACompressor(options=...)`` call is the pyliblzma API, not the
    py3 stdlib ``lzma`` module.

    Walks ``delta_list`` from ``start_index``; each non-self-referencing
    item is serialized and compressed, followed immediately by any items
    that dedup-reference it (looked up in ``self_ref_dict``).  Stops once
    the accumulated compressed size reaches ``blob_size`` or the list is
    exhausted, then writes the blob to ``blob_name``.

    Returns ``(index, memory_offset_list, disk_offset_list)``.
    NOTE(review): on the size-triggered early return, ``index`` is the
    position of the last item written, not the next one — presumably the
    caller resumes at ``index + 1``; confirm against the call site.

    :raises DeltaError: on an unknown delta type, or when the final blob
        would be empty.
    """
    # mode = 2 indicates LZMA_SYNC_FLUSH, which show all output right after input
    comp_option = {'format': 'xz', 'level': 9}
    comp = LZMACompressor(options=comp_option)
    disk_offset_list = list()
    memory_offset_list = list()
    comp_data = ''           # accumulated compressed output (py2 str == bytes)
    original_length = 0      # total uncompressed bytes fed to the compressor
    index = start_index
    item_count = 0
    memory_overlay_size = 0
    disk_overlay_size = 0
    while index < len(delta_list):
        delta_item = delta_list[index]
        if delta_item.ref_id != DeltaItem.REF_SELF:
            # Those deduped chunks will be put right after original data
            # using deduped_list
            delta_bytes = delta_item.get_serialized()
            original_length += len(delta_bytes)
            comp_delta_bytes = comp.compress(delta_bytes)
            comp_data += comp_delta_bytes
            item_count += 1
            if delta_item.delta_type == DeltaItem.DELTA_MEMORY:
                memory_offset_list.append(delta_item.offset)
                memory_overlay_size += len(comp_delta_bytes)
            elif delta_item.delta_type == DeltaItem.DELTA_DISK:
                disk_offset_list.append(delta_item.offset)
                disk_overlay_size += len(comp_delta_bytes)
            else:
                raise DeltaError("Delta should be either memory or disk")
            # remove dependece getting required index by finding reference
            deduped_list = self_ref_dict.get(delta_item.index, None)
            if deduped_list != None:
                #LOG.debug("moving %d deduped delta item" % (len(deduped_list)))
                for deduped_item in deduped_list:
                    deduped_bytes = deduped_item.get_serialized()
                    original_length += len(deduped_bytes)
                    comp_deduped_bytes = comp.compress(deduped_bytes)
                    comp_data += comp_deduped_bytes
                    item_count += 1
                    if deduped_item.delta_type == DeltaItem.DELTA_MEMORY:
                        memory_offset_list.append(deduped_item.offset)
                        memory_overlay_size += len(comp_deduped_bytes)
                    elif deduped_item.delta_type == DeltaItem.DELTA_DISK:
                        disk_offset_list.append(deduped_item.offset)
                        disk_overlay_size += len(comp_deduped_bytes)
                    else:
                        raise DeltaError("Delta should be either memory or disk")
        # Size-triggered early exit: flush and persist what we have so far.
        if len(comp_data) >= blob_size:
            LOG.debug("savefile for %s(%ld delta item) %ld --> %ld" % \
                (blob_name, item_count, original_length, len(comp_data)))
            comp_data += comp.flush()
            blob_file = open(blob_name, "w+b")
            blob_file.write(comp_data)
            blob_file.close()
            if statistics != None:
                statistics['item_count'] = item_count
            return index, memory_offset_list, disk_offset_list
        index += 1
    # List exhausted: flush and write whatever accumulated.
    comp_data += comp.flush()
    if len(comp_data) > 0:
        blob_file = open(blob_name, "w+b")
        blob_file.write(comp_data)
        blob_file.close()
        if statistics != None:
            statistics['item_count'] = item_count
        return index, memory_offset_list, disk_offset_list
    else:
        raise DeltaError("LZMA compression is zero")
data = fd.read(HASH_CHUNKING_SIZE) #LOG.debug("data, recovering : %d %d" % (start_offset, end_offset)) delta_list.append((start_offset, end_offset, ref_hashlist_id, data)) fd.close() return delta_list if __name__ == "__main__": import random import string if sys.argv[1] == "comp": base = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(2096)) compressor = LZMACompressor(LZMA_OPTION) comp = compressor.compress(base) comp += compressor.flush() decompressor = LZMADecompressor() decomp = decompressor.decompress(comp) decomp += decompressor.flush() if base != decomp: print "result is wrong" print "%d == %d" % (len(base), len(decomp)) sys.exit(1) print "success" elif sys.argv[1] == "xdelta": base = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(4096)) modi = "~"*4096
delta_list.append((start_offset, end_offset, ref_hashlist_id, data)) fd.close() return delta_list if __name__ == "__main__": import random import string if sys.argv[1] == "comp": base = ''.join( random.choice(string.ascii_uppercase + string.digits) for x in range(2096)) compressor = LZMACompressor(LZMA_OPTION) comp = compressor.compress(base) comp += compressor.flush() decompressor = LZMADecompressor() decomp = decompressor.decompress(comp) decomp += decompressor.flush() if base != decomp: print "result is wrong" print "%d == %d" % (len(base), len(decomp)) sys.exit(1) print "success" elif sys.argv[1] == "xdelta": base = ''.join( random.choice(string.ascii_uppercase + string.digits)
def _save_blob(start_index, delta_list, self_ref_dict, blob_name, blob_size, statistics=None):
    """Serialize and LZMA-compress delta items into one blob file.

    Python 2 code: ``comp_data`` is a byte string and the
    ``LZMACompressor(options=...)`` call is the pyliblzma API, not the
    py3 stdlib ``lzma`` module.

    Walks ``delta_list`` from ``start_index``; each non-self-referencing
    item is serialized and compressed, followed immediately by any items
    that dedup-reference it (looked up in ``self_ref_dict``).  Stops once
    the accumulated compressed size reaches ``blob_size`` or the list is
    exhausted, then writes the blob to ``blob_name``.

    Returns ``(index, memory_offset_list, disk_offset_list)``.
    NOTE(review): on the size-triggered early return, ``index`` is the
    position of the last item written, not the next one — presumably the
    caller resumes at ``index + 1``; confirm against the call site.

    :raises DeltaError: on an unknown delta type, or when the final blob
        would be empty.
    """
    # mode = 2 indicates LZMA_SYNC_FLUSH, which show all output right after input
    comp_option = {'format': 'xz', 'level': 9}
    comp = LZMACompressor(options=comp_option)
    disk_offset_list = list()
    memory_offset_list = list()
    comp_data = ''           # accumulated compressed output (py2 str == bytes)
    original_length = 0      # total uncompressed bytes fed to the compressor
    index = start_index
    item_count = 0
    memory_overlay_size = 0
    disk_overlay_size = 0
    while index < len(delta_list):
        delta_item = delta_list[index]
        if delta_item.ref_id != DeltaItem.REF_SELF:
            # Those deduped chunks will be put right after original data
            # using deduped_list
            delta_bytes = delta_item.get_serialized()
            original_length += len(delta_bytes)
            comp_delta_bytes = comp.compress(delta_bytes)
            comp_data += comp_delta_bytes
            item_count += 1
            if delta_item.delta_type == DeltaItem.DELTA_MEMORY:
                memory_offset_list.append(delta_item.offset)
                memory_overlay_size += len(comp_delta_bytes)
            elif delta_item.delta_type == DeltaItem.DELTA_DISK:
                disk_offset_list.append(delta_item.offset)
                disk_overlay_size += len(comp_delta_bytes)
            else:
                raise DeltaError("Delta should be either memory or disk")
            # remove dependece getting required index by finding reference
            deduped_list = self_ref_dict.get(delta_item.index, None)
            if deduped_list != None:
                #LOG.debug("moving %d deduped delta item" % (len(deduped_list)))
                for deduped_item in deduped_list:
                    deduped_bytes = deduped_item.get_serialized()
                    original_length += len(deduped_bytes)
                    comp_deduped_bytes = comp.compress(deduped_bytes)
                    comp_data += comp_deduped_bytes
                    item_count += 1
                    if deduped_item.delta_type == DeltaItem.DELTA_MEMORY:
                        memory_offset_list.append(deduped_item.offset)
                        memory_overlay_size += len(comp_deduped_bytes)
                    elif deduped_item.delta_type == DeltaItem.DELTA_DISK:
                        disk_offset_list.append(deduped_item.offset)
                        disk_overlay_size += len(comp_deduped_bytes)
                    else:
                        raise DeltaError("Delta should be either memory or disk")
        # Size-triggered early exit: flush and persist what we have so far.
        if len(comp_data) >= blob_size:
            LOG.debug("savefile for %s(%ld delta item) %ld --> %ld" % \
                (blob_name, item_count, original_length, len(comp_data)))
            comp_data += comp.flush()
            blob_file = open(blob_name, "w+b")
            blob_file.write(comp_data)
            blob_file.close()
            if statistics != None:
                statistics['item_count'] = item_count
            return index, memory_offset_list, disk_offset_list
        index += 1
    # List exhausted: flush and write whatever accumulated.
    comp_data += comp.flush()
    if len(comp_data) > 0:
        blob_file = open(blob_name, "w+b")
        blob_file.write(comp_data)
        blob_file.close()
        if statistics != None:
            statistics['item_count'] = item_count
        return index, memory_offset_list, disk_offset_list
    else:
        raise DeltaError("LZMA compression is zero")