def test(src, level=1):
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    if src != u1 or src != u2:
        raise lzo.error, "internal error 1"
    if a0 != a1 or a0 != a2:
        raise lzo.error, "internal error 2"
    print "compressed %6d -> %6d" % (len(src), len(c))
def test(src, level=1):
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    if cmp(src, u1) != 0 or cmp(src, u2) != 0:
        raise lzo.error, "internal error 1"
    if cmp(a0, a1) != 0 or cmp(a0, a2) != 0:
        raise lzo.error, "internal error 2"
    print "compressed %6d -> %6d" % (len(src), len(c))
def gen_raw(src, level=1):
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level, False)
    u1 = lzo.decompress(c, False, len(src))
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c, False, len(src))
    u2 = lzo.decompress(o, False, len(src))
    a2 = lzo.adler32(u2)
    # make sure it still works when you overstate the output buffer length
    u3 = lzo.decompress(c, False, len(src) + 100)
    if src != u1 or src != u2 or src != u3:
        raise lzo.error("internal error 1")
    if a0 != a1 or a0 != a2:
        raise lzo.error("internal error 2")
    print("compressed %6d -> %6d" % (len(src), len(c)))
def gen(src, level=1):
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    if src != u1:
        raise lzo.error("internal error 1: %r %r", src, u1)
    if src != u2:
        raise lzo.error("internal error 1: %r %r", src, u2)
    if a0 != a1 or a0 != a2:
        raise lzo.error("internal error 2")
    print("compressed %6d -> %6d" % (len(src), len(c)))
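The four helpers above all exercise the same round trip: compress, optimize, decompress, and compare adler32 checksums at every step. A minimal self-contained sketch of that pattern, assuming only that the python-lzo module is importable as lzo:

import lzo

def roundtrip(src, level=1):
    # checksum of the original input
    a0 = lzo.adler32(src)
    compressed = lzo.compress(src, level)
    # optimize() rewrites the compressed stream for faster decompression
    optimized = lzo.optimize(compressed)
    for blob in (compressed, optimized):
        out = lzo.decompress(blob)
        # both the bytes and the checksum must survive the round trip
        assert out == src
        assert lzo.adler32(out) == a0
    print("compressed %6d -> %6d" % (len(src), len(compressed)))

roundtrip(b"hello world, " * 1000)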
def _decode_key_block(self, key_block_compressed, key_block_info_list):
    key_list = []
    i = 0
    for compressed_size, decompressed_size in key_block_info_list:
        start = i
        end = i + compressed_size
        # 4 bytes : compression type
        key_block_type = key_block_compressed[start:start+4]
        if key_block_type == b'\x00\x00\x00\x00':
            # extract one single key block into a key list
            key_list += self._split_key_block(key_block_compressed[start+8:end])
        elif key_block_type == b'\x01\x00\x00\x00':
            if not HAVE_LZO:
                log.error("LZO compression is not supported")
                break
            # 4 bytes as adler32 checksum
            adler32 = unpack('>I', key_block_compressed[start+4:start+8])[0]
            # decompress key block (prepend the header python-lzo expects)
            header = b'\xf0' + pack('>I', decompressed_size)
            key_block = lzo.decompress(header + key_block_compressed[start+8:end])
            # notice that lzo 1.x returns a signed value
            assert(adler32 == lzo.adler32(key_block) & 0xffffffff)
            # extract one single key block into a key list
            key_list += self._split_key_block(key_block)
        elif key_block_type == b'\x02\x00\x00\x00':
            # 4 bytes same as end of block
            assert(key_block_compressed[start+4:start+8] == key_block_compressed[end-4:end])
            # decompress key block
            key_block = zlib.decompress(key_block_compressed[start+self._number_width:end])
            # extract one single key block into a key list
            key_list += self._split_key_block(key_block)
        i += compressed_size
    return key_list
def _decode_key_block(self, key_block_compressed, key_block_info_list):
    key_list = []
    i = 0
    for compressed_size, decompressed_size in key_block_info_list:
        start = i
        end = i + compressed_size
        # 4 bytes : compression type
        key_block_type = key_block_compressed[start:start+4]
        if key_block_type == '\x00\x00\x00\x00':
            # extract one single key block into a key list
            key_list += self._split_key_block(key_block_compressed[start+8:end])
        elif key_block_type == '\x01\x00\x00\x00':
            if not HAVE_LZO:
                print "LZO compression is not supported"
                break
            # 4 bytes as adler32 checksum
            adler32 = unpack('>I', key_block_compressed[start+4:start+8])[0]
            # decompress key block
            header = '\xf0' + pack('>I', decompressed_size)
            key_block = lzo.decompress(header + key_block_compressed[start+8:end])
            # notice that lzo 1.x return signed value
            assert(adler32 == lzo.adler32(key_block) & 0xffffffff)
            # extract one single key block into a key list
            key_list += self._split_key_block(key_block)
        elif key_block_type == '\x02\x00\x00\x00':
            # 4 bytes same as end of block
            assert(key_block_compressed[start+4:start+8] == key_block_compressed[end-4:end])
            # decompress key block
            key_block = zlib.decompress(key_block_compressed[start+self._number_width:end])
            # extract one single key block into a key list
            key_list += self._split_key_block(key_block)
        i += compressed_size
    return key_list
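Both key-block decoders rely on the same trick for the LZO case: the MDX file stores a raw LZO stream with no python-lzo header, so the code fabricates the header python-lzo expects (a 0xf0 marker byte followed by the big-endian uncompressed size) before calling lzo.decompress, and masks lzo.adler32 to an unsigned 32-bit value before comparing it with the stored checksum. A standalone sketch of just those two steps; the function names are illustrative, not part of the readers above:

import lzo
from struct import pack

def decompress_raw_lzo_block(block, decompressed_size):
    # python-lzo's decompress() wants the header its own compress() writes,
    # so prepend a fake one: 0xf0 marker byte + big-endian uncompressed size
    header = b'\xf0' + pack('>I', decompressed_size)
    return lzo.decompress(header + block)

def adler32_matches(stored, data):
    # lzo 1.x may return a signed adler32, so mask to 32 bits before comparing
    return stored == (lzo.adler32(data) & 0xffffffff)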
def compressor_worker():
    while not q.empty():
        w_base, w_rel_base, w_f = q.get()
        w_rel_base = '' if w_rel_base == '.' else w_rel_base
        abs_path = os.path.join(w_base, w_f)
        rel_path = os.path.join(w_rel_base, w_f)
        extension = os.path.splitext(rel_path)[1][1:]
        # read the file as binary so compression sees the raw bytes
        raw_filestring = open(abs_path, 'rb').read()
        compressed_filestring = lzo.compress(raw_filestring, options.compression)
        len_raw = len(raw_filestring)
        len_compressed = len(compressed_filestring)
        compression_factor = (float(len_compressed) / len_raw) if len_raw else 0
        # keep the compressed form only when it beats the cutoff ratio
        compression_used = False
        if compression_factor < options.cutoff:
            compression_used = True
        string_final = compressed_filestring if compression_used else raw_filestring
        len_final = len(string_final)
        adler32_final = lzo.adler32(string_final)
        compressed_data_chunks.append({
            'path': rel_path,
            'path_mmh3': mmh3.hash64(rel_path)[0],
            'adler32': adler32_final,
            'size_before': len_raw,
            'size_after': len_final,
            'factor': compression_factor,
            'compression': 1 if compression_used else 0,
            'extension_str': extension,
            'extension': extensions[extension] if extension in extensions else 0,
            'data': string_final
        })
        if options.verbose:
            print('\t'.join((
                'Y' if compression_used else 'N',
                extension,
                '%.02f' % (compression_factor * 100.0),
                str(len_raw / 1024),
                str(len_final / 1024),
                str(adler32_final),
                rel_path
            )))
        q.task_done()
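The interesting decision in the worker is the cutoff test: compressed bytes are kept only when the compression factor falls below options.cutoff; otherwise the raw bytes go into the archive. A standalone sketch of just that decision, with placeholder cutoff and level values rather than the script's real option defaults:

import lzo

def maybe_compress(raw, cutoff=0.9, level=9):
    # compress, then keep the result only if it is meaningfully smaller
    compressed = lzo.compress(raw, level)
    factor = (float(len(compressed)) / len(raw)) if raw else 0
    if factor < cutoff:
        return compressed, True
    return raw, False

data, used = maybe_compress(b'abc' * 1000)
print(used, len(data), lzo.adler32(data))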
def _decode_record_block(self):
    f = open(self._fname, 'rb')
    f.seek(self._record_block_offset)
    num_record_blocks = self._read_number(f)
    num_entries = self._read_number(f)
    assert(num_entries == self._num_entries)
    record_block_info_size = self._read_number(f)
    record_block_size = self._read_number(f)
    # record block info section
    record_block_info_list = []
    size_counter = 0
    for i in range(num_record_blocks):
        compressed_size = self._read_number(f)
        decompressed_size = self._read_number(f)
        record_block_info_list += [(compressed_size, decompressed_size)]
        size_counter += self._number_width * 2
    assert(size_counter == record_block_info_size)
    # actual record block data
    offset = 0
    i = 0
    size_counter = 0
    for compressed_size, decompressed_size in record_block_info_list:
        record_block_compressed = f.read(compressed_size)
        # 4 bytes indicates block compression type
        record_block_type = record_block_compressed[:4]
        # no compression
        if record_block_type == b'\x00\x00\x00\x00':
            record_block = record_block_compressed[8:]
        # lzo compression
        elif record_block_type == b'\x01\x00\x00\x00':
            if not HAVE_LZO:
                log.error("LZO compression is not supported")
                break
            # 4 bytes as adler32 checksum
            adler32 = unpack('>I', record_block_compressed[4:8])[0]
            # decompress (prepend the header python-lzo expects)
            header = b'\xf0' + pack('>I', decompressed_size)
            record_block = lzo.decompress(header + record_block_compressed[8:])
            # notice that lzo 1.x returns a signed value
            assert(adler32 == lzo.adler32(record_block) & 0xffffffff)
        # zlib compression
        elif record_block_type == b'\x02\x00\x00\x00':
            # 4 bytes as checksum
            assert(record_block_compressed[4:8] == record_block_compressed[-4:])
            # compressed contents
            record_block = zlib.decompress(record_block_compressed[8:])
        assert(len(record_block) == decompressed_size)
        # split record block according to the offset info from key block
        while i < len(self._key_list):
            record_start, key_text = self._key_list[i]
            # reach the end of current record block
            if record_start - offset >= len(record_block):
                break
            # record end index
            if i < len(self._key_list) - 1:
                record_end = self._key_list[i + 1][0]
            else:
                record_end = len(record_block) + offset
            i += 1
            record = record_block[record_start - offset:record_end - offset]
            # convert to utf-8
            #record = record.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
            record = record.decode(self._encoding, errors='ignore').strip(u'\x00')
            # substitute styles
            if self._substyle and self._stylesheet:
                record = self._substitute_stylesheet(record)
            yield key_text, record
        offset += len(record_block)
        size_counter += compressed_size
    assert(size_counter == record_block_size)
    f.close()
def _decode_record_block(self):
    f = open(self._fname, 'rb')
    f.seek(self._record_block_offset)
    num_record_blocks = self._read_number(f)
    num_entries = self._read_number(f)
    assert (num_entries == self._num_entries)
    record_block_info_size = self._read_number(f)
    record_block_size = self._read_number(f)
    # record block info section
    record_block_info_list = []
    size_counter = 0
    for i in range(num_record_blocks):
        compressed_size = self._read_number(f)
        decompressed_size = self._read_number(f)
        record_block_info_list += [(compressed_size, decompressed_size)]
        size_counter += self._number_width * 2
    assert (size_counter == record_block_info_size)
    # actual record block data
    offset = 0
    i = 0
    size_counter = 0
    for compressed_size, decompressed_size in record_block_info_list:
        record_block_compressed = f.read(compressed_size)
        # 4 bytes indicates block compression type
        record_block_type = record_block_compressed[:4]
        # no compression
        if record_block_type == '\x00\x00\x00\x00':
            record_block = record_block_compressed[8:]
        # lzo compression
        elif record_block_type == '\x01\x00\x00\x00':
            if not HAVE_LZO:
                log.error("LZO compression is not supported")
                break
            # 4 bytes as adler32 checksum
            adler32 = unpack('>I', record_block_compressed[4:8])[0]
            # decompress
            header = '\xf0' + pack('>I', decompressed_size)
            record_block = lzo.decompress(header + record_block_compressed[8:])
            # notice that lzo 1.x return signed value
            assert (adler32 == lzo.adler32(record_block) & 0xffffffff)
        # zlib compression
        elif record_block_type == '\x02\x00\x00\x00':
            # 4 bytes as checksum
            assert (record_block_compressed[4:8] == record_block_compressed[-4:])
            # compressed contents
            record_block = zlib.decompress(record_block_compressed[8:])
        assert (len(record_block) == decompressed_size)
        # split record block according to the offset info from key block
        while i < len(self._key_list):
            record_start, key_text = self._key_list[i]
            # reach the end of current record block
            if record_start - offset >= len(record_block):
                break
            # record end index
            if i < len(self._key_list) - 1:
                record_end = self._key_list[i + 1][0]
            else:
                record_end = len(record_block) + offset
            i += 1
            record = record_block[record_start - offset:record_end - offset]
            # convert to utf-8
            record = record.decode(
                self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
            # substitute styles
            if self._substyle and self._stylesheet:
                record = self._substitute_stylesheet(record)
            yield key_text, record
        offset += len(record_block)
        size_counter += compressed_size
    assert (size_counter == record_block_size)
    f.close()
def _decode_record_block(self):
    f = open(self._fname, 'rb')
    f.seek(self._record_block_offset)
    num_record_blocks = self._read_number(f)
    num_entries = self._read_number(f)
    assert(num_entries == self._num_entries)
    record_block_info_size = self._read_number(f)
    record_block_size = self._read_number(f)
    # record block info section
    record_block_info_list = []
    size_counter = 0
    for i in range(num_record_blocks):
        compressed_size = self._read_number(f)
        decompressed_size = self._read_number(f)
        record_block_info_list += [(compressed_size, decompressed_size)]
        size_counter += self._number_width * 2
    assert(size_counter == record_block_info_size)
    # actual record block
    offset = 0
    i = 0
    size_counter = 0
    for compressed_size, decompressed_size in record_block_info_list:
        record_block_compressed = f.read(compressed_size)
        record_block_type = record_block_compressed[:4]
        if record_block_type == '\x00\x00\x00\x00':
            record_block = record_block_compressed[8:]
        elif record_block_type == '\x01\x00\x00\x00':
            if not HAVE_LZO:
                print "LZO compression is not supported"
                break
            # 4 bytes as adler32 checksum
            adler32 = unpack('>I', record_block_compressed[4:8])[0]
            # decompress
            header = '\xf0' + pack('>I', decompressed_size)
            record_block = lzo.decompress(header + record_block_compressed[8:])
            # notice that lzo 1.x return signed value
            assert(adler32 == lzo.adler32(record_block) & 0xffffffff)
        elif record_block_type == '\x02\x00\x00\x00':
            # 4 bytes as checksum
            assert(record_block_compressed[4:8] == record_block_compressed[-4:])
            # compressed contents
            record_block = zlib.decompress(record_block_compressed[8:])
        assert(len(record_block) == decompressed_size)
        # split record block according to the offset info from key block
        while i < len(self._key_list):
            record_start, key_text = self._key_list[i]
            # reach the end of current record block
            if record_start - offset >= len(record_block):
                break
            # record end index
            if i < len(self._key_list)-1:
                record_end = self._key_list[i+1][0]
            else:
                record_end = len(record_block) + offset
            i += 1
            data = record_block[record_start-offset:record_end-offset]
            yield key_text, data
        offset += len(record_block)
        size_counter += compressed_size
    assert(size_counter == record_block_size)
    f.close()
def _decode_record_block(self):
    f = open(self._fname, "rb")
    f.seek(self._record_block_offset)
    num_record_blocks = self._read_number(f)
    num_entries = self._read_number(f)
    assert num_entries == self._num_entries
    record_block_info_size = self._read_number(f)
    record_block_size = self._read_number(f)
    # record block info section
    record_block_info_list = []
    size_counter = 0
    for i in range(num_record_blocks):
        compressed_size = self._read_number(f)
        decompressed_size = self._read_number(f)
        record_block_info_list += [(compressed_size, decompressed_size)]
        size_counter += self._number_width * 2
    assert size_counter == record_block_info_size
    # actual record block
    offset = 0
    i = 0
    size_counter = 0
    for compressed_size, decompressed_size in record_block_info_list:
        record_block_compressed = f.read(compressed_size)
        record_block_type = record_block_compressed[:4]
        if record_block_type == "\x00\x00\x00\x00":
            record_block = record_block_compressed[8:]
        elif record_block_type == "\x01\x00\x00\x00":
            if not HAVE_LZO:
                log.error("LZO compression is not supported")
                break
            # 4 bytes as adler32 checksum
            adler32 = unpack(">I", record_block_compressed[4:8])[0]
            # decompress
            header = "\xf0" + pack(">I", decompressed_size)
            record_block = lzo.decompress(header + record_block_compressed[8:])
            # notice that lzo 1.x return signed value
            assert adler32 == lzo.adler32(record_block) & 0xFFFFFFFF
        elif record_block_type == "\x02\x00\x00\x00":
            # 4 bytes as checksum
            assert record_block_compressed[4:8] == record_block_compressed[-4:]
            # compressed contents
            record_block = zlib.decompress(record_block_compressed[8:])
        assert len(record_block) == decompressed_size
        # split record block according to the offset info from key block
        while i < len(self._key_list):
            record_start, key_text = self._key_list[i]
            # reach the end of current record block
            if record_start - offset >= len(record_block):
                break
            # record end index
            if i < len(self._key_list) - 1:
                record_end = self._key_list[i + 1][0]
            else:
                record_end = len(record_block) + offset
            i += 1
            data = record_block[record_start - offset:record_end - offset]
            yield key_text, data
        offset += len(record_block)
        size_counter += compressed_size
    assert size_counter == record_block_size
    f.close()
file_descriptors.append(descriptor)
file_data.append(c['data'])
num_bytes += c['size_before']
num_compressed_bytes += c['size_after']

combined_data = ''.join(file_data)
file_descriptors_path = options.output
file_data_path = os.path.splitext(options.output)[0] + '.pak'
output = {
    'version': 2,
    'file_descriptors': file_descriptors,
    'data_file_path': os.path.basename(file_data_path),
    'descriptors_file_path': os.path.basename(file_descriptors_path),
    'size': len(combined_data),
    'num_files': len(file_descriptors),
    'crc32': lzo.crc32(combined_data),
    'adler32': lzo.adler32(combined_data)
}
data_json = json.dumps(output)
open(file_data_path, "w").write(combined_data)
open(file_descriptors_path, "w").write(data_json)
if options.verbose:
    print("Total: %d bytes -> %d bytes (%.02f%%)" %
          (num_bytes, num_compressed_bytes,
           (float(num_compressed_bytes) / num_bytes) * 100.0))
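The snippet above concatenates all chunk data into one .pak file and writes a JSON descriptor next to it that records the blob's size, crc32, and adler32. A hedged sketch of the corresponding integrity check on read-back, assuming that on-disk layout; verify_pak is an illustrative name, not part of the script:

import json
import os
import lzo

def verify_pak(descriptors_path):
    # load the descriptor JSON written above
    with open(descriptors_path, 'r') as f:
        meta = json.load(f)
    # the .pak sits next to the descriptor file
    pak_path = os.path.join(os.path.dirname(descriptors_path), meta['data_file_path'])
    with open(pak_path, 'rb') as f:
        blob = f.read()
    # the stored size and checksums must match the .pak contents
    assert len(blob) == meta['size']
    assert lzo.adler32(blob) == meta['adler32']
    assert lzo.crc32(blob) == meta['crc32']
    return meta['num_files']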