Example #1
0
def test(src, level=1):
    """Round-trip ``src`` through LZO and verify the result.

    ``src`` is compressed at ``level``; both the compressed stream and the
    output of ``lzo.optimize`` on it are decompressed again.  The two
    results and their Adler-32 checksums must equal those of ``src``.
    On success the original and compressed lengths are printed.

    Raises:
        lzo.error: if a decompressed result ("internal error 1") or its
            checksum ("internal error 2") differs from the input.
    """
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    if src != u1 or src != u2:
        # Call form works on Python 2 and 3; ``raise E, msg`` is 2-only.
        raise lzo.error("internal error 1")
    if a0 != a1 or a0 != a2:
        raise lzo.error("internal error 2")
    # Single-argument print() behaves the same on Python 2 and 3.
    print("compressed %6d -> %6d" % (len(src), len(c)))
Example #2
0
def test(src, level=1):
    """Compress ``src`` with LZO and check that it survives a round trip.

    The data is compressed at ``level``, then decompressed twice: once
    directly and once after passing the compressed stream through
    ``lzo.optimize``.  Both results, and their Adler-32 checksums, are
    compared against the input.  The input and compressed lengths are
    printed when every check passes.

    Raises:
        lzo.error: "internal error 1" when the decompressed bytes differ
            from ``src``; "internal error 2" when a checksum differs.
    """
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    if src != u1 or src != u2:
        # Instantiate the exception explicitly: the Python 2 only
        # ``raise E, msg`` form is a syntax error on Python 3.
        raise lzo.error("internal error 1")
    if a0 != a1 or a0 != a2:
        raise lzo.error("internal error 2")
    # print() with one argument is portable across Python 2 and 3.
    print("compressed %6d -> %6d" % (len(src), len(c)))
Example #3
0
def test(src, level=1):
    """Verify an LZO compress / optimize / decompress round trip of ``src``.

    Compresses ``src`` at ``level`` and decompresses both the compressed
    stream and its ``lzo.optimize`` result.  The decompressed data and
    the Adler-32 checksums are compared with those of the input, and the
    input and compressed lengths are printed on success.

    Raises:
        lzo.error: "internal error 1" on a data mismatch, "internal
            error 2" on a checksum mismatch.
    """
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    # ``cmp()`` no longer exists on Python 3; ``!=`` expresses the same
    # "not equal" test on both major versions.
    if src != u1 or src != u2:
        raise lzo.error("internal error 1")
    if a0 != a1 or a0 != a2:
        raise lzo.error("internal error 2")
    print("compressed %6d -> %6d" % (len(src), len(c)))
Example #4
0
def gen_raw(src, level=1):
    """Round-trip ``src`` through LZO using the ``False`` flag variants.

    Every ``lzo`` call here passes ``False`` as an extra argument and, for
    ``decompress``/``optimize``, an explicit output length (presumably
    python-lzo's header-less mode -- confirm against the binding).  The
    compressed stream is decompressed directly, after ``lzo.optimize``,
    and once more with an output length overstated by 100.  All three
    results must equal ``src`` and the first two Adler-32 checksums must
    equal that of ``src``.  The input and compressed lengths are printed
    on success.

    Raises:
        lzo.error: "internal error 1" on a data mismatch, "internal
            error 2" on a checksum mismatch.
    """
    expected_len = len(src)
    checksum_in = lzo.adler32(src)

    packed = lzo.compress(src, level, False)
    plain = lzo.decompress(packed, False, expected_len)
    checksum_plain = lzo.adler32(plain)

    optimized = lzo.optimize(packed, False, expected_len)
    plain_optimized = lzo.decompress(optimized, False, expected_len)
    checksum_optimized = lzo.adler32(plain_optimized)

    # Decompression must still succeed with an overstated output length.
    plain_oversized = lzo.decompress(packed, False, expected_len + 100)

    outputs = (plain, plain_optimized, plain_oversized)
    if any(src != out for out in outputs):
        raise lzo.error("internal error 1")

    checksums = (checksum_plain, checksum_optimized)
    if any(checksum_in != value for value in checksums):
        raise lzo.error("internal error 2")

    print("compressed %6d -> %6d" % (expected_len, len(packed)))
Example #5
0
def gen(src, level=1):
    """Round-trip ``src`` through LZO and verify data and checksums.

    Compresses ``src`` at ``level``, then decompresses both the compressed
    stream and the output of ``lzo.optimize`` on it.  Each result must
    equal ``src`` and have the same Adler-32 checksum.  The input and
    compressed lengths are printed on success.

    Raises:
        lzo.error: "internal error 1: <src> <result>" when a decompressed
            result differs from ``src``; "internal error 2" when a
            checksum differs.
    """
    a0 = lzo.adler32(src)
    c = lzo.compress(src, level)
    u1 = lzo.decompress(c)
    a1 = lzo.adler32(u1)
    o = lzo.optimize(c)
    u2 = lzo.decompress(o)
    a2 = lzo.adler32(u2)
    if src != u1:
        # Interpolate explicitly: exceptions do not apply %-formatting to
        # their arguments, so the placeholders were never filled in.
        raise lzo.error("internal error 1: %r %r" % (src, u1))
    if src != u2:
        raise lzo.error("internal error 1: %r %r" % (src, u2))
    if a0 != a1 or a0 != a2:
        raise lzo.error("internal error 2")
    print("compressed %6d -> %6d" % (len(src), len(c)))
Example #6
0
 def _decode_key_block(self, key_block_compressed, key_block_info_list):
     key_list = []
     i = 0
     for compressed_size, decompressed_size in key_block_info_list:
         start = i;
         end = i + compressed_size
         # 4 bytes : compression type
         key_block_type = key_block_compressed[start:start+4]
         if key_block_type == b'\x00\x00\x00\x00':
             # extract one single key block into a key list
             key_list += self._split_key_block(key_block_compressed[start+8:end])
         elif key_block_type == b'\x01\x00\x00\x00':
             if not HAVE_LZO:
                 log.error("LZO compression is not supported")
                 break
             # 4 bytes as adler32 checksum
             adler32 = unpack('>I', key_block_compressed[start+4:start+8])[0]
             # decompress key block
             header = '\xf0' + pack('>I', decompressed_size)
             key_block = lzo.decompress(header + key_block_compressed[start+8:end])
             # notice that lzo 1.x return signed value
             assert(adler32 == lzo.adler32(key_block) & 0xffffffff)
             # extract one single key block into a key list
             key_list += self._split_key_block(key_block)
         elif key_block_type == b'\x02\x00\x00\x00':
             # 4 bytes same as end of block
             assert(key_block_compressed[start+4:start+8] == key_block_compressed[end-4:end])
             # decompress key block
             key_block = zlib.decompress(key_block_compressed[start+self._number_width:end])
             # extract one single key block into a key list
             key_list += self._split_key_block(key_block)
         i += compressed_size
     return key_list
Example #7
0
 def _decode_key_block(self, key_block_compressed, key_block_info_list):
     key_list = []
     i = 0
     for compressed_size, decompressed_size in key_block_info_list:
         start = i;
         end = i + compressed_size
         # 4 bytes : compression type
         key_block_type = key_block_compressed[start:start+4]
         if key_block_type == '\x00\x00\x00\x00':
             # extract one single key block into a key list
             key_list += self._split_key_block(key_block_compressed[start+8:end])
         elif key_block_type == '\x01\x00\x00\x00':
             if not HAVE_LZO:
                 print "LZO compression is not supported"
                 break
             # 4 bytes as adler32 checksum
             adler32 = unpack('>I', key_block_compressed[start+4:start+8])[0]
             # decompress key block
             header = '\xf0' + pack('>I', decompressed_size)
             key_block = lzo.decompress(header + key_block_compressed[start+8:end])
             # notice that lzo 1.x return signed value
             assert(adler32 == lzo.adler32(key_block) & 0xffffffff)
             # extract one single key block into a key list
             key_list += self._split_key_block(key_block)
         elif key_block_type == '\x02\x00\x00\x00':
             # 4 bytes same as end of block
             assert(key_block_compressed[start+4:start+8] == key_block_compressed[end-4:end])
             # decompress key block
             key_block = zlib.decompress(key_block_compressed[start+self._number_width:end])
             # extract one single key block into a key list
             key_list += self._split_key_block(key_block)
         i += compressed_size
     return key_list
Example #8
0
def compressor_worker():
    """Process queued files and append one descriptor per file.

    Pulls ``(base, relative_base, filename)`` tuples from the module-level
    queue ``q`` until it reports empty.  Each file is read and compressed
    with ``lzo.compress`` at ``options.compression``, and a dict describing
    it (paths, hash, checksum, sizes, extension, data) is appended to
    ``compressed_data_chunks``.  With ``options.verbose`` set, a
    tab-separated summary line is printed per file.

    The compressed form is currently never stored: the cutoff test ends in
    ``and False``, so ``compression_used`` stays ``False`` and the raw
    contents are what gets recorded.
    """
    # NOTE(review): if several workers share ``q``, another worker could
    # take the last item between empty() and get(), leaving this get()
    # blocked -- confirm how the workers are started.
    while not q.empty():
        w_base, w_rel_base, w_f = q.get()

        w_rel_base = '' if w_rel_base == '.' else w_rel_base

        abs_path = os.path.join(w_base, w_f)
        rel_path = os.path.join(w_rel_base, w_f)

        # extension without the leading dot
        extension = os.path.splitext(rel_path)[1][1:]

        # Close the handle deterministically instead of leaking it.
        with open(abs_path) as source_file:
            raw_filestring = source_file.read()
        compressed_filestring = lzo.compress(raw_filestring, options.compression)

        len_raw = len(raw_filestring)
        len_compressed = len(compressed_filestring)

        # ratio compressed/raw; 0 for empty files to avoid dividing by zero
        compression_factor = (float(len_compressed) / len_raw) if len_raw else 0
        compression_used = False

        # The trailing ``and False`` keeps this branch switched off.
        if compression_factor < options.cutoff and False:
            compression_used = True

        string_final = compressed_filestring if compression_used else raw_filestring
        len_final = len(string_final)
        adler32_final = lzo.adler32(string_final)

        compressed_data_chunks.append({
            'path': rel_path,
            'path_mmh3': mmh3.hash64(rel_path)[0],
            'adler32': adler32_final,
            'size_before': len_raw,
            'size_after': len_final,
            'factor': compression_factor,
            'compression': 1 if compression_used else 0,
            'extension_str': extension,
            'extension': extensions[extension] if extension in extensions else 0,
            'data': string_final
        })

        if options.verbose:
            print('\t'.join((
                'Y' if compression_used else 'N',
                extension,
                '%.02f' % (compression_factor * 100.0),
                str(len_raw / 1024),
                str(len_final / 1024),
                str(adler32_final),
                rel_path
            )))

        q.task_done()
Example #9
0
    def _decode_record_block(self):
        f = open(self._fname, 'rb')
        f.seek(self._record_block_offset)

        num_record_blocks       = self._read_number(f)
        num_entries             = self._read_number(f)
        assert(num_entries == self._num_entries)
        record_block_info_size  = self._read_number(f)
        record_block_size       = self._read_number(f)

        # record block info section
        record_block_info_list = []
        size_counter = 0
        for i in range(num_record_blocks):
            compressed_size     = self._read_number(f)
            decompressed_size   = self._read_number(f)
            record_block_info_list += [(compressed_size, decompressed_size)]
            size_counter += self._number_width * 2
        assert(size_counter == record_block_info_size)

        # actual record block data
        offset = 0
        i = 0
        size_counter = 0
        for compressed_size, decompressed_size in record_block_info_list:
            record_block_compressed = f.read(compressed_size)
            # 4 bytes indicates block compression type
            record_block_type = record_block_compressed[:4]
            # no compression
            if record_block_type == b'\x00\x00\x00\x00':
                record_block = record_block_compressed[8:]
            # lzo compression
            elif record_block_type == b'\x01\x00\x00\x00':
                if not HAVE_LZO:
                    log.error("LZO compression is not supported")
                    break
                # 4 bytes as adler32 checksum
                adler32 = unpack('>I', record_block_compressed[4:8])[0]
                # decompress
                header = '\xf0' + pack('>I', decompressed_size)
                record_block = lzo.decompress(header + record_block_compressed[8:])
                # notice that lzo 1.x return signed value
                assert(adler32 == lzo.adler32(record_block) & 0xffffffff)
            # zlib compression
            elif record_block_type == b'\x02\x00\x00\x00':
                # 4 bytes as checksum
                assert(record_block_compressed[4:8] == record_block_compressed[-4:])
                # compressed contents
                record_block = zlib.decompress(record_block_compressed[8:])
            assert(len(record_block) == decompressed_size)
            # split record block according to the offset info from key block
            while i < len(self._key_list):
                record_start, key_text = self._key_list[i]
                # reach the end of current record block
                if record_start - offset >= len(record_block):
                    break
                # record end index
                if i < len(self._key_list)-1:
                    record_end = self._key_list[i+1][0]
                else:
                    record_end = len(record_block) + offset
                i += 1
                record = record_block[record_start-offset:record_end-offset]
                # convert to utf-8
                #record = record.decode(self._encoding, errors='ignore').strip(u'\x00').encode('utf-8')
                record = record.decode(self._encoding, errors='ignore').strip(u'\x00')
                # substitute styles
                if self._substyle and self._stylesheet:
                    record = self._substitute_stylesheet(record)

                yield key_text, record
            offset += len(record_block)
            size_counter += compressed_size
        assert(size_counter == record_block_size)

        f.close()
Example #10
0
    def _decode_record_block(self):
        f = open(self._fname, 'rb')
        f.seek(self._record_block_offset)

        num_record_blocks = self._read_number(f)
        num_entries = self._read_number(f)
        assert (num_entries == self._num_entries)
        record_block_info_size = self._read_number(f)
        record_block_size = self._read_number(f)

        # record block info section
        record_block_info_list = []
        size_counter = 0
        for i in range(num_record_blocks):
            compressed_size = self._read_number(f)
            decompressed_size = self._read_number(f)
            record_block_info_list += [(compressed_size, decompressed_size)]
            size_counter += self._number_width * 2
        assert (size_counter == record_block_info_size)

        # actual record block data
        offset = 0
        i = 0
        size_counter = 0
        for compressed_size, decompressed_size in record_block_info_list:
            record_block_compressed = f.read(compressed_size)
            # 4 bytes indicates block compression type
            record_block_type = record_block_compressed[:4]
            # no compression
            if record_block_type == '\x00\x00\x00\x00':
                record_block = record_block_compressed[8:]
            # lzo compression
            elif record_block_type == '\x01\x00\x00\x00':
                if not HAVE_LZO:
                    log.error("LZO compression is not supported")
                    break
                # 4 bytes as adler32 checksum
                adler32 = unpack('>I', record_block_compressed[4:8])[0]
                # decompress
                header = '\xf0' + pack('>I', decompressed_size)
                record_block = lzo.decompress(header +
                                              record_block_compressed[8:])
                # notice that lzo 1.x return signed value
                assert (adler32 == lzo.adler32(record_block) & 0xffffffff)
            # zlib compression
            elif record_block_type == '\x02\x00\x00\x00':
                # 4 bytes as checksum
                assert (record_block_compressed[4:8] ==
                        record_block_compressed[-4:])
                # compressed contents
                record_block = zlib.decompress(record_block_compressed[8:])
            assert (len(record_block) == decompressed_size)
            # split record block according to the offset info from key block
            while i < len(self._key_list):
                record_start, key_text = self._key_list[i]
                # reach the end of current record block
                if record_start - offset >= len(record_block):
                    break
                # record end index
                if i < len(self._key_list) - 1:
                    record_end = self._key_list[i + 1][0]
                else:
                    record_end = len(record_block) + offset
                i += 1
                record = record_block[record_start - offset:record_end -
                                      offset]
                # convert to utf-8
                record = record.decode(
                    self._encoding,
                    errors='ignore').strip(u'\x00').encode('utf-8')
                # substitute styles
                if self._substyle and self._stylesheet:
                    record = self._substitute_stylesheet(record)

                yield key_text, record
            offset += len(record_block)
            size_counter += compressed_size
        assert (size_counter == record_block_size)

        f.close()
Example #11
0
    def _decode_record_block(self):
        f = open(self._fname, 'rb')
        f.seek(self._record_block_offset)

        num_record_blocks       = self._read_number(f)
        num_entries             = self._read_number(f)
        assert(num_entries == self._num_entries)
        record_block_info_size  = self._read_number(f)
        record_block_size       = self._read_number(f)

        # record block info section
        record_block_info_list = []
        size_counter = 0
        for i in range(num_record_blocks):
            compressed_size = self._read_number(f)
            decompressed_size = self._read_number(f)
            record_block_info_list += [(compressed_size, decompressed_size)]
            size_counter += self._number_width * 2
        assert(size_counter == record_block_info_size)

        # actual record block
        offset = 0
        i = 0
        size_counter = 0
        for compressed_size, decompressed_size in record_block_info_list:
            record_block_compressed = f.read(compressed_size)
            record_block_type = record_block_compressed[:4]
            if record_block_type == '\x00\x00\x00\x00':
                record_block = record_block_compressed[8:]
            elif record_block_type == '\x01\x00\x00\x00':
                if not HAVE_LZO:
                    print "LZO compression is not supported"
                    break
                # 4 bytes as adler32 checksum
                adler32 = unpack('>I', record_block_compressed[4:8])[0]
                # decompress
                header = '\xf0' + pack('>I', decompressed_size)
                record_block = lzo.decompress(header + record_block_compressed[8:])
                # notice that lzo 1.x return signed value
                assert(adler32 == lzo.adler32(record_block) & 0xffffffff)
            elif record_block_type == '\x02\x00\x00\x00':
                # 4 bytes as checksum
                assert(record_block_compressed[4:8] == record_block_compressed[-4:])
                # compressed contents
                record_block = zlib.decompress(record_block_compressed[8:])
            assert(len(record_block) == decompressed_size)
            # split record block according to the offset info from key block
            while i < len(self._key_list):
                record_start, key_text = self._key_list[i]
                # reach the end of current record block
                if record_start - offset >= len(record_block):
                    break
                # record end index
                if i < len(self._key_list)-1:
                    record_end = self._key_list[i+1][0]
                else:
                    record_end = len(record_block) + offset
                i += 1
                data = record_block[record_start-offset:record_end-offset]
                yield key_text, data
            offset += len(record_block)
            size_counter += compressed_size
        assert(size_counter == record_block_size)

        f.close()
Example #12
0
    def _decode_record_block(self):
        f = open(self._fname, "rb")
        f.seek(self._record_block_offset)

        num_record_blocks = self._read_number(f)
        num_entries = self._read_number(f)
        assert num_entries == self._num_entries
        record_block_info_size = self._read_number(f)
        record_block_size = self._read_number(f)

        # record block info section
        record_block_info_list = []
        size_counter = 0
        for i in range(num_record_blocks):
            compressed_size = self._read_number(f)
            decompressed_size = self._read_number(f)
            record_block_info_list += [(compressed_size, decompressed_size)]
            size_counter += self._number_width * 2
        assert size_counter == record_block_info_size

        # actual record block
        offset = 0
        i = 0
        size_counter = 0
        for compressed_size, decompressed_size in record_block_info_list:
            record_block_compressed = f.read(compressed_size)
            record_block_type = record_block_compressed[:4]
            if record_block_type == "\x00\x00\x00\x00":
                record_block = record_block_compressed[8:]
            elif record_block_type == "\x01\x00\x00\x00":
                if not HAVE_LZO:
                    log.error("LZO compression is not supported")
                    break
                # 4 bytes as adler32 checksum
                adler32 = unpack(">I", record_block_compressed[4:8])[0]
                # decompress
                header = "\xf0" + pack(">I", decompressed_size)
                record_block = lzo.decompress(header +
                                              record_block_compressed[8:])
                # notice that lzo 1.x return signed value
                assert adler32 == lzo.adler32(record_block) & 0xFFFFFFFF
            elif record_block_type == "\x02\x00\x00\x00":
                # 4 bytes as checksum
                assert record_block_compressed[4:8] == record_block_compressed[
                    -4:]
                # compressed contents
                record_block = zlib.decompress(record_block_compressed[8:])
            assert len(record_block) == decompressed_size
            # split record block according to the offset info from key block
            while i < len(self._key_list):
                record_start, key_text = self._key_list[i]
                # reach the end of current record block
                if record_start - offset >= len(record_block):
                    break
                # record end index
                if i < len(self._key_list) - 1:
                    record_end = self._key_list[i + 1][0]
                else:
                    record_end = len(record_block) + offset
                i += 1
                data = record_block[record_start - offset:record_end - offset]
                yield key_text, data
            offset += len(record_block)
            size_counter += compressed_size
        assert size_counter == record_block_size

        f.close()
Example #13
0
        file_descriptors.append(descriptor)

        file_data.append(c['data'])

        num_bytes += c['size_before']
        num_compressed_bytes += c['size_after']

    combined_data = ''.join(file_data)

    file_descriptors_path = options.output
    file_data_path = os.path.splitext(options.output)[0] + '.pak'

    output = {
        'version': 2,
        'file_descriptors': file_descriptors,
        'data_file_path': os.path.basename(file_data_path),
        'descriptors_file_path': os.path.basename(file_descriptors_path),
        'size': len(combined_data),
        'num_files': len(file_descriptors),
        'crc32': lzo.crc32(combined_data),
        'adler32': lzo.adler32(combined_data)
    }

    data_json = json.dumps(output)

    open(file_data_path, "w").write(combined_data)
    open(file_descriptors_path, "w").write(data_json)

    if options.verbose:
        print("Total: %d bytes -> %d bytes (%.02f%%)" % (num_bytes, num_compressed_bytes, (float(num_compressed_bytes) / num_bytes) * 100.0))