def decompress(buf):
    """Decompress the zlib stream in *buf* and return the decoded bytes.

    The 2-byte zlib header is read first, then DEFLATE blocks are decoded
    until one flagged as final has been processed. The last 4 bytes of
    *buf* are then checked against the Adler-32 checksum of the output.

    Only blocks compressed with fixed (BTYPE 0b01) or dynamic (BTYPE 0b10)
    Huffman codes are supported.

    Raises:
        Error: if the compression method is not 8, if a block uses an
            unsupported type, if a back-reference points before the start
            of the output, or if the checksum does not match.
    """
    reader = bitio.BitIO(buf)

    CM = reader.read(4)  # compression method (usually 8)
    if CM != 8:
        raise Error("unsupported compression method: {}".format(CM))
    reader.read(4)  # CINFO: log2(window size) - 8, not needed here
    reader.read(8)  # FLG, not needed here

    result = bytearray()

    while True:
        BFINAL = reader.read(1)
        BTYPE = reader.read(2)

        if BTYPE == 0b01:
            # compression with fixed Huffman codes for literals/lengths
            # and distances
            result += decomp_fixed(reader)
        elif BTYPE == 0b10:
            # compression with dynamic Huffman codes
            lit_len_tree, distance_tree = dynamic_tree(reader)

            while True:
                # read a literal or length
                _type, value = read_literal_or_length(reader, lit_len_tree)
                if _type == 'eob':
                    break
                elif _type == 'literal':
                    result.append(value)
                elif _type == 'length':
                    length = value
                    distance = read_distance(reader, distance_tree)
                    # A distance of 0 would silently copy result[0] and a
                    # distance past the start would raise IndexError
                    if not 0 < distance <= len(result):
                        raise Error(
                            "invalid distance: {}".format(distance))
                    # Copy byte by byte: source and destination may
                    # overlap when distance < length
                    for _ in range(length):
                        result.append(result[-distance])
        else:
            # Stored (0b00) and reserved (0b11) blocks are not decoded
            # here; continuing would misread the following bits as a new
            # block header
            raise Error("unsupported block type: {}".format(BTYPE))

        if BFINAL:
            # The Adler-32 checksum is in the last 4 bytes, most
            # significant byte first: b in the high half, a in the low half
            b1 = 256 * buf[-4] + buf[-3]
            a1 = 256 * buf[-2] + buf[-1]

            # compute it from result
            a = 1
            b = 0
            for byte in result:
                a = (a + byte) % 65521
                b = (b + a) % 65521

            # Explicit check: an assert would be skipped under "python -O"
            if a != a1 or b != b1:
                raise Error("incorrect data check")

            return bytes(result)
def writecompressed(zivcode, path):
    """Write the Lempel-Ziv code *zivcode* to *path* as a bit stream.

    *zivcode* is a sequence of (ref, letter) pairs. The first pair is
    stored as its letter only, on 8 bits. The pair at index i >= 1 is stored
    as its ref on int(log2(i)) + 1 bits, followed by its letter on 8 bits
    unless the letter is the empty string.
    """
    from math import log

    writer = bitio.BitIO(path, write=True)

    if len(zivcode) >= 1:
        first_letter = zivcode[0][1]
        writer.writeBin(ord(first_letter), 8)

    for index in range(1, len(zivcode)):
        entry = zivcode[index]
        # The reference width grows with the index of the entry
        ref_width = int(log(index, 2)) + 1
        writer.writeBin(entry[0], ref_width)
        letter = entry[1]
        if letter != '':
            writer.writeBin(ord(letter), 8)

    # NOTE(review): presumably BitIO flushes and closes the file when it is
    # deleted - confirm against the bitio module
    del writer
def compress_fixed(source, items):
    """Encode *items* as one final block using the fixed Huffman codes.

    *items* holds literal/end-of-block symbols and 4-tuples
    (length, extra_length, distance, extra_distance), where each "extra"
    element unpacks to a (value, number of bits) pair. The result is the
    header, the encoded block, zero padding and the Adler-32 checksum of
    *source*, returned as bytes.
    """
    stream = bitio.BitIO()

    def put_code(bits):
        # bits is a string of binary digits taken from the fixed code table
        stream.write_int(int(bits, 2), len(bits), order="msf")

    def put_extra(extra):
        # extra bits are only written when their width is non-zero
        amount, width = extra
        if width:
            stream.write_int(amount, width)

    # stream header
    stream.write_int(8, 4)  # compression method = 8
    stream.write_int(7, 4)  # window size = 2 ** (8 + 7)
    stream.write_int(0x9c, 8)  # FLG

    # block header
    stream.write(1)  # BFINAL = 1
    stream.write(1, 0)  # BTYPE = fixed Huffman codes

    for item in items:
        if not isinstance(item, tuple):
            # literal or end-of-block symbol
            put_code(fixed_lit_len_codes[item])
            continue
        length, extra_length, distance, extra_distance = item
        put_code(fixed_lit_len_codes[length])
        put_extra(extra_length)
        # distances use a plain 5-bit code
        stream.write_int(distance - 1, 5, order="msf")
        put_extra(extra_distance)

    # pad with 0
    # NOTE(review): presumably bitnum == 8 means the stream is on a byte
    # boundary - confirm against the bitio module
    while stream.bitnum != 8:
        stream.write(0)

    # ADLER32 checksum: b first, then a, each as high byte then low byte
    a, b = adler32(source)
    for half in (b, a):
        high, low = divmod(half, 256)
        stream.write_int(high, 8)
        stream.write_int(low, 8)

    return bytes(stream.bytestream)
def readfile(path):
    """Read the file at *path* 8 bits at a time and return it as a string.

    Every value read is converted with ``chr``, so the result holds one
    character per 8-bit value. Reading stops when the reader returns the
    string 'EOF'.

    On any error a message is written to stderr and the process exits
    with status 1.
    """
    import sys

    try:
        bithandler = bitio.BitIO(path, write=False)
        # Collect the characters and join once: repeated string
        # concatenation is quadratic in the file size
        chars = []
        value = bithandler.read(8)
        while value != 'EOF':
            chars.append(chr(value))
            value = bithandler.read(8)
        return ''.join(chars)
    except Exception as e:
        sys.stderr.write("Couldn't open " + str(path) + ": " + str(e) + "\n")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist
        sys.exit(1)
def readcompressed(path):
    """Read a compressed file from *path* and return its Lempel-Ziv code.

    The result is a list of (ref, letter) pairs. The first pair has ref 0.
    For every later pair the ref is read on int(log2(i)) + 1 bits, where i
    is the index of the pair, and the letter on 8 bits. If the input ends
    after a non-zero ref with no letter, a final (ref, '') pair is added.
    """
    from math import log

    reader = bitio.BitIO(path, write=False)
    pairs = []
    ref = 0
    position = 1

    letter = reader.read(8)
    # the reader returns the string 'EOF' once the input is exhausted
    while not (letter == 'EOF' or ref == 'EOF'):
        pairs.append((ref, chr(letter)))
        ref_width = int(log(position, 2)) + 1
        ref = reader.read(ref_width)
        letter = reader.read(8)
        position += 1

    # Something is left to record: a reference exists but has no letter
    if letter == 'EOF' and ref != 'EOF' and ref != 0:
        pairs.append((ref, ''))

    return pairs
def compress(source, window_size=32 * 1024):
    """Print code-length statistics for *source*; returns None.

    Runs LZ77 over *source*, counts literal/length and distance symbols,
    derives Huffman code lengths for them and for the code lengths
    themselves, then writes the latter on 3 bits each into a BitIO buffer
    and reads them back. Every intermediate table is printed.

    NOTE(review): match lengths and literal values are counted in the same
    table under their raw value, so they can share keys.
    """
    def tally(table, key):
        # increment the occurrence count of key
        table[key] = table.get(key, 0) + 1

    lit_len_count = {}
    distance_count = {}

    for item in lz77.LZ77().compress(source, window_size):
        if isinstance(item, tuple):
            length, distance = item
            tally(lit_len_count, length)
            tally(distance_count, distance)
        else:
            tally(lit_len_count, item)

    print(lit_len_count)
    print(distance_count)

    lit_len_lengths = huffman.codelengths_from_frequencies(lit_len_count)
    distance_lengths = huffman.codelengths_from_frequencies(distance_count)

    # how often each code length occurs across both tables
    codelengths_count = {}
    for _symbol, nbits in lit_len_lengths + distance_lengths:
        tally(codelengths_count, nbits)

    print(codelengths_count)

    cl_lengths = dict(
        huffman.codelengths_from_frequencies(codelengths_count))

    alphabet = (16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14,
                1, 15)
    codelengths_list = [cl_lengths.get(symbol, 0) for symbol in alphabet]
    # drop trailing zeros
    while codelengths_list[-1] == 0:
        codelengths_list.pop()

    print(codelengths_list)

    out = bitio.BitIO()
    for nbits in codelengths_list:
        out.write_int(nbits, 3)

    # rewind, then read the values back next to the expected ones
    out.move(-out.pos)
    for nbits in codelengths_list:
        print(nbits, out.read(3))
def compress(source, window_size=32 * 1024):
    """Compress *source* into a single-block stream and return it as bytes.

    The input is first run through LZ77 with the given *window_size*. The
    resulting literals and (length, distance) pairs are then written with
    dynamic Huffman tables, followed by zero padding and the Adler-32
    checksum of *source*. When the estimated gain of dynamic tables is
    negative, or when LZ77 produced no (length, distance) pair, the work
    is delegated to compress_fixed() instead.
    """
    lz = lz77.LZ77()
    # Occurrence counts of literal/length symbols and of distance codes
    lit_len_count = {}
    distance_count = {}
    # Symbols in emission order: literals as-is, matches as 4-tuples
    store = []
    # Total number of source bytes covered by (length, distance) pairs
    replaced = 0
    nb_tuples = 0
    for item in lz.compress(source, window_size):
        if isinstance(item, tuple):
            nb_tuples += 1
            length, distance = item
            replaced += length
            # The first element returned is the code; the remaining ones
            # are kept as the extra bits
            length_code, *extra_length = length_to_code(length)
            lit_len_count[length_code] = lit_len_count.get(length_code, 0) + 1
            distance_code, *extra_dist = distance_to_code(distance)
            distance_count[distance_code] = \
                distance_count.get(distance_code, 0) + 1
            store.append(
                (length_code, extra_length, distance_code, extra_dist))
        else:
            literal = item
            lit_len_count[literal] = lit_len_count.get(literal, 0) + 1
            store.append(literal)

    # 256 marks the end of the block
    store.append(256)

    # Estimate how many bytes would be saved with dynamic Huffman tables
    # The tables take about 100 bytes, and each (length, distance) tuple is
    # encoded in about 20 bits
    score = replaced - 100 - (nb_tuples * 20 // 8)

    if score < 0:
        # If dynamic tables is going to be inefficient, use fixed tables
        return compress_fixed(source, store)

    lit_len_count[256] = 1  # end of block

    lit_len_codelengths = huffman.codelengths_from_frequencies(lit_len_count)
    lit_len_codes = huffman.normalized(lit_len_codelengths)

    coded_lit_len = list(cl_encode(lit_len_codelengths))
    # Highest literal/length symbol in use, plus one, minus 257
    HLIT = 1 + max(car for (car, _) in lit_len_codelengths) - 257

    coded_distance = []
    HDIST = 1
    if distance_count:
        distance_codelengths = huffman.codelengths_from_frequencies(
            distance_count)
        distance_codes = huffman.normalized(distance_codelengths)
        coded_distance = list(cl_encode(distance_codelengths))
        # Highest distance code in use, plus one, minus 1
        HDIST = 1 + max(dist for (dist, _) in distance_codelengths) - 1
    else:
        # No (length, distance) pair at all: fall back to fixed tables
        return compress_fixed(source, store)

    # Count the symbols used to describe both code length tables; tuple
    # items carry their symbol in first position
    codelengths_count = {}
    for coded in coded_lit_len, coded_distance:
        for item in coded:
            length = item[0] if isinstance(item, tuple) else item
            codelengths_count[length] = codelengths_count.get(length, 0) + 1

    codelengths_codelengths = huffman.codelengths_from_frequencies(
        codelengths_count)
    codelengths_dict = dict(codelengths_codelengths)
    cl_codes = huffman.normalized(codelengths_codelengths)

    # Order in which the code lengths of the code length symbols are written
    alphabet = (16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14,
                1, 15)
    codelengths_list = [codelengths_dict.get(car, 0) for car in alphabet]
    # Trailing zeros are not written
    while codelengths_list[-1] == 0:
        codelengths_list.pop()
    HCLEN = len(codelengths_list) - 4

    out = bitio.BitIO()

    out.write_int(8, 4)  # compression method = 8
    out.write_int(7, 4)  # window size = 2 ** (8 + 7)
    out.write_int(0x9c, 8)  # FLG

    out.write(1)  # BFINAL = 1
    out.write(0, 1)  # BTYPE = dynamic Huffman codes

    out.write_int(HLIT, 5)
    out.write_int(HDIST, 5)
    out.write_int(HCLEN, 4)

    # write codelengths for codelengths tree
    for length, car in zip(codelengths_list, alphabet):
        out.write_int(length, 3)

    # write lit_len and distance tables
    for item in coded_lit_len + coded_distance:
        if isinstance(item, tuple):
            # (symbol, extra) pair: the extra value is written on 2, 3 or
            # 7 bits for symbols 16, 17 and 18 respectively
            length, extra = item
            code = cl_codes[length]
            value, nbits = int(code, 2), len(code)
            out.write_int(value, nbits, order="msf")
            if length == 16:
                out.write_int(extra, 2)
            elif length == 17:
                out.write_int(extra, 3)
            elif length == 18:
                out.write_int(extra, 7)
        else:
            code = cl_codes[item]
            value, nbits = int(code, 2), len(code)
            out.write_int(value, nbits, order="msf")

    # write the compressed data itself
    for item in store:
        if isinstance(item, tuple):
            length, extra_length, distance, extra_distance = item
            # length code
            code = lit_len_codes[length]
            value, nb = int(code, 2), len(code)
            out.write_int(value, nb, order="msf")
            # extra bits for length
            value, nb = extra_length
            if nb:
                out.write_int(value, nb)
            # distance
            code = distance_codes[distance]
            value, nb = int(code, 2), len(code)
            out.write_int(value, nb, order="msf")
            # extra bits for distance
            value, nb = extra_distance
            if nb:
                out.write_int(value, nb)
        else:
            literal = item
            code = lit_len_codes[item]
            value, nb = int(code, 2), len(code)
            out.write_int(value, nb, order="msf")

    # pad with 0
    # NOTE(review): presumably bitnum == 8 means the stream is on a byte
    # boundary - confirm against the bitio module
    while out.bitnum != 8:
        out.write(0)

    # write ADLER32 checksum: b first, then a, each as high byte, low byte
    a, b = adler32(source)
    a1, a2 = divmod(a, 256)
    b1, b2 = divmod(b, 256)
    out.write_int(b1, 8)
    out.write_int(b2, 8)
    out.write_int(a1, 8)
    out.write_int(a2, 8)

    return bytes(out.bytestream)
# Obtain the Lempel-Ziv code, either by encoding the raw input or by
# reading an already compressed file.
if args.code:
    print("encoding..")
    # NOTE(review): `dict` shadows the builtin and is only assigned in this
    # branch, yet it is read in the printing branch below
    zivcode, dict = encode(rawdata)
else:
    print("reading compressed file..")
    zivcode = readcompressed(args.input)

if not (args.printing):
    # Normal mode: write the result to the output file
    if args.code:
        print("writing compressed file..")
        writecompressed(zivcode, output)
        print("ok")
    else:
        res = decode(zivcode)
        # write the decoded text one character at a time, 8 bits each
        # NOTE(review): unlike writecompressed, this writer is never
        # deleted explicitly - confirm the output gets flushed
        bithandler = bitio.BitIO(output, write=True)
        for i in res:
            bithandler.writeBin(ord(i), 8)
        print("decoded: " + res)
else:
    # Printing mode: show the input, the dictionary and the code
    res = codeToBinString(zivcode)
    print("input:")
    print("\t" + args.input)
    # print the dictionary
    # NOTE(review): when args.code is false, `dict` is still the builtin
    # type here, so len(dict) would fail - verify how this path is used
    print("Dictionnaire")
    print(" --------------")
    for i in range(len(dict)):
        print(str(i).rjust(3) + " | " + dict[i])
    print("Code de lempel-ziv: (ref, lettre)\n\t", end="")
    print(*zivcode)
    print("format compressé:\n\t", end="")