예제 #1
0
def decompress(buf):
    """Decompress the zlib stream *buf* and return the decoded bytes.

    The header (compression method, window size, flags) is read first,
    then each block is decoded until the block flagged as final has been
    processed. The last four bytes of *buf* are then compared with the
    ADLER32 checksum computed from the decoded data.

    Only blocks using fixed (BTYPE 0b01) or dynamic (BTYPE 0b10) Huffman
    codes are supported.

    Raises:
        Error: if the compression method is not 8, if a block uses an
            unsupported type, if a back-reference points outside the
            data decoded so far, or if the checksum does not match.
    """
    reader = bitio.BitIO(buf)

    CM = reader.read(4)  # compression method (usually 8)
    if CM != 8:
        raise Error("unsupported compression method: {}".format(CM))

    # CINFO (ln(window size) - 8) and FLG are not interpreted here, but
    # their bits must still be consumed to reach the first block.
    reader.read(4)
    reader.read(8)

    result = bytearray()

    while True:
        BFINAL = reader.read(1)
        BTYPE = reader.read(2)

        if BTYPE == 0b01:
            # compression with fixed Huffman codes for literals/lengths
            # and distances
            result += decomp_fixed(reader)

        elif BTYPE == 0b10:
            # compression with dynamic Huffman codes
            lit_len_tree, distance_tree = dynamic_tree(reader)

            while True:
                # read a literal or length
                _type, value = read_literal_or_length(reader, lit_len_tree)
                if _type == 'eob':
                    break
                elif _type == 'literal':
                    result.append(value)
                elif _type == 'length':
                    # a length is always followed by a distance
                    distance = read_distance(reader, distance_tree)
                    if not 0 < distance <= len(result):
                        raise Error(
                            "invalid distance: {}".format(distance))
                    # Copy byte by byte: the source range may overlap the
                    # bytes being appended when distance < length.
                    for _ in range(value):
                        result.append(result[-distance])

        else:
            # Silently skipping the block would leave the reader at the
            # wrong position and decode garbage afterwards.
            raise Error("unsupported block type: {}".format(BTYPE))

        if BFINAL:
            # ADLER32 checksum is stored in the last 4 bytes
            expected_b = 256 * buf[-4] + buf[-3]
            expected_a = 256 * buf[-2] + buf[-1]
            # compute it from result
            a = 1
            b = 0
            for byte in result:
                a = (a + byte) % 65521
                b = (b + a) % 65521
            # Raise instead of assert: assertions are removed under -O
            if (a, b) != (expected_a, expected_b):
                raise Error("incorrect ADLER32 checksum")

            return bytes(result)
예제 #2
0
def writecompressed(zivcode, path):
    """Write the Lempel-Ziv code *zivcode* to the file *path*.

    *zivcode* is a sequence of entries whose first item is a reference
    (int) and whose second item is a letter (a one-character string, or
    '' when there is none).

    Layout written:
      * first entry: only its letter, on 8 bits;
      * entry number i (i >= 1): its reference on i.bit_length() bits,
        followed by its letter on 8 bits unless the letter is ''.
    """
    bithandler = bitio.BitIO(path, write=True)

    for index, entry in enumerate(zivcode):
        reference, letter = entry[0], entry[1]
        if index == 0:
            bithandler.writeBin(ord(letter), 8)  # letter
            continue

        # Exact integer equivalent of floor(log2(index)) + 1; avoids the
        # rounding errors of a floating point logarithm.
        bithandler.writeBin(reference, index.bit_length())
        if letter != '':
            bithandler.writeBin(ord(letter), 8)  # letter

    # NOTE(review): presumably BitIO flushes its buffer when the object
    # is destroyed - confirm against the bitio module.
    del bithandler
예제 #3
0
def compress_fixed(source, items):
    """Encode *items* with the fixed Huffman codes and return the bytes.

    *items* holds either plain symbols (looked up in
    ``fixed_lit_len_codes``) or 4-tuples
    ``(length, extra_length, distance, extra_distance)`` where both
    ``extra_*`` members are ``(value, number_of_bits)`` pairs.

    *source* is only used to compute the trailing ADLER32 checksum.
    """
    stream = bitio.BitIO()

    def put_code(bits):
        # Codes are strings of binary digits, written with order="msf"
        stream.write_int(int(bits, 2), len(bits), order="msf")

    def put_extra(extra):
        # Extra bits are skipped when their bit count is 0
        amount, width = extra
        if width:
            stream.write_int(amount, width)

    # Stream header
    stream.write_int(8, 4)  # compression method = 8
    stream.write_int(7, 4)  # window size = 2 ** (8 + 7)
    stream.write_int(0x9c, 8)  # FLG

    # Block header
    stream.write(1)  # BFINAL = 1
    stream.write(1, 0)  # BTYPE = fixed Huffman codes

    for item in items:
        if not isinstance(item, tuple):
            # plain symbol
            put_code(fixed_lit_len_codes[item])
            continue

        length, extra_length, distance, extra_distance = item
        put_code(fixed_lit_len_codes[length])
        put_extra(extra_length)
        # NOTE(review): the distance is written as (distance - 1) on 5
        # bits - confirm this matches the numbering used by the caller.
        stream.write_int(distance - 1, 5, order="msf")
        put_extra(extra_distance)

    # Pad with 0 bits until bitnum is 8 (presumably a byte boundary -
    # confirm against BitIO).
    while stream.bitnum != 8:
        stream.write(0)

    # ADLER32 checksum: both bytes of b first, then both bytes of a
    a, b = adler32(source)
    for half in (b, a):
        high, low = divmod(half, 256)
        stream.write_int(high, 8)
        stream.write_int(low, 8)

    return bytes(stream.bytestream)
예제 #4
0
def readfile(path):
    """Return the content of the file *path* as a string.

    The file is read 8 bits at a time through ``bitio.BitIO``; each value
    is converted with ``chr`` until the reader returns ``'EOF'``.

    If anything goes wrong, a message is written to standard error and
    the program exits with status 1.
    """
    import sys

    try:
        bithandler = bitio.BitIO(path, write=False)
        chars = []
        code = bithandler.read(8)
        while code != 'EOF':
            chars.append(chr(code))
            code = bithandler.read(8)
        # Join once instead of growing a string inside the loop
        return ''.join(chars)
    except Exception as e:
        sys.stderr.write("Couldn't open " + path + ": " + str(e) + "\n")
        # sys.exit is always available, unlike the exit() helper that is
        # only installed by the site module.
        sys.exit(1)
예제 #5
0
def readcompressed(path):
    """Read a compressed file from *path* and return its Lempel-Ziv code.

    The result is a list of ``(reference, letter)`` tuples. The first
    entry has reference 0 and a letter read on 8 bits. Entry number i
    (i >= 1) is a reference read on i.bit_length() bits followed by a
    letter read on 8 bits.

    If the data ends after a non-zero reference with no letter, a final
    ``(reference, '')`` entry is appended.
    """
    bithandler = bitio.BitIO(path, write=False)
    res = []
    i = 1
    ref = 0
    char = bithandler.read(8)  # letter
    while char != 'EOF' and ref != 'EOF':
        res.append((ref, chr(char)))
        # Exact integer equivalent of floor(log2(i)) + 1; avoids the
        # rounding errors of a floating point logarithm.
        ref = bithandler.read(i.bit_length())
        char = bithandler.read(8)  # letter
        i += 1
    # A reference may remain with no letter after it
    if ref != 'EOF' and ref != 0 and char == 'EOF':
        res.append((ref, ''))
    return res
예제 #6
0
def compress(source, window_size=32 * 1024):
    """Print the intermediate tables built while compressing *source*.

    The LZ77 output is turned into frequency tables, the matching code
    lengths are computed, and the code lengths of the code-length
    alphabet are written on 3 bits each to a BitIO buffer, then read
    back and printed. Nothing is returned.
    """
    def tally(counter, key):
        # Increment the number of occurrences of key
        counter[key] = counter.get(key, 0) + 1

    lit_len_count = {}
    distance_count = {}
    for item in lz77.LZ77().compress(source, window_size):
        if isinstance(item, tuple):
            length, distance = item
            tally(lit_len_count, length)
            tally(distance_count, distance)
        else:
            tally(lit_len_count, item)

    print(lit_len_count)
    print(distance_count)
    lit_len_codelengths = huffman.codelengths_from_frequencies(lit_len_count)
    distance_codelengths = huffman.codelengths_from_frequencies(distance_count)

    # Frequencies of the code lengths themselves, over both tables
    codelengths_count = {}
    for _, length in lit_len_codelengths + distance_codelengths:
        tally(codelengths_count, length)
    print(codelengths_count)

    codelengths_dict = dict(
        huffman.codelengths_from_frequencies(codelengths_count))

    # Order in which the code lengths are listed
    alphabet = (16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1,
                15)
    codelengths_list = [codelengths_dict.get(car, 0) for car in alphabet]
    # Drop the trailing zeros
    while codelengths_list[-1] == 0:
        del codelengths_list[-1]
    print(codelengths_list)

    # Write every length on 3 bits, rewind, and read the values back
    out = bitio.BitIO()
    for length in codelengths_list:
        out.write_int(length, 3)
    out.move(-out.pos)
    for length in codelengths_list:
        print(length, out.read(3))
예제 #7
0
def compress(source, window_size=32 * 1024):
    """Compress *source* and return the resulting stream as bytes.

    The LZ77 output for *source* is collected in a list of items (plain
    literals, or tuples describing a length / distance pair). The items
    are then written in a single final block, either with dynamic
    Huffman tables built from the observed frequencies, or through
    compress_fixed() when the estimated gain of dynamic tables is
    negative or when no (length, distance) pair was produced.

    The stream ends with the ADLER32 checksum of *source*.
    """
    lz = lz77.LZ77()
    lit_len_count = {}  # frequency of each literal / length code
    distance_count = {}  # frequency of each distance code
    store = []  # items to encode, in order
    replaced = 0  # total length covered by (length, distance) pairs
    nb_tuples = 0  # number of (length, distance) pairs
    for item in lz.compress(source, window_size):
        if isinstance(item, tuple):
            nb_tuples += 1
            length, distance = item
            replaced += length
            # The helpers return the code followed by its extra-bits
            # description, which is kept as a list by the star-unpacking.
            length_code, *extra_length = length_to_code(length)
            lit_len_count[length_code] = lit_len_count.get(length_code, 0) + 1
            distance_code, *extra_dist = distance_to_code(distance)
            distance_count[distance_code] = \
                distance_count.get(distance_code, 0) + 1
            store.append(
                (length_code, extra_length, distance_code, extra_dist))
        else:
            # NOTE(review): `literal` is assigned but never used below.
            literal = item
            lit_len_count[literal] = lit_len_count.get(literal, 0) + 1
            store.append(literal)

    # 256 is the end of block symbol
    store.append(256)

    # Estimate how many bytes would be saved with dynamic Huffman tables
    # The tables take about 100 bytes, and each (length, distance) tuple is
    # encoded in about 20 bits
    score = replaced - 100 - (nb_tuples * 20 // 8)
    if score < 0:
        # If dynamic tables is going to be inefficient, use fixed tables
        return compress_fixed(source, store)

    lit_len_count[256] = 1  # end of block

    lit_len_codelengths = huffman.codelengths_from_frequencies(lit_len_count)
    lit_len_codes = huffman.normalized(lit_len_codelengths)

    coded_lit_len = list(cl_encode(lit_len_codelengths))
    # Written below on 5 bits: highest literal/length symbol + 1 - 257
    HLIT = 1 + max(car for (car, _) in lit_len_codelengths) - 257

    # NOTE(review): these two defaults are never used as such - the
    # branch without distances returns early below.
    coded_distance = []
    HDIST = 1
    if distance_count:
        distance_codelengths = huffman.codelengths_from_frequencies(
            distance_count)
        distance_codes = huffman.normalized(distance_codelengths)
        coded_distance = list(cl_encode(distance_codelengths))
        # Written below on 5 bits: highest distance code + 1 - 1
        HDIST = 1 + max(dist for (dist, _) in distance_codelengths) - 1
    else:
        # No (length, distance) pair at all: use the fixed tables
        return compress_fixed(source, store)

    # Frequencies of the symbols used to describe both code length
    # tables; an item is either a plain symbol or a tuple whose first
    # member is the symbol.
    codelengths_count = {}
    for coded in coded_lit_len, coded_distance:
        for item in coded:
            length = item[0] if isinstance(item, tuple) else item
            codelengths_count[length] = codelengths_count.get(length, 0) + 1

    codelengths_codelengths = huffman.codelengths_from_frequencies(
        codelengths_count)
    codelengths_dict = dict(codelengths_codelengths)
    cl_codes = huffman.normalized(codelengths_codelengths)

    # Order in which the code lengths of the code-length alphabet are
    # written in the stream
    alphabet = (16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1,
                15)
    codelengths_list = [codelengths_dict.get(car, 0) for car in alphabet]
    # Trailing zeros are not written
    while codelengths_list[-1] == 0:
        codelengths_list.pop()
    # Written below on 4 bits: number of code lengths - 4
    HCLEN = len(codelengths_list) - 4

    out = bitio.BitIO()
    out.write_int(8, 4)  # compression method = 8
    out.write_int(7, 4)  # window size = 2 ** (8 + 7)
    out.write_int(0x9c, 8)  # FLG

    out.write(1)  # BFINAL = 1
    out.write(0, 1)  # BTYPE = dynamic Huffman codes

    out.write_int(HLIT, 5)
    out.write_int(HDIST, 5)
    out.write_int(HCLEN, 4)

    # write codelengths for codelengths tree
    # (zip stops at the end of the shortened codelengths_list; `car` is
    # not used)
    for length, car in zip(codelengths_list, alphabet):
        out.write_int(length, 3)

    # write lit_len and distance tables
    for item in coded_lit_len + coded_distance:
        if isinstance(item, tuple):
            # symbol followed by extra bits: 2 bits after symbol 16,
            # 3 bits after 17, 7 bits after 18
            length, extra = item
            code = cl_codes[length]
            value, nbits = int(code, 2), len(code)
            out.write_int(value, nbits, order="msf")
            if length == 16:
                out.write_int(extra, 2)
            elif length == 17:
                out.write_int(extra, 3)
            elif length == 18:
                out.write_int(extra, 7)
        else:
            code = cl_codes[item]
            value, nbits = int(code, 2), len(code)
            out.write_int(value, nbits, order="msf")

    # write the data itself (store ends with the end of block symbol)
    for item in store:
        if isinstance(item, tuple):
            length, extra_length, distance, extra_distance = item
            # length code
            code = lit_len_codes[length]
            value, nb = int(code, 2), len(code)
            out.write_int(value, nb, order="msf")
            # extra bits for length
            value, nb = extra_length
            if nb:
                out.write_int(value, nb)
            # distance
            code = distance_codes[distance]
            value, nb = int(code, 2), len(code)
            out.write_int(value, nb, order="msf")
            # extra bits for distance
            value, nb = extra_distance
            if nb:
                out.write_int(value, nb)
        else:
            # NOTE(review): `literal` is assigned but never used.
            literal = item
            code = lit_len_codes[item]
            value, nb = int(code, 2), len(code)
            out.write_int(value, nb, order="msf")

    # pad with 0
    # (until bitnum is 8 - presumably a byte boundary, confirm in BitIO)
    while out.bitnum != 8:
        out.write(0)
    # write ADLER32 checksum
    # (both bytes of b first, then both bytes of a)
    a, b = adler32(source)
    a1, a2 = divmod(a, 256)
    b1, b2 = divmod(b, 256)
    out.write_int(b1, 8)
    out.write_int(b2, 8)
    out.write_int(a1, 8)
    out.write_int(a2, 8)

    return bytes(out.bytestream)
예제 #8
0
    if args.code:
        print("encoding..")
        zivcode, dict = encode(rawdata)
    else:
        print("reading compressed file..")
        zivcode = readcompressed(args.input)

    if not (args.printing):
        if args.code:
            print("writing compressed file..")
            writecompressed(zivcode, output)
            print("ok")
        else:
            res = decode(zivcode)
            bithandler = bitio.BitIO(output, write=True)
            for i in res:
                bithandler.writeBin(ord(i), 8)
            print("decoded: " + res)
    else:
        res = codeToBinString(zivcode)
        print("input:")
        print("\t" + args.input)
        # print dictionnaire
        print("Dictionnaire")
        print(" --------------")
        for i in range(len(dict)):
            print(str(i).rjust(3) + " | " + dict[i])
        print("Code de lempel-ziv:  (ref, lettre)\n\t", end="")
        print(*zivcode)
        print("format compressé:\n\t", end="")