Ejemplo n.º 1
0
def compress(inp, bitout):
    """Adaptively Huffman-encode every byte of ``inp`` into ``bitout``.

    The code tree is never written to the output; it is rebuilt from the
    running frequency table at fixed points (see the loop below), so a
    decoder has to apply the same schedule to stay in sync.

    Args:
        inp: Binary file-like object; consumed one byte at a time until
            ``read(1)`` returns an empty result.
        bitout: Bit-level output stream handed to
            ``huffmancoding.HuffmanEncoder``.
    """
    # The alphabet is the 256 byte values plus the EOF symbol 256 that is
    # written at the end, hence 257 entries. A shorter table cannot index
    # the symbols this function encodes.
    initfreqs = [1] * 257
    freqs = huffmancoding.FrequencyTable(initfreqs)
    enc = huffmancoding.HuffmanEncoder(bitout)
    # Don't need to make canonical code because we don't transmit the code tree
    enc.codetree = freqs.build_code_tree()
    count = 0  # Number of bytes read from the input file
    while True:
        # Read and encode one byte
        symbol = inp.read(1)
        if len(symbol) == 0:
            break
        # `python3` is a flag defined outside this block; on Python 2 a
        # one-byte read is a str and needs ord() to become an int.
        symbol = symbol[0] if python3 else ord(symbol)
        enc.write(symbol)
        count += 1

        # Update the frequency table and possibly the code tree
        freqs.increment(symbol)
        # Rebuild at every power of two below 262144 bytes, and afterwards
        # at every multiple of 262144 bytes.
        if (count < 262144 and is_power_of_2(count)) or count % 262144 == 0:
            enc.codetree = freqs.build_code_tree()
        if count % 262144 == 0:  # Reset frequency table
            freqs = huffmancoding.FrequencyTable(initfreqs)
    enc.write(256)  # EOF
Ejemplo n.º 2
0
def compressor(input_files, output_file):
    """Compress several files into one ``<output_file>.mhs`` archive.

    The inputs are read twice: once to accumulate symbol frequencies into a
    single shared table, and once more to encode them with the resulting
    canonical Huffman code.

    Args:
        input_files: Sequence of paths of the files to compress.
        output_file: Output path without extension; ``".mhs"`` is appended.
    """
    # Read input files once to compute symbol frequencies.
    # The resulting generated code is optimal for static Huffman coding and also canonical.
    freqs = huffmancoding.FrequencyTable([0] * 257)
    files_count = len(input_files)
    for files in input_files:
        get_frequencies(files, freqs)
        freqs.increment(256)  # EOF symbol, counted once per input file
    code = freqs.build_code_tree()
    canoncode = huffmancoding.CanonicalCode(
        tree=code, symbollimit=freqs.get_symbol_limit())
    # Replace code tree with canonical one. For each symbol,
    # the code value may change but the code length stays the same.
    code = canoncode.to_code_tree()

    # Read input files again, compress with Huffman coding, and write output file
    with contextlib.closing(
            huffmancoding.BitOutputStream(open(output_file + ".mhs",
                                               "wb"))) as bitout:
        # ExitStack closes every input that was opened, even when a later
        # open() or the compression itself raises.
        with contextlib.ExitStack() as stack:
            inp = [
                stack.enter_context(open(filename, 'rb'))
                for filename in input_files
            ]
            files_name = [os.path.basename(f.name) for f in inp]
            write_code_len_table(bitout, canoncode, files_name)
            compress(code, inp, bitout, files_count)
Ejemplo n.º 3
0
def get_frequencies(filepath):
    """Count how often each byte value occurs in the file at ``filepath``.

    Args:
        filepath: Path of the file to scan; opened in binary mode.

    Returns:
        A ``huffmancoding.FrequencyTable`` created with 257 zeroed entries
        and incremented once per byte read. Index 256 is not touched here.
    """
    freqs = huffmancoding.FrequencyTable([0] * 257)
    with open(filepath, "rb") as src:
        while True:
            # Read in blocks rather than byte by byte; the resulting counts
            # are the same.
            chunk = src.read(65536)
            if not chunk:
                break
            for byte in chunk:  # iterating bytes yields ints
                freqs.increment(byte)
    return freqs
Ejemplo n.º 4
0
def decompress(bitin, out):
    """Decode an adaptively Huffman-coded bit stream into ``out``.

    Symbols are read from ``bitin`` until the EOF symbol 256 appears. Each
    decoded byte is written to ``out`` and fed into the frequency table,
    and the code tree is rebuilt on a fixed schedule.

    Args:
        bitin: Bit-level input stream handed to
            ``huffmancoding.HuffmanDecoder``.
        out: Binary file-like object receiving the decoded bytes.
    """
    reset_interval = 262144
    initial = [1] * 257
    table = huffmancoding.FrequencyTable(initial)
    decoder = huffmancoding.HuffmanDecoder(bitin)
    # Use same algorithm as the compressor
    decoder.codetree = table.build_code_tree()
    written = 0  # Number of bytes written to the output file

    symbol = decoder.read()
    while symbol != 256:  # 256 is the EOF symbol
        out.write(bytes([symbol]))
        written += 1

        # Update the frequency table and possibly the code tree
        table.increment(symbol)
        at_boundary = written % reset_interval == 0
        # Rebuild at every power of two below the interval, and at every
        # multiple of the interval.
        if at_boundary or (written < reset_interval and is_power_of_2(written)):
            decoder.codetree = table.build_code_tree()
        if at_boundary:  # Reset frequency table
            table = huffmancoding.FrequencyTable(initial)

        symbol = decoder.read()
Ejemplo n.º 5
0
def get_frequencies(filepath):
    """Build a frequency table from random counts instead of file contents.

    ``filepath`` is accepted but not read. The table is created from 1024
    integers drawn with ``np.random.randint(100, size=1024)``, and the same
    list is passed to ``save_rand_freq``.

    Args:
        filepath: Unused.

    Returns:
        A ``huffmancoding.FrequencyTable`` built from the random counts.
    """
    freq_arr = np.random.randint(100, size=1024).tolist()
    freqs = huffmancoding.FrequencyTable(freq_arr)
    # Hand the generated list to save_rand_freq after the table is built.
    save_rand_freq(freq_arr)
    return freqs
Ejemplo n.º 6
0
def huffman_compress(code_freq, quant_code, codeword_refer, output_file):
    """Huffman-encode ``quant_code`` into ``output_file`` with a canonical code.

    Args:
        code_freq: List of per-symbol frequencies. NOTE: it is modified in
            place; one extra entry with frequency 1 is appended
            (presumably the terminator symbol; confirm against ``compress``).
        quant_code: Data forwarded unchanged to ``compress``.
        codeword_refer: Currently unused; kept for interface compatibility.
        output_file: Path of the binary file to write.
    """
    # Kept as an in-place append because callers may rely on the extra entry.
    code_freq.append(1)
    freq = huffmancoding.FrequencyTable(code_freq)
    code_tree = freq.build_code_tree()
    # Replace code tree with canonical one. For each symbol, the code value
    # may change but the code length stays the same.
    canonical_code = huffmancoding.CanonicalCode(
        tree=code_tree, symbollimit=freq.get_symbol_limit())
    code_tree = canonical_code.to_code_tree()

    # closing() closes the bit stream once the block exits.
    with contextlib.closing(
            huffmancoding.BitOutputStream(open(output_file, "wb"))) as bit_out:
        write_code_len_table(bit_out, canonical_code)
        compress(code_tree, quant_code, bit_out)