def compress(inp, bitout):
    """Adaptively Huffman-encode every byte read from ``inp`` into ``bitout``.

    No code table is transmitted: the encoder starts from a flat frequency
    table and rebuilds its code tree from the running counts on a fixed
    schedule (after 1, 2, 4, ... bytes while fewer than 262144 bytes have been
    seen, and after every multiple of 262144 bytes). On each multiple of
    262144 the counts are also reset to the flat table. The stream ends with
    the EOF symbol 256.

    Args:
        inp: Binary file-like object; ``inp.read(1)`` must return an empty
            value at end of input.
        bitout: Bit output stream handed to ``huffmancoding.HuffmanEncoder``.
    """
    reset_interval = 262144

    # The table must cover byte values 0-255 plus the EOF symbol 256 that is
    # written below; a smaller table cannot encode arbitrary input.
    initfreqs = [1] * 257
    freqs = huffmancoding.FrequencyTable(initfreqs)
    enc = huffmancoding.HuffmanEncoder(bitout)
    # Don't need to make canonical code because we don't transmit the code tree
    enc.codetree = freqs.build_code_tree()

    count = 0  # Number of bytes read from the input file
    while True:
        # Read and encode one byte
        symbol = inp.read(1)
        if len(symbol) == 0:
            break
        symbol = symbol[0] if python3 else ord(symbol)
        enc.write(symbol)
        count += 1

        # Update the frequency table and possibly the code tree
        freqs.increment(symbol)
        at_reset_point = count % reset_interval == 0
        if (count < reset_interval and is_power_of_2(count)) or at_reset_point:
            enc.codetree = freqs.build_code_tree()  # Update code tree
        if at_reset_point:
            freqs = huffmancoding.FrequencyTable(initfreqs)  # Reset frequency table

    enc.write(256)  # EOF
def compressor(input_files, output_file):
    """Compress several input files into one ``<output_file>.mhs`` archive.

    The inputs are scanned once to fill a shared 257-entry frequency table
    (256 byte values plus the EOF symbol 256), a canonical Huffman code is
    derived from it, and then the code-length table, the file base names and
    the encoded data are written through a bit output stream.

    Args:
        input_files: Sequence of paths of the files to compress.
        output_file: Output path without extension; ``".mhs"`` is appended.
    """
    # Read the input files once to compute symbol frequencies.
    # The resulting generated code is optimal for static Huffman coding and
    # also canonical.
    freqs = huffmancoding.FrequencyTable([0] * 257)
    files_count = len(input_files)
    for files in input_files:
        get_frequencies(files, freqs)
    freqs.increment(256)  # EOF symbol
    code = freqs.build_code_tree()
    canoncode = huffmancoding.CanonicalCode(
        tree=code, symbollimit=freqs.get_symbol_limit())
    # Replace code tree with canonical one. For each symbol,
    # the code value may change but the code length stays the same.
    code = canoncode.to_code_tree()

    # Read the input files again, compress with Huffman coding, and write the
    # output file.
    with contextlib.closing(
            huffmancoding.BitOutputStream(open(output_file + ".mhs", "wb"))) as bitout:
        # ExitStack closes every input that was opened, even when a later
        # open() or the encoding step raises.
        with contextlib.ExitStack() as stack:
            inp = [stack.enter_context(open(filename, "rb"))
                   for filename in input_files]
            files_name = [os.path.basename(f.name) for f in inp]
            write_code_len_table(bitout, canoncode, files_name)
            compress(code, inp, bitout, files_count)
def get_frequencies(filepath):
    """Count how often each byte value occurs in the file at ``filepath``.

    Args:
        filepath: Path of the file to scan; it is opened in binary mode.

    Returns:
        A ``huffmancoding.FrequencyTable`` created with 257 zeroed entries in
        which entry ``b`` has been incremented once per occurrence of byte
        ``b``. Entry 256 is never incremented here.
    """
    chunk_size = 65536
    freqs = huffmancoding.FrequencyTable([0] * 257)
    # Read in large chunks instead of one read() call per byte; the counts are
    # the same, only the number of I/O calls changes.
    with open(filepath, "rb") as src:
        while True:
            chunk = src.read(chunk_size)
            if not chunk:
                break
            for byte in chunk:  # iterating bytes yields ints 0-255
                freqs.increment(byte)
    return freqs
def decompress(bitin, out):
    """Decode an adaptively Huffman-coded bit stream from ``bitin`` into ``out``.

    Starts from a flat 257-entry frequency table and rebuilds the code tree
    from the running counts after 1, 2, 4, ... decoded bytes (while fewer than
    262144 bytes have been written) and after every multiple of 262144 bytes;
    on each such multiple the counts are reset to the flat table. Decoding
    stops when the EOF symbol 256 is read.

    Args:
        bitin: Bit input stream handed to ``huffmancoding.HuffmanDecoder``.
        out: Binary file-like object that receives the decoded bytes.
    """
    eof_symbol = 256
    interval = 262144

    flat_counts = [1] * 257
    table = huffmancoding.FrequencyTable(flat_counts)
    decoder = huffmancoding.HuffmanDecoder(bitin)
    decoder.codetree = table.build_code_tree()  # Use same algorithm as the compressor

    # iter(callable, sentinel) keeps calling decoder.read() until it returns
    # the EOF symbol; ``written`` is the number of bytes emitted so far.
    for written, symbol in enumerate(iter(decoder.read, eof_symbol), start=1):
        out.write(bytes([symbol]))

        # Update the frequency table and possibly the code tree
        table.increment(symbol)
        at_reset_point = written % interval == 0
        if at_reset_point or (written < interval and is_power_of_2(written)):
            decoder.codetree = table.build_code_tree()
        if at_reset_point:
            # Start counting afresh from the flat table
            table = huffmancoding.FrequencyTable(flat_counts)
def get_frequencies(filepath):
    """Return a frequency table filled with random counts.

    ``filepath`` is currently ignored: instead of counting bytes in a file,
    1024 integers are drawn with ``np.random.randint(100, size=1024)`` and
    used as the symbol frequencies. The same list is also passed to
    ``save_rand_freq``.

    Args:
        filepath: Unused.

    Returns:
        A ``huffmancoding.FrequencyTable`` built from the random counts.
    """
    random_counts = np.random.randint(100, size=1024).tolist()
    table = huffmancoding.FrequencyTable(random_counts)
    # Hand the generated counts to save_rand_freq after the table is built.
    save_rand_freq(random_counts)
    return table
def huffman_compress(code_freq, quant_code, codeword_refer, output_file):
    """Huffman-compress ``quant_code`` into ``output_file`` using a canonical code.

    A frequency table is built from ``code_freq`` plus one extra symbol with
    count 1, converted to a canonical code, and then the code-length table and
    the encoded data are written to ``output_file``.

    Args:
        code_freq: Sequence of per-symbol frequencies. It is copied, so the
            caller's sequence is left unchanged.
        quant_code: Data forwarded to ``compress`` for encoding.
        codeword_refer: Currently unused by this function.
        output_file: Path of the binary file to write.
    """
    # Work on a copy: appending to the caller's list would grow the alphabet
    # by one more symbol each time the same list is passed in again.
    frequencies = list(code_freq)
    # Extra trailing symbol with count 1 -- presumably the end-of-stream
    # marker expected by compress(); confirm against its implementation.
    frequencies.append(1)

    freq = huffmancoding.FrequencyTable(frequencies)
    code_tree = freq.build_code_tree()
    # Replace code tree with canonical one. For each symbol, the code value
    # may change but the code length stays the same.
    canonical_code = huffmancoding.CanonicalCode(
        tree=code_tree, symbollimit=freq.get_symbol_limit())
    code_tree = canonical_code.to_code_tree()

    with contextlib.closing(
            huffmancoding.BitOutputStream(open(output_file, "wb"))) as bit_out:
        write_code_len_table(bit_out, canonical_code)
        compress(code_tree, quant_code, bit_out)