def decompress(input):
    """Decode a compressed image file and return it converted to RGB.

    The file content is passed through ``huffman.decode`` and then parsed as
    three ints (image shape), three ints (byte lengths of the Y, Cb and Cr
    payloads) and finally the three payloads themselves.

    Args:
        input: Path of the compressed file (opened in binary mode).

    Returns:
        The result of ``convert(image, 'YCbCr', 'RGB')`` where ``image`` is a
        ``uint8`` array of shape ``(shape[0], shape[1], 3)``.
    """
    with open(input, 'rb') as stream:
        payload = huffman.decode(stream.read())

    # Header part 1: image shape.
    shape = struct.unpack('iii', payload[:12])
    payload = payload[12:]
    # Header part 2: byte length of each channel payload.
    (Y_length, Cb_length, Cr_length) = struct.unpack('iii', payload[:12])
    payload = payload[12:]
    layout = '%us%us%us' % (Y_length, Cb_length, Cr_length)
    (Y_data, Cb_data, Cr_data) = struct.unpack(layout, payload)

    # Channel decompression, in Y, Cb, Cr order.
    Y, Cb, Cr = [
        decompress_channel(chunk) for chunk in (Y_data, Cb_data, Cr_data)
    ]

    # Chroma resampling: repeat every chroma sample CS times along both axes.
    Cb = np.repeat(np.repeat(Cb, CS, 0), CS, 1)
    Cr = np.repeat(np.repeat(Cr, CS, 0), CS, 1)

    height, width = shape[0], shape[1]
    decoded_image = np.empty((height, width, 3), dtype='uint8')
    # Crop every plane to the stored shape before stacking.
    for index, plane in enumerate((Y, Cb, Cr)):
        decoded_image[:, :, index] = plane[:height, :width]
    return convert(decoded_image, 'YCbCr', 'RGB')
def test_decoding_real_world(self):
    """Encoding then decoding the real-world text must return the text."""
    huffman_tree = build_tree(get_real_world_input())
    original = get_real_world_text()
    round_tripped = decode(encode(original, huffman_tree), huffman_tree)
    self.assertEqual(original, round_tripped)
def decompress(compressed, uncompressed):
    '''Read a Huffman tree from 'compressed' and decode the rest of the stream.

    The tree is read with read_tree; every symbol returned by huffman.decode
    is then written to 'uncompressed' as 8 bits. Decoding stops when
    huffman.decode returns None or the reader raises EOFError.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    comp = BitReader(compressed)
    uncomp = BitWriter(uncompressed)
    tree = read_tree(comp)

    while True:
        # Only the decode call is guarded: running out of input is treated
        # exactly like decoding the end-of-message marker.
        try:
            uncomp_byte = huffman.decode(tree, comp)
        except EOFError:
            uncomp_byte = None

        if uncomp_byte is None:
            # NOTE(review): a trailing byte with value 29 is appended to the
            # output on termination; kept as-is because the matching
            # compressor is not visible here -- confirm it is intended.
            uncomp.writebits(29, 8)
            break

        uncomp.writebits(uncomp_byte, 8)
def test_decoding(self):
    """Round-trip several sample strings through the Wikipedia input tree."""
    tree = build_tree(get_wikipedia_input())
    for text in ['a', 'abc', 'adeaddadcededabadbabeabeadedabacabed']:
        round_tripped = decode(encode(text, tree), tree)
        self.assertEqual(text, round_tripped)
def decompress(compressed, uncompressed):
    """Decode a Huffman-coded stream into 'uncompressed'.

    A tree is read from the start of 'compressed' with read_tree; the
    remaining bits are decoded symbol by symbol and each symbol is written
    out as 8 bits.

    Args:
        compressed: Stream the compressed input is read from.
        uncompressed: Writable stream receiving the decoded bytes.
    """
    source = bitio.BitReader(compressed)
    sink = bitio.BitWriter(uncompressed)
    tree = read_tree(source)

    symbol = huffman.decode(tree, source)
    # huffman.decode yields None for the end-of-message symbol: stop there.
    while symbol is not None:
        sink.writebits(symbol, 8)
        symbol = huffman.decode(tree, source)
def decode(filename_in, filename_out):
    """Decode the file 'filename_in' and write the result to 'filename_out'.

    Input layout, as read here: an 8-byte big-endian header length, a 1-byte
    big-endian value that is passed through to huffman.decode, the pickled
    header itself, then the encoded payload.

    Args:
        filename_in: Path of the encoded input file.
        filename_out: Path of the file that receives the decoded bytes.
    """
    with open(filename_in, 'rb') as source:
        header_size = int.from_bytes(source.read(8), 'big')
        u = int.from_bytes(source.read(1), 'big')
        raw_header = source.read(header_size)
        payload = source.read()

    # NOTE(review): pickle.loads can execute arbitrary code; only use this
    # on files from a trusted source.
    frequencies = pickle.loads(raw_header)
    code_map = huffman.build_map_code(huffman.build_tree(frequencies))

    # Decode before opening the output so a failure leaves no output file.
    decoded = huffman.decode(payload, code_map, u)
    with open(filename_out, 'wb') as sink:
        sink.write(decoded)
def test_decode_reverses_encode_long():
    """A long paragraph must survive an encode/decode round trip."""
    string = """In computer science and information theory, Huffman coding is an entropy encoding algorithm used for lossless data compression. The term refers to the use of a variable-length code table for encoding a source symbol (such as a character in a file) where the variable-length code table has been derived in a particular way based on the estimated probability of occurrence for each possible value of the source symbol. It was developed by David A. Huffman while he was a Ph.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes. """
    # Only a prefix match is required of the decoded text.
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, string))
    assert round_tripped.startswith(string)
def test_encode_text(self):
    """Every line of the file at self.filepath must round-trip unchanged.

    Each line is encoded with huffman.encode; the two leading items of the
    result are handed to huffman.decode and the output is compared with the
    original line. huffman.DEBUG is switched on for the run.
    """
    print('test_encode_text')
    huffman.DEBUG = True
    print('huffman.DEBUG=' + str(huffman.DEBUG))
    # The file is only read, so open it read-only (the previous 'r+' mode
    # needlessly required write permission) and iterate its lines directly.
    with open(self.filepath, 'r') as f:
        for line in f:
            coded = huffman.encode(line)
            decoded = huffman.decode(coded[0], coded[1])
            self.assertEqual(line, decoded)
def decompress(compressed, uncompressed):
    '''Read a Huffman tree from 'compressed' and decode the rest of the stream.

    The tree is read with read_tree; each decoded value is written to
    'uncompressed' as a single byte until huffman.decode returns None.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    reader = bitio.BitReader(compressed)
    decoding_tree = read_tree(reader)

    symbol = huffman.decode(decoding_tree, reader)
    # None marks the end of the message.
    while symbol is not None:
        uncompressed.write(bytes([symbol]))
        symbol = huffman.decode(decoding_tree, reader)
def decompress(compressed, uncompressed):
    '''Read a Huffman tree from 'compressed' and decode the rest of the stream.

    The tree is read with read_tree. Decoded values are collected until
    huffman.decode returns None and are then written to 'uncompressed' in a
    single call.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    bitreader = bitio.BitReader(compressed)
    tree = read_tree(bitreader)

    message = []  # decoded byte values, in stream order
    while True:
        char = huffman.decode(tree, bitreader)
        if char is None:
            # End of message: stop without storing anything.
            break
        message.append(char)

    # Write the whole message to the output stream at once.
    uncompressed.write(bytes(message))
def main():
    """Benchmark Huffman coding of the README text repeated 1000 times.

    Times the symbol count, the tree construction, bitarray's encode, the
    module-level decode() and bitarray's own decode, writes two dot files
    for the trees, and prints the Python/C decode time ratio. Both decode
    results are checked against the input text.
    """
    # Close the file deterministically instead of leaking the handle.
    with open('README') as readme:
        txt = 1000 * readme.read()

    t0 = time()
    freq = Counter(txt)
    print('count: %9.6f sec' % (time() - t0))

    t0 = time()
    tree = huffTree(freq)
    print('tree: %9.6f sec' % (time() - t0))

    write_dot(tree, 'tree.dot')
    code = huffCode(tree)
    # create tree from code (no frequencies)
    write_dot(make_tree(code), 'tree_raw.dot')

    a = bitarray()

    t0 = time()
    a.encode(code, txt)
    print('C encode: %9.6f sec' % (time() - t0))

    # Time the decode function above
    t0 = time()
    res = decode(tree, a)
    Py_time = time() - t0
    assert ''.join(res) == txt
    print('Py decode: %9.6f sec' % Py_time)

    # Time the decode method which is implemented in C. The clock is stopped
    # before the verification, as for the Python timing, so the ratio
    # compares like with like.
    t0 = time()
    res = a.decode(code)
    C_time = time() - t0
    assert ''.join(res) == txt
    print('C decode: %9.6f sec' % C_time)

    print('Ratio: %f' % (Py_time / C_time))
def decompress(compressed, uncompressed):
    '''Read a Huffman tree from 'compressed' and decode the rest of the stream.

    The tree is read with read_tree; each decoded value is written to
    'uncompressed' as 8 bits until huffman.decode returns None.

    Args:
      compressed: A file stream from which compressed input is read.
      uncompressed: A writable file stream to which the uncompressed
          output is written.
    '''
    # reader reads from the encoded compressed file
    reader = bitio.BitReader(compressed)
    # writer writes to a decoded uncompressed file
    writer = bitio.BitWriter(uncompressed)
    # the huffman tree read containing the decoding info
    tree = read_tree(reader)

    while True:
        current_element = huffman.decode(tree, reader)
        # None signals the end-of-message leaf: stop decoding.
        if current_element is None:
            break
        writer.writebits(current_element, 8)
# Derive the code table from the text tree and list every symbol's code.
textCodes = getCode(textTree)
print("\nHuffman code for text data:")
for (key, value) in textCodes.items():
    print(key, '\t', value)

# let's encode the tale
textBinary = encode(textData, textCodes)
print("\nEncoded text data:")
print("%s -------> %s" % (textData, textBinary))
# Ratio of encoded length to input length.
print("Average length (bits per character): ", len(textBinary) / len(textData))

# TODO: to compare average length to entropy, must implement getEntropy()
print("PART A - Entropy:", getEntropy(textFreqs))
print("PART B - The ceiling of entropy and average code length are equal")

# TODO: to decode messages, must implement decode()
messageEncoded = '0110000101010010111100011001111110100101100101001011110'
messageDecoded = decode(messageEncoded, textTree)
print('\nPART C, D - Decoded message:', "".join(messageDecoded))

print("\n\n-----Web session lengths.------")

# construct the frequency dictionary
sessionLengths = np.load("sessionLengths.npy")
webFreqs = {}
# Keys are 1-based: entry i of the array is stored under key i + 1.
for i in range(len(sessionLengths)):
    webFreqs[i + 1] = sessionLengths[i]
print('PART E - Entropy:', getEntropy(webFreqs))

# compute and plot session length probabilities
webProbs = sessionLengths / np.sum(sessionLengths)
# NOTE(review): the x range 1..100 is hard-coded; assumes sessionLengths has
# exactly 100 entries -- confirm against the data file.
plt.semilogy(range(1, 101), webProbs)
plt.xlabel('Web Session Length')
def test_decode_reverses_encode_special():
    """Punctuation and spaces must survive an encode/decode round trip."""
    string = '! %'
    # Only a prefix match is required of the decoded text.
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, string))
    assert round_tripped.startswith(string)
def test_decode_reverses_encode_simple():
    """A short repetitive string must survive an encode/decode round trip."""
    string = 'abbb'
    # Only a prefix match is required of the decoded text.
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, string))
    assert round_tripped.startswith(string)
decoded_message_uint8 = np.array( [ord(c) for c in decoded_message.getvalue()], dtype=np.uint8) # Overbodige data wissen te_vertwijderen_nullen = len(decoded_message_uint8) - initiele_lengte print("Er moeten ", te_vertwijderen_nullen,"bits verwijderd worden") decoded_message_uint8 = decoded_message_uint8[:-te_vertwijderen_nullen or None] #print("decoded_message_uint8: ", decoded_message_uint8) #print("Lengte hiervan is:", (8*len(decoded_message_uint8)),"bits") print("Het verschil voor en na kanaaldecodering en verwijderde bits is: ", len(decoded_message_uint8) - initiele_lengte) # ======================= SOURCE DECODING ======================== # =========================== Huffman ============================ print("-------START HUFFMAN DECODING-------") print("lengte chan decoded data", (8*len(decoded_message_uint8)),"bits") klaar_voor_src_dec = util.uint8_to_bit(decoded_message_uint8) huf_decoded_message = huffman.decode(huffman_tree, klaar_voor_src_dec) print(F"Dec: {t.toc_str()}") print("Huffman decoded lengte:", (8*len(huf_decoded_message)), "bits, = lengte originele data") # ======================= Source recreating ======================== print("-------START SOURCE RECREATING-------") verhouding = np.reshape(huf_decoded_message, (image.height, image.width, image.num_of_channels)) afbeelding = Image.fromarray(verhouding,image.mode) afbeelding.show()
def ReadDataBlock(self, codingParams):
    """
    Reads a block of coded data from a PACFile object that has already
    executed OpenForReading() and returns those samples as reconstituted
    signed-fraction data

    Args:
        codingParams: coding parameter object; its ``state`` and
            ``overlapAndAdd`` attributes are updated by this call.

    Returns:
        A list with one array of samples per channel. At end of file the
        pending ``codingParams.overlapAndAdd`` is returned once (and reset
        to 0); after that ``None`` is returned.

    Raises:
        IOError: if fewer bytes than announced could be read for a block.
    """
    # loop over channels (whose coded data are stored separately) and read in each data block
    data = []
    for iCh in range(codingParams.nChannels):
        data.append(np.array([], dtype=np.float64))  # add location for this channel's data
        # read in string containing the number of bytes of data for this channel (but check if at end of file!)
        s = self.fp.read(calcsize("<L"))  # will be empty if at end of file
        if not s:
            # hit last block, see if final overlap and add needs returning, else return nothing
            if codingParams.overlapAndAdd:
                overlapAndAdd = codingParams.overlapAndAdd
                codingParams.overlapAndAdd = 0  # setting it to zero so next pass will just return
                return overlapAndAdd
            else:
                return
        # not at end of file, get nBytes from the string we just read
        nBytes = unpack("<L", s)[0]  # read it as a little-endian unsigned long
        # read the nBytes of data into a PackedBits object to unpack
        pb = PackedBits()
        pb.SetPackedData(self.fp.read(nBytes))  # PackedBits function SetPackedData() converts strings to internally-held array of bytes
        if pb.nBytes < nBytes:
            # Raising a bare string is itself a TypeError; raise a real exception.
            raise IOError("Only read a partial block of coded PACFile data")

        # extract the data from the PackedBits object
        codingParams.state = pb.ReadBits(2)  # read in blockType
        overallScaleFactor = pb.ReadBits(codingParams.nScaleBits)  # overall scale factor
        hTable = pb.ReadBits(codingParams.nHuffTableBits)  # huffman table code
        scaleFactor = []
        bitAlloc = []

        # The block type is fixed for the whole block, so pick the matching
        # line count and scale factor band table once instead of per band.
        if codingParams.state == 0:
            nMDCTLines = codingParams.nMDCTLinesLong
            sfBands = codingParams.sfBandsLong
        elif codingParams.state == 1 or codingParams.state == 3:
            nMDCTLines = codingParams.nMDCTLinesTrans
            sfBands = codingParams.sfBandsTrans
        else:
            nMDCTLines = codingParams.nMDCTLinesShort
            sfBands = codingParams.sfBandsShort
        mantissa = np.zeros(nMDCTLines, np.int32)  # start w/ all mantissas zero

        # NOTE(review): the band count is always taken from sfBandsLong, even
        # for the other block types -- presumably all tables have the same
        # number of bands; confirm.
        for iBand in range(codingParams.sfBandsLong.nBands):  # loop over each scale factor band to unpack its data
            ba = pb.ReadBits(codingParams.nMantSizeBits)
            if ba:
                ba += 1  # no bit allocation of 1 so ba of 2 and up stored as one less
            bitAlloc.append(ba)  # bit allocation for this band
            scaleFactor.append(pb.ReadBits(codingParams.nScaleBits))  # scale factor for this band
            if not bitAlloc[iBand]:
                continue  # nothing stored for this band, mantissas stay zero

            nLines = sfBands.nLines[iBand]
            lowerLine = sfBands.lowerLine[iBand]
            upperLine = sfBands.upperLine[iBand]

            if hTable == 0:
                # read non huffman encoded mantissas
                m = np.empty(nLines, np.int32)
                for j in range(nLines):
                    m[j] = pb.ReadBits(bitAlloc[iBand])  # mantissas for this band
                mantissa[lowerLine:upperLine + 1] = m
            else:
                # read huffman mantissas: a bit count followed by the bits,
                # which are collected in 16-bit chunks
                nHuffBits = pb.ReadBits(codingParams.nHuffLengthBits)
                nChunks = int(np.ceil(nHuffBits / 16.))
                huffBits = np.empty(nChunks + 1).astype(dtype=np.uint16)
                huffBits[0] = nHuffBits  # first entry carries the total bit count
                for i in range(nChunks):
                    bits = pb.ReadBits(np.min([16, nHuffBits]))
                    if nHuffBits < 16:
                        # left-align a final, partial chunk
                        bits = bits << (16 - nHuffBits)
                    huffBits[i + 1] = bits
                    nHuffBits = nHuffBits - 16
                if huffBits.any():
                    decoded = decode(huffBits, codingParams.encodingTrees[hTable - 1])
                    mantissa[lowerLine:upperLine + 1] = decoded
        # done unpacking data (end loop over scale factor bands)

        # (DECODE HERE) decode the unpacked data for this channel, overlap-and-add first half,
        # and append it to the data array (saving other half for next overlap-and-add)
        decodedData = self.Decode(scaleFactor, bitAlloc, mantissa, overallScaleFactor, codingParams)
        data[iCh] = np.concatenate(
            (data[iCh], np.add(codingParams.overlapAndAdd[iCh], decodedData[:codingParams.a])))  # data[iCh] is overlap-and-added data
        codingParams.overlapAndAdd[iCh] = decodedData[codingParams.a:]  # save other half for next pass

    # end loop over channels, return signed-fraction samples for this block
    return data
import huffman

# Round trip: encode input.txt into test.huff, then decode test.huff into
# output.txt.
huffman.encode("input.txt", "test.huff")
huffman.decode("test.huff", "output.txt")
def decode(file, tree):
    """Print the decoded content of the file 'huff_<file>'.

    The file text is passed to huff.decode together with 'tree'; that result
    is passed to lz.decode and the outcome is printed.

    Args:
        file: Name suffix of the input; 'huff_' + file is opened for reading.
        tree: Tree handed to huff.decode.
    """
    # The with-block closes the file; the previous "f.close" without
    # parentheses never did.
    with open('huff_' + file, 'r') as f:
        text = f.read()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(lz.decode(huff.decode(tree, text)))
def test_decode(self):
    """Smoke test: decoding story.huff into story_.txt must not raise."""
    compressed_path, restored_path = "story.huff", "story_.txt"
    decode(compressed_path, restored_path)
    assert True
huffRes = huffman.encode(filechars) # run length encoding import RLE rleRes = RLE.encode(huffRes) # sizes hsize = len(huffRes.tobytes()) rsize = int(len(rleRes) * 1.5 / 8) print "original size = ", reduce( lambda x, y: x * y, image.shape ), " bytes, huffman size = ", hsize, "bytes, compressed size = ", rsize, " bytes" # decoding decoded = RLE.decode(rleRes) decoded = eval("[" + huffman.decode(decoded)[:len(filechars)] + "]") pointer = 0 final = np.zeros(image.shape, np.uint8) for idx, channel in enumerate(copy): rows, cols = channel.shape # we undo the zigzag traversal for row in range(0, rows, 8): for col in range(0, cols, 8): bloc = np.zeros((64), np.float32) bloc[Zigzag] = decoded[pointer:pointer + 64] pointer += 64 # apply the inverse DCT
# Command-line driver: read <input>.data / <input>.keys, decode the content
# and write the result to the output path.
if len(sys.argv) < 3:
    print("Too few arguments given!")
    print("Please format your command like this:")
    print(" python encode_file.py <Path to input> <Path to output>")
    print()
    print("In the output path, please do not specify a file extension.")
    print("This program will create two input files:")
    print(" <INPUT>.data")
    print(" <INPUT>.keys")
    print("Both of these files are necessary to decompress our data.")
    # A usage error is a failure: report it through the exit status.
    sys.exit(1)

input_path = sys.argv[1]
output_path = sys.argv[2]

print("Reading {0}.data & {0}.keys...".format(input_path))
try:
    bin_string, data = read(input_path)
except FileNotFoundError:
    print("ERROR: File {0} not found!".format(input_path))
    # Stop here: previously the script carried on and "decoded" empty data,
    # writing a bogus output file.
    sys.exit(1)

print("Decoding the file...")
decoded = decode(bin_string, data)

print("Writing out to {0}...".format(output_path))
write(output_path, decoded)
def test_decode(self):
    """Smoke test: decode() called with two empty paths must not raise."""
    source_path = ""
    target_path = ""
    decode(source_path, target_path)
    assert True
# Normalise both inputs with the same helper before building the code.
string = h.remove_spl_ch(string)
message = h.remove_spl_ch(message)

# to create the huffman map
prob_of_characters, enc_dict = h.encode_dict(input=string)
print("\nencoded dictionary : ", end="\n\n")
for key, value in enc_dict:
    print(key, " : ", value)
print("\n\n")
print("probability of characters : ", end="\n\n")
for key, value in prob_of_characters:
    print(key, " : ", value)
print("\n\n")

# to encode the message(custom) using the huffman map
enc_msg = h.encode(msg=message, dictionary=enc_dict)
print("encoded message : ", enc_msg, end="\n\n")

# to encode the original string using huffman map
# (this used to encode `message` again, so the size report below compared
# `string` with the encoding of a different text)
enc_string = h.encode(msg=string, dictionary=enc_dict)

# to decode the encoded message using huffman map
dec_msg = h.decode(enc_msg=enc_msg, dictionary=enc_dict)
print("decoded message : ", dec_msg, end="\n\n")

# to get information about the space saved
h.size_saved(dictionary=enc_dict, msg=string, enc_msg=enc_string)
def test_decode_reverses_encode_unicode():
    """Non-ASCII characters must survive an encode/decode round trip."""
    string = 'Kærlighed og Øl!'
    # Only a prefix match is required of the decoded text.
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, string))
    assert round_tripped.startswith(string)
def decode(file, tree):
    """Print the decoded content of the file 'huff_<file>'.

    The file text is passed to huff.decode together with 'tree'; that result
    is passed to lz.decode and the outcome is printed.

    Args:
        file: Name suffix of the input; 'huff_' + file is opened for reading.
        tree: Tree handed to huff.decode.
    """
    # The with-block closes the file; the previous "f.close" without
    # parentheses never did.
    with open('huff_' + file, 'r') as f:
        text = f.read()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(lz.decode(huff.decode(tree, text)))
import pickle
import argparse

from huffman import decode

# Command-line interface: one input path and one output path.
parser = argparse.ArgumentParser()
parser.add_argument('input', help='path to input binary file.')
parser.add_argument('output', help='path to output text file.')
args = parser.parse_args()

# The input holds one pickled sequence whose items are passed as positional
# arguments to decode(); the decoded result is written to the output.
# NOTE(review): pickle.load can execute arbitrary code -- only run this on
# files from a trusted source.
with open(args.input, 'rb') as in_, open(args.output, 'wb') as out:
    out.write(decode(*pickle.load(in_)))
from pprint import pprint

import huffman
from view import viz_tree

# Demo input: build the tree and the per-symbol code map from it.
data = b"huffman"
tree = huffman.build_tree(data)
map_code = huffman.build_map_code(tree)

# encode
bin_data = huffman.encode(data, map_code)
print("Map code")
# Keys are converted with chr() and codes printed as 0/1 strings.
for k, v in map_code.items():
    print("{}: {}".format(chr(k), v.to01()))
print("Encoded data")
print(bin_data.to01())
viz_tree(tree)

# decode
print("After decode")
# The decoder receives the raw bytes plus element 3 of buffer_info().
# NOTE(review): presumably that element is the number of unused padding
# bits in the last byte -- confirm against the bit container's docs.
print(huffman.decode(bin_data.tobytes(), map_code, bin_data.buffer_info()[3]))

# calculate performance
# p is len(bin_data) divided by the input size in bits.
# NOTE(review): the label says "Reduce" but p is the encoded/original ratio,
# not the saving (1 - p) -- confirm which one is intended.
p = len(bin_data) / (len(data) * 8)
print(f"Reduce {p * 100}%")
def test_decode(self):
    """Smoke test: decoding test_file.huff into test_file_.txt must not raise."""
    compressed_path, restored_path = "test_file.huff", "test_file_.txt"
    decode(compressed_path, restored_path)
    assert True
# Timer used for all measurements below: tic() starts, toc()/toc_str() read.
t = Time()

t.tic()
# TODO Determine the number of occurrences of the source or use a fixed huffman_freq
huffman_freq = "TODO"
huffman_tree = huffman.Tree(huffman_freq)
print(F"Generating the Huffman Tree took {t.toc_str()}")

t.tic()
# TODO print-out the codebook and validate the codebook (include your findings in the report)
encoded_message = huffman.encode(huffman_tree.codebook, image.get_pixel_seq())
print(len(encoded_message))
print("Enc: {}".format(t.toc()))

t.tic()
decoded_message = huffman.decode(huffman_tree, encoded_message)
print("Dec: {}".format(t.toc()))

# NOTE(review): this reads from `img` while the Huffman part above uses
# `image` -- confirm both names exist and refer to the intended source.
input_lzw = img.get_pixel_seq().copy()

# ======================= SOURCE ENCODING ========================
# ====================== Lempel-Ziv-Welch ========================
t.tic()
encoded_msg, dictonary = lzw.encode(input_lzw)
print("Enc: {}".format(t.toc()))

t.tic()
decoded_msg = lzw.decode(encoded_msg)
# This is the decoding time; it used to be printed under the label "Enc".
print("Dec: {0:.4f}".format(t.toc()))