def decompress(txt):
    """Decompress text with password

    Reads the symbol/frequency table stored between two ``BSC_HEADER_FLAG``
    markers, then decodes the bit payload that follows the second marker.

    Each table entry is one control byte, an optional wide frequency and one
    symbol byte. Control value 1 means a 2-byte ('H') frequency follows,
    2 means a 4-byte ('I') frequency follows, and any other value is the
    frequency plus 2.

    :param txt: bytes-like object in the layout written by ``compress``
    :return: the result of ``reverse(encode(huff), out, blen)``
    :raises InvalidFileFormatException: if the leading flag is missing or the
        data ends before the closing flag is found
    """
    flag_len = len(BSC_HEADER_FLAG)
    huff = dict()
    blen = 0
    with BytesIO(memoryview(txt)) as buff:
        if buff.read(flag_len) != BSC_HEADER_FLAG:
            raise InvalidFileFormatException()
        while True:
            chk = buff.read(flag_len)
            if chk == BSC_HEADER_FLAG:
                break
            if len(chk) < flag_len:
                # Ran out of data without seeing the closing flag. Seeking
                # back by the full flag length here would re-read earlier
                # bytes and could loop forever.
                raise InvalidFileFormatException()
            buff.seek(-flag_len, 1)
            fqc = unpack('B', buff.read(1))[0]
            # Compare by value: identity ("is") on ints only works by accident.
            if fqc == 0x01:
                fq = unpack('H', buff.read(2))[0]
            elif fqc == 0x02:
                fq = unpack('I', buff.read(4))[0]
            else:
                # Subtract 2 from fqc because first two values are reserved
                fq = fqc - 2
            sym = unpack('B', buff.read(1))[0]
            blen += fq
            huff[sym] = fq
        # The payload starts exactly where the table parser stopped. Searching
        # txt for the flag again could match flag bytes inside the table.
        payload_start = buff.tell()
    out = bitarray()
    # Cut off unnecessary parts
    out.frombytes(txt[payload_start:])
    return reverse(encode(huff), out, blen)
def compress(txt):
    """Compress text with password

    Output layout: ``BSC_HEADER_FLAG``, one table entry per distinct symbol,
    ``BSC_HEADER_FLAG`` again, then the encoded bits as bytes.

    :param txt: sequence of byte values (each symbol is packed with 'B')
    :return: bytearray holding the header, frequency table and payload
    :raises FrequencyOverflowException: if a symbol occurs 8**8 times or more
    """
    con = bytearray()
    feq = Counter(txt)
    out = apply(encode(feq), txt)
    con += BSC_HEADER_FLAG
    for sym, fq in feq.items():
        if fq < 2**8 - 2:
            # Use next Byte to pack small int, first 0x01 and 0x02 are reserved values.
            con += pack('B', fq + 2)
        elif fq < 4**8:
            # Use H
            con += pack('B', 1)
            con += pack('H', fq)
        elif fq < 8**8:
            # Use I
            con += pack('B', 2)
            con += pack('I', fq)
        else:
            # sym and fq are ints; format them instead of concatenating, which
            # raised TypeError and hid the intended exception.
            raise FrequencyOverflowException(
                "Symbol {} occurred {} times, we cannot handle that!".format(sym, fq))
        con += pack('B', sym)
    con += BSC_HEADER_FLAG
    con += bitarray(out).tobytes()
    return con
def encode(text):
    """Run the Lempel-Ziv stage and then the Huffman stage, timing each step.

    Writes the LZ output to ``'Lziv_' + file`` and the Huffman output to
    ``'huff_' + file``; ``file`` and ``windowlen`` are read from module scope.

    :param text: input passed to ``lz.encode``
    :return: ``([lz_seconds, tree_seconds, huffman_seconds], tree)``
    """
    # time.clock was removed in Python 3.8; prefer perf_counter when present.
    clock = getattr(time, 'perf_counter', None) or time.clock

    with open('Lziv_' + file, 'w') as Lziv:
        t1 = clock()
        lztext = lz.encode(text, windowlen)
        Lziv.write(lztext)
    t2 = clock()

    lztext = lz.prep_text('Lziv_' + file)
    t3 = clock()
    tree = huff.make_tree(lztext)
    t4 = clock()

    # The original wrote "h.close" without calling it, so the file was never
    # closed; the context manager guarantees the flush and close.
    with open('huff_' + file, 'w') as h:
        t5 = clock()
        htext = huff.encode(tree, lztext)
        h.write(htext)
        t6 = clock()
    return [t2 - t1, t4 - t3, t6 - t5], tree
def test_decoding_real_world(self):
    """Decoding an encoded real-world text must give back the original text."""
    tree = build_tree(get_real_world_input())
    original = get_real_world_text()
    round_tripped = decode(encode(original, tree), tree)
    self.assertEqual(original, round_tripped)
def encode(text):
    """Run the Lempel-Ziv stage and then the Huffman stage, timing each step.

    Writes the LZ output to ``'Lziv_' + file`` and the Huffman output to
    ``'huff_' + file``; ``file`` and ``windowlen`` are read from module scope.

    :param text: input passed to ``lz.encode``
    :return: ``([lz_seconds, tree_seconds, huffman_seconds], tree)``
    """
    # time.clock was removed in Python 3.8; prefer perf_counter when present.
    clock = getattr(time, 'perf_counter', None) or time.clock

    with open('Lziv_' + file, 'w') as Lziv:
        t1 = clock()
        lztext = lz.encode(text, windowlen)
        Lziv.write(lztext)
    t2 = clock()

    lztext = lz.prep_text('Lziv_' + file)
    t3 = clock()
    tree = huff.make_tree(lztext)
    t4 = clock()

    # The original wrote "h.close" without calling it, so the file was never
    # closed; the context manager guarantees the flush and close.
    with open('huff_' + file, 'w') as h:
        t5 = clock()
        htext = huff.encode(tree, lztext)
        h.write(htext)
        t6 = clock()
    return [t2 - t1, t4 - t3, t6 - t5], tree
def file2DNA(fileName, fileId, signalStatus):
    """Encode a file byte by byte into DNA strings written to ``fileName + '.dnac'``.

    Each byte is passed through ``huffman.encode``, converted with
    ``ExtraModules.encodeSTR`` and handed to the chunk manager. A trailer
    holding the total trit length, zero-padded relative to a multiple of 25,
    is appended at the end.

    :param fileName: path of the input file
    :param fileId: identifier forwarded to ``ChunkManager1.ChunkManager``
    :param signalStatus: object with an ``emit(str)`` method that receives the
        progress percentage as text
    """
    global percentageCompleted
    global fileLength

    totalLen = 0
    prevBase = 'A'
    countOfBytes = 0

    # The context manager closes the input file; the original never did.
    with io.open(fileName, "rb") as myFile:
        # NOTE(review): outFile is presumably closed by chunkManager.close() - confirm.
        outFile = io.open(fileName + '.dnac', "w")
        chunkManager = ChunkManager1.ChunkManager(outFile, fileId)
        while True:
            byte = myFile.read(1)
            if not byte:
                break
            # Count only real bytes; counting the EOF read pushed the
            # percentage past 100.
            countOfBytes = countOfBytes + 1
            if fileLength:
                percentageCompleted = (countOfBytes * 1.00 / fileLength) * 100
            else:
                # Avoid ZeroDivisionError when the reported length is 0.
                percentageCompleted = 0
            if countOfBytes % 1000 == 0:
                signalStatus.emit(str(int(percentageCompleted)))
            tritString = str(huffman.encode(byte))
            totalLen = totalLen + len(tritString)
            dnaString = ExtraModules.encodeSTR(tritString, prevBase)
            prevBase = dnaString[-1]
            chunkManager.addString(dnaString)

    # Trailer: 20-digit base-3 length preceded by zero padding.
    S2 = ExtraModules.intToBase3(totalLen, 20)
    currLen = 20 + totalLen
    lenOfS3 = 25 - (currLen % 25)
    S3 = '0' * lenOfS3
    dnaString1 = ExtraModules.encodeSTR(S3 + S2, prevBase)
    chunkManager.addString(dnaString1)
    chunkManager.close()
    signalStatus.emit('100')
def test_encoding(self):
    """Encoding the sample text with the Wikipedia tree yields the known bit string."""
    expected_bits = '01000100100000010001110001000010011010000110100111001001110010001000010011010110100111000'
    tree = build_tree(get_wikipedia_input())
    actual_bits = encode('adeaddadcededabadbabeabeadedabacabed', tree).to01()
    self.assertEqual(actual_bits, expected_bits)
def test_encode_returns_correct_tree():
    """encode() on "abbb" returns a Node equal to the hand-built two-leaf tree."""
    expected = Node(
        1,
        None,
        Node(0.25, 'a', None, None),
        Node(0.75, 'b', None, None),
    )
    actual = encode(TEST_FILE, "abbb")
    assert isinstance(actual, Node)
    assert expected == actual
def test_decoding(self):
    """Each sample text survives an encode/decode round trip with the Wikipedia tree."""
    tree = build_tree(get_wikipedia_input())
    for text in ('a', 'abc', 'adeaddadcededabadbabeabeadedabacabed'):
        round_tripped = decode(encode(text, tree), tree)
        self.assertEqual(text, round_tripped)
def sendFrame(self):
    """Capture a frame, Huffman-encode it and write it to the client.

    The packet is a 32-bit length followed by the raw encoded frame.
    """
    packet = QByteArray()
    stream = QDataStream(packet, QIODevice.WriteOnly)
    stream.setVersion(QDataStream.Qt_5_0)

    payload = huffman.encode(self.cam.capture())
    stream.writeInt32(len(payload))
    stream.writeRawData(payload)

    self.client.write(packet)
def test_decode_reverses_encode_long():
    """A long paragraph written via encode() is recovered as a prefix of decode()'s output."""
    original = """In computer science and information theory, Huffman coding is an entropy encoding algorithm used for lossless data compression. The term refers to the use of a variable-length code table for encoding a source symbol (such as a character in a file) where the variable-length code table has been derived in a particular way based on the estimated probability of occurrence for each possible value of the source symbol. It was developed by David A. Huffman while he was a Ph.D. student at MIT, and published in the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes. """
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, original))
    assert round_tripped.startswith(original)
def encode(filename_in, filename_out):
    """Huffman-encode ``filename_in`` into ``filename_out``.

    The output consists of an 8-byte big-endian header length, one byte
    holding the number of unused bits in the last payload byte, the pickled
    frequency table, and the encoded payload.
    """
    with open(filename_in, 'rb') as src:
        # First pass collects frequencies, second pass encodes.
        freq = huffman.freq_str(read_each(src))
        map_code = huffman.build_map_code(huffman.build_tree(freq))
        src.seek(0)
        bits = huffman.encode(read_each(src), map_code)

    unused_bits = bits.buffer_info()[3]  # unused bits of last byte
    header = pickle.dumps(freq, pickle.HIGHEST_PROTOCOL)
    header_size = len(header)

    with open(filename_out, 'wb') as dst:
        pieces = (
            header_size.to_bytes(8, 'big'),
            unused_bits.to_bytes(1, 'big'),
            header,
            bits.tobytes(),
        )
        dst.write(b''.join(pieces))
def test_encode_text(self):
    """Every line of the file at self.filepath survives a huffman encode/decode round trip."""
    print('test_encode_text')
    huffman.DEBUG = True
    print('huffman.DEBUG=' + str(huffman.DEBUG))
    with open(self.filepath, 'r+') as handle:
        # readline() returns '' at end of file, which ends the iteration.
        for line in iter(handle.readline, ''):
            coded = huffman.encode(line)
            restored = huffman.decode(coded[0], coded[1])
            self.assertEqual(line, restored)
def compress(image, img_mode, output):
    """Compress ``image`` and write the Huffman-encoded result to ``output``.

    The image is converted to YCbCr, the two chroma planes are averaged over
    CS x CS submatrices, each plane goes through ``compress_channel``, and the
    packed result is Huffman-encoded before being written.
    """
    shape = image.shape
    ycbcr = convert(image, img_mode, 'YCbCr')

    # Chroma subsampling
    luma = ycbcr[:, :, 0]
    chroma_blue = utils.submatrices(ycbcr[:, :, 1], CS, CS).mean((2, 3))
    chroma_red = utils.submatrices(ycbcr[:, :, 2], CS, CS).mean((2, 3))

    # Channel compression
    encoded = [compress_channel(plane) for plane in (luma, chroma_blue, chroma_red)]
    for _, length in encoded:
        click.echo(length)
    (Y_data, Y_length), (Cb_data, Cb_length), (Cr_data, Cr_length) = encoded

    # Header with the image dimensions, then the three lengths and payloads.
    file_data = bytearray()
    file_data.extend(struct.pack('iii', shape[0], shape[1], 3))
    layout = 'iii%us%us%us' % (Y_length, Cb_length, Cr_length)
    packed = struct.pack(
        layout, Y_length, Cb_length, Cr_length, Y_data, Cb_data, Cr_data)
    file_data.extend(packed)
    click.echo(len(file_data))

    file_data = huffman.encode(file_data)
    click.echo(len(file_data))

    with open(output, 'wb') as file:
        file.write(file_data)
# ======================= SOURCE ENCODING ======================== # =========================== Huffman ============================ # Use t.tic() and t.toc() to measure the executing time as shown below t = Time() t.tic() # TODO Determine the number of occurrences of the source or use a fixed huffman_freq huffman_freq = "TODO" huffman_tree = huffman.Tree(huffman_freq) print(F"Generating the Huffman Tree took {t.toc_str()}") t.tic() # TODO print-out the codebook and validate the codebook (include your findings in the report) encoded_message = huffman.encode(huffman_tree.codebook, image.get_pixel_seq()) print(len(encoded_message)) print("Enc: {}".format(t.toc())) t.tic() decoded_message = huffman.decode(huffman_tree, encoded_message) print("Dec: {}".format(t.toc())) input_lzw = img.get_pixel_seq().copy() # ======================= SOURCE ENCODING ======================== # ====================== Lempel-Ziv-Welch ======================== t.tic() encoded_msg, dictonary = lzw.encode(input_lzw) print("Enc: {}".format(t.toc()))
sequences.extend( ((b, g, r, a), c) for b, g, r, a, c in struct.iter_unpack('=BBBBI', chunks[:4*n_ints])) sequences_int.extend(struct.iter_unpack('=II', chunks[:4*n_ints])) if save_imgs: os.makedirs('training_images', exist_ok=True) image_manip.export_bmp_py(pixels, "training_images/img_gen_huffman_{:03d}.bmp".format(i).encode('ascii')) colors = [c for c, _ in sequences_int] # This makes all color appear at least onece, such # that they are in the Huffman tree colors.extend(0xff000000 | (r << 20) | (g << 12) | (b << 4) for r, g, b in itertools.product(range(0x10), range(0x10), range(0x10))) lengths = [l for _, l in sequences_int] # colors print('generating color encoder/decoder') _, (codes, tree) = huffman.encode(colors) max_len_code_color = max(n_bits for sym, (code_str, code_int, n_bits) in codes) print('max color code length:', max(l for s, (s_c, i_c, l) in codes)) generate_huffman_encoder(codes, 'opengl/huffman_encode_colors.cpp') generate_huffman_decoder(tree, 'opengl/huffman_decode_colors.cpp') generate_huffman_decoder(tree, '../src/huffman_decode_colors.sv', lang='sv') avg_color = huffman.huffman_avg_len(colors) # lengths print('generating length encoder/decoder') _, (codes, tree) = huffman.encode(lengths) max_len_code_length = max(n_bits for sym, (code_str, code_int, n_bits) in codes) assert max_len_code_length + max_len_code_color < 32 print('max length code length:', max(l for s, (s_c, i_c, l) in codes)) generate_huffman_encoder(codes, 'opengl/huffman_encode_lengths.cpp') generate_huffman_decoder(tree, 'opengl/huffman_decode_lengths.cpp')
] # we make a list out of the flattened array via zigzag traversal fileContent = [] for idx, channel in enumerate(copy): rows, cols = channel.shape for row in range(0, rows, 8): for col in range(0, cols, 8): bloc = channel[row:row + 8, col:col + 8] flattenedBloc = bloc.flatten() fileContent.extend(flattenedBloc[Zigzag].tolist()) # huffman encoding import huffman filechars = "".join(map(lambda x: str(x) + ",", fileContent)) huffRes = huffman.encode(filechars) # run length encoding import RLE rleRes = RLE.encode(huffRes) # sizes hsize = len(huffRes.tobytes()) rsize = int(len(rleRes) * 1.5 / 8) print "original size = ", reduce( lambda x, y: x * y, image.shape ), " bytes, huffman size = ", hsize, "bytes, compressed size = ", rsize, " bytes" # decoding decoded = RLE.decode(rleRes) decoded = eval("[" + huffman.decode(decoded)[:len(filechars)] + "]")
def test_encode(self):
    """Smoke test: encode() on test_file.txt completes without raising."""
    source_name, target_name = "test_file.txt", "test_file.huff"
    encode(source_name, target_name)
    assert True
def test_encode(self):
    """Smoke test: encode() on story.txt completes without raising."""
    source_name, target_name = "story.txt", "story.huff"
    encode(source_name, target_name)
    assert True
print("-------The tortoise and the hare.----------")

# construct the frequency dictionary
# Read the text inside a context manager so the handle is closed; the
# original left the file open.
with open("theTortoiseAndTheHare.txt", "r") as file:
    textData = file.read()
textFreqs = getFreqDict(textData)

# construct the Huffman tree and extract binary codes
textTree = getHuffmanTree(textFreqs)
textCodes = getCode(textTree)
print("\nHuffman code for text data:")
for (key, value) in textCodes.items():
    print(key, '\t', value)

# let's encode the tale
textBinary = encode(textData, textCodes)
print("\nEncoded text data:")
print("%s -------> %s" % (textData, textBinary))
print("Average length (bits per character): ", len(textBinary) / len(textData))

# TODO: to compare average length to entropy, must implement getEntropy()
print("PART A - Entropy:", getEntropy(textFreqs))
print("PART B - The ceiling of entropy and average code length are equal")

# TODO: to decode messages, must implement decode()
messageEncoded = '0110000101010010111100011001111110100101100101001011110'
messageDecoded = decode(messageEncoded, textTree)
print('\nPART C, D - Decoded message:', "".join(messageDecoded))

print("\n\n-----Web session lengths.------")
# construct the frequency dictionary
sessionLengths = np.load("sessionLengths.npy")
#COLOR PALETTE CAPTURES ALL POSSIBLE PIXEL VALUES IN DICT color_palette = {} total_pix = len(pixel_values) #num of total pixels of image for i in range(total_pix): pixvalstr= str(pixel_values[i]) palette = color_palette.keys() # if new value, add to dictionary if pixvalstr not in palette: color_palette[pixvalstr] = 1 else: color_palette[pixvalstr] += 1 #add huffman marker to color_palette color_palette['end'] = 1 #Huffman tree creation huff = encode(color_palette) #array of tuples huff_dict = {} #convert array to huff_dictionary for p in huff: huff_dict[p[0]] = p[1] #Generate binary string of image img_bin_str = '' for i in pixel_values: j = str(i) #convert RGB value into string for key dict usage img_bin_str = img_bin_str + huff_dict[j] #Generate ENCODETABLEHEADER #255,255,255,freq1,255,255,255,freq2,BASE128 encodertableheader = str(len(huff_dict) - 1) #start with number of keys, minus the 'end' marker keys = huff_dict.keys() for i in range(len(keys)): #iterating through keys, ignoring end
import huffman

# Encode input.txt into test.huff, then decode test.huff into output.txt.
huffman.encode("input.txt", "test.huff")
huffman.decode("test.huff", "output.txt")
def test_encode():
    """encode() on "abbb" writes a file whose bits read '01110000'."""
    encode(TEST_FILE, "abbb")
    with open(TEST_FILE, 'rb') as handle:
        written = BitArray(handle)
        assert written.bin == '01110000'
def EncodeSingleChannel(data,codingParams):
    """Encodes a single-channel block of signed-fraction data based on the parameters in a PACFile object

    Computes a bit budget for the block, windows the data and takes the MDCT,
    allocates mantissa bits per scale factor band, then quantizes each band
    both as plain mantissas and with every Huffman map in
    ``codingParams.encodingMaps``. The cheapest representation is kept.

    Side effects on ``codingParams``: ``reservoir`` is reduced when
    ``state == 2`` and increased by any unused bits at the end; ``freqTable``
    is updated when ``buildTable`` is set.

    Returns the tuple ``(scaleFactor, bitAlloc, mantissa, overallScale,
    huffTable, optimalBits)``. ``huffTable`` is 0 when the plain mantissas are
    kept, otherwise it is 1 + the index of the chosen Huffman map and
    ``mantissa`` is that map's per-band list of codes.
    """

    # prepare various constants
    N = codingParams.a + codingParams.b
    # Floor division keeps halfN an int on Python 3 as well; it is used as a
    # slice bound and as an array size below.
    halfN = N // 2
    nScaleBits = codingParams.nScaleBits
    maxMantBits = (1<<codingParams.nMantSizeBits)  # 1 isn't an allowed bit allocation so n size bits counts up to 2^n
    if maxMantBits>16: maxMantBits = 16  # to make sure we don't ever overflow mantissa holders

    # pick the scale factor band layout for the current block state
    if codingParams.state == 0:
        sfBands = codingParams.sfBandsLong
    elif codingParams.state == 1 or codingParams.state == 3:
        sfBands = codingParams.sfBandsTrans
    else:
        sfBands = codingParams.sfBandsShort

    # NOTE(review): vMantissa is not defined in this function; it is
    # presumably np.vectorize(Mantissa) at module level - confirm.

    # compute target mantissa bit budget for this block of halfN MDCT mantissas
    bitBudget = codingParams.targetBitsPerSample * halfN  # this is overall target bit rate
    bitBudget -= 34  # Block type + nBytes bits
    bitBudget -= nScaleBits*(sfBands.nBands +1)  # less scale factor bits (including overall scale factor)
    bitBudget -= codingParams.nHuffTableBits  # less huff table type bits
    bitBudget -= codingParams.nMantSizeBits*sfBands.nBands  # less mantissa bit allocation bits

    # only state 2 draws extra bits from the reservoir
    if codingParams.state == 2:
        bitsFromRes = np.min([codingParams.reservoir, 1.125*bitBudget])
        codingParams.reservoir -= bitsFromRes
    else:
        bitsFromRes = 0

    # window data for side chain FFT and also window and compute MDCT
    timeSamples = data
    if codingParams.state == 0 or codingParams.state == 2:
        mdctTimeSamples = SineWindow(data)
        mdctLines = MDCT(mdctTimeSamples, halfN, halfN)[:halfN]
    else:
        mdctTimeSamples = TransitionSineWindow(data,codingParams.a,codingParams.b)
        mdctLines = MDCT(mdctTimeSamples, codingParams.a, codingParams.b)[:halfN]

    # compute overall scale factor for this block and boost mdctLines using it
    maxLine = np.max( np.abs(mdctLines) )
    overallScale = ScaleFactor(maxLine,nScaleBits)  # leading zeroes don't depend on nMantBits
    mdctLines *= (1<<overallScale)

    # compute SMRs in side chain FFT
    SMRs = CalcSMRs(timeSamples, mdctLines, overallScale, codingParams.sampleRate, sfBands)
    # perform bit allocation using SMR results
    bitAlloc = BitAlloc(bitBudget+bitsFromRes, maxMantBits, sfBands.nBands, sfBands.nLines, SMRs)

    # given the bit allocations, quantize the mdct lines in each band
    scaleFactor = np.empty(sfBands.nBands,dtype=np.int32)
    nMant = halfN
    for iBand in range(sfBands.nBands):
        if not bitAlloc[iBand]: nMant -= sfBands.nLines[iBand]  # account for mantissas not being transmitted
    mantissa = np.empty(nMant,dtype=np.int32)

    # one list of per-band codes and one running bit count per Huffman map
    nHuffMaps = len(codingParams.encodingMaps)
    mHuff = []
    huffBits = []
    for h in range(nHuffMaps):
        mHuff.append([])
        huffBits.append(0)

    iMant = 0
    for iBand in range(sfBands.nBands):
        nLines = sfBands.nLines[iBand]
        if nLines and bitAlloc[iBand]:  # Only encode mantissas if lines exist in current band
            lowLine = sfBands.lowerLine[iBand]
            highLine = sfBands.upperLine[iBand] + 1  # extra value is because slices don't include last value
            scaleLine = np.max(np.abs( mdctLines[lowLine:highLine] ) )
            scaleFactor[iBand] = ScaleFactor(scaleLine, nScaleBits, bitAlloc[iBand])
            # store FP coded mantissa
            m = vMantissa(mdctLines[lowLine:highLine],scaleFactor[iBand], nScaleBits, bitAlloc[iBand])
            mantissa[iMant:iMant+nLines] = m
            for h in range(nHuffMaps):
                # store Huffman coded mantissa
                huffCode = huff.encode(m, codingParams.encodingMaps[h])
                mHuff[h].append(huffCode)
                # huffCode[0] is added to the bit count together with the length field
                huffBits[h] += codingParams.nHuffLengthBits + huffCode[0]
            # increment starting index
            iMant += nLines
        else:
            # keep the per-band lists aligned with the band index
            for h in range(nHuffMaps):
                mHuff[h].append([])

    # If building freq table, add mantissas to freq table
    if codingParams.buildTable:
        codingParams.freqTable = huff.buildFrequencyTable(codingParams.freqTable, mantissa)

    # Initialize optimal bits as non-huffman
    optimalBits = np.sum(np.multiply(bitAlloc,sfBands.nLines))
    huffTable = 0
    # check for optimal bit allocation
    for h in range(nHuffMaps):
        if huffBits[h] < optimalBits:
            huffTable = h + 1
            optimalBits = huffBits[h]
            mantissa = mHuff[h]

    # calculate rollover bits for bit reservoir
    codingParams.reservoir += np.max([bitBudget + bitsFromRes - optimalBits, 0])
    # else return normal fp mantissas
    return (scaleFactor, bitAlloc, mantissa, overallScale, huffTable, optimalBits)
for z in range(4): if current >= 9: if current != 0: compressed += str(current) current = 0 if y_test[i][z] != y[i][z]: flag = 1 if flag == 1: if current != 0: compressed += str(current) + ref(y[i]) current = 0 wrong += 1 else: current += 1 total += 1 print("Accuracy: ", (1 - wrong / total) * 100) print("Wrong: ", wrong) print("Total", total) # print(compressed) val, key = hf.encode(compressed) # print(val) print("Length of DNA: ", len(DNA_backup)) print("Lenght of LSTM Compress: ", len(compressed)) print("After Huffman: ", len(str(val))) print("Just Huffman: ", len(str(hf.encode(DNA_backup)[0]))) print("Final Compression: ", ((len(DNA_backup) - len(str(val))) * 100 / len(DNA_backup))) print("Just Huffman Compression: ", (len(DNA_backup) - len(str(hf.encode(DNA_backup)[0]))) * 100 / len(DNA_backup))
def evaluate(self, data1, s, d):
    """Benchmark one compression scheme on ``size`` random texts and show the results.

    For ``s`` equal to 'bwt', 'huffman' or 'repair', this generates random
    texts with ``RandomText(data1).makeRandomText(i)`` for i = 1..size, times
    the scheme with ``time.perf_counter_ns`` and appends the inputs, outputs
    and timings to the three text browsers. Per-run timings are stored in the
    module-level ``vals_d`` and the matching run counter is incremented.
    ``write_to_excel`` is called at the end.

    :param data1: source data handed to ``RandomText``
    :param s: scheme name: 'bwt', 'huffman' or 'repair'
    :param d: unused
    """
    global size, vals_d, count_bwt, count_h, count_r
    print("size: ", size)

    if s == 'bwt':
        count_bwt += 1
        total = 0
        vals_bwt = []
        for i in range(1, size + 1):
            rnd_txt = RandomText(data1)
            data = rnd_txt.makeRandomText(i)
            pre_text = self.textBrowser.toPlainText()
            self.textBrowser.setText(pre_text + " " + str(i) + ": " + data + " \n \n")

            t1_start = time.perf_counter_ns()
            bwt = BWT(data)
            transform_bwt = bwt.transform()
            transform_rle = bwt.rle_encode(transform_bwt)
            decode_rle = bwt.rle_decode(transform_rle)
            t1_stop = time.perf_counter_ns()

            pre_text = self.textBrowser_3.toPlainText()
            self.textBrowser_3.setText(pre_text + str(t1_stop - t1_start) + "\n")
            total = total + (t1_stop - t1_start)
            vals_bwt.append(t1_stop - t1_start)
            pre_text2 = self.textBrowser_2.toPlainText()
            self.textBrowser_2.setText(pre_text2 + str(i) + "BWT: " + transform_bwt + " RLE: " + transform_rle + " RLE-DECODE: " + decode_rle + " \n")

        key = s + str(count_bwt)
        # NOTE(review): the other branches store under `key`; this one stores
        # under `s`, so every bwt run overwrites the previous one. Left
        # unchanged in case write_to_excel relies on it - confirm.
        vals_d[s] = vals_bwt
        pre_text = self.textBrowser_3.toPlainText()
        self.textBrowser_3.setText(pre_text + 'Sum for bwt: ' + str(total) + "\n")
        pre_text2 = self.textBrowser_2.toPlainText()
        self.textBrowser_2.setText(pre_text2 + "; length of the compressed text: " + str(len(transform_rle)) + "; initial length: " + str(len(data)) + "\n")

    elif s == 'huffman':
        count_h += 1
        total = 0
        vals_h = []
        for i in range(1, size + 1):
            rnd_txt = RandomText(data1)
            data = rnd_txt.makeRandomText(i)
            pre_text = self.textBrowser.toPlainText()
            self.textBrowser.setText(pre_text + " " + str(i) + ": " + data + " \n \n")

            t1_start = time.perf_counter_ns()
            huffman = Huffman(data)
            frequencyTable = huffman.computeFrequencies(data)
            codeTable = huffman.huffman_code(frequencyTable)
            huffmanCode = huffman.encode(codeTable)
            encoded = "".join(huffmanCode[ch] for ch in data)
            decoded_str = huffman.huffman_decode(encoded, huffmanCode)
            t1_stop = time.perf_counter_ns()

            vals_h.append(t1_stop - t1_start)
            pre_text = self.textBrowser_3.toPlainText()
            self.textBrowser_3.setText(pre_text + str(t1_stop - t1_start) + "\n")
            pre_text2 = self.textBrowser_2.toPlainText()
            self.textBrowser_2.setText(pre_text2 + "encoded: " + encoded + "Decoded-string: " + decoded_str + "\n")
            total = total + (t1_stop - t1_start)

        key = s + str(count_h)
        vals_d[key] = vals_h
        pre_text = self.textBrowser_3.toPlainText()
        self.textBrowser_3.setText(pre_text + 'Sum for Huffman: ' + str(total) + "\n")
        # pre_text2 still holds the browser text from before the last
        # iteration's append, exactly as in the original.
        self.textBrowser_2.setText(pre_text2 + "length of Huffmancode: " + str(len(huffmanCode)) + "; initial length: " + str(len(data)) + "\n")

    elif s == 'repair':
        total = 0
        vals_r = []
        count_r += 1
        for i in range(1, size + 1):
            rnd_txt = RandomText(data1)
            data = rnd_txt.makeRandomText(i)
            pre_text = self.textBrowser.toPlainText()
            self.textBrowser.setText(pre_text + " " + str(i) + ": " + data + " \n \n")

            t1_start = time.perf_counter_ns()
            repair = RePair(data)
            ch = 'A'
            rules = {}
            rules, s1 = repair.repair(data, ch, rules)
            decomp_string = ""
            t1_stop = time.perf_counter_ns()

            vals_r.append(t1_stop - t1_start)
            pre_text2 = self.textBrowser_2.toPlainText()
            self.textBrowser_2.setText(pre_text2 + "Rules: " + str(rules) + "; s: " + s1 + "Decomp string: " + decomp_string)
            pre_text = self.textBrowser_3.toPlainText()
            self.textBrowser_3.setText(pre_text + str(t1_stop - t1_start) + "\n")
            total = total + (t1_stop - t1_start)

        key = s + str(count_r)
        vals_d[key] = vals_r
        pre_text = self.textBrowser_3.toPlainText()
        self.textBrowser_3.setText(pre_text + 'Sum for RePair: ' + str(total) + "\n")
        pre_text2 = self.textBrowser_2.toPlainText()
        # Report the length of the RePair output (s1); the original printed
        # len(s), the length of the mode name 'repair'.
        self.textBrowser_2.setText(pre_text2 + ' initial length: ' + str(len(data)) + 'len(RePair): ' + str(len(s1)))

    self.write_to_excel()
def test_decode_reverses_encode_simple():
    """A short string written via encode() is recovered as a prefix of decode()'s output."""
    original = 'abbb'
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, original))
    assert round_tripped.startswith(original)
from pprint import pprint
import huffman
from view import viz_tree

data = b"huffman"
tree = huffman.build_tree(data)
map_code = huffman.build_map_code(tree)

# encode
bin_data = huffman.encode(data, map_code)
print("Map code")
for k, v in map_code.items():
    print("{}: {}".format(chr(k), v.to01()))
print("Encoded data")
print(bin_data.to01())
viz_tree(tree)

# decode
print("After decode")
print(huffman.decode(bin_data.tobytes(), map_code, bin_data.buffer_info()[3]))

# calculate performance
# p is the compressed size as a fraction of the original size in bits, so the
# reduction is 1 - p. The original printed p itself under the label "Reduce".
p = len(bin_data) / (len(data) * 8)
print(f"Reduce {(1 - p) * 100}%")
def test_decode_reverses_encode_special():
    """A string of punctuation and spaces is recovered as a prefix of decode()'s output."""
    original = '! %'
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, original))
    assert round_tripped.startswith(original)
string = h.remove_spl_ch(string)
message = h.remove_spl_ch(message)

# to create the huffman map
prob_of_characters, enc_dict = h.encode_dict(input=string)
print("\nencoded dictionary : ", end="\n\n")
# NOTE(review): this unpacking assumes enc_dict and prob_of_characters yield
# (key, value) pairs when iterated - confirm against h.encode_dict.
for key, value in enc_dict:
    print(key, " : ", value)
print("\n\n")
print("probability of characters : ", end="\n\n")
for key, value in prob_of_characters:
    print(key, " : ", value)
print("\n\n")

# to encode the message(custom) using the huffman map
enc_msg = h.encode(msg=message, dictionary=enc_dict)
print("encoded message : ", enc_msg, end="\n\n")

# to encode the original string using huffman map
# Encode `string` here: the original passed `message` again, so the size
# comparison below paired `string` with the encoding of a different text.
enc_string = h.encode(msg=string, dictionary=enc_dict)

# to decode the encoded message using huffman map
dec_msg = h.decode(enc_msg=enc_msg, dictionary=enc_dict)
print("decoded message : ", dec_msg, end="\n\n")

# to get information about the space saved
h.size_saved(dictionary=enc_dict, msg=string, enc_msg=enc_string)
def test_decode_reverses_encode_unicode():
    """A string with non-ASCII characters is recovered as a prefix of decode()'s output."""
    original = 'Kærlighed og Øl!'
    round_tripped = decode(TEST_FILE, encode(TEST_FILE, original))
    assert round_tripped.startswith(original)
def main():
    """Load the Iliad text as unsigned bytes and pass it to huffman.encode."""
    raw_bytes = np.fromfile("../data/raw/iliad.txt", np.uint8)
    huffman.encode(raw_bytes)
ccount += 1 text += line words += [w.strip(' \n.,”“') for w in line.split()] print('= Stats =') print('Number of characters', ccount) print('Number of words', len(words)) min_entropy = compute_entropy(frequency_map.values()) print('Minimum entropy', min_entropy) huffman_entropy = compute_huffman_entropy(frequency_map) print('Huffman entropy', huffman_entropy) tree = build_tree(frequency_map) encoded_text = encode(text, tree) print('Length of raw text: {} bytes'.format(len(text))) print('Length of encoded text: {} bytes'.format(len(encoded_text)/8)) print('Compression rate: {}'.format(len(text)*8/len(encoded_text))) print('= Word-based =') text_length = 0 frequency_map = {} for w in words: text_length += len(w) if w not in frequency_map: frequency_map[w] = 0 frequency_map[w] += 1 avg_word_size = text_length / len(words)
# The arithmetic coder takes an extra block-size argument.
if task == TASK_ARITH:
    blockSize = int(arguments[3])

# Reject unknown tasks and missing input files before doing any work.
if task not in TASKS:
    sys.stderr.write(
        f"Invalid usage! The given task: {task} does not exist!\n")
    sys.stderr.write("For help, use: encode.py -h")
    sys.exit(errno.EINVAL)
if not os.path.exists(fileName):
    sys.stderr.write(f"Could not find input file: {fileName}")
    sys.exit(errno.ENOENT)

if task == TASK_DISPLAY:
    utils.display(stats.createStatistic(fileName))
if task == TASK_SF:
    utils.display(shannon_fano.encode(fileName))
if task == TASK_SF_STAT:
    codes = shannon_fano.encode(fileName)
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))
if task == TASK_HUFF:
    utils.display(huffman.encode(fileName))
if task == TASK_HUFF_STAT:
    codes = huffman.encode(fileName)
    # Display the codes already computed, as the Shannon-Fano branch does,
    # instead of encoding the file a second time.
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))
if task == TASK_ARITH:
    code = arithmetic.encode(fileName, blockSize)
    utils.displayArithmeticCode(code)
path = os.path.join(output_path, dirname) if not os.path.exists(path): os.mkdir(path) # Save results total_added_bitrate = 0 for i in range(len(resids)): if i > 0 and i % 10 == 0: print('Running frame', i) print('Average added bitrate so far:', total_added_bitrate / i) # Encode encoded = model.encoder(data[i:i + 1]) # Grab size _, _, added_bitrate = huffman.encode(encoded, fps=int(args['fps'])) total_added_bitrate += added_bitrate # Binarize encoded = encoded.sign() encoded[encoded == 0] = 1 # Decode decoded = model.decoder(encoded) decoded = decoded.data.numpy() save_img(i, compressed[i] + decoded[0], 'result') save_img(i, compressed[i], 'compressed') save_img(i, compressed[i] + resids[i], 'reference') #save_img(i, resids[i], 'input', resid=True) #save_img(i, decoded[0], 'output', resid=True)
def test_encode(self):
    """Smoke test: encode() is called with empty input and output file names."""
    source_name, target_name = "", ""
    encode(source_name, target_name)
    assert True