def test_build_codebook_it_should_initialize_codebook_with_left_and_right_nodes(
        self):
    """A node built from two leaves maps the left symbol to '0' and the right to '1'."""
    leaf_a = Huffman(1, 'a')
    leaf_b = Huffman(1, 'b')
    root = Huffman(2, 'anything', leaf_a, leaf_b)

    # Left child first, then right child, in the same order as the tree.
    for symbol, expected_code in (('a', '0'), ('b', '1')):
        self.assertEqual(root.codebook[symbol], expected_code)
def test_build_codebook_it_should_correctly_assign_leaf_prefixes_according_to_Huffman_algorithm(
        self):
    """The codebook is keyed by each child's ``data``: left gets '0', right gets '1'."""
    leaf_a = Huffman(1, 'a')
    leaf_b = Huffman(1, 'b')
    root = Huffman(2, 'ab', leaf_a, leaf_b)

    left_code = root.codebook[root.left.data]
    self.assertEqual(left_code, '0')

    right_code = root.codebook[root.right.data]
    self.assertEqual(right_code, '1')
def test_huffman_decode_it_should_use_right_leaf_to_decode(self):
    """With only a right child, 'b' encodes to a leading '1' and decodes back to 'b'."""
    right_leaf = Huffman(1, 'b')
    root = Huffman(2, 'something', None, right_leaf)

    bits = root.huffman_encode('b')
    self.assertEqual(bits[0], '1')

    round_tripped = root.huffman_decode(bits)
    self.assertEqual('b', round_tripped)
def test_huffman_decode_it_should_use_left_leaf_to_decode(self):
    """With only a left child, 'a' encodes to a leading '0' and decodes back to 'a'."""
    left_leaf = Huffman(1, 'a')
    root = Huffman(2, 'doesnt matter', left_leaf)

    bits = root.huffman_encode('a')
    self.assertEqual(bits[0], '0')

    round_tripped = root.huffman_decode(bits)
    self.assertEqual('a', round_tripped)
def test_encode_decode(self):
    """Check the single-newline case explicitly, then round-trip a quoted word."""
    # A lone newline is expected to encode to the single bit "0" ...
    newline_bits = Huffman().get_encoded_bytes("\n")
    assert newline_bits == "0"

    # ... and "0" is expected to decode back to a newline.
    newline_text = Huffman().get_decoded_text("0")
    assert newline_text == "\n"

    # Encoding followed by decoding must give back the original text.
    quoted_bits = Huffman().get_encoded_bytes('"Huffman"')
    quoted_text = Huffman().get_decoded_text(quoted_bits)
    assert quoted_text == '"Huffman"'
def test_huffman_decode_it_should_use_data_to_return_the_decoded_string(
        self):
    """Encoding 'ab' with a two-leaf tree and decoding the result yields 'ab' again."""
    leaf_a = Huffman(1, 'a')
    leaf_b = Huffman(1, 'b')
    root = Huffman(2, 'ab', leaf_a, leaf_b)

    bits = root.huffman_encode('ab')
    round_tripped = root.huffman_decode(bits)

    self.assertEqual('ab', round_tripped)
def test_print_it_should_print_a_huffman_node_according_to_implementation(
        self):
    """``repr`` of a node shows its data and the data of both children.

    ``builtins.print`` is temporarily swapped for a ``Mock`` so the printed
    text can be inspected. The real ``print`` is always restored afterwards;
    otherwise every test that runs later in the same process would silently
    print into the mock.
    """
    left = Huffman(1, 'a')
    right = Huffman(1, 'b')
    huffman = Huffman(2, 'abc', left, right)

    real_print = builtins.print
    builtins.print = Mock()
    try:
        builtins.print(repr(huffman))
        builtins.print.assert_called_with(
            '<Huffman(data: abc, left: a, right: b>')
    finally:
        # Undo the global monkeypatch even when the assertion fails.
        builtins.print = real_print
def browserFile(self):
    """Ask the user for a ``.txt`` file and compress or decompress it.

    If the first line of the chosen file starts with ``*(`` the file is
    treated as previously compressed: the rest of that line is evaluated
    into the ``trim`` value, the remaining lines are decoded, and the result
    is written to ``<name>_dencoded.txt`` in the current directory.

    Otherwise the whole file is encoded and written to
    ``<name>_encoded.txt`` with a ``*<trim>`` header line in front of the
    encoded payload.

    ``self.log`` and ``self.resume`` are updated with the outcome. Nothing
    happens when the user cancels the dialog.
    """
    filename = filedialog.askopenfilename(
        initialdir=".", title="Selecione a o arquivo",
        filetypes=(("text files", ".txt"),))
    if not filename:
        # Dialog cancelled: askopenfilename returns an empty value.
        return

    # Strip the 4-character extension (".txt") from the bare file name.
    base_name = os.path.basename(filename)[0:-4]

    # Read everything up front so the input handle is closed promptly.
    with open(filename, "r") as file:
        first_line = file.readline()
        remaining_text = file.read()

    if first_line[:2] == "*(":
        # SECURITY NOTE(review): eval() executes whatever expression is in
        # the header of the selected file. Only open files this program
        # produced itself; consider ast.literal_eval if ``trim`` is always a
        # plain literal -- TODO confirm against the Huffman class.
        dencode = Huffman(word=remaining_text, trim=eval(first_line[1:]),
                          dencode=0)
        with open(base_name + "_dencoded.txt", "w+") as file_dencode:
            file_dencode.write(dencode.word_decode)
        self.log["text"] = "Descompactação realizada com sucesso!!\n" + \
            "Verifique o resultado na pasta raiz do projeto."
        self.resume["text"] = ""
        return

    # Not a compressed file: the first line is part of the payload.
    encodefy = Huffman(word=first_line + remaining_text)
    with open(base_name + "_encoded.txt", "w+") as file_encode:
        file_encode.write("*" + str(encodefy.trim) + "\n")
        file_encode.write(str(encodefy.word_encode))
    self.log["text"] = "Compressão realizada com sucesso!!\n" + \
        "Verifique o resultado na pasta raiz do projeto."

    size_compress = len(encodefy.word_encode)
    current_size = len(encodefy.word) * 8
    # Guard the ratio against an empty input (0 bits).
    ratio = (size_compress / current_size) * 100 if current_size else 0.0
    self.resume["text"] = "Tamanho antigo do arquivo: " + \
        str(current_size) + " bits \n" + "Tamanho atual do arquivo: " + \
        str(size_compress) + " bits\n" + "Taxa de compressão: " + \
        str("{:.2f}".format(ratio)) + "%"
def main():
    """Compress the file named on the command line, then decompress it again.

    Prints the size of the source, compressed and restored files, the size
    of the code table and the number of padding zero bits. Returns early
    with a message when no argument is given or the file does not exist.
    """
    if len(sys.argv) < 2:
        print("Задайте файл с помощью аргументов командной строки")
        return
    filename = sys.argv[1]

    if not os.path.exists(filename):
        print("Файл не найден!")
        return

    source_size = os.path.getsize(filename)
    print("Исходный файл: '{}' ({} байт)".format(filename, source_size))

    huf = Huffman(filename)
    codes_filename, res_filename, zeroes = huf.compress(filename)
    compressed_size = os.path.getsize(res_filename)
    print("Сжатый файл: '{}' ({} байт)".format(res_filename, compressed_size))

    table_size = sys.getsizeof(huf.codes_table)
    print("Размер таблицы кодов: {} байт".format(table_size))
    print("Нулей дописано в последний байт:", zeroes)

    dec_filename = huf.decompress(filename, res_filename, zeroes)
    restored_size = os.path.getsize(dec_filename)
    print("Восстановленный файл: '{}' ({} байт)".format(
        dec_filename, restored_size))
def compress_data():
    """Handle an upload of ECG data and render its compression statistics.

    Only a POST request that carries files is processed; any other request
    falls through and returns ``None``. The uploaded ``ecg_data`` file is
    saved under the upload folder, handed to ``Huffman`` and the resulting
    figures are passed to the ``compress_output.html`` template.
    """
    if request.method != "POST" or not request.files:
        return None

    # The uploaded file is passed on to the Huffman encoding class below.
    upload = request.files["ecg_data"]
    print(upload)

    filename = secure_filename(upload.filename)
    print('uploaded filename:', filename)

    working_dir = os.getcwd()
    print('current directory:', working_dir)

    upload_dir = working_dir + UPLOAD_FOLDER
    upload.save(os.path.join(upload_dir, filename))

    data_filepath = upload_dir + '/' + filename
    input_file_size = os.stat(data_filepath).st_size

    text_object = Huffman(data_filepath)
    ratio = text_object.create_output()

    return render_template(
        "compress_output.html",
        file_size=input_file_size,
        compression_ratio=ratio,
        final_file_size=(text_object.initial_byte_counter * 3),
        result=text_object.codes_dict)
def test_unzip_tree_it_should_return_None_for_index_over_the_length_of_the_encoded_tree(
        self):
    """``unzip_tree`` returns ``None`` when the index lies past the end of the encoded tree."""
    encoded_tree = 'abcdefg'
    leaf = Huffman(1, 'abc')
    past_the_end = len(encoded_tree) + 1

    result = Huffman.unzip_tree(encoded_tree, [leaf], past_the_end)

    self.assertIsNone(result)
def test_huffman_decode_it_should_raise_ValueError_when_code_is_invalid(
        self):
    """Decoding a string that is not a valid code raises ``ValueError`` with the expected message."""
    # Build the node outside the context manager so that only the decode
    # call itself can satisfy assertRaises.
    huffman = Huffman(1, 'abc')

    with self.assertRaises(ValueError) as assertRaisesContext:
        huffman.huffman_decode('abcdefg')

    # This check must live outside the ``with`` body: once the call above
    # raises, nothing else inside the block is executed.
    self.assertIn('Error when encoding the string',
                  assertRaisesContext.exception.args)
def __init__(self, image, quality, out, comment):
    """Store the encoder settings and build the helper objects for ``image``.

    ``image.size`` is unpacked as ``(width, height)``; the same pair is
    passed on to ``Huffman``.
    """
    self.quality = quality
    self.jpeg_obj = JpegInfo(image, comment)

    width, height = image.size
    self.image_width = width
    self.image_height = height

    self.out = out
    self.dct = DCT(self.quality)
    self.huf = Huffman(width, height)
def file_set(self, file_chooser):
    """Read the file picked in ``file_chooser``, Huffman-code its text and show the results.

    The file is opened in text mode and read in one go. The context manager
    closes the handle even if reading fails.
    """
    selected_path = file_chooser.get_file().get_path()
    with open(selected_path, "r") as opened_file:
        text = opened_file.read()

    huffman = Huffman()
    huffman.originalMessage = text
    huffman.startHuffmanCoding()
    self.show_results(huffman)
def compress_each_file_in_folder():
    """Compress every regular file in ``./testfiles/`` that is not already an output file.

    Entries whose name contains ``_compressed`` or ``_decompressed`` are
    skipped, as are sub-directories. Each remaining file ``name.ext`` is
    compressed into ``name_compressed.ext`` in the same folder. Both file
    handles are closed as soon as the file has been processed, so the output
    is flushed to disk.
    """
    path = './testfiles/'
    for filename in listdir(path):
        if '_compressed' in filename or '_decompressed' in filename:
            continue
        if not os.path.isfile(f'{path}{filename}'):
            # Directories cannot be opened as binary files.
            continue

        print(filename)
        stem, file_extension = os.path.splitext(filename)
        source_path = f'{path}{stem}{file_extension}'
        target_path = f'{path}{stem}_compressed{file_extension}'
        with open(source_path, 'rb') as infile, \
                open(target_path, 'wb') as outfile:
            h = Huffman(infile, outfile)
            h.compress()
def __init__(self, frame_id, img_size, macroblock_size, frame, bufsize):
    """Prepare the thread that turns one RLE-encoded frame into a bitstream.

    Stores the frame, derives the payload size of dict and body packets
    from ``bufsize``, and builds (and times) the Huffman dictionary for the
    entire frame.
    """
    threading.Thread.__init__(self)

    self.bit_generator = BitstreamGenerator(frame_id, img_size, macroblock_size)

    # List of lists of tuples: each sub-list (a list of tuples) is the RLE
    # representation of one macroblock.
    self.frame = frame

    # Also equal to (img_size[0] * img_size[1]) // macroblock_size**2.
    self.total_num_of_macroblocks = len(frame)

    self.bufsize = bufsize

    # The metadata of the packets that carry the dict bitstream take up
    # 50 "bits" (50 = 16 + 2 + 16 + 16).
    self.taille_metadonnees_dict = 50

    # Size of an elementary dict packet before it is attached to the
    # bitstream packet.
    self.taille_paquet_elementaire_dict = bufsize - self.taille_metadonnees_dict

    # The metadata of the packets that carry the body bitstream take up
    # 66 "bits" (66 = 16 + 2 + 16 + 16 + 16).
    self.taille_metadonnees_body = 66

    # Size of an elementary body packet before it is attached to the
    # bitstream packet (whose size, 66 + taille_paquet_elementaire_body,
    # equals bufsize by definition).
    self.taille_paquet_elementaire_body = bufsize - self.taille_metadonnees_body

    # Number of packets that will be sent from the client to the server
    # for the dict and for the body respectively; not known yet.
    self.nb_paquets_dict = None
    self.nb_paquets_body = None

    # Generate, then encode, the Huffman dictionary of the **whole** frame.
    # Flattening the macroblocks is part of the timed section.
    started_at = time()
    all_rle_tuples = []
    for macroblock in frame:
        all_rle_tuples.extend(macroblock)
    self.huff = Huffman(all_rle_tuples)
    finished_at = time()

    self.duree_generation_dico_huffman = finished_at - started_at
    self.dict_huffman_encode = self.huff.dictToBin()

    # Size of the **useful** compressed data of the body.
    self.taille_donnees_compressees_huffman = 0

    # Buffer that fills up whenever verrou_buffer_bitstream is already held
    # by the main thread.
    self.buffer_interne = ""
def main(filename='ziip.rar'):
    """Compress ``filename``, decompress the result and report the file sizes.

    Args:
        filename: Path of the file to process. Defaults to ``'ziip.rar'``,
            the name that used to be hard-coded, so existing ``main()``
            calls behave as before.

    Raises:
        OSError: If ``filename`` (or a produced file) cannot be found when
            its size is queried.
    """
    print("Исходный файл: '{}' ({} байт)".format(
        filename, os.path.getsize(filename)))

    huf = Huffman(filename)
    res_filename, zeroes = huf.compress(filename)
    print("Сжатый файл: '{}' ({} байт)".format(
        res_filename, os.path.getsize(res_filename)))
    print("Нулей дописано в последний байт:", zeroes)

    dec_filename = huf.decompress(filename, res_filename, zeroes)
    print("Восстановленный файл: '{}' ({} байт)".format(
        dec_filename, os.path.getsize(dec_filename)))
def test_huffman_encode_decode_reverse():
    """Decoding an encoded message gives back the message.

    Checked for the text the coder was built from and for a second message
    passed through the same coder.
    """
    initial = 'A man, a plan, a canal, Panama'
    huffman = Huffman(initial)

    for message in (initial, 'nana'):
        encoded = huffman.encode(message)
        decoded = huffman.decode(encoded)
        assert message == decoded
def main(): h = Huffman(request_freq_table) for s in test_data: print " encoding: ", s sp = [ord(c) for c in s] e_result = h.Encode(sp, False) print " e_result: ", FormatAsBits(e_result) d_result = ''.join(ListToStr(h.Decode(e_result[0], False, e_result[1]))) if d_result != s: print "difference found: ", d_result, " ", s else: print "It worked: ", s print
def read_dht_header(jpeg: Jpeg, file_: T.BinaryIO):
    """Parse one DHT (Define Huffman Table) segment from ``file_``.

    Reads the segment length, the table information byte, the sixteen
    per-length symbol counts and the symbols themselves, printing each as
    it goes, then builds a ``Huffman`` object from them.

    Raises:
        NotImplementedError: If the number of bytes consumed differs from
            the length announced by the segment.
    """
    segment_start = file_.tell()

    # JPEG stores multi-byte values in network byte order, i.e. big endian.
    length = one_from_file(">H", file_)
    ht_information = one_from_file("B", file_)
    # bit 0..3 : number of HT (0..3, otherwise error)
    # bit 4    : type of HT, 0 = DC table, 1 = AC table
    # bit 5..7 : not used, must be 0
    print("ht_information", bin(ht_information))

    # "Number" as in index, not count.
    ht_num = ht_information & 0x0F
    print(f"huffman table number: {ht_num}")

    ht_type = (ht_information >> 4) & 0b1
    ht_type_str = "AC" if ht_type else "DC"
    print(f"type of huffman table: {ht_type_str}")

    unused_ht = (ht_information >> 5) & 0b111
    assert unused_ht == 0

    # One count per code length 1..16; their sum is the total number of
    # codes, which must not exceed 256.
    num_symbols_per_bit_length = unpack_from_file("16B", file_)
    print(num_symbols_per_bit_length)
    num_symbols = sum(num_symbols_per_bit_length)
    assert num_symbols <= 256  # per comment above

    # The symbols follow in order of increasing code length; they are read
    # one group per code length.
    symbols: T.List[T.List[int]] = [
        unpack_from_file(f"{symbol_count}B", file_)
        for symbol_count in num_symbols_per_bit_length
    ]
    print(symbols)

    huff = Huffman(num_symbols_per_bit_length, symbols)

    consumed = file_.tell() - segment_start
    if consumed != length:
        raise NotImplementedError(
            "Sorry, we don't handle this yet {} != {}".format(
                consumed, length))
def main(): h = Huffman(request_freq_table) for s in test_data: print " encoding: ", s sp = [ord(c) for c in s] e_result = BitBucket() h.EncodeToBB(e_result, sp, True) print " e_result: ", FormatAsBits(e_result.GetAllBits()) d_result = ListToStr(h.DecodeFromBB(e_result, True, -1)) if d_result != s: print "difference found: d_result(%s) vs orig(%s)" % (repr(d_result), repr(s)) else: print "It worked: ", s print
def __init__(self, bytes):
    """Build the table from the raw segment payload ``bytes``.

    Byte 0 holds the table id in its low nibble and the table type in its
    high nibble. Bytes 1..16 give the number of codes of length 1..16, and
    the symbol values follow. Codes are assigned consecutively within one
    length and shifted left by one bit when moving to the next length.
    """
    # NOTE: the parameter name shadows the ``bytes`` builtin; it is kept
    # because it is part of the signature.
    header = bytes[0]
    self.Id = header & 0x0F
    self.TableType = HuffmanTableType(header >> 4)
    self.Codes = {}
    self.Huffman = Huffman()

    counts = [bytes[offset] for offset in range(1, 17)]

    cursor = 17  # first symbol byte, right after the 16 counts
    code = 0
    for length, count in enumerate(counts, start=1):
        for _ in range(count):
            self.Codes[(length, code)] = bytes[cursor]
            code += 1
            cursor += 1
        code <<= 1

    self.Huffman.FromTable(self.Codes)
def test_init(self):
    """The Python DC/AC matrices match those of the Java ``james.Huffman`` element by element."""
    py_huffman = Huffman(100, 100)
    pydc = py_huffman.dc_matrix
    pyac = py_huffman.ac_matrix

    java_huffman = jpype.JClass('james.Huffman')(100, 100)
    jadc = java_huffman.DC_matrix
    jaac = java_huffman.AC_matrix

    def check(py, ja):
        # Compare two three-level nested sequences: the lengths must agree
        # at every level, then every element must agree.
        self.assertEqual(len(py), len(ja))
        for i, py_table in enumerate(py):
            ja_table = ja[i]
            self.assertEqual(len(py_table), len(ja_table))
            for j, py_row in enumerate(py_table):
                ja_row = ja_table[j]
                self.assertEqual(len(py_row), len(ja_row))
                for k, py_value in enumerate(py_row):
                    ja_value = ja_row[k]
                    self.assertEqual(
                        py_value, ja_value,
                        '%d %d %d %d %d' % (i, j, k, py_value, ja_value))

    check(pyac, jaac)
    check(pydc, jadc)
def compression(self):
    """Tile each channel's coefficients into one uint8 image and encode it.

    For each of the three channels the coarse coefficients are written to
    the top-left corner of a zero image of size ``self.shape``; the three
    coefficient arrays of every level are then placed around the region
    filled so far, doubling that region each time. The image is stored in
    ``self.img``, encoded with ``self.__encode(409600)`` and the resulting
    bytes are handed to ``Huffman().ctor_from_bytes`` under the label
    ``'<channel>_code'``.

    Side effects: ``self.shape`` and ``self.img`` are set, and the entries
    of ``self.coarse_coef`` and ``self.coefficients`` are rescaled in place.

    NOTE(review): the original comment says the encoding is returned, but
    no ``return`` is visible here -- confirm whether the function continues
    beyond this point.
    """
    # Unfold the coefficients into an image, call the encoding function in
    # separate passes, organise the bitstream, return the encoding.
    # shape of origin image
    self.shape = (size(self.coefficients[0][-1][0], 0) * 2,
                  size(self.coefficients[0][-1][0], 1) * 2)
    # Number of levels: log2 of the ratio between the full height and the
    # height of the coarse coefficients.
    num = int(log2(self.shape[0] / size(self.coarse_coef[0], 0)))
    for i in range(3):  # three channels
        # [rows, cols] of the region of img that has been filled so far.
        position = [
            size(self.coarse_coef[i], 0),
            size(self.coarse_coef[i], 1)
        ]
        img = zeros([self.shape[0], self.shape[1]], dtype=uint8)
        # Scale by the uint8 maximum (255) before the integer cast below.
        self.coarse_coef[i] = self.coarse_coef[i].astype(float) * iinfo(
            uint8).max  # convert to integer
        img[0:position[0], 0:position[1]] = self.coarse_coef[i].astype(uint8)
        for j in range(num):
            for k in range(3):
                # Same scale-then-cast treatment for the three arrays of
                # level j; the stored coefficients are overwritten.
                self.coefficients[i][j][k] = self.coefficients[i][j][
                    k].astype(float) * iinfo(uint8).max
                self.coefficients[i][j][k] = self.coefficients[i][j][
                    k].astype(uint8)
            # Array 0 goes below the filled region, array 1 to its right and
            # array 2 diagonally below-right.
            img[position[0]:position[0] * 2,
                0:position[1]] = self.coefficients[i][j][0]
            img[0:position[0],
                position[1]:position[1] * 2] = self.coefficients[i][j][1]
            img[position[0]:position[0] * 2,
                position[1]:position[1] * 2] = self.coefficients[i][j][2]
            # The filled region has doubled in both directions.
            position[0] *= 2
            position[1] *= 2
        # Debug aid: cv2.imshow("whole", img)
        self.img = img
        # Debug fixture (8x8 sample input), kept for reference:
        # self.img = array([[63, -34, 49, 10, 7, 13, -12, 7], [-31, 23, 14, -13, 3, 4, 6, -1], [15, 14, 3, -12, 5, -7, 3, 9], [-9, -7, -14, 8, 4, -2, 3, 2], [-5, 9, -1, 47, 4, 6, -2, 2], [3, 0, -3, 2, 3, -2, 0, 4], [2, -3, 6, -4, 3, 6, 3, 6], [5, 11, 5, 6, 0, 3, -4, 4]])
        # self.shape = (8, 8)
        byte_array = self.__encode(409600)  # around 50KB
        tree = Huffman()
        # The channel index is part of the label passed with the bytes.
        tree.ctor_from_bytes(byte_array, str(i) + '_code')
komprimerede fil). Begge filer skal åbnes i “binary mode”. Når en BitWriter instantieres, skal den have et file object som argument. Opgave 2: I opgave 2 skal man bruge metoderne readint32bits() og readbit() fra klassen BitReader fra det udleverede bibliotek bitIO.py til at læse heltal (for hyppighedstabel) og bits (for Huffmans-koderne) fra inputfilen (den komprimerede fil). Man skal bruge kaldet write(bytes([b])) (hvor write() er fra file objects og bytes() er en built-in funktion) til skrive bytes til outputfilen (den genskabte originale fil). Her er b et heltal som repræsenterer den byte, som skal skrives. Begge filer skal ˚abnes i “binary mode”. Når en BitReader instantieres, skal den have et file object som argument. :Gruppe medlemmer: Mads Emil Falkenstrøm, [email protected] Mathias Birkebjerg Kristiansen, [email protected] Patrick Nielsen, [email protected] """ import sys from huffman import Huffman if __name__ == '__main__': infile = open(sys.argv[1], 'rb') outfile = open(sys.argv[2], 'wb') h = Huffman(infile, outfile) h.compress()
def init(texto):
    """Build a ``Huffman`` object for ``texto`` and return the result of its ``mostrarTabelas()``."""
    return Huffman(texto).mostrarTabelas()
def decompress_files():
    """Decompress ``./testfiles/same_compressed.txt`` into ``./testfiles/same_decompressed.txt``.

    Both files are opened in binary mode and are closed when decompression
    finishes or fails, so the output is always flushed to disk.
    """
    with open('./testfiles/same_compressed.txt', 'rb') as infile, \
            open('./testfiles/same_decompressed.txt', 'wb') as outfile:
        h = Huffman(infile, outfile)
        h.decompress()
# Demo script: compress a sample bitmap with Huffman coding, then decompress
# the result and print where each output file was written.
from huffman import Huffman
import sys

# Relative path written with Windows-style backslash separators.
path = 'images\\sample.bmp'
h = Huffman(path)

# compress() returns the path of the compressed file ...
output_path = h.compress()
print("Compressed file path: " + output_path)

# ... which is then fed back into decompress() on the same object.
decom_path = h.decompress(output_path)
print("Decompressed file path: " + decom_path)
def compress_text(self, entry):
    """Huffman-code the text currently held by ``entry`` and show the results."""
    message = str(entry.get_text())

    coder = Huffman()
    coder.originalMessage = message
    coder.startHuffmanCoding()

    self.show_results(coder)
def main(): parser = OptionParser() parser.add_option( "-v", "--verbose", type="int", dest="v", help="Sets verbosity. At v=1, the opcodes will be printed. " "At v=2, so will the headers [default: %default]", default=0, metavar="VERBOSITY") parser.add_option("-f", "--force_streamgroup", dest="f", help="If set, everything will use stream-group 0. " "[default: %default]", default=0) (options, args) = parser.parse_args() print options requests = default_requests responses = default_responses if args >= 1: requests = [] responses = [] for filename in args: (har_requests, har_responses) = ReadHarFile(filename) requests.extend(har_requests) responses.extend(har_responses) spdy4_rq = SPDY4(options) spdy4_rq.compressor.huffman_table = Huffman(request_freq_table) spdy4_rq.decompressor.huffman_table = spdy4_rq.compressor.huffman_table spdy3_rq = SPDY3(options) http1_rq = HTTP1(options) spdy4_rs = SPDY4(options) spdy4_rs.compressor.huffman_table = Huffman(response_freq_table) spdy4_rs.decompressor.huffman_table = spdy4_rs.compressor.huffman_table spdy3_rs = SPDY3(options) http1_rs = HTTP1(options) print " UC: UnCompressed frame size" print " CM: CoMpressed frame size" print " UR: Uncompressed / Http uncompressed" print " CR: Compressed / Http compressed" def framelen(x): return len(x) + 10 h1usrq = 0 h1csrq = 0 s3usrq = 0 s3csrq = 0 s4usrq = 0 s4csrq = 0 h1usrs = 0 h1csrs = 0 s3usrs = 0 s3csrs = 0 s4usrs = 0 s4csrs = 0 for i in xrange(len(requests)): request = requests[i] response = responses[i] rq4 = spdy4_rq.ProcessFrame(request, request) rs4 = spdy4_rs.ProcessFrame(response, request) rq3 = spdy3_rq.ProcessFrame(request, request) rs3 = spdy3_rs.ProcessFrame(response, request) rqh = http1_rq.ProcessFrame(request, request) rsh = http1_rs.ProcessFrame(response, request) if options.v >= 2: print '##################################################################' print '####### request-path: "%s"' % requests[i][":path"][:80] print "####### stream group: %2d, %s" % (rq4[7], 
GetHostname(request)) print "####### dict size: %3d" % spdy4_rs.decompressor.GetDictSize( ) print print "## request ##\n", rqh[1] if options.v >= 4: print "request header: ", request for op in rq4[6]: print "rq_op: ", FormatOp(op) print "\n## response ##\n", rqh[1] if options.v >= 4: print "response header: ", response for op in rs4[6]: print "rs_op: ", FormatOp(op) print message = CompareHeaders(request, rq4[4]) if message: print "Something is wrong with the request." if options.v >= 1: print message message = CompareHeaders(response, rs4[4]) if message: print "Something is wrong with the response." if options.v >= 1: print message (h1comrq, h1uncomrq) = map(len, rqh) h1usrq += h1uncomrq h1csrq += h1comrq (s3comrq, s3uncomrq) = map(framelen, rq3) s3usrq += s3uncomrq s3csrq += s3comrq (s4comrq, s4uncomrq) = map(framelen, rq4[:2]) s4usrq += s4uncomrq s4csrq += s4comrq (h1comrs, h1uncomrs) = map(len, rsh) h1usrs += h1uncomrs h1csrs += h1comrs (s3comrs, s3uncomrs) = map(framelen, rs3) s3usrs += s3uncomrs s3csrs += s3comrs (s4comrs, s4uncomrs) = map(framelen, rs4[:2]) s4usrs += s4uncomrs s4csrs += s4comrs lines = [ ("http1 req", h1uncomrq, h1comrq, 1.0 * h1uncomrq / h1uncomrq, 1.0 * h1comrq / h1comrq), ("spdy3 req", s3uncomrq, s3comrq, 1.0 * s3uncomrq / h1uncomrq, 1.0 * s3comrq / h1comrq), ("spdy4 req", s4uncomrq, s4comrq, 1.0 * s4uncomrq / h1uncomrq, 1.0 * s4comrq / h1comrq), ("http1 res", h1uncomrs, h1comrs, 1.0 * h1uncomrs / h1uncomrs, 1.0 * h1comrs / h1comrs), ("spdy3 res", s3uncomrs, s3comrs, 1.0 * s3uncomrs / h1uncomrs, 1.0 * s3comrs / h1comrs), ("spdy4 res", s4uncomrs, s4comrs, 1.0 * s4uncomrs / h1uncomrs, 1.0 * s4comrs / h1comrs), ] if options.v >= 1: print " UC | CM | UR | CR" for fmtarg in lines: print " %s frame size: %4d | %4d | %2.2f | %2.2f" % fmtarg print print "Thats all folks. 
If you see this, everything worked OK" print "######################################################################" print "######################################################################" print print " http1 | spdy3 | spdy4 " fmtarg = (h1usrq, s3usrq, s4usrq) print "Req Uncompressed Sums: % 8d | % 8d | % 8d " % fmtarg fmtarg = (h1csrq, s3csrq, s4csrq) print "Req Compressed Sums: % 8d | % 8d | % 8d " % fmtarg if h1usrq: fmtarg = (h1usrq * 1. / h1usrq, s3usrq * 1. / h1usrq, s4usrq * 1. / h1usrq) print "Req Uncompressed/uncompressed HTTP: % 2.5f | % 2.5f | % 2.5f " % fmtarg fmtarg = (h1csrq * 1. / h1usrq, s3csrq * 1. / h1usrq, s4csrq * 1. / h1usrq) print "Req Compressed/uncompressed HTTP: % 2.5f | % 2.5f | % 2.5f " % fmtarg print fmtarg = (h1usrs, s3usrs, s4usrs) print "Res Uncompressed Sums: % 8d | % 8d | % 8d " % fmtarg fmtarg = (h1csrs, s3csrs, s4csrs) print "Res Compressed Sums: % 8d | % 8d | % 8d " % fmtarg if h1usrs: fmtarg = (h1usrs * 1. / h1usrs, s3usrs * 1. / h1usrs, s4usrs * 1. / h1usrs) print "Res Uncompressed/uncompressed HTTP: % 2.5f | % 2.5f | % 2.5f " % fmtarg fmtarg = (h1csrs * 1. / h1usrs, s3csrs * 1. / h1usrs, s4csrs * 1. / h1usrs) print "Res Compressed/uncompressed HTTP: % 2.5f | % 2.5f | % 2.5f " % fmtarg print print spdy4_rq.wf print print spdy4_rq.wf.length_freaks print print spdy4_rs.wf print print spdy4_rs.wf.length_freaks print