def test_huffman_decode_it_should_use_left_leaf_to_decode(self):
    """A symbol stored in the left leaf encodes to '0' and decodes back."""
    left_leaf = Huffman(1, 'a')
    root = Huffman(2, 'doesnt matter', left_leaf)

    code = root.huffman_encode('a')
    # The first emitted symbol must be the left-branch marker.
    self.assertEqual(code[0], '0')

    round_trip = root.huffman_decode(code)
    self.assertEqual('a', round_trip)
def test_build_codebook_it_should_correctly_assign_leaf_prefixes_according_to_Huffman_algorithm(
        self):
    """The codebook maps the left leaf's data to '0' and the right leaf's to '1'."""
    left_leaf = Huffman(1, 'a')
    right_leaf = Huffman(1, 'b')
    root = Huffman(2, 'ab', left_leaf, right_leaf)

    left_code = root.codebook[root.left.data]
    self.assertEqual(left_code, '0')
    right_code = root.codebook[root.right.data]
    self.assertEqual(right_code, '1')
def main():
    """Compress the file named by the first CLI argument, then restore it.

    Prints the size of the source, compressed and restored files, the size
    of the code table and the number of padding zero bits. Returns early
    (after printing a message) when no argument is given or the file does
    not exist.
    """
    if len(sys.argv) < 2:
        print("Задайте файл с помощью аргументов командной строки")
        return
    filename = sys.argv[1]

    if not os.path.exists(filename):
        print("Файл не найден!")
        return

    source_size = os.path.getsize(filename)
    print("Исходный файл: '{}' ({} байт)".format(filename, source_size))

    huf = Huffman(filename)
    # The first element (codes file name) is returned but not used here.
    _codes_filename, res_filename, zeroes = huf.compress(filename)
    compressed_size = os.path.getsize(res_filename)
    print("Сжатый файл: '{}' ({} байт)".format(res_filename, compressed_size))

    table_size = sys.getsizeof(huf.codes_table)
    print("Размер таблицы кодов: {} байт".format(table_size))
    print("Нулей дописано в последний байт:", zeroes)

    dec_filename = huf.decompress(filename, res_filename, zeroes)
    restored_size = os.path.getsize(dec_filename)
    print("Восстановленный файл: '{}' ({} байт)".format(
        dec_filename, restored_size))
def test_huffman_decode_it_should_raise_ValueError_when_code_is_invalid(
        self):
    """Decoding a string that is not a valid code raises ValueError with the expected message."""
    with self.assertRaises(ValueError) as raised:
        node = Huffman(1, 'abc')
        node.huffman_decode('abcdefg')

    error_args = raised.exception.args
    self.assertTrue('Error when encoding the string' in error_args)
def test_unzip_tree_it_should_return_None_for_index_over_the_length_of_the_encoded_tree(
        self):
    """unzip_tree yields None when the start index lies past the encoded tree."""
    encoded_tree = 'abcdefg'
    leaves = [Huffman(1, 'abc')]
    past_the_end = len(encoded_tree) + 1

    result = Huffman.unzip_tree(encoded_tree, leaves, past_the_end)
    self.assertIsNone(result)
def test_build_codebook_it_should_initialize_codebook_with_left_and_right_nodes(
        self):
    """Building a two-leaf node fills the codebook with '0' (left) and '1' (right)."""
    left_leaf = Huffman(1, 'a')
    right_leaf = Huffman(1, 'b')
    root = Huffman(2, 'anything', left_leaf, right_leaf)

    code_for_a = root.codebook['a']
    self.assertEqual(code_for_a, '0')
    code_for_b = root.codebook['b']
    self.assertEqual(code_for_b, '1')
def test_huffman_decode_it_should_use_right_leaf_to_decode(self):
    """A symbol stored in the right leaf encodes to '1' and decodes back."""
    right_leaf = Huffman(1, 'b')
    # No left child: only the right branch exists.
    root = Huffman(2, 'something', None, right_leaf)

    code = root.huffman_encode('b')
    self.assertEqual(code[0], '1')

    round_trip = root.huffman_decode(code)
    self.assertEqual('b', round_trip)
def compress_data():
    """Handle an uploaded "ecg_data" file: save it, Huffman-encode it, render the result.

    Returns the rendered "compress_output.html" template for a POST request
    that carries files. For any other request nothing is returned (None),
    exactly as before.
    """
    if request.method != "POST" or not request.files:
        return None

    upload = request.files["ecg_data"]
    # pass file to huffman encoding class
    print(upload)
    filename = secure_filename(upload.filename)
    print('uploaded filename:', filename)

    cwd = os.getcwd()
    print('current directory:', cwd)
    upload.save(os.path.join(cwd + UPLOAD_FOLDER, filename))

    data_filepath = cwd + UPLOAD_FOLDER + '/' + filename
    input_file_size = os.stat(data_filepath).st_size

    text_object = Huffman(data_filepath)
    ratio = text_object.create_output()
    compressed_size = text_object.initial_byte_counter * 3
    return render_template("compress_output.html",
                           file_size=input_file_size,
                           compression_ratio=ratio,
                           final_file_size=compressed_size,
                           result=text_object.codes_dict)
def test_encode_decode(self):
    """Newline maps to "0" and back; an arbitrary string survives a round trip."""
    encoded_newline = Huffman().get_encoded_bytes("\n")
    assert encoded_newline == "0"
    decoded_newline = Huffman().get_decoded_text("0")
    assert decoded_newline == "\n"

    sample = '"Huffman"'
    encoded_sample = Huffman().get_encoded_bytes(sample)
    decoded_sample = Huffman().get_decoded_text(encoded_sample)
    assert decoded_sample == sample
def __init__(self, image, quality, out, comment):
    """Store the encoder inputs and build the DCT and Huffman helpers.

    `image` must expose a two-element `size`; `out` is the output stream
    that the other methods write to.
    """
    self.quality = quality
    self.jpeg_obj = JpegInfo(image, comment)

    width, height = image.size
    self.image_width = width
    self.image_height = height

    self.out = out
    self.dct = DCT(self.quality)
    self.huf = Huffman(width, height)
def test_print_it_should_print_a_huffman_node_according_to_implementation(
        self):
    """repr() of a node with two leaves has the expected textual form.

    builtins.print is temporarily replaced by a Mock and always restored
    afterwards, so the replacement cannot leak into other tests.
    """
    left = Huffman(1, 'a')
    right = Huffman(1, 'b')
    huffman = Huffman(2, 'abc', left, right)

    original_print = builtins.print
    builtins.print = Mock()
    try:
        builtins.print(repr(huffman))
        builtins.print.assert_called_with(
            '<Huffman(data: abc, left: a, right: b>')
    finally:
        # Restore the real print even when the assertion above fails.
        builtins.print = original_print
def file_set(self, file_chooser):
    """Read the file picked in `file_chooser`, Huffman-code it and show the results.

    The file is opened in text mode and is closed as soon as its content
    has been read.
    """
    path = file_chooser.get_file().get_path()
    # The context manager closes the file even if reading fails.
    with open(path, "r") as opened_file:
        text = opened_file.read()

    huffman = Huffman()
    huffman.originalMessage = text
    huffman.startHuffmanCoding()
    self.show_results(huffman)
def main():
    """Read the input, build the Huffman table, then show results and the menu."""
    reader = File_Input()
    coder = Huffman()

    freq_table = reader.read()
    # A copy is passed, so freq_table itself stays available for menu().
    huff_table = coder.to_huffman(copy.copy(freq_table))

    text = reader.get_str()
    bin_text = get_bin(huff_table, text)

    results(text, bin_text, huff_table)
    menu(freq_table, huff_table, text, bin_text)
def browserFile(self):
    """Let the user pick a .txt file and either compress or decompress it.

    A file whose first line starts with "*(" is treated as previously
    encoded: the rest is decoded and written to "<name>_dencoded.txt".
    Any other file is encoded and written to "<name>_encoded.txt", with
    "*<trim>" on the first line. Output goes to the current working
    directory, and the `log` / `resume` widgets are updated.

    Does nothing when the file dialog is cancelled.
    """
    filename = filedialog.askopenfilename(
        initialdir=".", title="Selecione a o arquivo",
        filetypes=(("text files", ".txt"),))
    if not filename:
        # Dialog cancelled: nothing was selected, so there is nothing to open.
        return

    # Strip the last four characters (the ".txt" suffix) for output names.
    base_name = os.path.basename(filename)[0: -4]

    with open(filename, "r") as file:
        first_line = file.readline()
        if first_line[:2] == "*(":
            word_dencode = file.read()
            # SECURITY NOTE(review): eval() executes whatever the first line
            # of the chosen file contains. Only open files produced by this
            # tool; consider ast.literal_eval if `trim` is a plain literal.
            dencode = Huffman(word=word_dencode, trim=eval(
                first_line[1:]), dencode=0)
            with open(base_name + "_dencoded.txt", "w+") as file_dencode:
                file_dencode.write(dencode.word_decode)
            self.log["text"] = "Descompactação realizada com sucesso!!\n" + \
                "Verifique o resultado na pasta raiz do projeto."
            self.resume["text"] = ""
        else:
            file.seek(0)
            word_for_encode = file.read()
            encodefy = Huffman(word=word_for_encode)
            with open(base_name + "_encoded.txt", "w+") as file_encode:
                file_encode.write("*" + str(encodefy.trim) + "\n")
                file_encode.write(str(encodefy.word_encode))
            self.log["text"] = "Compressão realizada com sucesso!!\n" + \
                "Verifique o resultado na pasta raiz do projeto."
            size_compress = len(encodefy.word_encode)
            # Eight bits per character of the original text.
            current_size = len(encodefy.word)*8
            self.resume["text"] = "Tamanho antigo do arquivo: " + \
                str(current_size) + " bits \n" + "Tamanho atual do arquivo: " + \
                str(size_compress) + " bits\n" + "Taxa de compressão: " + \
                str("{:.2f}".format((size_compress/current_size)*100)) + "%"
def compress_each_file_in_folder():
    """Compress every file in ./testfiles/ that is not itself a (de)compressed output.

    Each "<name><ext>" is written to "<name>_compressed<ext>" in the same folder.
    """
    path = './testfiles/'
    for entry in listdir(path):
        # Skip files produced by earlier runs.
        if '_compressed' in entry or '_decompressed' in entry:
            continue
        print(entry)
        filename, file_extension = os.path.splitext(entry)
        infile = open(f'{path}{filename}{file_extension}', 'rb')
        outfile = open(f'{path}{filename}_compressed{file_extension}', 'wb')
        compressor = Huffman(infile, outfile)
        compressor.compress()
def glavnaMethod():
    """Read the input, convert it to a Huffman table, then show scores and the menu."""
    reader = File_Input()
    coder = Huffman()

    all_results = reader.citaj()
    # A copy is handed over, so all_results is still intact for Izbor().
    huff_part = coder.prebaciSveUHufman(copy.copy(all_results))

    text = reader.citajFile()
    binary = sveU_bajneriju(huff_part, text)

    pokaziSveScore(text, binary, huff_part)
    Izbor(all_results, huff_part, text, binary)
def __init__(self, frame_id, img_size, macroblock_size, frame, bufsize):
    """Prepare the bitstream generator and the Huffman dictionary for one frame.

    The Huffman dictionary is built over all RLE tuples of the whole
    frame, and the time that construction takes is recorded.
    """
    threading.Thread.__init__(self)

    self.bit_generator = BitstreamGenerator(frame_id, img_size, macroblock_size)

    # List of lists of tuples: each sub-list (a list of tuples) is the RLE
    # representation of one macroblock.
    self.frame = frame

    # Also equal to (img_size[0] * img_size[1]) // macroblock_size**2
    self.total_num_of_macroblocks = len(frame)

    self.bufsize = bufsize

    # Metadata of the packets that make up the dict bitstream takes
    # 50 "bits" (50 = 16 + 2 + 16 + 16).
    self.taille_metadonnees_dict = 50
    # Size of an elementary dict packet before it is attached to the
    # bitstream packet.
    self.taille_paquet_elementaire_dict = bufsize - self.taille_metadonnees_dict

    # Metadata of the packets that make up the body bitstream takes
    # 66 "bits" (66 = 16 + 2 + 16 + 16 + 16).
    self.taille_metadonnees_body = 66
    # Size of an elementary body packet before it is attached to the
    # bitstream packet (66 + taille_paquet_elementaire_body equals bufsize
    # by definition).
    self.taille_paquet_elementaire_body = bufsize - self.taille_metadonnees_body

    # Number of packets that will be sent from client to server for the
    # dict (resp. the body); not known yet.
    self.nb_paquets_dict = None
    self.nb_paquets_body = None

    # Build, then encode, the Huffman dictionary for the **entire** frame.
    started_at = time()
    self.huff = Huffman(
        [rle_tuple for block in self.frame for rle_tuple in block])
    finished_at = time()
    self.duree_generation_dico_huffman = finished_at - started_at

    self.dict_huffman_encode = self.huff.dictToBin()

    # Size of the **useful** compressed body data.
    self.taille_donnees_compressees_huffman = 0

    # Buffer that fills up when verrou_buffer_bitstream is already held by
    # the main thread.
    self.buffer_interne = ""
def main():
    # Python 2 script entry: round-trip every string in test_data through
    # Encode/Decode and report whether the decoded text equals the input.
    # NOTE(review): block nesting was reconstructed from collapsed source;
    # the trailing bare `print` is assumed to sit inside the loop — confirm.
    h = Huffman(request_freq_table)
    for s in test_data:
        print " encoding: ", s
        # Encode operates on a list of character codes, not on the str.
        sp = [ord(c) for c in s]
        e_result = h.Encode(sp, False)
        print " e_result: ", FormatAsBits(e_result)
        # e_result is indexed as a pair; both elements are handed to Decode.
        d_result = ''.join(ListToStr(h.Decode(e_result[0], False, e_result[1])))
        if d_result != s:
            print "difference found: ", d_result, " ", s
        else:
            print "It worked: ", s
        # Blank line between test cases.
        print
def main():
    """Compress and then restore a hard-coded sample file, printing the sizes."""
    # Sample input; 'test.txt' was the earlier alternative.
    filename = 'ziip.rar'
    source_size = os.path.getsize(filename)
    print("Исходный файл: '{}' ({} байт)".format(filename, source_size))

    huf = Huffman(filename)
    res_filename, zeroes = huf.compress(filename)
    compressed_size = os.path.getsize(res_filename)
    print("Сжатый файл: '{}' ({} байт)".format(res_filename, compressed_size))
    print("Нулей дописано в последний байт:", zeroes)

    dec_filename = huf.decompress(filename, res_filename, zeroes)
    restored_size = os.path.getsize(dec_filename)
    print("Восстановленный файл: '{}' ({} байт)".format(
        dec_filename, restored_size))
def huffman_output_bits(text: str):
    """Yield program fragments that lay out the packed bits of `text` and walk the tree.

    The packed bit string is emitted in reverse order, preceded by a -1
    sentinel, followed by "<|" and the output of huffman_walker.
    """
    h = Huffman.from_dist(text)
    pprint(h.to_bits)

    packed = ''.join(h.pack(text))
    reversed_bits = [int(bit) for bit in reversed(packed)]
    yield from linear_create((-1, *reversed_bits), ">")

    yield "<|"
    yield from huffman_walker(h.tree, decoders=["[->>>>>>+<<<<<<]"])
def main():
    # Python 2 script entry: round-trip every string in test_data through
    # EncodeToBB/DecodeFromBB and report whether the result equals the input.
    # NOTE(review): block nesting was reconstructed from collapsed source;
    # the trailing bare `print` is assumed to sit inside the loop — confirm.
    h = Huffman(request_freq_table)
    for s in test_data:
        print " encoding: ", s
        # The encoder takes a list of character codes.
        sp = [ord(c) for c in s]
        # A fresh BitBucket per string receives the encoded bits.
        e_result = BitBucket()
        h.EncodeToBB(e_result, sp, True)
        print " e_result: ", FormatAsBits(e_result.GetAllBits())
        d_result = ListToStr(h.DecodeFromBB(e_result, True, -1))
        if d_result != s:
            print "difference found: d_result(%s) vs orig(%s)" % (repr(d_result), repr(s))
        else:
            print "It worked: ", s
        # Blank line between test cases.
        print
class HuffmanTable():
    """One Huffman table parsed from a byte sequence.

    Layout read by the constructor: byte 0 holds the table id (low nibble)
    and table type (high nibble); bytes 1..16 hold the number of codes for
    each code length 1..16; the remaining bytes are the coded values.
    """

    def __init__(self, bytes):
        header = bytes[0]
        self.Id = header & 0x0F
        self.TableType = HuffmanTableType(header >> 4)
        self.Codes = {}
        self.Huffman = Huffman()

        # Sixteen per-length code counts follow the header byte.
        counts = [bytes[offset] for offset in range(1, 17)]

        index = 17
        code = 0
        for length, count in enumerate(counts, start=1):
            for _ in range(count):
                # Key is (code length, code value); value is the symbol byte.
                self.Codes[(length, code)] = bytes[index]
                code += 1
                index += 1
            # Moving to the next code length appends one bit.
            code <<= 1

        self.Huffman.FromTable(self.Codes)
        # Debug aid (disabled): self.Huffman.DrawTree(filename="{}_{}".format(self.TableType, self.Id))

    def __repr__(self):
        # A per-code dump used to be appended here; only the summary remains.
        return "Table {:02X} Type {}".format(self.Id, self.TableType)
def __init__(self, bytes):
    """Parse a Huffman table: header byte, 16 per-length counts, then the symbols."""
    header = bytes[0]
    self.Id = header & 0x0F
    self.TableType = HuffmanTableType(header >> 4)
    self.Codes = {}
    self.Huffman = Huffman()

    # Sixteen per-length code counts follow the header byte.
    counts = [bytes[offset] for offset in range(1, 17)]

    index = 17
    code = 0
    for length, count in enumerate(counts, start=1):
        for _ in range(count):
            # Key is (code length, code value); value is the symbol byte.
            self.Codes[(length, code)] = bytes[index]
            code += 1
            index += 1
        # Moving to the next code length appends one bit.
        code <<= 1

    self.Huffman.FromTable(self.Codes)
def test_huffman_decode_it_should_use_data_to_return_the_decoded_string(
        self):
    """Encoding then decoding 'ab' on a two-leaf tree returns 'ab'."""
    left_leaf = Huffman(1, 'a')
    right_leaf = Huffman(1, 'b')
    root = Huffman(2, 'ab', left_leaf, right_leaf)

    code = root.huffman_encode('ab')
    round_trip = root.huffman_decode(code)

    self.assertEqual('ab', round_trip)
def huffman_output_pairs(text: str):
    """Yield program fragments that lay out `text`'s packed bits two per cell and walk the tree.

    The reversed bit string is folded pairwise into values a + 2*b (a
    missing final bit counts as 0), preceded by a -1 sentinel.
    """
    h = Huffman.from_dist(text)
    pprint(h.to_bits)

    packed = ''.join(h.pack(text))
    bits = [int(bit) for bit in reversed(packed)]
    low_bits = bits[::2]
    high_bits = bits[1::2]
    pairs = [
        low + 2 * high
        for low, high in zip_longest(low_bits, high_bits, fillvalue=0)
    ]

    yield from linear_create((-1, *pairs), ">")
    yield "<|"

    decoders = [
        ">>-<<[->+<[->->>>>>+<<<<<]]+[->+]<[-<+>]<",
        "[->>>>>>+<<<<<<]",
    ]
    yield from huffman_walker(h.tree, decoders=decoders)
def read_dht_header(jpeg: Jpeg, file_: T.BinaryIO):
    """ Read Huffman Table

    Reads one DHT segment from `file_`, prints what it finds and builds a
    Huffman object from it. Raises NotImplementedError when the number of
    bytes consumed differs from the segment's declared length.
    """
    segment_start = file_.tell()

    # JPEGs are network byte order, i.e. *big* endian.
    length = one_from_file(">H", file_)

    # Information byte layout:
    #   bit 0..3 : number of HT (0..3, otherwise error)
    #   bit 4    : type of HT, 0 = DC table, 1 = AC table
    #   bit 5..7 : not used, must be 0
    ht_information = one_from_file("B", file_)
    print("ht_information", bin(ht_information))

    # "number" as in index, not count
    ht_num = ht_information & 0b00001111
    print(f"huffman table number: {ht_num}")

    ht_type = (ht_information & 0b00010000) >> 4
    ht_type_str = "AC" if ht_type else "DC"
    print(f"type of huffman table: {ht_type_str}")

    reserved_bits = (ht_information & 0b11100000) >> 5
    assert reserved_bits == 0

    # Number of symbols with codes of length 1..16; their sum is the total
    # number of codes, which must be <= 256.
    num_symbols_per_bit_length = unpack_from_file("16B", file_)
    print(num_symbols_per_bit_length)
    total_symbols = sum(num_symbols_per_bit_length)
    assert total_symbols <= 256  # per comment above

    # Symbols in order of increasing code length, one group per length.
    symbols: T.List[T.List[int]] = [
        unpack_from_file(f"{symbol_count}B", file_)
        for symbol_count in num_symbols_per_bit_length
    ]
    print(symbols)

    # The table object is built but not used further in this function.
    _huff = Huffman(num_symbols_per_bit_length, symbols)

    consumed = file_.tell() - segment_start
    if consumed != length:
        raise NotImplementedError(
            "Sorry, we don't handle this yet {} != {}".format(
                consumed, length))
def compression(self):
    # Unfold the coefficients into an image, call the encoder once per
    # channel, organise the code stream and hand the bytes to a Huffman tree.
    # NOTE(review): block nesting was reconstructed from collapsed source;
    # the encode/Huffman steps are assumed to run once per channel because
    # they use the channel index `i` — confirm.

    # Shape of the original image: twice the size of the last detail band.
    self.shape = (size(self.coefficients[0][-1][0], 0) * 2,
                  size(self.coefficients[0][-1][0], 1) * 2)
    # Number of decomposition levels between coarse band and full size.
    num = int(log2(self.shape[0] / size(self.coarse_coef[0], 0)))
    for i in range(3):  # three channels
        # Current extent of the filled top-left region, as [rows, cols].
        position = [
            size(self.coarse_coef[i], 0),
            size(self.coarse_coef[i], 1)
        ]
        img = zeros([self.shape[0], self.shape[1]], dtype=uint8)
        self.coarse_coef[i] = self.coarse_coef[i].astype(float) * iinfo(
            uint8).max  # scale by the uint8 maximum before converting to integer
        img[0:position[0],
            0:position[1]] = self.coarse_coef[i].astype(uint8)
        for j in range(num):
            # Scale and convert the three detail bands of this level.
            for k in range(3):
                self.coefficients[i][j][k] = self.coefficients[i][j][
                    k].astype(float) * iinfo(uint8).max
                self.coefficients[i][j][k] = self.coefficients[i][j][
                    k].astype(uint8)
            # Place the three bands below, right of, and diagonal to the
            # region filled so far.
            img[position[0]:position[0] * 2,
                0:position[1]] = self.coefficients[i][j][0]
            img[0:position[0],
                position[1]:position[1] * 2] = self.coefficients[i][j][1]
            img[position[0]:position[0] * 2,
                position[1]:position[1] * 2] = self.coefficients[i][j][2]
            # The filled region doubles in both directions per level.
            position[0] *= 2
            position[1] *= 2
        # Debug aid (disabled): cv2.imshow("whole", img)
        self.img = img
        # Debug aid (disabled): a fixed 8x8 test matrix could be assigned to
        # self.img here together with self.shape = (8, 8).
        byte_array = self.__encode(409600)  # around 50KB
        tree = Huffman()
        # The second argument is a per-channel name: "<channel index>_code".
        tree.ctor_from_bytes(byte_array, str(i) + '_code')
def test_huffman_encode_decode_reverse():
    """decode(encode(m)) == m for the training text and for a text made of its symbols."""
    initial = 'A man, a plan, a canal, Panama'
    huffman = Huffman(initial)

    round_trip = huffman.decode(huffman.encode(initial))
    assert initial == round_trip

    # A different message that only uses characters present in `initial`.
    message = 'nana'
    round_trip = huffman.decode(huffman.encode(message))
    assert message == round_trip
def test_init(self):
    """The Python DC/AC matrices match those of the Java james.Huffman class."""
    py_huffman = Huffman(100, 100)
    pydc = py_huffman.dc_matrix
    pyac = py_huffman.ac_matrix

    java_huffman = jpype.JClass('james.Huffman')(100, 100)
    jadc = java_huffman.DC_matrix
    jaac = java_huffman.AC_matrix

    def check(py, ja):
        # Compare two three-level nested sequences element by element.
        self.assertEqual(len(py), len(ja))
        for i, py_table in enumerate(py):
            ja_table = ja[i]
            self.assertEqual(len(py_table), len(ja_table))
            for j, py_row in enumerate(py_table):
                ja_row = ja_table[j]
                self.assertEqual(len(py_row), len(ja_row))
                for k, py_value in enumerate(py_row):
                    ja_value = ja_row[k]
                    self.assertEqual(
                        py_value, ja_value,
                        '%d %d %d %d %d' % (i, j, k, py_value, ja_value))

    check(pyac, jaac)
    check(pydc, jadc)
komprimerede fil). Begge filer skal åbnes i “binary mode”. Når en
BitWriter instantieres, skal den have et file object som argument.

Opgave 2: I opgave 2 skal man bruge metoderne readint32bits() og
readbit() fra klassen BitReader fra det udleverede bibliotek bitIO.py til
at læse heltal (for hyppighedstabel) og bits (for Huffmans-koderne) fra
inputfilen (den komprimerede fil). Man skal bruge kaldet
write(bytes([b])) (hvor write() er fra file objects og bytes() er en
built-in funktion) til skrive bytes til outputfilen (den genskabte
originale fil). Her er b et heltal som repræsenterer den byte, som skal
skrives. Begge filer skal åbnes i “binary mode”. Når en BitReader
instantieres, skal den have et file object som argument.

:Gruppe medlemmer:
    Mads Emil Falkenstrøm, [email protected]
    Mathias Birkebjerg Kristiansen, [email protected]
    Patrick Nielsen, [email protected]
"""
import sys

from huffman import Huffman

if __name__ == '__main__':
    # Usage: <script> <input file> <output file>
    # Both files are opened in binary mode, as the task text above requires.
    # Neither file is closed explicitly in this script.
    infile = open(sys.argv[1], 'rb')
    outfile = open(sys.argv[2], 'wb')
    # Huffman receives the two open file objects and performs the compression.
    h = Huffman(infile, outfile)
    h.compress()
class JpegEncoder(object):
    """JPEG encoder that can embed data into the quantised DCT coefficients.

    The encoder writes the JPEG headers, computes and quantises the DCT
    coefficients, optionally embeds `embedded_data` into them using a
    password-seeded permutation and (1, n, k) matrix encoding, and finally
    Huffman-encodes the coefficients to `out`.

    All size and capacity arithmetic uses integer (floor) division.
    """

    def __init__(self, image, quality, out, comment):
        """Store the inputs and create the JpegInfo, DCT and Huffman helpers."""
        self.quality = quality
        self.jpeg_obj = JpegInfo(image, comment)
        self.image_width, self.image_height = image.size
        self.out = out
        self.dct = DCT(self.quality)
        self.huf = Huffman(*image.size)

    def compress(self, embedded_data=None, password='******'):
        """Write the complete JPEG stream, embedding `embedded_data` if given.

        A falsy `embedded_data` disables embedding altogether.
        """
        self.embedded_data = EmbedData(embedded_data) if embedded_data else None
        self.password = password

        self.write_headers()
        self.write_compressed_data()
        self.write_eoi()
        self.out.flush()

    def get_quality(self):
        """Return the current quality setting."""
        return self.quality

    def set_quality(self, quality):
        """Change the quality and rebuild the DCT helper for it."""
        self.quality = quality
        self.dct = DCT(quality)

    def write_array(self, data):
        """Write a marker segment; its length is read from bytes 2 and 3 of `data`."""
        # Declared segment length (big endian) plus the two marker bytes.
        length = ((data[2] & 0xff) << 8) + (data[3] & 0xff) + 2
        self.out.write(bytearray(data[:length]))

    def write_marker(self, data):
        """Write the first two bytes of `data` (a bare marker)."""
        self.out.write(bytearray(data[:2]))

    def write_eoi(self):
        """Write the end-of-image marker."""
        EOI = [0xff, 0xD9]
        self.write_marker(EOI)

    def write_headers(self):
        """Write SOI, JFIF, optional COM, DQT, SOF, DHT and SOS segments."""
        SOI = [0xff, 0xD8]
        self.write_marker(SOI)

        JFIF = [0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00,
                0x01, 0x01, 0x01, 0x00, 0x60, 0x00, 0x60, 0x00, 0x00]
        self.write_array(JFIF)

        comment = self.jpeg_obj.get_comment()
        if comment:
            length = len(comment) + 2
            COM = [0xff, 0xfe, (length >> 8) & 0xff, length & 0xff]
            COM.extend(comment)
            self.write_array(COM)

        # Two quantisation tables of 64 entries each.
        DQT = [0xff, 0xdb, 0x00, 0x84]
        for k in range(2):
            DQT.append(k)
            DQT.extend([self.dct.quantum[k][JPEG_NATURAL_ORDER[i]]
                        for i in range(64)])
        self.write_array(DQT)

        SOF = [0xff, 0xc0, 0x00, 0x11, self.jpeg_obj.precision,
               (self.jpeg_obj.image_height >> 8) & 0xff,
               self.jpeg_obj.image_height & 0xff,
               (self.jpeg_obj.image_width >> 8) & 0xff,
               self.jpeg_obj.image_width & 0xff,
               self.jpeg_obj.comp_num]
        for i in range(self.jpeg_obj.comp_num):
            SOF.append(self.jpeg_obj.com_id[i])
            SOF.append(eight_byte(self.jpeg_obj.hsamp_factor[i],
                                  self.jpeg_obj.vsamp_factor[i]))
            SOF.append(self.jpeg_obj.qtable_number[i])
        self.write_array(SOF)

        # The length bytes are patched in once all four tables are appended.
        DHT = [0xff, 0xc4, 0, 0]
        for i in range(4):
            DHT.extend(self.huf.BITS[i])
            DHT.extend(self.huf.VAL[i])
        DHT[2] = ((len(DHT) - 2) >> 8) & 0xff
        DHT[3] = (len(DHT) - 2) & 0xff
        self.write_array(DHT)

        SOS = [0xff, 0xda, 0x00, 0x0c, self.jpeg_obj.comp_num]
        for i in range(self.jpeg_obj.comp_num):
            SOS.append(self.jpeg_obj.com_id[i])
            SOS.append(eight_byte(self.jpeg_obj.dctable_number[i],
                                  self.jpeg_obj.actable_number[i]))
        SOS.append(self.jpeg_obj.ss)
        SOS.append(self.jpeg_obj.se)
        SOS.append(eight_byte(self.jpeg_obj.ah, self.jpeg_obj.al))
        self.write_array(SOS)

    def _get_coeff(self):
        """Return the quantised DCT coefficients of all blocks as one flat list.

        Each 8x8 block contributes 64 consecutive entries.
        """
        dct_array1 = create_array(0.0, 8, 8)
        coeff = []
        for r in range(min(self.jpeg_obj.block_height)):
            for c in range(min(self.jpeg_obj.block_width)):
                xpos = c * 8
                ypos = r * 8
                for comp in range(self.jpeg_obj.comp_num):
                    indata = self.jpeg_obj.components[comp]
                    vsamp = self.jpeg_obj.vsamp_factor[comp]
                    hsamp = self.jpeg_obj.hsamp_factor[comp]
                    # Highest usable row/column index; floor division keeps
                    # these usable as sequence indices.
                    maxa = self.image_height // 2 * vsamp - 1
                    maxb = self.image_width // 2 * hsamp - 1

                    for i in range(vsamp):
                        for j in range(hsamp):
                            ia = ypos * vsamp + i * 8
                            ib = xpos * hsamp + j * 8

                            # Copy one 8x8 block, clamping at the border.
                            for a in range(8):
                                for b in range(8):
                                    dct_array1[a][b] = indata[min(ia + a, maxa)][min(ib + b, maxb)]

                            dct_array2 = self.dct.forward_dct(dct_array1)
                            dct_array3 = self.dct.quantize_block(
                                dct_array2, self.jpeg_obj.qtable_number[comp])
                            coeff.extend(dct_array3[:64])
        return coeff

    def write_compressed_data(self):
        """Compute the coefficients, embed the payload if any, and Huffman-encode them."""
        last_dc_value = create_array(0, self.jpeg_obj.comp_num)
        min_block_width = min(self.jpeg_obj.block_width)
        min_block_height = min(self.jpeg_obj.block_height)

        logger.info('DCT/quantisation starts')
        logger.info('%d x %d' % (self.image_width, self.image_height))

        coeff = self._get_coeff()
        coeff_count = len(coeff)

        logger.info('got %d DCT AC/DC coefficients' % coeff_count)
        _changed, _embedded, _examined, _expected, _one, _large, _thrown, _zero = 0, 0, 0, 0, 0, 0, 0, 0
        # Count AC coefficients equal to +-1 and 0; every 64th entry (the
        # first of each block) is skipped.
        for i, cc in enumerate(coeff):
            if i % 64 == 0:
                continue
            if cc == 1 or cc == -1:
                _one += 1
            elif cc == 0:
                _zero += 1

        _large = coeff_count - _zero - _one - coeff_count // 64
        _expected = _large + int(0.49 * _one)

        logger.info('one=%d' % _one)
        logger.info('large=%d' % _large)
        logger.info('expected capacity: %d bits' % _expected)
        logger.info('expected capacity with')

        for i in range(1, 8):
            n = (1 << i) - 1
            changed = _large - _large % (n + 1)
            changed = (changed + _one + _one // 2 - _one // (n + 1)) // (n + 1)

            usable = (_expected * i // n - _expected * i // n % n) // 8
            if usable == 0:
                break

            logger.info('%s code: %d bytes (efficiency: %d.%d bits per change)' %
                        ('default' if i == 1 else '(1, %d, %d)' % (n, i),
                         usable, usable * 8 // changed, usable * 80 // changed % 10))

        if self.embedded_data is not None:
            logger.info('permutation starts')
            random = F5Random(self.password)
            permutation = Permutation(coeff_count, random)

            next_bit_to_embed = 0
            byte_to_embed = len(self.embedded_data)
            available_bits_to_embed = 0

            logger.info('Embedding of %d bits (%d+4 bytes)' %
                        (byte_to_embed * 8 + 32, byte_to_embed))

            # The length travels in the low 23 bits of the header word.
            if byte_to_embed > 0x007fffff:
                byte_to_embed = 0x007fffff

            # Pick the largest k whose (1, n, k) code still fits the payload.
            for i in range(1, 8):
                self.n = (1 << i) - 1
                usable = (_expected * i // self.n - _expected * i // self.n % self.n) // 8
                if usable < byte_to_embed + 4:
                    break

            k = i - 1
            self.n = (1 << k) - 1

            if self.n == 0:
                logger.info('using default code, file will not fit')
                self.n = 1
            elif self.n == 1:
                logger.info('using default code')
            else:
                logger.info('using (1, %d, %d) code' % (self.n, k))

            # Header word: k in the top byte, length below, masked with
            # four bytes from the password-seeded generator.
            byte_to_embed |= k << 24
            byte_to_embed ^= random.get_next_byte()
            byte_to_embed ^= random.get_next_byte() << 8
            byte_to_embed ^= random.get_next_byte() << 16
            byte_to_embed ^= random.get_next_byte() << 24

            next_bit_to_embed = byte_to_embed & 1
            byte_to_embed >>= 1
            available_bits_to_embed = 31
            _embedded += 1

            # Bit-by-bit embedding: the header always, the payload too when
            # the default code (n == 1) is in use.
            for i, shuffled_index in enumerate(permutation.shuffled):
                if shuffled_index % 64 == 0 or coeff[shuffled_index] == 0:
                    continue

                cc = coeff[shuffled_index]
                _examined += 1

                if cc > 0 and (cc & 1) != next_bit_to_embed:
                    coeff[shuffled_index] -= 1
                    _changed += 1
                elif cc < 0 and (cc & 1) == next_bit_to_embed:
                    coeff[shuffled_index] += 1
                    _changed += 1

                if coeff[shuffled_index] != 0:
                    if available_bits_to_embed == 0:
                        if self.n > 1 or not self.embedded_data.available():
                            break

                        byte_to_embed = self.embedded_data.read()
                        byte_to_embed ^= random.get_next_byte()
                        available_bits_to_embed = 8

                    next_bit_to_embed = byte_to_embed & 1
                    byte_to_embed >>= 1
                    available_bits_to_embed -= 1
                    _embedded += 1
                else:
                    # The coefficient became zero; the bit must be re-embedded.
                    _thrown += 1

            if self.n > 1:
                try:
                    is_last_byte = False
                    # Remaining non-zero AC coefficients after position i of
                    # the permutation.
                    filtered_index = FilteredCollection(
                        permutation.shuffled[i + 1:],
                        lambda index: index % 64 and coeff[index])
                    while not is_last_byte:
                        # Gather up to k payload bits.
                        k_bits_to_embed = 0
                        for i in range(k):
                            if available_bits_to_embed == 0:
                                if not self.embedded_data.available():
                                    is_last_byte = True
                                    break

                                byte_to_embed = self.embedded_data.read()
                                byte_to_embed ^= random.get_next_byte()
                                available_bits_to_embed = 8

                            next_bit_to_embed = byte_to_embed & 1
                            byte_to_embed >>= 1
                            available_bits_to_embed -= 1
                            k_bits_to_embed |= next_bit_to_embed << i
                            _embedded += 1

                        code_word = filtered_index.offer(self.n)
                        while True:
                            vhash = 0
                            for i, index in enumerate(code_word):
                                if coeff[index] > 0:
                                    extracted_bit = coeff[index] & 1
                                else:
                                    extracted_bit = 1 - (coeff[index] & 1)

                                if extracted_bit == 1:
                                    vhash ^= i + 1

                            # Position (1-based) of the coefficient to change;
                            # zero means the code word already matches.
                            i = vhash ^ k_bits_to_embed
                            if not i:
                                break

                            i -= 1
                            coeff[code_word[i]] += 1 if coeff[code_word[i]] < 0 else -1
                            _changed += 1

                            if not coeff[code_word[i]]:
                                # Coefficient became zero: drop it from the
                                # code word, fetch a replacement and retry.
                                _thrown += 1
                                code_word[i:i + 1] = []
                                code_word.extend(filtered_index.offer(1))
                            else:
                                break
                except FilteredCollection.ListNotEnough:
                    # Ran out of usable coefficients; embedding stops here.
                    pass

            if _examined > 0:
                logger.info('%d coefficients examined' % _examined)
            if _changed > 0:
                logger.info('%d coefficients changed (efficiency: %d.%d bits per change' %
                            (_changed, _embedded // _changed, _embedded * 10 // _changed % 10))

            logger.info('%d coefficients thrown (zeroed)' % _thrown)
            logger.info('%d bits (%d bytes) embedded' % (_embedded, _embedded // 8))

        logger.info('starting hufman encoding')
        shuffled_index = 0
        for r in range(min_block_height):
            for c in range(min_block_width):
                for comp in range(self.jpeg_obj.comp_num):
                    for i in range(self.jpeg_obj.vsamp_factor[comp]):
                        for j in range(self.jpeg_obj.hsamp_factor[comp]):
                            dct_array3 = coeff[shuffled_index:shuffled_index + 64]
                            self.huf.huffman_block_encoder(
                                self.out, dct_array3, last_dc_value[comp],
                                self.jpeg_obj.dctable_number[comp],
                                self.jpeg_obj.actable_number[comp])
                            last_dc_value[comp] = dct_array3[0]
                            shuffled_index += 64

        self.huf.flush_buffer(self.out)
class JpegEncoder(object):
    """JPEG encoder with data embedding and an optional AIS preprocessing step.

    The encoder writes the JPEG headers, computes and quantises the DCT
    coefficients, optionally runs AIS on them, embeds `embedded_data` using
    a password-seeded permutation and (1, n, k) matrix encoding, and
    Huffman-encodes the result to `out`. Coefficient snapshots are dumped
    to JSON files in the working directory along the way.

    All size and capacity arithmetic uses integer (floor) division.
    """

    def __init__(self, image, quality, out, comment, ais):
        """Store the inputs and create the JpegInfo, DCT and Huffman helpers.

        `ais` switches the AIS preprocessing on or off.
        """
        self.quality = quality
        self.jpeg_obj = JpegInfo(image, comment)
        self.image_width, self.image_height = image.size
        self.out = out
        self.dct = DCT(self.quality)
        self.huf = Huffman(*image.size)
        self.hasais = ais
        # Negative means "no k imposed by AIS"; k is then derived from capacity.
        self.k_matrix = -1

    def compress(self, embedded_data=None, password='******'):
        """Write the complete JPEG stream, embedding `embedded_data` if given.

        A falsy `embedded_data` disables embedding altogether.
        """
        self.embedded_data = EmbedData(
            embedded_data) if embedded_data else None
        self.password = password

        self.write_headers()
        self.write_compressed_data()
        self.write_eoi()
        self.out.flush()

    def get_quality(self):
        """Return the current quality setting."""
        return self.quality

    def set_quality(self, quality):
        """Change the quality and rebuild the DCT helper for it."""
        self.quality = quality
        self.dct = DCT(quality)

    def write_array(self, data):
        """Write a marker segment; its length is read from bytes 2 and 3 of `data`."""
        # Declared segment length (big endian) plus the two marker bytes.
        length = ((data[2] & 0xff) << 8) + (data[3] & 0xff) + 2
        self.out.write(bytearray(data[:length]))

    def write_marker(self, data):
        """Write the first two bytes of `data` (a bare marker)."""
        self.out.write(bytearray(data[:2]))

    def write_eoi(self):
        """Write the end-of-image marker."""
        EOI = [0xff, 0xD9]
        self.write_marker(EOI)

    def write_headers(self):
        """Write SOI, JFIF, optional COM, DQT, SOF, DHT and SOS segments."""
        SOI = [0xff, 0xD8]
        self.write_marker(SOI)

        JFIF = [
            0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01,
            0x01, 0x00, 0x60, 0x00, 0x60, 0x00, 0x00
        ]
        self.write_array(JFIF)

        comment = self.jpeg_obj.get_comment()
        if comment:
            length = len(comment) + 2
            COM = [0xff, 0xfe, (length >> 8) & 0xff, length & 0xff]
            COM.extend(comment)
            self.write_array(COM)

        # Two quantisation tables of 64 entries each.
        DQT = [0xff, 0xdb, 0x00, 0x84]
        for k in range(2):
            DQT.append(k)
            DQT.extend([
                self.dct.quantum[k][JPEG_NATURAL_ORDER[i]] for i in range(64)
            ])
        self.write_array(DQT)

        SOF = [
            0xff, 0xc0, 0x00, 0x11, self.jpeg_obj.precision,
            (self.jpeg_obj.image_height >> 8) & 0xff,
            self.jpeg_obj.image_height & 0xff,
            (self.jpeg_obj.image_width >> 8) & 0xff,
            self.jpeg_obj.image_width & 0xff,
            self.jpeg_obj.comp_num
        ]
        for i in range(self.jpeg_obj.comp_num):
            SOF.append(self.jpeg_obj.com_id[i])
            SOF.append(
                eight_byte(self.jpeg_obj.hsamp_factor[i],
                           self.jpeg_obj.vsamp_factor[i]))
            SOF.append(self.jpeg_obj.qtable_number[i])
        self.write_array(SOF)

        # The length bytes are patched in once all four tables are appended.
        DHT = [0xff, 0xc4, 0, 0]
        for i in range(4):
            DHT.extend(self.huf.BITS[i])
            DHT.extend(self.huf.VAL[i])
        DHT[2] = ((len(DHT) - 2) >> 8) & 0xff
        DHT[3] = (len(DHT) - 2) & 0xff
        self.write_array(DHT)

        SOS = [0xff, 0xda, 0x00, 0x0c, self.jpeg_obj.comp_num]
        for i in range(self.jpeg_obj.comp_num):
            SOS.append(self.jpeg_obj.com_id[i])
            SOS.append(
                eight_byte(self.jpeg_obj.dctable_number[i],
                           self.jpeg_obj.actable_number[i]))
        SOS.append(self.jpeg_obj.ss)
        SOS.append(self.jpeg_obj.se)
        SOS.append(eight_byte(self.jpeg_obj.ah, self.jpeg_obj.al))
        self.write_array(SOS)

    def _get_coeff(self):
        """Return the quantised DCT coefficients of all blocks as one flat list.

        Each 8x8 block contributes 64 consecutive entries.
        """
        dct_array1 = create_array(0.0, 8, 8)
        coeff = []
        for r in range(min(self.jpeg_obj.block_height)):
            for c in range(min(self.jpeg_obj.block_width)):
                xpos = c * 8
                ypos = r * 8
                for comp in range(self.jpeg_obj.comp_num):
                    indata = self.jpeg_obj.components[comp]
                    vsamp = self.jpeg_obj.vsamp_factor[comp]
                    hsamp = self.jpeg_obj.hsamp_factor[comp]
                    # Highest usable row/column index; floor division keeps
                    # these usable as sequence indices.
                    maxa = self.image_height // 2 * vsamp - 1
                    maxb = self.image_width // 2 * hsamp - 1

                    for i in range(vsamp):
                        for j in range(hsamp):
                            ia = ypos * vsamp + i * 8
                            ib = xpos * hsamp + j * 8

                            # Copy one 8x8 block, clamping at the border.
                            for a in range(8):
                                for b in range(8):
                                    dct_array1[a][b] = indata[min(
                                        ia + a, maxa)][min(ib + b, maxb)]

                            dct_array2 = self.dct.forward_dct(dct_array1)
                            dct_array3 = self.dct.quantize_block(
                                dct_array2, self.jpeg_obj.qtable_number[comp])
                            coeff.extend(dct_array3[:64])
        return coeff

    def write_compressed_data(self):
        """Compute the coefficients, run AIS/embedding as configured, and Huffman-encode them."""
        last_dc_value = create_array(0, self.jpeg_obj.comp_num)
        min_block_width = min(self.jpeg_obj.block_width)
        min_block_height = min(self.jpeg_obj.block_height)

        logger.info('DCT/quantisation starts')
        logger.info('%d x %d' % (self.image_width, self.image_height))

        coeff = self._get_coeff()  # quantised coefficients (integers)
        coeff_count = len(coeff)

        # Export the unprocessed quantised DCT coefficients.
        if self.hasais:
            filename = 'unpro_ais.json'
        else:
            filename = 'unpro.json'
        with open(filename, 'w') as f_unprocess:
            json.dump(coeff, f_unprocess)

        # AIS processing.
        if self.hasais:
            # NOTE(review): reads embedded_data.len, so AIS requires a payload.
            size_secret = self.embedded_data.len
            ais = Ais(coeff, size_secret)  # coeff is modified in place
            ais.statistic()
            self.k_matrix = ais.fix()
            # NOTE(review): placement inside the AIS branch was reconstructed
            # from collapsed source — confirm.
            with open('aised.json', 'w') as f_aised:
                json.dump(coeff, f_aised)
            # Idea noted by the author: embed, then re-collect statistics on
            # the embedded data to decide whether to continue AIS processing.

        logger.info('got %d DCT AC/DC coefficients' % coeff_count)
        _changed, _embedded, _examined, _expected, _one, _large, _thrown, _zero = 0, 0, 0, 0, 0, 0, 0, 0
        # Count AC coefficients equal to +-1 and 0; every 64th entry (the
        # first of each block) is skipped.
        for i, cc in enumerate(coeff):
            if i % 64 == 0:
                continue
            if cc == 1 or cc == -1:
                _one += 1
            elif cc == 0:
                _zero += 1

        # Number of effective coefficients.
        _large = coeff_count - _zero - _one - coeff_count // 64
        # Expected capacity; the shrinkage effect cannot be determined here.
        _expected = _large + int(0.49 * _one)

        logger.info('one=%d' % _one)
        logger.info('large=%d' % _large)
        logger.info('\nexpected capacity: %d bits\n' % _expected)
        logger.info('expected capacity with')

        for i in range(1, 8):
            n = (1 << i) - 1  # n = 2^i - 1
            changed = _large - _large % (n + 1)
            changed = (changed + _one + _one // 2 - _one // (n + 1)) // (n + 1)

            usable = (_expected * i // n - _expected * i // n % n) // 8
            if usable == 0:
                break

            logger.info(
                '%s code: %d bytes (efficiency: %d.%d bits per change)' %
                ('default' if i == 1 else '(1, %d, %d)' %
                 (n, i), usable, usable * 8 // changed,
                 usable * 80 // changed % 10))

        # Shuffle all coefficients using a permutation, then embed.
        if self.embedded_data is not None:
            logger.info('permutation starts')
            random = F5Random(self.password)
            permutation = Permutation(coeff_count, random)

            next_bit_to_embed = 0
            byte_to_embed = len(self.embedded_data)
            available_bits_to_embed = 0

            logger.info('Embedding of %d bits (%d+4 bytes)' %
                        (byte_to_embed * 8 + 32, byte_to_embed))

            # The length travels in the low 23 bits of the header word.
            if byte_to_embed > 0x007fffff:
                byte_to_embed = 0x007fffff

            for i in range(1, 8):
                self.n = (1 << i) - 1
                usable = (_expected * i // self.n -
                          _expected * i // self.n % self.n) // 8
                if usable < byte_to_embed + 4:
                    break

            # Determine (1, n, k): from capacity, unless AIS supplied a k.
            if self.k_matrix < 0:
                k = i - 1
            else:
                k = self.k_matrix
            self.n = (1 << k) - 1

            if self.n == 0:
                logger.info('using default code, file will not fit')
                self.n = 1
            elif self.n == 1:
                logger.info('using default code')
            else:
                logger.info('using (1, %d, %d) code' % (self.n, k))

            # Header word: k in the top byte, length below, masked with
            # four bytes from the password-seeded generator.
            byte_to_embed |= k << 24
            byte_to_embed ^= random.get_next_byte()
            byte_to_embed ^= random.get_next_byte() << 8
            byte_to_embed ^= random.get_next_byte() << 16
            byte_to_embed ^= random.get_next_byte() << 24

            next_bit_to_embed = byte_to_embed & 1
            byte_to_embed >>= 1
            available_bits_to_embed = 31
            _embedded += 1

            # Bit-by-bit embedding: the header always, the payload too when
            # the default code (n == 1) is in use.
            for i, shuffled_index in enumerate(permutation.shuffled):
                if shuffled_index % 64 == 0 or coeff[shuffled_index] == 0:
                    continue

                cc = coeff[shuffled_index]
                _examined += 1

                if cc > 0 and (cc & 1) != next_bit_to_embed:
                    coeff[shuffled_index] -= 1
                    _changed += 1
                elif cc < 0 and (cc & 1) == next_bit_to_embed:
                    coeff[shuffled_index] += 1
                    _changed += 1

                if coeff[shuffled_index] != 0:
                    if available_bits_to_embed == 0:
                        if self.n > 1 or not self.embedded_data.available():
                            break

                        byte_to_embed = self.embedded_data.read()
                        byte_to_embed ^= random.get_next_byte()
                        available_bits_to_embed = 8

                    next_bit_to_embed = byte_to_embed & 1
                    byte_to_embed >>= 1
                    available_bits_to_embed -= 1
                    _embedded += 1
                else:
                    # The coefficient became zero; the bit must be re-embedded.
                    _thrown += 1

            if self.n > 1:
                try:
                    is_last_byte = False
                    # Remaining non-zero AC coefficients after position i of
                    # the permutation.
                    filtered_index = FilteredCollection(
                        permutation.shuffled[i + 1:],
                        lambda index: index % 64 and coeff[index])
                    while not is_last_byte:
                        # Gather up to k payload bits.
                        k_bits_to_embed = 0
                        for i in range(k):
                            if available_bits_to_embed == 0:
                                if not self.embedded_data.available():
                                    is_last_byte = True
                                    break

                                byte_to_embed = self.embedded_data.read()
                                byte_to_embed ^= random.get_next_byte()
                                available_bits_to_embed = 8

                            next_bit_to_embed = byte_to_embed & 1
                            byte_to_embed >>= 1
                            available_bits_to_embed -= 1
                            k_bits_to_embed |= next_bit_to_embed << i
                            _embedded += 1

                        code_word = filtered_index.offer(self.n)
                        while True:
                            vhash = 0
                            for i, index in enumerate(code_word):
                                if coeff[index] > 0:
                                    extracted_bit = coeff[index] & 1
                                else:
                                    extracted_bit = 1 - (coeff[index] & 1)

                                if extracted_bit == 1:
                                    vhash ^= i + 1

                            # Position (1-based) of the coefficient to change;
                            # zero means the code word already matches.
                            i = vhash ^ k_bits_to_embed
                            if not i:
                                break

                            i -= 1
                            coeff[code_word[i]] += 1 if coeff[
                                code_word[i]] < 0 else -1
                            _changed += 1

                            if not coeff[code_word[i]]:
                                # Coefficient became zero: drop it from the
                                # code word, fetch a replacement and retry.
                                _thrown += 1
                                code_word[i:i + 1] = []
                                code_word.extend(filtered_index.offer(1))
                            else:
                                break
                except FilteredCollection.ListNotEnough:
                    # Ran out of usable coefficients; embedding stops here.
                    pass

            if _examined > 0:
                logger.info('%d coefficients examined' % _examined)
            if _changed > 0:
                logger.info(
                    '%d coefficients changed (efficiency: %d.%d bits per change'
                    % (_changed, _embedded // _changed,
                       _embedded * 10 // _changed % 10))

            logger.info('%d coefficients thrown (zeroed)' % _thrown)
            logger.info('%d bits (%d bytes) embedded' %
                        (_embedded, _embedded // 8))

        # Export the coefficients after embedding.
        # NOTE(review): placement at method level (outside the embedding
        # branch) was reconstructed from collapsed source — confirm.
        if self.hasais:
            filename2 = 'embeded_ais.json'
        else:
            filename2 = 'embeded.json'
        with open(filename2, 'w') as f_embeded:
            json.dump(coeff, f_embeded)

        logger.info('starting hufman encoding')
        shuffled_index = 0
        for r in range(min_block_height):
            for c in range(min_block_width):
                for comp in range(self.jpeg_obj.comp_num):
                    for i in range(self.jpeg_obj.vsamp_factor[comp]):
                        for j in range(self.jpeg_obj.hsamp_factor[comp]):
                            dct_array3 = coeff[shuffled_index:shuffled_index +
                                               64]
                            self.huf.huffman_block_encoder(
                                self.out, dct_array3, last_dc_value[comp],
                                self.jpeg_obj.dctable_number[comp],
                                self.jpeg_obj.actable_number[comp])
                            last_dc_value[comp] = dct_array3[0]
                            shuffled_index += 64

        self.huf.flush_buffer(self.out)
        logger.info('hufman encode end')
if (args['verbose']):
    print('Reading input-file...')

# Read the whole source file; the context manager closes it even on error.
with open('./' + args['source'], 'r') as inputFile:
    inputData = inputFile.read()

# Determine compression-method
# =============================

# HUFFMAN-ENCODING
if (args['compression'] == 'h' or args['uncompression'] == 'h'):
    if (args['verbose']):
        print('Encoding-mode is set to HUFFMAN.')

    from huffman import Huffman
    coder = Huffman()

    # Choose built-in omega or generate own?
    if (args['dictionary'] == 'b'):
        if (args['verbose']):
            print('Using build-in dictionary!')
        coder.buildOwn = False
    else:
        if (args['verbose']):
            print('We need to generate / use our own dictionary!')

        # When uncompressing, the dictionary saved next to the source
        # ("<source>.huff") has to be loaded back.
        # NOTE(review): nesting under this else-branch was reconstructed
        # from collapsed source — confirm.
        if (args['uncompression'] is not None):
            try:
                # SECURITY NOTE(review): pickle.load executes arbitrary code
                # from the file; only load dictionaries this tool wrote.
                with open('./' + args['source'] + '.huff', 'rb') as f:
                    coder.setOmega(pickle.load(f))
            except Exception:
                # Best effort: report and carry on without the dictionary.
                print('Unable to read huffman-dictionary!')
def init(texto):
    """Build a Huffman object for `texto` and return the result of mostrarTabelas()."""
    return Huffman(texto).mostrarTabelas()
#!/usr/bin/env python3 print('Testground EPR_05') print('Welchen Aufgabenteil moechten Sie testen?'); print('[1|2|3]'); which = input(">> ") ######### AUFGABE 5.2 TESTS ######### if (which == "3"): from huffman import Huffman Huff = Huffman() Huff.buildOwn = True print('Geben Sie einen Text zum codieren ein!'); text = input('>> '); encoded = Huff.encode(text) print(encoded) print('Dekodiert:') print(Huff.decode(encoded)) Huff.verbose() ######### AUFGABE 5.2 TESTS ######### if (which == "2"): from rle import RunLenghtEncoding rle = RunLenghtEncoding(); print('Geben Sie einen Text zum codieren ein!'); text = input('>> '); res = rle.encode(text)