Ejemplo n.º 1
0
 def test_huffman_decode_it_should_use_left_leaf_to_decode(self):
     """Check that a symbol held by the left child encodes to '0' and decodes back."""
     root = Huffman(2, 'doesnt matter', Huffman(1, 'a'))
     bits = root.huffman_encode('a')
     self.assertEqual(bits[0], '0')
     self.assertEqual('a', root.huffman_decode(bits))
Ejemplo n.º 2
0
 def test_build_codebook_it_should_correctly_assign_leaf_prefixes_according_to_Huffman_algorithm(
         self):
     """Check that the codebook maps the left child's data to '0' and the right child's to '1'."""
     root = Huffman(2, 'ab', Huffman(1, 'a'), Huffman(1, 'b'))
     for side, expected_prefix in (('left', '0'), ('right', '1')):
         child = getattr(root, side)
         self.assertEqual(root.codebook[child.data], expected_prefix)
Ejemplo n.º 3
0
def main():
    """Compress the file named on the command line, report sizes, then decompress it.

    Prints a hint and returns when no argument is given, and prints an error
    and returns when the path does not exist.
    """
    if len(sys.argv) < 2:
        print("Задайте файл с помощью аргументов командной строки")
        return
    filename = sys.argv[1]

    if not os.path.exists(filename):
        print("Файл не найден!")
        return

    source_size = os.path.getsize(filename)
    print("Исходный файл: '{}' ({} байт)".format(filename, source_size))

    huf = Huffman(filename)
    # compress() yields three values; the first (codes file name) is not used here.
    _codes_filename, res_filename, zeroes = huf.compress(filename)
    compressed_size = os.path.getsize(res_filename)
    print("Сжатый файл: '{}'  ({} байт)".format(res_filename, compressed_size))
    # sys.getsizeof reports the size of the table object itself.
    table_size = sys.getsizeof(huf.codes_table)
    print("Размер таблицы кодов: {} байт".format(table_size))
    print("Нулей дописано в последний байт:", zeroes)

    dec_filename = huf.decompress(filename, res_filename, zeroes)
    restored_size = os.path.getsize(dec_filename)
    print("Восстановленный файл: '{}'  ({} байт)".format(
        dec_filename, restored_size))
Ejemplo n.º 4
0
 def test_huffman_decode_it_should_raise_ValueError_when_code_is_invalid(
         self):
     """Check that decoding an invalid code raises ValueError carrying the expected message."""
     with self.assertRaises(ValueError) as raised:
         Huffman(1, 'abc').huffman_decode('abcdefg')
     error_args = raised.exception.args
     self.assertTrue('Error when encoding the string' in error_args)
Ejemplo n.º 5
0
 def test_unzip_tree_it_should_return_None_for_index_over_the_length_of_the_encoded_tree(
         self):
     """Check that unzip_tree returns None when the index lies past the encoded tree."""
     encoded_tree = 'abcdefg'
     leaves = [Huffman(1, 'abc')]
     past_the_end = len(encoded_tree) + 1
     result = Huffman.unzip_tree(encoded_tree, leaves, past_the_end)
     self.assertIsNone(result)
Ejemplo n.º 6
0
 def test_build_codebook_it_should_initialize_codebook_with_left_and_right_nodes(
         self):
     """Check that the codebook holds '0' for the left symbol and '1' for the right symbol."""
     root = Huffman(2, 'anything', Huffman(1, 'a'), Huffman(1, 'b'))
     for symbol, expected_code in (('a', '0'), ('b', '1')):
         self.assertEqual(root.codebook[symbol], expected_code)
Ejemplo n.º 7
0
 def test_huffman_decode_it_should_use_right_leaf_to_decode(self):
     """Check that a symbol held by the right child encodes to '1' and decodes back."""
     root = Huffman(2, 'something', None, Huffman(1, 'b'))
     bits = root.huffman_encode('b')
     self.assertEqual(bits[0], '1')
     self.assertEqual('b', root.huffman_decode(bits))
Ejemplo n.º 8
0
def compress_data():
    """Save an uploaded ECG data file, Huffman-encode it and render the result page.

    Returns:
        The rendered ``compress_output.html`` page for a POST request that
        carries files, or a plain-text ``400`` response otherwise.
    """
    # Guard first: previously a non-POST or file-less request fell through to
    # render_template with input_file_size/ratio/text_object never bound,
    # which raised UnboundLocalError.
    if request.method != "POST" or not request.files:
        return "No file uploaded", 400

    file = request.files["ecg_data"]  # pass file to huffman encoding class
    print(file)

    filename = secure_filename(file.filename)
    print('uploaded filename:', filename)
    print('current directory:', os.getcwd())

    # Build the destination once so the path the upload is saved to and the
    # path handed to Huffman cannot drift apart.
    data_filepath = os.path.join(os.getcwd() + UPLOAD_FOLDER, filename)
    file.save(data_filepath)

    input_file_size = os.stat(data_filepath).st_size

    text_object = Huffman(data_filepath)
    ratio = text_object.create_output()

    return render_template("compress_output.html",
                           file_size=input_file_size,
                           compression_ratio=ratio,
                           final_file_size=(text_object.initial_byte_counter *
                                            3),
                           result=text_object.codes_dict)
Ejemplo n.º 9
0
	def test_encode_decode(self):
		"""Check the newline <-> "0" mapping and a full encode/decode round trip."""
		assert Huffman().get_encoded_bytes("\n") == "0"
		assert Huffman().get_decoded_text("0") == "\n"
		quoted = '"Huffman"'
		encoded = Huffman().get_encoded_bytes(quoted)
		round_tripped = Huffman().get_decoded_text(encoded)
		assert round_tripped == quoted
    def __init__(self, image, quality, out, comment):
        """Record the encoding parameters and build the JpegInfo, DCT and Huffman helpers.

        ``image`` must expose a two-element ``size`` (unpacked as width, height);
        ``out`` is stored as the output target.
        """
        self.quality = quality
        self.jpeg_obj = JpegInfo(image, comment)

        width, height = image.size
        self.image_width = width
        self.image_height = height
        self.out = out

        self.dct = DCT(quality)
        self.huf = Huffman(*image.size)
Ejemplo n.º 11
0
 def test_print_it_should_print_a_huffman_node_according_to_implementation(
         self):
     """Check that printing repr() of a node passes the expected text to print().

     ``builtins.print`` is swapped for a Mock only for the duration of the
     check and is always restored, so the mock cannot leak into other tests.
     """
     left = Huffman(1, 'a')
     right = Huffman(1, 'b')
     huffman = Huffman(2, 'abc', left, right)
     original_print = builtins.print
     builtins.print = Mock()
     try:
         builtins.print(repr(huffman))
         builtins.print.assert_called_with(
             '<Huffman(data: abc, left: a, right: b>')
     finally:
         # Restore the real print even when the assertion above fails.
         builtins.print = original_print
Ejemplo n.º 12
0
    def file_set(self, file_chooser):
        """Read the chosen file, run Huffman coding on its text and show the results.

        ``file_chooser`` must provide ``get_file().get_path()`` returning the
        path of the file to read.
        """
        path = file_chooser.get_file().get_path()
        # The context manager closes the file; it was previously left open.
        with open(path, "r") as opened_file:
            text = opened_file.read()

        huffman = Huffman()
        huffman.originalMessage = text
        huffman.startHuffmanCoding()
        self.show_results(huffman)
Ejemplo n.º 13
0
def main():
    """Read the input, build the Huffman table, then show the results and the menu."""
    reader = File_Input()
    coder = Huffman()
    freq_table = reader.read()
    # to_huffman receives a shallow copy, so freq_table itself is what menu() gets.
    huff_table = coder.to_huffman(copy.copy(freq_table))
    text = reader.get_str()

    bit_string = get_bin(huff_table, text)
    results(text, bit_string, huff_table)
    menu(freq_table, huff_table, text, bit_string)
Ejemplo n.º 14
0
    def browserFile(self):
        """Ask for a text file and either decode it or encode it with Huffman.

        A file whose first line starts with ``*(`` is treated as previously
        encoded output: the remaining lines are decoded and written to
        ``<name>_dencoded.txt``. Any other file is encoded and written to
        ``<name>_encoded.txt``. Both outputs are created in the current
        working directory. Status text is reported through ``self.log`` and
        ``self.resume``. Does nothing when the dialog is cancelled.
        """
        filename = filedialog.askopenfilename(
            initialdir=".", title="Selecione a o arquivo", filetypes=(("text files", ".txt"),))

        # A cancelled dialog returns an empty value; opening it would raise.
        if not filename:
            return

        # Read everything up front so the input is closed even if coding fails.
        with open(filename, "r") as file:
            first_line = file.readline()
            remainder = file.read()

        # The slice drops the last four characters (the ".txt" suffix selected
        # by the dialog's file type filter).
        base_name = os.path.basename(filename)[0: -4]

        if first_line[:2] == "*(":
            # SECURITY NOTE(review): eval() executes whatever expression the
            # first line of the chosen file contains. Kept as-is because the
            # expected structure of ``trim`` is not visible here; consider
            # ast.literal_eval if it is a plain literal.
            dencode = Huffman(word=remainder, trim=eval(
                first_line[1:]), dencode=0)

            # The context manager flushes and closes the output file.
            with open(base_name + "_dencoded.txt", "w+") as file_dencode:
                file_dencode.write(dencode.word_decode)

            self.log["text"] = "Descompactação realizada com sucesso!!\n" + \
                                "Verifique o resultado na pasta raiz do projeto."
            self.resume["text"] = ""

        else:
            encodefy = Huffman(word=first_line + remainder)

            with open(base_name + "_encoded.txt", "w+") as file_encode:
                file_encode.write("*" + str(encodefy.trim) + "\n")
                file_encode.write(str(encodefy.word_encode))

            self.log["text"] = "Compressão realizada com sucesso!!\n" + \
                                "Verifique o resultado na pasta raiz do projeto."

            size_compress = len(encodefy.word_encode)
            current_size = len(encodefy.word)*8
            # Avoid ZeroDivisionError when the source word is empty.
            ratio = (size_compress/current_size)*100 if current_size else 0.0

            self.resume["text"] = "Tamanho antigo do arquivo: " + \
                str(current_size) + " bits \n" + "Tamanho atual do arquivo: " + \
                str(size_compress) + " bits\n" + "Taxa de compressão: " + \
                str("{:.2f}".format(ratio)) + "%"
Ejemplo n.º 15
0
def compress_each_file_in_folder():
    """Compress every file in ./testfiles/ that is not itself a (de)compressed output.

    Each ``name.ext`` is compressed into ``name_compressed.ext`` in the same
    folder. Entries whose name contains ``_compressed`` or ``_decompressed``
    are skipped.
    """
    path = './testfiles/'
    for entry in listdir(path):
        if '_compressed' in entry or '_decompressed' in entry:
            continue
        print(entry)
        stem, file_extension = os.path.splitext(entry)
        # Context managers close both handles (flushing the output) even if
        # compression raises; previously neither file was ever closed.
        with open(f'{path}{entry}', 'rb') as infile, \
                open(f'{path}{stem}_compressed{file_extension}',
                     'wb') as outfile:
            h = Huffman(infile, outfile)
            h.compress()
Ejemplo n.º 16
0
def glavnaMethod():
    """Read the input, build the Huffman table, then show the scores and the menu.

    The frequency data is handed to ``prebaciSveUHufman`` as a shallow copy,
    so the object later given to ``Izbor`` is the one originally read.
    """
    # The unused ``count`` local was dropped; nothing read it.
    ukucaj = File_Input()
    igra = Huffman()
    dajSveRezultatee = ukucaj.citaj()
    dioIgre = igra.prebaciSveUHufman(copy.copy(dajSveRezultatee))
    kucajText = ukucaj.citajFile()

    bajneri = sveU_bajneriju(dioIgre, kucajText)
    pokaziSveScore(kucajText, bajneri, dioIgre)
    Izbor(dajSveRezultatee, dioIgre, kucajText, bajneri)
Ejemplo n.º 17
0
    def __init__(self, frame_id, img_size, macroblock_size, frame, bufsize):
        """Prepare the thread state for turning one RLE-encoded frame into packets.

        Builds the bitstream generator, derives the payload sizes left in a
        packet once metadata is accounted for, and generates and encodes the
        Huffman dictionary for the whole frame (timing its generation).
        """
        threading.Thread.__init__(self)

        self.bit_generator = BitstreamGenerator(
            frame_id, img_size, macroblock_size)

        # List of lists of tuples: each sub-list (a list of tuples) is the
        # RLE representation of one macroblock.
        self.frame = frame

        # This also equals (img_size[0] * img_size[1]) // macroblock_size**2
        self.total_num_of_macroblocks = len(frame)

        self.bufsize = bufsize

        # Metadata of the packets sent to form the dict bitstream takes
        # 50 "bits" (50 = 16 + 2 + 16 + 16).
        self.taille_metadonnees_dict = 50

        # Size of an elementary dict packet before it is attached to the
        # bitstream packet.
        self.taille_paquet_elementaire_dict = bufsize - self.taille_metadonnees_dict

        # Metadata of the packets sent to form the body bitstream takes
        # 66 "bits" (66 = 16 + 2 + 16 + 16 + 16).
        self.taille_metadonnees_body = 66

        # Size of an elementary body packet before it is attached to the
        # bitstream packet (whose size, 66 + taille_paquet_elementaire_body,
        # is bufsize by definition).
        self.taille_paquet_elementaire_body = bufsize - self.taille_metadonnees_body

        # Number of packets that will be sent from the client to the server
        # for the dict and for the body respectively; not known yet.
        self.nb_paquets_dict = None
        self.nb_paquets_body = None

        # Generate, then encode, the Huffman dictionary for the **entire**
        # frame; the flattening of the frame is part of the timed section.
        started_at = time()
        self.huff = Huffman(
            [rle_tuple for macroblock in frame for rle_tuple in macroblock])
        finished_at = time()
        self.duree_generation_dico_huffman = finished_at - started_at
        self.dict_huffman_encode = self.huff.dictToBin()

        # Size of the **useful** compressed data of the body.
        self.taille_donnees_compressees_huffman = 0

        # Buffer that fills up whenever verrou_buffer_bitstream is already
        # held by the main thread.
        self.buffer_interne = ""
Ejemplo n.º 18
0
def main():
  h = Huffman(request_freq_table)
  for s in test_data:
    print " encoding: ", s
    sp = [ord(c) for c in s]
    e_result = h.Encode(sp, False)
    print "      e_result: ", FormatAsBits(e_result)
    d_result = ''.join(ListToStr(h.Decode(e_result[0], False, e_result[1])))
    if d_result != s:
      print "difference found: ", d_result, " ", s
    else:
      print "It worked: ", s
    print
Ejemplo n.º 19
0
def main():
    """Compress the hard-coded sample file, print the sizes, then decompress it again."""
    filename = 'ziip.rar'  #'test.txt'
    original_size = os.path.getsize(filename)
    print("Исходный файл: '{}' ({} байт)".format(filename, original_size))

    huf = Huffman(filename)
    res_filename, zeroes = huf.compress(filename)
    compressed_size = os.path.getsize(res_filename)
    print("Сжатый файл: '{}'  ({} байт)".format(res_filename, compressed_size))
    print("Нулей дописано в последний байт:", zeroes)

    dec_filename = huf.decompress(filename, res_filename, zeroes)
    restored_size = os.path.getsize(dec_filename)
    print("Восстановленный файл: '{}'  ({} байт)".format(
        dec_filename, restored_size))
Ejemplo n.º 20
0
def huffman_output_bits(text: str):
    """Yield program fragments that lay out the packed bits of ``text`` and then walk the tree.

    The Huffman coder is built from ``text`` itself; its bit table is
    pretty-printed as a side effect when the generator starts running.
    """
    coder = Huffman.from_dist(text)
    pprint(coder.to_bits)
    packed = ''.join(coder.pack(text))
    # The packed bits are emitted in reverse order, preceded by a -1 marker.
    reversed_bits = [int(ch) for ch in reversed(packed)]
    yield from linear_create((-1, *reversed_bits), ">")
    yield "<|"
    yield from huffman_walker(coder.tree, decoders=["[->>>>>>+<<<<<<]"])
Ejemplo n.º 21
0
def main():
  h = Huffman(request_freq_table)
  for s in test_data:
    print " encoding: ", s
    sp = [ord(c) for c in s]
    e_result = BitBucket()
    h.EncodeToBB(e_result, sp, True)
    print "      e_result: ", FormatAsBits(e_result.GetAllBits())

    d_result = ListToStr(h.DecodeFromBB(e_result, True, -1))
    if d_result != s:
      print "difference found: d_result(%s) vs orig(%s)" % (repr(d_result),
                                                            repr(s))
    else:
      print "It worked: ", s
    print
Ejemplo n.º 22
0
class HuffmanTable():
    """Huffman table decoded from a byte sequence.

    Layout read by the constructor: byte 0 carries the table id (low nibble)
    and table type (high nibble), bytes 1..16 hold the number of codes for
    each code length 1..16, and the following bytes are the coded values.
    """

    def __init__(self, bytes):
        """Parse ``bytes`` into ``Codes`` and load them into a Huffman instance."""
        header = bytes[0]
        self.Id = header & 0x0F
        self.TableType = HuffmanTableType(header >> 4)
        self.Codes = {}
        self.Huffman = Huffman()

        # One count per code length, taken from the 16 bytes after the header.
        counts = [bytes[position] for position in range(1, 17)]
        cursor = 17  # first value byte, right after the 16 counts
        code = 0
        for length, count in enumerate(counts, start=1):
            for _ in range(count):
                # Keys are (code length, code); values are the coded bytes.
                self.Codes[(length, code)] = bytes[cursor]
                code += 1
                cursor += 1
            # Moving to the next code length appends one bit to the code.
            code <<= 1
        self.Huffman.FromTable(self.Codes)

    def __repr__(self):
        """Return ``Table <id as two hex digits> Type <table type>``."""
        return "Table {:02X} Type {}".format(self.Id, self.TableType)
Ejemplo n.º 23
0
 def __init__(self, bytes):
     """Parse a Huffman table from ``bytes`` and load it into a Huffman instance.

     Byte 0 carries the table id (low nibble) and table type (high nibble),
     bytes 1..16 hold the number of codes per code length 1..16, and the
     following bytes are the coded values.
     """
     header = bytes[0]
     self.Id = header & 0x0F
     self.TableType = HuffmanTableType(header >> 4)
     self.Codes = {}
     self.Huffman = Huffman()

     # One count per code length, taken from the 16 bytes after the header.
     counts = [bytes[position] for position in range(1, 17)]
     cursor = 17  # first value byte, right after the 16 counts
     code = 0
     for length, count in enumerate(counts, start=1):
         for _ in range(count):
             # Keys are (code length, code); values are the coded bytes.
             self.Codes[(length, code)] = bytes[cursor]
             code += 1
             cursor += 1
         # Moving to the next code length appends one bit to the code.
         code <<= 1
     self.Huffman.FromTable(self.Codes)
    def __init__(self, image, quality, out, comment):
        """Record the encoding parameters and build the JpegInfo, DCT and Huffman helpers.

        ``image`` must expose a two-element ``size`` (unpacked as width, height);
        ``out`` is stored as the output target.
        """
        self.quality = quality
        self.jpeg_obj = JpegInfo(image, comment)

        width, height = image.size
        self.image_width = width
        self.image_height = height
        self.out = out

        self.dct = DCT(quality)
        self.huf = Huffman(*image.size)
Ejemplo n.º 25
0
 def test_huffman_decode_it_should_use_data_to_return_the_decoded_string(
         self):
     """Check that a two-symbol string survives an encode/decode round trip."""
     root = Huffman(2, 'ab', Huffman(1, 'a'), Huffman(1, 'b'))
     bits = root.huffman_encode('ab')
     self.assertEqual('ab', root.huffman_decode(bits))
Ejemplo n.º 26
0
def huffman_output_pairs(text: str):
    """Yield program fragments that lay out the packed bits of ``text`` two per cell, then walk the tree.

    The Huffman coder is built from ``text`` itself; its bit table is
    pretty-printed as a side effect when the generator starts running.
    """
    coder = Huffman.from_dist(text)
    pprint(coder.to_bits)
    packed = ''.join(coder.pack(text))
    bits = [int(ch) for ch in packed[::-1]]
    # Combine consecutive bits into one value (first bit + 2 * second bit);
    # a missing final partner counts as 0.
    evens, odds = bits[::2], bits[1::2]
    pairs = [
        low + 2 * high
        for low, high in zip_longest(evens, odds, fillvalue=0)
    ]
    yield from linear_create((-1, *pairs), ">")
    yield "<|"
    pair_decoders = [
        ">>-<<[->+<[->->>>>>+<<<<<]]+[->+]<[-<+>]<",
        "[->>>>>>+<<<<<<]"
    ]
    yield from huffman_walker(coder.tree, decoders=pair_decoders)
Ejemplo n.º 27
0
def read_dht_header(jpeg: Jpeg, file_: T.BinaryIO):
    """
    Read one Huffman table (DHT) segment from ``file_``.

    Prints the decoded header fields, builds a ``Huffman`` from the symbol
    counts and symbols, and raises ``NotImplementedError`` when the number of
    bytes consumed differs from the length declared in the segment.
    """
    segment_start = file_.tell()

    # JPEGs are network byte order, i.e. *big* endian.
    length = one_from_file(">H", file_)

    # Layout of the information byte:
    #   bit 0..3 : number of HT (0..3, otherwise error)
    #   bit 4    : type of HT, 0 = DC table, 1 = AC table
    #   bit 5..7 : not used, must be 0
    ht_information = one_from_file("B", file_)
    print("ht_information", bin(ht_information))

    # "number" here means index, not count.
    ht_num = ht_information & 0x0F
    print(f"huffman table number: {ht_num}")

    ht_type = (ht_information >> 4) & 0b1
    ht_type_str = "AC" if ht_type else "DC"
    print(f"type of huffman table: {ht_type_str}")

    unused_ht = (ht_information >> 5) & 0b111
    assert unused_ht == 0

    # Number of symbols with codes of length 1..16; their sum is the total
    # number of codes, which must be <= 256.
    num_symbols_per_bit_length = unpack_from_file("16B", file_)
    print(num_symbols_per_bit_length)

    num_symbols = sum(num_symbols_per_bit_length)
    assert num_symbols <= 256  # per comment above

    # The symbols follow in order of increasing code length; they are read
    # as one group per code length.
    symbols: T.List[T.List[int]] = [
        unpack_from_file(f"{symbol_count}B", file_)
        for symbol_count in num_symbols_per_bit_length
    ]

    print(symbols)

    huff = Huffman(num_symbols_per_bit_length, symbols)

    consumed = file_.tell() - segment_start
    if consumed == length:
        return
    raise NotImplementedError(
        "Sorry, we don't handle this yet {} != {}".format(consumed, length))
 def compression(self):
     """Tile each channel's coefficients into one uint8 image and encode it.

     For each of the three channels, the coarse coefficients are scaled by
     ``iinfo(uint8).max`` and placed in the top-left corner of ``img``; for
     every level ``j`` the three detail arrays are scaled the same way and
     written to the lower-left, upper-right and lower-right regions next to
     it, after which the corner size doubles. The image is stored in
     ``self.img``, encoded with ``self.__encode`` and passed to
     ``Huffman.ctor_from_bytes`` under the name ``"<channel>_code"``.

     Note: ``self.coarse_coef`` and ``self.coefficients`` are overwritten in
     place with their scaled versions.
     """
     # Original note (translated): unfold the coefficients into an image, call
     # the encoding function once per pass, arrange the bitstream and return
     # the encoding.
     # NOTE(review): no return statement is visible in this block — confirm.
     # shape of origin image
     self.shape = (size(self.coefficients[0][-1][0], 0) * 2,
                   size(self.coefficients[0][-1][0], 1) * 2)
     # Number of levels, derived from the ratio of full height to coarse height.
     num = int(log2(self.shape[0] / size(self.coarse_coef[0], 0)))
     for i in range(3):  # three channels
         # Current size of the filled top-left corner (rows, columns).
         position = [
             size(self.coarse_coef[i], 0),
             size(self.coarse_coef[i], 1)
         ]
         img = zeros([self.shape[0], self.shape[1]], dtype=uint8)
         self.coarse_coef[i] = self.coarse_coef[i].astype(float) * iinfo(
             uint8).max  # convert to integer
         img[0:position[0],
             0:position[1]] = self.coarse_coef[i].astype(uint8)
         for j in range(num):
             # Scale, then cast, each of the three detail arrays of level j.
             for k in range(3):
                 self.coefficients[i][j][k] = self.coefficients[i][j][
                     k].astype(float) * iinfo(uint8).max
                 self.coefficients[i][j][k] = self.coefficients[i][j][
                     k].astype(uint8)
             # Lower-left region.
             img[position[0]:position[0] * 2,
                 0:position[1]] = self.coefficients[i][j][0]
             # Upper-right region.
             img[0:position[0],
                 position[1]:position[1] * 2] = self.coefficients[i][j][1]
             # Lower-right region.
             img[position[0]:position[0] * 2,
                 position[1]:position[1] * 2] = self.coefficients[i][j][2]
             # The filled corner is now twice as large in both directions.
             position[0] *= 2
             position[1] *= 2
         # cv2.imshow("whole", img)
         self.img = img
         # Debug fixture kept from the original (8x8 sample input):
         # self.img = array([[63, -34, 49, 10, 7, 13, -12, 7], [-31, 23, 14, -13, 3, 4, 6, -1], [15, 14, 3, -12, 5, -7, 3, 9], [-9, -7, -14, 8, 4, -2, 3, 2], [-5, 9, -1, 47, 4, 6, -2, 2], [3, 0, -3, 2, 3, -2, 0, 4], [2, -3, 6, -4, 3, 6, 3, 6], [5, 11, 5, 6, 0, 3, -4, 4]])
         # self.shape = (8, 8)
         byte_array = self.__encode(409600)  # around 50KB
         tree = Huffman()
         tree.ctor_from_bytes(byte_array, str(i) + '_code')
Ejemplo n.º 29
0
def test_huffman_encode_decode_reverse():
    """Check that messages round-trip through a coder built from the first message."""
    initial = 'A man, a plan, a canal, Panama'
    huffman = Huffman(initial)

    # First the training text itself, then a shorter message drawn from the
    # same alphabet.
    for message in (initial, 'nana'):
        encoded = huffman.encode(message)
        decoded = huffman.decode(encoded)
        assert message == decoded
    def test_init(self):
        """Compare the Python Huffman DC/AC matrices element by element with the Java class's."""
        py_huffman = Huffman(100, 100)
        pydc = py_huffman.dc_matrix
        pyac = py_huffman.ac_matrix
        java_huffman = jpype.JClass('james.Huffman')(100, 100)
        jadc = java_huffman.DC_matrix
        jaac = java_huffman.AC_matrix

        def check(py, ja):
            # Lengths are compared at every nesting level before descending;
            # the Java side is always accessed by index.
            self.assertEqual(len(py), len(ja))
            for i, py_plane in enumerate(py):
                ja_plane = ja[i]
                self.assertEqual(len(py_plane), len(ja_plane))
                for j, py_row in enumerate(py_plane):
                    ja_row = ja_plane[j]
                    self.assertEqual(len(py_row), len(ja_row))
                    for k, py_value in enumerate(py_row):
                        ja_value = ja_row[k]
                        self.assertEqual(
                            py_value, ja_value,
                            '%d %d %d %d %d' % (i, j, k, py_value, ja_value))

        check(pyac, jaac)
        check(pydc, jadc)
Ejemplo n.º 31
0
        komprimerede fil). Begge filer skal åbnes i “binary mode”. Når en
        BitWriter instantieres, skal den have et file object som argument.

    Opgave 2:
        I opgave 2 skal man bruge metoderne readint32bits()
        og readbit() fra klassen BitReader fra det udleverede bibliotek bitIO.py til at læse heltal (for hyppighedstabel) og bits (for
        Huffmans-koderne) fra inputfilen (den komprimerede fil). Man skal
        bruge kaldet write(bytes([b])) (hvor write() er fra file objects
        og bytes() er en built-in funktion) til skrive bytes til outputfilen
        (den genskabte originale fil). Her er b et heltal som repræsenterer
        den byte, som skal skrives. Begge filer skal åbnes i “binary mode”.
        Når en BitReader instantieres, skal den have et file object som
        argument.

    :Gruppe medlemmer:
        Mads Emil Falkenstrøm, [email protected]
        Mathias Birkebjerg Kristiansen, [email protected]
        Patrick Nielsen, [email protected]
"""

import sys

from huffman import Huffman

if __name__ == '__main__':
    # Compress the file named by argv[1] into the file named by argv[2].
    # Both files are opened in binary mode; the context managers close them
    # (flushing the output) even if compression raises. Previously neither
    # file was ever closed.
    with open(sys.argv[1], 'rb') as infile, open(sys.argv[2], 'wb') as outfile:
        h = Huffman(infile, outfile)
        h.compress()
class JpegEncoder(object):
    """Write an image as a JPEG stream, optionally embedding data in its DCT coefficients.

    The encoder writes the JPEG headers, computes the quantised DCT
    coefficients, optionally modifies them to carry ``embedded_data`` (using
    a password-seeded permutation), Huffman-encodes them and writes the
    end-of-image marker to ``out``.

    NOTE(review): several computations use ``/`` where an integer result
    seems expected (array indices in ``_get_coeff``, capacity arithmetic in
    ``write_compressed_data``) — presumably written for Python 2 integer
    division; confirm before running under Python 3.
    """

    def __init__(self, image, quality, out, comment):
        """Store the encoding parameters and build the JpegInfo, DCT and Huffman helpers.

        ``image`` must expose a two-element ``size`` (width, height); ``out``
        must provide ``write`` and ``flush``.
        """
        self.quality = quality
        self.jpeg_obj = JpegInfo(image, comment)

        self.image_width, self.image_height = image.size
        self.out = out

        self.dct = DCT(self.quality)
        self.huf = Huffman(*image.size)

    def compress(self, embedded_data=None, password='******'):
        """Write the complete JPEG stream to ``self.out`` and flush it.

        ``embedded_data`` is wrapped in ``EmbedData`` when truthy; otherwise
        nothing is embedded. ``password`` seeds the random source used for
        the embedding.
        """
        self.embedded_data = EmbedData(embedded_data) if embedded_data else None
        self.password = password

        self.write_headers()
        self.write_compressed_data()
        self.write_eoi()
        self.out.flush()

    def get_quality(self):
        """Return the current quality setting."""
        return self.quality

    def set_quality(self, quality):
        """Change the quality and rebuild the DCT helper for it."""
        self.quality = quality
        self.dct = DCT(quality)

    def write_array(self, data):
        """Write one marker segment held in ``data`` to the output.

        ``data[2]`` and ``data[3]`` are read as a big-endian length; 2 is
        added so the written slice also covers the two leading marker bytes.
        """
        length = ((data[2] & 0xff) << 8) + (data[3] & 0xff) + 2
        self.out.write(bytearray(data[:length]))

    def write_marker(self, data):
        """Write the first two bytes of ``data`` (a bare marker) to the output."""
        self.out.write(bytearray(data[:2]))

    def write_eoi(self):
        """Write the end-of-image marker (0xFF 0xD9)."""
        EOI = [0xff, 0xD9]
        self.write_marker(EOI)

    def write_headers(self):
        """Write SOI, JFIF, optional comment, DQT, SOF, DHT and SOS segments in that order."""
        SOI = [0xff, 0xD8]
        self.write_marker(SOI)

        JFIF = [0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46,
                0x49, 0x46, 0x00, 0x01, 0x01, 0x01,
                0x00, 0x60, 0x00, 0x60, 0x00, 0x00]
        self.write_array(JFIF)

        comment = self.jpeg_obj.get_comment()
        if comment:
            # The stored length counts its own two bytes plus the comment.
            length = len(comment) + 2
            COM = [0xff, 0xfe, length >> 8 & 0xff, length & 0xff]
            COM.extend(comment)
            self.write_array(COM)

        # Two quantisation tables, each preceded by its index, with the
        # entries taken in JPEG_NATURAL_ORDER.
        DQT = [0xff, 0xdb, 0x00, 0x84]
        for k in range(2):
            DQT.append(k)
            DQT.extend([self.dct.quantum[k][JPEG_NATURAL_ORDER[i]] for i in range(64)])
        self.write_array(DQT)

        SOF = [0xff, 0xc0, 0x00, 0x11,
                self.jpeg_obj.precision,
                self.jpeg_obj.image_height >> 8 & 0xff,
                self.jpeg_obj.image_height & 0xff,
                self.jpeg_obj.image_width >> 8 & 0xff,
                self.jpeg_obj.image_width & 0xff,
                self.jpeg_obj.comp_num]
        for i in range(self.jpeg_obj.comp_num):
            SOF.append(self.jpeg_obj.com_id[i])
            SOF.append(eight_byte(self.jpeg_obj.hsamp_factor[i], self.jpeg_obj.vsamp_factor[i]))
            SOF.append(self.jpeg_obj.qtable_number[i])
        self.write_array(SOF)

        # The length bytes are placeholders until all four tables are added.
        DHT = [0xff, 0xc4, 0, 0]
        for i in range(4):
            DHT.extend(self.huf.BITS[i])
            DHT.extend(self.huf.VAL[i])

        # Segment length excludes the two marker bytes.
        DHT[2] = ((len(DHT) - 2) >> 8) & 0xff
        DHT[3] = (len(DHT) - 2) & 0xff
        self.write_array(DHT)

        SOS = [0xff, 0xda, 0x00, 0x0c, self.jpeg_obj.comp_num]
        for i in range(self.jpeg_obj.comp_num):
            SOS.append(self.jpeg_obj.com_id[i])
            SOS.append(eight_byte(self.jpeg_obj.dctable_number[i], self.jpeg_obj.actable_number[i]))

        SOS.append(self.jpeg_obj.ss)
        SOS.append(self.jpeg_obj.se)
        SOS.append(eight_byte(self.jpeg_obj.ah, self.jpeg_obj.al))
        self.write_array(SOS)

    def _get_coeff(self):
        """Return the quantised DCT coefficients of the whole image as one flat list.

        Blocks are visited row by row, column by column, component by
        component and sampling position by sampling position; each block
        contributes 64 values. Pixel reads past ``maxa``/``maxb`` are clamped
        to those indices.
        """
        dct_array1 = create_array(0.0, 8, 8)

        coeff = []
        for r in range(min(self.jpeg_obj.block_height)):
            for c in range(min(self.jpeg_obj.block_width)):
                xpos = c * 8
                ypos = r * 8
                for comp in range(self.jpeg_obj.comp_num):
                    indata = self.jpeg_obj.components[comp]
                    maxa = self.image_height / 2 * self.jpeg_obj.vsamp_factor[comp] - 1
                    maxb = self.image_width / 2 * self.jpeg_obj.hsamp_factor[comp] - 1

                    for i in range(self.jpeg_obj.vsamp_factor[comp]):
                        for j in range(self.jpeg_obj.hsamp_factor[comp]):
                            ia = ypos * self.jpeg_obj.vsamp_factor[comp] + i * 8
                            ib = xpos * self.jpeg_obj.hsamp_factor[comp] + j * 8

                            # Copy the 8x8 block, clamping at the image edge.
                            for a in range(8):
                                for b in range(8):
                                    dct_array1[a][b] = indata[min(ia+a, maxa)][min(ib+b, maxb)]

                            dct_array2 = self.dct.forward_dct(dct_array1)
                            dct_array3 = self.dct.quantize_block(dct_array2,
                                    self.jpeg_obj.qtable_number[comp])
                            coeff.extend(dct_array3[:64])
        return coeff

    def write_compressed_data(self):
        """Compute the coefficients, embed the payload if any, and Huffman-encode them.

        Steps: gather the coefficients, log capacity estimates, embed
        ``self.embedded_data`` (when not None) by adjusting non-zero AC
        coefficients in a password-permuted order, then write every 64-value
        block through the Huffman encoder and flush its buffer.
        """
        last_dc_value = create_array(0, self.jpeg_obj.comp_num)

        min_block_width = min(self.jpeg_obj.block_width)
        min_block_height = min(self.jpeg_obj.block_height)

        logger.info('DCT/quantisation starts')
        logger.info('%d x %d' % (self.image_width, self.image_height))

        coeff = self._get_coeff()
        coeff_count = len(coeff)

        logger.info('got %d DCT AC/DC coefficients' % coeff_count)
        _changed, _embedded, _examined, _expected, _one, _large, _thrown, _zero = 0, 0, 0, 0, 0, 0, 0, 0
        # Count the AC coefficients equal to +-1 and 0; every 64th value (the
        # first of each block, the DC term) is skipped.
        for i, cc in enumerate(coeff):
            if i % 64 == 0:
                continue
            if cc == 1 or cc == -1:
                _one += 1
            elif cc == 0:
                _zero += 1

        # Everything that is neither zero, +-1 nor a DC term counts as large.
        _large = coeff_count - _zero - _one - coeff_count / 64
        _expected = _large + int(0.49 * _one)

        logger.info('one=%d' % _one)
        logger.info('large=%d' % _large)

        logger.info('expected capacity: %d bits' % _expected)
        logger.info('expected capacity with')
        for i in range(1, 8):
            n = (1 << i) - 1
            changed = _large - _large % (n + 1)
            changed = (changed + _one + _one / 2 - _one / (n + 1)) / (n + 1)

            usable = (_expected * i / n - _expected * i / n % n) / 8
            if usable == 0:
                break

            logger.info('%s code: %d bytes (efficiency: %d.%d bits per change)' % ('default' if i == 1 else '(1, %d, %d)' % (n, i), usable, usable * 8 / changed, usable * 80 / changed % 10))

        if self.embedded_data is not None:
            logger.info('permutation starts')
            random = F5Random(self.password)
            permutation = Permutation(coeff_count, random)

            next_bit_to_embed = 0
            byte_to_embed = len(self.embedded_data)
            available_bits_to_embed = 0

            logger.info('Embedding of %d bits (%d+4 bytes)' % (byte_to_embed * 8 + 32, byte_to_embed))

            # The length shares a 32-bit word with k (stored from bit 24 up),
            # so it is capped at the same limit the check uses. The clamp
            # value previously read 0x007ffff (one 'f' short), which did not
            # match the comparison.
            if byte_to_embed > 0x007fffff:
                byte_to_embed = 0x007fffff

            # Pick the largest code size whose usable capacity still fits the
            # payload plus its 4-byte header; ``i`` is read after the loop.
            for i in range(1, 8):
                self.n = (1 << i) - 1
                usable = (_expected * i / self.n - _expected * i / self.n % self.n) / 8
                if usable < byte_to_embed + 4:
                    break

            k = i - 1
            self.n = (1 << k) - 1

            if self.n == 0:
                logger.info('using default code, file will not fit')
                self.n = 1
            elif self.n == 1:
                logger.info('using default code')
            else:
                logger.info('using (1, %d, %d) code' % (self.n, k))

            # Header word: payload length in the low bits, k from bit 24 up,
            # then masked with four bytes from the random source.
            byte_to_embed |= k << 24
            byte_to_embed ^= random.get_next_byte()
            byte_to_embed ^= random.get_next_byte() << 8
            byte_to_embed ^= random.get_next_byte() << 16
            byte_to_embed ^= random.get_next_byte() << 24

            next_bit_to_embed = byte_to_embed & 1
            byte_to_embed >>= 1
            available_bits_to_embed = 31
            _embedded += 1

            # One bit per usable coefficient: used for the 32 header bits and,
            # when n == 1, for the payload as well.
            for i, shuffled_index in enumerate(permutation.shuffled):
                if shuffled_index % 64 == 0 or coeff[shuffled_index] == 0:
                    continue
                cc = coeff[shuffled_index]
                _examined += 1

                # Move the coefficient one step towards zero when its parity
                # does not already carry the bit.
                if cc > 0 and (cc & 1) != next_bit_to_embed:
                    coeff[shuffled_index] -= 1
                    _changed +=1
                elif cc < 0 and (cc & 1) == next_bit_to_embed:
                    coeff[shuffled_index] += 1
                    _changed += 1

                if coeff[shuffled_index] != 0:
                    if available_bits_to_embed == 0:
                        if self.n > 1 or not self.embedded_data.available():
                            break
                        byte_to_embed = self.embedded_data.read()
                        byte_to_embed ^= random.get_next_byte()
                        available_bits_to_embed = 8
                    next_bit_to_embed = byte_to_embed & 1
                    byte_to_embed >>= 1
                    available_bits_to_embed -= 1
                    _embedded += 1
                else:
                    # The coefficient became zero, so the bit is embedded
                    # again in the next usable coefficient.
                    _thrown += 1

            if self.n > 1:
                try:
                    is_last_byte = False
                    # Continue right after the position where the loop above
                    # stopped (``i`` is its last index), keeping only
                    # non-zero AC coefficients.
                    filtered_index = FilteredCollection(permutation.shuffled[i+1:], lambda index: index % 64 and coeff[index])
                    while not is_last_byte:
                        # Collect the next k payload bits.
                        k_bits_to_embed = 0
                        for i in range(k):
                            if available_bits_to_embed == 0:
                                if not self.embedded_data.available():
                                    is_last_byte = True
                                    break
                                byte_to_embed = self.embedded_data.read()
                                byte_to_embed ^= random.get_next_byte()
                                available_bits_to_embed = 8
                            next_bit_to_embed = byte_to_embed & 1
                            byte_to_embed >>= 1
                            available_bits_to_embed -= 1
                            k_bits_to_embed |= next_bit_to_embed << i
                            _embedded += 1

                        code_word = filtered_index.offer(self.n)
                        while True:
                            # XOR together the 1-based positions of the
                            # coefficients whose extracted bit is 1.
                            vhash = 0
                            for i, index in enumerate(code_word):
                                if coeff[index] > 0:
                                    extracted_bit = coeff[index] & 1
                                else:
                                    extracted_bit = 1 - (coeff[index] & 1)
                                if extracted_bit == 1:
                                    vhash ^= i + 1
                            # A zero difference means the code word already
                            # carries the k bits.
                            i = vhash ^ k_bits_to_embed
                            if not i:
                                break

                            # Otherwise move the coefficient at that position
                            # one step towards zero.
                            i -= 1
                            coeff[code_word[i]] += 1 if coeff[code_word[i]] < 0 else -1
                            _changed += 1

                            if not coeff[code_word[i]]:
                                # It became zero: drop it from the code word,
                                # pull in a replacement and try again.
                                _thrown += 1
                                code_word[i:i+1] = []
                                code_word.extend(filtered_index.offer(1))
                            else:
                                break
                except FilteredCollection.ListNotEnough:
                    # No usable coefficients are left; embedding stops here.
                    pass

            if _examined > 0:
                logger.info('%d coefficients examined' % _examined)
            if _changed > 0:
                logger.info('%d coefficients changed (efficiency: %d.%d bits per change' % (_changed, _embedded / _changed, _embedded * 10 / _changed % 10))
            logger.info('%d coefficients thrown (zeroed)' % _thrown)
            logger.info('%d bits (%d bytes) embedded' % (_embedded, _embedded / 8))

        logger.info('starting hufman encoding')
        shuffled_index = 0

        # Encode the blocks in the same order _get_coeff produced them,
        # 64 coefficients at a time.
        for r in range(min_block_height):
            for c in range(min_block_width):
                for comp in range(self.jpeg_obj.comp_num):
                    for i in range(self.jpeg_obj.vsamp_factor[comp]):
                        for j in range(self.jpeg_obj.hsamp_factor[comp]):
                            dct_array3 = coeff[shuffled_index:shuffled_index+64]
                            self.huf.huffman_block_encoder(self.out, dct_array3, last_dc_value[comp],
                                    self.jpeg_obj.dctable_number[comp], self.jpeg_obj.actable_number[comp])
                            last_dc_value[comp] = dct_array3[0]
                            shuffled_index += 64

        self.huf.flush_buffer(self.out)
Ejemplo n.º 33
0
class JpegEncoder(object):
    def __init__(self, image, quality, out, comment, ais):
        """Prepare the encoder state for a single image.

        Args:
            image: source image; its ``size`` attribute is read here and the
                object itself is handed to ``JpegInfo``.
            quality: quality setting, stored and forwarded to ``DCT``.
            out: output stream that receives the JPEG bytes.
            comment: comment handed to ``JpegInfo``.
            ais: flag stored as ``hasais``; it switches on the AIS step in
                ``write_compressed_data``.
        """
        self.quality = quality
        self.jpeg_obj = JpegInfo(image, comment)

        width, height = image.size
        self.image_width = width
        self.image_height = height
        self.out = out

        self.dct = DCT(self.quality)
        self.huf = Huffman(width, height)

        self.hasais = ais
        # A negative value means no matrix-encoding parameter has been
        # chosen yet (write_compressed_data checks ``k_matrix < 0``).
        self.k_matrix = -1

    def compress(self, embedded_data=None, password='******'):
        self.embedded_data = EmbedData(
            embedded_data) if embedded_data else None
        self.password = password

        self.write_headers()
        self.write_compressed_data()
        self.write_eoi()
        self.out.flush()

    def get_quality(self):
        return self.quality

    def set_quality(self, quality):
        """Store a new quality setting and rebuild the DCT helper for it."""
        self.quality = quality
        self.dct = DCT(self.quality)

    def write_array(self, data):
        length = ((data[2] & 0xff) << 8) + (data[3] & 0xff) + 2
        self.out.write(bytearray(data[:length]))

    def write_marker(self, data):
        self.out.write(bytearray(data[:2]))

    def write_eoi(self):
        EOI = [0xff, 0xD9]
        self.write_marker(EOI)

    def write_headers(self):
        """Write every header segment that precedes the entropy-coded data.

        Segments are emitted in this order: SOI, JFIF APP0, an optional COM
        segment (only when ``jpeg_obj.get_comment()`` is truthy), DQT with
        two tables, SOF0, DHT with four tables, and SOS.

        The SOF and SOS length fields are derived from ``comp_num`` rather
        than hard-coded, so they stay correct for component counts other
        than three; for three components the bytes are unchanged.
        """
        comp_num = self.jpeg_obj.comp_num

        SOI = [0xff, 0xD8]
        self.write_marker(SOI)

        JFIF = [
            0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01,
            0x01, 0x00, 0x60, 0x00, 0x60, 0x00, 0x00
        ]
        self.write_array(JFIF)

        comment = self.jpeg_obj.get_comment()
        if comment:
            # A text string cannot be fed to bytearray() item by item on
            # Python 3, so turn it into bytes first. Byte strings and
            # integer sequences are passed through untouched.
            if isinstance(comment, type(u'')):
                comment = comment.encode('utf-8')
            length = len(comment) + 2
            COM = [0xff, 0xfe, length >> 8 & 0xff, length & 0xff]
            COM.extend(comment)
            self.write_array(COM)

        # 0x84 = 2 length bytes + 2 * (1 table id + 64 entries).
        DQT = [0xff, 0xdb, 0x00, 0x84]
        for k in range(2):
            DQT.append(k)
            DQT.extend([
                self.dct.quantum[k][JPEG_NATURAL_ORDER[i]] for i in range(64)
            ])
        self.write_array(DQT)

        # Frame header length: 8 fixed bytes + 3 per component.
        sof_length = 8 + 3 * comp_num
        SOF = [
            0xff, 0xc0, sof_length >> 8 & 0xff, sof_length & 0xff,
            self.jpeg_obj.precision,
            self.jpeg_obj.image_height >> 8 & 0xff,
            self.jpeg_obj.image_height & 0xff,
            self.jpeg_obj.image_width >> 8 & 0xff,
            self.jpeg_obj.image_width & 0xff, comp_num
        ]
        for i in range(comp_num):
            SOF.append(self.jpeg_obj.com_id[i])
            SOF.append(
                eight_byte(self.jpeg_obj.hsamp_factor[i],
                           self.jpeg_obj.vsamp_factor[i]))
            SOF.append(self.jpeg_obj.qtable_number[i])
        self.write_array(SOF)

        # The length is only known once all four tables are appended, so the
        # two length bytes are patched in afterwards.
        DHT = [0xff, 0xc4, 0, 0]
        for i in range(4):
            DHT.extend(self.huf.BITS[i])
            DHT.extend(self.huf.VAL[i])

        DHT[2] = len(DHT) - 2 >> 8 & 0xff
        DHT[3] = len(DHT) - 2 & 0xff
        self.write_array(DHT)

        # Scan header length: 6 fixed bytes + 2 per component.
        sos_length = 6 + 2 * comp_num
        SOS = [0xff, 0xda, sos_length >> 8 & 0xff, sos_length & 0xff,
               comp_num]
        for i in range(comp_num):
            SOS.append(self.jpeg_obj.com_id[i])
            SOS.append(
                eight_byte(self.jpeg_obj.dctable_number[i],
                           self.jpeg_obj.actable_number[i]))

        SOS.append(self.jpeg_obj.ss)
        SOS.append(self.jpeg_obj.se)
        SOS.append(eight_byte(self.jpeg_obj.ah, self.jpeg_obj.al))
        self.write_array(SOS)

    def _get_coeff(self):
        """Run the forward DCT and quantisation over every block of the image.

        Blocks are visited by block row, block column, component, then the
        vertical and horizontal sampling index of that component. Sample
        coordinates that fall outside the component are clamped to its last
        row/column.

        Returns:
            A flat list holding 64 quantised coefficients per visited block,
            in visiting order.
        """
        jpeg = self.jpeg_obj
        # One 8x8 sample buffer, refilled completely for every block.
        block = create_array(0.0, 8, 8)

        coeff = []
        for r in range(min(jpeg.block_height)):
            ypos = r * 8
            for c in range(min(jpeg.block_width)):
                xpos = c * 8
                for comp in range(jpeg.comp_num):
                    indata = jpeg.components[comp]
                    vsamp = jpeg.vsamp_factor[comp]
                    hsamp = jpeg.hsamp_factor[comp]
                    # Floor division keeps the clamp bounds integral; with
                    # true division they become floats on Python 3 and fail
                    # as list indices as soon as the clamp kicks in.
                    maxa = self.image_height // 2 * vsamp - 1
                    maxb = self.image_width // 2 * hsamp - 1

                    for i in range(vsamp):
                        for j in range(hsamp):
                            ia = ypos * vsamp + i * 8
                            ib = xpos * hsamp + j * 8

                            for a in range(8):
                                row = indata[min(ia + a, maxa)]
                                for b in range(8):
                                    block[a][b] = row[min(ib + b, maxb)]

                            transformed = self.dct.forward_dct(block)
                            quantized = self.dct.quantize_block(
                                transformed, jpeg.qtable_number[comp])
                            coeff.extend(quantized[:64])
        return coeff

    def write_compressed_data(self):
        """Quantise the image, optionally embed the payload, Huffman-encode.

        Steps, in order:

        1. Obtain the quantised coefficients from ``_get_coeff`` and dump
           them to ``unpro.json`` (``unpro_ais.json`` when ``hasais``).
        2. When ``hasais`` is set, run the ``Ais`` step on the coefficients,
           store its result in ``k_matrix`` and dump to ``aised.json``.
        3. Count coefficients and log the estimated embedding capacity.
        4. When ``embedded_data`` is not ``None``, walk the coefficients in
           permuted order and embed a 32-bit header word followed by the
           payload (bit by bit for n == 1, via (1, n, k) code words for
           n > 1).
        5. Dump the resulting coefficients to ``embeded.json``
           (``embeded_ais.json`` when ``hasais``).
        6. Huffman-encode all blocks to ``self.out`` and flush the encoder's
           bit buffer.

        Side effects: writes the JSON files above into the current working
        directory and sets ``self.n`` (and ``self.k_matrix`` under AIS).

        All capacity arithmetic uses floor division so the counts stay
        integers on Python 3 as well as Python 2.
        """
        last_dc_value = create_array(0, self.jpeg_obj.comp_num)
        min_block_width = min(self.jpeg_obj.block_width)
        min_block_height = min(self.jpeg_obj.block_height)

        logger.info('DCT/quantisation starts')
        logger.info('%d x %d' % (self.image_width, self.image_height))

        coeff = self._get_coeff()  # quantised coefficients (integers)
        coeff_count = len(coeff)

        # Export the unprocessed quantised DCT coefficients.
        if self.hasais:
            filename = 'unpro_ais.json'
        else:
            filename = 'unpro.json'
        with open(filename, 'w') as f_unprocess:
            json.dump(coeff, f_unprocess)

        # AIS processing.
        if self.hasais:
            # NOTE(review): raises AttributeError when AIS is requested
            # without a payload (embedded_data is None) - confirm callers
            # never do that.
            size_secret = self.embedded_data.len
            ais = Ais(coeff, size_secret)  # coeff is modified
            ais.statistic()
            self.k_matrix = ais.fix()
            with open('aised.json', 'w') as f_aised:
                json.dump(coeff, f_aised)

        # Embed, then gather statistics on the embedded data to decide
        # whether to continue with AIS processing.
        logger.info('got %d DCT AC/DC coefficients' % coeff_count)
        _changed = _embedded = _examined = _thrown = 0
        _one = _zero = 0
        for i, cc in enumerate(coeff):
            # Every 64th entry is a block's DC coefficient; it is never used.
            if i % 64 == 0:
                continue
            if cc == 1 or cc == -1:
                _one += 1
            elif cc == 0:
                _zero += 1

        # Number of usable coefficients.
        _large = coeff_count - _zero - _one - coeff_count // 64
        # Expected capacity; the shrinkage effect cannot be determined.
        _expected = _large + int(0.49 * _one)

        logger.info('one=%d' % _one)
        logger.info('large=%d' % _large)
        logger.info('\nexpected capacity: %d bits\n' % _expected)
        logger.info('expected capacity with')

        for i in range(1, 8):
            n = (1 << i) - 1  # n = 2^i - 1
            changed = _large - _large % (n + 1)
            changed = (changed + _one + _one // 2 - _one // (n + 1)) // (n + 1)
            usable = (_expected * i // n - _expected * i // n % n) // 8
            if usable == 0:
                break

            logger.info(
                '%s code: %d bytes (efficiency: %d.%d bits per change)' %
                ('default' if i == 1 else '(1, %d, %d)' % (n, i), usable,
                 usable * 8 // changed, usable * 80 // changed % 10))

        # Shuffle all coefficients using a permutation.
        if self.embedded_data is not None:
            logger.info('permutation starts')
            random = F5Random(self.password)
            permutation = Permutation(coeff_count, random)

            byte_to_embed = len(self.embedded_data)

            logger.info('Embedding of %d bits (%d+4 bytes)' %
                        (byte_to_embed * 8 + 32, byte_to_embed))

            # The header word keeps the length below bit 24 (k is OR-ed in
            # at bit 24), so clamp to the same limit the guard tests for.
            # The original assigned 0x007ffff, one hex digit short.
            if byte_to_embed > 0x007fffff:
                byte_to_embed = 0x007fffff

            for i in range(1, 8):
                self.n = (1 << i) - 1
                usable = (_expected * i // self.n -
                          _expected * i // self.n % self.n) // 8
                if usable < byte_to_embed + 4:
                    break

            # Determine (1, n, k).
            # NOTE(review): when the loop above finishes without a break, i
            # is 7, so k tops out at 6 - confirm whether k = 7 was intended.
            if self.k_matrix < 0:
                k = i - 1
            else:
                k = self.k_matrix
            self.n = (1 << k) - 1

            if self.n == 0:
                logger.info('using default code, file will not fit')
                self.n = 1
            elif self.n == 1:
                logger.info('using default code')
            else:
                logger.info('using (1, %d, %d) code' % (self.n, k))

            # Header word: payload length with k at bit 24, masked with
            # four bytes from the password-seeded generator.
            byte_to_embed |= k << 24
            byte_to_embed ^= random.get_next_byte()
            byte_to_embed ^= random.get_next_byte() << 8
            byte_to_embed ^= random.get_next_byte() << 16
            byte_to_embed ^= random.get_next_byte() << 24

            next_bit_to_embed = byte_to_embed & 1
            byte_to_embed >>= 1
            available_bits_to_embed = 31
            _embedded += 1

            # First pass: one bit per non-zero AC coefficient. With n > 1
            # this pass stops once the 32 header bits are consumed; with
            # n == 1 it also carries the payload.
            for i, shuffled_index in enumerate(permutation.shuffled):
                if shuffled_index % 64 == 0 or coeff[shuffled_index] == 0:
                    continue
                cc = coeff[shuffled_index]
                _examined += 1

                if cc > 0 and (cc & 1) != next_bit_to_embed:
                    coeff[shuffled_index] -= 1
                    _changed += 1
                elif cc < 0 and (cc & 1) == next_bit_to_embed:
                    coeff[shuffled_index] += 1
                    _changed += 1

                if coeff[shuffled_index] != 0:
                    if available_bits_to_embed == 0:
                        if self.n > 1 or not self.embedded_data.available():
                            break
                        byte_to_embed = self.embedded_data.read()
                        byte_to_embed ^= random.get_next_byte()
                        available_bits_to_embed = 8
                    next_bit_to_embed = byte_to_embed & 1
                    byte_to_embed >>= 1
                    available_bits_to_embed -= 1
                    _embedded += 1
                else:
                    # The change zeroed the coefficient; the same bit is
                    # retried on the next usable coefficient.
                    _thrown += 1

            if self.n > 1:
                try:
                    is_last_byte = False
                    # Continue right after the position where the first
                    # pass stopped, skipping DC and zero coefficients.
                    filtered_index = FilteredCollection(
                        permutation.shuffled[i + 1:],
                        lambda index: index % 64 and coeff[index])
                    while not is_last_byte:
                        # Collect up to k payload bits for this code word.
                        k_bits_to_embed = 0
                        for i in range(k):
                            if available_bits_to_embed == 0:
                                if not self.embedded_data.available():
                                    is_last_byte = True
                                    break
                                byte_to_embed = self.embedded_data.read()
                                byte_to_embed ^= random.get_next_byte()
                                available_bits_to_embed = 8
                            next_bit_to_embed = byte_to_embed & 1
                            byte_to_embed >>= 1
                            available_bits_to_embed -= 1
                            k_bits_to_embed |= next_bit_to_embed << i
                            _embedded += 1

                        code_word = filtered_index.offer(self.n)
                        while True:
                            # XOR together the 1-based positions of all
                            # coefficients whose extracted bit is 1.
                            vhash = 0
                            for i, index in enumerate(code_word):
                                if coeff[index] > 0:
                                    extracted_bit = coeff[index] & 1
                                else:
                                    extracted_bit = 1 - (coeff[index] & 1)
                                if extracted_bit == 1:
                                    vhash ^= i + 1
                            i = vhash ^ k_bits_to_embed
                            if not i:
                                # The code word already carries the bits.
                                break

                            i -= 1
                            # Move the selected coefficient one step
                            # towards zero.
                            coeff[code_word[i]] += 1 if coeff[
                                code_word[i]] < 0 else -1
                            _changed += 1

                            if not coeff[code_word[i]]:
                                # It became zero: drop it from the code
                                # word, pull in a replacement and retry.
                                _thrown += 1
                                code_word[i:i + 1] = []
                                code_word.extend(filtered_index.offer(1))
                            else:
                                break
                except FilteredCollection.ListNotEnough:
                    # Ran out of usable coefficients; embedding simply
                    # stops here.
                    pass

            if _examined > 0:
                logger.info('%d coefficients examined' % _examined)
            if _changed > 0:
                logger.info(
                    '%d coefficients changed (efficiency: %d.%d bits per change'
                    % (_changed, _embedded // _changed,
                       _embedded * 10 // _changed % 10))
            logger.info('%d coefficients thrown (zeroed)' % _thrown)
            logger.info('%d bits (%d bytes) embedded' %
                        (_embedded, _embedded // 8))

        # Export the coefficients after embedding.
        if self.hasais:
            filename2 = 'embeded_ais.json'
        else:
            filename2 = 'embeded.json'
        with open(filename2, 'w') as f_embeded:
            json.dump(coeff, f_embeded)

        logger.info('starting hufman encoding')
        shuffled_index = 0

        # Same block order as _get_coeff, 64 coefficients per block.
        for r in range(min_block_height):
            for c in range(min_block_width):
                for comp in range(self.jpeg_obj.comp_num):
                    for i in range(self.jpeg_obj.vsamp_factor[comp]):
                        for j in range(self.jpeg_obj.hsamp_factor[comp]):
                            dct_array3 = coeff[shuffled_index:shuffled_index +
                                               64]
                            self.huf.huffman_block_encoder(
                                self.out, dct_array3, last_dc_value[comp],
                                self.jpeg_obj.dctable_number[comp],
                                self.jpeg_obj.actable_number[comp])
                            last_dc_value[comp] = dct_array3[0]
                            shuffled_index += 64

        self.huf.flush_buffer(self.out)
        logger.info('hufman encode end')
if args['verbose']:
	print('Reading input-file...')
# The context manager closes the handle even when read() raises.
with open('./' + args['source'], 'r') as inputFile:
	inputData = inputFile.read()

# Determine compression-method
# =============================

# HUFFMAN-ENCODING
if args['compression'] == 'h' or args['uncompression'] == 'h':
	if args['verbose']:
		print('Encoding-mode is set to HUFFMAN.')
	from huffman import Huffman
	coder = Huffman()

	# Choose built-in omega or generate own?
	if args['dictionary'] == 'b':
		if args['verbose']:
			print('Using build-in dictionary!')
		coder.buildOwn = False
	else:
		if args['verbose']:
			print('We need to generate / use our own dictionary!')
		if args['uncompression'] is not None:
			try:
				# NOTE: pickle.load can execute arbitrary code from the file;
				# only feed it .huff dictionaries from a trusted source.
				with open('./' + args['source'] + '.huff', 'rb') as f:
					coder.setOmega(pickle.load(f))
			except Exception:
				print('Unable to read huffman-dictionary!')
Ejemplo n.º 35
0
def init(texto):
    """Build a Huffman object from *texto* and return its mostrarTabelas() result."""
    return Huffman(texto).mostrarTabelas()
Ejemplo n.º 36
0
def init(texto):
    """Create a Huffman instance for *texto* and hand back what mostrarTabelas() returns."""
    coder = Huffman(texto)
    tables = coder.mostrarTabelas()
    return tables
#!/usr/bin/env python3


# Banner and menu, then read which part of the exercise to run.
for banner_line in ('Testground EPR_05',
		'Welchen Aufgabenteil moechten Sie testen?',
		'[1|2|3]'):
	print(banner_line)
which = input(">> ")

######### TASK 5.2 TESTS #########
if which == "3":
	from huffman import Huffman
	Huff = Huffman()
	Huff.buildOwn = True
	print('Geben Sie einen Text zum codieren ein!')
	text = input('>> ')
	# Round trip: show the encoded form, then decode it again.
	encoded = Huff.encode(text)
	print(encoded)
	print('Dekodiert:')
	print(Huff.decode(encoded))
	Huff.verbose()


######### AUFGABE 5.2 TESTS #########
if (which == "2"):
	from rle import RunLenghtEncoding
	rle = RunLenghtEncoding();

	print('Geben Sie einen Text zum codieren ein!');
	text = input('>> ');
	res = rle.encode(text)