Esempio n. 1
0
def test_build_tree():
    """Check the structure ``build_tree`` returns for the string ``"abbb"``.

    The result must be a ``Node`` whose slot 1 is ``None``; the entries in
    slots 2 and 3 must carry ``'a'`` and ``'b'`` in their own slot 1.
    """
    root = build_tree("abbb")
    assert isinstance(root, Node)
    # NOTE(review): slot 1 looks like the symbol and slots 2/3 like the two
    # children -- confirm against the Node definition.
    assert root[1] is None
    branch_a = root[2]
    assert branch_a[1] == 'a'
    branch_b = root[3]
    assert branch_b[1] == 'b'
Esempio n. 2
0
 def test_decoding_real_world(self):
     """Round-trip the real-world sample text through ``encode``/``decode``.

     The tree is built from ``get_real_world_input()`` and the decoded
     output must equal the text returned by ``get_real_world_text()``.
     """
     huffman_tree = build_tree(get_real_world_input())
     original = get_real_world_text()
     round_tripped = decode(encode(original, huffman_tree), huffman_tree)
     self.assertEqual(original, round_tripped)
Esempio n. 3
0
 def test_decoding(self):
     """Round-trip several sample strings through ``encode``/``decode``.

     All samples share one tree built from ``get_wikipedia_input()``; each
     decoded result must equal the string it started from.
     """
     huffman_tree = build_tree(get_wikipedia_input())
     for original in ('a', 'abc', 'adeaddadcededabadbabeabeadedabacabed'):
         bits = encode(original, huffman_tree)
         self.assertEqual(original, decode(bits, huffman_tree))
Esempio n. 4
0
 def test_encoding(self):
     """Encode a fixed sample and compare its bit string with the expected one.

     The tree is built from ``get_wikipedia_input()``; the encoded result is
     rendered with ``to01()`` before the comparison.
     """
     expected_bits = (
         '01000100100000010001110001000010011010000110100111001001110010001000010011010110100111000'
     )
     huffman_tree = build_tree(get_wikipedia_input())
     encoded = encode('adeaddadcededabadbabeabeadedabacabed', huffman_tree)
     self.assertEqual(encoded.to01(), expected_bits)
def compress_image(input_file):
    """Compress an image from ``images/`` into a Huffman-coded file in ``output/``.

    The image is opened from the ``images`` directory next to this module,
    passed through ``hilbert_compression``, cast to ``uint8`` and
    Huffman-coded. The header, the trimmed tree and the coded data are
    written, in that order, to ``output/<name>.txt`` next to this module,
    where ``<name>`` is ``input_file`` without its extension. Progress and
    size statistics are printed along the way.

    Args:
        input_file: File name of the image, relative to the ``images``
            directory.

    Returns:
        The ``uint8`` array produced by ``hilbert_compression`` that was
        encoded.
    """
    before = time.time()
    base_dir = os.path.dirname(__file__)
    # os.path.join keeps the path relative when dirname(__file__) is empty;
    # plain "+" concatenation would produce the root-absolute "/images/...".
    path = os.path.join(base_dir, "images", input_file)
    print("Compressing %s ..." % input_file)
    image = Image.open(path)
    width, height = image.size[0], image.size[1]
    print("Image is %d x %d" % (width, height))
    img_raw_size = huffman.raw_size(width, height)
    print("Image size is: %d byte." % img_raw_size)
    data = np.asarray(image)

    hilbert_array = hilbert_compression(data)

    hilbert_array = hilbert_array.astype('uint8')

    print("Counting symbols...")
    counts = huffman.count_hilbert(hilbert_array)

    print("Building tree...")
    tree = huffman.build_tree(counts)

    print("Trimming tree...")
    trimmed_tree = huffman.trim_tree(tree)

    print("Assigning codes to pixels...")
    codes = huffman.assign_codes(trimmed_tree)

    estimated_size = huffman.compressed_size(counts, codes)

    print("Estimated size: %d bytes" % estimated_size)

    # splitext handles extensions of any length ("photo.jpeg" -> "photo"),
    # whereas slicing off three characters would leave "photo.j".
    output_name = os.path.splitext(input_file)[0] + ".txt"
    output_path = os.path.join(base_dir, "output", output_name)

    print("Writing to %s..." % output_name)
    stream = OutputBitStream(output_path)
    try:
        print("Encoding header...")
        huffman.encode_header(image, stream)
        stream.flush_buffer()
        print("Encoding tree...")
        huffman.encode_tree(trimmed_tree, stream)
        stream.flush_buffer()
        print("Encoding pixels...")
        huffman.encode_hilbert(hilbert_array, codes, stream)
    finally:
        # Close the stream even if an encoding step raises, so the output
        # file handle is not leaked.
        stream.close()

    after = time.time()
    real_size = stream.bytes_written
    print("Wrote %d bytes." % real_size)

    print("Estimate is %scorrect." %
          ('' if estimated_size == real_size else 'in'))
    print("Compression ratio: %0.2f" % (float(img_raw_size) / real_size))
    print("Took %0.2f seconds." % (after - before))

    return hilbert_array
Esempio n. 6
0
def decode(filename_in, filename_out):
    """Decode the file ``filename_in`` and write the result to ``filename_out``.

    The input is read in this order: an 8-byte big-endian header length, one
    byte that is handed to ``huffman.decode`` as its third argument, the
    pickled frequency table of that length, and finally the encoded payload.

    Args:
        filename_in: Path of the encoded file to read.
        filename_out: Path of the file that receives the decoded bytes.
    """
    with open(filename_in, 'rb') as src:
        header_size = int.from_bytes(src.read(8), 'big')
        # NOTE(review): presumably the number of unused bits in the last
        # payload byte -- confirm against the code that writes this file.
        unused_bits = int.from_bytes(src.read(1), 'big')
        pickled_freq = src.read(header_size)
        payload = src.read()
        # NOTE(review): pickle.loads can execute arbitrary code, so only
        # decode files that come from a trusted source.
        frequencies = pickle.loads(pickled_freq)
        code_map = huffman.build_map_code(huffman.build_tree(frequencies))
        decoded = huffman.decode(payload, code_map, unused_bits)
        with open(filename_out, 'wb') as dst:
            dst.write(decoded)
Esempio n. 7
0
 def test_building_encoding_for_uniform(self):
     """Check the lookup table built from ``get_uniform_input()``.

     Every code in the table is rendered with ``to01()`` and the resulting
     mapping is compared with the expected bit strings.
     """
     expected_codes = {
         'a': '110',
         'b': '111',
         'c': '00',
         'd': '01',
         'e': '10',
     }
     code_table = build_lookup(build_tree(get_uniform_input()))
     # Render each code as a '0'/'1' string so a failure is human readable.
     readable_codes = {}
     for symbol, code in code_table.items():
         readable_codes[symbol] = code.to01()
     self.assertEqual(readable_codes, expected_codes)
Esempio n. 8
0
def encode(filename_in, filename_out):
    """Encode the file ``filename_in`` and write the result to ``filename_out``.

    The output is written in a single call and consists of: the pickled
    frequency table's length as 8 big-endian bytes, one byte holding the
    number of unused bits in the last payload byte, the pickled frequency
    table, and the encoded payload.

    Args:
        filename_in: Path of the file to encode.
        filename_out: Path of the file that receives the encoded bytes.
    """
    with open(filename_in, 'rb') as src:
        frequencies = huffman.freq_str(read_each(src))
        code_map = huffman.build_map_code(huffman.build_tree(frequencies))
        # Rewind so the second read_each pass starts at the beginning again.
        src.seek(0)
        encoded = huffman.encode(read_each(src), code_map)
        unused_bits = encoded.buffer_info()[3]  # unused bits of last byte
        pickled_freq = pickle.dumps(frequencies, pickle.HIGHEST_PROTOCOL)

        with open(filename_out, 'wb') as dst:
            length_field = len(pickled_freq).to_bytes(8, 'big')
            padding_field = unused_bits.to_bytes(1, 'big')
            dst.write(
                length_field + padding_field + pickled_freq +
                encoded.tobytes())
Esempio n. 9
0
    number_of_symbols = 10
    decay_rate = 0.01
    # Generates different random upper case ASCII characters as specified by `number_of_symbols`
    random_symbols = huff.generate_n_random_symbols(number_of_symbols)
    freqs = huff.generate_random_symbol_frequencies(type_of_distribution,
                                                    number_of_realization,
                                                    number_of_symbols,
                                                    decay_rate)
    input_text = huff.generate_random_text_with_predefined_frequencies(
        freqs, random_symbols, 200)

    # Calculate frequency of symbols
    symbols_and_freqs, unique_symbols = huff.calculate_frequency_of_symbols_from_text(
        input_text)
    # Build Tree
    tree = huff.build_tree(symbols_and_freqs)
    # Traverse tree and build dictionary
    # `tree_type=0` means, left -> 0 and right -> 1. `tree_type=1` means, left -> 1 and right -> 0.
    dictionary = huff.traverse_tree(tree, unique_symbols, tree_type=0)

    # Encode the input text based on created dictionary
    encoded_text = huff.encode_message(dictionary, input_text)
    # Decode the encoded text based on created dictionary
    decoded_text = huff.decode_message(encoded_text, dictionary)

    # Create reports
    huff.produce_huffman_report(symbols_and_freqs, dictionary)
    huff.huffman_coded_string_report(input_text, encoded_text,
                                     symbols_and_freqs, dictionary)
    huff.huffman_decoded_string_report(encoded_text, decoded_text,
                                       symbols_and_freqs, dictionary)
Esempio n. 10
0
                frequency_map[c] += 1
                ccount += 1
            text += line
            words += [w.strip(' \n.,”“') for w in line.split()]

    print('= Stats =')
    print('Number of characters', ccount)
    print('Number of words', len(words))

    min_entropy = compute_entropy(frequency_map.values())
    print('Minimum entropy', min_entropy)

    huffman_entropy = compute_huffman_entropy(frequency_map)
    print('Huffman entropy', huffman_entropy)

    tree = build_tree(frequency_map)
    encoded_text = encode(text, tree)
    print('Length of raw text: {} bytes'.format(len(text)))
    print('Length of encoded text: {} bytes'.format(len(encoded_text)/8))
    print('Compression rate: {}'.format(len(text)*8/len(encoded_text)))

    print('= Word-based =')
    text_length = 0

    frequency_map = {}
    for w in words:
        text_length += len(w)
        if w not in frequency_map:
            frequency_map[w] = 0
        frequency_map[w] += 1
    avg_word_size = text_length / len(words)
Esempio n. 11
0
def test_build_tree_empty():
    """``build_tree`` must return ``None`` when given an empty string."""
    assert build_tree("") is None
Esempio n. 12
0
from pprint import pprint

import huffman
from view import viz_tree

# Demo: encode a short byte string, print the code table and the encoded
# bits, visualise the tree, decode the bits again and print a size figure.
data = b"huffman"

tree = huffman.build_tree(data)
map_code = huffman.build_map_code(tree)

# encode
bin_data = huffman.encode(data, map_code)

print("Map code")
for k, v in map_code.items():
    # Keys are passed through chr() for display; codes are shown as 0/1 text.
    print(f"{chr(k)}: {v.to01()}")
print("Encoded data")
print(bin_data.to01())
viz_tree(tree)

# decode
print("After decode")
encoded_bytes = bin_data.tobytes()
pad_bits = bin_data.buffer_info()[3]
print(huffman.decode(encoded_bytes, map_code, pad_bits))

# calculate performance
# p is the encoded length in bits divided by the original length in bits.
original_bit_count = len(data) * 8
p = len(bin_data) / original_bit_count
print(f"Reduce {p * 100}%")