Example #1
0
    def test_tiny(self):
        """Check the degenerate one- and two-symbol cases (all frequencies 0)."""
        # a lone symbol is expected to map to an empty bitarray
        single = huffman_code({0: 0})
        self.assertEqual(len(single), 1)
        self.assertEqual(single, {0: bitarray()})

        # with two symbols, each code word is expected to be exactly one bit
        pair = huffman_code({0: 0, 1: 0})
        self.assertEqual(len(pair), 2)
        for sym in (0, 1):
            self.assertEqual(len(pair[sym]), 1)
Example #2
0
 def test_endianness(self):
     """Check every code word reports the endianness given to huffman_code."""
     weights = {'A': 10, 'B': 2, 'C': 5}
     for order in ('big', 'little'):
         table = huffman_code(weights, order)
         self.assertEqual(len(table), 3)
         # look each code word up by symbol and compare its endianness
         for sym in table:
             self.assertEqual(table[sym].endian(), order)
Example #3
0
 def test_simple(self):
     """Check code lengths for three symbols of mixed key and weight types."""
     table = huffman_code({0: 10, 'as': 2, None: 1.6})
     self.assertEqual(len(table), 3)
     # (symbol, expected code length): the heaviest symbol gets the
     # shortest code, the two lighter ones share the longer length
     expected = ((0, 1), ('as', 2), (None, 2))
     for sym, nbits in expected:
         self.assertEqual(len(table[sym]), nbits)
Example #4
0
 def test_random_list(self):
     """Round-trip 500 random integers in 0..100 through a Huffman code."""
     symbols = [randint(0, 100) for _ in range(500)]
     # frequencies are taken from the message itself
     table = huffman_code(Counter(symbols))
     encoded = bitarray()
     encoded.encode(table, symbols)
     decoded = encoded.decode(table)
     self.assertEqual(decoded, symbols)
     self.check_tree(table)
Example #5
0
 def test_counter(self):
     """Round-trip a text message using character counts as frequencies."""
     text = 'the quick brown fox jumps over the lazy dog.'
     table = huffman_code(Counter(text))
     encoded = bitarray()
     encoded.encode(table, text)
     # decoded symbols are single characters; join them to rebuild the text
     decoded = ''.join(encoded.decode(table))
     self.assertEqual(decoded, text)
     self.check_tree(table)
Example #6
0
 def test_balanced(self):
     """Check 2**n equally weighted symbols all get codes of length n."""
     depth = 6
     count = 2 ** depth
     # every symbol has the same weight
     table = huffman_code({sym: 1 for sym in range(count)})
     self.assertEqual(len(table), count)
     self.assertTrue(all(len(word) == depth for word in table.values()))
Example #7
0
 def test_unbalanced(self):
     """Check code lengths when symbol weights are successive powers of two."""
     N = 27
     table = huffman_code({sym: 2 ** sym for sym in range(N)})
     self.assertEqual(len(table), N)
     for sym in range(N):
         # the two lightest symbols share the longest code (N - 1 bits);
         # every other symbol i is expected to use N - i bits
         nbits = N - 1 if sym <= 1 else N - sym
         self.assertEqual(len(table[sym]), nbits)
Example #8
0
def encode(filename):
    """Huffman-encode the file *filename* into ``filename + '.huff'``.

    The output starts with a text header: one line per symbol (the byte
    value as two hex digits, a space, then its code as a 0/1 string), in
    sorted symbol order, followed by a line ``unused <n>``.  The encoded
    bits are then appended in binary form.  Finally the bit counts and the
    size ratio are printed.
    """
    with open(filename, 'rb') as src:
        data = bytearray(src.read())

    # byte frequencies of the input drive the code construction
    table = huffman_code(Counter(data))
    with open(filename + '.huff', 'wb') as dst:
        for sym in sorted(table):
            entry = '%02x %s\n' % (sym, table[sym].to01())
            dst.write(entry.encode())
        bits = bitarray(endian='little')
        bits.encode(table, data)
        # write unused bits, so the trailing bits of the last byte can be
        # told apart from real data
        unused = str(bits.buffer_info()[3]).encode()
        dst.write(b'unused %s\n' % unused)
        bits.tofile(dst)
    total_bits = 8 * len(data)
    print('Bits: %d / %d' % (len(bits), total_bits))
    # buffer_info()[1] is used as the encoded size -- presumably the buffer
    # size in bytes; confirm against the bitarray documentation
    ratio = 100.0 * bits.buffer_info()[1] / len(data)
    print('Ratio =%6.2f%%' % ratio)
Example #9
0
 def test_random_freq(self):
     """Build a code for a random number of symbols with random weights."""
     count = randint(2, 1000)
     # create Huffman code for `count` symbols, each with a random frequency
     freq = {sym: random() for sym in range(count)}
     self.check_tree(huffman_code(freq))
Example #10
0
from random import random, randint
from time import time

from bitarray import bitarray, decodetree
from bitarray.util import huffman_code

# Benchmark script: build a large Huffman code, then time decodetree
# construction and decoding of a short random message.

# number of distinct symbols in the generated code
N = 1_000_000

# create Huffman code for N symbols
# (each symbol 0..N-1 gets a random frequency)
code = huffman_code({i: random() for i in range(N)})
print(len(code))

# create the decodetree object
# (timed: construction happens once and is reported on its own)
t0 = time()
tree = decodetree(code)
print('decodetree(code):  %9.6f sec' % (time() - t0))

# NOTE(review): presumably the number of nodes in the tree -- confirm
print(tree.nodes())
# message to encode: 100 symbols drawn uniformly from the N available
plain = [randint(0, N - 1) for _ in range(100)]

a = bitarray()
a.encode(code, plain)

# decode using the code dictionary
t0 = time()
res = a.decode(code)
print('decode(code):  %9.6f sec' % (time() - t0))
# round-trip sanity check: decoding must reproduce the original message
assert res == plain

# decode using the decodetree
# (restart the timer for the tree-based measurement)
t0 = time()