def test_tiny(self):
    """Check huffman_code() on frequency maps holding one and two symbols."""
    # A lone symbol is expected to be assigned an empty code word.
    single = huffman_code({0: 0})
    self.assertEqual(len(single), 1)
    self.assertEqual(single, {0: bitarray()})

    # With two symbols (both of frequency zero) each code word is one bit.
    pair = huffman_code({0: 0, 1: 0})
    self.assertEqual(len(pair), 2)
    for symbol in (0, 1):
        self.assertEqual(len(pair[symbol]), 1)
def test_endianness(self):
    """Every code word must report the bit-endianness passed to huffman_code()."""
    freq = {'A': 10, 'B': 2, 'C': 5}
    for endian in ('big', 'little'):
        code = huffman_code(freq, endian)
        self.assertEqual(len(code), 3)
        for word in code.values():
            self.assertEqual(word.endian(), endian)
def test_simple(self):
    """Mixed key types and a float frequency: check the resulting code lengths."""
    code = huffman_code({0: 10, 'as': 2, None: 1.6})
    self.assertEqual(len(code), 3)

    # (symbol, expected code length) -- the dominant symbol gets the 1-bit code.
    expected_lengths = ((0, 1), ('as', 2), (None, 2))
    for symbol, nbits in expected_lengths:
        self.assertEqual(len(code[symbol]), nbits)
def test_random_list(self):
    """Round-trip 500 random integers through a Huffman code built from them.

    The code is derived from the symbol counts of the message itself, the
    message is encoded into a bitarray, decoded again and compared with the
    original; finally the code is handed to self.check_tree().
    """
    plain = [randint(0, 100) for _ in range(500)]
    code = huffman_code(Counter(plain))
    a = bitarray()
    a.encode(code, plain)
    # Materialise the decoded symbols: comparing against a list only works
    # directly when decode() returns a list.  list() keeps the assertion
    # valid when decode() yields a lazy iterator instead.
    self.assertEqual(list(a.decode(code)), plain)
    self.check_tree(code)
def test_counter(self):
    """Encode and decode a short sentence using a code built from a Counter."""
    text = 'the quick brown fox jumps over the lazy dog.'
    code = huffman_code(Counter(text))

    encoded = bitarray()
    encoded.encode(code, text)

    # The decoded symbols are single characters; glue them back together.
    decoded = ''.join(encoded.decode(code))
    self.assertEqual(decoded, text)
    self.check_tree(code)
def test_balanced(self):
    """2**n equally frequent symbols must all receive code words of n bits."""
    n = 6
    symbol_count = 2 ** n
    # Every symbol gets the same frequency of 1.
    freq = dict.fromkeys(range(symbol_count), 1)

    code = huffman_code(freq)
    self.assertEqual(len(code), symbol_count)

    lengths = [len(word) for word in code.values()]
    self.assertTrue(all(nbits == n for nbits in lengths))
def test_unbalanced(self):
    """Frequencies doubling per symbol: check the expected code lengths.

    Symbols 0 and 1 are expected to have code length N - 1, and every other
    symbol i is expected to have code length N - i.
    """
    total = 27
    freq = {sym: 2 ** sym for sym in range(total)}

    code = huffman_code(freq)
    self.assertEqual(len(code), total)

    for sym in range(total):
        # max(sym, 1) is 1 for the two rarest symbols and sym otherwise.
        self.assertEqual(len(code[sym]), total - max(sym, 1))
def encode(filename):
    """Huffman-compress `filename` into `filename + '.huff'`.

    The output file consists of:
      * one text line per symbol: the byte value as two hex digits, a
        space, and the code word as a string of 0s and 1s,
      * a line ``unused <n>`` taken from field 3 of buffer_info(),
      * the encoded bits as written by bitarray.tofile().

    Afterwards the number of encoded bits versus input bits and a size
    ratio in percent are printed.
    """
    with open(filename, 'rb') as src:
        payload = bytearray(src.read())

    # Build the code from the byte frequencies of the input itself.
    table = huffman_code(Counter(payload))

    bits = bitarray(endian='little')
    bits.encode(table, payload)
    # Field 3 is written as the unused-bit count, field 1 is used as the
    # encoded size for the ratio -- presumably bytes, see bitarray docs.
    info = bits.buffer_info()

    with open(filename + '.huff', 'wb') as dst:
        for sym in sorted(table):
            header_line = '%02x %s\n' % (sym, table[sym].to01())
            dst.write(header_line.encode())
        # write unused bits
        unused = str(info[3]).encode()
        dst.write(b'unused %s\n' % unused)
        bits.tofile(dst)

    print('Bits: %d / %d' % (len(bits), 8 * len(payload)))
    print('Ratio =%6.2f%%' % (100.0 * info[1] / len(payload)))
def test_random_freq(self):
    """Build a code from random float frequencies and validate its tree."""
    symbol_count = randint(2, 1000)
    # create Huffman code for symbol_count symbols
    weights = {sym: random() for sym in range(symbol_count)}
    code = huffman_code(weights)
    self.check_tree(code)
# Timing script: builds a Huffman code for N symbols, measures how long it
# takes to construct a decodetree from it, and times decoding of a short
# message using the plain code dictionary.
from random import random, randint
from time import time
from bitarray import bitarray, decodetree
from bitarray.util import huffman_code

N = 1_000_000

# create Huffman code for N symbols
code = huffman_code({i: random() for i in range(N)})
print(len(code))

# create the decodetree object
t0 = time()
tree = decodetree(code)
print('decodetree(code): %9.6f sec' % (time() - t0))
print(tree.nodes())

# message of 100 random symbols, encoded with the code built above
plain = [randint(0, N - 1) for _ in range(100)]
a = bitarray()
a.encode(code, plain)

# decode using the code dictionary
# NOTE(review): the timing and the assert below assume decode() returns a
# fully built list; if it returns a lazy iterator, list(...) would be
# needed here -- confirm against the installed bitarray version.
t0 = time()
res = a.decode(code)
print('decode(code): %9.6f sec' % (time() - t0))
assert res == plain

# decode using the decodetree
t0 = time()