def analyze(filename, printCode=False, writeDot=False):
    """Build the tree and code table for the contents of *filename*.

    The file is read in binary mode, its symbols are counted with
    ``Counter`` and the counts are handed to ``huffTree``; the resulting
    tree is then turned into a code table by ``huffCode``.

    filename  -- path of the file to inspect
    printCode -- when true, pass the frequencies and the code table to
                 ``print_code``
    writeDot  -- when true, pass the tree to ``write_dot`` with the output
                 name 'tree.dot' and the result of ``is_binary`` on the data

    Nothing is returned.
    """
    with open(filename, 'rb') as stream:
        data = stream.read()

    frequencies = Counter(data)
    root = huffTree(frequencies)

    if writeDot:
        write_dot(root, 'tree.dot', is_binary(data))

    # The code table is always derived, even if it ends up not being printed.
    table = huffCode(root)
    if printCode:
        print_code(frequencies, table)
def encode(filename):
    """Encode *filename* and write the result to ``filename + '.huff'``.

    The output file consists of:

    * one text line per symbol of the code table, in sorted symbol order,
      holding the symbol value formatted with ``%02x`` followed by the
      symbol's code as produced by ``to01()``;
    * a line ``unused N`` where N is element 3 of ``buffer_info()`` of the
      encoded bitarray;
    * the encoded bitarray itself, written with ``tofile``.

    Afterwards the encoded length in bits, the input length in bits and a
    size ratio (in percent) are printed.  Nothing is returned.
    """
    with open(filename, 'rb') as src:
        payload = src.read()

    table = huffCode(huffTree(Counter(payload)))

    with open(filename + '.huff', 'wb') as dst:
        for symbol in sorted(table):
            # Under Python 3 the symbol is used as is, otherwise ord() is
            # applied to it before hex formatting.
            value = symbol if is_py3k else ord(symbol)
            header_line = '%02x %s\n' % (value, table[symbol].to01())
            dst.write(header_line.encode())

        bits = bitarray(endian='little')
        bits.encode(table, payload)

        # write unused bits
        unused = str(bits.buffer_info()[3]).encode()
        dst.write(b'unused %s\n' % unused)
        bits.tofile(dst)

    encoded_bits = len(bits)
    original_bits = 8 * len(payload)
    print('%d / %d' % (encoded_bits, original_bits))

    # NOTE(review): element 1 of buffer_info() is presumably the buffer size
    # in bytes -- confirm against the bitarray documentation.
    ratio = 100.0 * bits.buffer_info()[1] / len(payload)
    print('Ratio =%6.2f%%' % ratio)
def main():
    """Benchmark tree building, encoding and both decoders on 'README'.

    The text of 'README', repeated 1000 times, is counted, turned into a
    tree (``huffTree``) and a code table (``huffCode``).  The tree, and a
    tree rebuilt from the code table alone (``make_tree``), are written
    with ``write_dot``.  The text is then encoded into a bitarray and
    decoded twice: once with the ``decode`` function and once with the
    bitarray's own ``decode`` method.  Timings for each step and the ratio
    of the two decode timings are printed.

    Raises AssertionError if either decoder does not reproduce the text.
    """
    # Use a context manager so the file handle is closed right away instead
    # of being left for the garbage collector.
    with open('README') as fi:
        txt = 1000 * fi.read()

    t0 = time()
    freq = Counter(txt)
    print('count: %9.6f sec' % (time() - t0))

    t0 = time()
    tree = huffTree(freq)
    print('tree: %9.6f sec' % (time() - t0))

    write_dot(tree, 'tree.dot')
    code = huffCode(tree)
    # create tree from code (no frequencies)
    write_dot(make_tree(code), 'tree_raw.dot')

    a = bitarray()
    t0 = time()
    a.encode(code, txt)
    print('C encode: %9.6f sec' % (time() - t0))

    # Time the decode function above
    t0 = time()
    res = decode(tree, a)
    Py_time = time() - t0
    assert ''.join(res) == txt
    print('Py decode: %9.6f sec' % Py_time)

    # Time the decode method which is implemented in C
    t0 = time()
    res = a.decode(code)
    # Stop the clock before verifying the result, exactly as for the Python
    # decoder above; otherwise the join/compare cost is charged to the C
    # timing only and the ratio is skewed.
    # NOTE(review): assumes a.decode() returns a fully materialised list;
    # confirm against the installed bitarray version.
    C_time = time() - t0
    assert ''.join(res) == txt
    print('C decode: %9.6f sec' % C_time)

    print('Ratio: %f' % (Py_time / C_time))
def main():
    """Compare the ``decode`` function with the bitarray ``decode`` method.

    A code table is built from the text of 'README' via ``freq_string`` and
    ``huffCode``.  The text repeated 2000 times is encoded into a bitarray,
    which is then decoded with both decoders.  Each timing and the ratio of
    the two are printed.

    Raises AssertionError if either decoder does not reproduce the sample.
    """
    # Use a context manager so the file handle is closed right away instead
    # of being left for the garbage collector.
    with open('README') as fi:
        txt = fi.read()

    code = huffCode(freq_string(txt))

    sample = 2000 * txt

    a = bitarray()
    a.encode(code, sample)

    # Time the decode function above
    start_time = time.time()
    res = decode(code, a)
    Py_time = time.time() - start_time
    # Verification happens after the clock is stopped so it is not timed.
    assert ''.join(res) == sample
    print('Py_time: %.6f sec' % Py_time)

    # Time the decode method which is implemented in C
    start_time = time.time()
    res = a.decode(code)
    C_time = time.time() - start_time
    assert ''.join(res) == sample
    print('C_time: %.6f sec' % C_time)

    print('Ratio: %f' % (Py_time / C_time))
# NOTE(review): the statements down to `return res` are the tail of a function
# whose `def` line is not part of this excerpt.  They read the names
# `bitsequence` and `tree`, which are presumably its parameters -- confirm
# against the full file.
#
# Repeatedly call traverse() on a single shared iterator over `bitsequence`,
# collecting each result, until traverse() returns False.
res = []
it = iter(bitsequence)
while True:
    r = traverse(it, tree)
    # False from traverse() ends the loop (presumably: input exhausted).
    if r is False:
        break
    else:
        # An empty list from traverse() is treated as a decoding error.
        if r == []:
            raise ValueError("prefix code does not match data")
        res.append(r)
return res

# Module-level benchmark: build a code table from the text of 'README',
# encode 1000 repetitions of that text and time the decoders.
# NOTE(review): the file object returned by open() is never closed explicitly.
txt = open('README').read()
code = huffCode(freq_string(txt))

sample = 1000*txt

a = bitarray()
a.encode(code, sample)

# Time the decode function above
start_time = time.time()
res = decode(code, a)
Py_time = time.time() - start_time
# The round trip is verified after the clock is stopped, so it is not timed.
assert ''.join(res) == sample
print('Py_time: %.6f sec' % Py_time)

# Time the decode method which is implemented in C
# (the excerpt ends right after the timer is started)
start_time = time.time()
# NOTE(review): the statements down to `return res` are the tail of a function
# whose `def` line is not part of this excerpt.  They read the names
# `bitsequence` and `tree`, which are presumably its parameters -- confirm
# against the full file.
#
# Repeatedly call traverse() on a single shared iterator over `bitsequence`,
# collecting each result, until traverse() returns False.
res = []
it = iter(bitsequence)
while True:
    r = traverse(it, tree)
    # False from traverse() ends the loop (presumably: input exhausted).
    if r is False:
        break
    else:
        # An empty list from traverse() is treated as a decoding error.
        if r == []:
            raise ValueError("prefix code does not match data")
        res.append(r)
return res

# Module-level benchmark: build a code table from the text of 'README',
# encode 1000 repetitions of that text and time the decoders.
# NOTE(review): the file object returned by open() is never closed explicitly.
txt = open('README').read()
code = huffCode(freq_string(txt))

sample = 1000 * txt

a = bitarray()
a.encode(code, sample)

# Time the decode function above
start_time = time.time()
res = decode(code, a)
Py_time = time.time() - start_time
# The round trip is verified after the clock is stopped, so it is not timed.
assert ''.join(res) == sample
print('Py_time: %.6f sec' % Py_time)

# Time the decode method which is implemented in C
# (the excerpt ends right after the timer is started)
start_time = time.time()