class Adaptive_huffman:
    """Adaptive Huffman coder that builds its code tree while processing data.

    The same class is used for both directions: ``encode`` treats
    ``byte_seq`` as the raw input, ``decode`` treats it as a previously
    encoded stream.  Both methods mutate the tree held on the instance, so
    a fresh instance should be created for every encode or decode call.

    Attributes:
        range_size: Number of distinct symbol values (256).
        current_num: Most recently assigned node number; numbers are handed
            out in decreasing order starting at the root.
        byte_seq: The input given to the constructor.
        tree: Root node of the code tree.
        all_nodes: Every node created so far, in creation order.
        nyt: The current NYT ("not yet transmitted") node.
    """

    def __init__(self, byte_seq):
        """Create a coder whose tree consists of a single NYT root node.

        Args:
            byte_seq: Bytes-like object to encode or decode.  ``encode``
                iterates it symbol by symbol; ``decode`` loads it with
                ``bitarray.frombytes``.
        """
        low, high = 0, 255  # inclusive range of symbol values
        self.range_size = abs(low - high) + 1
        # A tree with ``range_size`` leaves plus NYT needs at most
        # 2 * range_size - 1 numbered nodes; the root takes the highest.
        self.current_num = self.range_size * 2 - 1
        self.byte_seq = byte_seq
        # NOTE(review): Node is defined elsewhere; its first positional
        # argument is presumably the initial weight -- confirm.
        self.tree = Node(0, self.current_num, data=NYT)
        self.all_nodes = [self.tree]
        self.nyt = self.tree

    def encode(self):
        """Encode ``self.byte_seq`` and return the result as a bitarray.

        For each symbol the code reported by ``self.tree.search`` is
        emitted; when the symbol is seen for the first time the raw 8-bit
        symbol follows it.  The tree is updated after every symbol.

        The returned bitarray starts with a 3-bit header holding the number
        of padding bits required to extend header + payload to a whole
        number of bytes.  The padding itself is not appended here.

        Returns:
            bitarray: 3-bit padding header followed by the encoded payload.
        """
        code = bitarray()
        for symbol in tqdm(self.byte_seq, desc="COMPRESSING",
                           colour='green', unit='bytes'):
            result = self.tree.search(symbol)
            first_appearance = result['first_appearance']
            code.extend(result['code'])
            if first_appearance:
                # Unknown symbol: the code above leads to NYT, so the raw
                # byte must follow for the decoder to learn it.
                code.frombytes(bytes([symbol]))
            self.update(symbol, first_appearance)

        total_bits = len(code) + 3  # payload plus the 3-bit header
        remaining_length = bits2bytes(total_bits) * 8 - total_bits
        return bitarray('{:03b}'.format(remaining_length)) + code

    def decode(self):
        """Decode ``self.byte_seq`` and return the symbols as a bitarray.

        The first 3 bits of the stream give the number of trailing padding
        bits to ignore.  The remaining bits are consumed by walking the
        tree from the root (1 -> right child, 0 -> left child) until a leaf
        is reached.  A NYT leaf means the next 8 bits are a new raw symbol;
        any other leaf yields the symbol stored on it.  The tree is updated
        after every decoded symbol.

        Returns:
            bitarray: Concatenation of all decoded symbols.
        """
        bit_seq = bitarray()
        bit_seq.frombytes(self.byte_seq)

        # Big-endian integer value of the 3 header bits (0 for empty input).
        remaining_length = sum(
            int(bit) << shift
            for shift, bit in enumerate(reversed(bit_seq[:3]))
        )
        # len() rather than the .length() method, which newer bitarray
        # releases no longer provide.
        bit_len = len(bit_seq) - remaining_length
        index = 3  # skip the header

        code = bitarray()
        # The bar tracks consumed payload bits only, so it ends at 100 %.
        with tqdm(total=max(bit_len - index, 0), desc="EXTRACTING",
                  unit="bits", colour="yellow") as pbar:
            while index < bit_len:
                symbol_start = index
                current_node = self.tree
                while current_node.left or current_node.right:
                    if bit_seq[index]:
                        current_node = current_node.right
                    else:
                        current_node = current_node.left
                    index += 1

                if current_node.data == NYT:
                    is_first = True
                    dec = bit_seq[index:index + 8]
                    index += 8
                else:
                    is_first = False
                    dec = current_node.data

                code += dec
                self.update(dec, is_first)
                pbar.update(index - symbol_start)
        return code

    def _find_node(self, data):
        """Return the first node in ``self.all_nodes`` whose data equals *data*.

        Raises:
            KeyError: If no node carries *data*.
        """
        for node in self.all_nodes:
            if node.data == data:
                return node
        raise KeyError(f'Cannot find the target node given {data}.')

    def update(self, data, is_first):
        """Update the tree after *data* has been encoded or decoded.

        Args:
            data: The symbol that was just processed.
            is_first: True when the symbol was not in the tree yet.

        Raises:
            KeyError: If ``is_first`` is false but no node carries *data*.
        """
        current_node = None
        while True:
            if is_first:
                # Split the NYT node: it becomes an internal node whose
                # right child is the new symbol leaf and whose left child
                # is the new NYT node.
                current_node = self.nyt

                self.current_num -= 1
                new_external = Node(1, self.current_num, data=data)
                current_node.right = new_external
                self.all_nodes.append(new_external)

                self.current_num -= 1
                new_nyt = Node(0, self.current_num, data=NYT)
                current_node.left = new_nyt
                self.all_nodes.append(new_nyt)

                current_node.weight += 1
                current_node.data = None
                self.nyt = new_nyt
            else:
                if current_node is None:
                    # First iteration for a known symbol: locate its node.
                    current_node = self._find_node(data)

                # Highest-numbered node among those sharing this weight.
                node_max_num = max(
                    (
                        n for n in self.all_nodes
                        if n.weight == current_node.weight
                    ),
                    key=operator.attrgetter('num'),
                )
                if node_max_num not in (current_node, current_node.parent):
                    # NOTE(review): exchange() is defined elsewhere; the
                    # call and the reassignment are kept exactly as found.
                    exchange(node_max_num, current_node)
                    current_node = node_max_num
                current_node.weight += 1

            # Truthiness test kept as-is: the "no parent" sentinel used by
            # Node is not visible here.
            if not current_node.parent:
                break
            # Continue towards the root; ancestors are never "first".
            current_node = current_node.parent
            is_first = False