Beispiel #1
0
 def build_huffman_tree(self):
     """Build the Huffman tree for the vocabulary and cache its paths.

     Every value returned by ``self.get_vocab()`` is reduced to an
     ``entry[0] -> entry[1]`` pair, and the resulting mapping is handed to
     ``HuffmanTree``. The tree is stored on ``self.huffman`` and the pair it
     returns from ``generate_node_left_and_right_path()`` is stored on
     ``self.huffman_left`` / ``self.huffman_right``.
     """
     weights = {}
     for _, entry in self.get_vocab().items():
         weights[entry[0]] = entry[1]

     self.huffman = HuffmanTree(weights)
     paths = self.huffman.generate_node_left_and_right_path()
     self.huffman_left, self.huffman_right = paths
     print("build_tree_complete")
def _get_min(symbol_list: List[Union[int, HuffmanTree]], freq_list: List[int]) \
        -> Tuple[List[Union[int, HuffmanTree]], List[int]]:
    """
    Merge the two lowest-frequency entries into one subtree.

    Helper for build_huffman_tree. <symbol_list> and <freq_list> are parallel
    lists: the item at index i of <symbol_list> has the frequency stored at
    index i of <freq_list>. Twice in a row, the entry with the smallest
    frequency (earliest index on ties) is removed from both lists; int
    entries are wrapped in a HuffmanTree, HuffmanTree entries are reused.
    A new HuffmanTree built from the two removed entries is appended to
    <symbol_list> and the sum of their frequencies to <freq_list>.

    Both lists are mutated in place and are also returned.
    """
    merged_freq = 0
    children = []
    for _ in range(2):
        # Locate the first occurrence of the smallest remaining frequency.
        lowest = min(freq_list)
        position = freq_list.index(lowest)
        merged_freq += lowest

        # Remove the entry from both parallel lists.
        candidate = symbol_list.pop(position)
        freq_list.pop(position)

        if isinstance(candidate, int):
            # Raw symbols are wrapped in a tree of their own.
            children.append(HuffmanTree(candidate))
        elif isinstance(candidate, HuffmanTree):
            # Previously merged subtrees are reused unchanged.
            children.append(candidate)

    # The first-removed entry is passed before the second-removed one.
    symbol_list.append(HuffmanTree(None, children[0], children[1]))
    freq_list.append(merged_freq)
    return symbol_list, freq_list
Beispiel #3
0
    def __init__(self, file_name, args):
        """Load the corpus and prepare vocabulary and sampling structures.

        Args:
            file_name: Name of the corpus file, resolved relative to
                ``args.dir`` and read as UTF-8.
            args: Options object; this method reads ``args.dir`` and
                ``args.using_hs``.

        After the bookkeeping attributes are reset, ``self.get_words()`` and
        ``self.init_sample_table()`` are called. When ``args.using_hs`` is
        truthy a ``HuffmanTree`` is built from ``self.word_frequency`` and
        the pair returned by ``get_huffman_code_and_path()`` is stored on
        ``self.huffman_positive`` / ``self.huffman_negative``.
        """
        self.args = args
        # Queue storing all word pairs.
        self.word_pair_catch = deque()
        # Sampling table.
        self.sample_table = []
        # Word total once words with frequency below mini_count are dropped.
        self.sentence_length = 0
        # Number of sentences.
        self.sentence_count = 0
        # word --> id
        self.word2id = {}
        # id --> word
        self.id2word = {}
        # Word frequencies.
        self.word_frequency = {}
        # Number of words after de-duplication and low-frequency removal.
        self.word_count = 0
        # Read all lines up front and close the handle immediately instead
        # of leaving it open until garbage collection.
        with open(os.path.join(self.args.dir, file_name),
                  encoding='utf-8') as corpus:
            self.input_file = corpus.readlines()

        self.get_words()
        self.init_sample_table()

        if args.using_hs:
            tree = HuffmanTree(self.word_frequency)
            self.huffman_positive, self.huffman_negative = tree.get_huffman_code_and_path(
            )

        print('Word Count: %d' % len(self.word2id))
        print('Sentence Length: %d' % (self.sentence_length))
        print('Sentence count: %d' % (self.sentence_count))
    def make_tree(self, counter):
        """Build a Huffman tree from ``counter`` and store it as ``self.tree``.

        Args:
            counter: Mapping of symbol -> frequency.

        Returns:
            The finished tree (also stored on ``self.tree``).

        Raises:
            ValueError: If ``counter`` is empty.

        Side effects:
            ``self.dict`` is set to ``dict(self.tree.get_codes())`` and the
            tree is given the inverse mapping through ``read_dict``.
        """
        forest = [HuffmanTree(symbol=sym, freq=counter[sym]) for sym in counter]
        if not forest:
            raise ValueError("cannot build a Huffman tree from an empty counter")

        while len(forest) > 1:
            # Remove the two lowest trees as chosen by self.lowest() and
            # merge them under a new parent carrying the combined frequency.
            lowest = self.lowest(forest)
            forest.remove(lowest)
            next_low = self.lowest(forest)
            forest.remove(next_low)
            forest.append(HuffmanTree(right=lowest,
                                      left=next_low,
                                      freq=lowest.freq + next_low.freq))

        # Assign from the forest so that a single-symbol counter (where the
        # merge loop never runs) still yields a tree.
        self.tree = forest[0]

        # Materialize the pairs once; they feed both mappings below.
        codes = list(self.tree.get_codes())
        self.dict = dict(codes)
        self.tree.read_dict({pair[1]: pair[0] for pair in codes})
        return self.tree
Beispiel #5
0
def generate(sentence):
    """Train a Huffman tree on the corpus plus ``sentence`` and print it.

    The contents of ``train.txt`` are extended with the preprocessed
    sentence, converted with ``translateToMorseCode`` and used to train a
    fresh ``HuffmanTree``. The tree's translation of the preprocessed
    sentence is then printed.

    Args:
        sentence: Raw input sentence; passed through ``preprocess``.
    """
    # Only the read needs the file; release the handle before training.
    with open('train.txt') as fin:
        train = fin.read()

    cleaned = preprocess(sentence)
    train += ' ' + cleaned
    morse_codes = translateToMorseCode(train)
    huffman_tree = HuffmanTree()
    huffman_tree.train(morse_codes)
    # Function-call form: the print statement is a SyntaxError on Python 3.
    print(huffman_tree.translate(cleaned))
def generate_tree_postorder(node_lst: List[ReadNode],
                            root_index: int) -> HuffmanTree:
    """ Rebuild the Huffman tree rooted at node_lst[root_index], given that
    node_lst lists the tree's nodes in postorder.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 0, 1, 0)]
    >>> generate_tree_postorder(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(5, None, None), \
    HuffmanTree(7, None, None)), HuffmanTree(None, HuffmanTree(10, None,
    None),\
    HuffmanTree(12, None, None)))

    >>> lst = [ReadNode(0, 104, 0, 101), ReadNode(0, 119, 0, 114), \
    ReadNode(1, 0, 1, 1), ReadNode(0, 100, 0, 111), ReadNode(0, 108, 1, 3), \
    ReadNode(1, 2, 1, 4)]
    >>> tree = generate_tree_postorder(lst, len(lst)-1)
    >>> print(tree)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(None,
    HuffmanTree(104, None, None), HuffmanTree(101, None, None)), \
    HuffmanTree(None, HuffmanTree(119, None, None), \
    HuffmanTree(114, None, None))), \
    HuffmanTree(None, HuffmanTree(108, None, None), \
    HuffmanTree(None, HuffmanTree(100, None, None), \
    HuffmanTree(111, None, None))))
    >>> number_nodes(tree)
    >>> t = bytes_to_nodes(tree_to_bytes(tree))
    >>> t
    [ReadNode(0, 104, 0, 101), ReadNode(0, 119, 0, 114),
    ReadNode(1, 0, 1, 1),\
    ReadNode(0, 100, 0, 111), ReadNode(0, 108, 1, 3), ReadNode(1, 2, 1, 4)]

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 0, 1, 0)]
    >>> tree = generate_tree_postorder(lst, 2)
    >>> number_nodes(tree)
    >>> t = bytes_to_nodes(tree_to_bytes(tree))
    >>> t
    [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), ReadNode(1, 0, 1, 1)]
    """
    root = HuffmanTree(None)

    # The right subtree is built first; the size reported for it by
    # _find_height is then forwarded to the left-hand build.
    root.right = _post_order_helper(node_lst, root_index, False)
    measured = _find_height(root.right)
    right_index = 0 if measured is None else len(measured)

    root.left = _post_order_helper(node_lst, root_index, True, right_index)

    _post_order_set_none(root)
    return root
def build_huffman_tree(freq_dict: Dict[int, int]) -> HuffmanTree:
    """ Return the Huffman tree corresponding to the frequency dictionary
    <freq_dict>.

    An empty <freq_dict> yields an empty tree, HuffmanTree(None). A
    single-symbol <freq_dict> yields a root whose first child is the symbol
    and whose second child is a dummy leaf; the dummy is 2 unless the symbol
    itself is 2, in which case 3 is used so the two leaves stay distinct.

    Precondition: freq_dict is not empty.

    >>> freq = {2: 6, 3: 4}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> t == result
    True
    >>> freq = {2: 6, 3: 4, 7: 5}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(2), \
                             HuffmanTree(None, HuffmanTree(3), HuffmanTree(7)))
    >>> t == result
    True
    >>> import random
    >>> symbol = random.randint(0,255)
    >>> freq = {symbol: 6}
    >>> t = build_huffman_tree(freq)
    >>> any_valid_byte_other_than_symbol = (symbol + 1) % 256
    >>> dummy_tree = HuffmanTree(any_valid_byte_other_than_symbol)
    >>> result = HuffmanTree(None, HuffmanTree(symbol), dummy_tree)
    >>> t.left == result.left or t.right == result.left
    True
    >>> freq = {2: 6, 3: 4, 7: 5, 8: 4}
    >>> tree = build_huffman_tree(freq)
    >>> result =  HuffmanTree(None, HuffmanTree(None, \
    HuffmanTree(3, None, None), HuffmanTree(8, None, None)), \
    HuffmanTree(None, HuffmanTree(7, None, None), HuffmanTree(2, None, None)))
    >>> tree == result
    True
    >>> freq = {3: 1}
    >>> tree = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3, None, None), HuffmanTree(2, \
    None, None))
    >>> tree == result
    True
    >>> freq = {2: 1}
    >>> tree = build_huffman_tree(freq)
    >>> tree.left == HuffmanTree(2) and tree.right == HuffmanTree(3)
    True
    """
    # Empty dictionary
    if not freq_dict:
        return HuffmanTree(None)

    # Only one item in freq_dict
    if len(freq_dict) == 1:
        symbol = next(iter(freq_dict))
        # The dummy sibling must differ from the real symbol, otherwise the
        # tree would hold the same symbol in both leaves.
        dummy = 3 if symbol == 2 else 2
        return HuffmanTree(None, HuffmanTree(symbol), HuffmanTree(dummy))

    # General case: keep merging the two least frequent entries until one
    # tree remains.
    symbol_list = list(freq_dict)
    freq_list = list(freq_dict.values())
    while len(symbol_list) > 1:
        symbol_list, freq_list = _get_min(symbol_list, freq_list)
    return symbol_list[0]
Beispiel #8
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Rebuild the Huffman tree rooted at node_lst[root_index]. No ordering
    of the nodes inside node_lst is assumed: a child whose type is not 0 is
    looked up through the index stored in its data field.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> generate_tree_general(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None), \
HuffmanTree(12, None, None)), \
HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    >>> leftleft = HuffmanTree(None, HuffmanTree(20), HuffmanTree(71))
    >>> left = HuffmanTree(None, HuffmanTree(3), leftleft)
    >>> right = HuffmanTree(None, HuffmanTree(9), HuffmanTree(10))
    >>> tree = HuffmanTree(None, left, right)
    >>> generate_tree_general([ReadNode(1, 2, 1, 3), ReadNode(0, 4, 0, 12), ReadNode(1, 1, 0, 2), ReadNode(0, 9, 0, 10)], 0)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(None, \
HuffmanTree(4, None, None), HuffmanTree(12, None, None)), \
HuffmanTree(2, None, None)), HuffmanTree(None, HuffmanTree(9, None, None), \
HuffmanTree(10, None, None)))
    """
    node = node_lst[root_index]
    result = HuffmanTree()

    # Type 0 means the data field is the symbol itself; any other type means
    # the data field is the index of another entry in node_lst.
    result.left = (HuffmanTree(node.l_data) if node.l_type == 0
                   else generate_tree_general(node_lst, node.l_data))
    result.right = (HuffmanTree(node.r_data) if node.r_type == 0
                    else generate_tree_general(node_lst, node.r_data))
    return result
Beispiel #9
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Rebuild the Huffman tree rooted at node_lst[root_index]. The nodes
    may appear in node_lst in any order.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> generate_tree_general(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None), \
HuffmanTree(12, None, None)), \
HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    """
    # A type of 1 marks an internal child (data = index into node_lst);
    # anything else is treated as a leaf (data = symbol).
    node = node_lst[root_index]
    result = HuffmanTree(None, None, None)

    if node.l_type != 1:
        result.left = HuffmanTree(node.l_data, None, None)
    else:
        result.left = __generate_tree_gen_help(node_lst,
                                               node_lst[node.l_data])

    if node.r_type != 1:
        result.right = HuffmanTree(node.r_data, None, None)
    else:
        result.right = __generate_tree_gen_help(node_lst,
                                                node_lst[node.r_data])

    return result
def _number_nodes_helper(tree: HuffmanTree, number: int = 0) -> int:
    """
    Number the internal nodes of <tree>, left subtree before right subtree
    before the node itself, and return the number given to <tree>.

    <number> is the value the first numbered node in this subtree receives.
    A leaf is not numbered; it returns <number> - 1 so that the caller's
    "+ 1" leaves the running value unchanged.
    """
    if tree.is_leaf():
        return number - 1

    after_left = _number_nodes_helper(tree.left, number) + 1
    own_number = _number_nodes_helper(tree.right, after_left) + 1
    tree.number = own_number
    return own_number
Beispiel #11
0
 def __init__(self, file_name, min_count):
     """Read the vocabulary, then build the sampling table and Huffman data.

     ``self.get_words(file_name, min_count)`` runs first; the pair caches
     are created afterwards, followed by ``self.init_sample_table()``. A
     ``HuffmanTree`` is built from ``self.word_frequency`` and the pair it
     returns from ``get_huffman_code_and_path()`` is stored on
     ``self.huffman_positive`` / ``self.huffman_negative``. Progress is
     reported with ``print``.
     """
     self.get_words(file_name, min_count)
     print(" ")

     # Containers filled later by other methods of the class.
     self.cbow_count = []
     self.word_pair_catch = deque()
     self.cbow_word_pair_catch = deque()

     self.init_sample_table()

     tree = HuffmanTree(self.word_frequency)
     print("tree ", tree)
     codes_and_paths = tree.get_huffman_code_and_path()
     self.huffman_positive, self.huffman_negative = codes_and_paths

     print('Word Count: %d' % len(self.word2id))
     print('Sentence Length: %d' % (self.sentence_length))
Beispiel #12
0
    def __init__(self, input_file, min_count):
        """Reset the vocabulary bookkeeping and build the Huffman split.

        After the counters and lookup tables are initialised,
        ``self.get_words(min_count)`` is called. A ``HuffmanTree`` is then
        built from ``self.word_frequency`` and the pair returned by its
        ``divide_pos_and_neg()`` is stored on ``self.huffman_positive`` /
        ``self.huffman_negative``.
        """
        self.input_file = input_file
        # Total number of words appearing across the sentences.
        self.sentence_sum_length = 0
        # Number of random walks.
        self.sentence_count = 0
        self.word_count = 0
        self.word2id = {}
        self.id2word = {}
        # Word frequency: how many times each word occurs in the random-walk
        # sequences.
        self.word_frequency = {}
        # The original author's note here only asks "what is this?"; the
        # purpose of this queue is not visible from this method.
        self.word_pair_catch = deque()

        self.get_words(min_count)
        tree = HuffmanTree(self.word_frequency)
        split = tree.divide_pos_and_neg()
        self.huffman_positive, self.huffman_negative = split
Beispiel #13
0
    def test_create_leafs(self):
        """create_leafs yields one Leaf per distinct character of the text,
        carrying the expected relative frequency."""
        cases = (
            ("test", [Leaf("t", 0.5), Leaf("e", 0.25), Leaf("s", 0.25)]),
            ("aaaaaaaAAAAbbbbb eee", [
                Leaf("a", 0.35),
                Leaf("A", 0.2),
                Leaf("b", 0.25),
                Leaf(" ", 0.05),
                Leaf("e", 0.15),
            ]),
        )
        for text, expect in cases:
            leafs = HuffmanTree.create_leafs(text)
            self.do_lists_of_leafs_equal(list(leafs), expect)
Beispiel #14
0
 def test_compute_code(self):
     """compute_code maps every leaf symbol to its bit string for trees of
     two, three and four leaves."""
     two_leaves = InternalNode(1, Leaf("a", 0.5), Leaf("b", 0.5))
     three_leaves = InternalNode(
         1, Leaf("a", 0.5),
         InternalNode(0.5, Leaf("b", 0.25), Leaf("c", 0.25)))
     four_leaves = InternalNode(
         1, Leaf("a", 0.5),
         InternalNode(0.5, Leaf("b", 0.25),
                      InternalNode(0.25, Leaf("c", 0.1), Leaf("d", 0.15))))

     cases = (
         (two_leaves, {'a': '0', 'b': '1'}),
         (three_leaves, {'a': '0', 'b': '10', 'c': '11'}),
         (four_leaves, {'a': '0', 'b': '10', 'c': '110', 'd': '111'}),
     )
     for tree, expected in cases:
         code = HuffmanTree.compute_code(tree)
         self.assertEqual(code, expected)
Beispiel #15
0
def build_huffman_tree(freq_dict: Dict[int, int]) -> HuffmanTree:
    """ Return the Huffman tree corresponding to the frequency dictionary
    <freq_dict>.

    A single-symbol dictionary yields a root whose left child is that symbol
    and whose right child is None. Otherwise the two entries chosen by
    find_smallest_dict are merged repeatedly until one tree remains.

    Precondition: freq_dict is not empty.

    >>> freq = {2: 6, 3: 4}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> t == result
    True
    >>> freq = {2: 6, 3: 4, 7: 5}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(2), \
                            HuffmanTree(None, HuffmanTree(3), HuffmanTree(7)))
    >>> t == result
    True
    >>> import random
    >>> symbol = random.randint(0,255)
    >>> freq = {symbol: 6}
    >>> t = build_huffman_tree(freq)
    >>> any_valid_byte_other_than_symbol = (symbol + 1) % 256
    >>> dummy_tree = HuffmanTree(any_valid_byte_other_than_symbol)
    >>> result = HuffmanTree(None, HuffmanTree(symbol), dummy_tree)
    >>> t.left == result.left or t.right == result.right
    True
    """
    if len(freq_dict) == 1:
        only_symbol = next(iter(freq_dict))
        return HuffmanTree(None, HuffmanTree(only_symbol), None)

    # Maps key -> [frequency, tree]; keys of merged entries are strings.
    pending = create_leafs(freq_dict)
    while len(pending) != 1:
        i, j = find_smallest_dict(pending)
        merged = HuffmanTree(None, pending[i][1], pending[j][1])
        combined_freq = pending[i][0] + pending[j][0]
        # Delimit both parts: plain concatenation is ambiguous
        # ("1" + "23" == "12" + "3") and could overwrite an earlier merge.
        merged_key = "(" + str(i) + "," + str(j) + ")"
        pending[merged_key] = [combined_freq, merged]
        pending.pop(i)
        pending.pop(j)

    # Exactly one entry is left; its tree is the result.
    return next(iter(pending.values()))[1]
def get_codes(tree: HuffmanTree) -> Dict[int, str]:
    """ Build the symbol -> code dictionary for the Huffman tree <tree>.
    A missing tree, or a leaf without a symbol, gives an empty dictionary.

    >>> tree = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> d = get_codes(tree)
    >>> d == {3: "0", 2: "1"}
    True
    >>> tree = HuffmanTree(None, None, None)
    >>> d = get_codes(tree)
    >>> d == {}
    True
    >>> left = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> right = HuffmanTree(9)
    >>> tree = HuffmanTree(None, left, right)
    >>> d_test = get_codes(tree)
    >>> d_test == {3: "00", 2: "01", 9: "1"}
    True
    >>> left_ext = HuffmanTree(None, HuffmanTree(2), HuffmanTree(3))
    >>> left = HuffmanTree(None, HuffmanTree(1), left_ext)
    >>> right = HuffmanTree(None, HuffmanTree(9), HuffmanTree(10))
    >>> tree = HuffmanTree(None, left, right)
    >>> d = get_codes(tree)
    >>> d == {1: '00', 2: '010', 3: '011', 9: '10', 10: '11'}
    True
    >>> tree = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> d_text = get_codes(tree)
    >>> d_text
    {3: '0', 2: '1'}
    """
    # Nothing to encode when there is no tree at all ...
    if tree is None:
        return {}
    # ... or when the tree is a single node holding no symbol.
    if tree.symbol is None and tree.is_leaf():
        return {}
    return _get_codes_helper(tree, "")
Beispiel #17
0
def number_nodes(tree: HuffmanTree) -> None:
    """ Assign consecutive numbers, starting at 0, to the internal nodes of
    <tree>. The groups returned by find_internal_nodes are visited in
    reverse key order and the nodes inside each group in stored order.
    >>> leftleft = HuffmanTree(None, HuffmanTree(4), HuffmanTree(12))
    >>> left = HuffmanTree(None, leftleft, HuffmanTree(2))
    >>> right = HuffmanTree(None, HuffmanTree(9), HuffmanTree(10))
    >>> tree = HuffmanTree(None, left, right)
    >>> number_nodes(tree)
    >>> tree.left.left.number
    0
    >>> tree.left.number
    1
    >>> tree.right.number
    2
    >>> tree.number
    3
    """
    groups = find_internal_nodes(tree, 0)
    next_number = 0
    for key in reversed(list(groups)):
        for node in groups[key]:
            node.number = next_number
            next_number += 1
def _traverse_post_order(tree: HuffmanTree, byte_list: List[int] = None) \
        -> List:
    """
    Serialize the internal nodes of <tree> in postorder.

    For every internal node, after both of its subtrees have been handled,
    four values are appended to <byte_list>: first the left child's pair and
    then the right child's pair. A leaf child contributes 0 followed by its
    symbol; an internal child contributes 1 followed by its number.

    <byte_list> is the accumulator; a new list is created when it is None.
    The accumulator is returned. A leaf contributes nothing, so calling
    this on a leaf without an accumulator returns an empty list.
    """
    # Create the accumulator before the leaf check so that a leaf never
    # makes the function return None.
    if byte_list is None:
        byte_list = []

    if tree.is_leaf():
        return byte_list

    _traverse_post_order(tree.left, byte_list)
    _traverse_post_order(tree.right, byte_list)

    for child in (tree.left, tree.right):
        if child.is_leaf():
            byte_list.append(0)
            byte_list.append(child.symbol)
        else:
            byte_list.append(1)
            byte_list.append(child.number)

    return byte_list
def tree_to_bytes(tree: HuffmanTree) -> bytes:
    """ Return a bytes representation of the Huffman tree <tree>.
    The representation should be based on the postorder traversal of the tree's
    internal nodes, starting from 0.

    A tree that is a single leaf has no internal nodes and is therefore
    represented by empty bytes, whether or not it carries a symbol.

    Precondition: <tree> has its nodes numbered.

    >>> tree = HuffmanTree(None, HuffmanTree(3, None, None), \
    HuffmanTree(2, None, None))
    >>> number_nodes(tree)
    >>> list(tree_to_bytes(tree))
    [0, 3, 0, 2]
    >>> left = HuffmanTree(None, HuffmanTree(3, None, None), \
    HuffmanTree(2, None, None))
    >>> right = HuffmanTree(5)
    >>> tree = HuffmanTree(None, left, right)
    >>> number_nodes(tree)
    >>> list(tree_to_bytes(tree))
    [0, 3, 0, 2, 1, 0, 0, 5]
    >>> tree = build_huffman_tree(build_frequency_dict(b"helloworld"))
    >>> number_nodes(tree)
    >>> list(tree_to_bytes(tree)) #doctest: +NORMALIZE_WHITESPACE
    [0, 104, 0, 101, 0, 119, 0, 114, 1, 0, 1, 1, 0, 100, 0, 111, 0, 108,\
    1, 3, 1, 2, 1, 4]
    >>> tree = HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None),\
    HuffmanTree(12, None, None)), \
    HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    >>> number_nodes(tree)
    >>> list(tree_to_bytes(tree))
    [0, 10, 0, 12, 0, 5, 0, 7, 1, 0, 1, 1]
    >>> list(tree_to_bytes(HuffmanTree(5)))
    []
    """
    # Any leaf is handled here: passing a symbol-carrying leaf on to the
    # traversal used to end in bytes(None), which raises TypeError.
    if tree.is_leaf():
        return bytes([])
    return bytes(_traverse_post_order(tree))
Beispiel #20
0
def __huffman_helper(real: HuffmanTree) -> None:
    """Helper for build_huffman_tree: walk the tree rooted at <real> and set
    the symbol of every internal (non-leaf) node to None.

    A missing subtree (None) is ignored and leaves are left untouched.
    """
    # Test for None first, and call is_leaf(): the bare method object is
    # always truthy, which made the original condition always false.
    if real is not None and not real.is_leaf():
        real.symbol = None
        __huffman_helper(real.left)
        __huffman_helper(real.right)
Beispiel #21
0
def __preorder_pls(tree: HuffmanTree, freq_dict: Dict[int, int],
                   ff: Dict[int, int]) -> None:
    """Relabel the leaves of <tree>, visiting left subtrees before right.

    Each leaf that carries a symbol receives the value stored under the
    largest remaining key of <freq_dict>; that key is then removed, so
    <freq_dict> is consumed as the walk proceeds. A leaf without a symbol
    is left unchanged.

    <ff> is only passed through to the recursive calls and is otherwise
    unused here.
    """
    # is_leaf must be called; the bare method object is always truthy.
    if tree.is_leaf():
        if tree.symbol is not None:
            # Read and remove the entry under the largest key in one step.
            tree.symbol = freq_dict.pop(max(freq_dict))
        return

    __preorder_pls(tree.left, freq_dict, ff)
    __preorder_pls(tree.right, freq_dict, ff)
Beispiel #22
0
class HuffmanDecoder(object):
    """Decode a Huffman-compressed file with the help of its key file.

    ``decode(filename)`` reads ``filename + ".KEY"`` and the binary file
    ``filename`` and writes the recovered text to ``filename + ".DECODED"``.
    """

    def decode(self, filename):
        """Run the whole pipeline for ``filename``: key, message, output."""
        self.filename = filename
        self.read_key_file()
        self.read_message_file()
        self.write_to_file()

    def read_key_file(self):
        """Read the code table from ``self.filename + ".KEY"``.

        The first line holds the number of symbols and the second line is
        stored as ``self.fileLength``. Each following entry is one symbol
        character immediately followed by the rest of the line, which is
        stripped and used as that symbol's code. The result is stored in
        ``self.dictionary`` as code -> symbol.
        """
        # The context manager closes the file even if a header line is not
        # a valid integer.
        with open(self.filename + ".KEY") as fp:
            num_of_symbols = int(fp.readline())
            self.fileLength = int(fp.readline())
            self.dictionary = {}
            for _ in range(num_of_symbols):
                # Read the symbol on its own so that a newline or a space
                # can itself be a symbol.
                sym = fp.read(1)
                code = fp.readline().strip()
                self.dictionary[code] = sym

    def read_message_file(self):
        """Decompress the binary file ``self.filename`` into ``self.text``.

        Every byte is expanded with ``binary.EightBitNumToBinary``. A
        ``HuffmanTree`` loaded from ``self.dictionary`` then decodes
        ``self.fileLength`` symbols with ``find_char``, whose result is
        indexed as (character, number of bits consumed).
        """
        with open(self.filename, "rb") as f:
            raw = f.read()

        bits = "".join(binary.EightBitNumToBinary(byte) for byte in raw)

        self.tree = HuffmanTree()
        self.tree.read_dict(self.dictionary)

        chars = []
        for _ in range(self.fileLength):
            # One lookup per symbol supplies both the character and the
            # number of bits to drop.
            found = self.tree.find_char(bits)
            chars.append(found[0])
            bits = bits[found[1]:]
        self.text = "".join(chars)

    def write_to_file(self):
        """Write ``self.text`` to ``self.filename + ".DECODED"``."""
        with open(self.filename + ".DECODED", "w") as f:
            f.write(self.text)
Beispiel #23
0
def build_huffman_tree(freq_dict: Dict[int, int]) -> HuffmanTree:  # FIX THIS
    """ Build and return the Huffman tree for the frequency dictionary
    <freq_dict>.

    A single-symbol dictionary produces a root holding that symbol and a
    dummy leaf whose symbol is (symbol + 1) % 256. Otherwise the two
    front entries of a list kept sorted by (frequency, tree) are merged
    repeatedly until one tree remains.

    Precondition: freq_dict is not empty.

    >>> freq = {2: 6, 3: 4}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> t == result
    True
    >>> freq = {2: 6, 3: 4, 7: 5}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(2), \
                             HuffmanTree(None, HuffmanTree(3), HuffmanTree(7)))
    >>> t == result
    True
    >>> import random
    >>> symbol = random.randint(0,255)
    >>> freq = {symbol: 6}
    >>> t = build_huffman_tree(freq)
    >>> any_valid_byte_other_than_symbol = (symbol + 1) % 256
    >>> dummy_tree = HuffmanTree(any_valid_byte_other_than_symbol)
    >>> result = HuffmanTree(None, HuffmanTree(symbol), dummy_tree)
    >>> t.left == result.left or t.right == result.left
    True
    """
    if len(freq_dict) == 1:
        only = list(freq_dict.keys())[0]
        filler = (only + 1) % 256
        return HuffmanTree(None, HuffmanTree(only), HuffmanTree(filler))

    # Order the symbols by (frequency, symbol) and wrap each one in a leaf.
    ordered = sorted((freq_dict[sym], sym) for sym in freq_dict)
    forest = [(freq, HuffmanTree(sym)) for freq, sym in ordered]

    while len(forest) > 1:
        # Merge the two front entries and restore the sort order.
        first = forest.pop(0)
        second = forest.pop(0)
        parent = HuffmanTree(None, first[1], second[1])
        forest.append((first[0] + second[0], parent))
        forest.sort()

    root = forest[0][1]
    __huffman_helper(root)
    return root
Beispiel #24
0
def _clean_huff_tree(tree: HuffmanTree):
    """
    Reset HuffmanTree.number to None on <tree> and on every node below it.

    The numbers are only used temporarily to keep a list sorted; a falsy
    <tree> is ignored.
    """
    if not tree:
        return

    # Children first, then the node itself.
    _clean_huff_tree(tree.left)
    _clean_huff_tree(tree.right)
    tree.number = None
Beispiel #25
0
def huffman_encoding(text):
    """Encode ``text`` with a Huffman tree grown from the text itself.

    Args:
        text: Non-empty sequence of characters to encode.

    Returns:
        A ``(encoded, tree)`` tuple: the concatenated codes of all
        characters, and the ``HuffmanTree`` that produced the mapping.

    Raises:
        ValueError: If ``text`` is empty.
        KeyError: If a character of ``text`` is missing from the mapping.
    """
    if not text:
        raise ValueError("Text is empty. Abort encoding.")

    tree = HuffmanTree()
    tree.grow(text).encode()
    char_code = tree.get_mapping()

    codes = []
    for char in text:
        try:
            codes.append(char_code[char])
        except KeyError as err:
            # Catch only the lookup failure and keep the original cause
            # attached; unrelated errors propagate unchanged.
            raise KeyError(
                f"{char} does not exist in the mapping. Abort encoding."
            ) from err

    return "".join(codes), tree
Beispiel #26
0
 def test_decode(self):
     """decode turns a bit string back into text for two different code
     tables and raises when trailing bits do not form a complete code."""
     short_codes = {"t": "0", "e": "10", "s": "11"}
     decoded = HuffmanTree.decode("010110", short_codes)
     self.assertEqual(decoded, "test")

     long_codes = {"t": "00", "e": "1011", "s": "111"}
     decoded = HuffmanTree.decode("00101111100", long_codes)
     self.assertEqual(decoded, "test")

     # Same table as the first case, with one surplus bit at the end.
     dangling_codes = {"t": "0", "e": "10", "s": "11"}
     self.assertRaises(Exception, HuffmanTree.decode, "0101101",
                       dangling_codes)
Beispiel #27
0
def _recursive_numbering(tree: HuffmanTree, cur_num: list):
    """
    Number the internal nodes of <tree> in postorder.

    <cur_num> is a one-element list used as a shared, mutable counter:
    cur_num[0] is the next number to hand out and is incremented after each
    internal node has been numbered. Leaves are not numbered, and a missing
    or falsy <tree> is ignored.
    """
    # The blank-tree check must come first; calling is_leaf() on None would
    # raise before the check could ever be reached.
    if not tree or tree.is_leaf():
        return

    # Left subtree, then right subtree, and finally the node itself.
    _recursive_numbering(tree.left, cur_num)
    _recursive_numbering(tree.right, cur_num)
    tree.number = cur_num[0]
    cur_num[0] += 1
def _swap(tree: HuffmanTree, dic: Dict) -> None:
    """
    Replace symbols in <tree> according to <dic>.

    Every node that carries a symbol found among the keys of <dic> gets
    dic[symbol] as its new symbol; symbols that are not keys of <dic> stay
    as they are. Nodes without a symbol are descended into, skipping
    missing children.
    """
    # Compare against None explicitly: 0 is a legitimate symbol but falsy.
    if tree.symbol is not None:
        if tree.symbol in dic:
            tree.symbol = dic[tree.symbol]
    else:
        for child in (tree.left, tree.right):
            if child is not None:
                _swap(child, dic)
Beispiel #29
0
    def read_message_file(self):
        """Decompress the binary file ``self.filename`` into ``self.text``.

        Every byte is expanded with ``binary.EightBitNumToBinary``. A
        ``HuffmanTree`` loaded from ``self.dictionary`` then decodes
        ``self.fileLength`` symbols with ``find_char``, whose result is
        indexed as (character, number of bits consumed).
        """
        # The context manager closes the file even if the read fails.
        with open(self.filename, "rb") as f:
            raw = f.read()

        bits = "".join(binary.EightBitNumToBinary(byte) for byte in raw)

        self.tree = HuffmanTree()
        self.tree.read_dict(self.dictionary)

        chars = []
        for _ in range(self.fileLength):
            # One lookup per symbol supplies both the character and the
            # number of bits to drop.
            found = self.tree.find_char(bits)
            chars.append(found[0])
            bits = bits[found[1]:]
        self.text = "".join(chars)
Beispiel #30
0
def __generate_tree_gen_help(node_lst: list, a: ReadNode) -> HuffmanTree:
    """Helper for generate_tree_general: build the subtree described by the
    ReadNode <a>, resolving non-leaf children through <node_lst>."""
    subtree = HuffmanTree(None, None, None)

    # Type 0 means the data field is the symbol of a leaf; otherwise it is
    # the index in <node_lst> of the node describing the child.
    if a.l_type != 0:
        subtree.left = __generate_tree_gen_help(node_lst, node_lst[a.l_data])
    else:
        subtree.left = HuffmanTree(a.l_data, None, None)

    if a.r_type != 0:
        subtree.right = __generate_tree_gen_help(node_lst,
                                                 node_lst[a.r_data])
    else:
        subtree.right = HuffmanTree(a.r_data, None, None)

    return subtree
Beispiel #31
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes nothing about the order of the tree nodes in the list.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> generate_tree_general(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None), \
HuffmanTree(12, None, None)), \
HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    """
    # TODO: Implement this function
    if len(node_lst) == 0:
        return HuffmanTree(None)

    for node in node_lst:

        if node.l_type == 0:  # if left side is a leaf
            pass
        else:
            pass