Example #1
0
def huffman(characters):
    """
    Build a Huffman coding tree from a collection of frequency-bearing nodes.

    Background: a fixed-length code spends the same number of bits on every
    character (e.g. 3 bits each -> 300k bits for a 100k character file).
    A variable-length prefix code, in which no codeword is a prefix of
    another, can do better. This routine uses a greedy strategy.

    The input is loaded into a ``PriorityQueue`` and heapified with
    ``build_min_heap``. Then ``len(characters) - 1`` merge rounds run. Each
    round extracts two nodes via ``heap_extract_min``, attaches them as the
    ``left`` and ``right`` children of a fresh ``Character`` whose ``freq``
    is the sum of the two children's ``freq``, and inserts that node back
    into the queue.

    NOTE(review): ``PriorityQueue`` and ``Character`` are defined outside this
    block; the queue is presumably keyed on ``freq`` -- confirm.

    :param characters: list of nodes, each exposing a ``freq`` attribute
    :return: the node obtained from one last ``heap_extract_min`` call after
        all merge rounds (the root of the tree when there were merges)
    """
    # Measured up front, before the queue is handed the list.
    merge_rounds = len(characters) - 1

    heap = PriorityQueue(characters)
    heap.build_min_heap()

    for _ in range(merge_rounds):
        parent = Character()

        # First extraction becomes the left child, second the right child.
        first = heap.heap_extract_min()
        parent.left = first
        second = heap.heap_extract_min()
        parent.right = second

        parent.freq = first.freq + second.freq
        heap.min_heap_insert(parent)

    # One node is expected to remain after the merges; hand it back.
    root = heap.heap_extract_min()
    return root