def decode_cover_text(wt_dict: WordTypeDictionary, cover_text: str,
                      header_length=DEFAULT_HEADER_LENGTH) -> Bits:
    """
    Given a valid cover text containing a header, and the correct header
    length and word-type dictionary, retrieve the secret message.

    The header is decoded first. Because whole words are decoded at a time,
    the last header word may yield more bits than the header needs; those
    spill-over bits are the first bits of the message. The returned message
    never exceeds the length announced by the header, even when the
    spill-over alone is longer than the message.

    :param wt_dict: a dictionary of word-types
    :param cover_text: the cover text consisting of a header and message
    :param header_length: the pre-shared length, in bits, of the header
    :return: the retrieved secret message as bits (empty for an empty cover
        text)
    :raises ValueError: if the cover text is None, or (from
        fixed_size_decode) if it is too short for the expected data
    """
    if cover_text is None:
        raise ValueError("Cover text cannot be None.")
    if len(cover_text) == 0:
        return Bits()

    header_bits, trailing_bits, cover_text = fixed_size_decode(
        wt_dict, cover_text, header_length)
    message_length = get_message_length_from_header(header_bits)

    # Spill-over from the header's last word belongs to the message, but only
    # up to the announced length; anything beyond that is not message data.
    message = trailing_bits[:message_length]

    # Never negative: message holds at most message_length bits.
    remaining_length = message_length - len(message)
    message_bits, _, cover_text = fixed_size_decode(
        wt_dict, cover_text, remaining_length)

    if len(cover_text) > 0:
        print("Warning: there were {} characters left over in the cover text. "
              "Please verify the provided header length.".format(
                  len(cover_text)))

    return message + message_bits
def allocate_path_bits(huffman_tree: Tuple[int, HuffmanTree],
                       prefix: Bits = None):
    """
    Assign a cumulative path code to every node of a Huffman (sub)tree.

    Any path codes already stored in the tree are overwritten. Each node
    receives the complete bit sequence leading to it: a left branch appends
    a zero bit and a right branch appends a one bit to the parent's code.
    The node this is first called on receives ``prefix`` unchanged.

    :param huffman_tree: the tuple containing Huffman (sub)tree and its
        cumulative priority
    :param prefix: the cumulative bits for the path up until this node.
        Leave empty when calling on the whole tree.
    """
    node = huffman_tree[1]
    node.path_code = prefix

    branches = ((node.left, zero_bit), (node.right, one_bit))
    for child, branch_bit in branches:
        if child is None:
            continue
        # Without a prefix the child's code is just the branch bit itself.
        child_code = branch_bit if prefix is None else prefix + branch_bit
        allocate_path_bits(child, child_code)
def fixed_size_decode(wt_dict: WordTypeDictionary, cover_text: str,
                      data_length: int) -> Tuple[Bits, Bits, str]:
    """
    Decode exactly ``data_length`` bits from the front of a cover text.

    Words are consumed from the start of the cover text one at a time until
    at least ``data_length`` bits have been collected. Any bits the last
    word contributed beyond that are handed back separately.

    :param wt_dict: a dictionary of word-types
    :param cover_text: a full or partial cover text containing the message
    :param data_length: the exact number of bits that should be decoded from
        the cover text
    :return: a tuple containing the retrieved message bits; trailing bits
        from the last word decoded (if any); and the remaining cover text
        after decoding
    :raises ValueError: if the cover text runs out before ``data_length``
        bits have been decoded
    """
    max_word_length = len(get_longest_word_in_dictionary(wt_dict))
    decoded = Bits()

    while decoded.len < data_length:
        if len(cover_text) == 0:
            raise ValueError(
                "Cover text was too short for expected {} bits of data".format(
                    data_length))

        word, word_bits = get_word_from_cover_text(
            wt_dict, cover_text, max_word_length)
        decoded = decoded + word_bits

        # Drop the consumed word and any whitespace separating it from the
        # next one.
        unread = cover_text[len(word):]
        cover_text = unread.lstrip()

    return decoded[:data_length], decoded[data_length:], cover_text
def has_correct_bits(huffman_tree: HuffmanTreeWithFrequencies, bits: Bits):
    """
    Show that the given tree has bits for all non-root nodes, and that they
    accumulate correctly.

    A node passes when its path code equals the expected cumulative bits.
    A missing (None) path code is accepted only where the expected bits are
    empty, i.e. at the root of the walk; any deeper node without a code
    fails the check.

    :param huffman_tree: the (priority, tree) tuple to verify, or None for
        an absent subtree
    :param bits: the cumulative bits expected at this node; pass empty bits
        when calling on the whole tree
    :return: True if this node and all of its descendants carry the
        expected path codes, False otherwise
    """
    if huffman_tree is None:
        return True

    tree = huffman_tree[1]
    path_code = tree.path_code

    if path_code is None:
        # Compare explicitly: None.__eq__(bits) yields NotImplemented, which
        # must not be used as a truth value.
        if len(bits) > 0:
            return False
    elif path_code != bits:
        return False

    return (has_correct_bits(tree.left, bits + Bits(bin="0"))
            and has_correct_bits(tree.right, bits + Bits(bin="1")))
def encode_string_as_bits(huffman_tree: HuffmanTree, input_string: str,
                          symbol_length: int) -> Bits:
    """
    Convert a cover text back into bits using a Huffman tree.

    The text is cut into consecutive symbols of ``symbol_length`` characters
    and each symbol is looked up in the tree; the resulting bit sequences
    are concatenated in order. Text whose length is not a whole number of
    symbols is padded on the right with spaces, with a warning. The text
    should be one produced by encode_bits_as_strings for the same tree.

    :param huffman_tree: a Huffman tree with path bits allocated
    :param input_string: the cover text to convert into bits
    :param symbol_length: the correct symbol length used to encode the text
    :return: the secret message contained within the cover text
    :raises ValueError: if ``symbol_length`` is less than 1
    """
    if symbol_length < 1:
        raise ValueError("An invalid symbol length was specified.")

    text_length = len(input_string)
    pad_width = 0
    if text_length < symbol_length:
        warnings.warn("Cover text is smaller than the given symbol length. "
                      "Padding with spaces.")
        pad_width = symbol_length - text_length
    elif text_length % symbol_length != 0:
        warnings.warn("Cover text is not a multiple of the given symbol "
                      "length. Padding with spaces.")
        pad_width = symbol_length - text_length % symbol_length

    if pad_width:
        input_string = input_string + " " * pad_width

    # After padding the length is a whole number of symbols, so stepping by
    # symbol_length visits every symbol exactly once.
    encoded = Bits()
    for offset in range(0, len(input_string), symbol_length):
        symbol = input_string[offset:offset + symbol_length]
        encoded = encoded + search_tree_for_symbol(huffman_tree, symbol)
    return encoded
def encode_bits_as_strings(tree: HuffmanTree, bits: Bits,
                           string_prefix: str = "") -> Tuple[Bits, str]:
    """
    Given a bit stream and a Huffman tree, return the appropriate string of
    symbols.

    Starting from the given node, the bits are matched against the path
    codes of the child nodes until a leaf is reached; the leaf's symbol is
    appended to the string and its path code is removed from the front of
    the bits. When called on the root (a node without a path code) this
    repeats until no bits remain. If the bits do not match the path code of
    either child, a 0 bit is appended so that a later pass can match.

    The root repeats in a loop rather than by calling itself, so the
    recursion depth is bounded by the height of the tree and does not grow
    with the number of symbols produced.

    :param tree: a Huffman tree with path bits allocated
    :param bits: the input bits
    :param string_prefix: the so-far accumulated string. Leave empty when
        calling manually
    :return: a Tuple of the remaining bits and the accumulated string made
        up of symbols in the Huffman tree. A leaf node reports "nothing
        remaining" as None rather than as empty bits.
    :raises HuffmanError: if a node that is needed has no path code, or if
        a node has exactly one child
    """
    while True:
        if bits is None or bits == Bits():
            return Bits(), string_prefix

        has_left = tree.left is not None
        has_right = tree.right is not None

        if has_left != has_right:
            raise HuffmanError(
                "The given Huffman tree contained a node with exactly 1 "
                "child tree")

        if not has_left:
            # Leaf node: emit its symbol and consume its path code.
            if tree.path_code is None:
                raise HuffmanError(
                    "When encoding bits as strings, a leaf node was"
                    " missing a path code")
            if not bits.startswith(tree.path_code):
                warnings.warn("When encoding bits as strings, some unencodable"
                              " bits were left over")
                return bits, string_prefix
            accumulated_string = string_prefix + tree.value[0]
            if bits == tree.path_code:
                return None, accumulated_string
            return bits[tree.path_code.length:], accumulated_string

        # Internal node: descend into whichever child's path code prefixes
        # the bits.
        left_tree = tree.left[1]
        right_tree = tree.right[1]
        if left_tree.path_code is None or right_tree.path_code is None:
            raise HuffmanError(
                "When encoding bits as strings, a node was missing "
                "a path code")

        if bits.startswith(left_tree.path_code):
            subtree = left_tree
        elif bits.startswith(right_tree.path_code):
            subtree = right_tree
        else:
            # Binary sequence does not match a leaf value. Must pad with 0s
            return bits + zero_bit, string_prefix

        remaining_bits, accumulated_string = encode_bits_as_strings(
            subtree, bits, string_prefix)

        if tree.path_code is not None:
            # Not the root: hand the result back up to the caller.
            return remaining_bits, accumulated_string

        # Root node: carry on with whatever bits are left.
        bits, string_prefix = remaining_bits, accumulated_string