def testStartswith(self):
    """Check ``Bits.startswith`` with and without ``start``/``end`` bounds."""
    # NOTE(review): the expectations below (e.g. '0b0000000111' starting with
    # '0b111') presumably rely on least-significant-bit-first indexing being
    # active for this test case -- confirm against the enclosing class/setup.
    bits = Bits('0b0000000111')

    # Unbounded prefix checks: a bit-string prefix and an integer argument.
    self.assertTrue(bits.startswith('0b111'))
    self.assertFalse(bits.startswith(1))

    # Prefix check with the window opened at ``start=1``.
    self.assertTrue(bits.startswith('0b011', start=1))

    # The same four-bit prefix is rejected with ``end=3`` but accepted with
    # ``end=4``.
    self.assertFalse(bits.startswith('0b0111', end=3))
    self.assertTrue(bits.startswith('0b0111', end=4))
def encode_bits_as_strings(tree: HuffmanTree, bits: Bits,
                           string_prefix: str = "") -> Tuple[Bits, str]:
    """
    Given a bit stream and a Huffman tree, return the appropriate string of
    symbols.

    The output will match the statistical distribution of the sample it was
    made with as much as possible, although limited by the necessity of an
    unambiguous HuffmanTree structure. If the Huffman tree does not have path
    bits to match the input exactly, it will append 0s until the function can
    complete.

    A node whose ``path_code`` is None is treated as the root: it keeps
    consuming symbols until the bits run out. That repetition is done with a
    loop rather than by re-invoking the function, so the call depth is bounded
    by the height of the tree instead of by the number of symbols produced.

    :param tree: a Huffman tree with path bits allocated
    :param bits: the input bits
    :param string_prefix: the so-far accumulated string. Leave empty when
        calling manually
    :return: a Tuple of the remaining bits and the accumulated string made up
        of symbols in the Huffman tree. When called on a node that has a path
        code (i.e. not the root), the remaining bits may be None if the input
        was consumed exactly, or the input extended by one zero bit if no
        child path code matched
    :raises HuffmanError: if a child or leaf node has no path code, or if a
        node has exactly one child tree
    """
    while True:
        if bits is None or bits == Bits():
            # Nothing left to encode.
            return Bits(), string_prefix

        if tree.left is None and tree.right is None:
            # This tree is a leaf node
            if tree.path_code is None:
                raise HuffmanError(
                    "When encoding bits as strings, a leaf node was"
                    " missing a path code")
            if not bits.startswith(tree.path_code):
                warnings.warn(
                    "When encoding bits as strings, some unencodable"
                    " bits were left over")
                return bits, string_prefix
            accumulated_string = string_prefix + tree.value[0]
            if bits == tree.path_code:
                # The leaf used up the input exactly.
                remaining_bits = None
            else:
                remaining_bits = bits[tree.path_code.length:]
            return remaining_bits, accumulated_string

        if tree.left is None or tree.right is None:
            raise HuffmanError(
                "The given Huffman tree contained a node with exactly 1 "
                "child tree")

        # This tree has subtrees
        left_tree = tree.left[1]
        right_tree = tree.right[1]
        if left_tree.path_code is None or right_tree.path_code is None:
            raise HuffmanError(
                "When encoding bits as strings, a node was missing "
                "a path code")

        if bits.startswith(left_tree.path_code):
            subtree = left_tree
        elif bits.startswith(right_tree.path_code):
            subtree = right_tree
        else:
            # Binary sequence does not match a leaf value. Must pad with 0s
            # and let the caller retry with the longer sequence.
            return bits + zero_bit, string_prefix

        # Descending is genuine recursion, bounded by the tree height.
        remaining_bits, accumulated_string = encode_bits_as_strings(
            subtree, bits, string_prefix)

        if tree.path_code is not None:
            # Not the root: hand the partial result back up the tree.
            return remaining_bits, accumulated_string

        # This tree is a root node: continue processing the remaining bits
        # from the top. The loop guard above handles running out of bits.
        bits, string_prefix = remaining_bits, accumulated_string