def tree_words(node):
    """Yield the non-blank words found in the text below ``node``.

    The text is obtained from ``tree_text(node)`` and cut into pieces by
    ``split_text``; each piece is stripped of surrounding whitespace and
    pieces that end up empty are dropped. This is a generator, so wrap
    the call in ``list()`` when a list is needed.

    >>> list(tree_words(parse_minidom('<h1>one</h1> two <div>three<em>four</em></div>')))
    ['one', 'two', 'three', 'four']
    """
    for chunk in split_text(tree_text(node)):
        stripped = chunk.strip()
        if not stripped:
            # Whitespace-only pieces carry no word; skip them.
            continue
        yield stripped
def split_node(node):
    """Replace a text node with one text node per chunk of its value.

    ``node.nodeValue`` is cut into pieces by ``split_text``. When that
    yields at most one piece the node is left untouched. Otherwise a new
    text node is created for every piece and inserted, in order, in front
    of ``node`` under the same parent; ``node`` is then passed to
    ``remove_node``.

    Returns ``None`` in both cases; the work is done by mutating the tree.
    """
    chunks = split_text(node.nodeValue)
    if len(chunks) < 2:
        # Nothing to split: keep the original node as it is.
        return

    container = node.parentNode
    document = node.ownerDocument
    for chunk in chunks:
        # Inserting before the original keeps the chunks in source order.
        container.insertBefore(document.createTextNode(chunk), node)

    # The original node is now redundant; hand it to remove_node.
    remove_node(node)