def ignore_given_selectors(tree, selectors_to_ignore):
    """
    Strip unwanted elements out of a parse tree generated by BeautifulSoup.

    Each CSS selector in `selectors_to_ignore` is evaluated against `tree`
    and every element it matches is extracted from the tree. The tree is
    modified in place; nothing is returned.
    """
    for css_selector in selectors_to_ignore:
        # Collect the matches for this selector first, then detach them.
        unwanted_nodes = soupselect.select(tree, css_selector)
        for node in unwanted_nodes:
            node.extract()
def find_root_node(tree, selector):
    """
    Given a document tree generated by BeautifulSoup, find the most specific
    document node that doesn't "lose any information" (i.e. everything that we
    want to be included in the Vim help file) while ignoring as much fluff as
    possible (e.g. headers, footers and navigation menus included in the
    original HTML document).

    The lookup order is:

    1. The first element matching the CSS `selector` provided by the caller.
    2. The document's <body> element.
    3. The `tree` itself, when no <body> element can be found.
    """
    # Try to find the root node using a CSS selector provided by the caller.
    matches = soupselect.select(tree, selector)
    if matches:
        return matches[0]
    # Otherwise we'll fall back to the <body> element.
    try:
        body = tree.html.body
    except AttributeError:
        # `tree.html` did not yield an object with a `body` attribute
        # (presumably None when the document has no <html> element). Only
        # this failure is handled, so that KeyboardInterrupt, SystemExit
        # and unrelated bugs are no longer silently swallowed.
        return tree
    # A missing <body> inside an existing <html> is expected to come back as
    # None rather than raise; don't hand None to the caller in that case.
    if body is None:
        return tree
    return body