def parse_html(html_string, debug=False):
    """
    Create the document tree from html code.

    :param html_string: HTML markup to parse; must be an instance of
        ``TEXT_TYPE``.
    :param debug: passed through to ``HtmlParser``; when true, the parser's
        ``debug()`` method is also called after the markup was fed.
    :return: the value returned by ``HtmlParser.feed()`` for ``html_string``.
    :raises AssertionError: if ``html_string`` is not a ``TEXT_TYPE`` instance.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently skip this check.
    # AssertionError is kept so callers see the same exception type as before.
    if not isinstance(html_string, TEXT_TYPE):
        raise AssertionError("given html_string must be unicode!")

    parser = HtmlParser(debug=debug)
    document_tree = parser.feed(html_string)
    if debug:
        parser.debug()
    return document_tree
def parse_html(html_string, debug=False):
    """
    Create the document tree from html code.

    :param html_string: HTML markup to parse; must be a ``str``.
    :param debug: passed through to ``HtmlParser``; when true, the parser's
        ``debug()`` method is also called after the markup was fed.
    :return: the value returned by ``HtmlParser.feed()`` for ``html_string``.
    :raises AssertionError: if ``html_string`` is not a ``str`` instance.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently skip this check.
    # AssertionError is kept so callers see the same exception type as before.
    if not isinstance(html_string, str):
        raise AssertionError("given html_string must be unicode!")

    parser = HtmlParser(debug=debug)
    document_tree = parser.feed(html_string)
    if debug:
        parser.debug()
    return document_tree
</ul> </li> <li><p>subitem 1.2</p> </li> </ul> </li> <li><p>item 2</p> <ul> <li>subitem 2.1</li> </ul> </li> </ul> <p>Text under list.</p> <p>4 <img alt="PNG pictures" src="/image.png" /> four</p> <p>5 <img alt="Image without files ext?" src="/path1/path2/image" /> five</p> """ print(data) h2c = HtmlParser( # debug=True ) document_tree = h2c.feed(data) h2c.debug() e = ReStructuredTextEmitter(document_tree, debug=True) content = e.emit() print("*" * 79) print(content) print("*" * 79) print(content.replace(" ", ".").replace("\n", "\\n\n"))
if __name__ == '__main__':
    # Manual smoke test: run the module doctests, then convert one small
    # snippet and print the emitted markup in plain and whitespace-visible form.
    import doctest

    doctest_result = doctest.testmod()
    print(doctest_result)

    # To stop after the doctests, enable: import sys; sys.exit()
    from creole.parser.html_parser import HtmlParser

    data = """A <<unittest_macro1 args="foo1">>bar1<</unittest_macro1>> in a line..."""
    # To inspect the input first, enable: print(data.strip())

    html_parser = HtmlParser(debug=True)
    document_tree = html_parser.feed(data)
    html_parser.debug()

    from creole.shared.unknown_tags import escape_unknown_nodes

    emitter = CreoleEmitter(
        document_tree,
        debug=True,
        unknown_emit=escape_unknown_nodes,
    )
    content = emitter.emit()

    separator = "*" * 79
    print(separator)
    print(content)
    print(separator)

    # Show spaces as dots and mark each line break with a literal "\n".
    dotted = content.replace(" ", ".")
    visible_whitespace = dotted.replace("\n", "\\n\n")
    print(visible_whitespace)