Example #1
0
def parse_html(html_string, debug=False):
    """
    Build the document tree for the given HTML markup.

    :param html_string: HTML source; must be an instance of TEXT_TYPE,
        otherwise the assertion below fails.
    :param debug: forwarded to HtmlParser as its ``debug`` keyword; when
        true, the parser's debug() method is also called after feeding.
    :return: the value returned by HtmlParser.feed() for html_string.
    """
    assert isinstance(html_string, TEXT_TYPE), "given html_string must be unicode!"

    parser = HtmlParser(debug=debug)
    tree = parser.feed(html_string)
    if not debug:
        return tree

    # Debug mode: let the parser emit its debug output before returning.
    parser.debug()
    return tree
Example #2
0
def parse_html(html_string, debug=False, **parser_kwargs):
    """
    Build the document tree for the given HTML markup.

    :param html_string: HTML source; must be a ``unicode`` instance,
        otherwise the assertion below fails.
    :param debug: passed as the first positional argument to HtmlParser;
        when true, the parser's debug() method is also called after feeding.
    :param parser_kwargs: extra keyword arguments forwarded unchanged to
        the HtmlParser constructor.
    :return: the value returned by HtmlParser.feed() for html_string.
    """
    assert isinstance(html_string, unicode)

    parser = HtmlParser(debug, **parser_kwargs)
    tree = parser.feed(html_string)
    if not debug:
        return tree

    # Debug mode: let the parser emit its debug output before returning.
    parser.debug()
    return tree
Example #3
0
</li>
</ul>
</li>
<li><p>item 2</p>
<ul>
<li>subitem 2.1</li>
</ul>
</li>
</ul>
<p>Text under list.</p>
<p>4 <img alt="PNG pictures" src="/image.png" /> four</p>
<p>5 <img alt="Image without files ext?" src="/path1/path2/image" /> five</p>
"""

    print(data)
    h2c = HtmlParser(
#        debug=True
    )
    document_tree = h2c.feed(data)
    h2c.debug()

    e = ReStructuredTextEmitter(document_tree,
        debug=True
    )
    content = e.emit()
    print("*" * 79)
    print(content)
    print("*" * 79)
    print(content.replace(" ", ".").replace("\n", "\\n\n"))