예제 #1
0
def parse_html(html_string, debug=False):
    """ create the document tree from html code """
    assert isinstance(html_string, str), "given html_string must be unicode!"

    h2c = HtmlParser(debug=debug)
    document_tree = h2c.feed(html_string)
    if debug:
        h2c.debug()
    return document_tree
예제 #2
0
</ul>
</li>
<li><p>subitem 1.2</p>
</li>
</ul>
</li>
<li><p>item 2</p>
<ul>
<li>subitem 2.1</li>
</ul>
</li>
</ul>
<p>Text under list.</p>
<p>4 <img alt="PNG pictures" src="/image.png" /> four</p>
<p>5 <img alt="Image without files ext?" src="/path1/path2/image" /> five</p>
"""

    print(data)
    h2c = HtmlParser(
        #        debug=True
    )
    document_tree = h2c.feed(data)
    h2c.debug()

    e = ReStructuredTextEmitter(document_tree, debug=True)
    content = e.emit()
    print("*" * 79)
    print(content)
    print("*" * 79)
    print(content.replace(" ", ".").replace("\n", "\\n\n"))