Exemple #1
0
def parse_html(html_string, debug=False):
    """
    Build the document tree for a piece of HTML markup.

    :param html_string: the HTML source; asserted to be a TEXT_TYPE instance.
    :param debug: passed on to HtmlParser; when true, the parser's
        debug() method is also called once parsing has finished.
    :return: the value returned by HtmlParser.feed() for html_string.
    :raises AssertionError: if html_string is not a TEXT_TYPE instance
        (only while assert statements are enabled).
    """
    assert isinstance(html_string, TEXT_TYPE), "given html_string must be unicode!"

    parser = HtmlParser(debug=debug)
    tree = parser.feed(html_string)

    # Only produce the parser's debug output when the caller asked for it.
    if debug:
        parser.debug()

    return tree
def parse_html(html_string, debug=False):
    """
    Turn HTML markup into a document tree.

    :param html_string: the HTML source; asserted to be a str instance.
    :param debug: handed to HtmlParser; when true, the parser's debug()
        method is additionally invoked after the markup has been fed in.
    :return: the result of HtmlParser.feed() for html_string.
    :raises AssertionError: if html_string is not a str
        (only while assert statements are enabled).
    """
    assert isinstance(html_string, str), "given html_string must be unicode!"

    parser = HtmlParser(debug=debug)
    tree = parser.feed(html_string)

    # Debug output is opt-in.
    if debug:
        parser.debug()

    return tree
Exemple #3
0
</ul>
</li>
<li><p>subitem 1.2</p>
</li>
</ul>
</li>
<li><p>item 2</p>
<ul>
<li>subitem 2.1</li>
</ul>
</li>
</ul>
<p>Text under list.</p>
<p>4 <img alt="PNG pictures" src="/image.png" /> four</p>
<p>5 <img alt="Image without files ext?" src="/path1/path2/image" /> five</p>
"""

    print(data)
    h2c = HtmlParser(
        #        debug=True
    )
    document_tree = h2c.feed(data)
    h2c.debug()

    e = ReStructuredTextEmitter(document_tree, debug=True)
    content = e.emit()
    print("*" * 79)
    print(content)
    print("*" * 79)
    print(content.replace(" ", ".").replace("\n", "\\n\n"))



if __name__ == '__main__':
    # Manual smoke run: execute this module's doctests, then push one sample
    # line through HtmlParser and CreoleEmitter and print the result.
    import doctest
    print(doctest.testmod())

    from creole.parser.html_parser import HtmlParser

    # Sample input: one line containing a macro-style open/close tag pair.
    sample = """A <<unittest_macro1 args="foo1">>bar1<</unittest_macro1>> in a line..."""

    # Parse with debugging enabled and dump the parser's debug output.
    parser = HtmlParser(debug=True)
    tree = parser.feed(sample)
    parser.debug()

    from creole.shared.unknown_tags import escape_unknown_nodes

    # escape_unknown_nodes is supplied as the emitter's unknown_emit handler.
    emitter = CreoleEmitter(
        tree,
        debug=True,
        unknown_emit=escape_unknown_nodes,
    )
    emitted = emitter.emit()

    rule = "*" * 79
    print(rule)
    print(emitted)
    print(rule)

    # Second dump makes whitespace visible: spaces become dots and every
    # newline is shown as a literal "\n" followed by a real line break.
    visible = emitted.replace(" ", ".").replace("\n", "\\n\n")
    print(visible)