コード例 #1
0
 def handle_starttag(self, tag, attrs):
     """Open a new element for ``tag`` and make it the current node.

     ``attrs`` is an iterable of indexable items; item ``[0]`` is used as
     the attribute name and item ``[1]`` as its value.  When a name occurs
     more than once, the last value wins.

     The element is built with ``DOM.Element(cur, tag, attributes)`` and
     stored in the module-level ``cur``.  If ``tag`` is listed in
     ``self_closing_tags`` the element is written to its parent right away
     and ``cur`` is moved back to that parent, because no end tag will
     arrive to close it.

     NOTE(review): the first argument of ``DOM.Element`` is presumably the
     parent node -- confirm against the DOM module.
     """
     global cur
     attributes = {pair[0]: pair[1] for pair in attrs}
     cur = DOM.Element(cur, tag, attributes)
     if tag not in self_closing_tags:
         # Regular element: stays open until it is closed elsewhere.
         return
     # Self-closing element: finish it immediately and step back up.
     parent = cur.parent
     parent.write(cur)
     cur = parent
コード例 #2
0
def parse(html):
    """Parse ``html`` and return the resulting ``'document'`` element.

    The module-level ``cur`` is reset to a fresh ``'document'`` element,
    then ``html`` is fed to a new ``parser()`` instance.  Any elements still
    open after feeding are written to their parents, walking upwards until
    the ``'document'`` element is reached again.

    If the finished document yields no ``'body'`` elements, the input is
    wrapped in ``<html><body>...</body></html>`` and parsed again; the
    result of that second pass is returned instead.
    """
    global cur
    wrapper = '<html><body>%s</body></html>'

    cur = DOM.Element(None, 'document')
    parser().feed(html)

    # Close whatever the input left open, innermost element first.
    while cur.tag != 'document':
        parent = cur.parent
        parent.write(cur)
        cur = parent

    if cur.getElementsByTagName('body'):
        return cur
    # No body found: retry with the markup wrapped in html/body.
    return parse(wrapper % html)