def handle_starttag(self, tag, attrs):
    """Create an element for an opening tag and make it the current node.

    The ``attrs`` pairs are collected into a dict (a later duplicate name
    overwrites an earlier one) and a new ``DOM.Element`` is built from the
    module-level ``cur``, the tag name and that dict; the new element then
    becomes ``cur``.

    Tags listed in ``self_closing_tags`` never stay open: the element is
    written to its ``parent`` right away and ``cur`` moves back to that
    parent.

    Args:
        tag: Name of the tag being opened.
        attrs: Iterable of indexable items; item ``[0]`` is used as the
            attribute name and item ``[1]`` as its value.
    """
    global cur

    attributes = {pair[0]: pair[1] for pair in attrs}
    # NOTE(review): the first argument looks like the parent node, since
    # ``.parent`` is used below to step back out -- confirm against DOM.Element.
    cur = DOM.Element(cur, tag, attributes)

    if tag not in self_closing_tags:
        # Ordinary tag: leave it open as the current node.
        return

    # Self-closing tag: attach it immediately and step back to its parent.
    element = cur
    element.parent.write(element)
    cur = element.parent
def parse(html):
    """Parse ``html`` and return the resulting ``'document'`` node.

    The module-level ``cur`` is reset to a fresh ``DOM.Element`` tagged
    ``'document'``, a new ``parser()`` is fed the markup, and every element
    still open afterwards is written to its parent until ``cur`` is the
    document node again.

    If the resulting document has no ``body`` element, the original markup
    is wrapped in ``<html><body>...</body></html>`` and parsed again.

    Args:
        html: Markup string to parse.

    Returns:
        The node tagged ``'document'`` that ``cur`` ends up pointing at.
    """
    global cur

    cur = DOM.Element(None, 'document')
    # NOTE(review): only feed() is called, never close(); if ``parser`` is an
    # HTMLParser subclass, trailing incomplete input may stay buffered -- confirm.
    parser().feed(html)

    # Close whatever the markup left open, innermost element first.
    while cur.tag != 'document':
        unclosed = cur
        unclosed.parent.write(unclosed)
        cur = unclosed.parent

    if cur.getElementsByTagName('body'):
        return cur

    # No body found: retry with the markup wrapped in a minimal skeleton.
    return parse('<html><body>%s</body></html>' % html)