def finish_starttag(self, tagname, attrs):
    """Create a DOM element for a start tag and hook it into the tree.

    tagname -- tag name as an encoded string; it is decoded with the
               current ``self._charset``
    attrs   -- sequence of ``(name, value)`` pairs of encoded strings

    Tags that have no entry in ``HTML_DTD`` are ignored.  A META tag
    carrying ``http-equiv="Content-Type"`` may switch ``self._charset``
    to the charset named in its ``content`` attribute; the new value is
    used from this tag's attributes onwards.  A charset name that
    Python has no codec for is ignored, so that a bad META tag cannot
    make every later ``unicode()`` call fail with LookupError.

    Always returns None.
    """
    unicodeTagName = unicode(tagname, self._charset)
    lowerTagName = string.lower(unicodeTagName)
    if not HTML_DTD.has_key(lowerTagName):
        # Skip any tags not defined in HTML 4.01
        return

    element = self._ownerDoc.createElementNS(EMPTY_NAMESPACE, unicodeTagName)

    # Allows for multiple META tags in a document
    if lowerTagName == 'meta':
        import codecs
        lowered = [(string.lower(name), string.lower(value))
                   for (name, value) in attrs]
        if ('http-equiv', 'content-type') in lowered:
            for (name, value) in lowered:
                if name == 'content':
                    match = g_reCharset.search(value)
                    if match:
                        charset = match.group('charset')
                        try:
                            codecs.lookup(charset)
                        except LookupError:
                            # Unknown codec: keep decoding with the
                            # charset that has worked so far.
                            pass
                        else:
                            self._charset = charset

    # Add any attributes to the tag
    for (name, value) in attrs:
        element.setAttributeNS(EMPTY_NAMESPACE,
                               unicode(name, self._charset),
                               unicode(value, self._charset))

    # Look for its parent: walk the stack of open elements from the
    # innermost outwards.  Index 0 is left out of this search; it is
    # only used below as the parent of the first <html> element.
    for i in range(1, len(self._stack)):
        parent = self._stack[-i]
        if lowerTagName in HTML_DTD[string.lower(parent.tagName)]:
            parent.appendChild(element)
            if i > 1:
                # Drop the elements nested deeper than the chosen
                # parent, leaving the parent on top of the stack.
                self._stack = self._stack[:-i + 1]
            if HTML_DTD[lowerTagName]:
                # Only elements with a non-empty HTML_DTD entry are
                # pushed (presumably the others cannot have children).
                self._stack.append(element)
            return

    # no parent found
    if not self._hasHtml and lowerTagName == 'html':
        self._stack[0].appendChild(element)
        self._stack.append(element)
        self._hasHtml = 1
    return
def finish_starttag(self, tagname, attrs):
    """Create a DOM element for a start tag and hook it into the tree.

    tagname -- tag name as an encoded string; it is decoded with the
               current ``self._charset``
    attrs   -- sequence of ``(name, value)`` pairs of encoded strings

    Tags that have no entry in ``HTML_DTD`` are ignored.  A META tag
    carrying ``http-equiv="Content-Type"`` may switch ``self._charset``
    to the charset named in its ``content`` attribute; the new value is
    used from this tag's attributes onwards.  A charset name that
    Python has no codec for is ignored, so that a bad META tag cannot
    make every later ``unicode()`` call fail with LookupError.

    Always returns None.
    """
    unicodeTagName = unicode(tagname, self._charset)
    lowerTagName = string.lower(unicodeTagName)
    if not HTML_DTD.has_key(lowerTagName):
        # Skip any tags not defined in HTML 4.01
        return

    element = self._ownerDoc.createElementNS(EMPTY_NAMESPACE, unicodeTagName)

    # Allows for multiple META tags in a document
    if lowerTagName == 'meta':
        import codecs
        lowered = [(string.lower(name), string.lower(value))
                   for (name, value) in attrs]
        if ('http-equiv', 'content-type') in lowered:
            for (name, value) in lowered:
                if name == 'content':
                    match = g_reCharset.search(value)
                    if match:
                        charset = match.group('charset')
                        try:
                            codecs.lookup(charset)
                        except LookupError:
                            # Unknown codec: keep decoding with the
                            # charset that has worked so far.
                            pass
                        else:
                            self._charset = charset

    # Add any attributes to the tag
    for (name, value) in attrs:
        element.setAttributeNS(EMPTY_NAMESPACE,
                               unicode(name, self._charset),
                               unicode(value, self._charset))

    # Look for its parent: walk the stack of open elements from the
    # innermost outwards.  Index 0 is left out of this search; it is
    # only used below as the parent of the first <html> element.
    for i in range(1, len(self._stack)):
        parent = self._stack[-i]
        if lowerTagName in HTML_DTD[string.lower(parent.tagName)]:
            parent.appendChild(element)
            if i > 1:
                # Drop the elements nested deeper than the chosen
                # parent, leaving the parent on top of the stack.
                self._stack = self._stack[:-i + 1]
            if HTML_DTD[lowerTagName]:
                # Only elements with a non-empty HTML_DTD entry are
                # pushed (presumably the others cannot have children).
                self._stack.append(element)
            return

    # no parent found
    if not self._hasHtml and lowerTagName == 'html':
        self._stack[0].appendChild(element)
        self._stack.append(element)
        self._hasHtml = 1
    return
def _4dom_createHTMLElement(self, tagName):
    """Instantiate the element class that corresponds to an HTML tag.

    tagName -- tag name in any letter case; it is passed unchanged to
               the element constructor

    The lower-cased name must be a key of ``HTML_DTD``, otherwise
    TypeError is raised.  Tags listed in ``NoClassTags`` are built with
    the generic ``HTMLElement`` class.  Every other tag is built with
    the class ``HTML<Name>Element``, where ``<Name>`` is the capitalised
    tag name after an optional translation through ``HTMLTagMap``.  The
    class is fetched from the module of the same name, which is
    imported from ``xml.dom.html`` first if ``self._html`` does not
    hold it yet.

    Returns the new element, constructed as ``cls(self, tagName)``.
    """
    tag_key = string.lower(tagName)
    if not HTML_DTD.has_key(tag_key):
        raise TypeError('Unknown HTML Element: %s' % tagName)

    if tag_key in NoClassTags:
        from HTMLElement import HTMLElement
        return HTMLElement(self, tagName)

    # FIXME: string.capitalize() is broken with unicode in Python 2.0,
    # so the first letter is upper-cased by hand instead.
    default_stem = string.upper(tagName[0]) + tag_key[1:]
    module_name = 'HTML%sElement' % HTMLTagMap.get(default_stem, default_stem)

    already_loaded = self._html.has_key(module_name)
    if not already_loaded:
        # Try to import it (should never fail)
        __import__('xml.dom.html.%s' % module_name)

    # Class and module have the same name
    element_class = getattr(self._html[module_name], module_name)
    return element_class(self, tagName)