def handle_data(self, data):
    """Report character data to the content handler.

    Nothing is reported while self.stack is empty.  Otherwise the data
    goes to ignorableWhitespace() when the HTML_DTD entry for the element
    on top of the stack does not list '#PCDATA' and strip(data) is falsy;
    in every other case it is converted with to_xml_string() using
    self._encoding and passed to characters().
    """
    if not self.stack:
        return

    allowed_content = HTML_DTD.get(self.stack[-1], [])
    if '#PCDATA' in allowed_content or strip(data):
        converted = to_xml_string(data, self._encoding)
        self._cont_handler.characters(converted)
    else:
        # blank data where no text is declared: probably ignorable whitespace
        self._cont_handler.ignorableWhitespace(data)
Beispiel #2
0
 def handle_data(self, data):
     """Report character data to the content handler.

     Nothing is reported while self.stack is empty.  Otherwise the data
     goes to ignorableWhitespace() when the HTML_DTD entry for the element
     on top of the stack does not list '#PCDATA' and strip(data) is falsy;
     in every other case it is converted with to_xml_string() using
     self._encoding and passed to characters().
     """
     if not self.stack:
         return

     allowed_content = HTML_DTD.get(self.stack[-1], [])
     if '#PCDATA' in allowed_content or strip(data):
         converted = to_xml_string(data, self._encoding)
         self._cont_handler.characters(converted)
     else:
         # blank data where no text is declared: probably ignorable whitespace
         self._cont_handler.ignorableWhitespace(data)
    def finish_starttag(self, tag, attrs):
        """Report a start tag, using the HTML DTD tables to generate
        events for tags that are missing from the input.

        Returns 0 when upper(tag) is in HTML_FORBIDDEN_END (the matching
        end event is fired right away and the tag is not pushed on
        self.stack), and 1 otherwise.
        """
        # Guess omitted close tags: while the innermost entry may omit its
        # end tag and the DTD table does not list `tag` as one of its
        # children, close that entry.
        while self.stack:
            innermost = self.stack[-1]
            if upper(innermost) not in HTML_OPT_END:
                break
            if tag in HTML_DTD.get(innermost, []):
                break
            self.unknown_endtag(innermost)
            del self.stack[-1]

        if self.stack:
            parent = self.stack[-1]
            if tag not in HTML_DTD.get(parent, []) and self.verbose:
                # single parenthesized expression: prints the same text
                # as the plain print statement
                print('Warning : trying to add %s as a child of %s'
                      % (tag, parent))

        self.unknown_starttag(tag, attrs)
        if upper(tag) not in HTML_FORBIDDEN_END:
            self.stack.append(tag)
            return 1

        # no end tag will arrive for this element, so close it immediately
        self.unknown_endtag(tag)
        return 0
Beispiel #4
0
    def finish_starttag(self, tag, attrs):
        """Report a start tag, using the HTML DTD tables to generate
        events for tags that are missing from the input.

        Returns 0 when upper(tag) is in HTML_FORBIDDEN_END (the matching
        end event is fired right away and the tag is not pushed on
        self.stack), and 1 otherwise.
        """
        # Guess omitted close tags: while the innermost entry may omit its
        # end tag and the DTD table does not list `tag` as one of its
        # children, close that entry.
        while self.stack:
            innermost = self.stack[-1]
            if upper(innermost) not in HTML_OPT_END:
                break
            if tag in HTML_DTD.get(innermost, []):
                break
            self.unknown_endtag(innermost)
            del self.stack[-1]

        if self.stack:
            parent = self.stack[-1]
            if tag not in HTML_DTD.get(parent, []) and self.verbose:
                # single parenthesized expression: prints the same text
                # as the plain print statement
                print('Warning : trying to add %s as a child of %s'
                      % (tag, parent))

        self.unknown_starttag(tag, attrs)
        if upper(tag) not in HTML_FORBIDDEN_END:
            self.stack.append(tag)
            return 1

        # no end tag will arrive for this element, so close it immediately
        self.unknown_endtag(tag)
        return 0
Beispiel #5
0
    def finish_starttag(self, tagname, attrs):
        """Create an element for a start tag and attach it to the tree.

        Tags whose lower-cased name is not a key of HTML_DTD are skipped.
        A META tag carrying http-equiv="content-type" may update
        self._charset from its content attribute.  The element is
        appended to the nearest entry of self._stack (index 0 excluded)
        whose HTML_DTD entry lists the tag; entries above that parent
        are dropped from the stack.  If no such parent exists, only the
        first 'html' element is attached, to self._stack[0].
        """
        decoded_name = unicode(tagname, self._charset)
        dtd_key = decoded_name.lower()
        if not HTML_DTD.has_key(dtd_key):
            # Skip any tags not defined in HTML 4.01
            return

        node = self._ownerDoc.createElementNS(EMPTY_NAMESPACE, decoded_name)

        # Allows for multiple META tags in a document
        if dtd_key == 'meta':
            folded = [(key.lower(), val.lower()) for (key, val) in attrs]
            if ('http-equiv', 'content-type') in folded:
                for (key, val) in folded:
                    if key != 'content':
                        continue
                    found = g_reCharset.search(val)
                    if found:
                        self._charset = found.group('charset')

        # Add any attributes to the tag
        for (attr_name, attr_value) in attrs:
            node.setAttributeNS(
                EMPTY_NAMESPACE,
                unicode(attr_name, self._charset),
                unicode(attr_value, self._charset))

        # Look for its parent, innermost stack entry first
        for depth in range(1, len(self._stack)):
            candidate = self._stack[-depth]
            if dtd_key not in HTML_DTD[candidate.tagName.lower()]:
                continue
            candidate.appendChild(node)
            if depth > 1:
                # drop everything above the chosen parent
                self._stack = self._stack[:1 - depth]
            if HTML_DTD[dtd_key]:
                # only tags with a non-empty DTD entry stay open
                self._stack.append(node)
            return

        # no parent found
        if not self._hasHtml and dtd_key == 'html':
            self._stack[0].appendChild(node)
            self._stack.append(node)
            self._hasHtml = 1
Beispiel #6
0
    def finish_starttag(self, tagname, attrs):
        """Create an element for a start tag and attach it to the tree.

        Tags whose lower-cased name is not a key of HTML_DTD are skipped.
        A META tag carrying http-equiv="content-type" may update
        self._charset from its content attribute.  The element is
        appended to the nearest entry of self._stack (index 0 excluded)
        whose HTML_DTD entry lists the tag; entries above that parent
        are dropped from the stack.  If no such parent exists, only the
        first 'html' element is attached, to self._stack[0].
        """
        decoded_name = unicode(tagname, self._charset)
        dtd_key = decoded_name.lower()
        if not HTML_DTD.has_key(dtd_key):
            # Skip any tags not defined in HTML 4.01
            return

        node = self._ownerDoc.createElementNS(EMPTY_NAMESPACE, decoded_name)

        # Allows for multiple META tags in a document
        if dtd_key == 'meta':
            folded = [(key.lower(), val.lower()) for (key, val) in attrs]
            if ('http-equiv', 'content-type') in folded:
                for (key, val) in folded:
                    if key != 'content':
                        continue
                    found = g_reCharset.search(val)
                    if found:
                        self._charset = found.group('charset')

        # Add any attributes to the tag
        for (attr_name, attr_value) in attrs:
            node.setAttributeNS(
                EMPTY_NAMESPACE,
                unicode(attr_name, self._charset),
                unicode(attr_value, self._charset))

        # Look for its parent, innermost stack entry first
        for depth in range(1, len(self._stack)):
            candidate = self._stack[-depth]
            if dtd_key not in HTML_DTD[candidate.tagName.lower()]:
                continue
            candidate.appendChild(node)
            if depth > 1:
                # drop everything above the chosen parent
                self._stack = self._stack[:1 - depth]
            if HTML_DTD[dtd_key]:
                # only tags with a non-empty DTD entry stay open
                self._stack.append(node)
            return

        # no parent found
        if not self._hasHtml and dtd_key == 'html':
            self._stack[0].appendChild(node)
            self._stack.append(node)
            self._hasHtml = 1
Beispiel #7
0
    def _4dom_createHTMLElement(self, tagName):
        """Build the element object for an HTML tag name.

        Raises TypeError when the lower-cased name is not a key of
        HTML_DTD.  Names listed in NoClassTags yield a plain HTMLElement;
        every other name is mapped to an 'HTML<Name>Element' class that
        is looked up in self._html, importing its module from
        xml.dom.html first when self._html has no entry for it.
        """
        tag_key = tagName.lower()
        if not HTML_DTD.has_key(tag_key):
            raise TypeError('Unknown HTML Element: %s' % tagName)

        if tag_key in NoClassTags:
            from HTMLElement import HTMLElement
            return HTMLElement(self, tagName)

        # Capitalize by hand: the original author notes that capitalize()
        # is broken with unicode in Python 2.0.
        first_upper = tagName[0].upper() + tag_key[1:]
        class_stem = HTMLTagMap.get(first_upper, first_upper)
        # Class and module have the same name
        class_name = 'HTML%sElement' % class_stem
        if not self._html.has_key(class_name):
            # Try to import it (should never fail)
            __import__('xml.dom.html.%s' % class_name)
        element_class = getattr(self._html[class_name], class_name)
        return element_class(self, tagName)
Beispiel #8
0
    def _4dom_createHTMLElement(self, tagName):
        """Build the element object for an HTML tag name.

        Raises TypeError when the lower-cased name is not a key of
        HTML_DTD.  Names listed in NoClassTags yield a plain HTMLElement;
        every other name is mapped to an 'HTML<Name>Element' class that
        is looked up in self._html, importing its module from
        xml.dom.html first when self._html has no entry for it.
        """
        tag_key = tagName.lower()
        if not HTML_DTD.has_key(tag_key):
            raise TypeError('Unknown HTML Element: %s' % tagName)

        if tag_key in NoClassTags:
            from HTMLElement import HTMLElement
            return HTMLElement(self, tagName)

        # Capitalize by hand: the original author notes that capitalize()
        # is broken with unicode in Python 2.0.
        first_upper = tagName[0].upper() + tag_key[1:]
        class_stem = HTMLTagMap.get(first_upper, first_upper)
        # Class and module have the same name
        class_name = 'HTML%sElement' % class_stem
        if not self._html.has_key(class_name):
            # Try to import it (should never fail)
            __import__('xml.dom.html.%s' % class_name)
        element_class = getattr(self._html[class_name], class_name)
        return element_class(self, tagName)