def endElement(self, namespaceUri, tagName):
    """
    Handles an endElement event.

    Steps the nesting level back by one and, when indenting is
    currently permitted, writes a newline followed by self.indent
    repeated once per level before delegating to XmlPrinter.
    """
    self._level -= 1

    if self._canIndent:
        # A start tag that is still open will be closed in the short
        # form (<tag/>); a line break here would split it.
        if not self._inElement:
            padding = self.indent * self._level
            self.writeAscii('\n' + padding)

    XmlPrinter.endElement(self, namespaceUri, tagName)

    # Whatever follows an end tag may be indented.
    self._canIndent = True
def comment(self, data):
    """
    Handles a comment event.

    Closes a still-open start tag, writes a newline plus indentation
    when indenting is currently permitted, and then delegates the
    comment itself to XmlPrinter.
    """
    if self._inElement:
        # Terminate the pending start tag before writing the comment.
        self.writeAscii('>')
        self._inElement = False

    if self._canIndent:
        padding = self.indent * self._level
        self.writeAscii('\n' + padding)

    XmlPrinter.comment(self, data)

    # Whatever follows a comment may be indented.
    self._canIndent = True
def processingInstruction(self, target, data):
    """
    Handles a processingInstruction event.

    Closes a still-open start tag, writes a newline plus indentation
    when indenting is currently permitted, and then delegates the
    processing instruction itself to XmlPrinter.
    """
    if self._inElement:
        # Terminate the pending start tag before writing the instruction.
        self.writeAscii('>')
        self._inElement = False

    if self._canIndent:
        padding = self.indent * self._level
        self.writeAscii('\n' + padding)

    XmlPrinter.processingInstruction(self, target, data)

    # Whatever follows a processing instruction may be indented.
    self._canIndent = True
def startElement(self, namespaceUri, tagName, namespaces, attributes):
    """
    Handles a startElement event.

    Closes a still-open start tag, writes a newline plus indentation
    when indenting is currently permitted, delegates the start tag to
    XmlPrinter, and then moves one nesting level deeper.
    """
    if self._inElement:
        # Terminate the enclosing start tag before opening a new one.
        self.writeAscii('>')
        self._inElement = False

    if self._canIndent:
        padding = self.indent * self._level
        self.writeAscii('\n' + padding)

    XmlPrinter.startElement(self, namespaceUri, tagName, namespaces,
                            attributes)

    # Content of this element may be indented, one level deeper.
    self._canIndent = True
    self._level += 1
def __init__(self, stream, encoding):
    """
    Creates an HtmlPrinter instance.

    stream   - a file-like object open for writing binary data.
    encoding - the encoding to use when writing to the stream.
    """
    XmlPrinter.__init__(self, stream, encoding)

    # Incremented by startElement and decremented by endElement for
    # elements listed in noEscapeElements; a non-zero value makes
    # text() disable output escaping.
    self.disableOutputEscaping = 0
def text(self, data, disableEscaping=0):
    """
    Handles a text event.

    Extends the overridden method: escaping is also disabled whenever
    self.disableOutputEscaping is set, i.e. inside the content of
    elements such as SCRIPT or STYLE.
    """
    if self._inElement:
        # Terminate the pending start tag before writing its content.
        self.writeAscii('>')
        self._inElement = False

    # An explicit request from the caller wins; otherwise fall back to
    # the printer-wide setting.
    if not disableEscaping:
        disableEscaping = self.disableOutputEscaping

    XmlPrinter.text(self, data, disableEscaping)
def doctype(self, name, publicId, systemId):
    """
    Handles a doctype event.

    Extends the overridden method with the HTML-only case of a
    document type declaration that carries a publicId but no systemId
    (permitted in HTML, not in XML). Every other combination is left
    to XmlPrinter.
    """
    html_only_form = publicId and not systemId
    if not html_only_form:
        XmlPrinter.doctype(self, name, publicId, systemId)
        return

    # <!DOCTYPE name PUBLIC "public-id">
    self.writeAscii('<!DOCTYPE ')
    self.writeEncode(name, 'document type name')
    self.writeAscii(' PUBLIC "')
    self.writeEncode(publicId, 'document type public-id')
    self.writeAscii('">\n')
def endElement(self, namespaceUri, tagName):
    """
    Handles an endElement event.

    Differs from the overridden method in that no end tag is written
    for elements listed in self.forbiddenEndElements. Elements whose
    namespace is not EMPTY_NAMESPACE are passed straight to XmlPrinter.
    """
    if namespaceUri is EMPTY_NAMESPACE:
        lowered = tagName.lower()

        if lowered not in self.forbiddenEndElements:
            self.writeAscii('</')
            self.writeEncode(tagName, 'element name')
            self.writeAscii('>')

        # Leaving a no-escape element: undo the increment made when
        # it was opened so normal escaping can resume.
        if lowered in self.noEscapeElements:
            self.disableOutputEscaping -= 1
    else:
        XmlPrinter.endElement(self, namespaceUri, tagName)
def startElement(self, namespaceUri, tagName, namespaces, attributes):
    """
    Handles a startElement event.

    Extends the overridden method by disabling output escaping for the
    content of elements listed in self.noEscapeElements (SCRIPT and
    STYLE) and by always closing the start tag immediately. Elements
    whose namespace is not EMPTY_NAMESPACE get the plain XmlPrinter
    behaviour only.
    """
    html_element = namespaceUri is EMPTY_NAMESPACE

    if html_element and tagName.lower() in self.noEscapeElements:
        self.disableOutputEscaping += 1

    XmlPrinter.startElement(self, namespaceUri, tagName, namespaces,
                            attributes)

    if html_element:
        # HTML tags are never written in minimized form ('<tag/>'),
        # so finish the start tag right away.
        self.writeAscii('>')
        self._inElement = False
def attribute(self, elementUri, elementName, name, value):
    """
    Handles an attribute event.

    Extends the overridden method by writing boolean attributes in
    minimized form and by %-escaping non-ASCII characters in the
    values of URI attributes. Attributes of elements whose namespace
    is not EMPTY_NAMESPACE are passed straight to XmlPrinter.
    """
    if elementUri is not EMPTY_NAMESPACE:
        XmlPrinter.attribute(self, elementUri, elementName, name, value)
        return

    tag = elementName.lower()
    attr = name.lower()

    is_boolean = tag in self.booleanAttributes.get(attr, [])
    if is_boolean and attr == value.lower():
        # A boolean attribute whose value repeats its name: only the
        # name is written.
        self.writeAscii(' ')
        self.writeEncode(name, 'attribute name')
        return

    if tag in self.uriAttributes.get(attr, []):
        # From HTML 4.0 Section B.2.1
        # We recommend that user agents adopt the following convention
        # for handling non-ASCII characters:
        # 1. Represent each character in UTF-8 (see [RFC2279]) as one
        #    or more bytes.
        # 2. Escape these bytes with the URI escaping mechanism (i.e.,
        #    by converting each byte to %HH, where HH is the
        #    hexadecimal notation of the byte value).
        # (Although this recommendation is for HTML user agents that
        # encounter HTML with improperly escaped URI refs, we implement
        # it in order to comply with XSLT's html output method, and
        # because there's no compelling reason not to do it for
        # non-XSLT serializations as well)
        #
        # FIXME:
        # "&" should not be escaped in an attribute value when it
        # is followed by "{" (see Section B.7.1 of HTML 4.0).
        def percent_escape(match):
            return '%%%02X' % ord(match.group())

        utf8_value = value.encode('UTF-8')
        value = unicode(re.sub('[\x80-\xff]', percent_escape, utf8_value))

    XmlPrinter.attribute(self, elementUri, elementName, name, value)
def cdataSection(self, data):
    """
    Handles a cdataSection event.

    Delegates to XmlPrinter and then turns indenting off, because the
    enclosing element now has mixed content.
    """
    XmlPrinter.cdataSection(self, data)

    # No indenting inside elements with mixed content.
    self._canIndent = False
def text(self, data, disableEscaping=0):
    """
    Handles a text event.

    Delegates to XmlPrinter and then turns indenting off, because the
    enclosing element now has mixed content.
    """
    XmlPrinter.text(self, data, disableEscaping)

    # No indenting inside elements with mixed content.
    self._canIndent = False
def __init__(self, stream, encoding):
    """
    Creates the printer and resets its indentation state.

    stream and encoding are passed unchanged to XmlPrinter.__init__.
    """
    XmlPrinter.__init__(self, stream, encoding)

    # The first element must not be preceded by a line break.
    self._canIndent = False
    # Current nesting depth; multiplies self.indent when indenting.
    self._level = 0