def custom_serialize_xml(write, elem, encoding, qnames, namespaces,
                         indentation='\n'):
    """Serialize *elem* as XML with every attribute on its own line.

    Basically a copy of ``ElementTree._serialize_xml`` that writes
    *indentation* followed by two tabs in front of each attribute and each
    ``xmlns`` declaration of a tag.  Every attribute/value pair therefore
    ends up on a different line, which helps svn synchronization avoid file
    conflicts.

    This function should only be used with Python 2.7.

    Args:
        write: Callable invoked with each output fragment.
        elem: Element to serialize; children are serialized recursively.
        encoding: Encoding handed to the ``pyET`` encode/escape helpers.
        qnames: Mapping from raw tags, attribute names and QName texts to
            the names to emit.  A tag mapped to ``None`` is not written;
            only its text and children are.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or a false value for none.  Children always receive
            ``None``.
        indentation: String written before each attribute of this element.
    """
    tag = elem.tag
    text = elem.text
    # Children use this element's tail as their attribute indentation.  An
    # element without a tail used to hand ``None`` down, which "%s" rendered
    # as the literal text "None" inside the child's start tag; inherit the
    # current indentation instead.
    next_indentation = elem.tail if elem.tail is not None else indentation
    if tag is ET.Comment:
        write("<!--%s-->" % pyET._encode(text, encoding))
    elif tag is ET.ProcessingInstruction:
        write("<?%s?>" % pyET._encode(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # Tag suppressed: emit only the content.
            if text:
                write(pyET._escape_cdata(text, encoding))
            for e in elem:
                custom_serialize_xml(write, e, encoding, qnames, None,
                                     next_indentation)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    # Sort the declarations on prefix.
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):
                        if k:
                            k = ":" + k
                        write("%s\t\txmlns%s=\"%s\"" % (
                            indentation,
                            k.encode(encoding),
                            pyET._escape_attrib(v, encoding)
                        ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, ET.QName):
                        k = k.text
                    if isinstance(v, ET.QName):
                        v = qnames[v.text]
                    else:
                        v = pyET._escape_attrib(v, encoding)
                    write("%s\t\t%s=\"%s\"" % (indentation, qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(pyET._escape_cdata(text, encoding))
                for e in elem:
                    custom_serialize_xml(write, e, encoding, qnames, None,
                                         next_indentation)
                write("</" + tag + ">")
            else:
                write(" />")
    if elem.tail:
        write(pyET._escape_cdata(elem.tail, encoding))
def _write2(self, file, node, encoding, namespaces):
    """Write *node* and its subtree to *file* as XML.

    Comment, processing-instruction, element text and tail strings are
    written after a plain ``.encode(encoding)`` with no character escaping;
    only attribute values go through ``ElementTree._escape_attrib``.
    Children are written through ``self._write``.

    Args:
        file: Object with a ``write`` method receiving the output.
        node: Element, comment or processing instruction to write.
        encoding: Target encoding.
        namespaces: Namespace map handed to ``ElementTree.fixtag``.
            Declarations introduced by this element are deleted from it
            again once the element has been written.
    """
    # (attribute name, uri) pairs declared for the first time in this scope
    declared = []

    def _qualify(name, clark_ok):
        # Resolve a QName (or, when clark_ok, a "{uri}local" string) through
        # fixtag and remember any namespace declaration it introduces.
        try:
            if isinstance(name, ElementTree.QName) or (
                    clark_ok and name[:1] == "{"):
                name, declaration = ElementTree.fixtag(name, namespaces)
                if declaration:
                    declared.append(declaration)
        except TypeError:
            ElementTree._raise_serialization_error(name)
        return name

    kind = node.tag
    if kind is ElementTree.Comment:
        file.write("<!--%s-->" % node.text.encode(encoding))
    elif kind is ElementTree.ProcessingInstruction:
        file.write("<?%s?>" % node.text.encode(encoding))
    else:
        attributes = node.items()
        name = _qualify(kind, True)
        file.write("<" + ElementTree._encode(name, encoding))
        if attributes or declared:
            attributes.sort()  # lexical order
            for key, value in attributes:
                key = _qualify(key, True)
                value = _qualify(value, False)
                file.write(" %s=\"%s\"" % (
                    ElementTree._encode(key, encoding),
                    ElementTree._escape_attrib(value, encoding)))
            # Namespace declarations follow the regular attributes.
            for key, value in declared:
                file.write(" %s=\"%s\"" % (
                    ElementTree._encode(key, encoding),
                    ElementTree._escape_attrib(value, encoding)))
        if not node.text and not len(node):
            file.write(" />")
        else:
            file.write(">")
            if node.text:
                file.write(node.text.encode(encoding))
            for child in node:
                self._write(file, child, encoding, namespaces)
            file.write("</" + ElementTree._encode(name, encoding) + ">")
        # Leaving the element: its declarations go out of scope.
        for _, uri in declared:
            del namespaces[uri]
    if node.tail:
        file.write(node.tail.encode(encoding))
def _write(self, file, node, encoding, namespaces):
    """Write *node* and its subtree to *file* as XML.

    Follows the structure of the ElementTree ``_write`` method, except that
    attribute values are wrapped in single quotes and escaped with the
    module-level ``_escape_attrib`` rather than ``ET._escape_attrib``.

    Args:
        file: Object with a ``write`` method receiving the output.
        node: Element, comment or processing instruction to write.
        encoding: Encoding handed to the encode/escape helpers.
        namespaces: Namespace map handed to ``ET.fixtag``.  Declarations
            introduced by this element are deleted from it again once the
            element has been written.
    """
    tag = node.tag
    if tag is ET.Comment:
        file.write("<!-- %s -->" % ET._escape_cdata(node.text, encoding))
    elif tag is ET.ProcessingInstruction:
        file.write("<?%s?>" % ET._escape_cdata(node.text, encoding))
    else:
        items = node.items()
        xmlns_items = []  # new namespaces in this scope
        try:
            if isinstance(tag, ET.QName) or tag[:1] == "{":
                tag, xmlns = ET.fixtag(tag, namespaces)
                if xmlns:
                    xmlns_items.append(xmlns)
        except TypeError:
            ET._raise_serialization_error(tag)
        file.write("<" + ET._encode(tag, encoding))
        if items or xmlns_items:
            items.sort()  # lexical order
            for k, v in items:
                try:
                    if isinstance(k, ET.QName) or k[:1] == "{":
                        k, xmlns = ET.fixtag(k, namespaces)
                        if xmlns:
                            # Collect in the local list.  This used to be
                            # ``ET.xmlns_items``, which does not exist and
                            # raised AttributeError for namespaced
                            # attribute names.
                            xmlns_items.append(xmlns)
                except TypeError:
                    ET._raise_serialization_error(k)
                try:
                    if isinstance(v, ET.QName):
                        v, xmlns = ET.fixtag(v, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    ET._raise_serialization_error(v)
                file.write(" %s=\'%s\'" % (ET._encode(k, encoding),
                                           _escape_attrib(v, encoding)))
            # Namespace declarations follow the regular attributes.
            for k, v in xmlns_items:
                file.write(" %s=\'%s\'" % (ET._encode(k, encoding),
                                           _escape_attrib(v, encoding)))
        if node.text or len(node):
            file.write(">")
            if node.text:
                file.write(ET._escape_cdata(node.text, encoding))
            for n in node:
                self._write(file, n, encoding, namespaces)
            file.write("</" + ET._encode(tag, encoding) + ">")
        else:
            file.write(" />")
        # Leaving the element: its declarations go out of scope.
        for k, v in xmlns_items:
            del namespaces[v]
    if node.tail:
        file.write(ET._escape_cdata(node.tail, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* while keeping its attributes in their stored order.

    Monkeypatch replacement for ElementTree's ``_serialize_xml``.  The only
    intended difference is that attributes are emitted in the order
    ``elem.items()`` yields them instead of being sorted, so an ordered dict
    used for the attributes keeps its order in the output.  Taken from
    https://stackoverflow.com/a/30902567

    Args:
        write: Callable invoked with each output fragment.
        elem: Element to serialize; children are serialized recursively.
        encoding: Encoding handed to the ``ET`` encode/escape helpers.
        qnames: Mapping from raw tags, attribute names and QName texts to
            the names to emit.  A tag mapped to ``None`` is not written;
            only its text and children are.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or a false value for none.  Children always receive
            ``None``.
    """
    raw_tag = elem.tag
    content = elem.text

    if raw_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(content, encoding))
    elif raw_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(content, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                # Declarations are ordered by prefix.
                by_prefix = sorted(namespaces.items(),
                                   key=lambda pair: pair[1])
                for uri, prefix in by_prefix:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        ET._escape_attrib(uri, encoding),
                    ))
            # The patch: no sorted() here, attributes keep their own order.
            for key, value in attributes:
                if isinstance(key, ET.QName):
                    key = key.text
                if isinstance(value, ET.QName):
                    rendered = qnames[value.text]
                else:
                    rendered = ET._escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], rendered))
            if not content and not len(elem):
                write(" />")
            else:
                write(">")
                if content:
                    write(ET._escape_cdata(content, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
        else:
            # Tag suppressed: emit only the content.
            if content:
                write(ET._escape_cdata(content, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)

    trailing = elem.tail
    if trailing:
        write(ET._escape_cdata(trailing, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Order-preserving replacement for ElementTree's ``_serialize_xml``.

    This monkeypatch exists so that attribute order is preserved when the
    attributes are held in an ordered dict: attributes are written in
    ``elem.items()`` order rather than sorted.  It comes from
    https://stackoverflow.com/a/30902567

    Args:
        write: Callable invoked with each output fragment.
        elem: Element to serialize; children are serialized recursively.
        encoding: Encoding handed to the ``ET`` encode/escape helpers.
        qnames: Mapping from raw tags, attribute names and QName texts to
            the names to emit.  A tag mapped to ``None`` is not written;
            only its text and children are.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or a false value for none.  Children always receive
            ``None``.
    """
    node_tag = elem.tag
    node_text = elem.text

    if node_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(node_text, encoding))
    elif node_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(node_text, encoding))
    else:
        out_tag = qnames[node_tag]
        if out_tag is None:
            # Tag suppressed: emit only the content.
            if node_text:
                write(ET._escape_cdata(node_text, encoding))
            for sub in elem:
                _serialize_xml(write, sub, encoding, qnames, None)
        else:
            write("<" + out_tag)
            attrs = elem.items()
            if attrs or namespaces:
                if namespaces:
                    # Declarations are ordered by prefix.
                    ordered = sorted(namespaces.items(),
                                     key=lambda entry: entry[1])
                    for ns_uri, ns_prefix in ordered:
                        suffix = ":" + ns_prefix if ns_prefix else ns_prefix
                        write(" xmlns%s=\"%s\"" % (
                            suffix.encode(encoding),
                            ET._escape_attrib(ns_uri, encoding)))
                # The patch: iterate as-is instead of over sorted(attrs).
                for attr_name, attr_value in attrs:
                    if isinstance(attr_name, ET.QName):
                        attr_name = attr_name.text
                    if isinstance(attr_value, ET.QName):
                        attr_value = qnames[attr_value.text]
                    else:
                        attr_value = ET._escape_attrib(attr_value, encoding)
                    write(" %s=\"%s\"" % (qnames[attr_name], attr_value))
            if node_text or len(elem):
                write(">")
                if node_text:
                    write(ET._escape_cdata(node_text, encoding))
                for sub in elem:
                    _serialize_xml(write, sub, encoding, qnames, None)
                write("</" + out_tag + ">")
            else:
                write(" />")

    if elem.tail:
        write(ET._escape_cdata(elem.tail, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* to XML without sorting its attributes.

    Attributes are written in the order ``elem.items()`` returns them
    (the original marks this as a monkey patch replacing the lexical sort).

    Args:
        write: Callable invoked with each output fragment.
        elem: Element to serialize; children are serialized recursively.
        encoding: Encoding handed to the ``ET`` encode/escape helpers.
        qnames: Mapping from raw tags, attribute names and QName texts to
            the names to emit.  A tag mapped to ``None`` is not written;
            only its text and children are.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or a false value for none.  Children always receive
            ``None``.
    """
    raw_tag = elem.tag
    content = elem.text

    if raw_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(content, encoding))
    elif raw_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(content, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                # Declarations are ordered by prefix.
                by_prefix = sorted(namespaces.items(),
                                   key=lambda pair: pair[1])
                for uri, prefix in by_prefix:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        ET._escape_attrib(uri, encoding),
                    ))
            # Monkey patch: no sorted() here, attributes keep their order.
            for key, value in attributes:
                if isinstance(key, ET.QName):
                    key = key.text
                if isinstance(value, ET.QName):
                    rendered = qnames[value.text]
                else:
                    rendered = ET._escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], rendered))
            if not content and not len(elem):
                write(" />")
            else:
                write(">")
                if content:
                    write(ET._escape_cdata(content, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
        else:
            # Tag suppressed: emit only the content.
            if content:
                write(ET._escape_cdata(content, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)

    trailing = elem.tail
    if trailing:
        write(ET._escape_cdata(trailing, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* to XML, leaving attribute order untouched.

    Attributes are written in ``elem.items()`` order; the lexical sort is
    deliberately skipped (marked as a monkey patch in the original).

    Args:
        write: Callable invoked with each output fragment.
        elem: Element to serialize; children are serialized recursively.
        encoding: Encoding handed to the ``ET`` encode/escape helpers.
        qnames: Mapping from raw tags, attribute names and QName texts to
            the names to emit.  A tag mapped to ``None`` is not written;
            only its text and children are.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or a false value for none.  Children always receive
            ``None``.
    """
    node_tag = elem.tag
    node_text = elem.text

    if node_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(node_text, encoding))
    elif node_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(node_text, encoding))
    else:
        out_tag = qnames[node_tag]
        if out_tag is None:
            # Tag suppressed: emit only the content.
            if node_text:
                write(ET._escape_cdata(node_text, encoding))
            for sub in elem:
                _serialize_xml(write, sub, encoding, qnames, None)
        else:
            write("<" + out_tag)
            attrs = elem.items()
            if attrs or namespaces:
                if namespaces:
                    # Declarations are ordered by prefix.
                    ordered = sorted(namespaces.items(),
                                     key=lambda entry: entry[1])
                    for ns_uri, ns_prefix in ordered:
                        suffix = ":" + ns_prefix if ns_prefix else ns_prefix
                        write(" xmlns%s=\"%s\"" % (
                            suffix.encode(encoding),
                            ET._escape_attrib(ns_uri, encoding)))
                # Monkey patch: iterate as-is instead of over sorted(attrs).
                for attr_name, attr_value in attrs:
                    if isinstance(attr_name, ET.QName):
                        attr_name = attr_name.text
                    if isinstance(attr_value, ET.QName):
                        attr_value = qnames[attr_value.text]
                    else:
                        attr_value = ET._escape_attrib(attr_value, encoding)
                    write(" %s=\"%s\"" % (qnames[attr_name], attr_value))
            if node_text or len(elem):
                write(">")
                if node_text:
                    write(ET._escape_cdata(node_text, encoding))
                for sub in elem:
                    _serialize_xml(write, sub, encoding, qnames, None)
                write("</" + out_tag + ">")
            else:
                write(" />")

    if elem.tail:
        write(ET._escape_cdata(elem.tail, encoding))
def _escape_attrib(text, encoding=None, replace=etree.string.replace):
    """Escape *text* for use as a double-quoted XML attribute value.

    Args:
        text: Attribute value to escape.
        encoding: If true, *text* is first encoded with ``etree._encode``;
            when that raises ``UnicodeError`` the result of
            ``etree._encode_entity(text)`` is returned instead.
        replace: Three-argument replace function ``(text, old, new)``.

    Returns:
        The value with ``&``, ``"``, ``<`` and ``>`` replaced by their
        predefined XML entities.

    Raises:
        Whatever ``etree._raise_serialization_error`` raises when *text*
        cannot be processed (TypeError or AttributeError is caught).
    """
    try:
        if encoding:
            try:
                text = etree._encode(text, encoding)
            except UnicodeError:
                return etree._encode_entity(text)
        # "&" must be handled first so that the ampersands introduced by
        # the following replacements are not escaped a second time.
        text = replace(text, "&", "&amp;")
        text = replace(text, "\"", "&quot;")
        text = replace(text, "<", "&lt;")
        text = replace(text, ">", "&gt;")
        return text
    except (TypeError, AttributeError):
        etree._raise_serialization_error(text)
def cdata_serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem*, rendering comment nodes as CDATA sections.

    This is a hack to add CDATA serialization to ElementTree.  It allows
    serialization of CDATA sections at the expense of comments: any comment
    node is written as ``<![CDATA[...]]>``.  Every other node is delegated
    to ``old_serialize_xml``.

    This was necessary because ElementTree makes explicit exceptions for
    comments at several points, which makes it very difficult to add a
    dedicated CDATA handler.  It can be retired once ElementTree supports
    CDATA natively.

    NOTE: the text is written verbatim; a ``]]>`` sequence inside it is not
    split and would terminate the section early.

    Args:
        write: Callable invoked with each output fragment.
        elem: Node to serialize.
        encoding: Encoding handed to the ``etree`` encode/escape helpers.
        qnames: Passed through to ``old_serialize_xml``.
        namespaces: Passed through to ``old_serialize_xml``.
    """
    if elem.tag is etree.Comment:
        write("<![CDATA[%s]]>" % etree._encode(elem.text, encoding))
        # Text following the node lives in its tail.  It used to be dropped
        # because this branch returned before writing it.
        if elem.tail:
            write(etree._escape_cdata(elem.tail, encoding))
        return
    old_serialize_xml(write, elem, encoding, qnames, namespaces)
def _escape_attrib(text, encoding=None, replace=string.replace):
    """Escape *text* for use as a single-quoted XML attribute value.

    Args:
        text: Attribute value to escape.
        encoding: If true, *text* is first encoded with ``ET._encode``;
            when that raises ``UnicodeError`` the result of
            ``ET._encode_entity(text)`` is returned instead.
        replace: Three-argument replace function ``(text, old, new)``.

    Returns:
        The value with ``&``, ``'``, ``<`` and ``>`` replaced by their
        predefined XML entities.  Double quotes are left as they are.

    Raises:
        Whatever ``ET._raise_serialization_error`` raises when *text*
        cannot be processed (TypeError or AttributeError is caught).
    """
    try:
        if encoding:
            try:
                text = ET._encode(text, encoding)
            except UnicodeError:
                return ET._encode_entity(text)
        # "&" must be handled first so that the ampersands introduced by
        # the following replacements are not escaped a second time.
        text = replace(text, "&", "&amp;")
        text = replace(text, "'", "&apos;")
        # Double quotes are deliberately not escaped -- presumably because
        # the values are written between single quotes; confirm with the
        # writer that calls this function.
        text = replace(text, "<", "&lt;")
        text = replace(text, ">", "&gt;")
        return text
    except (TypeError, AttributeError):
        ET._raise_serialization_error(text)
def _serialize_xml(write, elem, encoding, qnames, namespaces, level=0):
    """Serialize *elem* as indented XML, with support for CDATA nodes.

    Each element, comment and processing instruction is written on its own
    line, indented by two spaces per *level*.  A node whose tag equals
    ``'![CDATA['`` is written as a CDATA section whose text goes through
    ``escape_cdata_text`` and ``str_to_unicode`` instead of the regular
    escaping.

    Args:
        write: Callable invoked with each output fragment.
        elem: Element to serialize; children are serialized recursively at
            ``level + 1``.
        encoding: Encoding handed to the encode/escape helpers.
        qnames: Mapping from raw tags, attribute names and QName texts to
            the names to emit.  A tag mapped to ``None`` is not written;
            only its text and children are.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or a false value for none.  Children always receive
            ``None``.
        level: Nesting depth used for indentation.
    """
    def _pad(depth):
        return ' ' * (2 * depth)

    raw_tag = elem.tag
    body = elem.text

    if raw_tag == '![CDATA[':
        # CDATA: special characters are NOT escaped here; per the original
        # note only "]]>" is handled, by escape_cdata_text.
        cdata = str_to_unicode(escape_cdata_text(body))
        write("<%s%s\n]]>\n" % (raw_tag, cdata.encode(encoding)))
    elif raw_tag is ET.Comment:
        write("%s<!--%s-->\n" % (_pad(level), ET._encode(body, encoding)))
    elif raw_tag is ET.ProcessingInstruction:
        write("%s<?%s?>\n" % (_pad(level), ET._encode(body, encoding)))
    else:
        name = qnames[raw_tag]
        if name is None:
            # Tag suppressed: emit only the content.
            if body:
                chunk = ET._escape_cdata(body, encoding)
                if len(elem) > 0:
                    chunk = "%s%s\n" % (_pad(level + 1), chunk)
                write(chunk)
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None,
                               level + 1)
        else:
            write("%s<%s" % (_pad(level), name))
            attributes = elem.items()
            if namespaces:
                # Declarations are ordered by prefix.
                by_prefix = sorted(namespaces.items(),
                                   key=lambda pair: pair[1])
                for uri, prefix in by_prefix:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        ET._escape_attrib(uri, encoding),
                    ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, ET.QName):
                    key = key.text
                if isinstance(value, ET.QName):
                    rendered = qnames[value.text]
                else:
                    rendered = ET._escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], rendered))
            if not body and not len(elem):
                write(" />\n")
            else:
                has_children = len(elem) > 0
                write(">")
                if has_children:
                    write("\n")
                if body:
                    chunk = ET._escape_cdata(body, encoding)
                    if has_children:
                        # Text sharing an element with children gets a line
                        # of its own at the children's depth.
                        chunk = "%s%s\n" % (_pad(level + 1), chunk)
                    write(chunk)
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None,
                                   level + 1)
                closing = "</%s>\n" % name
                if has_children:
                    closing = "%s%s" % (_pad(level), closing)
                write(closing)

    trailing = elem.tail
    if trailing:
        write("%s%s\n" % (_pad(level), ET._escape_cdata(trailing, encoding)))
def custom_xml_write(self, file, node, encoding, namespaces,
                     indentation='\n'):
    """Write *node* as XML with every attribute on its own line.

    Custom write function based on ``ElementTree.ElementTree._write``, only
    for Python 2.6.  It does the same as the original but writes
    *indentation* followed by two tabs in front of each attribute and each
    ``xmlns`` declaration, so every attribute ends up on a different line.
    ``custom_serialize_xml`` does the same for Python 2.7.

    Args:
        file: Object with a ``write`` method receiving the output.
        node: Element, comment or processing instruction to write.
        encoding: Encoding handed to the ``pyET`` encode/escape helpers.
        namespaces: Namespace map handed to ``pyET.fixtag``.  Declarations
            introduced by this element are deleted from it again once the
            element has been written.
        indentation: String written before each attribute of this element.
    """
    tag = node.tag
    # Children use this node's tail as their attribute indentation.  A node
    # without a tail used to hand ``None`` down, which "%s" rendered as the
    # literal text "None" inside the child's start tag; inherit the current
    # indentation instead.
    next_indentation = node.tail if node.tail is not None else indentation
    if tag is pyET.Comment:
        file.write("<!-- %s -->" % pyET._escape_cdata(node.text, encoding))
    elif tag is pyET.ProcessingInstruction:
        file.write("<?%s?>" % pyET._escape_cdata(node.text, encoding))
    else:
        items = node.items()
        xmlns_items = []  # new namespaces in this scope
        try:
            if isinstance(tag, pyET.QName) or tag[:1] == "{":
                tag, xmlns = pyET.fixtag(tag, namespaces)
                if xmlns:
                    xmlns_items.append(xmlns)
        except TypeError:
            pyET._raise_serialization_error(tag)
        file.write("<" + pyET._encode(tag, encoding))
        if items or xmlns_items:
            items.sort()  # lexical order
            for k, v in items:
                try:
                    if isinstance(k, pyET.QName) or k[:1] == "{":
                        k, xmlns = pyET.fixtag(k, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    pyET._raise_serialization_error(k)
                try:
                    if isinstance(v, pyET.QName):
                        v, xmlns = pyET.fixtag(v, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    pyET._raise_serialization_error(v)
                file.write("%s\t\t%s=\"%s\"" % (
                    indentation,
                    pyET._encode(k, encoding),
                    pyET._escape_attrib(v, encoding)))
            # Namespace declarations follow the regular attributes.
            for k, v in xmlns_items:
                file.write("%s\t\t%s=\"%s\"" % (
                    indentation,
                    pyET._encode(k, encoding),
                    pyET._escape_attrib(v, encoding)))
        if node.text or len(node):
            file.write(">")
            if node.text:
                file.write(pyET._escape_cdata(node.text, encoding))
            for n in node:
                # Recursion goes through self._write, which must accept the
                # extra indentation argument -- presumably this function is
                # installed in its place; confirm at the patch site.
                self._write(file, n, encoding, namespaces, next_indentation)
            file.write("</" + pyET._encode(tag, encoding) + ">")
        else:
            file.write(" />")
        # Leaving the element: its declarations go out of scope.
        for k, v in xmlns_items:
            del namespaces[v]
    if node.tail:
        file.write(pyET._escape_cdata(node.tail, encoding))