def custom_serialize_xml(write, elem, encoding, qnames, namespaces, indentation='\n'):
    """Serialize *elem* as XML, writing every attribute on its own line.

    This is a copy of ``ElementTree._serialize_xml`` that prefixes each
    attribute (and each namespace declaration) with *indentation* followed
    by two tabs.  Having one attribute per line helps svn synchronization
    avoid file conflicts.

    This function should only be used with Python 2.7.

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        encoding: Encoding forwarded to the ElementTree escape helpers.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write.  A tag mapped to ``None`` is written without
            its own markup (only its text and children are emitted).
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value when there is nothing to declare.
        indentation: String written before each attribute of *elem*.
    """
    tag = elem.tag
    text = elem.text
    # Children reuse this element's tail as their attribute indentation.
    # An element without a tail used to hand ``None`` down, which was then
    # formatted into the output as the literal text "None"; inherit the
    # current indentation in that case instead.
    next_indentation = elem.tail
    if next_indentation is None:
        next_indentation = indentation
    if tag is ET.Comment:
        write("<!--%s-->" % pyET._encode(text, encoding))
    elif tag is ET.ProcessingInstruction:
        write("<?%s?>" % pyET._encode(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # No markup for this element: emit its content only.
            if text:
                write(pyET._escape_cdata(text, encoding))
            for e in elem:
                custom_serialize_xml(write, e, encoding, qnames, None, next_indentation)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    # Namespace declarations are sorted on prefix.
                    for v, k in sorted(namespaces.items(), key=lambda x: x[1]):
                        if k:
                            k = ":" + k
                        write("%s\t\txmlns%s=\"%s\"" % (
                            indentation,
                            k.encode(encoding),
                            pyET._escape_attrib(v, encoding)
                        ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, ET.QName):
                        k = k.text
                    if isinstance(v, ET.QName):
                        v = qnames[v.text]
                    else:
                        v = pyET._escape_attrib(v, encoding)
                    write("%s\t\t%s=\"%s\"" % (indentation, qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(pyET._escape_cdata(text, encoding))
                for e in elem:
                    custom_serialize_xml(write, e, encoding, qnames, None, next_indentation)
                write("</" + tag + ">")
            else:
                write(" />")
    if elem.tail:
        write(pyET._escape_cdata(elem.tail, encoding))
def _write(self, file, node, encoding, namespaces):
    """Write *node* and its subtree to *file* as XML.

    Attribute values are written between single quotes.  Namespace
    declarations collected while fixing up the tag, attribute names and
    QName attribute values are written after the regular attributes, and
    their entries are removed from *namespaces* again before returning.

    Args:
        file: Object with a ``write`` method receiving the output.
        node: Element, comment or processing instruction to write.
        encoding: Encoding forwarded to the encode/escape helpers.
        namespaces: Mapping handed to ``ET.fixtag``; entries for the
            declarations written on this element are deleted at the end.
    """
    # write XML to file
    tag = node.tag
    if tag is ET.Comment:
        file.write("<!-- %s -->" % ET._escape_cdata(node.text, encoding))
    elif tag is ET.ProcessingInstruction:
        file.write("<?%s?>" % ET._escape_cdata(node.text, encoding))
    else:
        items = node.items()
        xmlns_items = []  # new namespaces in this scope
        try:
            if isinstance(tag, ET.QName) or tag[:1] == "{":
                tag, xmlns = ET.fixtag(tag, namespaces)
                if xmlns:
                    xmlns_items.append(xmlns)
        except TypeError:
            ET._raise_serialization_error(tag)
        file.write("<" + ET._encode(tag, encoding))
        if items or xmlns_items:
            items.sort()  # lexical order
            for k, v in items:
                try:
                    if isinstance(k, ET.QName) or k[:1] == "{":
                        k, xmlns = ET.fixtag(k, namespaces)
                        if xmlns:
                            # Must be the local list: the ET module has no
                            # ``xmlns_items`` attribute, so the previous
                            # ``ET.xmlns_items`` raised AttributeError.
                            xmlns_items.append(xmlns)
                except TypeError:
                    ET._raise_serialization_error(k)
                try:
                    if isinstance(v, ET.QName):
                        v, xmlns = ET.fixtag(v, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    ET._raise_serialization_error(v)
                # NOTE(review): ``_escape_attrib`` is referenced unqualified;
                # presumably a helper defined elsewhere in this module that
                # matches the single-quote delimiters - confirm.
                file.write(" %s=\'%s\'" % (ET._encode(k, encoding),
                                           _escape_attrib(v, encoding)))
            for k, v in xmlns_items:
                file.write(" %s=\'%s\'" % (ET._encode(k, encoding),
                                           _escape_attrib(v, encoding)))
        if node.text or len(node):
            file.write(">")
            if node.text:
                file.write(ET._escape_cdata(node.text, encoding))
            for n in node:
                self._write(file, n, encoding, namespaces)
            file.write("</" + ET._encode(tag, encoding) + ">")
        else:
            file.write(" />")
        # Declarations made on this element go out of scope with it.
        for k, v in xmlns_items:
            del namespaces[v]
    if node.tail:
        file.write(ET._escape_cdata(node.tail, encoding))
def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements, **kwargs):
    """Serialize *elem* and its subtree as XML without sorting attributes.

    Follows the shape of ElementTree's XML serializer, but attributes are
    written in the order ``elem.items()`` yields them.

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write.  A tag mapped to ``None`` gets no markup of its
            own; only its text and children are written.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
        short_empty_elements: When true, an element without text or
            children is written as ``<tag />``; otherwise as ``<tag></tag>``.
        **kwargs: Accepted and ignored.
    """
    raw_tag = elem.tag
    content = elem.text

    if raw_tag is ET.Comment:
        write("<!--%s-->" % content)
    elif raw_tag is ET.ProcessingInstruction:
        write("<?%s?>" % content)
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = list(elem.items())
            if attributes or namespaces:
                if namespaces:
                    # Declarations are emitted sorted on prefix.
                    declarations = sorted(namespaces.items(), key=lambda pair: pair[1])
                    for uri, prefix in declarations:
                        suffix = ":" + prefix if prefix else prefix
                        write(" xmlns%s=\"%s\"" % (suffix, ET._escape_attrib(uri)))
                # Deliberately unsorted: keep the attribute order as stored.
                for attr_name, attr_value in attributes:
                    if isinstance(attr_name, ET.QName):
                        attr_name = attr_name.text
                    if isinstance(attr_value, ET.QName):
                        rendered = qnames[attr_value.text]
                    else:
                        rendered = ET._escape_attrib(attr_value)
                    write(" %s=\"%s\"" % (qnames[attr_name], rendered))
            is_empty = not content and not len(elem)
            if is_empty and short_empty_elements:
                write(" />")
            else:
                write(">")
                if content:
                    write(ET._escape_cdata(content))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
        else:
            # No markup for this element: emit its content only.
            if content:
                write(ET._escape_cdata(content))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)

    if elem.tail:
        write(ET._escape_cdata(elem.tail))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* as XML, keeping attributes in their stored order.

    This horrible monkeypatching of ElementTree is done to preserve
    attribute order via use of an ordered dict: unlike the stock serializer
    the attributes are not sorted before being written.

    It comes from https://stackoverflow.com/a/30902567

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        encoding: Encoding forwarded to the ElementTree escape helpers.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write; a tag mapped to ``None`` gets no markup.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
    """
    raw_tag = elem.tag
    content = elem.text

    if raw_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(content, encoding))
    elif raw_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(content, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # Declarations are emitted sorted on prefix.
                    by_prefix = sorted(namespaces.items(), key=lambda pair: pair[1])
                    for uri, prefix in by_prefix:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            ET._escape_attrib(uri, encoding),
                        ))
                # Monkey patch: no lexical sort here, stored order is kept.
                for attr_name, attr_value in attributes:
                    if isinstance(attr_name, ET.QName):
                        attr_name = attr_name.text
                    if isinstance(attr_value, ET.QName):
                        rendered = qnames[attr_value.text]
                    else:
                        rendered = ET._escape_attrib(attr_value, encoding)
                    write(" %s=\"%s\"" % (qnames[attr_name], rendered))
            if not content and not len(elem):
                write(" />")
            else:
                write(">")
                if content:
                    write(ET._escape_cdata(content, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
        else:
            # No markup for this element: emit its content only.
            if content:
                write(ET._escape_cdata(content, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)

    if elem.tail:
        write(ET._escape_cdata(elem.tail, encoding))
def _write(self, file, node, encoding, namespaces):
    """Write *node* and its subtree to *file* as XML.

    Attribute values are written between single quotes.  Namespace
    declarations collected while fixing up the tag, attribute names and
    QName attribute values are written after the regular attributes, and
    their entries are removed from *namespaces* again before returning.

    Args:
        file: Object with a ``write`` method receiving the output.
        node: Element, comment or processing instruction to write.
        encoding: Encoding forwarded to the encode/escape helpers.
        namespaces: Mapping handed to ``ET.fixtag``; entries for the
            declarations written on this element are deleted at the end.
    """
    # write XML to file
    tag = node.tag
    if tag is ET.Comment:
        file.write("<!-- %s -->" % ET._escape_cdata(node.text, encoding))
    elif tag is ET.ProcessingInstruction:
        file.write("<?%s?>" % ET._escape_cdata(node.text, encoding))
    else:
        items = node.items()
        xmlns_items = []  # new namespaces in this scope
        try:
            if isinstance(tag, ET.QName) or tag[:1] == "{":
                tag, xmlns = ET.fixtag(tag, namespaces)
                if xmlns:
                    xmlns_items.append(xmlns)
        except TypeError:
            ET._raise_serialization_error(tag)
        file.write("<" + ET._encode(tag, encoding))
        if items or xmlns_items:
            items.sort()  # lexical order
            for k, v in items:
                try:
                    if isinstance(k, ET.QName) or k[:1] == "{":
                        k, xmlns = ET.fixtag(k, namespaces)
                        if xmlns:
                            # Must be the local list: the ET module has no
                            # ``xmlns_items`` attribute, so the previous
                            # ``ET.xmlns_items`` raised AttributeError.
                            xmlns_items.append(xmlns)
                except TypeError:
                    ET._raise_serialization_error(k)
                try:
                    if isinstance(v, ET.QName):
                        v, xmlns = ET.fixtag(v, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    ET._raise_serialization_error(v)
                # NOTE(review): ``_escape_attrib`` is referenced unqualified;
                # presumably a helper defined elsewhere in this module that
                # matches the single-quote delimiters - confirm.
                file.write(" %s=\'%s\'" % (ET._encode(k, encoding),
                                           _escape_attrib(v, encoding)))
            for k, v in xmlns_items:
                file.write(" %s=\'%s\'" % (ET._encode(k, encoding),
                                           _escape_attrib(v, encoding)))
        if node.text or len(node):
            file.write(">")
            if node.text:
                file.write(ET._escape_cdata(node.text, encoding))
            for n in node:
                self._write(file, n, encoding, namespaces)
            file.write("</" + ET._encode(tag, encoding) + ">")
        else:
            file.write(" />")
        # Declarations made on this element go out of scope with it.
        for k, v in xmlns_items:
            del namespaces[v]
    if node.tail:
        file.write(ET._escape_cdata(node.tail, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* as XML, keeping attributes in their stored order.

    This horrible monkeypatching of ElementTree is done to preserve
    attribute order via use of an ordered dict: unlike the stock serializer
    the attributes are not sorted before being written.

    It comes from https://stackoverflow.com/a/30902567

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        encoding: Encoding forwarded to the ElementTree escape helpers.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write; a tag mapped to ``None`` gets no markup.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
    """
    source_tag = elem.tag
    body = elem.text

    if source_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(body, encoding))
    elif source_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(body, encoding))
    else:
        qualified = qnames[source_tag]
        if qualified is not None:
            write("<" + qualified)
            attrs = elem.items()
            if attrs or namespaces:
                if namespaces:
                    # Declarations are emitted sorted on prefix.
                    ordered_ns = sorted(namespaces.items(), key=lambda entry: entry[1])
                    for uri, prefix in ordered_ns:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            ET._escape_attrib(uri, encoding),
                        ))
                # Monkey patch: no lexical sort here, stored order is kept.
                for key, value in attrs:
                    if isinstance(key, ET.QName):
                        key = key.text
                    if isinstance(value, ET.QName):
                        shown = qnames[value.text]
                    else:
                        shown = ET._escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], shown))
            if not body and not len(elem):
                write(" />")
            else:
                write(">")
                if body:
                    write(ET._escape_cdata(body, encoding))
                for sub in elem:
                    _serialize_xml(write, sub, encoding, qnames, None)
                write("</" + qualified + ">")
        else:
            # No markup for this element: emit its content only.
            if body:
                write(ET._escape_cdata(body, encoding))
            for sub in elem:
                _serialize_xml(write, sub, encoding, qnames, None)

    if elem.tail:
        write(ET._escape_cdata(elem.tail, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* as XML, writing attributes in their stored order.

    Monkeypatch variant of ElementTree's XML serializer: the attributes are
    iterated exactly as ``elem.items()`` yields them instead of being
    sorted lexically.

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        encoding: Encoding forwarded to the ElementTree escape helpers.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write; a tag mapped to ``None`` gets no markup.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
    """
    raw_tag = elem.tag
    content = elem.text

    if raw_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(content, encoding))
    elif raw_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(content, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # Declarations are emitted sorted on prefix.
                    by_prefix = sorted(namespaces.items(), key=lambda pair: pair[1])
                    for uri, prefix in by_prefix:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            ET._escape_attrib(uri, encoding),
                        ))
                # Monkey patch: no lexical sort here, stored order is kept.
                for attr_name, attr_value in attributes:
                    if isinstance(attr_name, ET.QName):
                        attr_name = attr_name.text
                    if isinstance(attr_value, ET.QName):
                        rendered = qnames[attr_value.text]
                    else:
                        rendered = ET._escape_attrib(attr_value, encoding)
                    write(" %s=\"%s\"" % (qnames[attr_name], rendered))
            if not content and not len(elem):
                write(" />")
            else:
                write(">")
                if content:
                    write(ET._escape_cdata(content, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
        else:
            # No markup for this element: emit its content only.
            if content:
                write(ET._escape_cdata(content, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)

    if elem.tail:
        write(ET._escape_cdata(elem.tail, encoding))
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize *elem* as XML, writing attributes in their stored order.

    Monkeypatch variant of ElementTree's XML serializer: the attributes are
    iterated exactly as ``elem.items()`` yields them instead of being
    sorted lexically.

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        encoding: Encoding forwarded to the ElementTree escape helpers.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write; a tag mapped to ``None`` gets no markup.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
    """
    source_tag = elem.tag
    body = elem.text

    if source_tag is ET.Comment:
        write("<!--%s-->" % ET._encode(body, encoding))
    elif source_tag is ET.ProcessingInstruction:
        write("<?%s?>" % ET._encode(body, encoding))
    else:
        qualified = qnames[source_tag]
        if qualified is not None:
            write("<" + qualified)
            attrs = elem.items()
            if attrs or namespaces:
                if namespaces:
                    # Declarations are emitted sorted on prefix.
                    ordered_ns = sorted(namespaces.items(), key=lambda entry: entry[1])
                    for uri, prefix in ordered_ns:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            ET._escape_attrib(uri, encoding),
                        ))
                # Monkey patch: no lexical sort here, stored order is kept.
                for key, value in attrs:
                    if isinstance(key, ET.QName):
                        key = key.text
                    if isinstance(value, ET.QName):
                        shown = qnames[value.text]
                    else:
                        shown = ET._escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], shown))
            if not body and not len(elem):
                write(" />")
            else:
                write(">")
                if body:
                    write(ET._escape_cdata(body, encoding))
                for sub in elem:
                    _serialize_xml(write, sub, encoding, qnames, None)
                write("</" + qualified + ">")
        else:
            # No markup for this element: emit its content only.
            if body:
                write(ET._escape_cdata(body, encoding))
            for sub in elem:
                _serialize_xml(write, sub, encoding, qnames, None)

    if elem.tail:
        write(ET._escape_cdata(elem.tail, encoding))
def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements=True, **kwargs):
    """Serialize *elem* as XML without sorting its attributes.

    Variant of ElementTree's XML serializer that writes attributes in the
    order ``elem.items()`` yields them.  Children are serialized through
    ``etree._serialize_xml``, so whatever serializer is installed on the
    module at call time handles the descendants.

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write; a tag mapped to ``None`` gets no markup.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
        short_empty_elements: When true (the default, matching the previous
            behaviour of this function), an element without text or
            children is written as ``<tag />``; otherwise as
            ``<tag></tag>``.  ``ElementTree.write`` passes this keyword on
            current Python 3 versions, so it has to be accepted here.
        **kwargs: Accepted and ignored, for signature compatibility with
            the standard serializer.
    """
    tag = elem.tag
    text = elem.text
    if tag is etree.Comment:
        write("<!--%s-->" % text)
    elif tag is etree.ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        tag = qnames[tag]
        if tag is None:
            # No markup for this element: emit its content only.
            if text:
                write(etree._escape_cdata(text))
            for e in elem:
                # The module-level serializer requires short_empty_elements
                # on current Python 3, so it must be forwarded.
                etree._serialize_xml(write, e, qnames, None,
                                     short_empty_elements=short_empty_elements)
        else:
            write("<" + tag)
            items = list(elem.items())
            if items or namespaces:
                if namespaces:
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):  # sort on prefix
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, etree._escape_attrib(v)))
                for k, v in items:  # deliberately NOT in lexical order
                    if isinstance(k, etree.QName):
                        k = k.text
                    if isinstance(v, etree.QName):
                        v = qnames[v.text]
                    else:
                        v = etree._escape_attrib(v)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem) or not short_empty_elements:
                write(">")
                if text:
                    write(etree._escape_cdata(text))
                for e in elem:
                    etree._serialize_xml(write, e, qnames, None,
                                         short_empty_elements=short_empty_elements)
                write("</" + tag + ">")
            else:
                write(" />")
    if elem.tail:
        write(etree._escape_cdata(elem.tail))
def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements, **kwargs):
    """Serialize *elem*, writing ``![CDATA[`` pseudo-elements as CDATA sections.

    An element whose tag is exactly ``"![CDATA["`` is written as
    ``<![CDATA[...]]>`` surrounded by newlines, with its text unescaped,
    followed by its escaped tail.  Every other element is delegated to
    ``ET._original_serialize_xml`` (presumably the stock serializer saved
    before this function was installed - confirm against the patch site).

    Args:
        write: Callable receiving each output fragment.
        elem: Element to serialize.
        qnames, namespaces, short_empty_elements, **kwargs: Forwarded
            unchanged to ``ET._original_serialize_xml``.

    Returns:
        Whatever ``ET._original_serialize_xml`` returns for regular
        elements; ``None`` for CDATA pseudo-elements.
    """
    if elem.tag == "![CDATA[":
        # A missing text is an empty section, not the literal "None".
        text = "" if elem.text is None else str(elem.text)
        # "]]>" would terminate the section early; split it across two
        # adjacent CDATA sections so the content round-trips.
        text = text.replace("]]>", "]]]]><![CDATA[>")
        write("\n<{}{}]]>\n".format(elem.tag, text))
        if elem.tail:
            write(ET._escape_cdata(elem.tail))
    else:
        return ET._original_serialize_xml(write, elem, qnames, namespaces,
                                          short_empty_elements, **kwargs)
def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements, **kwargs):
    """Serialize *elem*, writing ``![CDATA[`` pseudo-elements as CDATA sections.

    An element whose tag is exactly ``'![CDATA['`` is written as
    ``<![CDATA[...]]>`` with its text unescaped, followed by its escaped
    tail.  Every other element is delegated to
    ``ET._original_serialize_xml`` (presumably the stock serializer saved
    before this function was installed - confirm against the patch site).

    Args:
        write: Callable receiving each output fragment.
        elem: Element to serialize.
        qnames, namespaces, short_empty_elements, **kwargs: Forwarded
            unchanged to ``ET._original_serialize_xml``.

    Returns:
        Whatever ``ET._original_serialize_xml`` returns for regular
        elements; ``None`` for CDATA pseudo-elements.
    """
    if elem.tag == '![CDATA[':
        # A missing text is an empty section, not the literal "None".
        text = "" if elem.text is None else str(elem.text)
        # "]]>" would terminate the section early; split it across two
        # adjacent CDATA sections so the content round-trips.
        text = text.replace("]]>", "]]]]><![CDATA[>")
        write("<%s%s]]>" % (elem.tag, text))
        if elem.tail:
            write(ET._escape_cdata(elem.tail))
    else:
        return ET._original_serialize_xml(write, elem, qnames, namespaces,
                                          short_empty_elements, **kwargs)
def _serialize_xml(write, elem, qnames, namespaces, short_empty_elements, **kwargs):
    """Serialize *elem*, writing ``![CDATA[`` pseudo-elements as CDATA sections.

    An element whose tag is exactly ``'![CDATA['`` is written as
    ``<![CDATA[...]]>`` with its text unescaped, followed by its escaped
    tail.  Every other element is delegated to
    ``et._original_serialize_xml`` (presumably the stock serializer saved
    before this function was installed - confirm against the patch site).

    Args:
        write: Callable receiving each output fragment.
        elem: Element to serialize.
        qnames, namespaces, short_empty_elements, **kwargs: Forwarded
            unchanged to ``et._original_serialize_xml``.

    Returns:
        Whatever ``et._original_serialize_xml`` returns for regular
        elements; ``None`` for CDATA pseudo-elements.
    """
    if elem.tag == '![CDATA[':
        # A missing text is an empty section, not the literal "None".
        text = "" if elem.text is None else str(elem.text)
        # "]]>" would terminate the section early; split it across two
        # adjacent CDATA sections so the content round-trips.
        text = text.replace("]]>", "]]]]><![CDATA[>")
        write("<%s%s]]>" % (elem.tag, text))
        if elem.tail:
            write(et._escape_cdata(elem.tail))
    else:
        return et._original_serialize_xml(write, elem, qnames, namespaces,
                                          short_empty_elements, **kwargs)
def _serialize_xml_cdata(write, elem, encoding, qnames, namespaces):
    """Serialize *elem*, wrapping ``CDATA_KEY`` elements in a CDATA section.

    Elements whose tag equals ``CDATA_KEY`` are written as
    ``<![CDATA[`` + text + children + ``]]>``; anything else is handed to
    ``_serialize_xml`` untouched.

    Args:
        write: Callable receiving each output fragment.
        elem: Element to serialize.
        encoding: Encoding forwarded to the escape helper and serializer.
        qnames: Qualified-name mapping forwarded to ``_serialize_xml``.
        namespaces: Namespace mapping forwarded to ``_serialize_xml`` for
            regular elements (children of a CDATA element get ``None``).
    """
    if not (elem.tag == CDATA_KEY):
        _serialize_xml(write, elem, encoding, qnames, namespaces)
        return

    write('<![CDATA[')
    content = elem.text
    if content:
        # NOTE(review): the text still goes through _escape_cdata here, and
        # elem.tail is not written on this path - confirm both are intended.
        write(ET._escape_cdata(content, encoding))
    for child in elem:
        # Children use the standard handler, because it is not legal to
        # have nested CDATA.
        _serialize_xml(write, child, encoding, qnames, None)
    write(']]>')
def cdata(self, s: str):
    """Write *s* as a CDATA section through ``self.write``.

    The text is passed through ``etree._escape_cdata`` before being placed
    between ``<![CDATA[`` and ``]]>``.
    """
    # No check is made here that ']]>' is absent from s.
    section = '<![CDATA[%s]]>' % (etree._escape_cdata(s), )
    self.write(section)
def _serialize_xml(write, elem, encoding, qnames, namespaces, level=0):
    """Serialize *elem* as indented XML, two spaces per nesting level.

    Every tag ends with a newline.  Elements tagged ``'![CDATA['`` are
    written as CDATA sections; comments and processing instructions are
    written indented on their own line.  Attributes are written in lexical
    order.

    Args:
        write: Callable receiving each output fragment.
        elem: Element, comment or processing instruction to serialize.
        encoding: Encoding forwarded to the encode/escape helpers.
        qnames: Mapping from raw tag / attribute names to the qualified
            names to write; a tag mapped to ``None`` gets no markup.
        namespaces: Mapping of namespace URI -> prefix to declare on this
            element, or a falsy value for none.
        level: Nesting depth of *elem*, used to compute the indentation.
    """
    pad = ' ' * (2 * level)
    child_pad = ' ' * (2 * (level + 1))
    raw_tag = elem.tag
    content = elem.text

    if raw_tag == '![CDATA[':
        # CDATA. Do NOT escape special characters except ]]>
        unicode_text = str_to_unicode(escape_cdata_text(content))
        write("<%s%s\n]]>\n" % (raw_tag, unicode_text.encode(encoding)))
    elif raw_tag is ET.Comment:
        write("%s<!--%s-->\n" % (pad, ET._encode(content, encoding)))
    elif raw_tag is ET.ProcessingInstruction:
        write("%s<?%s?>\n" % (pad, ET._encode(content, encoding)))
    else:
        name = qnames[raw_tag]
        if name is None:
            # No markup for this element: emit its content only.
            if content:
                escaped = ET._escape_cdata(content, encoding)
                if len(elem) > 0:
                    escaped = "%s%s\n" % (child_pad, escaped)
                write(escaped)
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None, level + 1)
        else:
            write("%s<%s" % (pad, name))
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # Declarations are emitted sorted on prefix.
                    by_prefix = sorted(namespaces.items(), key=lambda pair: pair[1])
                    for uri, prefix in by_prefix:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            ET._escape_attrib(uri, encoding),
                        ))
                for attr_name, attr_value in sorted(attributes):  # lexical order
                    if isinstance(attr_name, ET.QName):
                        attr_name = attr_name.text
                    if isinstance(attr_value, ET.QName):
                        rendered = qnames[attr_value.text]
                    else:
                        rendered = ET._escape_attrib(attr_value, encoding)
                    write(" %s=\"%s\"" % (qnames[attr_name], rendered))
            if not content and not len(elem):
                write(" />\n")
            else:
                has_children = len(elem) > 0
                write(">")
                if has_children:
                    write("\n")
                if content:
                    escaped = ET._escape_cdata(content, encoding)
                    if has_children:
                        # Text sharing an element with children gets its
                        # own indented line.
                        escaped = "%s%s\n" % (child_pad, escaped)
                    write(escaped)
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None, level + 1)
                closing = "</%s>\n" % name
                if has_children:
                    closing = "%s%s" % (pad, closing)
                write(closing)

    if elem.tail:
        write("%s%s\n" % (pad, ET._escape_cdata(elem.tail, encoding)))
def comment(self, s: str):
    """Write *s* as an XML comment through ``self.write``.

    The text is passed through ``etree._escape_cdata`` before being placed
    between ``<!--`` and ``-->``.
    """
    # No check is made here that '-->' is absent from s.
    markup = '<!--%s-->' % (etree._escape_cdata(s), )
    self.write(markup)
def custom_xml_write(self, file, node, encoding, namespaces, indentation='\n'):
    """Write *node* to *file* as XML with every attribute on its own line.

    Custom write function based on ``ElementTree.ElementTree._write``, only
    for Python 2.6.  It does the same job but prefixes each attribute and
    namespace declaration with *indentation* followed by two tabs.  The
    same was done with ``custom_serialize_xml`` for Python 2.7.

    Args:
        file: Object with a ``write`` method receiving the output.
        node: Element, comment or processing instruction to write.
        encoding: Encoding forwarded to the encode/escape helpers.
        namespaces: Mapping handed to ``pyET.fixtag``; entries for the
            declarations written on this element are deleted at the end.
        indentation: String written before each attribute of *node*.
    """
    tag = node.tag
    # Children reuse this node's tail as their attribute indentation.  A
    # node without a tail used to hand ``None`` down, which was then
    # formatted into the output as the literal text "None"; inherit the
    # current indentation in that case instead.
    next_indentation = node.tail
    if next_indentation is None:
        next_indentation = indentation
    if tag is pyET.Comment:
        file.write("<!-- %s -->" % pyET._escape_cdata(node.text, encoding))
    elif tag is pyET.ProcessingInstruction:
        file.write("<?%s?>" % pyET._escape_cdata(node.text, encoding))
    else:
        items = node.items()
        xmlns_items = []  # new namespaces in this scope
        try:
            if isinstance(tag, pyET.QName) or tag[:1] == "{":
                tag, xmlns = pyET.fixtag(tag, namespaces)
                if xmlns:
                    xmlns_items.append(xmlns)
        except TypeError:
            pyET._raise_serialization_error(tag)
        file.write("<" + pyET._encode(tag, encoding))
        if items or xmlns_items:
            items.sort()  # lexical order
            for k, v in items:
                try:
                    if isinstance(k, pyET.QName) or k[:1] == "{":
                        k, xmlns = pyET.fixtag(k, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    pyET._raise_serialization_error(k)
                try:
                    if isinstance(v, pyET.QName):
                        v, xmlns = pyET.fixtag(v, namespaces)
                        if xmlns:
                            xmlns_items.append(xmlns)
                except TypeError:
                    pyET._raise_serialization_error(v)
                file.write("%s\t\t%s=\"%s\"" % (indentation,
                                                pyET._encode(k, encoding),
                                                pyET._escape_attrib(v, encoding)))
            for k, v in xmlns_items:
                file.write("%s\t\t%s=\"%s\"" % (indentation,
                                                pyET._encode(k, encoding),
                                                pyET._escape_attrib(v, encoding)))
        if node.text or len(node):
            file.write(">")
            if node.text:
                file.write(pyET._escape_cdata(node.text, encoding))
            for n in node:
                # NOTE(review): recursion goes through self._write with an
                # extra indentation argument, so this function is presumably
                # installed as ``_write`` on the tree class - confirm.
                self._write(file, n, encoding, namespaces, next_indentation)
            file.write("</" + pyET._encode(tag, encoding) + ">")
        else:
            file.write(" />")
        # Declarations made on this element go out of scope with it.
        for k, v in xmlns_items:
            del namespaces[v]
    if node.tail:
        file.write(pyET._escape_cdata(node.tail, encoding))