def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
    """Write ``node`` and its subtree as HTML through ``write``.

    Arguments:
      write      -- callable invoked with each output fragment.
      node       -- element to serialize; its ``tag``, ``text``, ``tail``,
                    ``attrib`` and ``_children`` are read (plus
                    ``structure`` when the node is a Replace node).
      encoding   -- codec name applied to text; None selects "utf-8".
      namespaces -- namespace state handed to ``fixtag`` for qualified
                    tags that are not in the XHTML namespace.
      depth      -- running counter, consulted only when ``maxdepth`` is
                    given.
      maxdepth   -- when not None, a child is written only while the
                    counter is below this value; a child landing exactly
                    on it is replaced by a " [...]" marker line if the
                    parent element has text.

    Comments and processing instructions are both emitted as HTML
    comments.  Attribute names starting with "{" are skipped and names
    listed in ``_HTMLATTRS_BOOLEAN`` are written without a value.  The
    end tag is omitted only for childless, textless elements whose tag
    is in ``_HTMLTAGS_UNBALANCED``.
    """
    if encoding is None:
        encoding = "utf-8"

    tag = node.tag
    text = node.text
    tail = node.tail

    if tag is Replace:
        if not node.structure:
            if cdata_needs_escaping(text):
                text = _escape_cdata(text)
        write(text.encode(encoding))

    elif tag is Comment or tag is ProcessingInstruction:
        if cdata_needs_escaping(text):
            text = _escape_cdata(text)
        # The parentheses matter: a method call binds tighter than "+", so
        # without them only the trailing " -->" literal would be encoded.
        write(("<!-- " + text + " -->").encode(encoding))

    else:
        xmlns_items = []  # new namespaces in this scope
        try:
            if tag[:1] == "{":
                if tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
                    tag = tag[_XHTML_PREFIX_LEN:]
                else:
                    tag, xmlns = fixtag(tag, namespaces)
                    if xmlns:
                        xmlns_items.append(xmlns)
        except TypeError:
            _raise_serialization_error(tag)

        # The start tag, attributes and leading text are accumulated and
        # handed to write() in a single call.
        to_write = "<%s" % tag.encode(encoding)

        attrib = node.attrib
        if attrib is not None:
            for k in sorted(attrib.keys()):
                try:
                    if k[:1] == "{":
                        continue
                except TypeError:
                    _raise_serialization_error(k)
                if k in _HTMLATTRS_BOOLEAN:
                    to_write += " " + k.encode(encoding)
                else:
                    # NOTE(review): the value is interpolated as-is, neither
                    # escaped nor encoded -- confirm callers rely on this.
                    v = attrib[k]
                    to_write += ' %s="%s"' % (k, v)

        for k, v in xmlns_items:
            to_write += ' %s="%s"' % (k, v)

        to_write += ">"

        if text:
            if tag not in _HTMLTAGS_NOESCAPE and cdata_needs_escaping(text):
                payload = _escape_cdata(text)
            else:
                payload = text
            # Encode on every path so the output type does not depend on
            # whether the text happened to need escaping.
            to_write += payload.encode(encoding)

        write(to_write)

        for child in node._children:
            if maxdepth is not None:
                # NOTE(review): the counter advances once per child, so later
                # siblings see a larger depth than earlier ones -- confirm
                # this is the intended truncation rule.
                depth = depth + 1
                if depth < maxdepth:
                    _write_html(write, child, encoding, namespaces, depth,
                                maxdepth)
                elif depth == maxdepth and text:
                    write(" [...]\n")
            else:
                _write_html(write, child, encoding, namespaces, depth,
                            maxdepth)

        if text or node._children or tag not in _HTMLTAGS_UNBALANCED:
            write("</" + tag.encode(encoding) + ">")

    if tail:
        if cdata_needs_escaping(tail):
            tail = _escape_cdata(tail)
        # Escaped and unescaped tails are encoded alike.
        write(tail.encode(encoding))
def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
    """Serialize ``node`` and everything beneath it as XML via ``write``.

    Arguments:
      write      -- callable invoked with each output fragment.
      node       -- element to serialize.
      encoding   -- codec name for the output; None selects "utf-8".
      namespaces -- namespace state passed to ``fixtag``; entries for
                    namespaces declared on this element are deleted again
                    once the element has been written.
      pipeline   -- when false, the ``_MELD_ID`` attribute and the
                    "xmlns:meld" declaration are left out of the output.
      xhtml      -- when true, the XHTML namespace prefix is stripped from
                    the tag before writing.

    Elements with neither text nor children are written in the
    self-closing " />" form.
    """
    if encoding is None:
        encoding = "utf-8"

    def render_attribute(name, value):
        # One ' name="value"' fragment, name encoded and value escaped.
        return ' %s="%s"' % (name.encode(encoding),
                             _escape_attrib(value, encoding))

    tag = node.tag

    if tag is Comment:
        write("<!-- %s -->" % _escape_cdata(node.text, encoding))

    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(node.text, encoding))

    elif tag is Replace:
        if node.structure:
            # this may produce invalid xml
            replacement = node.text.encode(encoding)
        else:
            replacement = _escape_cdata(node.text, encoding)
        write(replacement)

    else:
        if xhtml and tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
            tag = tag[_XHTML_PREFIX_LEN:]

        # must always be sortable.
        attributes = node.attrib.items() if node.attrib else []

        declared = []  # new namespaces in this scope
        try:
            if tag[:1] == "{":
                tag, xmlns = fixtag(tag, namespaces)
                if xmlns:
                    declared.append(xmlns)
        except TypeError:
            _raise_serialization_error(tag)

        write("<" + tag.encode(encoding))

        if attributes or declared:
            for name, value in sorted(attributes):  # lexical order
                try:
                    if name[:1] == "{":
                        if not pipeline and name == _MELD_ID:
                            continue
                        name, xmlns = fixtag(name, namespaces)
                        if xmlns:
                            declared.append(xmlns)
                    # special-case for HTML input
                    if not pipeline and name == "xmlns:meld":
                        continue
                except TypeError:
                    _raise_serialization_error(name)
                write(render_attribute(name, value))
            for name, value in declared:
                write(render_attribute(name, value))

        if node.text or node._children:
            write(">")
            if node.text:
                write(_escape_cdata(node.text, encoding))
            for child in node._children:
                _write_xml(write, child, encoding, namespaces, pipeline,
                           xhtml)
            write("</" + tag.encode(encoding) + ">")
        else:
            write(" />")

        # Namespaces introduced by this element go out of scope with it.
        for _name, uri in declared:
            del namespaces[uri]

    if node.tail:
        write(_escape_cdata(node.tail, encoding))
def _write_html_no_encoding(write, node, namespaces):
    """Serialize ``node`` as HTML, leaving every string unencoded.

    This variant exists because encoding each fragment while recursing is
    very expensive when the result is going to be serialized to utf8
    anyway; the caller can encode the finished output afterwards instead.
    Each element's start tag, attributes and leading text are built up in
    one string and passed to ``write`` in a single call, which is faster
    than calling ``write`` for every piece.

    Arguments:
      write      -- callable invoked with each output fragment.
      node       -- element to serialize.
      namespaces -- namespace state handed to ``fixtag`` for qualified
                    tags that are not in the XHTML namespace.
    """
    tag = node.tag
    text = node.text
    tail = node.tail

    if tag is Replace:
        if not node.structure and cdata_needs_escaping(text):
            text = _escape_cdata_noencoding(text)
        write(text)

    elif tag is Comment or tag is ProcessingInstruction:
        # Both node kinds come out as an HTML comment.
        if cdata_needs_escaping(text):
            text = _escape_cdata_noencoding(text)
        write("<!-- " + text + " -->")

    else:
        declared = []  # new namespaces in this scope
        try:
            if tag[:1] == "{":
                if tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
                    tag = tag[_XHTML_PREFIX_LEN:]
                else:
                    tag, xmlns = fixtag(tag, namespaces)
                    if xmlns:
                        declared.append(xmlns)
        except TypeError:
            _raise_serialization_error(tag)

        markup = "<" + tag

        attributes = node.attrib
        if attributes is not None:
            if len(attributes) > 1:
                names = sorted(attributes.keys())
            else:
                names = attributes
            for name in names:
                try:
                    if name[:1] == "{":
                        continue
                except TypeError:
                    _raise_serialization_error(name)
                if name in _HTMLATTRS_BOOLEAN:
                    markup += " " + name
                else:
                    markup += ' %s="%s"' % (name, attributes[name])

        for name, value in declared:
            markup += ' %s="%s"' % (name, value)

        markup += ">"

        if text:
            if tag not in _HTMLTAGS_NOESCAPE and cdata_needs_escaping(text):
                markup += _escape_cdata_noencoding(text)
            else:
                markup += text

        write(markup)

        for child in node._children:
            _write_html_no_encoding(write, child, namespaces)

        if text or node._children or tag not in _HTMLTAGS_UNBALANCED:
            write("</" + tag + ">")

    if tail:
        if cdata_needs_escaping(tail):
            tail = _escape_cdata_noencoding(tail)
        write(tail)
def _write_xml(write, node, encoding, namespaces, pipeline, xhtml=False):
    """Serialize ``node`` and everything beneath it as XML via ``write``.

    Arguments:
      write      -- callable invoked with each output fragment.
      node       -- element to serialize.
      encoding   -- codec name for the output; None selects 'utf-8'.
      namespaces -- namespace state passed to ``fixtag``; entries for
                    namespaces declared on this element are deleted again
                    once the element has been written.
      pipeline   -- when false, the ``_MELD_ID`` attribute and the
                    'xmlns:meld' declaration are left out of the output.
      xhtml      -- when true, the XHTML namespace prefix is stripped from
                    the tag before writing.

    Elements with neither text nor children are written in the
    self-closing " />" form.
    """
    if encoding is None:
        encoding = 'utf-8'

    def render_attribute(name, value):
        # One ' name="value"' fragment, name encoded and value escaped.
        return " %s=\"%s\"" % (name.encode(encoding),
                               _escape_attrib(value, encoding))

    tag = node.tag

    if tag is Comment:
        write("<!-- %s -->" % _escape_cdata(node.text, encoding))

    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(node.text, encoding))

    elif tag is Replace:
        if node.structure:
            # this may produce invalid xml
            replacement = node.text.encode(encoding)
        else:
            replacement = _escape_cdata(node.text, encoding)
        write(replacement)

    else:
        if xhtml and tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
            tag = tag[_XHTML_PREFIX_LEN:]

        # must always be sortable.
        attributes = node.attrib.items() if node.attrib else []

        declared = []  # new namespaces in this scope
        try:
            if tag[:1] == "{":
                tag, xmlns = fixtag(tag, namespaces)
                if xmlns:
                    declared.append(xmlns)
        except TypeError:
            _raise_serialization_error(tag)

        write("<" + tag.encode(encoding))

        if attributes or declared:
            for name, value in sorted(attributes):  # lexical order
                try:
                    if name[:1] == "{":
                        if not pipeline and name == _MELD_ID:
                            continue
                        name, xmlns = fixtag(name, namespaces)
                        if xmlns:
                            declared.append(xmlns)
                    # special-case for HTML input
                    if not pipeline and name == 'xmlns:meld':
                        continue
                except TypeError:
                    _raise_serialization_error(name)
                write(render_attribute(name, value))
            for name, value in declared:
                write(render_attribute(name, value))

        if node.text or node._children:
            write(">")
            if node.text:
                write(_escape_cdata(node.text, encoding))
            for child in node._children:
                _write_xml(write, child, encoding, namespaces, pipeline,
                           xhtml)
            write("</" + tag.encode(encoding) + ">")
        else:
            write(" />")

        # Namespaces introduced by this element go out of scope with it.
        for _name, uri in declared:
            del namespaces[uri]

    if node.tail:
        write(_escape_cdata(node.tail, encoding))
def _write_html_no_encoding(write, node, namespaces):
    """Serialize ``node`` as HTML, leaving every string unencoded.

    This variant exists because encoding each fragment while recursing is
    very expensive when the result is going to be serialized to utf8
    anyway; the caller can encode the finished output afterwards instead.
    Each element's start tag, attributes and leading text are built up in
    one string and passed to ``write`` in a single call, which is faster
    than calling ``write`` for every piece.

    Arguments:
      write      -- callable invoked with each output fragment.
      node       -- element to serialize.
      namespaces -- namespace state handed to ``fixtag`` for qualified
                    tags that are not in the XHTML namespace.
    """
    tag = node.tag
    text = node.text
    tail = node.tail

    if tag is Replace:
        if not node.structure and cdata_needs_escaping(text):
            text = _escape_cdata_noencoding(text)
        write(text)

    elif tag is Comment or tag is ProcessingInstruction:
        # Both node kinds come out as an HTML comment.
        if cdata_needs_escaping(text):
            text = _escape_cdata_noencoding(text)
        write('<!-- ' + text + ' -->')

    else:
        declared = []  # new namespaces in this scope
        try:
            if tag[:1] == "{":
                if tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
                    tag = tag[_XHTML_PREFIX_LEN:]
                else:
                    tag, xmlns = fixtag(tag, namespaces)
                    if xmlns:
                        declared.append(xmlns)
        except TypeError:
            _raise_serialization_error(tag)

        markup = "<" + tag

        attributes = node.attrib
        if attributes is not None:
            if len(attributes) > 1:
                names = sorted(attributes.keys())
            else:
                names = attributes
            for name in names:
                try:
                    if name[:1] == "{":
                        continue
                except TypeError:
                    _raise_serialization_error(name)
                if name in _HTMLATTRS_BOOLEAN:
                    markup += ' ' + name
                else:
                    markup += " %s=\"%s\"" % (name, attributes[name])

        for name, value in declared:
            markup += " %s=\"%s\"" % (name, value)

        markup += ">"

        if text:
            if tag not in _HTMLTAGS_NOESCAPE and cdata_needs_escaping(text):
                markup += _escape_cdata_noencoding(text)
            else:
                markup += text

        write(markup)

        for child in node._children:
            _write_html_no_encoding(write, child, namespaces)

        if text or node._children or tag not in _HTMLTAGS_UNBALANCED:
            write("</" + tag + ">")

    if tail:
        if cdata_needs_escaping(tail):
            tail = _escape_cdata_noencoding(tail)
        write(tail)
def _write_html(write, node, encoding, namespaces, depth=-1, maxdepth=None):
    """Write ``node`` and its subtree as HTML through ``write``.

    Arguments:
      write      -- callable invoked with each output fragment.
      node       -- element to serialize; its ``tag``, ``text``, ``tail``,
                    ``attrib`` and ``_children`` are read (plus
                    ``structure`` when the node is a Replace node).
      encoding   -- codec name applied to text; None selects 'utf-8'.
      namespaces -- namespace state handed to ``fixtag`` for qualified
                    tags that are not in the XHTML namespace.
      depth      -- running counter, consulted only when ``maxdepth`` is
                    given.
      maxdepth   -- when not None, a child is written only while the
                    counter is below this value; a child landing exactly
                    on it is replaced by a ' [...]' marker line if the
                    parent element has text.

    Comments and processing instructions are both emitted as HTML
    comments.  Attribute names starting with "{" are skipped and names
    listed in ``_HTMLATTRS_BOOLEAN`` are written without a value.  The
    end tag is omitted only for childless, textless elements whose tag
    is in ``_HTMLTAGS_UNBALANCED``.
    """
    if encoding is None:
        encoding = 'utf-8'

    tag = node.tag
    text = node.text
    tail = node.tail

    if tag is Replace:
        if not node.structure:
            if cdata_needs_escaping(text):
                text = _escape_cdata(text)
        write(text.encode(encoding))

    elif tag is Comment or tag is ProcessingInstruction:
        if cdata_needs_escaping(text):
            text = _escape_cdata(text)
        # The parentheses matter: a method call binds tighter than "+", so
        # without them only the trailing ' -->' literal would be encoded.
        write(('<!-- ' + text + ' -->').encode(encoding))

    else:
        xmlns_items = []  # new namespaces in this scope
        try:
            if tag[:1] == "{":
                if tag[:_XHTML_PREFIX_LEN] == _XHTML_PREFIX:
                    tag = tag[_XHTML_PREFIX_LEN:]
                else:
                    tag, xmlns = fixtag(tag, namespaces)
                    if xmlns:
                        xmlns_items.append(xmlns)
        except TypeError:
            _raise_serialization_error(tag)

        # The start tag, attributes and leading text are accumulated and
        # handed to write() in a single call.
        to_write = "<%s" % tag.encode(encoding)

        attrib = node.attrib
        if attrib is not None:
            for k in sorted(attrib.keys()):
                try:
                    if k[:1] == "{":
                        continue
                except TypeError:
                    _raise_serialization_error(k)
                if k in _HTMLATTRS_BOOLEAN:
                    to_write += ' ' + k.encode(encoding)
                else:
                    # NOTE(review): the value is interpolated as-is, neither
                    # escaped nor encoded -- confirm callers rely on this.
                    v = attrib[k]
                    to_write += " %s=\"%s\"" % (k, v)

        for k, v in xmlns_items:
            to_write += " %s=\"%s\"" % (k, v)

        to_write += ">"

        if text:
            if tag not in _HTMLTAGS_NOESCAPE and cdata_needs_escaping(text):
                payload = _escape_cdata(text)
            else:
                payload = text
            # Encode on every path so the output type does not depend on
            # whether the text happened to need escaping.
            to_write += payload.encode(encoding)

        write(to_write)

        for child in node._children:
            if maxdepth is not None:
                # NOTE(review): the counter advances once per child, so later
                # siblings see a larger depth than earlier ones -- confirm
                # this is the intended truncation rule.
                depth = depth + 1
                if depth < maxdepth:
                    _write_html(write, child, encoding, namespaces, depth,
                                maxdepth)
                elif depth == maxdepth and text:
                    write(' [...]\n')
            else:
                _write_html(write, child, encoding, namespaces, depth,
                            maxdepth)

        if text or node._children or tag not in _HTMLTAGS_UNBALANCED:
            write("</" + tag.encode(encoding) + ">")

    if tail:
        if cdata_needs_escaping(tail):
            tail = _escape_cdata(tail)
        # Escaped and unescaped tails are encoded alike.
        write(tail.encode(encoding))