#!/usr/bin/env python

# Demonstration script: shows which namespace libxml2dom reports for elements
# created with and without a namespace URI, and which XPath expressions find
# them, both on a freshly built document and after the document has been
# written to "test_ns.xml" and parsed back in.
#
# Every print below passes exactly one pre-formatted string in parentheses,
# which produces the same output as the Python 2 print statement while also
# being valid syntax for the print function.

import libxml2dom
import xml.dom.minidom


# (label, XPath expression, whether the "x" -> "DAV:" prefix mapping is passed)
_QUERIES = (
    ("Find href", "href", False),
    ("Find x:href", "x:href", True),
    ("Find //no_ns", "//no_ns", False),
    ("Find x:href/no_ns", "x:href/no_ns", True),
)


def _report_queries(doc):
    # Print, for each query, whether it selects at least one node of doc.
    for label, expression, use_prefix in _QUERIES:
        if use_prefix:
            found = doc.xpath(expression, namespaces={"x": "DAV:"})
        else:
            found = doc.xpath(expression)
        print("%s %s" % (label, len(found) != 0))


print("")
print("This is libxml2dom's behaviour for default namespaces:")
print("")

# Build <href xmlns="DAV:"><no_ns/></href>, replacing the initial "doc" root.
document = libxml2dom.createDocument(None, "doc", None)
top = document.xpath("*")[0]
elem1 = document.createElementNS("DAV:", "href")
print("Namespace is %s" % repr(elem1.namespaceURI))
document.replaceChild(elem1, top)
elem2 = document.createElementNS(None, "no_ns")
print("Namespace is %s" % repr(elem2.namespaceURI))
document.xpath("*")[0].appendChild(elem2)

_report_queries(document)
print(document.toString())

# Close the output file explicitly so its contents are flushed to disk before
# the same path is parsed again below, instead of relying on the interpreter
# to finalise an anonymous file object in time.
output = open("test_ns.xml", "wb")
try:
    document.toFile(output)
finally:
    output.close()

# Repeat the same checks on the document as read back from disk.
document = libxml2dom.parse("test_ns.xml")
print("Namespace is %s" % repr(document.xpath("*")[0].namespaceURI))
print("Namespace is %s" % repr(document.xpath("*/*")[0].namespaceURI))

_report_queries(document)
print(document.toString())
print("--------")
def storeContent(params, filename=None):
    """Fetch one content item and write it out as a migration XML document.

    params -- mapping describing the item; 'bin_id', 'bin_title',
              'elementType' and 'contenturi' are read from it before it is
              replaced by the result of url_utils.getParams() on the fetched
              page.
    filename -- optional output path; defaults to "<bin_title>.xml". The
                name is always passed through urllib.quote() with the
                module-level pathChars as the safe set.

    Always returns None. Returns early, writing nothing, when an existing
    file already carries this bin id and replaceFiles is false, or when the
    content form cannot be loaded and the page is not a "Permission Denied"
    page. Nothing is written when dryRun is true.

    Uses module-level names defined outside this function: docTypes,
    replaceFiles, dryRun, pathChars, getbinid, getPage, flush_print,
    encode_unicode, replaceEntities, appendHTML and LSException.
    """
    global docTypes  # this gets blanked sometimes
    binid = params['bin_id']
    title = params['bin_title']
    elementType = params['elementType']

    if filename is None:
        filename = "%s.xml" % title
    filename = urllib.quote(filename, pathChars)

    # Resolve name collisions: keep the name if the existing file has the
    # same bin id, otherwise try "<base>.1<ext>", "<base>.2<ext>", ... until
    # a free name or a file with the same bin id is found.
    if os.path.exists(filename):
        try:
            matched = binid == getbinid(filename)
            if not matched:
                count = 1
                (base, ext) = os.path.splitext(filename)
                filename = "%s.%s%s" % (base, count, ext)
                while not matched and os.path.exists(filename):
                    matched = binid == getbinid(filename)
                    if not matched:
                        count += 1
                        filename = "%s.%s%s" % (base, count, ext)
            if matched and not replaceFiles:
                flush_print("Skipping: Extant: %s" % filename)
                return
        except LSException:
            # Best effort: report and carry on with whatever filename was
            # current when the existing file failed to parse.
            flush_print("Error: Couldn't parse: %s" % filename)

    if elementType in docTypes:
        docType = docTypes[elementType]
    else:
        flush_print("Error: Unknown document type: %s: Using unknown"
                    % elementType)
        docType = "unknown"

    contentPage = getPage(params['contenturi'])
    params = url_utils.getParams(contentPage, "docForm", params)

    # A substring test is used rather than "find(...) > 0" so that a page
    # which *starts* with the marker (index 0) is still recognised.
    permissionDenied = (
        "<script language=\"JavaScript\">alert('Permission Denied.')</script>"
    )
    if params is None and permissionDenied in contentPage:
        flush_print("Error: Permission Denied: %s" % title)
        docType = "error"
        # NOTE(review): "title" is not a key of paramNames below, so only
        # the "error" entry ends up in the output document - confirm whether
        # "bin_title" was intended.
        params = {"error": "Permission Denied", "title": title}
    elif params is None:
        flush_print("Could Not Load: %s - %s" % (elementType, title))
        return

    # Coerce every non-None value to unicode, falling back to
    # encode_unicode() for values that cannot be converted directly.
    for key in list(params):
        if params[key] is not None:
            try:
                params[key] = unicode(params[key])
            except UnicodeError:
                flush_print("Error: Converting Key to Entities: %s" % key)
                params[key] = encode_unicode(params[key])

    flush_print("Created Output Document: (%s): %s" % (docType, filename))
    outputDoc = libxml2dom.createDocument(
        "http://mpp.org/migration/%s" % docType, docType, None)
    outputRoot = outputDoc.documentElement
    if "url" in params:
        outputRoot.setAttribute("url", params['url'])
    outputRoot.setAttribute("type", elementType)
    outputRoot.setAttribute("binid", binid)

    # Combine the three date parts into one field; "//" means all three
    # parts were empty, in which case no date is recorded.
    if "content_year" in params:
        date = "%s/%s/%s" % (params['content_month'],
                             params['content_day'],
                             params['content_year'])
        if date != "//":
            params['content_date'] = date

    # Form field name -> name of the plain-text child element to emit.
    paramNames = {
        "error": "error",
        "bin_title": "title",
        "bin_subtitle": "subtitle",
        "bin_comment": "comment",
        "bin_description": "description",
        "bin_source_id": "sourceid",
        "bin_link": "href",
        "event_id": "eventid",
        "searchlocation": "locationid",
        "searchsegmentation": "searchsegmentation",
        "spreadword_subject": "subject",
        "spreadword_appendmsg": "editmsg",
        "spreadword_body": "body",
        "content_date_created": "datecreated",
        "content_date_modified": "datemodified",
        "author": "content_author",
        "copyright": "content_copyright",
        "content_date": "contentdate"
    }
    for param in paramNames:
        if param in params and params[param] != "":
            outputRoot.appendChild(outputDoc.createElement(paramNames[param]))
            outputRoot.lastChild.appendChild(
                outputDoc.createTextNode(params[param]))

    # Form field name -> name of the child element whose content is added
    # through appendHTML() after entity replacement and re-encoding.
    htmlParamNames = {"content_summary": "summary", "content_body": "body"}
    for param in htmlParamNames:
        if param in params and params[param] != "":
            params[param] = replaceEntities(params[param])
            params[param] = encode_unicode(params[param])
            outputRoot.appendChild(
                outputDoc.createElement(htmlParamNames[param]))
            appendHTML(outputRoot.lastChild, params[param])

    if not dryRun:
        # Serialise before opening the file so a serialisation failure does
        # not truncate an existing file, and always close the handle.
        serialised = outputDoc.toString(encoding="iso-8859-1", prettyprint=1)
        outFile = open(filename, "w")
        try:
            outFile.write(serialised)
        finally:
            outFile.close()
#!/usr/bin/env python

# Exercise script: parses the document named by the first command-line
# argument, imports its first top-level element ten times into a new "new"
# document, writes the result to standard output, and then frees both
# underlying native documents explicitly.

import sys

import libxml2dom

source_doc = libxml2dom.parse(sys.argv[1])
source_root = source_doc.xpath("*[1]")[0]

target_doc = libxml2dom.createDocument(None, "new", None)
target_root = target_doc.xpath("*[1]")[0]

# Deep-import (second argument 1) the source root once per iteration.
for _ in range(10):
    copied = target_doc.importNode(source_root, 1)
    target_root.appendChild(copied)

libxml2dom.toStream(target_doc, sys.stdout)

# Free the new document first, then the parsed one. The wrapper objects
# bound above are still referenced while this happens; the original script
# kept "del" statements for them commented out at this point.
native_target = target_doc.as_native_node()
native_target.freeDoc()

native_source = source_doc.as_native_node()
native_source.freeDoc()

# vim: tabstop=4 expandtab shiftwidth=4
#!/usr/bin/env python
# -*- coding: iso-8859-15 -*-

# Exercise script: parses a small ISO-8859-15 document whose <doc> element
# holds a CDATA section with non-ASCII characters, imports that element into
# a new XHTML 1.1 document, and prints the result serialised as ISO-8859-15.

import libxml2dom

# The six non-ASCII test characters are written as explicit byte escapes
# (E6 F8 E5 C6 D8 C5) so the literal carries the ISO-8859-15 bytes that the
# XML declaration promises, however this source file itself is stored or
# transcoded. The characters previously shown here were code points that
# cannot be represented in ISO-8859-15 at all.
xml_source = (
    "<?xml version='1.0' encoding=\"iso-8859-15\"?> "
    "<doc> "
    "<![CDATA[I am the character data champion! \xe6\xf8\xe5\xc6\xd8\xc5]]> "
    "</doc> "
)

d = libxml2dom.parseString(xml_source)

doctype = libxml2dom.createDocumentType(
    "html",
    "-//W3C//DTD XHTML 1.1//EN",
    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
)
d2 = libxml2dom.createDocument("http://www.w3.org/1999/xhtml", "html", doctype)

# Deep-import (second argument 1) the parsed <doc> element and attach it
# under the new document's root element.
doc = d.xpath("doc")[0]
doc2 = d2.importNode(doc, 1)
html = d2.xpath("*")[0]
html.appendChild(doc2)

# One parenthesised argument: same output as the print statement.
print(d2.toString("iso-8859-15"))

# vim: tabstop=4 expandtab shiftwidth=4