def xmlDoc_from_xml(response):
    """Return libxml2 doc for XMLs"""
    utf8body = body_as_utf8(response) or ' '
    try:
        lxdoc = libxml2.readDoc(utf8body, response.url, 'utf-8',
                                xml_parser_options)
    except TypeError:
        # libxml2 doesn't parse text with null bytes
        lxdoc = libxml2.readDoc(utf8body.replace("\x00", ""), response.url,
                                'utf-8', xml_parser_options)
    return lxdoc

def xmlDoc_from_xml(response):
    """Return libxml2 doc for XMLs"""
    utf8body = _body_as_utf8(response) or ' '
    try:
        lxdoc = libxml2.readDoc(utf8body, response.url, 'utf-8',
                                xml_parser_options)
    except TypeError:
        # libxml2 doesn't parse text with null bytes
        lxdoc = libxml2.readDoc(utf8body.replace("\x00", ""), response.url,
                                'utf-8', xml_parser_options)
    return lxdoc

def tearDown(self):
    self.basedoc = libxml2.readDoc(
        self.base_str + self.events_str.format(datexml=self.datexml_src),
        '', None, libxml2.XML_PARSE_NONET)
    expect = libxml2.readDoc(
        self.base_str + self.events_str.format(datexml=self.datexml_trg),
        '', None, libxml2.XML_PARSE_NONET)
    try:
        self.do_case(self.basedoc, expect)
    except:
        raise
    finally:
        expect.freeDoc()

def parse_xml(title, text):
    encoding = 'gbk'
    options = (libxml2.HTML_PARSE_RECOVER + libxml2.HTML_PARSE_NOWARNING +
               libxml2.HTML_PARSE_NOERROR)
    doc = libxml2.readDoc(text, None, encoding, options).doc
    ctxt = doc.xpathNewContext()
    items = ctxt.xpathEval(u'//downloadList')
    out = []
    ret = {}
    want = [u'size', u'urls', u'urlsource']
    if items:
        for z in items:
            for q in want:
                if q == "urls":
                    # u'没有合理的解释' means "no reasonable explanation" (fallback text)
                    durl = (z.xpathEval(q)[0].get_content()).decode('utf-8') if z.xpathEval(q) else u'没有合理的解释'
                    newfilename = (urllib.unquote_plus(title.decode('utf-8').strip()) +
                                   "." + (durl.split(".")[-1]))
                    print "Downloading:" + newfilename
                    #newfilename = newfilename.decode('utf-8', 'ignore')
                    #newfilename = newfilename.encode('utf-8', 'replace')
                    cmd = ("axel " + " \"" + durl + "\" --output=\"/opt/music/" +
                           newfilename.replace(" ", "") + "\" && mplayer /opt/music/" +
                           newfilename.replace(" ", ""))
                    #cmd = "cd /opt/music && curl " + " \"" + durl + "\" -o \"" + newfilename + "\" && cd -"
                    try:
                        print cmd
                        os.system(cmd.encode('utf-8'))
                        return
                    except:
                        print traceback.format_exc()

def save(self, filename=None):
    tofile = os.path.realpath(filename) if filename else self._filename
    logger.info("save xml: %s", tofile)
    savedoc = libxml2.readDoc(self._doc.serialize(), URL=None, encoding=None,
                              options=libxml2.XML_PARSE_NOBLANKS)
    savedoc.saveFormatFile(tofile, format=1)
    # libxml2.keepBlanksDefault(False)
    # self._doc.saveFormatFile(tofile, format=1)
    logger.log(logging.NOTSET, "save xml string: \n%s",
               savedoc.serialize(format=1))
    self._xpc.xpathFreeContext()
    self._doc.freeDoc()

def get_domain_device_path_from_xml(vm_xml_doc, device_name):
    logger.info('***** get_domain_device_path_from_xml is running *****')
    doc = libxml2.readDoc(vm_xml_doc, None, None, libxml2.XML_PARSE_NOENT)
    ctxt = doc.xpathNewContext()
    res = ctxt.xpathEval("/domain/devices/%s/target[@dev]" % device_name)
    if type(res) != type([]) or len(res) == 0:
        doc.freeDoc()
        return None
    disk_paths = []
    for i in xrange(len(res)):
        disk_paths.append(res[i].properties.content)
    doc.freeDoc()
    return disk_paths

def parse_peer_response_xml(self, string):
    """Parses an XML peer response."""
    snippet_list = snipdata.SnippetList()
    total_results = None
    string = re.sub(r"xmlns=(\'|\")[^\'\"]*\1", " ", string)  # remove default namespace
    try:
        if self.mimetype == 'text/html':
            xdoc = libxml2.htmlReadDoc(string, '', None, HTML_PARSE_OPTIONS)
        else:
            xdoc = libxml2.readDoc(string, '', None, XML_PARSE_OPTIONS)
    except libxml2.treeError:
        raise ValueError('Peer output error.')
    ctxt = xdoc.xpathNewContext()
    for (name, uri) in re.findall("xmlns:([^\=]*)=[\'\"]([^\'\"]*)", string):
        ctxt.xpathRegisterNs(name, uri)  # register all namespaces
        if name == 'opensearch':
            total_results = self.xpath_string_value(ctxt, "//opensearch:totalResults")
    items = ctxt.xpathEval(self.item_path)
    #print "ITEMS:", items, self.item_path
    right_now = snipdata.right_now()
    for item in items:
        ctxt.setContextNode(item)
        title = self.xpath_string_value(ctxt, self.title_path)
        title = bound_text_no_markup(title, 60)
        link = self.xpath_link(ctxt, self.link_path)
        attributes = list()
        if self.attribute_paths:
            for key_path in self.attribute_paths.split(','):
                (key, path) = key_path.split('{', 1)
                path = path[:-1]  # remove trailing '}'
                value = self.xpath_string_value(ctxt, path)
                if value:
                    attributes.append((key, value))
        if self.thumbnail_path:
            # xpath_thumbnail changes: ctxt
            thumbnail = self.xpath_thumbnail(ctxt, self.thumbnail_path)
        else:
            thumbnail = None
        if self.summary_path:
            summary = self.xpath_string_value(ctxt, self.summary_path)
        else:
            # remove title and (possibly uncommented) javascript
            for node in ctxt.xpathEval(self.title_path + '|.//script'):
                node.unlinkNode()
            summary = self.xpath_string_value(ctxt, '.')
        summary = bound_text_no_markup(summary, 300)
        snippet = snipdata.Snippet([], link, title, right_now, summary,
                                   None, thumbnail, attributes=attributes)
        snippet_list.append(snippet)
    ctxt.xpathFreeContext()
    xdoc.freeDoc()
    new_query = snipdata.Query()
    return (new_query, snipdata.PeerList(), snippet_list, total_results)

def save(self, filename=None):
    tofile = os.path.realpath(filename) if filename else self.__filename
    logger.info("save xml: %s", tofile)
    savedoc = libxml2.readDoc(self.__doc.serialize(), URL=None, encoding=None,
                              options=libxml2.XML_PARSE_NOBLANKS)
    savedoc.saveFormatFile(tofile, format=1)
    #libxml2.keepBlanksDefault(False)
    #self._doc.saveFormatFile(tofile, format=1)
    logger.log(logging.NOTSET, "save xml string: \n%s",
               savedoc.serialize(format=1))
    self.__xpc.xpathFreeContext()
    self.__doc.freeDoc()

def processUrl(url, depth=0):
    global processed_urls, network

    if slash_end.match(url):
        url = re.sub(r'/$', '', url)
    if url in processed_urls:
        return

    dprint('[%d] Processing %s' % (depth, url))
    try:
        sock = urllib.urlopen(url)
    except:
        if depth == 0:
            sys.exit(2)
        else:
            return

    options = dict(force_output=1, output_xhtml=1, add_xml_decl=1,
                   char_encoding='utf8')
    page = tidy.parseString(sock.read(), **options)
    sock.close()

    # @todo Tidy tidying success check
    if len(str(page)):
        src = libxml2.readDoc(str(page), url, None,
                              libxml2.XML_PARSE_RECOVER + libxml2.XML_PARSE_NOERROR)
    else:
        dprint('\tFailed on %s' % url)
        return

    if (depth == (max_depth + 1)):
        site = mf_extract_btm.applyStylesheet(src, {'site-url': '"' + url + '"'})
    else:
        site = mf_extract.applyStylesheet(src, {'site-url': '"' + url + '"'})

    network.addChild(site.getRootElement())
    processed_urls.append(url)

    if depth <= max_depth:
        nexturls_list = get_urls.applyStylesheet(site, None)
        for next in nexturls_list.get_content().splitlines():
            processUrl(next, depth + 1)

def get_ids(query):
    result = list()
    req = Request(
        "http://railnavigator.bahn.de/bin/rnav/query.exe/dn",
        '<?xml version="1.0" encoding="UTF-8" ?><ReqC ver="1.1" prod="String" lang="DE"><MLcReq><MLc n="%s" t="ST" /></MLcReq></ReqC>' % query,
        {"User-Agent": "Java/1.6.0_0"},
    )
    root = readDoc(urlopen(req).read(), None, "iso-8859-1",
                   XML_PARSE_NOERROR | XML_PARSE_RECOVER).children
    # <MLc t="ST" n="Bahlen Germann "Bahler Buur", Dinklage" i="A=1@O=Bahlen Germann "Bahler Buur", Dinklage@X=81
    # grrrrrrrrr - too stupid to escape their xml!!!!
    for child in root.children.children:
        if child.properties:
            for p in child.properties:
                if p.type == "attribute" and p.name == "i":
                    tokens = p.content.strip("@").split("@")
                    d = dict()
                    for t in tokens:
                        key, value = t.split("=", 1)
                        d[key] = value
                    result.append({"O": d["O"], "X": int(d["X"]),
                                   "Y": int(d["Y"]), "L": int(d["L"])})
    return result

def oparse_xml(text, key, page):
    encoding = 'gbk'
    options = (libxml2.HTML_PARSE_RECOVER + libxml2.HTML_PARSE_NOWARNING +
               libxml2.HTML_PARSE_NOERROR)
    doc = libxml2.readDoc(text, None, encoding, options).doc
    ctxt = doc.xpathNewContext()
    items = ctxt.xpathEval(u'//rss/sogouresult/item')
    out = []
    ret = {}
    want = [u'title', u'album', u'size', u'type', u'artist', u'urls']
    if items:
        print "请选择要下载的项目:"  # "Please choose the item to download:"
        for z in items:
            o = {}
            for q in want:
                # if q == "urls":
                #     url = (z.xpathEval(q)[0].get_content()).decode('utf-8') if z.xpathEval(q) else u'没有合理的解释'
                #     print (z.xpathEval(q)[0].get_content()).decode('utf-8') if z.xpathEval(q) else u'没有合理的解释'
                #     geturl.get_resource(url)
                # u'没有合理的解释' means "no reasonable explanation" (fallback text)
                o[q] = (z.xpathEval(q)[0].get_content()).decode('utf-8') if z.xpathEval(q) else u'没有合理的解释'
            out.append(o)
        index = 0
        for z in out:
            print u"[" + str(index) + u"][" + z["title"] + u"][" + z["type"] + u"][" + z["artist"] + u"][" + z["album"] + u"]:" + z["size"]
            index += 1
        zz = raw_input()
        if zz.strip().lower() == "n":
            # that means next page
            return get_resource(key, page + 1)
        if zz.strip().lower() == "p":
            return get_resource(key, page - 1)
        z = out[int(zz)]
        print z
        geturl.get_resource(z["title"].encode('utf-8'), z["urls"])
        ret["status"] = True
        ret["msg"] = out
    else:
        ret["status"] = False
        ret["msg"] = u'没有搜索到结果'  # "No results were found"

def canonicalize(self, doctxt):
    """
    Return a canonicalized string representation

    :param doctxt: the text to bring in canonical form.
    :type doctxt: either a string or an Xml document.
    :returns: The text but in canonical form.
    :rtype: string
    """
    result = ''
    if isinstance(doctxt, basestring):
        doc = libxml2.readDoc(doctxt, '', None, libxml2.XML_PARSE_NONET)
    elif isinstance(doctxt, libxml2.xmlDoc):
        doc = doctxt
    else:
        raise TypeError
    param = {}
    canonical_doc = self.style.applyStylesheet(doc, param)
    result = self.style.saveResultToString(canonical_doc)
    canonical_doc.freeDoc()
    if isinstance(doctxt, basestring):
        doc.freeDoc()
    return result

def __init__(self, xmldoc):
    self.xmldoc = xmldoc
    self.copy = libxml2.readDoc(str(self.xmldoc), '', None,
                                libxml2.XML_PARSE_NONET)

def read_rss_from_url(url):
    f = urllib.urlopen(url)
    doc = libxml2.readDoc(f.read(), None, 'UTF-8',
                          libxml2.XML_PARSE_RECOVER | libxml2.XML_PARSE_NOERROR)
    return doc

def _parse_xml(obj):
    if obj.isFile():
        return libxml2.parseFile(obj.getFile())
    else:
        return libxml2.readDoc(obj.getStream().read(), obj.getUrl(), None, 0)

def test_get_storagepool_xml(self):
    poolDefs = [
        {
            "def": {"type": "dir", "name": "unitTestDirPool", "path": "/var/temp/images"},
            "xml": """
            <pool type='dir'>
              <name>unitTestDirPool</name>
              <target>
                <path>/var/temp/images</path>
              </target>
            </pool>
            """,
        },
        {
            "def": {
                "type": "netfs",
                "name": "unitTestNFSPool",
                "source": {"host": "127.0.0.1", "path": "/var/export"},
            },
            "xml": """
            <pool type='netfs'>
              <name>unitTestNFSPool</name>
              <source>
                <host name='127.0.0.1'/>
                <dir path='/var/export'/>
              </source>
              <target>
                <path>/var/lib/kimchi/nfs_mount/unitTestNFSPool</path>
              </target>
            </pool>
            """,
        },
        {
            "def": {
                "type": "logical",
                "name": "unitTestLogicalPool",
                "source": {"devices": ["/dev/hda", "/dev/hdb"]},
            },
            "xml": """
            <pool type='logical'>
              <name>unitTestLogicalPool</name>
              <source>
                <device path="/dev/hda" />
                <device path="/dev/hdb" />
              </source>
              <target>
                <path>/dev/unitTestLogicalPool</path>
              </target>
            </pool>
            """,
        },
        {
            "def": {
                "type": "iscsi",
                "name": "unitTestISCSIPool",
                "source": {"host": "127.0.0.1", "target": "iqn.2003-01.org.linux-iscsi.localhost"},
            },
            "xml": """
            <pool type='iscsi'>
              <name>unitTestISCSIPool</name>
              <source>
                <host name='127.0.0.1' />
                <device path='iqn.2003-01.org.linux-iscsi.localhost'/>
              </source>
              <target>
                <path>/dev/disk/by-id</path>
              </target>
            </pool>
            """,
        },
        {
            "def": {
                "type": "iscsi",
                "name": "unitTestISCSIPoolPort",
                "source": {"host": "127.0.0.1", "port": 3266, "target": "iqn.2003-01.org.linux-iscsi.localhost"},
            },
            "xml": """
            <pool type='iscsi'>
              <name>unitTestISCSIPoolPort</name>
              <source>
                <host name='127.0.0.1' port='3266' />
                <device path='iqn.2003-01.org.linux-iscsi.localhost'/>
              </source>
              <target>
                <path>/dev/disk/by-id</path>
              </target>
            </pool>
            """,
        },
        {
            "def": {
                "type": "iscsi",
                "name": "unitTestISCSIPoolAuth",
                "source": {
                    "host": "127.0.0.1",
                    "target": "iqn.2003-01.org.linux-iscsi.localhost",
                    "auth": {"username": "******", "password": "******"},
                },
            },
            "xml": """
            <pool type='iscsi'>
              <name>unitTestISCSIPoolAuth</name>
              <source>
                <host name='127.0.0.1' />
                <device path='iqn.2003-01.org.linux-iscsi.localhost'/>
                <auth type='chap' username='******'>
                  <secret type='iscsi' usage='unitTestISCSIPoolAuth'/>
                </auth>
              </source>
              <target>
                <path>/dev/disk/by-id</path>
              </target>
            </pool>
            """,
        },
        {
            "def": {
                "type": "scsi",
                "name": "unitTestSCSIFCPool",
                "path": "/dev/disk/by-path",
                "source": {
                    "name": "scsi_host3",
                    "adapter_type": "fc_host",
                    "wwpn": "0123456789abcdef",
                    "wwnn": "abcdef0123456789",
                },
            },
            "xml": """
            <pool type='scsi'>
              <name>unitTestSCSIFCPool</name>
              <source>
                <adapter type='fc_host' name='scsi_host3' wwnn='abcdef0123456789' wwpn='0123456789abcdef'></adapter>
              </source>
              <target>
                <path>/dev/disk/by-path</path>
              </target>
            </pool>
            """,
        },
    ]

    for poolDef in poolDefs:
        defObj = StoragePoolDef.create(poolDef["def"])
        xmlStr = defObj.xml
        with RollbackContext() as rollback:
            t1 = libxml2.readDoc(xmlStr, URL="", encoding="UTF-8",
                                 options=libxml2.XML_PARSE_NOBLANKS)
            rollback.prependDefer(t1.freeDoc)
            t2 = libxml2.readDoc(poolDef["xml"], URL="", encoding="UTF-8",
                                 options=libxml2.XML_PARSE_NOBLANKS)
            rollback.prependDefer(t2.freeDoc)
            self.assertEquals(t1.serialize(), t2.serialize())

def test_get_storagepool_xml(self):
    poolDefs = [{
        'def': {
            'type': 'dir',
            'name': 'unitTestDirPool',
            'path': '/var/temp/images'
        },
        'xml': """
        <pool type='dir'>
          <name>unitTestDirPool</name>
          <target>
            <path>/var/temp/images</path>
          </target>
        </pool>
        """
    }, {
        'def': {
            'type': 'netfs',
            'name': 'unitTestNFSPool',
            'source': {
                'host': '127.0.0.1',
                'path': '/var/export'
            }
        },
        'xml': """
        <pool type='netfs'>
          <name>unitTestNFSPool</name>
          <source>
            <host name='127.0.0.1'/>
            <dir path='/var/export'/>
          </source>
          <target>
            <path>/var/lib/kimchi/nfs_mount/unitTestNFSPool</path>
          </target>
        </pool>
        """
    }, {
        'def': {
            'type': 'logical',
            'name': 'unitTestLogicalPool',
            'source': {
                'devices': ['/dev/hda', '/dev/hdb']
            }
        },
        'xml': """
        <pool type='logical'>
          <name>unitTestLogicalPool</name>
          <source>
            <device path="/dev/hda" />
            <device path="/dev/hdb" />
          </source>
          <target>
            <path>/var/lib/kimchi/logical_mount/unitTestLogicalPool</path>
          </target>
        </pool>
        """
    }, {
        'def': {
            'type': 'iscsi',
            'name': 'unitTestISCSIPool',
            'source': {
                'host': '127.0.0.1',
                'target': 'iqn.2003-01.org.linux-iscsi.localhost'
            }
        },
        'xml': """
        <pool type='iscsi'>
          <name>unitTestISCSIPool</name>
          <source>
            <host name='127.0.0.1' />
            <device path='iqn.2003-01.org.linux-iscsi.localhost'/>
          </source>
          <target>
            <path>/dev/disk/by-id</path>
          </target>
        </pool>
        """
    }, {
        'def': {
            'type': 'iscsi',
            'name': 'unitTestISCSIPoolPort',
            'source': {
                'host': '127.0.0.1',
                'port': 3266,
                'target': 'iqn.2003-01.org.linux-iscsi.localhost'
            }
        },
        'xml': """
        <pool type='iscsi'>
          <name>unitTestISCSIPoolPort</name>
          <source>
            <host name='127.0.0.1' port='3266' />
            <device path='iqn.2003-01.org.linux-iscsi.localhost'/>
          </source>
          <target>
            <path>/dev/disk/by-id</path>
          </target>
        </pool>
        """
    }, {
        'def': {
            'type': 'iscsi',
            'name': 'unitTestISCSIPoolAuth',
            'source': {
                'host': '127.0.0.1',
                'target': 'iqn.2003-01.org.linux-iscsi.localhost',
                'auth': {
                    'username': '******',
                    'password': '******'
                }
            }
        },
        'xml': """
        <pool type='iscsi'>
          <name>unitTestISCSIPoolAuth</name>
          <source>
            <host name='127.0.0.1' />
            <device path='iqn.2003-01.org.linux-iscsi.localhost'/>
            <auth type='chap' username='******'>
              <secret type='iscsi' usage='unitTestISCSIPoolAuth'/>
            </auth>
          </source>
          <target>
            <path>/dev/disk/by-id</path>
          </target>
        </pool>
        """
    }, {
        'def': {
            'type': 'scsi',
            'name': 'unitTestSCSIFCPool',
            'path': '/dev/disk/by-path',
            'source': {
                'name': 'scsi_host3',
                'adapter_type': 'fc_host',
                'wwpn': '0123456789abcdef',
                'wwnn': 'abcdef0123456789'
            }
        },
        'xml': """
        <pool type='scsi'>
          <name>unitTestSCSIFCPool</name>
          <source>
            <adapter type='fc_host' name='scsi_host3' wwnn='abcdef0123456789' wwpn='0123456789abcdef'></adapter>
          </source>
          <target>
            <path>/dev/disk/by-path</path>
          </target>
        </pool>
        """
    }]

    for poolDef in poolDefs:
        defObj = StoragePoolDef.create(poolDef['def'])
        xmlStr = defObj.xml
        with RollbackContext() as rollback:
            t1 = libxml2.readDoc(xmlStr, URL='', encoding='UTF-8',
                                 options=libxml2.XML_PARSE_NOBLANKS)
            rollback.prependDefer(t1.freeDoc)
            t2 = libxml2.readDoc(poolDef['xml'], URL='', encoding='UTF-8',
                                 options=libxml2.XML_PARSE_NOBLANKS)
            rollback.prependDefer(t2.freeDoc)
            self.assertEquals(t1.serialize(), t2.serialize())

def create_file(self, request=None):
    # Build an index page
    xmlfile = StringIO()

    # FIXME: Shouldn't hardcode this
    urlroot = "/music/"
    if request:
        inet, addr, port = request.getHost()
        if port == 80:
            hostport = ''
        else:
            hostport = ':%d' % port
        import urllib
        server = urllib.quote('http%s://%s%s' % (
            request.isSecure() and 's' or '',
            request.getRequestHostname(),
            hostport), "/:")
        path = request.path[len(urlroot):]
    else:
        server = "http://unknown.org"
        path = self.cwd()

    print >> xmlfile, '<?xml version="1.0" encoding="utf-8"?>\n'

    cnt = self.select()
    print >> xmlfile, '<%s length="%s">\n' % (self.document_type, cnt)

    # Strip away "list/<stylesheet>", if present
    if posixpath.basename(posixpath.dirname(path)) == "list":
        path = posixpath.dirname(posixpath.dirname(path)) + "/"

    path = xml_fix_string(path).replace("%20", ' ') or '/'
    if path != '/':
        path = "".join([(elem and "<d>%s</d>" % elem)
                        for elem in path.split("/")])
    print >> xmlfile, ' <path>%s</path>\n' % path

    self.write_body(xmlfile)

    print >> xmlfile, "</%s>\n" % self.document_type

    self.cursor.close()
    self.cursor = None

    # Perform xslt transformation on the file
    from commands import getstatusoutput
    import remus.i18n
    import libxslt
    import libxml2

    sess = request.getSession(remus.i18n.ITranslator)

    params = {
        'audiostore.root': "'%s'" % urlroot,
        'audiostore.url': "'%s%s'" % (server, urlroot),
        'l10n.gentext.language': "'%s'" % sess.lang,
    }

    xsltfile = open(self.xsltfile).read()

    # If the XSLT file contains the <menu/> element, create menu
    # and insert it.
    if xsltfile.find("<menu/>") != -1:
        import remus.webserver.menu
        menu = remus.webserver.menu.create_basemenu()
        widget = remus.webserver.menu.Menu(menu)
        menu = widget.generate(request, remus.webserver.menu.document).toxml()
        xsltfile = xsltfile.replace("<menu/>", menu)

    style = libxml2.readDoc(xsltfile, self.xsltfile, None, 0)
    stylesheet = libxslt.parseStylesheetDoc(style)
    if stylesheet == None:
        style.freeDoc()
        self.file = None
        logger.error("XSLT processing error")
        return

    xmlfile.seek(0)
    doc = libxml2.parseDoc(xmlfile.read())
    res = stylesheet.applyStylesheet(doc, params)
    result = stylesheet.saveResultToString(res)

    # Postprocess HTML pages, so IE6 is happy (#&¤%/&%#¤)
    if result.find('PUBLIC "-//W3C//DTD XHTML 1.0') != -1:
        # Remove the XML header
        result = '\n'.join(result.split('\n')[1:])
        # Remove CDATA markers
        result = result.replace("<![CDATA[", "").replace("]]>", "")

    self.file = StringIO(result)

    style.freeDoc()
    doc.freeDoc()

def apply_xslt(buf, encoding, url, xsltfile, params=None):
    """Apply xslt transform from file xsltfile to the string buf
    with parameters params. url is the location of buf. Returns the
    transformed file as a string, or None if the transformation
    couldn't be completed."""
    stylesheet = libxslt.parseStylesheetFile(xsltfile)

    if stylesheet is None:
        #self.log_info('Can\'t open stylesheet %s' % xsltfile, 'warning')
        return None
    try:
        # htmlReadDoc fails if the buffer is empty but succeeds
        # (returning an empty tree) if the buffer is a single
        # space.
        if buf == '':
            buf = ' '

        # Guess whether this is an XML or HTML document.
        if buf.startswith('<?xml'):
            doc = libxml2.readDoc(buf, url, None,
                                  libxml2.XML_PARSE_NOERROR |
                                  libxml2.XML_PARSE_NOWARNING |
                                  libxml2.XML_PARSE_NONET)
        else:
            #self.log_info('Using HTML parser', 'debug')
            doc = libxml2.htmlReadDoc(buf, url, encoding,
                                      libxml2.HTML_PARSE_NOERROR |
                                      libxml2.HTML_PARSE_NOWARNING |
                                      libxml2.HTML_PARSE_NONET)
    except libxml2.treeError:
        stylesheet.freeStylesheet()
        #self.log_info('Can\'t parse XML document', 'warning')
        return None
    resultdoc = stylesheet.applyStylesheet(doc, params)
    stylesheet.freeStylesheet()
    doc.freeDoc()
    if resultdoc is None:
        #self.log_info('Can\'t apply stylesheet', 'warning')
        return None

    # Postprocess the document:
    # Resolve relative URLs in srcurl (TODO: this should be done in XSLT)
    root = resultdoc.getRootElement()
    if root is None:
        resultdoc.freeDoc()
        return None

    node2 = root.children
    while node2 is not None:
        if node2.name not in ['link', 'button']:
            node2 = node2.next
            continue

        node = node2.children
        while node is not None:
            if (node.name == 'ref') or (node.name == 'stream') or \
                    (node.name == 'submission'):
                refurl = node.getContent()
                match = re.search(r'\?.*srcurl=([^&]*)', refurl)
                if match is not None:
                    oldurl = urllib.unquote(match.group(1))
                    absurl = urljoin_query_fix(url, oldurl)
                    newurl = refurl[:match.start(1)] + \
                        urllib.quote(absurl) + \
                        refurl[match.end(1):]
                    node.setContent(resultdoc.encodeSpecialChars(newurl))
            node = node.next
        node2 = node2.next

    ret = resultdoc.serialize('UTF-8')
    resultdoc.freeDoc()
    return ret
