def transform(ctxt, src=None, dest=None, stylesheet=None):
    """Run an XML document through an XSLT stylesheet and save the result.

    Either libxslt (with its Python bindings) or MSXML must be installed;
    libxslt is tried first when both are available.

    :param ctxt: the build context
    :type ctxt: `Context`
    :param src: name of the XML input file
    :param dest: name of the XML output file
    :param stylesheet: name of the file containing the XSLT stylesheet
    """
    assert src, 'Missing required attribute "src"'
    assert dest, 'Missing required attribute "dest"'
    assert stylesheet, 'Missing required attribute "stylesheet"'

    # Bail out early when neither backend is available.
    if not have_libxslt and not have_msxml:
        ctxt.error('No usable XSLT implementation found')
        return

    if have_libxslt:
        log.debug('Using libxslt for XSLT transformation')
        source_doc = None
        style = None
        output_doc = None
        try:
            source_doc = libxml2.parseFile(ctxt.resolve(src))
            style = libxslt.parseStylesheetFile(ctxt.resolve(stylesheet))
            output_doc = style.applyStylesheet(source_doc, None)
            style.saveResultToFilename(ctxt.resolve(dest), output_doc, 0)
        finally:
            # The libxml2/libxslt objects are released explicitly, whether
            # or not the transformation succeeded.
            if style:
                style.freeStylesheet()
            if source_doc:
                source_doc.freeDoc()
            if output_doc:
                output_doc.freeDoc()
        return

    # Only MSXML is left at this point.
    log.debug('Using MSXML for XSLT transformation')

    source_doc = win32com.client.Dispatch('MSXML2.DOMDocument.3.0')
    if not source_doc.load(ctxt.resolve(src)):
        ctxt.error('Failed to parse XML source %s: %s', src,
                   source_doc.parseError.reason)
        return

    style_doc = win32com.client.Dispatch('MSXML2.DOMDocument.3.0')
    if not style_doc.load(ctxt.resolve(stylesheet)):
        ctxt.error('Failed to parse XSLT stylesheet %s: %s', stylesheet,
                   style_doc.parseError.reason)
        return

    output = source_doc.transformNode(style_doc)

    # MSXML seems to always produce the resulting XML document using UTF-16
    # encoding, regardless of the encoding specified in the stylesheet. For
    # better interoperability, recode to UTF-8 here and drop the now
    # incorrect encoding declaration.
    output = output.encode('utf-8')
    output = output.replace(' encoding="UTF-16"?>', '?>')

    out_file = open(ctxt.resolve(dest), 'w')
    try:
        out_file.write(output)
    finally:
        out_file.close()
def generate_documentation(src_file, destination_file, stylesheet_file):
    """Apply an XSLT stylesheet to an XML file and write the result to disk.

    :param src_file: path of the XML document to transform
    :param destination_file: path of the output file; it is opened with
        mode ``"w"``, so any existing content is replaced
    :param stylesheet_file: path of the XSLT stylesheet to apply
    """
    # No stylesheet parameters are passed to the transformation.
    stylesheet_args = {}

    style = None
    document = None
    result = None
    try:
        style = libxslt.parseStylesheetFile(stylesheet_file)
        document = libxml2.parseFile(src_file)
        result = style.applyStylesheet(document, stylesheet_args)

        fh = open(destination_file, "w")
        try:
            style.saveResultToFile(fh, result)
        finally:
            # Close the output file even if saving the result fails.
            fh.close()
    finally:
        # libxml2/libxslt objects wrap native memory that the Python
        # bindings do not reclaim automatically, so release them explicitly
        # on success and on failure alike.
        if style is not None:
            style.freeStylesheet()
        if document is not None:
            document.freeDoc()
        if result is not None:
            result.freeDoc()
# @todo -v option handling if len(sys.argv) <= 1: print "URL not specified!" sys.exit(1) if len(sys.argv) <= 2: max_depth = 0 else: max_depth = int(sys.argv[2]) slash_end = re.compile(r'.*/$') # Load all necessary stylsheets mf_extract = libxslt.parseStylesheetFile("mf-extract.xsl") mf_extract_btm = libxslt.parseStylesheetFile("mf-extract-bottom.xsl") get_urls = libxslt.parseStylesheetFile("get-urls.xsl") postprocess = libxslt.parseStylesheetFile("postprocess.xsl") result_xml = libxml2.newDoc("1.0") network = result_xml.addChild(libxml2.newNode('network')) def dprint(msg): sys.stderr.write(msg + '\n') def processUrl(url, depth=0): global processed_urls, network if slash_end.match(url): url = re.sub(r'/$', '', url)
def parse_xsl_file(self, xsl_file):
    """Parse the XSLT stylesheet in ``xsl_file`` and keep it on the instance.

    On success the parsed stylesheet is stored in ``self._style``.

    :param xsl_file: path of the XSLT stylesheet file
    :raises XMLError: if the stylesheet cannot be parsed
    """
    try:
        style = libxslt.parseStylesheetFile(xsl_file)
    except Exception as ex:
        raise XMLError("Error parsing xsl: %s" % str(ex))

    # The libxslt bindings report a stylesheet they cannot load by returning
    # None rather than raising, so that case has to be checked explicitly;
    # otherwise the failure would only surface later as an AttributeError
    # on self._style.
    if style is None:
        raise XMLError("Error parsing xsl: could not load stylesheet %s"
                       % (xsl_file,))

    self._style = style
def __init__(self, template_file):
    """Remember the template path and parse it as an XSLT stylesheet.

    :param template_file: path of the XSLT stylesheet file; stored as
        ``self.template_file``, with the parsed stylesheet stored as
        ``self.template``
    """
    self.template_file = template_file
    parsed_template = libxslt.parseStylesheetFile(template_file)
    self.template = parsed_template
def _rewrite_srcurl(refurl, base_url):
    """Return ``refurl`` with its ``srcurl`` query value made absolute.

    The ``srcurl`` value is unquoted, resolved against ``base_url`` and
    quoted again. Returns None when ``refurl`` carries no ``srcurl`` query
    parameter.
    """
    match = re.search(r'\?.*srcurl=([^&]*)', refurl)
    if match is None:
        return None
    oldurl = urllib.unquote(match.group(1))
    absurl = urljoin_query_fix(base_url, oldurl)
    return (refurl[:match.start(1)] +
            urllib.quote(absurl) +
            refurl[match.end(1):])


def apply_xslt(buf, encoding, url, xsltfile, params=None):
    """Apply the XSLT stylesheet in file ``xsltfile`` to the string ``buf``.

    ``buf`` is parsed as XML when it starts with ``<?xml`` and as HTML
    otherwise. After the transformation, ``srcurl`` query parameters found
    in ``ref``, ``stream`` and ``submission`` children of top-level ``link``
    and ``button`` elements are resolved against ``url``.

    :param buf: the document source as a string
    :param encoding: encoding passed to the HTML parser
    :param url: the location of ``buf``; also the base for relative URLs
    :param xsltfile: path of the XSLT stylesheet file
    :param params: stylesheet parameters passed to the transformation
    :return: the transformed document serialized as UTF-8, or None if the
        transformation couldn't be completed
    """
    stylesheet = libxslt.parseStylesheetFile(xsltfile)
    if stylesheet is None:
        # The stylesheet could not be opened.
        return None

    doc = None
    try:
        # htmlReadDoc fails if the buffer is empty but succeeds
        # (returning an empty tree) if the buffer is a single
        # space.
        if buf == '':
            buf = ' '

        try:
            # Guess whether this is an XML or HTML document.
            if buf.startswith('<?xml'):
                doc = libxml2.readDoc(buf, url, None,
                                      libxml2.XML_PARSE_NOERROR |
                                      libxml2.XML_PARSE_NOWARNING |
                                      libxml2.XML_PARSE_NONET)
            else:
                doc = libxml2.htmlReadDoc(buf, url, encoding,
                                          libxml2.HTML_PARSE_NOERROR |
                                          libxml2.HTML_PARSE_NOWARNING |
                                          libxml2.HTML_PARSE_NONET)
        except libxml2.treeError:
            # The document could not be parsed.
            return None

        resultdoc = stylesheet.applyStylesheet(doc, params)
    finally:
        # Release the stylesheet and the source document on every path,
        # including when applyStylesheet raises.
        stylesheet.freeStylesheet()
        if doc is not None:
            doc.freeDoc()

    if resultdoc is None:
        # The stylesheet could not be applied.
        return None

    try:
        # Postprocess the document:
        # Resolve relative URLs in srcurl (TODO: this should be done in XSLT)
        root = resultdoc.getRootElement()
        if root is None:
            return None

        element = root.children
        while element is not None:
            if element.name in ('link', 'button'):
                child = element.children
                while child is not None:
                    if child.name in ('ref', 'stream', 'submission'):
                        newurl = _rewrite_srcurl(child.getContent(), url)
                        if newurl is not None:
                            child.setContent(
                                resultdoc.encodeSpecialChars(newurl))
                    child = child.next
            element = element.next

        return resultdoc.serialize('UTF-8')
    finally:
        # The result document is freed even if post-processing or
        # serialization fails.
        resultdoc.freeDoc()