Exemple #1
0
def transform(ctxt, src=None, dest=None, stylesheet=None):
    """Apply an XSLT stylesheet to a source XML document.

    This command requires either libxslt (with Python bindings), or MSXML to
    be installed.

    Problems loading the stylesheet or applying it are reported through
    `ctxt.error`; in that case the function returns without writing `dest`.

    :param ctxt: the build context
    :type ctxt: `Context`
    :param src: name of the XML input file
    :param dest: name of the XML output file
    :param stylesheet: name of the file containing the XSLT stylesheet
    """
    assert src, 'Missing required attribute "src"'
    assert dest, 'Missing required attribute "dest"'
    assert stylesheet, 'Missing required attribute "stylesheet"'

    if have_libxslt:
        log.debug('Using libxslt for XSLT transformation')
        srcdoc, styledoc, result = None, None, None
        try:
            srcdoc = libxml2.parseFile(ctxt.resolve(src))
            styledoc = libxslt.parseStylesheetFile(ctxt.resolve(stylesheet))
            # The libxslt bindings hand back None instead of raising when
            # the stylesheet cannot be compiled; report that the same way the
            # MSXML branch does rather than failing on the next attribute
            # access.
            if styledoc is None:
                ctxt.error('Failed to parse XSLT stylesheet %s', stylesheet)
                return
            result = styledoc.applyStylesheet(srcdoc, None)
            if result is None:
                ctxt.error('Failed to apply XSLT stylesheet %s to %s',
                           stylesheet, src)
                return
            styledoc.saveResultToFilename(ctxt.resolve(dest), result, 0)
        finally:
            # The binding objects wrap C structures that must be released
            # explicitly, whatever happened above.
            if styledoc is not None:
                styledoc.freeStylesheet()
            if srcdoc is not None:
                srcdoc.freeDoc()
            if result is not None:
                result.freeDoc()

    elif have_msxml:
        log.debug('Using MSXML for XSLT transformation')
        srcdoc = win32com.client.Dispatch('MSXML2.DOMDocument.3.0')
        if not srcdoc.load(ctxt.resolve(src)):
            err = srcdoc.parseError
            ctxt.error('Failed to parse XML source %s: %s', src, err.reason)
            return
        styledoc = win32com.client.Dispatch('MSXML2.DOMDocument.3.0')
        if not styledoc.load(ctxt.resolve(stylesheet)):
            err = styledoc.parseError
            ctxt.error('Failed to parse XSLT stylesheet %s: %s', stylesheet,
                       err.reason)
            return
        result = srcdoc.transformNode(styledoc)

        # MSXML seems to always produce the resulting XML document using
        # UTF-16 encoding, regardless of the encoding specified in the
        # stylesheet. For better interoperability, recode to UTF-8 here.
        result = result.encode('utf-8').replace(' encoding="UTF-16"?>', '?>')

        dest_file = open(ctxt.resolve(dest), 'w')
        try:
            dest_file.write(result)
        finally:
            dest_file.close()

    else:
        ctxt.error('No usable XSLT implementation found')
Exemple #2
0
def _transform_libxslt(ctxt, src, dest, stylesheet):
    """Run the transformation through the libxslt Python bindings."""
    source_doc = style = output_doc = None
    try:
        source_doc = libxml2.parseFile(ctxt.resolve(src))
        style = libxslt.parseStylesheetFile(ctxt.resolve(stylesheet))
        # A stylesheet that cannot be compiled comes back as None rather
        # than as an exception, so it has to be checked for explicitly.
        if style is None:
            ctxt.error('Failed to parse XSLT stylesheet %s', stylesheet)
            return
        output_doc = style.applyStylesheet(source_doc, None)
        if output_doc is None:
            ctxt.error('Failed to apply XSLT stylesheet %s to %s',
                       stylesheet, src)
            return
        style.saveResultToFilename(ctxt.resolve(dest), output_doc, 0)
    finally:
        # Release the underlying C structures on every exit path.
        if style is not None:
            style.freeStylesheet()
        if source_doc is not None:
            source_doc.freeDoc()
        if output_doc is not None:
            output_doc.freeDoc()


def _transform_msxml(ctxt, src, dest, stylesheet):
    """Run the transformation through the MSXML COM objects."""
    srcdoc = win32com.client.Dispatch('MSXML2.DOMDocument.3.0')
    if not srcdoc.load(ctxt.resolve(src)):
        err = srcdoc.parseError
        ctxt.error('Failed to parse XML source %s: %s', src, err.reason)
        return
    styledoc = win32com.client.Dispatch('MSXML2.DOMDocument.3.0')
    if not styledoc.load(ctxt.resolve(stylesheet)):
        err = styledoc.parseError
        ctxt.error('Failed to parse XSLT stylesheet %s: %s', stylesheet,
                   err.reason)
        return
    result = srcdoc.transformNode(styledoc)

    # MSXML seems to always produce the resulting XML document using
    # UTF-16 encoding, regardless of the encoding specified in the
    # stylesheet. For better interoperability, recode to UTF-8 here.
    result = result.encode('utf-8').replace(' encoding="UTF-16"?>', '?>')

    dest_file = open(ctxt.resolve(dest), 'w')
    try:
        dest_file.write(result)
    finally:
        dest_file.close()


def transform(ctxt, src=None, dest=None, stylesheet=None):
    """Apply an XSLT stylesheet to a source XML document.

    This command requires either libxslt (with Python bindings), or MSXML to
    be installed. libxslt is preferred when both are flagged as available.

    Problems loading the stylesheet or applying it are reported through
    `ctxt.error`; in that case nothing is written to `dest`.

    :param ctxt: the build context
    :type ctxt: `Context`
    :param src: name of the XML input file
    :param dest: name of the XML output file
    :param stylesheet: name of the file containing the XSLT stylesheet
    """
    assert src, 'Missing required attribute "src"'
    assert dest, 'Missing required attribute "dest"'
    assert stylesheet, 'Missing required attribute "stylesheet"'

    if have_libxslt:
        log.debug('Using libxslt for XSLT transformation')
        _transform_libxslt(ctxt, src, dest, stylesheet)
    elif have_msxml:
        log.debug('Using MSXML for XSLT transformation')
        _transform_msxml(ctxt, src, dest, stylesheet)
    else:
        ctxt.error('No usable XSLT implementation found')
def generate_documentation(src_file, destination_file, stylesheet_file):
    """Transform an XML file with an XSLT stylesheet and save the output.

    The stylesheet is applied without any stylesheet parameters. Every
    libxslt/libxml2 object created here is freed and the output file is
    closed before returning, including when one of the steps raises.

    :param src_file: path of the XML document to transform
    :param destination_file: path of the file the result is written to
    :param stylesheet_file: path of the XSLT stylesheet
    """
    style = document = result = None
    try:
        style = libxslt.parseStylesheetFile(stylesheet_file)
        document = libxml2.parseFile(src_file)
        result = style.applyStylesheet(document, {})

        fh = open(destination_file, "w")
        try:
            style.saveResultToFile(fh, result)
        finally:
            fh.close()
    finally:
        # The bindings wrap C structures that are not reclaimed by Python's
        # garbage collector, so release them explicitly.
        if style is not None:
            style.freeStylesheet()
        if document is not None:
            document.freeDoc()
        if result is not None:
            result.freeDoc()
Exemple #4
0
# @todo -v option handling

# The first command-line argument (the URL) is mandatory.
if len(sys.argv) < 2:
    print("URL not specified!")
    sys.exit(1)

# An optional second argument sets max_depth; it defaults to 0.
max_depth = int(sys.argv[2]) if len(sys.argv) > 2 else 0

# Matches strings that end with a slash.
slash_end = re.compile(r'.*/$')

# Load all necessary stylesheets
mf_extract = libxslt.parseStylesheetFile("mf-extract.xsl")
mf_extract_btm = libxslt.parseStylesheetFile("mf-extract-bottom.xsl")
get_urls = libxslt.parseStylesheetFile("get-urls.xsl")
postprocess = libxslt.parseStylesheetFile("postprocess.xsl")

# Result document with a single <network> element attached to it.
result_xml = libxml2.newDoc("1.0")
network = result_xml.addChild(libxml2.newNode('network'))

def dprint(msg):
    """Write `msg` followed by a newline to standard error."""
    line = msg + '\n'
    sys.stderr.write(line)

def processUrl(url, depth=0):
    """Process a single URL.

    Only the first step of this function is visible in this excerpt: it
    declares `processed_urls` and `network` as module globals and removes a
    trailing slash from `url` when `slash_end` matches it.

    :param url: the URL string to process
    :param depth: defaults to 0. NOTE(review): presumably the current depth,
        to be compared against `max_depth` -- it is not used in the visible
        code, confirm against the full function.
    """
    global processed_urls, network

    # Normalise the URL by dropping its final slash.
    if slash_end.match(url):
        url = re.sub(r'/$', '', url)
Exemple #5
0
 def parse_xsl_file(self, xsl_file):
     """Parse `xsl_file` as an XSLT stylesheet and store it in `self._style`.

     `self._style` is only assigned when parsing succeeded.

     :param xsl_file: path of the stylesheet file
     :raises XMLError: if libxslt raises while parsing, or returns no
         stylesheet object
     """
     try:
         style = libxslt.parseStylesheetFile(xsl_file)
     except Exception as ex:
         raise XMLError("Error parsing xsl: %s" % str(ex))
     # libxslt's Python bindings return None instead of raising when the
     # stylesheet cannot be compiled. The check sits outside the try block
     # so the error raised here is not caught and wrapped a second time.
     if style is None:
         raise XMLError("Error parsing xsl: %s" % xsl_file)
     self._style = style
Exemple #6
0
 def parse_xsl_file(self, xsl_file):
     """Load the XSLT stylesheet in `xsl_file` into `self._style`.

     :param xsl_file: path of the stylesheet file
     :raises XMLError: if libxslt raises while parsing the file, or if it
         returns None instead of a stylesheet object
     """
     try:
         parsed = libxslt.parseStylesheetFile(xsl_file)
     except Exception as ex:
         raise XMLError("Error parsing xsl: %s" % str(ex))
     else:
         # A None result means the bindings could not build a stylesheet;
         # treat it as a parse error rather than storing None. Raising from
         # the else clause keeps this error out of the handler above.
         if parsed is None:
             raise XMLError("Error parsing xsl: %s" % xsl_file)
     self._style = parsed
Exemple #7
0
 def __init__(self, template_file):
     """Remember `template_file` and parse it as an XSLT stylesheet.

     :param template_file: path of the stylesheet file; kept as
         `self.template_file`, with the parsed stylesheet stored in
         `self.template`
     """
     self.template_file = template_file
     parsed_template = libxslt.parseStylesheetFile(template_file)
     self.template = parsed_template
Exemple #8
0
def _read_input_document(buf, encoding, url):
    """Parse `buf` with libxml2's XML reader or, failing that, its HTML one."""
    # htmlReadDoc fails if the buffer is empty but succeeds
    # (returning an empty tree) if the buffer is a single
    # space.
    if buf == '':
        buf = ' '

    # Guess whether this is an XML or HTML document.
    if buf.startswith('<?xml'):
        return libxml2.readDoc(buf, url, None,
                               libxml2.XML_PARSE_NOERROR |
                               libxml2.XML_PARSE_NOWARNING |
                               libxml2.XML_PARSE_NONET)
    return libxml2.htmlReadDoc(buf, url, encoding,
                               libxml2.HTML_PARSE_NOERROR |
                               libxml2.HTML_PARSE_NOWARNING |
                               libxml2.HTML_PARSE_NONET)


def _resolve_srcurls(resultdoc, root, url):
    """Make the `srcurl` query values under `root` absolute against `url`."""
    # Only <link> and <button> children of the root are inspected.
    container = root.children
    while container is not None:
        if container.name in ('link', 'button'):
            node = container.children
            while node is not None:
                if node.name in ('ref', 'stream', 'submission'):
                    refurl = node.getContent()
                    match = re.search(r'\?.*srcurl=([^&]*)', refurl)
                    if match is not None:
                        # Replace just the captured srcurl value, keeping
                        # the rest of the reference untouched.
                        oldurl = urllib.unquote(match.group(1))
                        absurl = urljoin_query_fix(url, oldurl)
                        newurl = (refurl[:match.start(1)] +
                                  urllib.quote(absurl) +
                                  refurl[match.end(1):])
                        node.setContent(
                            resultdoc.encodeSpecialChars(newurl))
                node = node.next
        container = container.next


def apply_xslt(buf, encoding, url, xsltfile, params=None):
    """Apply the XSLT transform in file `xsltfile` to the string `buf`.

    After the transformation, relative URLs found in `srcurl` query
    parameters of the result are resolved against `url`.

    All libxslt/libxml2 objects created here are freed before returning,
    including when one of the steps raises.

    :param buf: the document to transform; parsed as XML when it starts
        with ``<?xml``, otherwise as HTML
    :param encoding: encoding passed to the HTML parser
    :param url: the location of `buf`
    :param xsltfile: name of the file containing the XSLT stylesheet
    :param params: parameters passed to the stylesheet
    :return: the transformed document serialized as UTF-8, or None if the
        stylesheet could not be loaded, `buf` could not be parsed, the
        transformation failed, or the result has no root element
    """
    stylesheet = libxslt.parseStylesheetFile(xsltfile)
    if stylesheet is None:
        return None

    doc = None
    try:
        try:
            doc = _read_input_document(buf, encoding, url)
        except libxml2.treeError:
            return None
        resultdoc = stylesheet.applyStylesheet(doc, params)
    finally:
        # Neither object is needed once the result document exists.
        stylesheet.freeStylesheet()
        if doc is not None:
            doc.freeDoc()

    if resultdoc is None:
        return None

    try:
        # Postprocess the document:
        # Resolve relative URLs in srcurl (TODO: this should be done in XSLT)
        root = resultdoc.getRootElement()
        if root is None:
            return None
        _resolve_srcurls(resultdoc, root, url)
        return resultdoc.serialize('UTF-8')
    finally:
        resultdoc.freeDoc()