Exemple #1
0
    def transform(self, tree, template_path, **args):
        """
        Transform an element tree with XSLT

        Every keyword argument is wrapped in single quotes before it is
        handed to the stylesheet, so the stylesheet receives it as an XPath
        string literal.

        @param tree: the element tree
        @param template_path: pathname of the XSLT stylesheet
        @param args: dict of arguments to pass to the stylesheet

        @return: the transformation result, or None if the stylesheet could
                 not be parsed or the transformation raised an error (the
                 exception is then available as self.error)

        """

        self.error = None

        # NOTE(review): a value that itself contains a single quote produces
        # an invalid XPath literal; etree.XSLT.strparam() would cope with
        # that - confirm the expected inputs before switching.
        _args = dict((k, "'%s'" % args[k]) for k in args)

        ac = etree.XSLTAccessControl(read_file=True, read_network=True)
        template = self.parse(template_path)

        if not template:
            # Error parsing the XSL template
            return None

        try:
            transformer = etree.XSLT(template, access_control=ac)
            # Passing an empty dict is the same as passing no parameters
            return transformer(tree, **_args)
        except Exception:
            # Trap real errors only: a bare "except:" would also swallow
            # KeyboardInterrupt and SystemExit.
            self.error = sys.exc_info()[1]
            return None
Exemple #2
0
def __build_lxml(target, source, env):
    """
    General XSLT builder (HTML/FO), using the lxml module.

    Applies the stylesheet named by ``$DOCBOOK_XSL`` to ``source[0]`` and
    writes the pretty-printed result to ``target[0]``.  The XSLT access
    control allows file reads/writes and directory creation, but no network
    access.

    Always returns None.  A failure to serialize or write the result is
    reported on stdout rather than raised, so writing stays best-effort.
    """
    from lxml import etree

    xslt_ac = etree.XSLTAccessControl(read_file=True,
                                      write_file=True,
                                      create_dir=True,
                                      read_network=False,
                                      write_network=False)
    xsl_style = env.subst('$DOCBOOK_XSL')
    xsl_tree = etree.parse(xsl_style)
    transform = etree.XSLT(xsl_tree, access_control=xslt_ac)
    doc = etree.parse(str(source[0]))
    # Support for additional parameters (none are passed at the moment)
    parampass = {}
    result = transform(doc, **parampass)

    target_path = str(target[0])
    try:
        # etree.tostring() yields bytes, hence binary mode.  The context
        # manager closes the file even when serialization fails.
        with open(target_path, "wb") as of:
            of.write(etree.tostring(result, pretty_print=True))
    except Exception as e:
        # Keep the build going, but do not hide the failure.
        print("ERROR: Failed to write {}".format(target_path))
        print(e)

    return None
Exemple #3
0
def __build_lxml(target, source, env):
    """
    Run the ``$DOCBOOK_XSL`` stylesheet over ``source[0]`` with lxml (HTML/FO).

    The transformed document is written to ``target[0]``.  Returns None.
    """
    from lxml import etree

    access_control = etree.XSLTAccessControl(
        read_file=True,
        write_file=True,
        create_dir=True,
        read_network=False,
        write_network=False,
    )
    stylesheet = etree.parse(env.subst('$DOCBOOK_XSL'))
    transform = etree.XSLT(stylesheet, access_control=access_control)
    document = etree.parse(str(source[0]))

    # Hook for additional stylesheet parameters; currently always empty.
    parampass = {}
    result = transform(document, **parampass)

    # Pretty-printed output is preferred for readability.  When the output
    # is not an XML file the transformed tree can contain a None somewhere,
    # which makes tostring() raise TypeError; in that case let the result
    # object write itself instead.
    try:
        with open(str(target[0]), "wb") as of:
            serialized = etree.tostring(result, pretty_print=True)
            of.write(serialized)
    except TypeError:
        result.write_output(str(target[0]))

    return None
Exemple #4
0
def __build_lxml(target, source, env):
    """
    Run the ``$DOCBOOK_XSL`` stylesheet over ``source[0]`` with lxml (HTML/FO).

    The result is serialized as pretty-printed UTF-8 into ``target[0]``.
    Any error raised while writing is printed, not propagated.  Returns None.
    """
    from lxml import etree

    access_control = etree.XSLTAccessControl(
        read_file=True,
        write_file=True,
        create_dir=True,
        read_network=False,
        write_network=False,
    )
    stylesheet = etree.parse(env.subst('$DOCBOOK_XSL'))
    transform = etree.XSLT(stylesheet, access_control=access_control)
    document = etree.parse(str(source[0]))

    # Hook for additional stylesheet parameters; currently always empty.
    parampass = {}
    result = transform(document, **parampass)

    try:
        with open(str(target[0]), "wb") as of:
            serialized = etree.tostring(
                result, encoding="utf-8", pretty_print=True)
            of.write(serialized)
    except Exception as e:
        print("ERROR: Failed to write {}".format(str(target[0])))
        print(e)

    return None
Exemple #5
0
def compileThemeTransform(rules, absolutePrefix=None, readNetwork=False, parameterExpressions=None, runtrace=False):
    """Prepare the theme transform by compiling the rules with the given options

    * ``rules`` is passed to ``compile_theme`` unchanged.
    * ``absolutePrefix``, when set, is expanded with ``expandAbsolutePrefix``
      before being passed on.
    * ``readNetwork`` is used both for the XSLT access control and for the
      parsers obtained from ``getParser``.
    * ``parameterExpressions`` is a mapping; its keys are declared as
      stylesheet parameters in addition to 'url', 'base', 'path', 'scheme'
      and 'host'.  Every parameter is declared with an empty default.
    * ``runtrace`` is passed through to ``compile_theme``.

    Returns an ``etree.XSLT`` transform, or None when ``compile_theme``
    produces nothing.
    """

    if parameterExpressions is None:
        parameterExpressions = {}

    accessControl = etree.XSLTAccessControl(read_file=True, write_file=False, create_dir=False, read_network=readNetwork, write_network=False)

    if absolutePrefix:
        absolutePrefix = expandAbsolutePrefix(absolutePrefix)

    # A set union works on both Python 2 and 3; ``keys() + [...]`` raises
    # TypeError on Python 3 because dict views cannot be added to a list.
    params = set(parameterExpressions).union(
        ['url', 'base', 'path', 'scheme', 'host'])
    xslParams = dict.fromkeys(params, '')

    compiledTheme = compile_theme(rules,
            absolute_prefix=absolutePrefix,
            parser=getParser('theme', readNetwork),
            rules_parser=getParser('rules', readNetwork),
            compiler_parser=getParser('compiler', readNetwork),
            read_network=readNetwork,
            access_control=accessControl,
            update=True,
            xsl_params=xslParams,
            runtrace=runtrace,
        )

    if not compiledTheme:
        return None

    return etree.XSLT(compiledTheme,
            access_control=accessControl,
        )
def transform(tree, stylesheet_path, **args):
    """Apply the XSLT stylesheet at *stylesheet_path* to *tree*.

    Keyword arguments are forwarded to the stylesheet as parameters, each
    value wrapped in single quotes.  The transform may read files and the
    network.  Returns whatever the compiled stylesheet returns.
    """
    # Quote every value so it is passed as a string literal.
    quoted = dict((name, "'%s'" % args[name]) for name in args)

    stylesheet = etree.parse(stylesheet_path)
    access = etree.XSLTAccessControl(read_file=True, read_network=True)
    run = etree.XSLT(stylesheet, access_control=access)

    # An empty mapping is equivalent to calling without parameters.
    return run(tree, **quoted)
 def __new__(cls, cname, cbases, cvars):
     """Create the class and set up its XSLT-related class attributes.

     * A ``cls__xslpaste`` entry in the class body is taken as stylesheet
       source and replaced by the compiled ``et.XSLT`` object.
     * ``cls__xslacl`` / ``cls__xslparser`` are (re)created unless the new
       class already has both.
     * ``cls__xslpaths`` is created when missing; ``cls__xslnames`` is
       always reset to an empty dict.
     * With ``debug`` on and a template path set, every entry of
       ``cls__xsllist`` (extended by ``cls__xsllist_default``) is loaded
       through ``xslload_path``.
     """
     paste_source = cvars.pop('cls__xslpaste', None)
     klass = type.__new__(cls, cname, cbases, cvars)

     if not (hasattr(klass, 'cls__xslacl')
             and hasattr(klass, 'cls__xslparser')):
         klass.cls__xslacl = et.XSLTAccessControl(read_network=False)
         klass.cls__xslparser = et.XMLParser(dtd_validation=False,
                                             resolve_entities=True,
                                             load_dtd=True,
                                             ns_clean=False)

     if paste_source is not None:
         paste_tree = et.XML(paste_source, parser=klass.cls__xslparser)
         klass.cls__xslpaste = et.XSLT(
             paste_tree, access_control=klass.cls__xslacl)

     if not hasattr(klass, 'cls__xslpaths'):
         klass.cls__xslpaths = {}
     klass.cls__xslnames = {}

     if klass.debug and klass.cls__template_path is not None:
         klass.cls__xsllist.extend(klass.cls__xsllist_default)
         for entry_name, entry_file in klass.cls__xsllist:
             klass.xslload_path(entry_name, entry_file)

     return klass
Exemple #8
0
    def __init__(self,
                 app,
                 global_conf,
                 ignore_paths=None,
                 xslt_file=None,
                 xslt_source="",
                 xslt_tree=None,
                 read_network=False):
        """Initialise, giving a filename, source string or parsed tree for
        an XSLT stylesheet.

        * ``app`` and ``global_conf`` are stored unchanged.
        * ``xslt_file`` is a filename; when given, its content replaces
          ``xslt_source``.
        * ``xslt_source`` is stylesheet source; when non-empty it is parsed
          and replaces ``xslt_tree``.
        * ``xslt_tree`` is a parsed stylesheet, used when neither of the
          above is given.
        * ``read_network`` is passed to the XSLT access control; file reads
          are always allowed, all writes are always denied.
        * ``ignore_paths`` is an iterable of regular expression strings;
          entries are stripped, empty ones dropped, the rest compiled into
          ``self.ignore_paths``.
        """

        self.app = app
        self.global_conf = global_conf

        if xslt_file:
            # The context manager closes the file even if reading fails.
            with open(xslt_file) as stylesheet_file:
                xslt_source = stylesheet_file.read()

        if xslt_source:
            xslt_tree = etree.fromstring(xslt_source)

        self.read_network = read_network
        self.access_control = etree.XSLTAccessControl(
            read_file=True,
            write_file=False,
            create_dir=False,
            read_network=read_network,
            write_network=False)
        self.transform = etree.XSLT(xslt_tree,
                                    access_control=self.access_control)

        self.ignore_paths = []
        if ignore_paths:
            self.ignore_paths = [
                re.compile(p.strip()) for p in ignore_paths if p.strip()]
Exemple #9
0
def __build_lxml_noresult(target, source, env):
    """
    Run the ``$DOCBOOK_XSL`` stylesheet over ``source[0]`` with lxml when the
    stylesheet itself is responsible for creating the target file.

    The transformation result is discarded.  Returns None.
    """
    from lxml import etree

    access_control = etree.XSLTAccessControl(
        read_file=True,
        write_file=True,
        create_dir=True,
        read_network=False,
        write_network=False,
    )
    stylesheet = etree.parse(env.subst('$DOCBOOK_XSL'))
    transform = etree.XSLT(stylesheet, access_control=access_control)
    document = etree.parse(str(source[0]))

    # Hook for additional stylesheet parameters; currently always empty.
    parampass = {}
    transform(document, **parampass)

    return None
def xslt_ac_write_network_off():
    """Build an XSLT transform whose access control still allows network reads.

    Only ``write_network`` is disabled; ``read_network`` is explicitly True.
    ``rootxsl`` is not defined in this block and has to come from the
    enclosing module.

    NOTE(review): the ``# Noncompliant`` and caret comments look like
    static-analysis test markers - presumably they must stay attached to
    these exact lines and columns; confirm before reformatting.
    """
    ac = etree.XSLTAccessControl(read_network=True,
                                 write_network=False)  # Noncompliant
    #    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    transform = etree.XSLT(rootxsl, access_control=ac)
Exemple #11
0
# Configure the root logger at DEBUG level and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger('docbook2epub')

# Absolute path of the DocBook-to-EPUB stylesheet.
# XXX REMARK -- Original:
#DOCBOOK_XSL = os.path.abspath('../docbook-xsl-1.74.0/epub/docbook.xsl')
# XXX REMARK -- 2017-07-22: Modified by Marcos Cruz (programandala.net):
DOCBOOK_XSL = os.path.abspath(
    '/usr/share/xml/docbook/stylesheet/docbook-xsl/epub/docbook.xsl')

# Presumably the name and content of the EPUB "mimetype" entry - their use
# is not visible in this excerpt; confirm against the rest of the module.
MIMETYPE = 'mimetype'
MIMETYPE_CONTENT = 'application/epub+zip'

# The stylesheet may read and write files, create directories and read from
# the network; only network writes are denied.
xslt_ac = etree.XSLTAccessControl(read_file=True,
                                  write_file=True,
                                  create_dir=True,
                                  read_network=True,
                                  write_network=False)
# Parsed and compiled once, at import time, so importing this module fails
# if DOCBOOK_XSL cannot be read.
transform = etree.XSLT(etree.parse(DOCBOOK_XSL), access_control=xslt_ac)


def convert_docbook(docbook_file):
    '''Use DocBook XSL to transform our DocBook book into EPUB

    NOTE(review): only the set-up steps are visible in this excerpt - no
    transformation is invoked and ``cwd`` is not used yet.  The function
    presumably continues beyond what is shown; confirm against the full
    source before changing it.
    '''
    # Remember the directory we started in
    cwd = os.getcwd()
    # Create a temporary working directory for the output files.  It is named
    # after the input file (directory part and extension removed) and is
    # therefore relative to the current working directory.
    output_path = os.path.basename(os.path.splitext(docbook_file)[0])
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    # DocBook needs the source file in the current working directory to output correctly
    shutil.copy(docbook_file, output_path)
Exemple #12
0
def localname(name):
    """Return the local part of a ``{namespace}local`` qualified name.

    A name without a namespace part is returned unchanged instead of
    raising IndexError.
    """
    # [-1] is the text after the last '}' or, when there is no '}', the
    # whole name.
    return name.rsplit('}', 1)[-1]


def namespace(name):
    """Return the namespace part of a ``{namespace}local`` qualified name.

    A name without a namespace part yields the empty string.  Previously
    such a name lost its first character ('div' gave 'iv').
    """
    # rpartition() gives an empty head when there is no '}', so the slice
    # that drops the leading '{' is then a no-op on ''.
    return name.rpartition('}')[0][1:]


def fullname(namespace, name):
    """Return *name* qualified with *namespace*, as ``{namespace}name``."""
    qualified = '{%s}%s' % (namespace, name)  # NOQA: S001
    return qualified


# Access control for stylesheets that may read local files only: no writes,
# no directory creation and no network access.
AC_READ_FILE = etree.XSLTAccessControl(
    read_file=True,
    write_file=False,
    create_dir=False,
    read_network=False,
    write_network=False,
)
# Same as AC_READ_FILE, but network reads are allowed as well.
AC_READ_NET = etree.XSLTAccessControl(
    read_file=True,
    write_file=False,
    create_dir=False,
    read_network=True,
    write_network=False,
)


class CustomResolver(etree.Resolver):
    """lxml resolver that carries caller-supplied ``data``.

    NOTE(review): only the constructor is visible in this excerpt; how
    ``data`` is used during resolution is not shown here.
    """
    def __init__(self, data):
        # Keep the payload on the instance.
        self.data = data
def xslt_inline_ac():
    """Build an XSLT transform with an inline, argument-less access control.

    ``XSLTAccessControl()`` is constructed without arguments, so lxml's
    default permissions apply.  ``rootxsl`` is not defined in this block and
    has to come from the enclosing module.

    NOTE(review): ``# Noncompliant`` looks like a static-analysis test
    marker - presumably it must stay on this exact line; confirm before
    reformatting.
    """
    transform = etree.XSLT(
        rootxsl, access_control=etree.XSLTAccessControl())  # Noncompliant
def xslt_ac_default():
    """Build an XSLT transform with an argument-less access control object.

    ``XSLTAccessControl()`` is constructed without arguments, so lxml's
    default permissions apply.  ``rootxsl`` is not defined in this block and
    has to come from the enclosing module.

    NOTE(review): the ``# Noncompliant`` and caret comments look like
    static-analysis test markers - presumably they must stay attached to
    these exact lines and columns; confirm before reformatting.
    """
    ac = etree.XSLTAccessControl()  # Noncompliant
    #    ^^^^^^^^^^^^^^^^^^^^^^^^^
    transform = etree.XSLT(rootxsl, access_control=ac)
Exemple #15
0
    def __init__(
            self,
            app,
            global_conf,
            live=False,
            rules=None,
            theme=None,
            extra=None,
            css=True,
            xinclude=True,
            absolute_prefix=None,
            update=False,
            includemode='document',
            notheme=None,
            read_network=False,
            # BBB parameters
            theme_uri=None,
            extraurl=None):
        """Create the middleware. The parameters are:

        * ``rules``, the rules file
        * ``theme``, the theme file
        * ``extra``, an optional XSLT file with XDV extensions
        * ``css``, can be set to False to disable CSS syntax support (providing
          a  moderate speed gain)
        * ``xinclude`` can be set to False to disable XInclude support (it is
          enabled by default, at a moderate speed cost)
        * ``absolute_prefix`` can be set to a string that will be prefixed to
          any *relative* URL referenced in an image, link or stylesheet in the
          theme HTML file before the theme is passed to the compiler. This
          allows a theme to be written so that it can be opened and views
          standalone on the filesystem, even if at runtime its static
          resources are going to be served from some other location. For
          example, an ``<img src="images/foo.jpg" />`` can be turned into
          ``<img src="/static/images/foo.jpg" />`` with an ``absolute_prefix``
          of "/static".
        * ``update`` can be set to True to enable the automatic update support
          for the old Deliverance 0.2 namespace (it is disabled by default,
          for a moderate speed gain)
        * ``includemode`` can be set to 'document', 'esi' or 'ssi' to change
          the way in which includes are processed
        * ``live``, set to True to recompile the theme on each request
        * ``notheme``, a list of regular expressions for paths which should
          not be themed. A single string is split on newlines.
        * ``read_network``, set to True to let the XSLT access control permit
          network reads. File reads are always permitted; writes never are.
        * ``theme_uri`` and ``extraurl`` are backwards-compatibility aliases,
          used when ``theme`` / ``extra`` are not given.

        All boolean options are passed through ``asbool`` so that string
        values are interpreted consistently.
        """

        if isinstance(notheme, basestring):
            notheme = [p for p in notheme.split('\n') if p.strip()]

        self.app = app
        self.global_conf = global_conf

        self.rules = rules
        self.theme = theme or theme_uri  # theme_uri is for BBB
        self.extra = extra or extraurl  # extraurl is for BBB
        self.css = asbool(css)
        # A plain string such as "false" is truthy; normalise these flags
        # the same way as ``css`` and ``live``.
        self.xinclude = asbool(xinclude)
        self.absolute_prefix = absolute_prefix
        self.update = asbool(update)
        self.includemode = includemode

        self.live = asbool(live)
        self.notheme = notheme
        self.read_network = asbool(read_network)
        self.access_control = etree.XSLTAccessControl(
            read_file=True,
            write_file=False,
            create_dir=False,
            read_network=self.read_network,
            write_network=False)
        # No transform is compiled here; the attribute starts out as None.
        self.transform = None
Exemple #16
0
    def __init__(self,
                 app,
                 global_conf,
                 filename=None,
                 tree=None,
                 read_network=False,
                 read_file=True,
                 update_content_length=False,
                 ignored_extensions=('js', 'css', 'gif', 'jpg', 'jpeg', 'pdf',
                                     'ps', 'doc', 'png', 'ico', 'mov', 'mpg',
                                     'mpeg', 'mp3', 'm4a', 'txt', 'rtf', 'swf',
                                     'wav', 'zip', 'wmv', 'ppt', 'gz', 'tgz',
                                     'jar', 'xls', 'bmp', 'tif', 'tga', 'hqx',
                                     'avi'),
                 environ_param_map=None,
                 unquoted_params=None,
                 doctype=None,
                 content_type=None,
                 charset=None,
                 remove_conditional_headers=False,
                 **params):
        """Initialise, giving a filename or parsed XSLT tree.

        The parameters are:

        * ``filename``, a filename from which to read the XSLT file
        * ``tree``, a pre-parsed lxml tree representing the XSLT file

        ``filename`` and ``tree`` are mutually exclusive; when both are
        given, ``filename`` wins.

        * ``read_network``, should be set to True to allow resolving resources
          from the network.
        * ``read_file``, should be set to False to disallow resolving resources
          from the filesystem.
        * ``update_content_length``, can be set to True to update the
          Content-Length header when applying the transformation. When set to
          False (the default), the header is removed and it is left to the WSGI
          server recalculate or send a chunked response.
        * ``ignored_extensions`` can be set to a list of filename extensions
          for which the transformation should never be applied
        * ``environ_param_map`` can be set to a dict of environ keys to
          parameter names. The corresponding values will then be sent to the
          transformation as parameters.
        * ``unquoted_params``, can be set to a list of parameter names which
          will not be quoted. A single string is split on whitespace.
        * ``doctype``, can be set to a string which will replace that set in
          the XSLT, for example, "<!DOCTYPE html>".
        * ``content_type``, can be set to a string which will be set in the
          Content-Type header. By default it is inferred from the stylesheet:
          the ``media-type`` of ``xsl:output`` if present, otherwise derived
          from its ``method`` (html, text or xml).
        * ``charset``, can be set to a string which will be set in the
          Content-Type header. By default it is inferred from the stylesheet
          ``encoding``, falling back to "UTF-8".
        * ``remove_conditional_headers``, should be set to True if the
          transformed output includes other files.

        Additional keyword arguments will be passed to the transformation as
        parameters.
        """

        self.app = app
        self.global_conf = global_conf

        if filename is not None:
            # Close the file before parsing, and even if reading fails.
            with open(filename) as xslt_file:
                source = xslt_file.read()
            tree = etree.fromstring(source)

        xsl_namespaces = dict(xsl="http://www.w3.org/1999/XSL/Transform")

        def output_attribute(attribute):
            # Values of the given attribute on all top-level xsl:output
            # elements, in document order (the last one wins below).
            return tree.xpath(
                '/xsl:stylesheet/xsl:output/@' + attribute,
                namespaces=xsl_namespaces)

        if content_type is None:
            mediatype = output_attribute('media-type')
            if mediatype:
                content_type = mediatype[-1]
            else:
                method = output_attribute('method')
                if method:
                    # Unknown methods leave content_type as None.
                    content_type = {
                        'html': 'text/html',
                        'text': 'text/plain',
                        'xml': 'text/xml',
                    }.get(method[-1].lower())
        self.content_type = content_type

        if charset is None:
            encoding = output_attribute('encoding')
            charset = encoding[-1] if encoding else "UTF-8"
        self.charset = charset

        self.read_network = asbool(read_network)
        self.read_file = asbool(read_file)
        self.access_control = etree.XSLTAccessControl(
            read_file=self.read_file,
            write_file=False,
            create_dir=False,
            read_network=self.read_network,
            write_network=False)
        self.transform = etree.XSLT(tree, access_control=self.access_control)
        self.update_content_length = asbool(update_content_length)
        self.ignored_extensions = frozenset(ignored_extensions)

        # Raw string: "\." in a plain literal is an invalid escape sequence.
        # NOTE(review): extensions are interpolated unescaped, so regex
        # metacharacters in them are interpreted as such.
        self.ignored_pattern = re.compile(r"^.*\.(%s)$" %
                                          '|'.join(ignored_extensions))

        self.environ_param_map = environ_param_map or {}
        if isinstance(unquoted_params, basestring):
            unquoted_params = unquoted_params.split()
        self.unquoted_params = (
            frozenset(unquoted_params) if unquoted_params else ())
        self.params = params
        self.doctype = doctype
        self.remove_conditional_headers = asbool(remove_conditional_headers)
Exemple #17
0
    def __init__(self,
                 app,
                 global_conf,
                 rules,
                 theme=None,
                 prefix=None,
                 includemode='document',
                 debug=False,
                 read_network=False,
                 read_file=True,
                 update_content_length=False,
                 ignored_extensions=('js', 'css', 'gif', 'jpg', 'jpeg', 'pdf',
                                     'ps', 'doc', 'png', 'ico', 'mov', 'mpg',
                                     'mpeg', 'mp3', 'm4a', 'txt', 'rtf', 'swf',
                                     'wav', 'zip', 'wmv', 'ppt', 'gz', 'tgz',
                                     'jar', 'xls', 'bmp', 'tif', 'tga', 'hqx',
                                     'avi'),
                 environ_param_map=None,
                 unquoted_params=None,
                 doctype=None,
                 content_type=None,
                 filter_xpath=False,
                 **params):
        """Create the middleware. The parameters are:

        * ``rules``, the rules file
        * ``theme``, a URL to the theme file (may be a file:// URL)
        * ``debug``, set to True to recompile the theme on each request
        * ``prefix`` can be set to a string that will be prefixed to
          any *relative* URL referenced in an image, link or stylesheet in the
          theme HTML file before the theme is passed to the compiler. This
          allows a theme to be written so that it can be opened and views
          standalone on the filesystem, even if at runtime its static
          resources are going to be served from some other location. For
          example, an ``<img src="images/foo.jpg" />`` can be turned into
          ``<img src="/static/images/foo.jpg" />`` with a ``prefix`` of
          "/static". It is stored as ``self.absolute_prefix``.
        * ``includemode`` can be set to 'document', 'esi' or 'ssi' to change
          the way in which includes are processed
        * ``read_network``, should be set to True to allow resolving resources
          from the network.
        * ``read_file``, should be set to False to disallow resolving resources
          from the filesystem.
        * ``update_content_length``, can be set to True to update the
          Content-Length header when applying the transformation. When set to
          False (the default), the header is removed and it is left to the WSGI
          server recalculate or send a chunked response.
        * ``ignored_extensions`` can be set to a list of filename extensions
          for which the transformation should never be applied
        * ``environ_param_map`` can be set to a dict of environ keys to
          parameter names. The corresponding values will then be sent to the
          transformation as parameters. The mappings for 'diazo.path',
          'diazo.host' and 'diazo.scheme' are always added; the dict passed
          in is copied, not modified.
        * ``unquoted_params``, can be set to a list of parameter names which
          will not be quoted.
        * ``doctype``, can be set to a string which will replace the default
          XHTML 1.0 transitional Doctype or that set in the Diazo theme. For
          example, "<!DOCTYPE html>".
        * ``content_type``, can be set to a string which will be set in the
          Content-Type header. By default it is inferred from the stylesheet.
        * ``filter_xpath``, should be set to True to enable filter_xpath
          support for external includes.

        Additional keyword arguments will be passed to the theme
        transformation as parameters. Note that ``charset`` and
        ``remove_conditional_headers`` are not named parameters of this
        constructor: if supplied, they end up among these additional
        keyword arguments.
        """

        self.app = app
        self.global_conf = global_conf

        self.rules = rules
        self.theme = theme
        self.absolute_prefix = prefix
        self.includemode = includemode
        self.debug = asbool(debug)
        self.read_network = asbool(read_network)
        self.read_file = asbool(read_file)
        self.update_content_length = asbool(update_content_length)
        self.ignored_extensions = ignored_extensions
        self.doctype = doctype
        self.content_type = content_type
        self.unquoted_params = unquoted_params
        self.filter_xpath = asbool(filter_xpath)

        self.access_control = etree.XSLTAccessControl(
            read_file=self.read_file,
            write_file=False,
            create_dir=False,
            read_network=self.read_network,
            write_network=False)
        self.transform_middleware = None
        self.filter_middleware = self.get_filter_middleware()

        # Work on a copy so that the caller's dict is not modified by the
        # update below.
        self.environ_param_map = dict(environ_param_map or {})
        self.environ_param_map.update({
            'diazo.path': 'path',
            'diazo.host': 'host',
            'diazo.scheme': 'scheme',
        })

        self.params = params.copy()
def xslt_ac_read_write_network_off():
    """Build an XSLT transform with both network reads and writes disabled.

    ``rootxsl`` is not defined in this block and has to come from the
    enclosing module.

    NOTE(review): the ``# Compliant`` comments look like static-analysis
    test markers - presumably they must stay on these exact lines; confirm
    before reformatting.
    """
    ac = etree.XSLTAccessControl(read_network=False,
                                 write_network=False)  # Compliant
    transform = etree.XSLT(rootxsl, access_control=ac)  # Compliant
Exemple #19
0
def compileThemeTransform(rules,
                          absolutePrefix=None,
                          readNetwork=False,
                          parameterExpressions=None):
    """Prepare the theme transform by compiling the rules with the given options

    * ``rules`` is passed to ``compile_theme`` unchanged.
    * ``absolutePrefix``, when set, is expanded with ``expandAbsolutePrefix``
      before being passed on.
    * ``readNetwork`` enables network reads in the XSLT access control and
      adds a ``NetworkResolver`` to every parser.
    * ``parameterExpressions`` is a mapping; its keys are declared as
      stylesheet parameters in addition to 'url', 'base', 'path', 'scheme'
      and 'host'.  Every parameter is declared with an empty default.

    Returns an ``etree.XSLT`` transform, or None when ``compile_theme``
    produces nothing.
    """

    if parameterExpressions is None:
        parameterExpressions = {}

    accessControl = etree.XSLTAccessControl(read_file=True,
                                            write_file=False,
                                            create_dir=False,
                                            read_network=readNetwork,
                                            write_network=False)

    if absolutePrefix:
        absolutePrefix = expandAbsolutePrefix(absolutePrefix)

    # A set union works on both Python 2 and 3; ``keys() + [...]`` raises
    # TypeError on Python 3 because dict views cannot be added to a list.
    params = set(parameterExpressions).union(
        ['url', 'base', 'path', 'scheme', 'host'])
    xslParams = dict.fromkeys(params, '')

    # The same resolver instances are shared by all three parsers.
    resolvers = [InternalResolver(), PythonResolver()]
    if readNetwork:
        resolvers.append(NetworkResolver())

    rulesParser = etree.XMLParser(recover=False)
    themeParser = etree.HTMLParser()
    compilerParser = etree.XMLParser()
    for parser in (rulesParser, themeParser, compilerParser):
        for resolver in resolvers:
            parser.resolvers.add(resolver)

    compiledTheme = compile_theme(
        rules,
        absolute_prefix=absolutePrefix,
        parser=themeParser,
        rules_parser=rulesParser,
        compiler_parser=compilerParser,
        read_network=readNetwork,
        access_control=accessControl,
        update=True,
        xsl_params=xslParams,
    )

    if not compiledTheme:
        return None

    return etree.XSLT(
        compiledTheme,
        access_control=accessControl,
    )