Esempio n. 1
0
def BaseUri(node, fallback=None):
    """
    Stand-in for the DOM Level 3 `node.baseURI` attribute, which is
    currently broken in 4Suite XML.  Call `BaseUri(node)` wherever DOM
    Level 3 code would read `node.baseURI`.

    Returns the absolute base URI in effect for `node` in a Domlette tree,
    or `None` when no *absolute* base URI can be determined.

    node     - the node whose base URI is wanted
    fallback - optional URI used as a last-resort base when nothing else
               yields an absolute result

    Candidates are tried in this order, and each later step only runs
    while the candidate is still not absolute: the nearest xml:base in
    scope, the base URI of the parent of the element that declared it
    (recursively), the document URI, and finally `fallback`.
    """
    current = node
    candidate = ''

    # Closest element (the node itself included) carrying an xml:base.
    declaring = current.xpath('ancestor-or-self::*[@xml:base][1]')
    if declaring:
        current = declaring[0]
        candidate = current.getAttributeNS(XML_NAMESPACE, 'base')

    # A relative xml:base is resolved against the base URI of the parent of
    # the declaring element.  `fallback` is not forwarded to the recursive
    # call; it is only consulted further down in this invocation.
    if not Uri.IsAbsolute(candidate) and current.parentNode is not None:
        candidate = Uri.Absolutize(candidate, BaseUri(current.parentNode))

    # Still relative: resolve against the URI of the owning document.  An
    # object exposing `createElementNS` is taken to be the document itself.
    if not Uri.IsAbsolute(candidate):
        if hasattr(current, 'createElementNS'):
            document = current
        else:
            document = current.ownerDocument
        candidate = Uri.Absolutize(candidate, document.documentURI)

    # Last resort: the caller-supplied fallback base, if any.
    if fallback is not None and not Uri.IsAbsolute(candidate):
        candidate = Uri.Absolutize(candidate, fallback)

    # Nothing produced an absolute URI; report that with None.
    if Uri.IsAbsolute(candidate):
        return candidate
    return None
Esempio n. 2
0
def pushdom(source, xpatterns, prefixes=None, validate=False):
    """
    Parse `source` with a generator-enabled SAX parser whose content
    handler is a `sax2dom_chunker`, and return whatever `parser.parse`
    returns.

    source    - an InputSource, a file-like object (anything with `read`),
                a string of XML, an absolute URI, or an OS file path
    xpatterns - passed to `sax2dom_chunker` as `xpatterns`
    prefixes  - passed to `sax2dom_chunker` as `nss`
    validate  - when true, turn on the SAX validation feature; otherwise
                turn off loading of external parameter entities
    """
    def _throwaway_base():
        # Streams and strings have no URI of their own, so invent one.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    parser = Sax.CreateParser()
    if not validate:
        parser.setFeature(xml.sax.handler.feature_external_pes, False)
    else:
        parser.setFeature(xml.sax.handler.feature_validation, True)
    parser.setFeature(Sax.FEATURE_GENERATOR, True)

    def _yield_fragment(docfrag):
        # Each finished chunk is handed back through the parser's
        # yield-result property.
        parser.setProperty(Sax.PROPERTY_YIELD_RESULT, docfrag)

    chunker = sax2dom_chunker(xpatterns=xpatterns,
                              nss=prefixes,
                              chunk_consumer=_yield_fragment)
    parser.setContentHandler(chunker)

    # Coerce anything that is not already an InputSource into one.
    if not isinstance(source, InputSource.InputSource):
        if hasattr(source, 'read'):
            source = InputSource.DefaultFactory.fromStream(
                source, _throwaway_base())
        elif IsXml(source):
            source = InputSource.DefaultFactory.fromString(
                source, _throwaway_base())
        else:
            if not Uri.IsAbsolute(source):
                # Treat it as a local file path.
                source = Uri.OsPathToUri(source)
            source = InputSource.DefaultFactory.fromUri(source)

    return parser.parse(source)
Esempio n. 3
0
def CreateInputSource(obj, uri=None):
    """
    Build an InputSource from one of several kinds of object.

    obj - a string, a Unicode object (only if you really know what you're
          doing), a file-like object (stream), a file path or a URI.  An
          existing InputSource may also be given; it is returned as is,
          except that its URI may be overridden
    uri - optional override; when given (and true), the `uri` attribute of
          the resulting InputSource is set to this value

    Returns an InputSource which can be passed to 4Suite APIs.
    """
    # Imported here rather than at module level to sidestep circular
    # imports, at a small cost per call.
    from Ft.Xml import InputSource
    factory = InputSource.DefaultFactory
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    def _throwaway_base():
        # Placeholder base URI for sources that have none of their own.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if isinstance(obj, InputSource.InputSource):
        result = obj
    elif hasattr(obj, 'read'):
        result = factory.fromStream(obj, _throwaway_base())
    elif IsXml(obj):
        result = factory.fromString(obj, _throwaway_base())
    else:
        # Anything else is a URI; a non-absolute one is taken to be an
        # OS path and converted first.
        location = obj
        if not Uri.IsAbsolute(location):
            location = Uri.OsPathToUri(location)
        result = factory.fromUri(location)

    if uri:
        result.uri = uri
    return result
Esempio n. 4
0
def GetDefaultCatalog(basename='default.cat'):
    """
    Load the default catalog file(s) and return the resulting catalog.

    Catalog URIs are collected, in this order, from:

      1. the XML_CATALOGS environment variable, an os.pathsep-separated
         list of pathnames (the original 4Suite convention);
      2. the XML_CATALOG_FILES environment variable, a whitespace-separated
         list of pathnames or URLs (the libxml2 convention);
      3. the file `basename` in 4Suite's DATADIR.

    The first URI that loads becomes the primary catalog; every later one
    is appended to the primary's `catalogs` list.  A URI whose loading
    raises UriException is skipped.

    If XML_DEBUG_CATALOG is present in the environment, progress messages
    are written to stderr.

    basename - file name of the bundled default catalog

    Returns the primary Catalog, or None if no catalog could be loaded.
    """
    quiet = 'XML_DEBUG_CATALOG' not in os.environ

    uris = []
    # original 4Suite XML Catalog support
    if 'XML_CATALOGS' in os.environ:
        # os.pathsep separated list of pathnames
        for path in os.environ['XML_CATALOGS'].split(os.pathsep):
            uris.append(Uri.OsPathToUri(path))

    # libxml2 XML Catalog support
    if 'XML_CATALOG_FILES' in os.environ:
        # whitespace-separated list of pathnames or URLs (ick!)
        for path in os.environ['XML_CATALOG_FILES'].split():
            # if it is not already a URL, make it one
            if not Uri.IsAbsolute(path):
                uris.append(Uri.OsPathToUri(path))
            else:
                uris.append(path)

    # add the default 4Suite catalog
    pathname = os.path.join(GetConfigVar('DATADIR'), basename)
    if GetConfigVar('RESOURCEBUNDLE'):
        resource = ImportUtil.OsPathToResource(pathname)
        uri = Uri.ResourceToUri('Ft.Xml', resource)
    else:
        uri = Uri.OsPathToUri(pathname)
    uris.append(uri)

    if not quiet:
        # Print the label once, then pad so the URIs line up beneath it.
        prefix = "Catalog URIs:"
        for uri in uris:
            sys.stderr.write('%s %s\n' % (prefix, uri))
            prefix = " "*len(prefix)

    catalog = None
    for uri in uris:
        if not quiet:
            sys.stderr.write('Reading %s\n' % uri)
            sys.stderr.flush()
        try:
            # FIXME: Use dict merging rather than this inefficient cascading
            if catalog is None:
                if not quiet:
                    sys.stderr.write('Creating catalog from %s\n' % uri)
                    sys.stderr.flush()
                catalog = Catalog(uri, quiet)
            else:
                if not quiet:
                    sys.stderr.write('Appending %s\n' % uri)
                    sys.stderr.flush()
                catalog.catalogs.append(Catalog(uri, quiet))
        except UriException:
            # Best effort: a catalog that cannot be loaded is skipped so
            # the remaining ones still get a chance.  Only mention it when
            # catalog debugging was requested.
            if not quiet:
                sys.stderr.write('Skipping %s: %s\n'
                                 % (uri, sys.exc_info()[1]))
                sys.stderr.flush()

    # Hand the assembled catalog back; without this the function would
    # always return None.
    return catalog
Esempio n. 5
0
def _AttachStylesheetToProcessor(stylesheet, processor):
    """
    Turn `stylesheet` into an InputSource (unless it already is one) and
    append it to `processor` via `processor.appendStylesheet`.

    stylesheet - an InputSource, a string of XML, a file-like object
                 (anything with `read`), an absolute URI, or an OS path
    processor  - object providing `appendStylesheet`
    """
    from Ft.Lib import Uri, Uuid
    from Ft.Xml import InputSource
    from Ft.Xml.Catalog import IsXml

    def _throwaway_base():
        # Strings and streams get an invented base URI.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if isinstance(stylesheet, InputSource.InputSource):
        isrc = stylesheet
    # The XML test is deliberately no stricter than IsXml: additionally
    # requiring the XSLT namespace to appear in the text would break in
    # pathological cases such as a stylesheet string containing only an
    # XInclude of the actual XSLT.
    elif IsXml(stylesheet):
        isrc = InputSource.DefaultFactory.fromString(
            stylesheet, _throwaway_base())
    elif hasattr(stylesheet, 'read'):
        isrc = InputSource.DefaultFactory.fromStream(
            stylesheet, _throwaway_base())
    else:
        location = stylesheet
        if not Uri.IsAbsolute(location):
            # Not an absolute URI, so treat it as a local path.
            location = Uri.OsPathToUri(location)
        isrc = InputSource.DefaultFactory.fromUri(location)

    processor.appendStylesheet(isrc)
    return
Esempio n. 6
0
    def __init__(self, uri, quiet=True):
        """
        Read and parse the catalog resource at `uri`.

        uri   - URI of the catalog; it should be absolute, and a warning
                is issued if it is not
        quiet - when false, the parsed catalog contents are dumped to
                stderr

        The resource is fetched through `Uri.BASIC_RESOLVER`.  Data that
        `IsXml` recognizes is parsed as an XML Catalog; anything else is
        parsed as a TR 9401 catalog.
        """
        self.systemIds = {}
        self.publicIds = {}
        self.uris = {}
        self.publicDelegates = []
        self.systemDelegates = []
        self.uriDelegates = []
        self.systemRewrites = []
        self.uriRewrites = []
        self.catalogs = []
        self.uri = uri
        self.quiet = quiet

        if not Uri.IsAbsolute(uri):
            # Using a relative URI here makes it hard to reliably
            # locate the catalog. Also, if the catalog doesn't set
            # its own base URI with xml:base, then we won't be able
            # to resolve relative URI references within the catalog.
            # So we should warn that this situation is undesirable.
            # The URI is interpolated so the warning names the catalog.
            warnings.warn("Catalog URI '%s' is not absolute." % uri,
                          FtWarning, 2)

        stream = Uri.BASIC_RESOLVER.resolve(uri)
        try:
            data = stream.read()
        finally:
            # Release the stream even if reading fails.
            stream.close()

        if IsXml(data):
            # cannot be a TR 9401 document, assume an XML Catalog
            self._parseXmlCat(data)
        else:
            # cannot be an XML Catalog, assume a TR 9401 file
            self._parseTr9401(data)

        # longest match first
        for entries in (self.publicDelegates,
                        self.systemDelegates,
                        self.uriDelegates,
                        self.systemRewrites,
                        self.uriRewrites):
            entries.sort()
            entries.reverse()

        if not quiet:
            sys.stderr.write('Catalog contents:\n')
            for key in self.__dict__.keys():
                sys.stderr.write('  %s = %r\n' % (key, self.__dict__[key]))
            sys.stderr.flush()
        return
Esempio n. 7
0
def SourceArgToUri(arg, resolver=Uri.BASIC_RESOLVER):
    """
    Convert a command-line source argument into a URI.

    Some command-line scripts accept an argument that is either "-"
    (meaning standard input) or a URI reference to be resolved against
    the URI equivalent of the current working directory.  Given such an
    argument as a string, this function returns the appropriate URI.

    Because users tend to expect local OS paths to work as URIs, an
    argument that names an existing local file is treated as an OS path,
    even though this could interfere with catalog-based resolution.

    arg      - the argument, as a str or unicode object
    resolver - an instance of Ft.Lib.Uri.UriResolverBase (or a subclass);
               its normalize() method is used to resolve a URI reference
               against a base URI

    Raises TypeError if `resolver` or `arg` is of the wrong type, and
    ValueError if `arg` is neither a local file nor a valid URI reference
    nor "-".
    """
    if not isinstance(resolver, Uri.UriResolverBase):
        raise TypeError(
            ('It appears there is a bug in this command-line'
             ' script. A %s was passed as URI resolver to a function that'
             ' requires an instance of Ft.Lib.Uri.UriResolverBase (or'
             ' a subclass thereof).') % type(resolver))
    if not isinstance(arg, (str, unicode)):
        raise TypeError(
            ('It appears there is a bug in this command-line'
             ' script. A %s was passed as an argument needing to be'
             ' converted to a URI. A string must be provided instead.')
            % type(arg))

    # Standard input gets a placeholder file name.
    if arg == '-':
        return Uri.OsPathToUri('unknown-STDIN', attemptAbsolute=True)

    if arg:
        # An existing file wins over interpretation as a URI reference.
        if os.path.isfile(arg):
            return Uri.OsPathToUri(arg, attemptAbsolute=True)
        if not Uri.MatchesUriRefSyntax(arg):
            raise ValueError("'%s' is not a valid URI reference." % arg)
        if Uri.IsAbsolute(arg):
            return arg

    # A relative reference (or an empty argument) is resolved against the
    # current working directory, expressed as a URI ending in a slash.
    cwd_uri = Uri.OsPathToUri(os.getcwd(), attemptAbsolute=True)
    if cwd_uri[-1] != '/':
        cwd_uri = cwd_uri + '/'
    return resolver.normalize(arg, cwd_uri)
Esempio n. 8
0
    def instantiate(self, context, processor):
        """
        Evaluate this instruction's `href` and work out the absolute URI
        it refers to.

        context   - the XSLT context; its `processorNss` and
                    `currentInstruction` attributes are set here
        processor - the XSLT processor; the `name` of its writer's stream
                    serves as the base for a relative `href`

        Raises XsltRuntimeException (NO_EXSLTDOCUMENT_BASE_URI) when a
        relative `href` cannot be resolved against the output stream.
        """
        context.processorNss = self.namespaces
        context.currentInstruction = self

        # this uses attributes directly from self
        self._output_parameters.avtParse(self, context)
        href = self._href.evaluate(context)

        if Uri.IsAbsolute(href):
            uri = href
        else:
            # A relative href is resolved against the location of the
            # current output stream, taken from its `name` attribute.
            try:
                uri = Uri.Absolutize(href,
                  Uri.OsPathToUri(processor.writer.getStream().name))
            except Exception:
                # Any failure here (e.g. the stream has no usable name)
                # is reported as a missing base URI.
                raise XsltRuntimeException(
                        ExsltError.NO_EXSLTDOCUMENT_BASE_URI,
                        context.currentInstruction, href)
        # NOTE(review): nothing in the visible code consumes `uri`; the
        # rest of this method may lie outside this excerpt.
Esempio n. 9
0
def Parse(source):
    """
    Convenience function for parsing XML.  Takes a single argument, which
    must be a string (not a Unicode object), a file-like object (stream),
    a file path or a URI.

    Returns a Domlette node.

    Strings and streams should only be passed if the XML is self-contained
    (i.e. it needs no other resource such as external entities or
    includes), because they are parsed against a made-up base URI.  If you
    get URI resolution errors, use the lower-level APIs instead.  For
    example, to resolve against the current working directory when parsing
    a string:

    from Ft.Xml.Domlette import NonvalidatingReader
    from Ft.Lib import Uri

    XML = '<!DOCTYPE a [ <!ENTITY b SYSTEM "b.xml"> ]><a>&b;</a>'

    base = Uri.OsPathToUri('')  #Turn CWD into a file: URL
    doc = NonvalidatingReader.parseString(XML, base)
    # during parsing, the replacement text for &b;
    # will be obtained from b.xml in the CWD

    For streams, use "parseStream" in place of "parseString".
    """
    # Imported here rather than at module level to sidestep circular
    # imports, at a small cost per call.
    from Ft.Xml.Domlette import NonvalidatingReader
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    def _throwaway_base():
        # Invented base URI for input that has no location of its own.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if hasattr(source, 'read'):
        return NonvalidatingReader.parseStream(source, _throwaway_base())
    if IsXml(source):
        return NonvalidatingReader.parseString(source, _throwaway_base())

    # What is left is a URI, or an OS path that must be turned into one.
    location = source
    if not Uri.IsAbsolute(location):
        location = Uri.OsPathToUri(location)
    return NonvalidatingReader.parseUri(location)
Esempio n. 10
0
def Transform(source, stylesheet, params=None, output=None):
    """
    Convenience function for applying an XSLT transform.  Returns
    a string.

    source     - the XML source document: a string (not a Unicode object),
                 a file-like object (stream), a file path, a URI or an
                 Ft.Xml.InputSource.InputSource instance.  A string or
                 stream must be self-contained XML (i.e. it must not need
                 any other resource such as external entities or includes)
    stylesheet - the XSLT document: a string, stream, URL, file path or
                 Ft.Xml.InputSource.InputSource instance
    params     - optional dictionary of stylesheet parameters; keys are
                 unicode objects for names without a namespace, or
                 (uri, localname) tuples for names with one
    output     - optional file-like object to which output is written
                 (incrementally, as processed)
    """
    # Imported here rather than at module level to sidestep circular
    # imports, at a small cost per call.
    from Ft.Xml.Xslt import Processor
    from Ft.Xml import InputSource
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    def _throwaway_base():
        # Invented base URI for input that has no location of its own.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if not params:
        params = {}
    processor = Processor.Processor()
    _AttachStylesheetToProcessor(stylesheet, processor)

    # Coerce anything that is not already an InputSource into one.
    if not isinstance(source, InputSource.InputSource):
        if hasattr(source, 'read'):
            source = InputSource.DefaultFactory.fromStream(
                source, _throwaway_base())
        elif IsXml(source):
            source = InputSource.DefaultFactory.fromString(
                source, _throwaway_base())
        else:
            if not Uri.IsAbsolute(source):
                # Treat it as a local file path.
                source = Uri.OsPathToUri(source)
            source = InputSource.DefaultFactory.fromUri(source)

    return processor.run(source, topLevelParams=params, outputStream=output)
def parse(source,
          uri=None,
          rules=None,
          binderobj=None,
          prefixes=None,
          validate=False,
          binding_classes=None):
    """
    Convenience function for parsing XML into a binding.  `source` is a
    string (not a Unicode object), a file-like object (stream), a file
    path or a URI.
    Returns a document binding object.

    Only use this function for self-contained XML (i.e. XML that needs no
    other resource); for example, do not use it for XML with external
    entities.  If you get URI resolution errors, pass in a `uri`.

    uri - base URI for the XML document entity being parsed; needed when
          source is a string or stream containing XML that uses external
          resources.  Ignored when source is a path or URI
    rules - a list of bindery rule objects to fine-tune the binding
    binderobj - optional binder object to control binding details; with
                the default of None a binder object will be created
    prefixes - dictionary mapping prefixes to namespace URIs
               (default None)
    validate - True to request DTD validation by the underlying parser
               (default False)
    binding_classes - passed through unchanged to the binderytools
                      function that does the work
    """
    from Ft.Xml import InputSource
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    # Options every binderytools entry point below accepts.
    shared = {
        'rules': rules,
        'binderobj': binderobj,
        'prefixes': prefixes,
        'validate': validate,
        'binding_classes': binding_classes,
    }

    # InputSource instances get no special handling here.
    if hasattr(source, 'read'):
        return binderytools.bind_stream(source, uri=uri, **shared)
    if IsXml(source):
        return binderytools.bind_string(source, uri=uri, **shared)
    # Paths and URIs carry their own location, so `uri` is not forwarded.
    if Uri.IsAbsolute(source):
        return binderytools.bind_uri(source, **shared)
    return binderytools.bind_file(source, **shared)