Ejemplo n.º 1
0
 def resolve(self, uri, base=None):
     """
     Dispatch resolution of ``uri`` to the handler registered for its scheme.

     The scheme is taken from ``uri``; when ``uri`` has none and a base is
     given, the scheme of ``base`` is used instead.  The handler is looked
     up in ``self.handlers`` by scheme, then under the ``None`` key as a
     catch-all.  When neither lookup yields a handler the call is delegated
     to ``Uri.UriResolverBase.resolve``.

     Raises ``Uri.UriException`` (``SCHEME_REQUIRED``) when no scheme can
     be determined from either ``uri`` or ``base``.
     """
     scheme = Uri.GetScheme(uri)
     if not scheme and base:
         # Relative reference: borrow the scheme of the base URI.
         scheme = Uri.GetScheme(base)
     if not scheme:
         # Another option is to fall back to Base class behavior
         raise Uri.UriException(Uri.UriException.SCHEME_REQUIRED,
                                base=base, ref=uri)

     # Scheme-specific handler first, then the catch-all registered as None.
     handler = self.handlers.get(scheme) or self.handlers.get(None)
     if handler:
         return handler(uri, base)
     return Uri.UriResolverBase.resolve(self, uri, base)
Ejemplo n.º 2
0
 def startDocument(self):
     """
     Initialise per-document state at the start of parsing.

     Records the document's system identifier (from the locator) as the
     first entry of the base-URI stack, remembers its URI scheme, and
     builds the list of (namespace, local-name) pairs treated as inclusion
     elements.  ``self.XSLT_INCLUDES`` is added to that list when the
     ``xslt`` flag from the enclosing scope is true.
     """
     system_id = self._locator.getSystemId()
     self._bases = [system_id]
     self._scheme = Uri.GetScheme(system_id)

     # XInclude is always recognised.
     xinclude = ("http://www.w3.org/2001/XInclude", "include")
     self._elements = [xinclude]
     if xslt:
         self._elements.extend(self.XSLT_INCLUDES)
Ejemplo n.º 3
0
    def _orig_resolve(self, uri, baseUri=None):
        """
        This function takes a URI or a URI reference plus a base URI, produces
        a normalized URI using the normalize function if a base URI was given,
        then attempts to obtain access to an entity representing the resource
        identified by the resulting URI, returning the entity as a stream (a
        Python file-like object).

        Raises a UriException if the URI scheme is unsupported or if a stream
        could not be obtained for any reason.

        Raises a ValueError if no base URI was given and the URI has no
        scheme (i.e. it is a relative reference).

        NOTE(review): the portion of this function visible here ends after
        the local-file error handler; the handling of non-file schemes and
        the final return of the stream are not shown -- confirm against the
        full source.
        """
        if baseUri is not None:
            # With a base URI, normalize() produces the URI to open.
            uri = self.normalize(uri, baseUri)
            scheme = Uri.GetScheme(uri)
        else:
            scheme = Uri.GetScheme(uri)
            # since we didn't use normalize(), we need to verify here
            if scheme not in Uri.DEFAULT_URI_SCHEMES:
                if scheme is None:
                    # No scheme and no base: the reference cannot be resolved.
                    raise ValueError('When the URI to resolve is a relative '
                        'reference, it must be accompanied by a base URI.')
                else:
                    raise UriException(UriException.UNSUPPORTED_SCHEME,
                                       scheme=scheme,
                                       resolver=self.__class__.__name__)

        # Bypass urllib for opening local files. This means we don't get all
        # the extra metadata that urllib adds to the stream (Last-modified,
        # Content-Length, a poorly guessed Content-Type, and the URI), but
        # we also avoid its potentially time-consuming socket.gethostbyname()
        # calls, which aren't even warranted and are part of urllib's dubious
        # interpretation of RFC 1738.
        if scheme == 'file':
            # Convert the file URI into a filesystem path for direct opening.
            path = Uri.UriToOsPath(uri, attemptAbsolute=False)
            try:
                # file() is the Python 2 built-in equivalent of open().
                stream = file(path, 'rb')
            except IOError, e:
                # Re-raise I/O failures as a UriException that carries both
                # the URI and the path that was tried.
                raise UriException(UriException.RESOURCE_ERROR,
                                   loc='%s (%s)' % (uri, path),
                                   uri=uri, msg=str(e))
Ejemplo n.º 4
0
        def startElementNS(self, expandedName, tagName, attrs):
            """
            Track xml:base and report inclusions found on this element.

            Every element pushes its effective base URI (its own xml:base
            attribute, or the base inherited from the enclosing element) onto
            ``self._bases``.  When the element is one of the recognised
            inclusion elements and carries an ``href``, the reference is
            joined against the effective base and handed to ``callback``
            (from the enclosing scope), provided its scheme matches the
            scheme of the initial document.
            """
            # Update xml:base stack
            inherited_base = self._bases[-1]
            effective_base = attrs.get(
                ("http://www.w3.org/XML/1998/namespace", "base"),
                inherited_base)
            self._bases.append(effective_base)

            if expandedName not in self._elements:
                return

            try:
                target = attrs[(None, 'href')]
            except KeyError:
                # XInclude same document reference, nothing to do
                return

            # Ignore XInclude's with parse='text'
            parse_mode = attrs.get((None, 'parse'), 'xml')
            if parse_mode == 'text':
                return

            # Only follow inclusions that have the same scheme as the
            # initial document.
            resolved = Uri.BaseJoin(effective_base, target)
            if Uri.GetScheme(resolved) == self._scheme:
                callback(resolved)
Ejemplo n.º 5
0
    def resolve(self, uri, base=None):
        """
        Resolve ``uri`` through the registered scheme handlers, searching
        ``sys.path`` for ``file`` resources that cannot be found directly.

        The scheme is taken from ``uri`` or, failing that, from ``base``.
        For ``file`` URIs the directory containing the file is appended to
        ``sys.path`` (or ``'.'`` when the path has no directory part).  The
        handler is looked up in ``self.handlers`` by scheme, then under the
        ``None`` key; when neither exists ``Uri.UriResolverBase.resolve`` is
        used.  If that first attempt fails for a ``file`` URI, the file is
        retried relative to every ``sys.path`` entry and to its
        ``Peach/Engine`` subdirectory.

        Returns whatever the selected handler (or the base resolver) returns.

        Raises ``Uri.UriException`` (``SCHEME_REQUIRED``) when no scheme can
        be determined, and ``PeachException`` when the resource cannot be
        located.
        """
        scheme = Uri.GetScheme(uri)
        if scheme is None:
            if base is not None:
                scheme = Uri.GetScheme(base)
            if scheme is None:
                # Another option is to fall back to Base class behavior
                raise Uri.UriException(Uri.UriException.SCHEME_REQUIRED,
                                       base=base,
                                       ref=uri)

        def attempt(candidate):
            # Scheme handler first, then the catch-all handler registered
            # under None, then the base class implementation.
            func = self.handlers.get(scheme)
            if func is None:
                func = self.handlers.get(None)
            if func is None:
                return Uri.UriResolverBase.resolve(self, candidate, base)
            return func(candidate, base)

        filename = None
        if scheme == 'file':
            # The scheme may have been inherited from ``base``; only strip
            # the "file:" prefix when ``uri`` really carries it.
            if uri.startswith('file:'):
                filename = uri[5:]
            else:
                filename = uri

            # Add the file's directory to sys.path.  Backslash separators
            # take precedence over forward slashes.
            index = filename.rfind('\\')
            if index < 0:
                index = filename.rfind('/')
            if index < 0:
                directory = '.'
            else:
                # Everything before the last separator; keep the separator
                # itself when the file lives directly under the root.
                directory = filename[:index] or filename[:index + 1]
            if directory not in sys.path:
                sys.path.append(directory)

        try:
            return attempt(uri)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            if scheme != 'file':
                raise PeachException("Peach was unable to locate [%s]" % uri)

        # Lets try looking in our sys.path
        search_dirs = []
        for path in list(sys.path):
            search_dirs.append(path)
            search_dirs.append("%s/Peach/Engine" % path)

        for path in search_dirs:
            newuri = 'file:' + path + '/' + filename
            try:
                # Retry with the relocated URI, not the one that just failed.
                return attempt(newuri)
            except Exception:
                # This location did not work; try the next one.
                continue

        raise PeachException("Peach was unable to locate [%s]" % uri)
Ejemplo n.º 6
0
    def _getStreamEncoding(self, stream):
        """
        Returns the encoding of the given stream, if this info can be
        determined from metadata in the stream object with a reasonable
        degree of confidence.

        Adheres to RFC 3023, which requires the the charset value in the
        Content-Type header to take precedence, or if no value is
        available, to assume us-ascii in the case of certain text/*
        media types. For other text/* media types, adheres to RFC 2616
        sec. 3.7.1, which requires the assumption of iso-8859-1, when
        the entity was transmitted by HTTP. Media type and charset info
        is ignored for streams believed to originate from a local file,
        in accordance with XML 1.0 Third Edition appendix F.2.
        """
        # We should never try to deduce the encoding when the stream is
        # a local file, in order to conform with XML 1.0 Third Edition
        # appendix F.2, and also because urllib.urlopen() uses
        # mimetypes.guess_type() to set the media type on both local
        # files and FTP resources, thus causing '*.xml' files to tend to
        # get a 'text/xml' mapping, which is bad because RFC 3023
        # requires them to be assumed to be us-ascii. Therefore, we must
        # look for clues that assure us that the stream is not likely to
        # be wrapping a file or FTP resource. The way to tell is to look
        # for the 'url' attribute on the stream object. urllib.urlopen()
        # MAY create this attribute and set it to the URL that was
        # passed in. Note that this 'URL' have just been a local
        # filesystem path or partial URL or junk like 'C:/x/y/z'
        stream_url = getattr(stream, 'url', None)
        if stream_url is None:
            return None
        scheme = Uri.GetScheme(stream_url)
        if scheme is None or scheme.lower() in ('file', 'ftp') \
                          or len(scheme) == 1:
            return None
        # Get the stream metadata.
        # Streams created by urllib.urlopen() MAY have an info() method
        # that MAY return a mimetools.Message object. We can trust this
        # as a source of metadata since we have already ruled out the
        # likelihood of it being a local file or FTP resource.
        info = None
        if hasattr(self.stream, 'info'):
            if isinstance(self.stream.info, types.MethodType):
                info = self.stream.info()
        if isinstance(info, mimetools.Message):
            # use explicit charset if present and not empty string.
            charset = info.getparam('charset')
            if charset:
                return charset
            # charset empty or not present, so examine media type
            # and protocol.
            maintype = getattr(info, 'maintype', None)
            subtype = getattr(info, 'subtype', None)
            if maintype == 'text':
                if subtype == 'xml' or \
                   subtype == 'xml-external-parsed-entity' or \
                   subtype.endswith('+xml'):
                    return 'us-ascii'
                elif scheme == 'http':
                    return 'iso-8859-1'
        # If we reach this point, the stream metadata was of no use,
        # so we'll let the parser determine the encoding from
        # the entity itself.
        return None
Ejemplo n.º 7
0
def isSafeDownloadTarget(candidate):
    """
    Return True when ``candidate`` has an explicit URI scheme and that
    scheme is ``http`` (compared case-insensitively); False otherwise,
    including when no scheme can be determined.
    """
    scheme = Uri.GetScheme(candidate)
    # Identity test against None rather than "!= None".
    return scheme is not None and scheme.lower() == "http"