def resolve(self, uri, base=None):
    """
    Resolve ``uri`` by dispatching to the handler registered for its scheme.

    The scheme is taken from ``uri``; when ``uri`` has none and ``base`` is
    given, the scheme of ``base`` is used instead. The handler is looked up
    in ``self.handlers`` under the scheme, then under the ``None`` key. If
    neither lookup yields a handler, the call is delegated to
    ``Uri.UriResolverBase.resolve``.

    Returns whatever the selected handler (or the base resolver) returns.

    Raises ``Uri.UriException`` with code ``SCHEME_REQUIRED`` when no scheme
    can be determined from either ``uri`` or ``base``.
    """
    scheme = Uri.GetScheme(uri)
    if not scheme and base:
        # No scheme on the reference itself; borrow the base URI's scheme.
        scheme = Uri.GetScheme(base)
    if not scheme:
        # Another option is to fall back to Base class behavior
        raise Uri.UriException(Uri.UriException.SCHEME_REQUIRED,
                               base=base, ref=uri)

    # Scheme-specific handler first, then the catch-all stored under None.
    handler = self.handlers.get(scheme) or self.handlers.get(None)
    if handler:
        return handler(uri, base)
    return Uri.UriResolverBase.resolve(self, uri, base)
def startDocument(self):
    """
    Initialise per-document state at the start of parsing.

    Reads the system id from ``self._locator`` and stores:

    * ``self._bases``    -- a stack seeded with the system id,
    * ``self._scheme``   -- the URI scheme of the system id,
    * ``self._elements`` -- the expanded names treated as inclusions: always
      the XInclude ``include`` element, plus ``self.XSLT_INCLUDES`` when the
      free variable ``xslt`` (defined outside this function) is true.
    """
    system_id = self._locator.getSystemId()
    self._bases = [system_id]
    self._scheme = Uri.GetScheme(system_id)

    inclusion_names = [
        ("http://www.w3.org/2001/XInclude", "include"),
    ]
    self._elements = inclusion_names
    if xslt:
        inclusion_names.extend(self.XSLT_INCLUDES)
def _orig_resolve(self, uri, baseUri=None):
    """
    This function takes a URI or a URI reference plus a base URI, produces
    a normalized URI using the normalize function if a base URI was given,
    then attempts to obtain access to an entity representing the resource
    identified by the resulting URI, returning the entity as a stream (a
    Python file-like object).

    Raises a UriException if the URI scheme is unsupported or if a stream
    could not be obtained for any reason. Raises ValueError if no base URI
    was given and the URI has no scheme (i.e. it is a relative reference).
    """
    if baseUri is not None:
        # normalize() is relied upon to validate the scheme in this branch.
        uri = self.normalize(uri, baseUri)
        scheme = Uri.GetScheme(uri)
    else:
        scheme = Uri.GetScheme(uri)
        # since we didn't use normalize(), we need to verify here
        if scheme not in Uri.DEFAULT_URI_SCHEMES:
            if scheme is None:
                raise ValueError('When the URI to resolve is a relative '
                    'reference, it must be accompanied by a base URI.')
            else:
                # NOTE(review): UriException is referenced unqualified here
                # while other names use the Uri. prefix -- confirm it is
                # imported into this module's namespace.
                raise UriException(UriException.UNSUPPORTED_SCHEME,
                                   scheme=scheme,
                                   resolver=self.__class__.__name__)

    # Bypass urllib for opening local files. This means we don't get all
    # the extra metadata that urllib adds to the stream (Last-modified,
    # Content-Length, a poorly guessed Content-Type, and the URI), but
    # we also avoid its potentially time-consuming socket.gethostbyname()
    # calls, which aren't even warranted and are part of urllib's dubious
    # interpretation of RFC 1738.
    if scheme == 'file':
        path = Uri.UriToOsPath(uri, attemptAbsolute=False)
        try:
            # Opened in binary mode; file() is the Python 2 builtin.
            stream = file(path, 'rb')
        except IOError, e:
            # Report both the URI and the OS path it was mapped to.
            raise UriException(UriException.RESOURCE_ERROR,
                               loc='%s (%s)' % (uri, path),
                               uri=uri, msg=str(e))
    # NOTE(review): the code visible here ends after the 'file' branch: the
    # opened stream is not returned and no other scheme is handled, although
    # the docstring promises a stream. Confirm against the complete function.
def startElementNS(self, expandedName, tagName, attrs):
    """
    Track the element's base URI and report followable inclusions.

    The element's ``xml:base`` attribute (or, when absent, the base on top
    of ``self._bases``) is pushed onto ``self._bases``. If ``expandedName``
    is one of ``self._elements``, its ``href`` is joined against that base
    and, when the result has the same scheme as the initial document
    (``self._scheme``), passed to the free variable ``callback`` (defined
    outside this function).

    Nothing is reported when the element has no ``href`` attribute or when
    its ``parse`` attribute is ``'text'``.
    """
    # Update xml:base stack
    base_attr = ("http://www.w3.org/XML/1998/namespace", "base")
    element_base = attrs.get(base_attr, self._bases[-1])
    self._bases.append(element_base)

    if expandedName not in self._elements:
        return

    try:
        target = attrs[(None, 'href')]
    except KeyError:
        # XInclude same document reference, nothing to do
        return

    # Ignore XInclude's with parse='text'
    parse_mode = attrs.get((None, 'parse'), 'xml')
    if parse_mode == 'text':
        return

    # Only follow inclusions that have the same scheme as the
    # initial document.
    resolved = Uri.BaseJoin(element_base, target)
    if Uri.GetScheme(resolved) != self._scheme:
        return
    callback(resolved)
def resolve(self, uri, base=None):
    """
    Resolve ``uri`` through the scheme handlers, searching ``sys.path`` for
    ``file`` URIs that cannot be opened directly.

    The scheme is taken from ``uri`` or, failing that, from ``base``. The
    handler is looked up in ``self.handlers`` under the scheme, then under
    ``None``; if neither exists, ``Uri.UriResolverBase.resolve`` is used.

    For ``file`` URIs:

    * the directory containing the referenced file is appended to
      ``sys.path`` (once), or ``'.'`` when the path has no separator;
    * if direct resolution fails, each ``sys.path`` entry and its
      ``Peach/Engine`` sub-directory is tried as a location prefix.

    Returns whatever the handler (or the base resolver) returns.

    Raises ``Uri.UriException`` (``SCHEME_REQUIRED``) when no scheme can be
    determined, and ``PeachException`` when the resource cannot be located.
    """
    scheme = Uri.GetScheme(uri)
    if scheme is None and base is not None:
        scheme = Uri.GetScheme(base)
    if scheme is None:
        # Another option is to fall back to Base class behavior
        raise Uri.UriException(Uri.UriException.SCHEME_REQUIRED,
                               base=base, ref=uri)

    def dispatch(target):
        # Scheme handler, then catch-all handler, then base class resolver.
        func = self.handlers.get(scheme)
        if func is None:
            func = self.handlers.get(None)
        if func is None:
            return Uri.UriResolverBase.resolve(self, target, base)
        return func(target, base)

    if scheme == 'file':
        # The scheme may have been inherited from ``base``, in which case
        # ``uri`` is a relative reference with no "file:" prefix to strip.
        if uri[:5].lower() == 'file:':
            prefix, local = uri[:5], uri[5:]
        else:
            prefix, local = 'file:', uri

        # Add the file's directory to sys.path. Backslash separators are
        # checked first, then forward slashes.
        cut = local.rfind('\\')
        if cut == -1:
            cut = local.rfind('/')
        if cut == -1:
            directory = '.'
        else:
            # Everything before the last separator; keep a lone leading
            # separator so a root-level file maps to the root directory.
            directory = local[:cut] or local[:1]
        if directory not in sys.path:
            sys.path.append(directory)

    try:
        return dispatch(uri)
    except Exception:
        if scheme != 'file':
            raise PeachException("Peach was unable to locate [%s]" % uri)

    # Lets try looking in our sys.path. Snapshot the candidates first so the
    # search is unaffected by anything a handler does to sys.path.
    candidates = []
    for path in sys.path:
        candidates.append(path)
        candidates.append("%s/Peach/Engine" % path)

    for path in candidates:
        newuri = prefix + path + '/' + local
        try:
            # Resolve the relocated URI, not the original one.
            return dispatch(newuri)
        except Exception:
            continue

    raise PeachException("Peach was unable to locate [%s]" % uri)
def _getStreamEncoding(self, stream):
    """
    Returns the encoding of the given stream, if this info can be
    determined from metadata in the stream object with a reasonable
    degree of confidence.

    Adheres to RFC 3023, which requires the charset value in the
    Content-Type header to take precedence, or if no value is
    available, to assume us-ascii in the case of certain text/* media
    types. For other text/* media types, adheres to RFC 2616 sec. 3.7.1,
    which requires the assumption of iso-8859-1, when the entity was
    transmitted by HTTP. Media type and charset info is ignored for
    streams believed to originate from a local file, in accordance
    with XML 1.0 Third Edition appendix F.2.

    Returns the encoding name as a string, or None when the metadata is
    absent or not trustworthy and the parser should determine the
    encoding from the entity itself.
    """
    # We should never try to deduce the encoding when the stream is
    # a local file, in order to conform with XML 1.0 Third Edition
    # appendix F.2, and also because urllib.urlopen() uses
    # mimetypes.guess_type() to set the media type on both local
    # files and FTP resources, thus causing '*.xml' files to tend to
    # get a 'text/xml' mapping, which is bad because RFC 3023
    # requires them to be assumed to be us-ascii. Therefore, we must
    # look for clues that assure us that the stream is not likely to
    # be wrapping a file or FTP resource. The way to tell is to look
    # for the 'url' attribute on the stream object. urllib.urlopen()
    # MAY create this attribute and set it to the URL that was
    # passed in. Note that this 'URL' may have just been a local
    # filesystem path or partial URL or junk like 'C:/x/y/z'
    stream_url = getattr(stream, 'url', None)
    if stream_url is None:
        return None
    scheme = Uri.GetScheme(stream_url)
    # A one-character "scheme" is treated as a drive letter, i.e. a
    # local path rather than a real URI.
    if scheme is None or len(scheme) == 1:
        return None
    # URI schemes are case-insensitive; normalise once for all checks.
    scheme = scheme.lower()
    if scheme in ('file', 'ftp'):
        return None

    # Get the stream metadata.
    # Streams created by urllib.urlopen() MAY have an info() method
    # that MAY return a mimetools.Message object. We can trust this
    # as a source of metadata since we have already ruled out the
    # likelihood of it being a local file or FTP resource.
    # The metadata is read from the stream that was passed in, not
    # from any stream stored on the instance.
    info = None
    info_method = getattr(stream, 'info', None)
    if isinstance(info_method, types.MethodType):
        info = info_method()

    if isinstance(info, mimetools.Message):
        # use explicit charset if present and not empty string.
        charset = info.getparam('charset')
        if charset:
            return charset
        # charset empty or not present, so examine media type
        # and protocol.
        maintype = getattr(info, 'maintype', None)
        subtype = getattr(info, 'subtype', None)
        if maintype == 'text':
            is_xml = (subtype in ('xml', 'xml-external-parsed-entity')
                      or (subtype is not None
                          and subtype.endswith('+xml')))
            if is_xml:
                return 'us-ascii'
            elif scheme == 'http':
                return 'iso-8859-1'

    # If we reach this point, the stream metadata was of no use,
    # so we'll let the parser determine the encoding from
    # the entity itself.
    return None
def isSafeDownloadTarget(candidate):
    """
    Return True only when ``candidate`` has an ``http`` URI scheme.

    The scheme is compared case-insensitively. A candidate for which
    ``Uri.GetScheme`` yields no scheme is reported as not safe (False).
    """
    scheme = Uri.GetScheme(candidate)
    if scheme is None:
        return False
    return scheme.lower() == "http"