def urisplit(url):
    """A better urlsplit: it differentiates an empty querystring/fragment
    from an absent one, e.g.::

      urisplit('http://a.b/c/d')   -> ('http', 'a.b', '/c/d', None, None)
      urisplit('http://a.b/c/d?')  -> ('http', 'a.b', '/c/d', '',   None)
      urisplit('http://a.b/c/d#')  -> ('http', 'a.b', '/c/d', None, '')
      urisplit('http://a.b/c/d?#') -> ('http', 'a.b', '/c/d', '',   '')

    :param url: the URL string to split.
    :returns: a urlparse.SplitResult whose query/fragment fields are None
        when the corresponding delimiter ('?'/'#') is absent from `url`.

    Unlike a plain negative-index check, the slice-based tests below also
    handle '' and '#' without raising IndexError.
    """
    ret = list(urlparse.urlsplit(url))
    if ret[4] == '' and not url.endswith('#'):
        # no '#' at all -> there is no fragment, not even an empty one
        ret[4] = None
        # the (virtual) fragment would start right after the whole string
        fragment_start = len(url)
    else:
        # there is a (possibly empty) fragment; remember where its '#'
        # sits so the query test below can look just before it
        fragment_start = len(url) - (len(ret[4]) + 1)
    if ret[3] == '' and not url[:fragment_start].endswith('?'):
        # no '?' right before the fragment -> no querystring at all
        ret[3] = None
    return urlparse.SplitResult(*ret)
def extract_path(url):
    """Extract the path (including the query string) from ``url``.

    If ``url`` is already a path (i.e. starts with ``/``) it is returned
    without modifications.  An empty URL, or one that only contains a
    domain with no trailing slash, yields a single ``/``.
    """
    # This method was actually contributed by Wiktor Bachnik (wbachnik)
    # but because I forgot to rebase before my branch, this is going to
    # appear in a different commit :-/
    if not url:
        # empty, assume /
        return '/'
    if url.startswith('/'):
        # url is already a path
        return url
    # url is a proper url scheme://host/... -- keep only path + query
    pieces = urlparse.urlsplit(url)
    path = urlparse.SplitResult(scheme='', netloc='', path=pieces.path,
                                query=pieces.query, fragment='').geturl()
    if not path:
        # case for http://example.com -- no path component at all
        path = '/'
    if '?' in url and not pieces.query:
        # A bare trailing question mark is dropped by urlsplit; if the
        # original url carried one, preserve it.
        path += '?'
    return path
def handleRequest(self): syncengine.dbsession = sessionmaker(bind=syncengine.dbengine)() # TODO: enforce authentication... # if len(sconf.users) > 0: # ... # self.assertEqual(pysyncml.Context.getAuthInfo(request, None), # adict(auth=pysyncml.NAMESPACE_AUTH_BASIC, # username='******', password='******')) # context, adapter = syncengine._makeAdapter() clen = 0 if 'Content-Length' in self.headers: clen = int(self.headers['Content-Length']) request = pysyncml.adict(headers=dict((('content-type', 'application/vnd.syncml+xml'),)), body=self.rfile.read(clen)) self.session.syncml.effectiveID = pysyncml.Context.getTargetID(request) # todo: this should be a bit more robust... urlparts = list(urlparse.urlsplit(self.session.syncml.effectiveID)) if self.path_params.get('sessionid') != self.session.id: urlparts[2] += ';sessionid=' + self.session.id self.session.syncml.returnUrl = urlparse.SplitResult(*urlparts).geturl() response = pysyncml.Response() self.stats = adapter.handleRequest(self.session.syncml, request, response) syncengine.dbsession.commit() return response
def buildURL(self, txrequest, action='', **query):
    """Build a URL relative to the server base_url, with the given query
    parameters added.

    :param txrequest: the twisted.web request being serviced.
    :param action: optional path segment joined onto the base URL.
    :param query: extra query parameters appended to the result.
    :returns: the assembled URL string.
    """
    base = '//%s/%s/' % (self.eventhandler.url, '/'.join(
        txrequest.prepath))
    # NOTE(review): `base` is produced by a format string and can never
    # be falsy, so this fallback branch looks unreachable -- confirm
    # whether it is dead code.
    if not base:
        a = urlparse.urlparse(txrequest.prePathURL() + '/')
        port = a.port
        # An externally visible port (e.g. behind a proxy) overrides the
        # port twisted reports.
        if self.eventhandler.external_port:
            port = self.eventhandler.external_port
        if port == 80:
            # default HTTP port: omit it from the netloc
            port = ''
        else:
            port = ':%s' % port
        url = urlparse.SplitResult(
            a.scheme, '%s:%s@%s%s' % (a.username, a.password,
                                      a.hostname, port),
            a.path, a.query, a.fragment)
        base = url.geturl()
    if action:
        base = urlparse.urljoin(base, action)
    print 'buildURL', base
    return appendArgs(base, query)
def urlsplit(url):
    """Split ``url`` like urlparse.urlsplit(), but parse query strings for
    *any* scheme.

    urlparse.urlsplit() doesn't separate the query string from the path
    for schemes not in urlparse.uses_query, but furl should support
    proper parsing of query strings and paths for any scheme users decide
    to use (custom schemes, internal schemes, etc).  So as a workaround,
    re-split the URL under 'http' (a scheme in urlparse.uses_query) and
    restore the caller's scheme afterwards.

    Parameters:
      url: URL string to split.

    Returns:
      urlparse.SplitResult tuple subclass (just like urlparse.urlsplit()
      does) with fields (scheme, netloc, path, query, fragment, username,
      password, hostname, port).  See
      http://docs.python.org/library/urlparse.html#urlparse.urlsplit
      for more details.
    """
    first_pass = urlparse.urlsplit(url)
    real_scheme = first_pass.scheme
    if not real_scheme or real_scheme in urlparse.uses_query:
        # urlsplit() already handled the query correctly -- done.
        return first_pass
    # Re-split under 'http' so the query is separated from the path,
    # then put the original scheme back into the returned tuple.
    as_http = ('http',) + tuple(first_pass)[1:]
    second_pass = urlparse.urlsplit(urlparse.urlunsplit(as_http))
    return urlparse.SplitResult(real_scheme, *tuple(second_pass)[1:])
def __init__(self, version, address):
    """Remember the API version and split ``address`` into host/port.

    ``address`` is a 'host:port' string; parsing it as the authority
    component of a SplitResult yields the hostname and numeric port.
    """
    super(RyuClientBase, self).__init__()
    self.version = version
    authority = urlparse.SplitResult(scheme='', netloc=address, path='',
                                     query='', fragment='')
    self.host = authority.hostname
    self.port = authority.port
    # every request path is rooted at /<version>/
    self.url_prefix = '/' + self.version + '/'
def urlsplit(url):
    """Split ``url`` into its components, for any scheme.

    Parameters:
      url: URL string to split.

    Returns:
      urlparse.SplitResult tuple subclass, just like urlparse.urlsplit()
      returns, with fields (scheme, netloc, path, query, fragment,
      username, password, hostname, port).  See the url below for more
      details on urlsplit().

      http://docs.python.org/library/urlparse.html#urlparse.urlsplit
    """
    # If a scheme wasn't provided, we shouldn't add one by setting the
    # scheme to 'http'. We can use urlparse.urlsplit(url) as-is.
    if '://' not in url:
        return urlparse.urlsplit(url)
    # Re-split under 'http' (a scheme urlsplit() fully understands),
    # then restore the caller's scheme in the returned tuple.
    real_scheme = _get_scheme(url)
    parsed = urlparse.urlsplit(_set_scheme(url, 'http'))
    return urlparse.SplitResult(real_scheme, *tuple(parsed)[1:])
def delete(url):
    """Remove the cached entry for ``url`` (and related caches).

    Also clears the per-project cache when the URL names a request that
    was accepted, and recursively deletes the query-less variant of the
    URL.
    """
    url = urllib.unquote(url)
    match, project = Cache.match(url)
    if match:
        path = Cache.path(url, project, include_file=True)
        # Rather then wait for last updated statistics to expire, remove the
        # project cache if applicable.
        if project:
            apiurl, _ = Cache.spliturl(url)
            if project.isdigit():
                # Clear target project cache upon request acceptance.
                project = osc.core.get_request(apiurl, project).actions[0].tgt_project
            Cache.delete_project(apiurl, project)
        if os.path.exists(path):
            if conf.config['debug']:
                print('CACHE_DELETE', url, file=sys.stderr)
            os.remove(path)
    # Also delete version without query. This does not handle other
    # variations using different query strings. Handy for PUT with ?force=1.
    o = urlparse.urlsplit(url)
    if o.query != '':
        url_plain = urlparse.SplitResult(o.scheme, o.netloc, o.path, '',
                                         o.fragment).geturl()
        Cache.delete(url_plain)
def urlsplit(url):
    """Split ``url`` into its components, for any scheme.

    Parameters:
      url: URL string to split.

    Returns:
      urlparse.SplitResult tuple subclass, just like urlparse.urlsplit()
      returns, with fields (scheme, netloc, path, query, fragment,
      username, password, hostname, port).  See the url below for more
      details on urlsplit().

      http://docs.python.org/library/urlparse.html#urlparse.urlsplit
    """
    real_scheme = _get_scheme(url)
    # urlsplit() only parses the query for schemes in urlparse.uses_query,
    # so swap in 'http' (a scheme in urlparse.uses_query) before splitting
    # and restore the original scheme in the result afterwards.
    if real_scheme is not None:
        url = _set_scheme(url, 'http')
    parts = list(urlparse.urlsplit(url))
    parts[0] = real_scheme
    return urlparse.SplitResult(*parts)
def create_url(request, swap_scheme=False, swap_origin=False,
               query_parameter_to_remove="redirection"):
    """Build a variant of ``request.url`` for redirection tests.

    :param request: the wptserve request whose URL is transformed.
    :param swap_scheme: flip http<->https and use the matching server port.
    :param swap_origin: swap the origin netloc via
        __get_swapped_origin_netloc().
    :param query_parameter_to_remove: query key stripped from the result.
    :returns: the transformed URL string.
    """
    parsed = urlparse.urlsplit(request.url)
    destination_netloc = parsed.netloc
    scheme = parsed.scheme
    if swap_scheme:
        scheme = "http" if parsed.scheme == "https" else "https"
        hostname = parsed.netloc.split(':')[0]
        # Use the port the test server actually listens on for the new
        # scheme.
        port = request.server.config["ports"][scheme][0]
        destination_netloc = ":".join([hostname, str(port)])

    if swap_origin:
        destination_netloc = __get_swapped_origin_netloc(destination_netloc)

    parsed_query = urlparse.parse_qsl(parsed.query, keep_blank_values=True)
    # A list comprehension (not filter()) so urlencode() receives a real
    # sequence under both Python 2 and Python 3.
    parsed_query = [pair for pair in parsed_query
                    if pair[0] != query_parameter_to_remove]

    destination_url = urlparse.urlunsplit(urlparse.SplitResult(
        scheme=scheme,
        netloc=destination_netloc,
        path=parsed.path,
        query=urllib.urlencode(parsed_query),
        fragment=None))

    return destination_url
def _defaults(self):
    """Default initialization for the MalleableRequest object."""
    super(MalleableRequest, self)._defaults()
    # Start from a bare-bones GET for the root path over http.
    self._url = urlparse.SplitResult(scheme="http", netloc="", path="/",
                                     query="", fragment="")
    self.verb = "GET"
    self.extra = ""
    self.headers = {}
    self.body = ""
def attempt_redirect(self):
    """Look up the requested URL in the redirection storage and, if a
    destination is known, issue a 301 redirect.

    Returns True when a redirect response was sent, False otherwise.
    """
    url = self._url()
    if not url:
        return False
    try:
        old_path_elements = self.request.physicalPathFromURL(url)
    except ValueError:
        return False
    storage = queryUtility(IRedirectionStorage)
    if storage is None:
        return False
    old_path = '/'.join(old_path_elements)
    # First lets try with query string in cases or content migration
    new_path = None
    query_string = self.request.QUERY_STRING
    if query_string:
        new_path = storage.get("%s?%s" % (old_path, query_string))
        # if we matched on the query_string we don't want to include it
        # in redirect
        if new_path:
            query_string = ''
    if not new_path:
        new_path = storage.get(old_path)
    # Fall back to view- and template-specific lookups.
    if not new_path:
        new_path = self.find_redirect_if_view(old_path_elements, storage)
    if not new_path:
        new_path = self.find_redirect_if_template(url, old_path_elements,
                                                  storage)
    if not new_path:
        return False
    url = urlparse.urlsplit(new_path)
    if url.netloc:
        # External URL
        # avoid double quoting
        url_path = unquote(url.path)
        url_path = quote(url_path)
        url = urlparse.SplitResult(*(url[:2] + (url_path, ) + url[3:])).geturl()
    else:
        url = self.request.physicalPathToURL(new_path)
    # some analytics programs might use this info to track
    if query_string:
        url += "?" + query_string
    self.request.response.redirect(url, status=301, lock=1)
    return True
def getFilenameFromUri(uri):
    """Convert a file:// URI into a local file system reference."""
    pieces = urlparse.urlsplit(uri)
    assert pieces.scheme == "file", "RO %s is not in local file system" % uri
    # Drop the scheme and authority, keep path + query + fragment, then
    # translate the remaining URL path into an OS path.
    local = urlparse.SplitResult("", "", pieces.path, pieces.query,
                                 pieces.fragment)
    return urllib.url2pathname(urlparse.urlunsplit(local))
def create_connect_args(self, url):
    """Translate a SQLAlchemy URL into phoenixdb connect arguments.

    Builds the Phoenix query-server endpoint (default port 8765) and
    always enables autocommit.
    """
    endpoint = urlparse.SplitResult(
        scheme='http',
        netloc='{}:{}'.format(url.host, url.port or 8765),
        path='/',
        query=urllib.urlencode(url.query),
        fragment='',
    )
    phoenix_url = urlparse.urlunsplit(endpoint)
    return [phoenix_url], {'autocommit': True}
def update_query(self, **kwargs):
    """Return this URL with ``kwargs`` appended to its query string.

    Existing query parameters are kept; every keyword argument is added
    as an extra key=value pair.  Returns the rebuilt URL string.
    """
    pairs = urlparse.parse_qsl(self.parsed_url.query)
    pairs.extend(kwargs.items())
    rebuilt = urlparse.SplitResult(scheme=self.parsed_url.scheme,
                                   netloc=self.parsed_url.netloc,
                                   path=self.parsed_url.path,
                                   query=urllib.urlencode(pairs),
                                   fragment=self.parsed_url.fragment)
    return rebuilt.geturl()
def get_url_for_local_path(path):
    """Return a file:// URL for a local filesystem path.

    >>> PathTools.get_url_for_local_path('/foo')
    'file:///foo'

    #>>> PathTools.get_url_for_local_path('D:\\\\foo')
    #'file:///D:/foo'
    #>>> PathTools.get_url_for_local_path('D:\\\\foo bar')
    #'file:///D:/foo%20bar'
    """
    # SplitResult(...).geturl() with an empty/None netloc renders
    # 'file:/foo' (urlunsplit only emits '//' when a netloc is present),
    # which contradicts the doctest above -- so assemble the URL by hand.
    url_path = urllib.pathname2url(path)
    if not url_path.startswith('//'):
        # POSIX paths: insert the empty-authority marker ourselves
        # (Windows pathname2url already emits a leading '///D:/...').
        url_path = '//' + url_path
    return "file:" + url_path
def _youtube_callback_step2(info, video_id, callback):
    """Parse YouTube's video_info response and hand the best available
    stream URL to ``callback``.

    Calls ``callback(None)`` on failure, otherwise
    ``callback(url, content_type=..., title=...)``.
    """
    try:
        body = info['body']
        params = cgi.parse_qs(body)
        if params.get("status", [""])[0] == "fail":
            logging.warning("youtube download failed because: %s",
                            params.get("reason", ["unknown"])[0])
            callback(None)
            return
        # fmt_url_map is a comma separated list of pipe separated
        # pairs of fmt, url
        # build the format codes.
        fmt_list = [x.split('/')[0] for x in params['fmt_list'][0].split(',')]
        # build the list of available urls.
        stream_map = params["url_encoded_fmt_stream_map"][0].split(",")
        fmt_url_map = dict()
        # strip url= from url=xxxxxx, strip trailer. Strip duplicate params.
        for fmt, u in zip(fmt_list, stream_map):
            o = urlparse.urlsplit(unquote_plus(u[4:]).split(';')[0])
            # set() drops repeated query parameters (order is not kept)
            qs = urlencode(list(set(urlparse.parse_qsl(o.query))))
            # Let's put humpty dumpty back together again
            fmt_url_map[fmt] = urlparse.urlunsplit(
                urlparse.SplitResult(o.scheme, o.netloc, o.path, qs,
                                     o.fragment))
        title = params.get("title", ["No title"])[0]
        try:
            title = title.decode("utf-8")
        except UnicodeDecodeError:
            title = title.decode("ascii", "ignore")
        logging.debug("fmt_url_map keys: %s", fmt_url_map.keys())
        # Try the best quality first.
        # http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
        for fmt, content_type in [("22", u"video/mp4"),
                                  ("18", u"video/mp4"),
                                  ("5", u"video/x-flv")]:
            if fmt in fmt_url_map:
                new_url = fmt_url_map[fmt]
                logging.debug("youtube download: trying %s %s", fmt, new_url)
                callback(unicode(new_url), content_type=content_type,
                         title=title)
                return
        _youtube_errback(info, callback)
    except StandardError:
        logging.exception(
            "youtube_callback_step2: unable to scrape YouTube URL")
        callback(None)
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL using urlparse.urlsplit(), splitting query and fragments.

    This function papers over Python issue9374 when needed: on affected
    versions urlsplit() can leave '?...' and '#...' inside the path for
    unknown schemes, so they are peeled off by hand here.

    The parameters are the same as urlparse.urlsplit.
    """
    parts = urlparse.urlsplit(url, scheme, allow_fragments)
    scheme, netloc, path, query, fragment = parts
    if allow_fragments and '#' in path:
        path, _, fragment = path.partition('#')
    if '?' in path:
        path, _, query = path.partition('?')
    return urlparse.SplitResult(scheme, netloc, path, query, fragment)
def build_url(scheme=None, netloc='', path='', query='', fragment=''): if not scheme and netloc: scheme = 'http' try: query_str = urllib.urlencode(query) except TypeError: query_str = urllib.quote(query) return urlparse.urlunsplit( urlparse.SplitResult(scheme=scheme, netloc=netloc, path=path, query=query_str, fragment=fragment))
def __pathToTvrh(self, solrUrl, collection):
    """Derive the term-vector request handler (tvrh) URL for a collection
    from the user-supplied Solr URL."""
    import urlparse
    root = urlparse.urlsplit(solrUrl)
    # Keep only scheme://netloc, discarding any path the user supplied.
    solrBaseUrl = urlparse.urlunsplit(urlparse.SplitResult(
        scheme=root.scheme, netloc=root.netloc, path='', query='',
        fragment=''))
    # scheme://netloc -> .../solr/<collection>/tvrh
    solrBaseUrl = urlparse.urljoin(solrBaseUrl, 'solr/')
    solrBaseUrl = urlparse.urljoin(solrBaseUrl, collection + '/')
    return urlparse.urljoin(solrBaseUrl, 'tvrh')
def GetAuthenticatedGitURL(url):
    """Returns the authenticated version of a git URL.

    In chromium, there is a special URL that is the "authenticated"
    version. The URLs are identical but the authenticated one has special
    privileges.
    """
    parts = urlparse.urlsplit(url)
    if parts.scheme not in ('https', 'http'):
        # Non-HTTP(S) URLs (e.g. git://) have no authenticated variant.
        return parts.geturl()
    # Force https and prefix the path with '/a' (the auth marker).
    authenticated = parts._replace(scheme='https', path='/a' + parts.path)
    return authenticated.geturl()
def create_redirect_url(request, cross_origin = False):
    """Rebuild ``request.url`` without its query and fragment, optionally
    swapping the origin's netloc (for cross-origin redirect tests)."""
    parsed = urlparse.urlsplit(request.url)
    netloc = parsed.netloc
    if cross_origin:
        netloc = get_swapped_origin_netloc(parsed.netloc)
    # query/fragment are deliberately dropped (None) from the result.
    stripped = urlparse.SplitResult(scheme=parsed.scheme, netloc=netloc,
                                    path=parsed.path, query=None,
                                    fragment=None)
    return urlparse.urlunsplit(stripped)
def __pathToUpdate(self, solrUrl, collection):
    """Derive the update handler URL for a collection from the
    user-supplied Solr URL."""
    #TODO there is plenty of stuff duplicated in __pathToTvrh above - DRY
    import urlparse
    root = urlparse.urlsplit(solrUrl)
    # Keep only scheme://netloc, discarding any path the user supplied.
    solrBaseUrl = urlparse.urlunsplit(urlparse.SplitResult(
        scheme=root.scheme, netloc=root.netloc, path='', query='',
        fragment=''))
    # scheme://netloc -> .../solr/<collection>/update
    solrBaseUrl = urlparse.urljoin(solrBaseUrl, 'solr/')
    solrBaseUrl = urlparse.urljoin(solrBaseUrl, collection + '/')
    return urlparse.urljoin(solrBaseUrl, 'update')
def build_url(self, format_string, format_params=None, query_params=None):
    """Build a full API request URL.

    :param format_string: %-style path template, e.g. '/v1/%(hostname)s'.
    :param format_params: mapping used to fill the template; a 'hostname'
        entry defaulting to self.hostname is supplied when missing.
    :param query_params: mapping encoded into the query string.
    :returns: the assembled URL string.
    """
    # None defaults (not {}): the old mutable defaults were *mutated*
    # below, leaking state between calls.  Copy format_params so the
    # caller's mapping is never modified either.
    format_params = dict(format_params) if format_params else {}
    query_params = query_params if query_params is not None else {}
    if 'hostname' not in format_params:
        format_params['hostname'] = self.hostname
    path = format_string % format_params
    query_string = urllib.urlencode(query_params)
    parsed_url = urlparse.SplitResult(scheme=self.API_SCHEME,
                                      netloc=self.API_HOST,
                                      path=path,
                                      query=query_string,
                                      fragment=None)
    request_url = urlparse.urlunsplit(parsed_url)
    logging.debug('Built URL: %s ' % request_url)
    return request_url
def create_redirect_url(request, cross_origin=False):
    """Rebuild ``request.url`` keeping only its first 'id=...' query
    parameter, optionally swapping the origin's netloc.

    :param request: the wptserve request to transform.
    :param cross_origin: when True, swap the netloc via
        get_swapped_origin_netloc().
    :returns: the redirect destination URL string.
    """
    parsed = urlparse.urlsplit(request.url)
    destination_netloc = parsed.netloc
    if cross_origin:
        destination_netloc = get_swapped_origin_netloc(parsed.netloc)
    # A list comprehension instead of filter(): a filter() object is
    # always truthy and not subscriptable on Python 3, which would break
    # `query[0] if query else None` below.
    query = [param for param in parsed.query.split('&')
             if param.startswith('id=')]
    destination_url = urlparse.urlunsplit(
        urlparse.SplitResult(scheme=parsed.scheme,
                             netloc=destination_netloc,
                             path=parsed.path,
                             query=query[0] if query else None,
                             fragment=None))
    return destination_url
def create_url(request, swap_scheme=False, swap_origin=False, downgrade=False,
               query_parameter_to_remove="redirection"):
    """Build a variant of ``request.url`` for redirection tests.

    :param request: the wptserve request whose URL is transformed.
    :param swap_scheme: flip http<->https and use the matching server port.
    :param swap_origin: swap the origin netloc via
        __get_swapped_origin_netloc().
    :param downgrade: map https->http / wss->ws while keeping the secure
        port (see inline comment).
    :param query_parameter_to_remove: query key stripped from the result.
    :returns: the transformed URL string.
    :raises ValueError: if downgrade is requested for a scheme that is
        neither 'https' nor 'wss'.
    """
    parsed = urlparse.urlsplit(request.url)
    destination_netloc = parsed.netloc
    scheme = parsed.scheme
    if swap_scheme:
        scheme = "http" if parsed.scheme == "https" else "https"
        hostname = parsed.netloc.split(':')[0]
        port = request.server.config["ports"][scheme][0]
        destination_netloc = ":".join([hostname, str(port)])

    if downgrade:
        # These rely on some unintuitive cleverness due to WPT's test setup:
        # 'Upgrade-Insecure-Requests' does not upgrade the port number,
        # so we use URLs in the form `http://[domain]:[https-port]`,
        # which will be upgraded to `https://[domain]:[https-port]`.
        # If the upgrade fails, the load will fail, as we don't serve HTTP over
        # the secure port.
        if parsed.scheme == "https":
            scheme = "http"
        elif parsed.scheme == "wss":
            scheme = "ws"
        else:
            raise ValueError("Downgrade redirection: Invalid scheme '%s'" %
                             parsed.scheme)
        hostname = parsed.netloc.split(':')[0]
        port = request.server.config["ports"][parsed.scheme][0]
        destination_netloc = ":".join([hostname, str(port)])

    if swap_origin:
        destination_netloc = __get_swapped_origin_netloc(destination_netloc)

    parsed_query = urlparse.parse_qsl(parsed.query, keep_blank_values=True)
    # A list comprehension (not filter()) so urlencode() always receives
    # a real sequence, under both Python 2 and Python 3.
    parsed_query = [pair for pair in parsed_query
                    if pair[0] != query_parameter_to_remove]

    destination_url = urlparse.urlunsplit(
        urlparse.SplitResult(scheme=scheme,
                             netloc=destination_netloc,
                             path=parsed.path,
                             query=urllib.urlencode(parsed_query),
                             fragment=None))
    return destination_url
def getG2OHeaders(self, url):
    """Compute Akamai G2O auth headers for ``url``.

    Returns an empty dict when no G2O_KEY has been configured at module
    level.
    """
    if 'G2O_KEY' not in globals():
        return {}
    parsedUrl = urlparse.urlsplit(url)
    # Only the path + query + fragment portion of the URL is signed.
    uri = urlparse.urlunsplit(urlparse.SplitResult(
        '', '', parsedUrl.path, parsedUrl.query, parsedUrl.fragment))
    # Signature is valid for a short window (29s from now).
    expiry = '%s' % (int(time.time()) + 29)
    data = ', '.join([G2O_VERSION, G2O_GHOST_IP, G2O_CLIENT_IP, expiry,
                      G2O_UNIQUE_ID, G2O_NONCE])
    digest = hmac.new(G2O_KEY, msg=data + uri,
                      digestmod=hashlib.sha256).digest()
    signature = base64.b64encode(digest)
    return {
        G2O_DATA_HEADER_NAME: data,
        G2O_SIGN_HEADER_NAME: signature,
    }
def _geturl(self, params):
    """Get AFE URL.

    All AFE URLs have the format:

      http://host/afe/#param1=val1&param2=val2

    i.e. the parameters are urlencoded into the *fragment* (after '#'),
    not into the query string -- that is what AFE expects.

    @param params: Mapping of URL parameters.
    @returns: URL string.
    """
    scheme, netloc, path, query, _unused = self._root_url_parts
    encoded = urllib.urlencode(params)
    return urlparse.SplitResult(scheme, netloc, path, query,
                                encoded).geturl()
def get_postgresql_uri(services, service_name):
    """Find the named elephantsql service and return its URI rewritten
    for SQLAlchemy/psycopg2 (scheme plus driver options), or None when
    no such service exists."""
    for service in services.get('elephantsql', ()):
        if service['name'] != service_name:
            continue
        old = urlparse.urlsplit(service['credentials']['uri'])
        # Fixed driver options appended to every connection URI.
        options = urllib.urlencode({
            'application_name': 'bluemix_promocodes',
            'sslmode': 'require',
            'client_encoding': 'utf-8',
            'connect_timeout': 10,
        })
        # Replace the URI scheme, keep host/path, add our options.
        rewritten = urlparse.SplitResult('postgresql+psycopg2', old.netloc,
                                         old.path, options, '')
        return urlparse.urlunsplit(rewritten)
    return None
def parse_msg(msg):
    """Static method for constructing a Request instance out of a message
    read straight off a zmq socket.

    Wire format: 'sender conn_id path netstring(headers) netstring(body)';
    the headers are JSON and supply the definitive PATH/QUERY/host values
    used to build the request URL.
    """
    sender, conn_id, path, remainder = msg.split(' ', 3)
    headers, remainder = parse_netstring(remainder)
    body, _ = parse_netstring(remainder)
    headers = json.loads(headers)

    # Reconstruct the request URL from the parsed headers; note that the
    # PATH header overrides the path token taken from the envelope above.
    scheme = headers.get('URL_SCHEME', 'http')
    netloc = headers.get('host')
    path = headers.get('PATH')
    query = headers.get('QUERY')
    url = urlparse.SplitResult(scheme, netloc, path, query, None)

    req = Request(sender, conn_id, path, headers, body, url)
    req.is_wsgi = False
    return req