def get_couch_controller_or_bust(db_name):
    """
    Build a :py:class:`djali.couchdb.CloudiControl` for the named database.

    The database URL reuses the scheme and network location of the
    module-level ``storage_backend`` URL and uses ``/<db_name>`` as its path.

    Args:
        db_name: Database name

    Returns:
        djali.couchdb.CloudiControl: controller instance

    Raises:
        werkzeug.exceptions.HTTPException: Raised through ``abort`` when the
            controller cannot be instantiated -- 502 if the backend answered
            with HTTP 403, 503 for any other HTTP or connection error.
    """
    backend = urlparse(storage_backend)
    database_path = '/{db_name}'.format(db_name=db_name)
    db_url = ParseResult(
        backend.scheme, backend.netloc, database_path, '', '', '').geturl()

    try:
        return CloudiControl(db_url)
    except requests.exceptions.HTTPError as http_error:
        app.logger.error(
            "Cloudi error while trying to use {!r}".format(db_url))
        app.logger.error(http_error)
        # A 403 from the backend is reported to the client as a bad gateway.
        if http_error.response.status_code == 403:
            abort(502)
        abort(503)
    except requests.exceptions.ConnectionError as connection_error:
        app.logger.error(
            "Cloudi error while trying to use {!r}".format(db_url))
        app.logger.error(connection_error)
        abort(503)
def urlparams(url_, fragment=None, query_dict=None, **query):
    """
    Return ``url_`` with an updated fragment and/or query string.

    Existing query parameters are kept. Parameters supplied through
    ``query_dict`` or as keyword arguments replace any existing parameter of
    the same name instead of being appended to it. Values that are ``None``
    are left out of the resulting query string.
    """
    parsed = urlparse(url_)
    if fragment is None:
        fragment = parsed.fragment

    existing_query = parsed.query
    if existing_query:
        merged = QueryDict(smart_bytes(existing_query), mutable=True)
    else:
        merged = QueryDict('', mutable=True)

    if query_dict:
        for name, values in query_dict.lists():
            # Reset the key first so the new values replace the old ones.
            merged[name] = None
            for value in values:
                merged.appendlist(name, value)

    for name, value in query.items():
        # Keyword arguments also replace rather than append.
        if isinstance(value, list):
            merged.setlist(name, value)
        else:
            merged[name] = value

    pairs = []
    for name, values in merged.lists():
        for value in values:
            if value is not None:
                pairs.append((name, value))
    query_string = urlencode(pairs)

    rebuilt = ParseResult(parsed.scheme, parsed.netloc, parsed.path,
                          parsed.params, query_string, fragment)
    return rebuilt.geturl()
def clean_url(url):
    """
    Strip the params, query and fragment parts from a URL.

    Surrounding whitespace is removed before parsing. The result keeps only
    scheme, netloc and path, so that `os.path.basename` and
    `os.path.splitext` can work correctly on it.

    @param url: URL to clean.
    @type url: str

    @return: Cleaned URL.
    @rtype: str
    """
    scheme, netloc, path = urlparse(url.strip())[:3]
    bare = ParseResult(scheme, netloc, path, params='', query='', fragment='')
    return bare.geturl()
def handle_authcode(request, client, redirection_uri, state=None):
    """
    Create an authorization code and redirect to the client's redirect URI.

    A new ``Oauth2Code`` for ``client`` and the authenticated user is added to
    the database session and flushed. Its ``authcode`` (and ``state``, when
    truthy) is merged into the query parameters already present on
    ``redirection_uri.uri``. Any fragment on the redirect URI is dropped.

    Returns an ``HTTPFound`` pointing at the rebuilt URI.
    """
    target = urlparse(redirection_uri.uri)
    query_params = dict(parse_qsl(target.query))

    user_id = authenticated_userid(request)
    auth_code = Oauth2Code(client, user_id)
    db.add(auth_code)
    db.flush()

    query_params['code'] = auth_code.authcode
    if state:
        query_params['state'] = state

    location = ParseResult(
        target.scheme,
        target.netloc,
        target.path,
        target.params,
        urlencode(query_params),
        '',
    ).geturl()
    return HTTPFound(location=location)
def validate(self, instance, value):
    """Validate ``value`` as a URL and return its normalized string form.

    The parent class validation runs first. The URL must have both a scheme
    and a netloc, otherwise ``self.error`` is called. Params and query are
    dropped when ``self.remove_parameters`` is set; the fragment is dropped
    when ``self.remove_fragment`` is set.
    """
    value = super(URL, self).validate(instance, value)
    parts = urlparse(value)
    if not (parts.scheme and parts.netloc):
        self.error(instance, value, extra='URL needs scheme and netloc.')

    if self.remove_parameters:
        params, query = '', ''
    else:
        params, query = parts.params, parts.query
    fragment = '' if self.remove_fragment else parts.fragment

    return ParseResult(
        scheme=parts.scheme,
        netloc=parts.netloc,
        path=parts.path,
        params=params,
        query=query,
        fragment=fragment,
    ).geturl()
def __html__(self):
    '''Return a transformed URL if necessary (appending protocol and CDN).

    ``self`` is used as a mapping of URL components: ``netloc`` and
    ``scheme`` are read and written here, and the whole mapping is expanded
    into ``ParseResult`` to build the returned string.

    Returns ``self.raw_url`` unchanged when the URL has a host that is not
    listed in ``context.config.STATIC_SOURCES`` (or when that setting is
    ``None``).

    NOTE(review): the nesting below was reconstructed from a source with no
    usable indentation. In particular the final ``if self['netloc']`` is
    placed inside the CDN branch because ``protocol`` is only bound there --
    confirm against the original file.
    '''
    host = self.get('netloc', None)
    # Don't CDN urls with hosts we're not re-writing
    if host:
        if (context.config.STATIC_SOURCES is None or
                host not in context.config.STATIC_SOURCES):
            return self.raw_url
    if (context.config.USE_CDN and
            (context.config.CDN_HOST or context.config.STATIC_HOSTS)):
        # get the protocol for the current request
        # this requires the custom HTTP header X-Forwarded-Proto
        # set if running behind a proxy (or if SSL is terminated
        # upstream)
        try:
            protocol = request_config().protocol
        except AttributeError:
            # are we not in a request? Use a default protocol
            protocol = context.config.DEFAULT_PROTOCOL
        # use the round robin hosts to speed download when not https
        # (the host is picked by hashing the raw URL, so a given URL always
        # maps to the same entry of STATIC_HOSTS)
        if protocol != "https" and context.config.STATIC_HOSTS:
            self['netloc'] = context.config.STATIC_HOSTS[hashfunc(
                self.raw_url) % len(context.config.STATIC_HOSTS)]
        else:
            self['netloc'] = context.config.CDN_HOST
        # adjust the scheme of any link with a net location
        # to match the current request so we don't have mixed link
        # protocols
        if self['netloc']:
            self['scheme'] = protocol
    return ParseResult(**self).geturl()
def handle_authcode(request, client, redirection_uri, state=None):
    """
    Issue an OAuth2 authorization code and redirect back to the client.

    The code is created for ``client`` and the currently authenticated user,
    added to the database session and flushed. The redirect target is
    ``redirection_uri.uri`` with ``code`` (and ``state`` if truthy) set in its
    query string; the fragment is always emptied.
    """
    uri_parts = urlparse(redirection_uri.uri)
    params = dict(parse_qsl(uri_parts.query))

    code_record = Oauth2Code(client, authenticated_userid(request))
    db.add(code_record)
    db.flush()

    params['code'] = code_record.authcode
    if state:
        params['state'] = state

    encoded_query = urlencode(params)
    redirect_to = ParseResult(uri_parts.scheme, uri_parts.netloc,
                              uri_parts.path, uri_parts.params,
                              encoded_query, '')
    return HTTPFound(location=redirect_to.geturl())
def _unicode_urlparse(url, encoding='utf-8', errors='ignore'): """ Safely parse a URL into a :class:`urlparse.ParseResult` ensuring that all elements of the parse result are unicode. :param url: A URL. :type url: ``bytes``, ``unicode`` or :class:`urlparse.ParseResult` :param encoding: The string encoding assumed in the underlying ``str`` or :class:`urlparse.ParseResult` (default is utf-8). :type encoding: ``bytes`` :param errors: response from ``decode`` if string cannot be converted to unicode given encoding (default is ignore). :type errors: ``bytes`` """ if isinstance(url, bytes): url = url.decode(encoding, errors) elif isinstance(url, ParseResult): # Ensure every part is unicode because we can't rely on clients to do so parts = list(url) for i in range(len(parts)): if isinstance(parts[i], bytes): parts[i] = parts[i].decode(encoding, errors) return ParseResult(*parts) try: return urlparse(url) except ValueError: msg = 'Malformed URL "{}" could not parse'.format(url) log.debug(msg, exc_info=True) return None
def _fields_to_pr(cls, fields): """Recompose back fields dict to ParseResult""" netloc = fields['username'] or '' if fields['password']: netloc += ':' + fields['password'] if netloc: netloc += '@' netloc += fields['hostname'] if fields['port']: if fields['hostname'].count(':') >= 2: # ipv6 -- need to enclose in [] netloc = '[%s]:%s' % (netloc, fields['port']) else: netloc += ':%s' % fields['port'] pr_fields = { f: fields[f] for f in cls._FIELDS if f not in ('hostname', 'password', 'username', 'port') } pr_fields['netloc'] = netloc pr_fields['params'] = '' # We need to quote the path pr_fields['path'] = urlquote(pr_fields['path']) # TODO: figure out what to do with query/fragment... one step at a time return ParseResult(**pr_fields)
def force_https(url):
    """Return ``url`` with its scheme set to https.

    Parameters:
        url: str
            A string representation of a URL.

    Returns:
        str
            ``url`` with whatever scheme it had replaced by https; all other
            components are kept as parsed.

    Throws:
        Any potential error thrown by the urlparse function during parsing.
    """
    _, netloc, path, params, query, fragment = urlparse(url)
    secured = ParseResult("https", netloc, path, params, query, fragment)
    return secured.geturl()
def _parse_uri(self): # MySQL connection string is a DataSourceName(DSN), # the username, passwd can be any character. pattern = r"^(\w+)://(.*):(.*)@tcp\(([.a-zA-Z0-9\-]*):?([0-9]*)\)/(\w*)(\?.*)?$" # noqa: W605, E501 found_result = re.findall(pattern, self.uristr) scheme, user, passwd, host, port, db, config = found_result[0] netloc = "{}:{}@{}:{}".format(user, passwd, host, port or 3306) return ParseResult(scheme, netloc, db, "", config.lstrip("?"), "")
def _parse_uri(self): # MySQL connection string is a DataSourceName(DSN), # we need to do some pre-process pattern = r"^(\w+)://(\w*):(\w*)@tcp\(([.a-zA-Z0-9\-]*):([0-9]*)\)/(\w*)(\?.*)?$" # noqa: W605, E501 found_result = re.findall(pattern, self.uristr) scheme, user, passwd, host, port, db, config = found_result[0] netloc = "{}:{}@{}:{}".format(user, passwd, host, port) return ParseResult(scheme, netloc, db, "", config.lstrip("?"), "")
def parse(s):
    '''
    Parse a path given as a url. Accepts strings of the form:

       s3://bucket-name/path/to/key
       file:///path/to/file
       /absolute/path/to/file
       relative/path/to/file
       ~/path/from/home/dir/to/file

    To avoid surprises, s3:// and file:// URLs should not include ;, ? or #.
    You should URL-encode such paths.

    Return value is a ParseResult; one of the following:

       ('s3', bucketname, valid_s3_key, ...)
       ('file', '', absolute_path_for_current_filesystem, ...)

    Raises ValueError for non-string input, for URIs containing ;, ? or #,
    for paths that cannot be encoded as UTF-8, and for s3 URLs without a
    bucket.
    '''
    import re
    import six
    from six.moves.urllib.parse import urlparse, ParseResult

    if not isinstance(s, six.string_types):
        raise ValueError("An S3 path must be a string, got %s" % s.__class__.__name__)

    # A second character of ':' is taken to be a drive letter (e.g. C:\foo).
    is_windows_path = len(s) >= 2 and s[1] == ':'
    if is_windows_path:
        scheme, netloc, s3path = 'file', '', s
    else:
        scheme, netloc, s3path, params, query, fragment = urlparse(s)
        if params or query or fragment:
            raise ValueError("Invalid URI: %s" % s)
        if any(marker in s for marker in ';?#'):
            raise ValueError("Invalid URI: %s" % s)
        try:
            s3path.encode('UTF-8')
        except (UnicodeDecodeError, UnicodeEncodeError):
            # If somehow something ever gets uploaded with binary in the
            # key, this seems to be the only way to fix it:
            # `s3cmd fixbucket s3://bodylabs-korper-assets`
            raise ValueError("Invalid URI (bad unicode): %s" % s)

    # urlparse, given file:///C:\foo parses us to /C:\foo, so on
    # reconstruction (on windows) we get C:\C:\foo. Drop the leading slash.
    if re.match(r'/\w:', s3path):
        s3path = s3path[1:]
        is_windows_path = True

    if scheme == '':
        scheme = 'file'

    if scheme == 'file' and not is_windows_path:
        absolute = os.path.abspath(os.path.expanduser(s3path))
        if s3path.endswith(os.sep) or s3path.endswith('/'):
            # os.path.abspath strips the trailing '/' so we need to put it back
            absolute = os.path.join(absolute, '')
        s3path = absolute

    if scheme == 's3' and netloc == '':
        raise ValueError('s3 urls must specify the bucket')

    return ParseResult(scheme, netloc, s3path, params=None, query=None, fragment=None)  # pylint: disable=too-many-function-args,unexpected-keyword-arg
def _quote_path(self, path):
    """Return ``path`` with its path component percent encoded.

    The path component is first passed through ``self._unquote`` (leaving
    ``/`` and ``%`` alone) and then quoted again, so the result has a
    consistent encoding. Scheme and netloc are dropped; params, query and
    fragment are kept. An empty result is replaced by ``'/'``.
    """
    split = urlparse(path)
    unquoted = self._unquote(split.path, ignore='/%')
    requoted = quote(unquoted, safe='/%')
    rebuilt = urlunparse(
        ParseResult('', '', requoted, split.params, split.query, split.fragment))
    if rebuilt:
        return rebuilt
    return '/'
def __init__(self, instance_url, *args, **kwargs): self.log = kwargs.get("use_log", logging.getLogger(__name__)) p_url = urlparse(instance_url) self._scheme = p_url.scheme self._netloc = p_url.netloc if '@' in p_url.netloc: self._netloc = p_url.netloc.split('@')[1] username = '' password = '' if p_url.username is not None: username = p_url.username if p_url.password is not None: password = p_url.password self._root_auth = (username, password) couch_db_pr = ParseResult(self._scheme, self._netloc, '', '', '', '') self.instance_url = couch_db_pr.geturl()
def _quote_path(self, path):
    """Return ``path`` with its path component percent encoded.

    The path component is passed through ``self._unquote`` (leaving ``/``
    and ``%`` alone) and then quoted again. Scheme and netloc are dropped;
    params, query and fragment are kept.
    """
    split = urlparse(path)
    unquoted = self._unquote(split.path, ignore='/%')
    # quote does not work with unicode strings in Python 2.7, so it is fed
    # UTF-8 encoded bytes there.
    to_quote = unquoted.encode('utf-8') if six.PY2 else unquoted
    requoted = quote(to_quote, safe='/%')
    return urlunparse(
        ParseResult('', '', requoted, split.params, split.query, split.fragment))
def parse_url_to_dict(url, assume_localhost=False):
    """Parse a url and return a dict with keys for all of the parts.

    The urlparse function() returns a wacky combination of a namedtuple
    with properties; this flattens it into a plain dict.

    :param url: The URL to parse; it is converted to text first. A value
        starting with a drive letter (``C:``) is converted with ``path2url``
        and unquoted before parsing.
    :param assume_localhost: Accepted for interface compatibility; not used
        by this function.
    :return: dict with keys ``scheme``, ``scheme_extension``, ``netloc``,
        ``hostname``, ``path``, ``params``, ``query``, ``fragment``,
        ``username``, ``password`` and ``port``. A scheme of the form
        ``ext+scheme`` is split into ``scheme_extension`` and ``scheme``; an
        empty scheme becomes ``'file'``.
    """
    from six.moves.urllib.parse import urlparse, unquote_plus, ParseResult
    from six import text_type
    import re

    assert url is not None

    url = text_type(url)

    if re.match(r'^[a-zA-Z]:', url):
        url = path2url(url)
        p = urlparse(unquote_plus(url))
        # urlparse leaves a '/' before the drive letter.
        p = ParseResult(p.scheme, p.netloc, p.path.lstrip('/'), p.params, p.query, p.fragment)
    else:
        p = urlparse(url)

    # '+' indicates that the scheme has a scheme extension
    if '+' in p.scheme:
        scheme_extension, scheme = p.scheme.split('+')
    else:
        scheme = p.scheme
        scheme_extension = None

    # Compare by value: `is ''` tests object identity, which only works by
    # accident of string interning and triggers a SyntaxWarning on 3.8+.
    if scheme == '':
        scheme = 'file'

    return {
        'scheme': scheme,
        'scheme_extension': scheme_extension,
        'netloc': p.netloc,
        'hostname': p.hostname,
        'path': p.path,
        'params': p.params,
        'query': p.query,
        'fragment': unquote_plus(p.fragment) if p.fragment else None,
        'username': p.username,
        'password': p.password,
        'port': p.port
    }
def urlparse(url, scheme='', allow_fragments=True):
    """Return a ParseResult whose netloc is filled from the path if needed.

    The standard ``urlparse`` result is returned as-is when it already has a
    netloc. Otherwise a new ParseResult is built in which the parsed path is
    used as netloc and the original ``url`` argument is used as path, so
    callers no longer need to test ``.netloc``.
    """
    # noinspection PyUnresolvedReferences
    from six.moves.urllib.parse import urlparse as _urlparse, ParseResult

    result = _urlparse(url, scheme, allow_fragments)
    if result.netloc == '':
        # Occasionally netloc is '' and its data ended up in the path.
        # noinspection PyArgumentList
        return ParseResult(
            scheme=result.scheme,
            netloc=result.path,
            path=url,
            params=result.params,
            query=result.query,
            fragment=result.fragment,
        )
    return result
def _quote_pattern(self, pattern):
    """Return ``pattern`` with its path component percent encoded.

    A trailing ``?``, ``;`` or ``$`` is set aside before parsing and
    re-attached to the quoted path afterwards, because urlparse would
    otherwise drop it (e.g. query-only '/abc?' and param-only '/abc;'
    patterns). ``/``, ``*`` and ``%`` are never quoted.
    """
    last_char = ''
    if pattern[-1] in ('?', ';', '$'):
        last_char = pattern[-1]
        pattern = pattern[:-1]

    split = urlparse(pattern)
    unquoted = self._unquote(split.path, ignore='/*$%')
    requoted = quote(unquoted, safe='/*%')
    return urlunparse(ParseResult(
        '', '', requoted + last_char, split.params, split.query, split.fragment))
def Parse(self, uri):
    """Parse ``uri`` and hand the result to the handler for its scheme.

    The handler is looked up in ``self.handlers`` by lower-cased scheme and
    called with the ``ParseResult``; its return value is returned.

    Raises ``Exception`` when no handler is registered for the scheme.
    """
    parsed = urlparse(uri)

    # Work around python 2.7.3 not handling # in URIs correctly: if a '#'
    # was left in the path, split the fragment off ourselves.
    path, hash_mark, fragment = parsed.path.partition('#')
    if hash_mark:
        parsed = ParseResult(scheme=parsed.scheme,
                             netloc=parsed.netloc,
                             path=path,
                             params=parsed.params,
                             query=parsed.query,
                             fragment=fragment)

    handler = self.handlers.get(parsed.scheme.lower(), None)
    if handler:
        return handler(parsed)
    raise Exception("No handler found for prefix %s" % parsed.scheme)
def iter_request_log(self):
    """Yield one dict per entry of ``self.request_log``.

    Each entry is unpacked as ``(parsed, method, path, args, kwargs, resp)``.
    The first two positional ``args`` are exposed as ``body`` and
    ``headers``; ``headers`` is wrapped in a ``CaseInsensitiveDict``. The
    full URL is rebuilt from ``parsed`` with its path replaced by ``path``
    and is exposed both as a string (``full_path``) and parsed
    (``parsed_path``).
    """
    for parsed, method, path, args, kwargs, resp in self.request_log:
        url_parts = parsed._asdict()
        url_parts['path'] = path
        full_path = ParseResult(**url_parts).geturl()

        entry = dict(zip(('body', 'headers'), list(args)))
        entry.update(
            method=method,
            full_path=full_path,
            parsed_path=urlparse(full_path),
            path=path,
            headers=CaseInsensitiveDict(entry.get('headers')),
            resp=resp,
            status=resp.status,
        )
        yield entry
def join(base, *additions):
    '''
    Extends os.path.join so it works with s3:// and file:// urls.

    This inherits a quirk of os.path.join: if 'addition' is an absolute path,
    path components of base are thrown away.

    'addition' must be an absolute or relative path, not a URL; otherwise a
    ValueError is raised.

    `base` and `addition` can use any path separator, but the result will
    always be normalized to os.sep.
    '''
    from six.moves.urllib.parse import urlparse, urljoin, ParseResult

    addition = sep.join(additions)
    scheme, netloc, _, params, query, fragment = urlparse(addition)
    if scheme or netloc or params or query or fragment:
        raise ValueError('Addition must be an absolute or relative path, not a URL')

    if islocal(base):
        return os.path.join(parse(base).path, addition.replace(sep, os.sep))

    k = parse(base)
    # urljoin is used instead of os.path.join because it joins with '/'
    # rather than os.sep, which is '\' on Windows.
    #
    # The two disagree on a base without a trailing separator, and the
    # os.path.join behavior is the one we want:
    #
    #   >>> os.path.join('foo/bar', 'baz')
    #   'foo/bar/baz'
    #   >>> urlparse.urljoin('foo/bar', 'baz')
    #   'foo/baz'
    #
    # So a trailing separator is added to the base path if there is none.
    base_path = k.path if k.path.endswith(sep) else k.path + sep
    s3path = urljoin(base_path, addition)
    return ParseResult(k.scheme, k.netloc, s3path, k.params, k.query, k.fragment).geturl()  # pylint: disable=too-many-function-args,unexpected-keyword-arg
def get(self, request):
    """Redirect back to the client's redirect URI with the outcome.

    Reads the captured data, code, error and client through
    ``self.get_data``. The redirect URI comes from the captured data, falling
    back to the client's own. Its query string is replaced with ``state``
    (when captured) plus either the error, ``error=access_denied`` (when
    there is no code), or the code; its fragment is emptied. The captured
    data is cleared before redirecting.
    """
    data = self.get_data(request)
    code = self.get_data(request, "code")
    error = self.get_data(request, "error")
    # client must be properly deserialized to become a valid instance
    client = Client.deserialize(self.get_data(request, "client"))

    # This is an edge case caused by making a request with no data. It
    # should only happen if this view is called manually, out of the normal
    # capture-authorize-redirect flow.
    if data is None or client is None:
        return self.error_response({
            'error': 'invalid_data',
            'error_description': _('Data has not been captured')
        })

    redirect_uri = data.get('redirect_uri', None) or client.redirect_uri
    scheme, netloc, path, params = urlparse(redirect_uri)[:4]

    query = QueryDict('', mutable=True)
    if 'state' in data:
        query['state'] = data['state']

    if error is not None:
        query.update(error)
    elif code is None:
        query['error'] = 'access_denied'
    else:
        query['code'] = code

    redirect_uri = ParseResult(
        scheme, netloc, path, params, query.urlencode(), '').geturl()

    self.clear_data(request)

    return HttpResponseRedirect(redirect_uri)
def _quote_pattern(self, pattern):
    """Return ``pattern`` with its path component percent encoded.

    A trailing ``?``, ``;`` or ``$`` is set aside before parsing and
    re-attached to the quoted path afterwards, because urlparse would
    otherwise drop it (e.g. query-only '/abc?' and param-only '/abc;'
    patterns). ``/``, ``*`` and ``%`` are never quoted.
    """
    last_char = ''
    if pattern[-1] in ('?', ';', '$'):
        last_char = pattern[-1]
        pattern = pattern[:-1]

    split = urlparse(pattern)
    unquoted = self._unquote(split.path, ignore='/*$%')
    # quote does not work with unicode strings in Python 2.7, so it is fed
    # UTF-8 encoded bytes there.
    to_quote = unquoted.encode('utf-8') if six.PY2 else unquoted
    requoted = quote(to_quote, safe='/*%')
    return urlunparse(ParseResult(
        '', '', requoted + last_char, split.params, split.query, split.fragment))
def _parse_uri(self, uri):
    """Parse ``uri`` into ``self.uri`` and its query into ``self.parameters``.

    Does nothing when ``uri`` is falsy. ``self.parameters`` is only set when
    the parsed URI has a query string.
    """
    if not uri:
        return

    if not PY26:
        self.uri = urlparse(uri)
    else:
        # Parse with a stand-in "http" scheme (to avoid parsing bugs), then
        # put the original scheme back into the result.
        scheme_end = uri.index('://')
        original_scheme = uri[0:scheme_end]
        parsed = urlparse('http' + uri[scheme_end:])
        self.uri = ParseResult(original_scheme, *parsed[1:])

    if self.uri.query:
        self.parameters = dict(parse_qsl(self.uri.query))
def query_build_dir_url(self, file_name):
    """
    Resolve a file name to a potential url in the build upload directory
    where that file can be found.

    The directory is taken from ``self.test_packages_url`` when set,
    otherwise from ``self.installer_url``: the reference URL is unquoted and
    the last segment of its path is replaced by ``file_name``.
    ``self.fatal`` is called when neither URL is set.
    """
    if self.test_packages_url:
        reference_url = self.test_packages_url
    elif self.installer_url:
        reference_url = self.installer_url
    else:
        self.fatal(
            "Can't figure out build directory urls without an installer_url "
            "or test_packages_url!")

    reference_url = urllib.parse.unquote(reference_url)
    scheme, netloc, path, params, query, fragment = urlparse(reference_url)

    # Keep everything before the last '/' and append the requested file.
    directory = path[:path.rfind('/')]
    sibling_path = '/'.join((directory, file_name))

    return ParseResult(
        scheme, netloc, sibling_path, params, query, fragment).geturl()
def _marshal_uri(target_uri, origin_uri):
    """
    Translate a URI into a string that can safely be used as a JSON key.

    Many JSON parsers and reference resolvers treat '/' as an indicator of
    object nesting, so a key such as "a/possible/def" is ambiguous with the
    nested path a -> possible -> def. For example, in

        {
            "definitions": {
                "a/possible/def": {"type": "object"},
                "a": {"possible": {"def": {"type": "string"}}},
                "def": {"$ref": "#/definitions/a/possible/def"}
            }
        }

    the reference could resolve to either definition. To avoid this, the
    string form of the URI is rewritten using the substitutions in
    ``MARSHAL_REPLACEMENT_PATTERNS``.

    A target without a scheme is treated as a ``file`` URI. When
    ``origin_uri`` is given, ``file`` targets are rewritten with the scheme
    ``lfile`` and a path relative to the directory of the origin.

    :param target_uri: URI to marshal
    :type target_uri: ParseResult
    :param origin_uri: URI of the root swagger spec file
    :type origin_uri: ParseResult

    :return: a string representation of the URL which could be used into the JSON keys
    :rtype: str
    :raises ValueError: if the target is empty or its scheme is not one of
        file, http or https
    """
    if urlunparse(target_uri) and target_uri.scheme == '':
        # scheme is empty for relative paths. It should NOT happen!
        target_uri = ParseResult('file', *target_uri[1:])
    marshalled_target = urlunparse(target_uri)

    scheme_is_supported = target_uri.scheme in {'file', 'http', 'https'}
    if not (marshalled_target and scheme_is_supported):
        raise ValueError(
            'Invalid target: \'{target_uri}\''.format(target_uri=urlunparse(target_uri))
        )

    if origin_uri and target_uri.scheme == 'file':
        _, netloc, path, params, query, fragment = target_uri

        # Masquerade the absolute file path on the "local" server using
        # relative paths from the root swagger spec file
        spec_dir = os.path.dirname(origin_uri.path)
        relative_path = os.path.relpath(path, spec_dir)
        marshalled_target = urlunparse(
            ('lfile', netloc, relative_path, params, query, fragment))

    for src, dst in iteritems(MARSHAL_REPLACEMENT_PATTERNS):
        marshalled_target = marshalled_target.replace(src, dst)
    return marshalled_target
def __init__(self, *args, **kwargs):
    """Resolve the CouchDB URL, credentials and database name, then connect.

    The first positional argument, when given, is parsed as a URL: its
    credentials and first path segment override the ``COUCHDB_USERNAME`` /
    ``COUCHDB_PASSWORD`` / ``COUCHDB_DB_NAME`` defaults, and its scheme and
    netloc form the server URL. Without a usable URL the server URL is built
    from the ``username``, ``password``, ``scheme``, ``port`` and ``host``
    keyword arguments, falling back to the parts of ``COUCHDB_URL``.

    Other keyword arguments read here: ``use_log`` (logger to use instead of
    the module logger) and ``create`` (create the database when looking it up
    raises ``KeyError``).

    NOTE(review): this block was reconstructed from a source with no usable
    indentation. The nesting of the https port default under ``if not port``
    and of the password under ``if username`` are best guesses -- confirm
    against the original file.
    """
    self.client = None
    username = COUCHDB_USERNAME
    password = COUCHDB_PASSWORD
    db_name = COUCHDB_DB_NAME
    couch_db_url = None

    if kwargs.get("use_log"):
        self.log = kwargs['use_log']
    else:
        self.log = logging.getLogger(__name__)

    if args:
        p_url = urlparse(args[0])

        if p_url.username is not None:
            username = p_url.username

        if p_url.password is not None:
            password = p_url.password

        if p_url.path:
            # "/db/rest" splits into ['', 'db', 'rest']; a path without any
            # '/' yields a single element, hence the IndexError fallback.
            split_path = p_url.path.split("/", 2)
            try:
                db_name = split_path[1]
            except IndexError:
                db_name = split_path[0]

        if p_url.scheme:
            # Server URL only: path, params, query and fragment are dropped.
            couch_db_pr = ParseResult(p_url.scheme, p_url.netloc, '', '', '', '')
            couch_db_url = couch_db_pr.geturl()

    if couch_db_url is None:
        default_p = urlparse(COUCHDB_URL)
        auth_prefix_parts = list()
        # Note: on this path any credentials taken from args[0] above are
        # replaced by the keyword arguments / module defaults.
        username = kwargs.get("username", COUCHDB_USERNAME)
        password = kwargs.get("password", COUCHDB_PASSWORD)
        scheme = kwargs.get("scheme", default_p.scheme)
        port = kwargs.get("port", default_p.port)
        host = kwargs.get("host", default_p.hostname)

        if not port:
            port = 5984
            if scheme == 'https':
                port = 6984

        if username:
            auth_prefix_parts.append(username)
            if password:
                auth_prefix_parts.append(':')
                auth_prefix_parts.append(password)

        if len(auth_prefix_parts) > 0:
            auth_prefix_parts.append('@')

        netloc = "{auth_prefix}{host:s}:{port:d}".format(
            auth_prefix=''.join(auth_prefix_parts), host=host, port=int(port))
        couch_db_url = ParseResult(scheme, netloc, "", "", "", "").geturl()

    # The block above always assigns couch_db_url when it was None, so this
    # fallback only takes effect if that assignment is changed.
    if couch_db_url is None:
        couch_db_url = COUCHDB_URL

    self._auth = (username, password)
    self._couch_db_url = couch_db_url
    self._storage_db = db_name
    self._connect()

    if kwargs.get("create"):
        try:
            # Accessing the attribute is the existence check; KeyError is
            # taken to mean the database does not exist yet.
            self.database
        except KeyError:
            self.client.create_database(self._storage_db)