def _urlsplit(url, scheme="", allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) netloc = query = fragment = "" i = url.find(":") if i > 0: for c in url[:i]: if c not in scheme_chars: break else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == "//": netloc, url = _splitnetloc(url, 2) if ("[" in netloc and "]" not in netloc) or ("]" in netloc and "[" not in netloc): raise ValueError("Invalid IPv6 URL") if allow_fragments and "#" in url: url, fragment = url.split("#", 1) if "?" in url: url, query = url.split("?", 1) v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)
def _urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" if _coerce_args: url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') if i > 0: for c in url[:i]: if c not in scheme_chars: break else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v) if _coerce_args else v
def _urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') if i > 0: for c in url[:i]: if c not in scheme_chars: break else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False): """Modify `urllib.parse.parse_qsl` to handle percent-encoded characters properly. `parse_qsl` replaces percent-encoded characters with replacement character (U+FFFD) (if errors = "replace") or drops them (if errors = "ignore") (See https://docs.python.org/3/howto/unicode.html#the-string-type). Instead we want to keep the raw bytes. And later we can percent-encode them directly when we need to. Code from https://github.com/python/cpython/blob/73c4708630f99b94c35476529748629fff1fc63e/Lib/urllib/parse.py#L658 with `unquote` replaced with `unquote_to_bytes` """ qs, _coerce_result = _coerce_args(qs) pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] for name_value in pairs: if not name_value and not strict_parsing: continue nv = name_value.split('=', 1) if len(nv) != 2: if strict_parsing: raise ValueError("bad query field: %r" % (name_value, )) # Handle case of a control-name with no equal sign if keep_blank_values: nv.append('') else: continue if len(nv[1]) or keep_blank_values: name = nv[0].replace('+', ' ') name = unquote_to_bytes(name) name = _coerce_result(name) value = nv[1].replace('+', ' ') value = unquote_to_bytes(value) value = _coerce_result(value) r.append((name, value)) return r
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace'):
    """Parse a query string into a list of ``(name, value)`` pairs.

    Copy of ``urllib.parse.parse_qsl`` that splits fields on ``&`` only;
    a semicolon is treated as ordinary data, not as a separator.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings. A
        true value indicates that blanks should be retained as blank
        strings. The default false value indicates that blank values are
        to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list of 2-tuples.

    >>> parse_qsl('_openstat=openstat.ru;camp1;ad1234;top-left-corner')
    [('_openstat', 'openstat.ru;camp1;ad1234;top-left-corner')]
    """
    qs, _coerce_result = _coerce_args(qs)
    parsed = []
    for field in qs.split('&'):
        if not (field or strict_parsing):
            continue
        parts = field.split('=', 1)
        if len(parts) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field, ))
            # A control name without an equal sign is kept only when blank
            # values are wanted.
            if not keep_blank_values:
                continue
            parts.append('')
        raw_name, raw_value = parts
        if not (raw_value or keep_blank_values):
            continue
        name = _coerce_result(
            unquote(raw_name.replace('+', ' '), encoding=encoding, errors=errors))
        value = _coerce_result(
            unquote(raw_value.replace('+', ' '), encoding=encoding, errors=errors))
        parsed.append((name, value))
    return parsed
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into a ``URI`` object.

    The URL shape is ``<scheme>://<netloc>/<path>;<params>?<query>#<fragment>``.
    Components are not broken down further than described below and
    percent-escapes are not expanded.

    :param url: the URL to parse.
    :param scheme: fallback scheme used when ``url`` has none
        (Default value = '')
    :param allow_fragments: whether ``#`` starts a fragment
        (Default value = True)
    :returns: a ``URI`` instance carrying ``scheme``, ``netloc``, ``path``,
        ``params``, ``query`` and ``fragment``, plus:

        * ``hostname`` - the netloc with any ``user[:password]@`` prefix
          removed (a port, if present, is left in place),
        * ``username`` - text before the first ``:`` of the credentials,
          or the whole credentials when there is no ``:``; ``None`` when
          the netloc has no ``@``,
        * ``password`` - text after the first ``:`` of the credentials;
          ``None`` when no password is given.
    """
    from urllib.parse import uses_params, urlsplit, _splitparams, _coerce_args, ParseResult

    # The coercion callback is intentionally unused: the URI is always
    # populated from the str-based parse result.
    url, scheme, _ = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(url, scheme, allow_fragments)
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    else:
        params = ''
    result = ParseResult(scheme, netloc, path, params, query, fragment)

    # FIXME: Appropriately parse netloc into parts using the real ``urlparse``.
    #        However, Pycom MicroPython currently lacks the ``rpartition``
    #        method on string objects.
    uri = URI()
    for fieldname in ('scheme', 'netloc', 'path', 'params', 'query', 'fragment'):
        setattr(uri, fieldname, getattr(result, fieldname))

    uri.hostname = uri.netloc
    uri.username = None
    uri.password = None

    # Manually parse credentials from netloc using only find/rfind and
    # slicing. The LAST "@" separates credentials from the host, so a
    # password may itself contain "@"; the FIRST ":" separates user from
    # password, so a password may contain ":". A missing password is
    # reported as None instead of raising.
    at = uri.netloc.rfind('@')
    if at >= 0:
        credentials = uri.netloc[:at]
        uri.hostname = uri.netloc[at + 1:]
        colon = credentials.find(':')
        if colon >= 0:
            uri.username = credentials[:colon]
            uri.password = credentials[colon + 1:]
        else:
            uri.username = credentials

    return uri
def _urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: <scheme>://<netloc>/<path>;<params>?<query>#<fragment> Return a 6-tuple: (scheme, netloc, path, params, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) splitresult = _urlsplit(url, scheme, allow_fragments) scheme, netloc, url, query, fragment = splitresult if scheme in uses_params and ';' in url: url, params = _splitparams(url) else: params = '' result = ParseResult(scheme, netloc, url, params, query, fragment) return _coerce_result(result)
def parse_qsl_to_bytes(qs, keep_blank_values=False, strict_parsing=False):
    """Parse a query given as a string argument.

    Data are returned as a list of name, value pairs as bytes.

    Arguments:

    qs: percent-encoded query string to be parsed (str or bytes)

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    Returns a list of ``(bytes, bytes)`` tuples. Fields are separated by
    ``&`` or ``;`` and ``+`` is translated to a space.
    """
    # This code is the same as Python3's parse_qsl()
    # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
    # except for the unquote(s, encoding, errors) calls replaced
    # with unquote_to_bytes(s)
    #
    # The encoder callback returned by _coerce_args is dropped on purpose:
    # for bytes input it would call .encode() on the bytes produced by
    # unquote_to_bytes(), which raises AttributeError.
    qs, _ = _coerce_args(qs)
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if nv[1] or keep_blank_values:
            name = unquote_to_bytes(nv[0].replace('+', ' '))
            value = unquote_to_bytes(nv[1].replace('+', ' '))
            r.append((name, value))
    return r
def parse_qsl_to_bytes(qs, keep_blank_values=False, strict_parsing=False):
    """Parse a query given as a string argument.

    Data are returned as a list of name, value pairs as bytes.

    Arguments:

    qs: percent-encoded query string to be parsed (str or bytes)

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    Returns a list of ``(bytes, bytes)`` tuples. Fields are separated by
    ``&`` or ``;`` and ``+`` is translated to a space.
    """
    # This code is the same as Python3's parse_qsl()
    # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
    # except for the unquote(s, encoding, errors) calls replaced
    # with unquote_to_bytes(s)
    #
    # Only the decoded query is taken from _coerce_args. Its result
    # callback must not be applied here: the values are already bytes, and
    # the bytes-input callback calls .encode(), which bytes does not have.
    qs = _coerce_args(qs)[0]
    r = []
    for chunk in qs.split('&'):
        for name_value in chunk.split(';'):
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value, ))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                nv.append('')
            if nv[1] or keep_blank_values:
                r.append((
                    unquote_to_bytes(nv[0].replace('+', ' ')),
                    unquote_to_bytes(nv[1].replace('+', ' ')),
                ))
    return r
def parse_qsl_to_bytes(query_string, keep_blank_values=False, strict_parsing=False):
    """Parse a query given as a string argument into pairs of bytes.

    Arguments:

    query_string: percent-encoded query string to be parsed (str or bytes)

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    Percent-encoded sequences are not decoded to text: names and values
    are unquoted with ``unquote_to_bytes``, so there is no encoding or
    error-handling choice to make.

    Returns a list of ``(bytes, bytes)`` tuples. Fields are separated by
    ``&`` or ``;`` and ``+`` is translated to a space.
    """
    # The encoder callback from _coerce_args is intentionally ignored: the
    # unquoted values are already bytes, and for bytes input the callback
    # would call .encode() on them and fail with AttributeError.
    query_string, _ = _coerce_args(query_string)
    pairs = [s2 for s1 in query_string.split('&') for s2 in s1.split(';')]
    res = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nval = name_value.split('=', 1)
        if len(nval) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nval.append('')
            else:
                continue
        if nval[1] or keep_blank_values:
            name = unquote_to_bytes(nval[0].replace('+', ' '))
            value = unquote_to_bytes(nval[1].replace('+', ' '))
            res.append((name, value))
    return res
def _urlsplit(url, scheme='', allow_fragments=True): """Templating safe version of urllib.parse.urlsplit Ignores '?' and '#' inside {{}} templating tags. Caching disabled. """ url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') if i > 0: for c in url[:i]: if c not in scheme_chars: # pragma: no cover break # https://github.com/nedbat/coveragepy/issues/198 else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or # noqa: W504, W503 (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: result = re.split(r'#(?![^{{}}]*}})', url, maxsplit=1) url = result[0] if len(result) > 1: fragment = result[1] if '?' in url: result = re.split(r'\?(?![^{{}}]*}})', url, maxsplit=1) url = result[0] if len(result) > 1: query = result[1] v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)
def parse_qsl(
    qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8',
    errors='replace', max_num_fields=None, separator='&',
):
    """
    Return a list of key/value tuples parsed from query string.

    Backport of urllib.parse.parse_qsl() from Python 3.8.8.
    Copyright (C) 2021 Python Software Foundation (see LICENSE.python).

    ----

    Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings. A
        true value indicates that blanks should be retained as blank
        strings. The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If false
        (the default), errors are silently ignored. If true, errors raise a
        ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    max_num_fields: int. If set, then throws a ValueError if there are more
        than n fields read by parse_qsl().

    separator: str or bytes. The symbol to use for separating the query
        arguments. Defaults to &. A bytes separator is decoded as ASCII.

    Returns a list of 2-tuples.

    Raises ValueError for an empty or wrongly typed separator, when
    max_num_fields is exceeded, and (with strict_parsing) for a field
    that is empty or has no equal sign.
    """
    qs, _coerce_result = _coerce_args(qs)

    if not separator or not isinstance(separator, (str, bytes)):
        raise ValueError('Separator must be of type string or bytes.')
    if isinstance(separator, bytes):
        # qs is always str from here on (bytes input was decoded above), so
        # a bytes separator has to be decoded as well; otherwise count()
        # and split() below raise TypeError.
        separator = separator.decode('ascii')

    # If max_num_fields is defined then check that the number of fields is less
    # than max_num_fields. This prevents a memory exhaustion DOS attack via
    # post bodies with many fields.
    if max_num_fields is not None:
        num_fields = 1 + qs.count(separator)
        if max_num_fields < num_fields:
            raise ValueError('Max number of fields exceeded')

    r = []
    for name_value in qs.split(separator):
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign.
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if nv[1] or keep_blank_values:
            name = nv[0].replace('+', ' ')
            name = unquote(name, encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = unquote(value, encoding=encoding, errors=errors)
            value = _coerce_result(value)
            r.append((name, value))
    return r