Esempio n. 1
0
    def urlsplit(url, scheme='', allow_fragments=True):
        """Parse a URL into 5 components:
        <scheme>://<netloc>/<path>?<query>#<fragment>
        Return a 5-tuple: (scheme, netloc, path, query, fragment).
        Note that we don't break the components up in smaller bits
        (e.g. netloc is a single string) and we don't expand % escapes."""
        allow_fragments = bool(allow_fragments)
        key = url, scheme, allow_fragments, type(url), type(scheme)
        cached = _parse_cache.get(key, None)
        if cached:
            return cached
        if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
            clear_cache()
        netloc = query = fragment = ''
        i = url.find(':')
        if i > 0:
            if url[:i] == 'http':  # optimize the common case
                scheme = url[:i].lower()
                url = url[i + 1:]
                if url[:2] == '//':
                    netloc, url = _splitnetloc(url, 2)
                    if (('[' in netloc and ']' not in netloc)
                            or (']' in netloc and '[' not in netloc)):
                        raise ValueError("Invalid IPv6 URL")
                if allow_fragments and '#' in url:
                    url, fragment = url.split('#', 1)
                if '?' in url:
                    url, query = url.split('?', 1)
                v = SplitResult(scheme, netloc, url, query, fragment)
                _parse_cache[key] = v
                return v
            for c in url[:i]:
                if c not in scheme_chars:
                    break
            else:
                # make sure "url" is not actually a port number (in which case
                # "scheme" is really part of the path)
                rest = url[i + 1:]
                if not rest or any(c not in '0123456789' for c in rest):
                    # not a port number
                    scheme, url = url[:i].lower(), rest

        if url[:2] == '//':
            netloc, url = _splitnetloc(url, 2)
            if (('[' in netloc and ']' not in netloc)
                    or (']' in netloc and '[' not in netloc)):
                raise ValueError("Invalid IPv6 URL")
        if allow_fragments and '#' in url:
            url, fragment = url.split('#', 1)
        if '?' in url:
            url, query = url.split('?', 1)
        v = SplitResult(scheme, netloc, url, query, fragment)
        _parse_cache[key] = v
        return v
Esempio n. 2
0
def _urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    return _coerce_result(v)
Esempio n. 3
0
def _urlsplit(url, scheme="", allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ""
    i = url.find(":")
    if i > 0:
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1:]

    if url[:2] == "//":
        netloc, url = _splitnetloc(url, 2)
        if ("[" in netloc and "]" not in netloc) or ("]" in netloc
                                                     and "[" not in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and "#" in url:
        url, fragment = url.split("#", 1)
    if "?" in url:
        url, query = url.split("?", 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    return _coerce_result(v)
Esempio n. 4
0
def _urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    return _coerce_result(v)
Esempio n. 5
0
def _urlsplit_netloc(url: str) -> Tuple[str, str]:
    netloc, url = _splitnetloc(url, 2)
    if (
            ('[' in netloc and ']' not in netloc)
            or  # noqa: W504, W503
            (']' in netloc and '[' not in netloc)
    ):
        raise ValueError("Invalid IPv6 URL")
    return netloc, url
Esempio n. 6
0
def _urlsplit(url, scheme='', allow_fragments=True):
    """Templating safe version of urllib.parse.urlsplit

    Ignores '?' and '#' inside {{}} templating tags.

    Caching disabled.
    """

    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        for c in url[:i]:
            if c not in scheme_chars:  # pragma: no cover
                break  # https://github.com/nedbat/coveragepy/issues/198
        else:
            scheme, url = url[:i].lower(), url[i + 1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or  # noqa: W504, W503
            (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        result = re.split(r'#(?![^{{}}]*}})', url, maxsplit=1)
        url = result[0]
        if len(result) > 1:
            fragment = result[1]
    if '?' in url:
        result = re.split(r'\?(?![^{{}}]*}})', url, maxsplit=1)
        url = result[0]
        if len(result) > 1:
            query = result[1]
    v = SplitResult(scheme, netloc, url, query, fragment)
    return _coerce_result(v)