Example 1
def url_unparse(components):
    """The reverse operation to :meth:`url_parse`.  This accepts arbitrary
    as well as :class:`URL` tuples and returns a URL as a string.

    :param components: the parsed URL as tuple which should be converted
                       into a URL string.
    """
    scheme, netloc, path, query, fragment = \
        normalize_string_tuple(components)
    s = make_literal_wrapper(scheme)
    url = s('')

    # We generally treat file:///x and file:/x the same, which is also
    # what browsers seem to do.  This also allows us to avoid keeping a
    # scheme registry for netloc use, or having to differentiate between
    # an empty and a missing netloc.
    if netloc or (scheme and path.startswith(s('/'))):
        if path and path[:1] != s('/'):
            path = s('/') + path
        url = s('//') + (netloc or s('')) + path
    elif path:
        url += path
    if scheme:
        url = scheme + s(':') + url
    if query:
        url = url + s('?') + query
    if fragment:
        url = url + s('#') + fragment
    return url
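A quick round-trip check (a minimal sketch, assuming `url_parse` and `url_unparse` are importable from `werkzeug.urls`, where these snippets appear to originate):

from werkzeug.urls import url_parse, url_unparse

# Components are (scheme, netloc, path, query, fragment); plain 5-tuples
# work just as well as URL tuples.
print(url_unparse(('https', 'example.com', '/a/b', 'x=1', 'top')))
# https://example.com/a/b?x=1#top

parts = url_parse('http://example.com/path?q=1#frag')
assert url_unparse(parts) == 'http://example.com/path?q=1#frag'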
Example 2
    def __init__(self,
                 stream=None,
                 filename=None,
                 name=None,
                 content_type=None,
                 content_length=None,
                 headers=None):
        self.name = name
        self.stream = stream or _empty_stream

        # if no filename is provided we can attempt to get the filename
        # from the stream object passed.  We have to be careful to
        # skip things like <fdopen>, <stderr> etc.  Python marks these
        # special filenames with angle brackets.
        if filename is None:
            filename = getattr(stream, 'name', None)
            s = make_literal_wrapper(filename)
            if filename and filename[0] == s('<') and filename[-1] == s('>'):
                filename = None

            # On Python 3 we want to make sure the filename is always unicode.
            # This might not be if the name attribute is bytes due to the
            # file being opened from the bytes API.
            if isinstance(filename, bytes):
                filename = filename.decode(sys.getfilesystemencoding(),
                                           'replace')

        self.filename = filename
        if headers is None:
            headers = Headers()
        self.headers = headers
        if content_type is not None:
            headers['Content-Type'] = content_type
        if content_length is not None:
            headers['Content-Length'] = str(content_length)
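A short usage sketch, assuming this constructor belongs to Werkzeug's `werkzeug.datastructures.FileStorage` (whose signature it matches):

from io import BytesIO
from werkzeug.datastructures import FileStorage

fs = FileStorage(
    stream=BytesIO(b'hello'),
    filename='hello.txt',
    name='upload',
    content_type='text/plain',
)
print(fs.filename)                 # hello.txt
print(fs.headers['Content-Type'])  # text/plain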
Example 3
    def __init__(self, path='/', base_url=None, query_string=None,
                 method='GET', input_stream=None, content_type=None,
                 content_length=None, errors_stream=None, multithread=False,
                 multiprocess=False, run_once=False, headers=None, data=None,
                 environ_base=None, environ_overrides=None, charset='utf-8'):
        path_s = make_literal_wrapper(path)
        if query_string is None and path_s('?') in path:
            path, query_string = path.split(path_s('?'), 1)
        self.charset = charset
        self.path = iri_to_uri(path)
        if base_url is not None:
            base_url = url_fix(iri_to_uri(base_url, charset), charset)
        self.base_url = base_url
        if isinstance(query_string, (bytes, text_type)):
            self.query_string = query_string
        else:
            if query_string is None:
                query_string = MultiDict()
            elif not isinstance(query_string, MultiDict):
                query_string = MultiDict(query_string)
            self.args = query_string
        self.method = method
        if headers is None:
            headers = Headers()
        elif not isinstance(headers, Headers):
            headers = Headers(headers)
        self.headers = headers
        if content_type is not None:
            self.content_type = content_type
        if errors_stream is None:
            errors_stream = sys.stderr
        self.errors_stream = errors_stream
        self.multithread = multithread
        self.multiprocess = multiprocess
        self.run_once = run_once
        self.environ_base = environ_base
        self.environ_overrides = environ_overrides
        self.input_stream = input_stream
        self.content_length = content_length
        self.closed = False

        if data:
            if input_stream is not None:
                raise TypeError('can\'t provide input stream and data')
            if hasattr(data, 'read'):
                data = data.read()
            if isinstance(data, text_type):
                data = data.encode(self.charset)
            if isinstance(data, bytes):
                self.input_stream = BytesIO(data)
                if self.content_length is None:
                    self.content_length = len(data)
            else:
                for key, value in _iter_data(data):
                    if isinstance(value, (tuple, dict)) or \
                       hasattr(value, 'read'):
                        self._add_file_from_data(key, value)
                    else:
                        self.form.setlistdefault(key).append(value)
Example 4
    def __init__(self, path='/', base_url=None, query_string=None,
                 method='GET', input_stream=None, content_type=None,
                 content_length=None, errors_stream=None, multithread=False,
                 multiprocess=False, run_once=False, headers=None, data=None,
                 environ_base=None, environ_overrides=None, charset='utf-8'):
        path_s = make_literal_wrapper(path)
        if query_string is None and path_s('?') in path:
            path, query_string = path.split(path_s('?'), 1)
        self.charset = charset
        self.path = iri_to_uri(path)
        if base_url is not None:
            base_url = url_fix(iri_to_uri(base_url, charset), charset)
        self.base_url = base_url
        if isinstance(query_string, (bytes, str)):
            self.query_string = query_string
        else:
            if query_string is None:
                query_string = MultiDict()
            elif not isinstance(query_string, MultiDict):
                query_string = MultiDict(query_string)
            self.args = query_string
        self.method = method
        if headers is None:
            headers = Headers()
        elif not isinstance(headers, Headers):
            headers = Headers(headers)
        self.headers = headers
        if content_type is not None:
            self.content_type = content_type
        if errors_stream is None:
            errors_stream = sys.stderr
        self.errors_stream = errors_stream
        self.multithread = multithread
        self.multiprocess = multiprocess
        self.run_once = run_once
        self.environ_base = environ_base
        self.environ_overrides = environ_overrides
        self.input_stream = input_stream
        self.content_length = content_length
        self.closed = False

        if data:
            if input_stream is not None:
                raise TypeError('can\'t provide input stream and data')
            if isinstance(data, str):
                data = data.encode(self.charset)
            if isinstance(data, bytes):
                self.input_stream = BytesIO(data)
                if self.content_length is None:
                    self.content_length = len(data)
            else:
                for key, value in _iter_data(data):
                    if (
                        isinstance(value, (tuple, dict)) or
                        hasattr(value, 'read')
                    ):
                        self._add_file_from_data(key, value)
                    else:
                        self.form.setlistdefault(key).append(value)
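For context on the two `__init__` variants above, a small usage sketch, assuming this is `EnvironBuilder` from `werkzeug.test` (which the signature matches): a query string embedded in `path` is split off into `args`, and a `data` dict is routed into the form.

from werkzeug.test import EnvironBuilder

builder = EnvironBuilder(path='/search?q=python', method='POST',
                         data={'name': 'example'})
print(builder.path)          # /search
print(builder.args['q'])     # python
print(builder.form['name'])  # example
environ = builder.get_environ()  # the finished WSGI environ dict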
Example 5
def url_parse(url, scheme=None, allow_fragments=True):
    """Parses a URL from a string into a :class:`URL` tuple.  If the URL
    is lacking a scheme, it can be provided as the second argument; otherwise
    it is ignored.  Optionally, fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default scheme to use if the URL has no scheme.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.
    """
    s = make_literal_wrapper(url)
    is_text_based = isinstance(url, text_type)

    if scheme is None:
        scheme = s("")
    netloc = query = fragment = s("")
    i = url.find(s(":"))
    if i > 0 and _scheme_re.match(to_native(url[:i], errors="replace")):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1:]
        if not rest or any(c not in s("0123456789") for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s("//"):
        delim = len(url)
        for c in s("/?#"):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        if (s("[") in netloc
                and s("]") not in netloc) or (s("]") in netloc
                                              and s("[") not in netloc):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, fragment = url.split(s("#"), 1)
    if s("?") in url:
        url, query = url.split(s("?"), 1)

    result_type = URL if is_text_based else BytesURL
    return result_type(scheme, netloc, url, query, fragment)
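The result is a named tuple whose fields mirror the five components; a short sketch (again assuming `werkzeug.urls`):

from werkzeug.urls import url_parse

u = url_parse('http://[::1]:8080/index?x=1#top')
print(u.scheme)    # http
print(u.netloc)    # [::1]:8080
print(u.path)      # /index
print(u.query)     # x=1
print(u.fragment)  # top

# Bytes input yields a BytesURL with bytes components instead.
print(url_parse(b'ftp://host/file').scheme)  # b'ftp'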
Example 6
def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors):
    for pair in pair_iter:
        if not pair:
            continue
        s = make_literal_wrapper(pair)
        equal = s('=')
        if equal in pair:
            key, value = pair.split(equal, 1)
        else:
            if not include_empty:
                continue
            key = pair
            value = s('')
        key = url_unquote_plus(key, charset, errors)
        if charset is not None and PY2 and not decode_keys:
            key = try_coerce_native(key)
        yield key, url_unquote_plus(value, charset, errors)
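Since this is a private helper, here is a self-contained sketch of the same pair-splitting idea using only the standard library (the `decode_pairs` name is illustrative, not Werkzeug's API):

from urllib.parse import unquote_plus

def decode_pairs(pairs, include_empty=True):
    # Mirror the logic above: skip empty pairs, split on the first '=',
    # and optionally keep keys that arrived without a value.
    for pair in pairs:
        if not pair:
            continue
        if '=' in pair:
            key, value = pair.split('=', 1)
        else:
            if not include_empty:
                continue
            key, value = pair, ''
        yield unquote_plus(key), unquote_plus(value)

print(list(decode_pairs('a=1&b=hello+world&c'.split('&'))))
# [('a', '1'), ('b', 'hello world'), ('c', '')]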
Example 7
def make_line_iter(stream, limit=None, buffer_size=10 * 1024):
    """Safely iterates line-based over an input stream.  If the input stream
    is not a :class:`LimitedStream` the `limit` parameter is mandatory.

    This uses the stream's :meth:`~file.read` method internally, as opposed
    to the :meth:`~file.readline` method, which is unsafe and can only be used
    in violation of the WSGI specification.  The same problem applies to the
    `__iter__` function of the input stream which calls :meth:`~file.readline`
    without arguments.

    If you need line-by-line processing it's strongly recommended to iterate
    over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream (usually the
                  content length).  Not necessary if the `stream`
                  is a :class:`LimitedStream`.
    :param buffer_size: The optional buffer size.
    """
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    first_item = next(_iter, '')
    if not first_item:
        return

    s = make_literal_wrapper(first_item)
    empty = s('')
    cr = s('\r')
    lf = s('\n')
    crlf = s('\r\n')

    _iter = chain((first_item,), _iter)

    def _iter_basic_lines():
        _join = empty.join
        buffer = []
        while 1:
            new_data = next(_iter, '')
            if not new_data:
                break
            new_buf = []
            for item in chain(buffer, new_data.splitlines(True)):
                new_buf.append(item)
                if item and item[-1:] in crlf:
                    yield _join(new_buf)
                    new_buf = []
            buffer = new_buf
        if buffer:
            yield _join(buffer)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    previous = empty
    for item in _iter_basic_lines():
        if item == lf and previous[-1:] == cr:
            previous += item
            item = empty
        if previous:
            yield previous
        previous = item
    if previous:
        yield previous
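A usage sketch, assuming `make_line_iter` lives in `werkzeug.wsgi`, as in the Werkzeug versions these snippets come from; note that lines are yielded with their terminators intact:

from io import BytesIO
from werkzeug.wsgi import make_line_iter

data = b'first\r\nsecond\nthird'
for line in make_line_iter(BytesIO(data), limit=len(data)):
    print(repr(line))
# b'first\r\n'
# b'second\n'
# b'third'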
Example 8
def make_line_iter(stream,
                   limit=None,
                   buffer_size=10 * 1024,
                   cap_at_buffer=False):
    """Safely iterates line-based over an input stream.  If the input stream
    is not a :class:`LimitedStream` the `limit` parameter is mandatory.

    This uses the stream's :meth:`~file.read` method internally, as opposed
    to the :meth:`~file.readline` method, which is unsafe and can only be used
    in violation of the WSGI specification.  The same problem applies to the
    `__iter__` function of the input stream which calls :meth:`~file.readline`
    without arguments.

    If you need line-by-line processing it's strongly recommended to iterate
    over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    .. versionadded:: 0.11.10
       added support for the `cap_at_buffer` parameter.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream (usually the
                  content length).  Not necessary if the `stream`
                  is a :class:`LimitedStream`.
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set, chunks are split if they are longer
                          than the buffer size.  Internally this is implemented
                          so that the buffer size may be exceeded by up to a
                          factor of two, however.
    """
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    first_item = next(_iter, '')
    if not first_item:
        return

    s = make_literal_wrapper(first_item)
    empty = s('')
    cr = s('\r')
    lf = s('\n')
    crlf = s('\r\n')

    _iter = chain((first_item, ), _iter)

    def _iter_basic_lines():
        _join = empty.join
        buffer = []
        while 1:
            new_data = next(_iter, '')
            if not new_data:
                break
            new_buf = []
            buf_size = 0
            for item in chain(buffer, new_data.splitlines(True)):
                new_buf.append(item)
                buf_size += len(item)
                if item and item[-1:] in crlf:
                    yield _join(new_buf)
                    new_buf = []
                elif cap_at_buffer and buf_size >= buffer_size:
                    rv = _join(new_buf)
                    while len(rv) >= buffer_size:
                        yield rv[:buffer_size]
                        rv = rv[buffer_size:]
                    new_buf = [rv]
            buffer = new_buf
        if buffer:
            yield _join(buffer)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    previous = empty
    for item in _iter_basic_lines():
        if item == lf and previous[-1:] == cr:
            previous += item
            item = empty
        if previous:
            yield previous
        previous = item
    if previous:
        yield previous
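The effect of `cap_at_buffer` in a small sketch (same `werkzeug.wsgi` assumption as above): an over-long line is sliced into roughly buffer-sized pieces instead of being accumulated whole.

from io import BytesIO
from werkzeug.wsgi import make_line_iter

data = b'x' * 25 + b'\n'
chunks = list(make_line_iter(BytesIO(data), limit=len(data),
                             buffer_size=10, cap_at_buffer=True))
print([len(c) for c in chunks])  # e.g. [10, 10, 6] rather than one 26-byte line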
Example 9
def url_join(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    :param base: the base URL for the join operation.
    :param url: the URL to join.
    :param allow_fragments: indicates whether fragments should be allowed.
    """
    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    base, url = normalize_string_tuple((base, url))
    s = make_literal_wrapper(base)

    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bquery, bfragment = \
        url_parse(base, allow_fragments=allow_fragments)
    scheme, netloc, path, query, fragment = \
        url_parse(url, bscheme, allow_fragments)
    if scheme != bscheme:
        return url
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s('/'):
        segments = path.split(s('/'))
    elif not path:
        segments = bpath.split(s('/'))
        if not query:
            query = bquery
    else:
        segments = bpath.split(s('/'))[:-1] + path.split(s('/'))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s('.'):
        segments[-1] = s('')

    # Resolve ".." and "."
    segments = [segment for segment in segments if segment != s('.')]
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if segments[i] == s('..') and \
               segments[i - 1] not in (s(''), s('..')):
                del segments[i - 1:i + 1]
                break
            i += 1
        else:
            break

    # Remove trailing ".." if the URL is absolute
    unwanted_marker = [s(''), s('..')]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s('/').join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))
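A sketch of the relative-reference resolution (assuming `werkzeug.urls` once more); `.` and `..` segments are resolved much as RFC 3986 describes:

from werkzeug.urls import url_join

print(url_join('http://example.com/a/b/c', 'd'))     # http://example.com/a/b/d
print(url_join('http://example.com/a/b/c', '../d'))  # http://example.com/a/d
print(url_join('http://example.com/a/b/c', '/d'))    # http://example.com/d
print(url_join('http://example.com/a/', '.'))        # http://example.com/a/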