def __init__(self, environ, limit):
    """Set up the stream on top of the WSGI input of `environ`.

    The base class is initialised with ``environ['wsgi.input']`` and a
    byte limit that is the smaller of `limit` and the integer value of
    ``CONTENT_LENGTH`` (a missing or empty header counts as 0).
    """
    wsgi_input = environ['wsgi.input']
    # `or 0` covers both an absent key and an empty string value.
    declared_length = int(environ.get('CONTENT_LENGTH') or 0)
    effective_limit = min(limit, declared_length)
    LimitedStreamBase.__init__(self, wsgi_input, effective_limit,
                               silent=True)
def parse_multipart(file, boundary, content_length, stream_factory=None,
                    charset='utf-8', errors='ignore', buffer_size=10 * 1024,
                    max_form_memory_size=None):
    """Parse a multipart/form-data stream.  This is invoked by
    :func:`utils.parse_form_data` if the content type matches.  Currently it
    exists for internal usage only, but could be exposed as separate
    function if it turns out to be useful and if we consider the API stable.

    A part is treated as a file upload if it carries a ``Content-Type``
    header; every other part is treated as a regular form field.

    :param file: the input stream.  It is wrapped in a `LimitedStream`
                 restricted to `content_length` and always exhausted
                 before the function returns or raises.
    :param boundary: the multipart boundary without the leading ``--``.
    :param content_length: the length of the whole multipart body.  It is
                           also passed to the stream factory as the total
                           content length.
    :param stream_factory: optional callable used to create the stream a
                           file part is written to.  It is called with the
                           total content length, the content type, the
                           filename and the content length of the part
                           (0 if the part header is missing or invalid).
    :param charset: the charset used to decode filenames and form values.
    :param errors: the error handling used when decoding.
    :param buffer_size: buffer size handed to the line iterator.  Has to
                        be divisible by 4 and at least 1024.
    :param max_form_memory_size: if not `None`, the maximum accumulated
                                 size of non-file part data; exceeding it
                                 raises `RequestEntityTooLarge`.
    :return: a tuple ``(form, files)`` of two lists of ``(name, value)``
             tuples.  Form values are decoded strings, file values are
             `FileStorage` objects.
    :raises ValueError: if the boundary is missing, invalid or too long,
                        or if the stream is malformed.
    """
    # XXX: this function does not support multipart/mixed.  I don't know of
    #      any browser that supports this, but it should be implemented
    #      nonetheless.

    # make sure the buffer size is divisible by four so that we can base64
    # decode chunk by chunk
    assert buffer_size % 4 == 0, 'buffer size has to be divisible by 4'
    # also the buffer size has to be at least 1024 bytes long or long headers
    # will freak out the system
    assert buffer_size >= 1024, 'buffer size has to be at least 1KB'

    if stream_factory is None:
        stream_factory = default_stream_factory
    else:
        stream_factory = _make_stream_factory(stream_factory)

    if not boundary:
        raise ValueError('Missing boundary')
    if not is_valid_multipart_boundary(boundary):
        raise ValueError('Invalid boundary: %s' % boundary)
    if len(boundary) > buffer_size:
        raise ValueError('Boundary longer than buffer size')

    total_content_length = content_length
    next_part = '--' + boundary
    last_part = next_part + '--'

    form = []
    files = []
    in_memory = 0

    # convert the file into a limited stream with iteration capabilities.
    # The endless tail of empty strings lets the loops below detect the end
    # of the stream as an empty line instead of a StopIteration.
    file = LimitedStream(file, content_length)
    iterator = chain(make_line_iter(file, buffer_size=buffer_size),
                     repeat(''))

    def _find_terminator():
        """The terminator might have some additional newlines before it.
        There is at least one application that sends additional newlines
        before headers (the python setuptools package).
        """
        for line in iterator:
            if not line:
                break
            line = line.strip()
            if line:
                return line
        return ''

    try:
        terminator = _find_terminator()
        if terminator != next_part:
            raise ValueError('Expected boundary at start of multipart data')

        while terminator != last_part:
            headers = parse_multipart_headers(iterator)
            disposition = headers.get('content-disposition')
            if disposition is None:
                raise ValueError('Missing Content-Disposition header')
            disposition, extra = parse_options_header(disposition)
            filename = extra.get('filename')
            name = extra.get('name')
            transfer_encoding = headers.get('content-transfer-encoding')

            content_type = headers.get('content-type')
            if content_type is None:
                is_file = False
            else:
                content_type = parse_options_header(content_type)[0]
                is_file = True

            if is_file:
                if filename is not None:
                    filename = _fix_ie_filename(_decode_unicode(filename,
                                                                charset,
                                                                errors))
                # the length of this single part; kept in its own name so
                # that the `content_length` argument is not overwritten.
                try:
                    part_content_length = int(headers['content-length'])
                except (KeyError, ValueError):
                    part_content_length = 0
                stream = stream_factory(total_content_length, content_type,
                                        filename, part_content_length)
            else:
                stream = StringIO()

            buf = ''
            for line in iterator:
                if not line:
                    raise ValueError('unexpected end of stream')
                if line[:2] == '--':
                    terminator = line.rstrip()
                    if terminator in (next_part, last_part):
                        break
                if transfer_encoding in _supported_multipart_encodings:
                    try:
                        line = line.decode(transfer_encoding)
                    except Exception:
                        # only translate real decoding failures; do not
                        # swallow KeyboardInterrupt or SystemExit.
                        raise ValueError('could not base 64 decode chunk')
                # we have something in the buffer from the last iteration.
                # write that value to the output stream now and clear the
                # buffer.
                if buf:
                    stream.write(buf)
                    buf = ''

                # If the line ends with windows CRLF we write everything
                # except the last two bytes.  In all other cases however we
                # write everything except the last byte.  If it was a
                # newline, that's fine, otherwise it does not matter because
                # we write it the next iteration.  If the loop aborts early
                # because the end of a part was reached, the last newline is
                # not written which is exactly what we want.
                if line[-2:] == '\r\n':
                    newline_length = 2
                else:
                    newline_length = 1
                stream.write(line[:-newline_length])
                buf = line[-newline_length:]
                if not is_file and max_form_memory_size is not None:
                    in_memory += len(line)
                    if in_memory > max_form_memory_size:
                        from werkzeug.exceptions import RequestEntityTooLarge
                        raise RequestEntityTooLarge()
            else:
                raise ValueError('unexpected end of part')

            # if the leftover in the buffer is not a newline it is real
            # payload (a transfer-decoded chunk does not have to end in a
            # newline), so it has to be flushed or the last byte of the
            # part would be chopped off.
            if buf not in ('', '\r', '\n', '\r\n'):
                stream.write(buf)

            # rewind the stream
            stream.seek(0)

            if is_file:
                files.append((name, FileStorage(stream, filename, name,
                                                content_type,
                                                part_content_length)))
            else:
                form.append((name, _decode_unicode(stream.read(),
                                                   charset, errors)))
    finally:
        # make sure the whole input stream is read
        file.exhaust()

    return form, files
コード例 #3
0
ファイル: limiter.py プロジェクト: AndryulE/kitsune
 def __init__(self, environ, limit):
     """Initialise the base stream from a WSGI `environ`.

     Reads from ``environ['wsgi.input']`` and caps the readable length at
     whichever is smaller: `limit` or the integer ``CONTENT_LENGTH`` of
     the environ (0 when the header is missing or empty).
     """
     source = environ['wsgi.input']
     # an absent or empty CONTENT_LENGTH falls back to 0
     announced = int(environ.get('CONTENT_LENGTH') or 0)
     cap = min(limit, announced)
     LimitedStreamBase.__init__(self, source, cap, silent=True)