Esempio n. 1
0
    def __init__(self, headers, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :headers:
            The standard ``headers`` dictionary in tornado request object.
            ``Content-Type`` and ``Content-Length`` are read from it.
        :upload_handlers:
            A list of UploadHandler instances that perform operations on the
            uploaded data.
        :encoding:
            The encoding with which to treat the incoming data. Falls back to
            ``settings.DEFAULT_CHARSET`` when falsy.

        Raises ``MultiPartParserError`` if the Content-Type is not multipart,
        the boundary is missing or invalid, or the Content-Length is negative.

        As a side effect, ``self.start`` is scheduled on the current IOLoop.
        """
        # Content-Type should contain multipart and the boundary information.
        content_type = headers.get('Content-Type', '')
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Parse the header to get the boundary to split the parts.
        ctypes, opts = _parse_header(content_type)
        boundary = opts.get('boundary')
        if not boundary or not cgi.valid_boundary(boundary):
            # The boundary can be None (missing) or text at this point, so it
            # must not be unconditionally ``.decode()``d: that would replace
            # the intended parser error with an AttributeError.
            if isinstance(boundary, bytes):
                boundary = boundary.decode('ascii', 'replace')
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)

        # Content-Length should contain the length of the body we are about
        # to receive.
        try:
            content_length = int(headers.get('Content-Length', 0))
        except (ValueError, TypeError):
            content_length = 0

        if content_length < 0:
            # This means we shouldn't continue...raise an error.
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        # Strip the optional surrounding quotes from the boundary parameter.
        self.boundary = boundary
        if self.boundary.startswith('"') and self.boundary.endswith('"'):
            self.boundary = self.boundary[1:-1]

        # Pre-encoded byte markers built from the boundary.
        self._boundary_delimiter = "--{}\r\n".format(self.boundary).encode('ascii')
        self._end_boundary = "\r\n--{}--\r\n".format(self.boundary).encode('ascii')
        self._boundary = self.boundary.encode('ascii')

        self.upload_handlers = upload_handlers
        self.content_length = content_length
        self.encoding = encoding or settings.DEFAULT_CHARSET

        # Initial parser state.
        self.current_phase = PHASE_BOUNDARY
        self.current_field_type = RAW

        self._buffer = b""
        self._data_size = 0
        self._field_name = None
        self._skip_field_name = None  # Tuple (field_name, upload_handler_index)
        self._transfer_encoding = None
        self._file_name = None

        self._read_field_data = b""

        self.files = {}
        self.arguments = {}

        IOLoop.current().add_callback(self.start)
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Set up a MultiPartParser from request metadata and a body stream.

        :META:
            The standard ``META`` dictionary in Django request objects;
            ``CONTENT_TYPE`` and ``CONTENT_LENGTH`` are read from it.
        :input_data:
            The raw post data, as a file-like object.
        :upload_handlers:
            A list of UploadHandler instances that perform operations on the
            uploaded data. Their truthy ``chunk_size`` values cap the chunk
            size used by the parser.
        :encoding:
            The encoding with which to treat the incoming data. Falls back to
            ``settings.DEFAULT_CHARSET`` when falsy.

        Raises ``MultiPartParserError`` for a non-multipart or non-ASCII
        Content-Type, a missing or invalid boundary, or a negative
        Content-Length.
        """
        # The request has to announce a multipart body.
        media_type = META.get('CONTENT_TYPE', '')
        if not media_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % media_type)

        # The header's boundary parameter tells us where the parts split.
        try:
            ctypes, params = parse_header(media_type.encode('ascii'))
        except UnicodeEncodeError:
            raise MultiPartParserError(
                'Invalid non-ASCII Content-Type in multipart: %s' %
                force_str(media_type))
        boundary = params.get('boundary')
        if not (boundary and cgi.valid_boundary(boundary)):
            raise MultiPartParserError(
                'Invalid boundary in multipart: %s' % force_str(boundary))

        # An unparsable Content-Length is treated as 0; a negative one is
        # rejected outright.
        try:
            body_length = int(META.get('CONTENT_LENGTH', 0))
        except (ValueError, TypeError):
            body_length = 0
        if body_length < 0:
            raise MultiPartParserError(
                "Invalid content length: %r" % body_length)

        # The boundary is stored as bytes.
        if isinstance(boundary, str):
            boundary = boundary.encode('ascii')
        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        size_candidates = [2**31 - 4]
        size_candidates.extend(
            handler.chunk_size
            for handler in upload_handlers
            if handler.chunk_size
        )
        self._chunk_size = min(size_candidates)

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = body_length
        self._upload_handlers = upload_handlers
        def read_multi(self, environ, keep_blank_values, strict_parsing):
            """Internal: read a part that is itself multipart.

            Rebuilds ``self.list``: first one ``cgi.MiniFieldStorage`` per
            key/value pair parsed from ``self.qs_on_post`` (if set), then one
            storage object per sub-part read from ``self.fp``. Sub-parts are
            built with ``self.FieldStorageClass`` or, failing that, with this
            object's own class. ``self.bytes_read`` is advanced as data is
            consumed.

            Raises ``ValueError`` if ``self.innerboundary`` is not a valid
            boundary or if ``self.fp.readline()`` does not return bytes.
            """
            ib = self.innerboundary
            if not cgi.valid_boundary(ib):
                raise ValueError(
                    'Invalid boundary in multipart form: %r' % (ib,))
            self.list = []
            if self.qs_on_post:
                query = cgi.urllib.parse.parse_qsl(
                    self.qs_on_post, self.keep_blank_values,
                    self.strict_parsing,
                    encoding=self.encoding, errors=self.errors)
                self.list.extend(
                    cgi.MiniFieldStorage(key, value) for key, value in query)

            klass = self.FieldStorageClass or self.__class__
            first_line = self.fp.readline()  # bytes
            if not isinstance(first_line, bytes):
                raise ValueError("%s should return bytes, got %s"
                                 % (self.fp, type(first_line).__name__))
            self.bytes_read += len(first_line)

            # Ensure that we consume the file until we've hit our innerboundary
            while (first_line.strip() != (b"--" + self.innerboundary) and
                    first_line):
                first_line = self.fp.readline()
                self.bytes_read += len(first_line)

            while True:
                parser = cgi.FeedParser()
                # Collect one part's header block, up to and including the
                # blank line (or EOF) that ends it.
                hdr_text = b""
                while True:
                    data = self.fp.readline()
                    hdr_text += data
                    if not data.strip():
                        break
                if not hdr_text:
                    break
                # parser takes strings, not bytes
                self.bytes_read += len(hdr_text)
                parser.feed(hdr_text.decode(self.encoding, self.errors))
                headers = parser.close()
                # Some clients add Content-Length for part headers, ignore them
                # NOTE(review): the filename lookup below inspects
                # ``self.headers`` (this object's headers), not the ``headers``
                # just parsed for the part -- confirm that this is intended.
                if 'content-length' in headers:
                    filename = None
                    if 'content-disposition' in self.headers:
                        cdisp, pdict = parse_header(self.headers['content-disposition'])
                        if 'filename' in pdict:
                            filename = pdict['filename']
                    if filename is None:
                        del headers['content-length']
                # ``self.limit`` may be None (no limit); subtracting from it
                # would raise TypeError, so pass "no limit" on to the part.
                if self.limit is None:
                    limit = None
                else:
                    limit = self.limit - self.bytes_read
                part = klass(self.fp, headers, ib, environ, keep_blank_values,
                             strict_parsing, limit,
                             self.encoding, self.errors)
                self.bytes_read += part.bytes_read
                self.list.append(part)
                if part.done or self.bytes_read >= self.length > 0:
                    break
            self.skip_lines()
Esempio n. 4
0
        def read_multi(self, environ, keep_blank_values, strict_parsing):
            """Internal: read a part that is itself multipart.

            Rebuilds ``self.list``: first one ``cgi.MiniFieldStorage`` per
            key/value pair parsed from ``self.qs_on_post`` (if set), then one
            storage object per sub-part read from ``self.fp``. Sub-parts are
            built with ``self.FieldStorageClass`` or, failing that, with this
            object's own class. ``self.bytes_read`` is advanced as data is
            consumed.

            Raises ``ValueError`` if ``self.innerboundary`` is not a valid
            boundary or if ``self.fp.readline()`` does not return bytes.
            """
            ib = self.innerboundary
            if not cgi.valid_boundary(ib):
                raise ValueError(
                    'Invalid boundary in multipart form: %r' % (ib,))
            self.list = []
            if self.qs_on_post:
                query = cgi.urllib.parse.parse_qsl(
                    self.qs_on_post, self.keep_blank_values,
                    self.strict_parsing,
                    encoding=self.encoding, errors=self.errors)
                self.list.extend(
                    cgi.MiniFieldStorage(key, value) for key, value in query)

            klass = self.FieldStorageClass or self.__class__
            first_line = self.fp.readline()  # bytes
            if not isinstance(first_line, bytes):
                raise ValueError("%s should return bytes, got %s"
                                 % (self.fp, type(first_line).__name__))
            self.bytes_read += len(first_line)

            # Ensure that we consume the file until we've hit our innerboundary
            while (first_line.strip() != (b"--" + self.innerboundary) and
                    first_line):
                first_line = self.fp.readline()
                self.bytes_read += len(first_line)

            while True:
                parser = cgi.FeedParser()
                # Collect one part's header block, up to and including the
                # blank line (or EOF) that ends it.
                hdr_text = b""
                while True:
                    data = self.fp.readline()
                    hdr_text += data
                    if not data.strip():
                        break
                if not hdr_text:
                    break
                # parser takes strings, not bytes
                self.bytes_read += len(hdr_text)
                parser.feed(hdr_text.decode(self.encoding, self.errors))
                headers = parser.close()
                # Some clients add Content-Length for part headers, ignore them
                # NOTE(review): the filename lookup below inspects
                # ``self.headers`` (this object's headers), not the ``headers``
                # just parsed for the part -- confirm that this is intended.
                if 'content-length' in headers:
                    filename = None
                    if 'content-disposition' in self.headers:
                        cdisp, pdict = parse_header(self.headers['content-disposition'])
                        if 'filename' in pdict:
                            filename = pdict['filename']
                    if filename is None:
                        del headers['content-length']
                # ``self.limit`` may be None (no limit); subtracting from it
                # would raise TypeError, so pass "no limit" on to the part.
                if self.limit is None:
                    limit = None
                else:
                    limit = self.limit - self.bytes_read
                part = klass(self.fp, headers, ib, environ, keep_blank_values,
                             strict_parsing, limit,
                             self.encoding, self.errors)
                self.bytes_read += part.bytes_read
                self.list.append(part)
                if part.done or self.bytes_read >= self.length > 0:
                    break
            self.skip_lines()
Esempio n. 5
0
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :META:
            The standard ``META`` dictionary in Django request objects.
        :input_data:
            The raw post data, as a file-like object.
        :upload_handlers:
            A list of UploadHandler instances that perform operations on the uploaded
            data.
        :encoding:
            The encoding with which to treat the incoming data.

        Raises ``MultiPartParserError`` for a non-multipart or non-ASCII
        Content-Type, a missing or invalid boundary, or a negative content
        length.
        """
        # The header looks like this:
        # Content-Type:multipart/form-data; boundary=----WebKitFormBoundaryweYiMAdsKmqlTSAf

        #
        # Content-Type should contain multipart and the boundary information.
        #

        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Parse the header to get the boundary to split the parts.
        # A header with non-ASCII characters cannot be encoded; report it as a
        # parser error rather than letting the raw Unicode error escape.
        try:
            ctypes, opts = parse_header(content_type.encode('ascii'))
        except UnicodeError:
            raise MultiPartParserError(
                'Invalid non-ASCII Content-Type in multipart: %r' % content_type)
        boundary = opts.get('boundary')
        # A multipart Content-Type must specify the boundary delimiter.
        if not boundary or not cgi.valid_boundary(boundary):
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)

        # Content-Length should contain the length of the body we are about
        # to receive.
        try:
            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH', 0)))
        except (ValueError, TypeError):
            content_length = 0

        if content_length < 0:
            # This means we shouldn't continue...raise an error.
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        # The boundary is stored as bytes.
        if isinstance(boundary, six.text_type):
            boundary = boundary.encode('ascii')
        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
        self._chunk_size = min([2 ** 31 - 4] + possible_sizes)

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = content_length
        self._upload_handlers = upload_handlers
Esempio n. 6
0
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :META:
            The standard ``META`` dictionary in Django request objects.
        :input_data:
            The raw post data, as a bytestring.
        :upload_handlers:
            An iterable of UploadHandler instances that perform operations on
            the uploaded data. Their truthy ``chunk_size`` values cap the
            chunk size used by the parser.
        :encoding:
            The encoding with which to treat the incoming data. Falls back to
            ``settings.DEFAULT_CHARSET`` when falsy.

        Raises ``MultiPartParserError`` if the Content-Type is not multipart,
        the boundary is missing or invalid, or the content length is
        unparsable, zero or negative.
        """

        #
        # Content-Type should contain multipart and the boundary information.
        #

        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Parse the header to get the boundary to split the parts.
        ctypes, opts = parse_header(content_type)
        boundary = opts.get('boundary')
        if not boundary or not cgi.valid_boundary(boundary):
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)


        #
        # Content-Length should contain the length of the body we are about
        # to receive.
        #
        try:
            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
        except (ValueError, TypeError):
            # An unparsable length counts as 0, which is rejected just below.
            content_length = 0

        if content_length <= 0:
            # This means we shouldn't continue...raise an error.
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        # min() is given a single list: calling it as min(cap, *sizes) raises
        # TypeError when no handler declares a chunk size.
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
        self._chunk_size = min([2**31-4] + possible_sizes)

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = content_length
        self._upload_handlers = upload_handlers
Esempio n. 7
0
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :META:
            The standard ``META`` dictionary in Django request objects.
        :input_data:
            The raw post data, as a bytestring.
        :upload_handlers:
            An iterable of UploadHandler instances that perform operations on
            the uploaded data. Their truthy ``chunk_size`` values cap the
            chunk size used by the parser.
        :encoding:
            The encoding with which to treat the incoming data. Falls back to
            ``settings.DEFAULT_CHARSET`` when falsy.

        Raises ``MultiPartParserError`` if the Content-Type is not multipart,
        the boundary is missing or invalid, or the content length is
        unparsable, zero or negative.
        """

        #
        # Content-Type should contain multipart and the boundary information.
        #

        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Parse the header to get the boundary to split the parts.
        ctypes, opts = parse_header(content_type)
        boundary = opts.get('boundary')
        if not boundary or not cgi.valid_boundary(boundary):
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)


        #
        # Content-Length should contain the length of the body we are about
        # to receive.
        #
        try:
            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
        except (ValueError, TypeError):
            # An unparsable length counts as 0, which is rejected just below.
            content_length = 0

        if content_length <= 0:
            # This means we shouldn't continue...raise an error.
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        # min() is given a single list: calling it as min(cap, *sizes) raises
        # TypeError when no handler declares a chunk size.
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
        self._chunk_size = min([2**31-4] + possible_sizes)

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = content_length
        self._upload_handlers = upload_handlers
Esempio n. 8
0
    def parse(self):
        """Start consuming the multipart body found in the WSGI environ.

        The boundary is taken from ``self.options`` and the body from
        ``self.environ['wsgi.input']`` (an empty ``BytesIO`` when absent or
        falsy). ``self.buffer`` is reset to an empty ``bytearray``.

        Returns a future wrapping ``self._consume`` when the input is an
        ``HttpBodyReader``; otherwise the result of running ``self._consume``
        through a ``BytesProducer`` built on the input.

        Raises ``HttpException`` (status 422) if the boundary is invalid.
        """
        # ``async`` is a reserved word from Python 3.7 on, so ``async(...)``
        # no longer parses; ``ensure_future`` is the replacement spelling.
        from asyncio import ensure_future

        boundary = self.options.get("boundary", "")
        if not valid_boundary(boundary):
            raise HttpException("Invalid boundary for multipart/form-data", status=422)
        inp = self.environ.get("wsgi.input") or BytesIO()
        self.buffer = bytearray()

        if isinstance(inp, HttpBodyReader):
            # Schedule the consumer on the loop the body reader belongs to.
            return ensure_future(self._consume(inp, boundary),
                                 loop=inp.reader._loop)
        else:
            producer = BytesProducer(inp)
            return producer(self._consume, boundary)
Esempio n. 9
0
    def parse(self):
        """Start consuming the multipart body of ``self.request``.

        Uses ``self.boundary`` as the part delimiter and
        ``self.request['wsgi.input']`` as the body (an empty ``BytesIO`` when
        absent or falsy). ``self.buffer`` is reset to an empty ``bytearray``.

        Returns ``self._consume(body, boundary)`` when the body is an
        ``HttpBodyReader``; otherwise the result of running ``self._consume``
        through a ``BytesProducer`` built on the body.

        Raises ``HttpException`` (status 422) if the boundary is invalid.
        """
        delimiter = self.boundary
        if not valid_boundary(delimiter):
            raise HttpException("Invalid boundary for multipart/form-data",
                                status=422)

        body = self.request.get('wsgi.input') or BytesIO()
        self.buffer = bytearray()

        # Anything that is not an HttpBodyReader is fed through a producer.
        if not isinstance(body, HttpBodyReader):
            return BytesProducer(body)(self._consume, delimiter)
        return self._consume(body, delimiter)
Esempio n. 10
0
    def parse(self):
        """Start consuming the multipart body of ``self.request``.

        Uses ``self.boundary`` as the part delimiter and
        ``self.request['wsgi.input']`` as the body (an empty ``BytesIO`` when
        absent or falsy). ``self.buffer`` is reset to an empty ``bytearray``.

        Returns ``self._consume(body, boundary)`` when the body is an
        ``HttpBodyReader``; otherwise the result of running ``self._consume``
        through a ``BytesProducer`` built on the body.

        Raises ``HttpException`` (status 422) if the boundary is invalid.
        """
        delimiter = self.boundary
        if not valid_boundary(delimiter):
            raise HttpException("Invalid boundary for multipart/form-data",
                                status=422)

        body = self.request.get('wsgi.input') or BytesIO()
        self.buffer = bytearray()

        # Anything that is not an HttpBodyReader is fed through a producer.
        if not isinstance(body, HttpBodyReader):
            return BytesProducer(body)(self._consume, delimiter)
        return self._consume(body, delimiter)
Esempio n. 11
0
    def parse(self):
        """Start consuming the multipart body found in the WSGI environ.

        The boundary comes from ``self.options`` and the body from
        ``self.environ['wsgi.input']`` (an empty ``BytesIO`` when absent or
        falsy). ``self.buffer`` is reset to an empty ``bytearray``.

        Returns a future wrapping ``self._consume`` when the body is an
        ``HttpBodyReader``; otherwise the result of running ``self._consume``
        through a ``BytesProducer`` built on the body.

        Raises ``HttpException`` (status 422) if the boundary is invalid.
        """
        delimiter = self.options.get('boundary', '')
        if not valid_boundary(delimiter):
            raise HttpException("Invalid boundary for multipart/form-data",
                                status=422)

        body = self.environ.get('wsgi.input') or BytesIO()
        self.buffer = bytearray()

        # Anything that is not an HttpBodyReader is fed through a producer.
        if not isinstance(body, HttpBodyReader):
            return BytesProducer(body)(self._consume, delimiter)

        # Schedule the consumer on the loop the body reader belongs to.
        consumer = self._consume(body, delimiter)
        return ensure_future(consumer, loop=body.reader._loop)
Esempio n. 12
0
    def read_multi(self, environ, keep_blank_values, strict_parsing):
        """Internal: read a part that is itself multipart.

        Rebuilds ``self.list`` from the sub-parts found in ``self.fp``,
        preceded by one ``MiniFieldStorage`` per key/value pair parsed from
        ``self.qs_on_post`` (if set).

        Afterwards, every field whose name ends in ``.ngx_upload`` is
        collapsed: its value is used as a prefix to look up the companion
        ``.ngx_upload``, ``.filename``, ``.path`` and ``.content_type``
        fields, those fields are deleted, and a single ``NginxFieldStorage``
        built from their values is appended to ``self.list``.

        Raises ``ValueError`` if ``self.innerboundary`` is not a valid
        boundary.
        """

        ib = self.innerboundary
        if not valid_boundary(ib):
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise ValueError("Invalid boundary in multipart form: %r" % (ib,))
        self.list = []
        if self.qs_on_post:
            for key, value in urlparse.parse_qsl(self.qs_on_post, self.keep_blank_values, self.strict_parsing):
                self.list.append(MiniFieldStorage(key, value))

        klass = self.FieldStorageClass or self.__class__
        part = klass(self.fp, {}, ib, environ, keep_blank_values, strict_parsing)
        # Throw first part away
        while not part.done:
            headers = rfc822.Message(self.fp)
            part = klass(self.fp, headers, ib, environ, keep_blank_values, strict_parsing)
            self.list.append(part)
        self.skip_lines()

        for fieldname in self.keys():
            if fieldname.endswith(".ngx_upload"):

                # Keyword name for NginxFieldStorage -> companion field name
                # pattern (filled in with the upload prefix).
                fn_patterns = {
                    "name": "%s.ngx_upload",
                    "filename": "%s.filename",
                    "path": "%s.path",
                    "content_type": "%s.content_type",
                }

                fn_dict = {}
                upload_fn = self.getvalue(fieldname)
                for k, p in fn_patterns.items():
                    fn = p % upload_fn
                    fn_dict[k] = self.getvalue(fn)

                    # cleanup nginx stuff
                    del self[fn]

                logger.debug("Nginx File Upload (%(name)s, %(filename)s, %(path)s, %(content_type)s)" % fn_dict)
                self.list.append(NginxFieldStorage(**fn_dict))
Esempio n. 13
0
	def read_multi(self, environ, keep_blank_values, strict_parsing):
		"""Internal: read a part that is itself multipart.

		Rebuilds ``self.list`` from the sub-parts found in ``self.fp``. Each
		sub-part is read as a ``NestedFieldStorage`` and handed to
		``self.parse_field``; the value it returns is appended to
		``self.list`` (with its ``name`` set to the returned name) only when
		``parse_field`` reports it as new.

		Raises ``ValueError`` if ``self.innerboundary`` is not a valid
		boundary.
		"""
		ib = self.innerboundary
		if not cgi.valid_boundary(ib):
			# Call form of raise: valid on Python 2 and Python 3 alike.
			raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
		self.list = []
		part = NestedFieldStorage(self.req, self, self.fp, {}, ib,
					 environ, keep_blank_values, strict_parsing)
		# Throw first part away
		while not part.done:
			headers = rfc822.Message(self.fp)
			part = NestedFieldStorage(self.req, self, self.fp, headers, ib,
						 environ, keep_blank_values, strict_parsing)

			name, value, new = self.parse_field(part.name, part)
			if new:
				value.name = name
				self.list.append(value)

		self.skip_lines()
Esempio n. 14
0
    def __init__(self,
                 stream,
                 content_type,
                 content_length,
                 data_upload_max_number_fields=1000,
                 data_upload_max_memory_size=2621440,
                 file_upload_max_memory_size=2621440,
                 file_upload_temp_dir=None,
                 chunk_size=64 * 1024,
                 encoding='utf-8'):
        """Validate the multipart request metadata and store parser settings.

        :stream: the request body to parse; stored as-is.
        :content_type: the Content-Type header value; must start with
            ``multipart/`` and carry a valid ``boundary`` parameter.
        :content_length: the declared body length; must not be negative.
        :chunk_size: base read size; the stored value is
            ``(chunk_size // 4 + 1) * 4``.
        :encoding: stored as the parser's text encoding.

        The remaining ``data_upload_*`` / ``file_upload_*`` arguments are
        stored unchanged on the instance.

        Raises ``MultiPartParserError`` on a non-multipart Content-Type, a
        missing or invalid boundary, or a negative content length.
        """
        # Only multipart bodies can be handled here.
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Pull the boundary parameter out of the (ASCII-encoded) header.
        header_bytes = force_bytes(content_type, encoding='ascii')
        header_params = parse_options_header(header_bytes).params
        boundary = header_params.get('boundary')
        if not (boundary and cgi.valid_boundary(boundary)):
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)

        # A negative length means we shouldn't continue.
        if content_length < 0:
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        self._boundary = force_bytes(boundary, encoding='ascii')
        self._stream = stream

        # Keep the chunk size a multiple of 4.
        self._chunk_size = 4 * (chunk_size // 4 + 1)

        self._data_upload_max_number_fields = data_upload_max_number_fields
        self._data_upload_max_memory_size = data_upload_max_memory_size

        self._file_upload_temp_dir = file_upload_temp_dir
        self._file_upload_max_memory_size = file_upload_max_memory_size

        self._encoding = encoding

        self._content_length = content_length
Esempio n. 15
0
def parse_multipart(fp, pdict):
    """Parse multipart input into a dictionary of field values.

    Arguments:
    fp   : input file, read as bytes
    pdict: dictionary containing other parameters of content-type header;
           ``pdict['boundary']`` supplies the part delimiter

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field. A part whose
    content-disposition carries a ``filename`` parameter is returned as a
    ``File`` built from its data and that filename; other parts are returned
    as bytes. Everything is held in memory, so this is not suited to very
    large uploads.

    Raises ValueError if the boundary is invalid.
    """
    import http.client

    boundary = pdict['boundary'] if 'boundary' in pdict else b""
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r' %
                         (boundary, ))

    nextpart = b"--" + boundary
    lastpart = nextpart + b"--"
    partdict = {}
    terminator = b""

    while terminator != lastpart:
        declared_len = -1
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    declared_len = int(clength)
                except ValueError:
                    pass
            data = fp.read(declared_len) if declared_len > 0 else b""

        # Read lines until end of part (a boundary line) or end of input.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart  # End outer loop
                break
            if line.startswith(b"--"):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)

        # Done with part.  Text before the first boundary has no headers
        # and is dropped.
        if data is None:
            continue

        # Without a usable Content-Length the body is the collected lines,
        # minus the line terminator that precedes the boundary.
        if declared_len < 0 and lines:
            last = lines[-1]
            if last[-2:] == b"\r\n":
                last = last[:-2]
            elif last[-1:] == b"\n":
                last = last[:-1]
            lines[-1] = last
            data = b"".join(lines)

        disposition = headers['content-disposition']
        if not disposition:
            continue
        key, params = parse_header(disposition)
        # Only named form-data parts are returned.
        if key != 'form-data' or 'name' not in params:
            continue
        name = params['name']
        if 'filename' in params:
            data = File(data, params['filename'])
        partdict.setdefault(name, []).append(data)

    return partdict
Esempio n. 16
0
    def parse_multipart(self, fp, pdict):
        """Parse a multipart/form-data body, spooling each part to a temp file.

        Arguments:
        fp   : seekable input file; it is read from offset 0 and its cursor
               is put back where it was on entry before returning
        pdict: dictionary of content-type parameters; ``pdict['boundary']``
               supplies the part delimiter

        Returns a dictionary mapping each field name to a list of open
        ``tempfile.NamedTemporaryFile`` objects (created with
        ``delete=False`` and rewound to offset 0) holding the part bodies.
        For parts that carry a ``filename`` parameter, the file name is also
        appended to the list stored under ``<name>_filename``. Empty parts,
        parts without a ``form-data`` content-disposition and unnamed parts
        are not returned, and their temporary files are removed.

        Raises ValueError if the boundary is invalid.
        """
        if self.do_log:
            # Parenthesised form prints the same text on Python 2 and 3.
            print('%f Parsing Multipart data: ' % time.time())
        # Kept in its own variable: reusing it inside the loop used to make
        # the final seek restore the wrong position.
        saved_pos = fp.tell()  # save cursor
        fp.seek(0, 0)  # reset cursor

        boundary = ""
        if 'boundary' in pdict:
            boundary = pdict['boundary']
        if not cgi.valid_boundary(boundary):
            raise ValueError('Invalid boundary in multipart form: %r' %
                             (boundary, ))

        nextpart = "--" + boundary
        lastpart = "--" + boundary + "--"
        partdict = {}
        terminator = ""

        while terminator != lastpart:
            c_bytes = -1
            # Text before the first boundary has no headers of its own.
            headers = None

            data = tempfile.NamedTemporaryFile(delete=False)
            keep = False
            try:
                if terminator:
                    # At start of next part.  Read headers first.
                    headers = mimetools.Message(fp)
                    clength = headers.getheader('content-length')
                    if clength:
                        try:
                            c_bytes = int(clength)
                        except ValueError:
                            pass
                    if c_bytes > 0:
                        data.write(fp.read(c_bytes))
                # Read lines until end of part.
                while 1:
                    line = fp.readline()
                    if not line:
                        terminator = lastpart  # End outer loop
                        break
                    if line[:2] == "--":
                        terminator = line.strip()
                        if terminator in (nextpart, lastpart):
                            break
                    data.write(line)
                # Done with part.  Skip empty parts and header-less preamble.
                if headers is None or data.tell() == 0:
                    continue
                if c_bytes < 0:
                    # if a Content-Length header was not supplied with the
                    # MIME part then the trailing line break must be removed.
                    # we have data, read the last 2 bytes
                    tail_len = min(2, data.tell())
                    data.seek(-tail_len, os.SEEK_END)
                    tail = data.read(2)
                    if tail[-2:] == "\r\n":
                        data.seek(-2, os.SEEK_END)
                        data.truncate()
                    elif tail[-1:] == "\n":
                        data.seek(-1, os.SEEK_END)
                        data.truncate()

                line = headers['content-disposition']
                if not line:
                    continue
                key, params = cgi.parse_header(line)
                if key != 'form-data':
                    continue
                if 'name' not in params:
                    # Unnamed parts are not returned at all.
                    continue
                name = params['name']
                # kludge in the filename
                if 'filename' in params:
                    partdict.setdefault(name + '_filename', []).append(
                        params['filename'])
                data.seek(0, 0)
                partdict.setdefault(name, []).append(data)
                keep = True
            finally:
                if not keep:
                    # The part is not handed to the caller, so do not leave
                    # its delete=False temporary file behind.
                    data.close()
                    os.unlink(data.name)

        fp.seek(saved_pos)  # Restore cursor
        return partdict
Esempio n. 17
0
    def parse_multipart(self, fp, pdict):
        """Parse a multipart/form-data body, spooling each part to a temp file.

        Arguments:
        fp   : seekable input file; it is read from offset 0 and its cursor
               is put back where it was on entry before returning
        pdict: dictionary of content-type parameters; ``pdict['boundary']``
               supplies the part delimiter

        Returns a dictionary mapping each field name to a list of open
        ``tempfile.NamedTemporaryFile`` objects (created with
        ``delete=False`` and rewound to offset 0) holding the part bodies.
        For parts that carry a ``filename`` parameter, the file name is also
        appended to the list stored under ``<name>_filename``. Empty parts,
        parts without a ``form-data`` content-disposition and unnamed parts
        are not returned, and their temporary files are removed.

        Raises ValueError if the boundary is invalid.
        """
        if self.do_log:
            # Parenthesised form prints the same text on Python 2 and 3.
            print('%f Parsing Multipart data: ' % time.time())
        # Kept in its own variable: reusing it inside the loop used to make
        # the final seek restore the wrong position.
        saved_pos = fp.tell()  # save cursor
        fp.seek(0, 0)  # reset cursor

        boundary = ""
        if 'boundary' in pdict:
            boundary = pdict['boundary']
        if not cgi.valid_boundary(boundary):
            raise ValueError('Invalid boundary in multipart form: %r' % (boundary,))

        nextpart = "--" + boundary
        lastpart = "--" + boundary + "--"
        partdict = {}
        terminator = ""

        while terminator != lastpart:
            c_bytes = -1
            # Text before the first boundary has no headers of its own.
            headers = None

            data = tempfile.NamedTemporaryFile(delete=False)
            keep = False
            try:
                if terminator:
                    # At start of next part.  Read headers first.
                    headers = mimetools.Message(fp)
                    clength = headers.getheader('content-length')
                    if clength:
                        try:
                            c_bytes = int(clength)
                        except ValueError:
                            pass
                    if c_bytes > 0:
                        data.write(fp.read(c_bytes))
                # Read lines until end of part.
                while 1:
                    line = fp.readline()
                    if not line:
                        terminator = lastpart  # End outer loop
                        break
                    if line[:2] == "--":
                        terminator = line.strip()
                        if terminator in (nextpart, lastpart):
                            break
                    data.write(line)
                # Done with part.  Skip empty parts and header-less preamble.
                if headers is None or data.tell() == 0:
                    continue
                if c_bytes < 0:
                    # if a Content-Length header was not supplied with the
                    # MIME part then the trailing line break must be removed.
                    # we have data, read the last 2 bytes
                    tail_len = min(2, data.tell())
                    data.seek(-tail_len, os.SEEK_END)
                    tail = data.read(2)
                    if tail[-2:] == "\r\n":
                        data.seek(-2, os.SEEK_END)
                        data.truncate()
                    elif tail[-1:] == "\n":
                        data.seek(-1, os.SEEK_END)
                        data.truncate()

                line = headers['content-disposition']
                if not line:
                    continue
                key, params = cgi.parse_header(line)
                if key != 'form-data':
                    continue
                if 'name' not in params:
                    # Unnamed parts are not returned at all.
                    continue
                name = params['name']
                # kludge in the filename
                if 'filename' in params:
                    partdict.setdefault(name + '_filename', []).append(
                        params['filename'])
                data.seek(0, 0)
                partdict.setdefault(name, []).append(data)
                keep = True
            finally:
                if not keep:
                    # The part is not handed to the caller, so do not leave
                    # its delete=False temporary file behind.
                    data.close()
                    os.unlink(data.name)

        fp.seek(saved_pos)  # Restore cursor
        return partdict
    def __init__(self, META, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :META:
            The env object from uWSGI, holding the HTTP request's metadata,
            headers, body and so on. The body stream is read from
            ``META["wsgi.input"]``.
        :upload_handlers:
            Upload file handlers. During an upload the file content travels
            in the HTTP body; once received, the server has to park it
            somewhere (memory or disk) until the user picks it up.
        :encoding:
            The encoding of the received data. Falls back to
            ``DEFAULT_CHARSET`` when falsy.

        Raises ``MultiPartParserError`` if the Content-Type is not multipart,
        the boundary is missing or invalid, or the content length is
        negative.
        """

        self._encoding = encoding or DEFAULT_CHARSET

        # Content-Type should carry the enctype and the boundary.
        #
        # The boundary is a random string used to separate the individual
        # fields of the form.
        #
        # For example:
        # 'CONTENT_TYPE': 'multipart/form-data; boundary=----WebKitFormBoundaryB9iAfLg1SnfmaPh5'
        media_type = META.get('HTTP_CONTENT_TYPE',
                              META.get('CONTENT_TYPE', ''))
        if not media_type.startswith('multipart/'):
            raise MultiPartParserError(
                'Invalid Content-Type: %s' % media_type)

        # Extract the boundary value, e.g.
        #   ctypes: multipart/form-data
        #   params: {boundary: ----WebKitFormBoundaryB9iAfLg1SnfmaPh5}
        ctypes, params = parse_header(media_type.encode('ascii'))

        boundary = params.get('boundary')
        if not (boundary and cgi.valid_boundary(boundary)):
            raise MultiPartParserError(
                'Invalid boundary in multipart: %s' % boundary)

        # Content-Length should contain the length of the body we are about
        # to receive; an unparsable value is treated as 0.
        try:
            body_length = int(
                META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH', 0)))
        except (ValueError, TypeError):
            body_length = 0

        # A negative length means we shouldn't continue.
        if body_length < 0:
            raise MultiPartParserError(
                "Invalid content length: %r" % body_length)

        # The boundary is stored as a byte string.
        if isinstance(boundary, unicode):
            boundary = boundary.encode('ascii')

        self._boundary = boundary
        self._input_data = META["wsgi.input"]

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        size_candidates = [2**31 - 4]
        size_candidates.extend(
            handler.chunk_size
            for handler in upload_handlers
            if handler.chunk_size
        )
        self._chunk_size = min(size_candidates)

        self._meta = META
        self._content_length = body_length
        self._upload_handlers = upload_handlers
"""
Esempio n. 20
0
def parse_multipart(fp, pdict):
    """Parse a ``multipart/form-data`` body into a dict of value lists.

    Most of this code is copied straight from
    `cgi.parse_multipart <https://github.com/python/cpython/blob/ad76602f69d884e491b0913c641dd8e42902c36c/Lib/cgi.py#L201>`_.
    The only difference is that it returns a :class:`.MultipartFile` for any
    part with a ``filename`` param in its content-disposition (instead of
    just the bytes).

    :param fp: binary file-like object positioned at the start of the body;
        ``readline()`` and ``read(n)`` are called on it.
    :param pdict: parameters of the Content-Type header.  ``pdict['boundary']``
        may be ``bytes`` or an ASCII ``str`` (the form ``cgi.parse_header``
        produces); a ``str`` is encoded to bytes after validation.
    :returns: dict mapping each field name to the list of values received
        for that name, in order of appearance.
    :raises ValueError: if the boundary is missing or invalid, or if a part
        declares a Content-Length greater than ``maxlen`` (a name this
        function expects to find in the enclosing module scope; a falsy
        value disables the check).
    """
    boundary = pdict.get('boundary', b'')
    if not cgi.valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r' % boundary)
    if isinstance(boundary, str):
        # valid_boundary() only accepts printable ASCII, so this cannot fail.
        # Without it the bytes concatenation below raises TypeError.
        boundary = boundary.encode('ascii')

    nextpart = b'--' + boundary
    lastpart = nextpart + b'--'
    partdict = {}
    terminator = b''
    while terminator != lastpart:
        read = -1
        data = None
        if terminator:
            # A boundary line was just consumed, so a part's headers follow.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    read = int(clength)
                except ValueError:
                    # Unparsable length: fall back to scanning for the
                    # boundary line by line.
                    pass
            if read > 0:
                if maxlen and read > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(read)
            else:
                data = b''

        # Collect lines until the next boundary (or end of input).
        lines = []
        while True:
            line = fp.readline()
            if not line:
                # EOF without a closing boundary: stop the outer loop too.
                terminator = lastpart
                break
            if line.startswith(b'--'):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)

        if data is None:
            # Everything before the first boundary is preamble; discard it.
            continue

        if read < 0 and lines:
            # No usable Content-Length: the payload is the collected lines,
            # minus the line terminator that precedes the boundary.
            last = lines[-1]
            if last[-2:] == b'\r\n':
                last = last[:-2]
            elif last[-1:] == b'\n':
                last = last[:-1]
            lines[-1] = last
            data = b''.join(lines)

        disposition = headers['content-disposition']
        if not disposition:
            continue
        key, params = cgi.parse_header(disposition)
        if key != 'form-data' or 'name' not in params:
            continue
        name = params['name']

        if 'filename' in params:
            data = MultipartFile(data, params['filename'])
        partdict.setdefault(name, []).append(data)

    return partdict
Esempio n. 21
0
def parse_multipart(fp, pdict):
    """Parse multipart input, also recording the uploaded file name.

    Arguments:
    fp   : input file
    pdict: dictionary containing other parameters of content-type header

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field.  This is easy to use but not
    much good if you are expecting megabytes to be uploaded -- in that case,
    use the FieldStorage class instead which is much more flexible.  Note
    that content-type is the raw, unparsed contents of the content-type
    header.

    In addition, whenever a part's headers contain ``filename="..."``, the
    base name of that file is stored as a one-element list under the
    ``'_filename'`` key of the result; a later part with a filename
    overwrites the entry of an earlier one.

    Raises ValueError if the boundary is missing or invalid, or if a part
    declares a Content-Length greater than ``maxlen`` (a name this function
    expects to find in the enclosing module scope; a falsy value disables
    the check).

    XXX This does not parse nested multipart parts -- use FieldStorage for
    that.

    XXX This should really be subsumed by FieldStorage altogether -- no
    point in having two implementations of the same parsing algorithm.

    """
    boundary = pdict.get('boundary', "")
    if not cgi.valid_boundary(boundary):
        # Call-style raise: valid on both Python 2 and Python 3.
        raise ValueError('Invalid boundary in multipart form: %r'
                         % (boundary,))

    nextpart = "--" + boundary
    lastpart = "--" + boundary + "--"
    partdict = {}
    terminator = ""

    while terminator != lastpart:
        # Declared payload length of the current part; -1 means "unknown".
        length = -1
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = mimetools.Message(fp)

            #NOTE(ghowland): This was the reason to copy this function,
            #   we want this file name!
            filename_result = re.findall('filename="(.*?)"', str(headers))
            if filename_result:
                filename_result = filename_result[0]
                # Drop a leading drive prefix such as "C:".
                if len(filename_result) > 2 and filename_result[1] == ':':
                    filename_result = filename_result[2:]
                # Turn backslashes into slashes so that basename() also
                # strips backslash-separated directory components.
                filename_result = filename_result.replace('\\', '/')
                filename_result = os.path.basename(filename_result)
                # Pack into list again, since the upload side expects that
                partdict['_filename'] = [filename_result]

            clength = headers.getheader('content-length')
            if clength:
                try:
                    length = int(clength)
                except ValueError:
                    # Unparsable length: fall back to scanning for the
                    # boundary line by line.
                    pass
            if length > 0:
                if maxlen and length > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(length)
            else:
                data = ""
        # Read lines until end of part.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart  # End outer loop
                break
            if line[:2] == "--":
                terminator = line.strip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)
        # Done with part.
        if data is None:
            # Everything before the first boundary is preamble; discard it.
            continue
        if length < 0 and lines:
            # Strip final line terminator
            last = lines[-1]
            if last[-2:] == "\r\n":
                last = last[:-2]
            elif last[-1:] == "\n":
                last = last[:-1]
            lines[-1] = last
            data = "".join(lines)
        # getheader() returns None for a missing header, whereas indexing a
        # mimetools.Message raises KeyError, so a part without
        # Content-Disposition is skipped instead of aborting the parse.
        disposition = headers.getheader('content-disposition')
        if not disposition:
            continue
        key, params = cgi.parse_header(disposition)
        if key != 'form-data' or 'name' not in params:
            continue
        name = params['name']
        partdict.setdefault(name, []).append(data)

    return partdict
Esempio n. 22
0
 def update_event(self, inp=-1):
     """Write ``cgi.valid_boundary`` of input 0 to output 0.

     ``inp`` is accepted but not used by this method.
     """
     candidate = self.input(0)
     verdict = cgi.valid_boundary(candidate)
     self.set_output_val(0, verdict)