def get_part_charset(self, headers):
    """Return the charset used to decode the body of the current part.

    The ``charset`` option of the part's ``Content-Type`` header is
    used when present.  If the header is missing or empty, or it has no
    ``charset`` option, ``self.charset`` is returned instead.

    :param headers: the headers of the current part; only ``.get`` is
                    used, with the key ``'content-type'``.
    :return: the charset name for this part.
    """
    header_value = headers.get('content-type')
    if not header_value:
        return self.charset

    # Only the options are of interest here; the mimetype is ignored.
    _, params = parse_options_header(header_value)
    return params.get('charset', self.charset)
def parse_from_environ(self, environ):
    """Parse form data described by a WSGI environment.

    The mimetype and its options are taken from ``CONTENT_TYPE``, the
    body length from ``CONTENT_LENGTH`` and the body itself from
    ``wsgi.input``; everything is then handed to :meth:`parse`.

    :param environ: the WSGI environment to be used for parsing.
    :return: the result of :meth:`parse`, a tuple in the form
             ``(stream, form, files)``.
    """
    mimetype, options = parse_options_header(environ.get('CONTENT_TYPE', ''))

    # A missing or non-numeric CONTENT_LENGTH is treated as an empty body.
    try:
        body_length = int(environ['CONTENT_LENGTH'])
    except (KeyError, ValueError):
        body_length = 0

    input_stream = environ['wsgi.input']
    return self.parse(input_stream, mimetype, body_length, options)
def parse(self, file, boundary, content_length):
    """Parse a multipart body read from `file` into form fields and files.

    The input is consumed line by line.  Every part must carry a
    ``Content-Disposition`` header; parts whose disposition has no
    ``filename`` option are collected in memory as form values, all
    other parts are written to a container obtained from
    ``self.start_file_streaming`` and wrapped in a ``FileStorage``.

    Malformed input is reported through ``self.fail`` (defined outside
    this block; the code below assumes it raises and does not return).

    :param file: the input stream, read through ``make_line_iter``.
    :param boundary: the multipart boundary, without the leading ``--``.
    :param content_length: passed to ``make_line_iter`` as ``limit`` and
                           forwarded to ``self.start_file_streaming``
                           for file parts.
    :return: a tuple ``(form, files)``; each item is ``self.cls`` called
             with a list of ``(name, value)`` pairs.
    :raises ValueError: if the line iterator is exhausted before the
                        closing boundary of a part was seen.
    """
    # A part is introduced by ``--boundary``; the whole body is closed
    # by ``--boundary--``.
    next_part = '--' + boundary
    last_part = next_part + '--'

    form = []
    files = []
    # running total of the line lengths of all in-memory (non-file) parts
    in_memory = 0

    # NOTE(review): ``_empty_string_iter`` is defined elsewhere;
    # presumably it keeps yielding empty strings once the real input is
    # used up, which is what the ``if not line`` check below detects --
    # confirm.
    iterator = chain(make_line_iter(file, limit=content_length,
                                    buffer_size=self.buffer_size),
                     _empty_string_iter)

    terminator = self._find_terminator(iterator)
    if terminator != next_part:
        self.fail('Expected boundary at start of multipart data')

    # ``terminator`` always holds the boundary line that ended the
    # previous part; the loop stops once that was the closing boundary.
    while terminator != last_part:
        headers = parse_multipart_headers(iterator)

        disposition = headers.get('content-disposition')
        if disposition is None:
            self.fail('Missing Content-Disposition header')
        disposition, extra = parse_options_header(disposition)
        transfer_encoding = self.get_part_encoding(headers)
        name = extra.get('name')
        filename = extra.get('filename')
        part_charset = self.get_part_charset(headers)

        # if no filename is given we stream into memory.  A list is
        # used as a temporary container.
        if filename is None:
            is_file = False
            container = []
            _write = container.append
            guard_memory = self.max_form_memory_size is not None

        # otherwise we parse the rest of the headers and ask the stream
        # factory for something we can write in.
        else:
            is_file = True
            guard_memory = False
            filename, container = self.start_file_streaming(
                filename, headers, content_length)
            _write = container.write

        # holds the trailing newline (or last character) of the previous
        # line until we know that it was not the final one of the part.
        buf = ''
        for line in iterator:
            if not line:
                self.fail('unexpected end of stream')

            # a line starting with two dashes may be a boundary; it only
            # ends the part if it matches one of the two boundary forms.
            if line[:2] == '--':
                terminator = line.rstrip()
                if terminator in (next_part, last_part):
                    break

            # NOTE(review): ``line.decode(<encoding name>)`` implies the
            # lines are Python 2 byte strings -- confirm before porting.
            if transfer_encoding is not None:
                try:
                    line = line.decode(transfer_encoding)
                except Exception:
                    self.fail('could not decode transfer encoded chunk')

            # we have something in the buffer from the last iteration.
            # this is usually a newline delimiter.
            if buf:
                _write(buf)
                buf = ''

            # If the line ends with windows CRLF we write everything except
            # the last two bytes.  In all other cases however we write
            # everything except the last byte.  If it was a newline, that's
            # fine, otherwise it does not matter because we will write it
            # the next iteration.  This ensures we do not write the
            # final newline into the stream.  That way we do not have to
            # truncate the stream.  However we do have to make sure that
            # if something else than a newline is in there we write it
            # out.
            if line[-2:] == '\r\n':
                buf = '\r\n'
                cutoff = -2
            else:
                buf = line[-1]
                cutoff = -1
            _write(line[:cutoff])

            # if we write into memory and there is a memory size limit we
            # count the number of bytes in memory and raise an exception if
            # there is too much data in memory.
            if guard_memory:
                in_memory += len(line)
                if in_memory > self.max_form_memory_size:
                    self.in_memory_threshold_reached(in_memory)
        else:  # pragma: no cover
            # only reached when the iterator ran dry without a ``break``,
            # i.e. no boundary line ended this part.
            raise ValueError('unexpected end of part')

        # if we have a leftover in the buffer that is not a newline
        # character we have to flush it, otherwise we will chop off
        # certain values.
        if buf not in ('', '\r', '\n', '\r\n'):
            _write(buf)

        if is_file:
            # rewind so that the consumer reads the upload from the start
            container.seek(0)
            files.append((name, FileStorage(container, filename, name,
                                            headers=headers)))
        else:
            form.append((name, _decode_unicode(''.join(container),
                                               part_charset, self.errors)))

    return self.cls(form), self.cls(files)