def get_part_charset(self, headers): # Figure out input charset for current part content_type = headers.get('content-type') if content_type: mimetype, ct_params = parse_options_header(content_type) return ct_params.get('charset', self.charset) return self.charset
def parse_lines(self, stream, boundary, content_length): """Generate parts: ``('begin_form', (headers, name))`` ``('begin_file', (headers, name, filename))`` ``('cont', byte-string)`` ``('end', None)`` Always obeys the grammar: parts = ( begin_form cont* end | begin_file cont* end )* """ empty_string_iter = repeat('') next_part = b'--' + boundary last_part = next_part + b'--' iterator = chain(self.make_line_iter(stream, limit=content_length, buffer_size=self.buffer_size), empty_string_iter) terminator = self._find_terminator(iterator) if terminator == last_part: return elif terminator != next_part: self.fail('Expected boundary at start of multipart data') while terminator != last_part: headers = self.parse_multipart_headers(iterator) disposition = headers.get('content-disposition') if disposition is None: self.fail('Missing Content-Disposition header') disposition, extra = parse_options_header(disposition) transfer_encoding = self.get_part_encoding(headers) name = extra.get('name') filename = extra.get('filename') # if no content type is given we stream into memory. A list is # used as a temporary container. if filename is None: yield _begin_form, (headers, name) # otherwise we parse the rest of the headers and ask the stream # factory for something we can write in. else: yield _begin_file, (headers, name, filename) buf = b'' for line in iterator: if not line: self.fail('unexpected end of stream') if line[:2] == b'--': terminator = line.rstrip() if terminator in (next_part, last_part): break if transfer_encoding is not None: if transfer_encoding == 'base64': transfer_encoding = 'base64_codec' try: line = codecs.decode(line, transfer_encoding) except Exception: self.fail('could not decode transfer encoded chunk.') # we have something in the buffer from the last iteration. # this is usually a newline delimiter. if buf: yield _cont, buf # buf = b'' # If the line ends with windows CRLF we write everything except # the last two bytes. In all other cases however we write # everything except the last byte. If it was a newline, that's # fine, otherwise it does not matter because we will write it # the next iteration. This ensures we do not write the # final newline into the stream. That way we do not have to # truncate the stream. However we do have to make sure that # if something else than a newline is in there we write it # out. if line[-2:] == b'\r\n': buf = b'\r\n' cutoff = -2 else: buf = line[-1:] cutoff = -1 yield _cont, line[:cutoff] else: raise ValueError('unexpected end of part') # if we have a leftover in the buffer that is not a newline # character we have to flush it, otherwise we will chop off # certain values. if buf not in (b'', b'\r', b'\n', b'\r\n'): print('wow~~~a leftover ') yield _cont, buf yield _end, None