Ejemplo n.º 1
0
 def get_part_charset(self, headers):
     """Return the input charset to use for the current part.

     The ``charset`` option of the part's ``content-type`` header is
     used when present.  If the header is missing or empty, or carries
     no ``charset`` option, ``self.charset`` is returned instead.
     """
     raw_content_type = headers.get('content-type')
     if not raw_content_type:
         return self.charset
     _, options = parse_options_header(raw_content_type)
     return options.get('charset', self.charset)
Ejemplo n.º 2
0
    def parse_lines(self, stream, boundary, content_length):
        """Generate parts:
        ``('begin_form', (headers, name))``
        ``('begin_file', (headers, name, filename))``
        ``('cont', byte-string)``
        ``('end', None)``

        Always obeys the grammar:
        parts = ( begin_form cont* end |
                  begin_file cont* end )*

        :param stream: input object handed to ``self.make_line_iter``.
        :param boundary: the multipart boundary as bytes, without the
                         leading ``--``.
        :param content_length: passed to ``self.make_line_iter`` as the
                               ``limit`` argument.

        Nothing is yielded when the very first terminator found is already
        the closing boundary.  Malformed input is reported through
        ``self.fail``.
        """
        next_part = b'--' + boundary
        last_part = next_part + b'--'

        # The real line iterator is followed by an endless supply of empty
        # strings, so running out of input shows up as a falsy ``line``
        # inside the loops below instead of silently ending iteration.
        iterator = chain(self.make_line_iter(stream, limit=content_length,
                                             buffer_size=self.buffer_size),
                         repeat(''))

        terminator = self._find_terminator(iterator)

        if terminator == last_part:
            return
        elif terminator != next_part:
            self.fail('Expected boundary at start of multipart data')

        while terminator != last_part:
            headers = self.parse_multipart_headers(iterator)
            disposition = headers.get('content-disposition')
            if disposition is None:
                self.fail('Missing Content-Disposition header')
            _, extra = parse_options_header(disposition)

            transfer_encoding = self.get_part_encoding(headers)
            if transfer_encoding == 'base64':
                # Use the explicit codec name for ``codecs.decode``; done
                # once per part rather than once per line.
                transfer_encoding = 'base64_codec'
            name = extra.get('name')
            filename = extra.get('filename')

            # A part without a filename is announced as a form field, a
            # part with a filename as a file.
            if filename is None:
                yield _begin_form, (headers, name)
            else:
                yield _begin_file, (headers, name, filename)

            buf = b''
            for line in iterator:
                if not line:
                    self.fail('unexpected end of stream')

                if line[:2] == b'--':
                    terminator = line.rstrip()
                    if terminator in (next_part, last_part):
                        break

                if transfer_encoding is not None:
                    try:
                        line = codecs.decode(line, transfer_encoding)
                    except Exception:
                        self.fail('could not decode transfer encoded chunk.')

                # we have something in the buffer from the last iteration.
                # this is usually a newline delimiter.  ``buf`` is always
                # reassigned below, so it needs no explicit reset here.
                if buf:
                    yield _cont, buf

                # If the line ends with windows CRLF we write everything except
                # the last two bytes.  In all other cases however we write
                # everything except the last byte.  If it was a newline, that's
                # fine, otherwise it does not matter because we will write it
                # the next iteration.  This ensures we do not write the
                # final newline into the stream.  That way we do not have to
                # truncate the stream.  However we do have to make sure that
                # if something else than a newline is in there we write it
                # out.
                if line[-2:] == b'\r\n':
                    buf = b'\r\n'
                    cutoff = -2
                else:
                    buf = line[-1:]
                    cutoff = -1
                yield _cont, line[:cutoff]

            else:
                raise ValueError('unexpected end of part')

            # if we have a leftover in the buffer that is not a newline
            # character we have to flush it, otherwise we will chop off
            # certain values.
            if buf not in (b'', b'\r', b'\n', b'\r\n'):
                yield _cont, buf

            yield _end, None