def __init__(self, META, input_data, upload_handlers, encoding=None):
    """
    Set up the parser state for one multipart request body.

    :META:
        The standard ``META`` dictionary in Django request objects.
    :input_data:
        The raw post data, as a file-like object.
    :upload_handlers:
        A list of UploadHandler instances that perform operations on the
        uploaded data.
    :encoding:
        The encoding with which to treat the incoming data. Falls back to
        ``settings.DEFAULT_CHARSET`` when not given.

    Raise MultiPartParserError if the Content-Type is not multipart, is not
    ASCII, carries a missing or invalid boundary, or if the declared
    content length is negative.
    """
    # The Content-Type header must announce a multipart body.
    raw_content_type = META.get("CONTENT_TYPE", "")
    if not raw_content_type.startswith("multipart/"):
        raise MultiPartParserError("Invalid Content-Type: %s" % raw_content_type)

    # ...and it must be pure ASCII.
    try:
        raw_content_type.encode("ascii")
    except UnicodeEncodeError:
        raise MultiPartParserError(
            "Invalid non-ASCII Content-Type in multipart: %s"
            % force_str(raw_content_type)
        )

    # The boundary that separates the parts lives in the header parameters.
    _, header_params = parse_header_parameters(raw_content_type)
    boundary = header_params.get("boundary")
    if not (boundary and self.boundary_re.fullmatch(boundary)):
        raise MultiPartParserError(
            "Invalid boundary in multipart: %s" % force_str(boundary)
        )

    # An unparseable Content-Length is treated as zero; a negative one is
    # rejected outright.
    try:
        content_length = int(META.get("CONTENT_LENGTH", 0))
    except (ValueError, TypeError):
        content_length = 0
    if content_length < 0:
        raise MultiPartParserError("Invalid content length: %r" % content_length)

    self._boundary = boundary.encode("ascii")
    self._input_data = input_data

    # For compatibility with low-level network APIs (with 32-bit integers),
    # the chunk size should be < 2^31, but still divisible by 4. Handlers
    # with a falsy chunk_size do not take part in the minimum.
    handler_sizes = [
        handler.chunk_size for handler in upload_handlers if handler.chunk_size
    ]
    self._chunk_size = min([2**31 - 4, *handler_sizes])

    self._meta = META
    self._encoding = encoding or settings.DEFAULT_CHARSET
    self._content_length = content_length
    self._upload_handlers = upload_handlers
def _set_content_type_params(self, meta):
    """
    Populate content_type, content_params, and (when valid) encoding.

    The ``CONTENT_TYPE`` entry of ``meta`` is parsed into its main value and
    parameters. If a ``charset`` parameter is present and names a codec that
    Python knows, it becomes ``self.encoding``; an unknown charset is
    ignored and the encoding is left untouched.
    """
    header_value = meta.get("CONTENT_TYPE", "")
    self.content_type, self.content_params = parse_header_parameters(header_value)

    if "charset" not in self.content_params:
        return

    charset = self.content_params["charset"]
    try:
        codecs.lookup(charset)
    except LookupError:
        # Unknown codec name: keep whatever encoding is already in effect.
        return
    self.encoding = charset
def test_basic(self):
    """parse_header_parameters() splits a header into value and params."""
    # Each case: (raw header, expected main value, expected parameters).
    cases = [
        ("text/plain", "text/plain", {}),
        ("text/vnd.just.made.this.up ; ", "text/vnd.just.made.this.up", {}),
        ("text/plain;charset=us-ascii", "text/plain", {"charset": "us-ascii"}),
        (
            'text/plain ; charset="us-ascii"',
            "text/plain",
            {"charset": "us-ascii"},
        ),
        (
            'text/plain ; charset="us-ascii"; another=opt',
            "text/plain",
            {"charset": "us-ascii", "another": "opt"},
        ),
        (
            'attachment; filename="silly.txt"',
            "attachment",
            {"filename": "silly.txt"},
        ),
        (
            'attachment; filename="strange;name"',
            "attachment",
            {"filename": "strange;name"},
        ),
        (
            'attachment; filename="strange;name";size=123;',
            "attachment",
            {"filename": "strange;name", "size": "123"},
        ),
        (
            'form-data; name="files"; filename="fo\\"o;bar"',
            "form-data",
            {"name": "files", "filename": 'fo"o;bar'},
        ),
    ]
    for raw_header, main_value, params in cases:
        with self.subTest(header=raw_header):
            self.assertEqual(
                parse_header_parameters(raw_header),
                (main_value, params),
            )
def parse_boundary_stream(stream, max_header_size):
    """
    Parse one and exactly one stream that encapsulates a boundary.

    Read up to ``max_header_size`` bytes, look for the blank line that ends
    the header section, and parse each header line found before it.

    Return a ``(type, headers, stream)`` tuple where ``type`` is one of
    ``RAW``, ``FIELD`` or ``FILE`` and ``headers`` maps each header name to a
    ``(value, params)`` pair with byte-encoded parameter values.
    """
    # The whole header section must fit in this single read.
    block = stream.read(max_header_size)

    # Split on the CRLFCRLF that terminates the headers; the separator itself
    # is dropped so it does not pollute the payload.
    header, separator, remainder = block.partition(b"\r\n\r\n")
    if not separator:
        # No header terminator found: hand the data back untouched.
        stream.unget(block)
        return (RAW, {}, stream)

    # Whatever followed the headers belongs to the payload.
    stream.unget(remainder)

    part_type = RAW
    headers = {}
    for raw_line in header.split(b"\r\n"):
        # "Main value" and "dictionary of parameters" follow the terminology
        # used in the Python docs.
        try:
            main_value_pair, params = parse_header_parameters(raw_line.decode())
            name, value = main_value_pair.split(":", 1)
            params = {key: val.encode() for key, val in params.items()}
        except ValueError:
            # Invalid (or blank) header line: skip it.
            continue

        if name == "content-disposition":
            part_type = FILE if params.get("filename") else FIELD

        headers[name] = value, params

    if part_type == RAW:
        # NOTE(review): the remainder was already pushed back above; confirm
        # that pushing the full block back as well is intended here.
        stream.unget(block)

    return (part_type, headers, stream)
def test_rfc2231_wrong_title(self):
    """
    Malformed RFC 2231 headers (missing double single quotes) must be
    parsed without crashing, leaving the raw value as the title (#24209).
    """
    cases = [
        (
            "Content-Type: application/x-stuff; "
            "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
        ),
        ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
        ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
    ]
    for header_line, title in cases:
        _, params = parse_header_parameters(header_line)
        self.assertEqual(params["title"], title)
def test_rfc2231_parsing(self):
    """Well-formed RFC 2231 extended parameters are decoded to text."""
    cases = [
        (
            "Content-Type: application/x-stuff; "
            "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
            "This is ***fun***",
        ),
        (
            "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
            "foo-ä.html",
        ),
        (
            "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
            "foo-ä.html",
        ),
    ]
    for header_line, title in cases:
        _, params = parse_header_parameters(header_line)
        self.assertEqual(params["title"], title)
def __init__(self, media_type_raw_line):
    """
    Split a raw media type line into main type, sub type, and parameters.

    A falsy ``media_type_raw_line`` is parsed as the empty string.
    """
    raw_line = media_type_raw_line or ""
    full_type, params = parse_header_parameters(raw_line)
    self.params = params

    # "type/subtype" -> ("type", "subtype"); a missing "/" leaves the sub
    # type empty.
    main_type, _, sub_type = full_type.partition("/")
    self.main_type = main_type
    self.sub_type = sub_type
def download(self, url):
    """
    Download the given URL and return the file name.

    The payload is written into a fresh temporary directory, which is
    appended to ``self.paths_to_remove``. The file is then renamed, when
    possible, to a name taken from the response's Content-Disposition
    header, and given an extension guessed from the Content-Type header if
    it has none.

    Return the path of the downloaded file inside the temporary directory.
    Raise CommandError if the download or the local write fails.
    """

    def cleanup_url(url):
        tmp = url.rstrip("/")
        filename = tmp.split("/")[-1]
        if url.endswith("/"):
            display_url = tmp + "/"
        else:
            display_url = url
        return filename, display_url

    prefix = "django_%s_template_" % self.app_or_project
    tempdir = tempfile.mkdtemp(prefix=prefix, suffix="_download")
    self.paths_to_remove.append(tempdir)
    filename, display_url = cleanup_url(url)

    if self.verbosity >= 2:
        self.stdout.write("Downloading %s" % display_url)

    the_path = os.path.join(tempdir, filename)
    opener = build_opener()
    opener.addheaders = [("User-Agent", f"Django/{django.__version__}")]
    try:
        with opener.open(url) as source, open(the_path, "wb") as target:
            headers = source.info()
            target.write(source.read())
    except OSError as e:
        raise CommandError(
            "couldn't download URL %s to %s: %s" % (url, filename, e)
        )

    # basename() follows the platform's separator, unlike splitting on "/".
    used_name = os.path.basename(the_path)

    # Trying to get better name from response headers
    guessed_filename = used_name
    content_disposition = headers["content-disposition"]
    if content_disposition:
        _, params = parse_header_parameters(content_disposition)
        # The filename is chosen by the remote server. Keep only its final
        # path component so that values such as "../../x" or "/etc/x" cannot
        # move the download outside the temporary directory.
        header_filename = os.path.basename(params.get("filename") or "")
        if header_filename not in ("", ".", ".."):
            guessed_filename = header_filename

    # Falling back to content type guessing
    ext = self.splitext(guessed_filename)[1]
    content_type = headers["content-type"]
    if not ext and content_type:
        # Drop any parameters ("; charset=...") before the lookup;
        # guess_extension() only recognizes the bare media type.
        media_type = content_type.split(";", 1)[0].strip()
        ext = mimetypes.guess_extension(media_type)
        if ext:
            guessed_filename += ext

    # Move the temporary file to a filename that has better
    # chances of being recognized by the archive utils
    if used_name != guessed_filename:
        guessed_path = os.path.join(tempdir, guessed_filename)
        shutil.move(the_path, guessed_path)
        return guessed_path

    # Giving up
    return the_path