예제 #1
0
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Set up the parser for a single multipart request body.

        :META:
            The request's ``META`` mapping; ``CONTENT_TYPE`` and
            ``CONTENT_LENGTH`` are read from it.
        :input_data:
            File-like object holding the raw post data.
        :upload_handlers:
            Upload handler objects; each handler's ``chunk_size`` is
            consulted when choosing the parser's chunk size.
        :encoding:
            Encoding used for the incoming data. When falsy,
            ``settings.DEFAULT_CHARSET`` is used instead.

        Raise MultiPartParserError if the Content-Type is not multipart, is
        not pure ASCII, carries a missing or malformed boundary, or if the
        Content-Length is negative.
        """
        # The Content-Type header carries both the multipart marker and the
        # boundary, so it is validated first.
        header_value = META.get("CONTENT_TYPE", "")
        if not header_value.startswith("multipart/"):
            raise MultiPartParserError("Invalid Content-Type: %s" % header_value)

        try:
            header_value.encode("ascii")
        except UnicodeEncodeError:
            raise MultiPartParserError(
                "Invalid non-ASCII Content-Type in multipart: %s"
                % force_str(header_value)
            )

        # The boundary parameter is what separates the individual parts.
        _, header_params = parse_header_parameters(header_value)
        boundary = header_params.get("boundary")
        if not (boundary and self.boundary_re.fullmatch(boundary)):
            raise MultiPartParserError(
                "Invalid boundary in multipart: %s" % force_str(boundary)
            )

        # An absent or unparsable Content-Length is treated as zero; only a
        # negative value is rejected.
        try:
            declared_length = int(META.get("CONTENT_LENGTH", 0))
        except (ValueError, TypeError):
            declared_length = 0

        if declared_length < 0:
            raise MultiPartParserError(
                "Invalid content length: %r" % declared_length
            )

        self._boundary = boundary.encode("ascii")
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit
        # integers), the chunk size should be < 2^31, but still divisible
        # by 4. Handlers with a falsy chunk_size are ignored.
        size_cap = 2**31 - 4
        handler_sizes = [
            handler.chunk_size
            for handler in upload_handlers
            if handler.chunk_size
        ]
        self._chunk_size = min([size_cap, *handler_sizes])

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = declared_length
        self._upload_handlers = upload_handlers
예제 #2
0
 def _set_content_type_params(self, meta):
     """
     Populate content_type and content_params from ``meta["CONTENT_TYPE"]``.

     ``encoding`` is also assigned, but only when the header carries a
     ``charset`` parameter that ``codecs.lookup()`` recognizes.
     """
     raw_header = meta.get("CONTENT_TYPE", "")
     self.content_type, self.content_params = parse_header_parameters(
         raw_header)
     if "charset" not in self.content_params:
         return
     charset = self.content_params["charset"]
     try:
         codecs.lookup(charset)
     except LookupError:
         # Unknown charset: leave the encoding untouched.
         return
     self.encoding = charset
예제 #3
0
 def test_basic(self):
     # Table of (header string, expected (main value, parameters) result)
     # covering bare values, quoted values, quoted semicolons, trailing
     # semicolons and an escaped double quote.
     cases = (
         ("text/plain", ("text/plain", {})),
         (
             "text/vnd.just.made.this.up ; ",
             ("text/vnd.just.made.this.up", {}),
         ),
         (
             "text/plain;charset=us-ascii",
             ("text/plain", {"charset": "us-ascii"}),
         ),
         (
             'text/plain ; charset="us-ascii"',
             ("text/plain", {"charset": "us-ascii"}),
         ),
         (
             'text/plain ; charset="us-ascii"; another=opt',
             ("text/plain", {"charset": "us-ascii", "another": "opt"}),
         ),
         (
             'attachment; filename="silly.txt"',
             ("attachment", {"filename": "silly.txt"}),
         ),
         (
             'attachment; filename="strange;name"',
             ("attachment", {"filename": "strange;name"}),
         ),
         (
             'attachment; filename="strange;name";size=123;',
             ("attachment", {"filename": "strange;name", "size": "123"}),
         ),
         (
             'form-data; name="files"; filename="fo\\"o;bar"',
             ("form-data", {"name": "files", "filename": 'fo"o;bar'}),
         ),
     )
     for raw_header, wanted in cases:
         with self.subTest(header=raw_header):
             actual = parse_header_parameters(raw_header)
             self.assertEqual(actual, wanted)
예제 #4
0
def parse_boundary_stream(stream, max_header_size):
    """
    Parse the header section of a single boundary-delimited stream.

    Up to ``max_header_size`` bytes are read, so the whole header has to fit
    inside that one chunk. Return a ``(type, headers, stream)`` tuple where
    ``headers`` maps each header name to a ``(value, params)`` pair and
    ``type`` is RAW, FIELD or FILE.
    """
    chunk = stream.read(max_header_size)

    # The header ends at the first blank line (CRLF CRLF). partition() drops
    # those four bytes so they never leak into the payload.
    header, terminator, remainder = chunk.partition(b"\r\n\r\n")

    if not terminator:
        # No end of header in this chunk: hand everything back untouched and
        # report the stream as raw data.
        stream.unget(chunk)
        return (RAW, {}, stream)

    # Whatever followed the header goes back onto the stream.
    stream.unget(remainder)

    part_type = RAW
    headers = {}

    for raw_line in header.split(b"\r\n"):
        # "Main value" and "dictionary of parameters" is the terminology
        # used by the Python docs.
        try:
            main_value_pair, params = parse_header_parameters(
                raw_line.decode())
            name, value = main_value_pair.split(":", 1)
            params = {key: val.encode() for key, val in params.items()}
        except ValueError:
            # Lines that cannot be decoded or split are skipped.
            continue

        if name == "content-disposition":
            # A non-empty filename parameter marks the part as a file.
            part_type = FILE if params.get("filename") else FIELD

        headers[name] = value, params

    if part_type == RAW:
        # No content-disposition header was seen: push the whole chunk back.
        stream.unget(chunk)

    return (part_type, headers, stream)
예제 #5
0
 def test_rfc2231_wrong_title(self):
     """
     Malformed RFC 2231 extended parameters must parse without crashing.

     None of the ``title*`` values below contains the two single quotes of
     the extended syntax, so each one is expected back verbatim (#24209).
     """
     prefix = "Content-Type: application/x-stuff; "
     cases = (
         (
             prefix + "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
             "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
         ),
         (prefix + "title*='foo.html", "'foo.html"),
         (prefix + "title*=bar.html", "bar.html"),
     )
     for header_line, wanted_title in cases:
         _, params = parse_header_parameters(header_line)
         self.assertEqual(params["title"], wanted_title)
예제 #6
0
 def test_rfc2231_parsing(self):
     # Well-formed RFC 2231 extended parameters: the percent-encoded value
     # is expected to be decoded using the charset named before the quotes.
     prefix = "Content-Type: application/x-stuff; "
     cases = (
         (
             prefix
             + "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
             "This is ***fun***",
         ),
         (prefix + "title*=UTF-8''foo-%c3%a4.html", "foo-ä.html"),
         (prefix + "title*=iso-8859-1''foo-%E4.html", "foo-ä.html"),
     )
     for header_line, wanted_title in cases:
         _, params = parse_header_parameters(header_line)
         self.assertEqual(params["title"], wanted_title)
예제 #7
0
 def __init__(self, media_type_raw_line):
     """
     Split a raw media type line into main_type, sub_type and params.

     A falsy ``media_type_raw_line`` is parsed as the empty string.
     """
     header = media_type_raw_line or ""
     full_type, self.params = parse_header_parameters(header)
     # partition() yields (before, separator, after); the separator itself
     # is not stored.
     pieces = full_type.partition("/")
     self.main_type = pieces[0]
     self.sub_type = pieces[2]
예제 #8
0
    def download(self, url):
        """
        Download the given URL and return the path of the saved file.

        The response body is written into a new temporary directory, which
        is appended to ``self.paths_to_remove``. The file is then renamed
        when the response headers suggest a better name: the ``filename``
        parameter of Content-Disposition or, if the name has no extension,
        an extension guessed from Content-Type.

        Raise CommandError if an OSError occurs while fetching the URL or
        writing the file.
        """

        def cleanup_url(url):
            # Return the last path segment of the URL plus a display form
            # that has at most one trailing slash.
            tmp = url.rstrip("/")
            filename = tmp.split("/")[-1]
            if url.endswith("/"):
                display_url = tmp + "/"
            else:
                display_url = url
            return filename, display_url

        prefix = "django_%s_template_" % self.app_or_project
        tempdir = tempfile.mkdtemp(prefix=prefix, suffix="_download")
        self.paths_to_remove.append(tempdir)
        filename, display_url = cleanup_url(url)

        if self.verbosity >= 2:
            self.stdout.write("Downloading %s" % display_url)

        the_path = os.path.join(tempdir, filename)
        opener = build_opener()
        opener.addheaders = [("User-Agent", f"Django/{django.__version__}")]
        try:
            with opener.open(url) as source, open(the_path, "wb") as target:
                headers = source.info()
                target.write(source.read())
        except OSError as e:
            raise CommandError(
                "couldn't download URL %s to %s: %s" % (url, filename, e)
            ) from e

        # basename() rather than split("/") so the local name is extracted
        # correctly whatever path separator the platform uses.
        used_name = os.path.basename(the_path)

        # Trying to get better name from response headers
        guessed_filename = used_name
        content_disposition = headers["content-disposition"]
        if content_disposition:
            _, params = parse_header_parameters(content_disposition)
            # The filename comes from the remote server and is untrusted:
            # keep only its final path component so that a value such as
            # "../../x" or an absolute path cannot make the move below
            # write outside of tempdir.
            header_filename = os.path.basename(params.get("filename") or "")
            if header_filename not in ("", os.curdir, os.pardir):
                guessed_filename = header_filename

        # Falling back to content type guessing
        ext = self.splitext(guessed_filename)[1]
        content_type = headers["content-type"]
        if not ext and content_type:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                guessed_filename += ext

        # Move the temporary file to a filename that has better
        # chances of being recognized by the archive utils
        if used_name != guessed_filename:
            guessed_path = os.path.join(tempdir, guessed_filename)
            shutil.move(the_path, guessed_path)
            return guessed_path

        # Giving up
        return the_path