def parse_form_data(environ, stream_factory=None, charset='utf-8',
                    errors='ignore', max_form_memory_size=None,
                    max_content_length=None, cls=None, silent=True):
    """Parse the request body in *environ* and return ``(stream, form, files)``.

    :param environ: the WSGI environment containing the request body.
    :param stream_factory: optional callable returning a writable stream for
        uploaded files (forwarded to ``parse_multipart``).
    :param charset: charset used to decode url-encoded form data.
    :param errors: decoding error policy for *charset*.
    :param max_form_memory_size: limit on in-memory form data; exceeding it
        raises :exc:`RequestEntityTooLarge`.
    :param max_content_length: limit on the declared content length;
        exceeding it raises :exc:`RequestEntityTooLarge`.
    :param cls: dict class used for the form/files containers
        (``MultiDict`` by default).
    :param silent: when true, multipart parse errors produce an empty form
        instead of propagating the ``ValueError``.
    """
    content_type, extra = parse_options_header(environ.get('CONTENT_TYPE', ''))
    # A missing or malformed Content-Length header is treated as an empty body.
    try:
        content_length = int(environ['CONTENT_LENGTH'])
    except (KeyError, ValueError):
        content_length = 0
    if cls is None:
        cls = MultiDict
    if max_content_length is not None and content_length > max_content_length:
        raise RequestEntityTooLarge()
    stream = _empty_stream
    files = ()
    if content_type == 'multipart/form-data':
        try:
            form, files = parse_multipart(environ['wsgi.input'],
                                          extra.get('boundary'),
                                          content_length, stream_factory,
                                          charset, errors,
                                          max_form_memory_size=max_form_memory_size)
        except ValueError as e:
            if not silent:
                raise
            form = cls()
        else:
            form = cls(form)
    elif content_type == 'application/x-www-form-urlencoded' or content_type == 'application/x-url-encoded':
        if max_form_memory_size is not None and content_length > max_form_memory_size:
            raise RequestEntityTooLarge()
        form = url_decode(environ['wsgi.input'].read(content_length),
                          charset, errors=errors, cls=cls)
    else:
        # Unknown content type: expose the (length-limited) raw input stream.
        form = cls()
        stream = LimitedStream(environ['wsgi.input'], content_length)
    return (stream, form, cls(files))
def wrapped_route(*args, **kwargs):
    """Reject requests whose declared Content-Length exceeds the configured
    limit, then delegate to the wrapped route."""
    declared = request.content_length
    limit = request.max_content_length  # type: ignore
    if declared is not None and declared > limit:
        raise RequestEntityTooLarge(
            f"Content-Length exceeded {request.max_content_length} bytes")
    return route(*args, **kwargs)
def classify() -> Response:
    """Classifier routing."""
    stream: IO[bytes]
    if request.headers.get('Content-type') is None:
        # DANGER! request.stream will ONLY be available if (a) the content-type
        # header is not passed and (b) we have not accessed the body via any
        # other means, e.g. ``.data``, ``.json``, etc.
        stream = request.stream
    else:
        # Parse stream length
        length = int(request.headers.get('Content-length', 0))
        if not length:
            raise BadRequest('Body empty or content-length not set')
        # Check that stream is within app size limits
        max_length = int(current_app.config['MAX_PAYLOAD_SIZE_BYTES'])
        if length > max_length:
            raise RequestEntityTooLarge(f'Body exceeds size of {max_length}')
        # Cast to BytesIO
        stream = io.BytesIO(request.data)
    # Classify the stream and serialize the results as JSON.
    return serialize.as_json(classify_stream(stream))
def post(self):
    """Create a new TUS file-upload session.

    Validates the TUS headers and declared upload size, pre-allocates the
    file on disk, caches the upload state for later PATCH requests, records
    a ``Document``, and returns a 201 response carrying TUS headers and the
    new document GUID.
    """
    if request.headers.get('Tus-Resumable') is None:
        raise BadRequest('Received file upload for unsupported file transfer protocol')
    file_size = request.headers.get('Upload-Length')
    max_file_size = current_app.config["MAX_CONTENT_LENGTH"]
    if not file_size:
        raise BadRequest('Received file upload of unspecified size')
    file_size = int(file_size)
    if file_size > max_file_size:
        raise RequestEntityTooLarge(
            f'The maximum file upload size is {max_file_size/1024/1024}MB.')
    data = self.parser.parse_args()
    filename = data.get('filename') or request.headers.get('Filename')
    if not filename:
        raise BadRequest('File name cannot be empty')
    if filename.endswith(FORBIDDEN_FILETYPES):
        raise BadRequest('File type is forbidden')
    # Storage layout: file is stored under a generated GUID; the "pretty"
    # path is the human-readable display path recorded on the Document.
    document_guid = str(uuid.uuid4())
    base_folder = current_app.config['UPLOADED_DOCUMENT_DEST']
    folder = data.get('folder') or request.headers.get('Folder')
    folder = os.path.join(base_folder, folder)
    file_path = os.path.join(folder, document_guid)
    pretty_folder = data.get('pretty_folder') or request.headers.get('Pretty-Folder')
    pretty_path = os.path.join(base_folder, pretty_folder, filename)
    try:
        if not os.path.exists(folder):
            os.makedirs(folder)
        # Pre-allocate the full declared size by writing one byte at the end.
        with open(file_path, "wb") as f:
            f.seek(file_size - 1)
            f.write(b"\0")
    except IOError as e:
        raise InternalServerError('Unable to create file')
    # Cache upload state consumed by subsequent PATCH requests.
    cache.set(FILE_UPLOAD_SIZE(document_guid), file_size, TIMEOUT_24_HOURS)
    cache.set(FILE_UPLOAD_OFFSET(document_guid), 0, TIMEOUT_24_HOURS)
    cache.set(FILE_UPLOAD_PATH(document_guid), file_path, TIMEOUT_24_HOURS)
    document_info = Document(
        document_guid=document_guid,
        full_storage_path=file_path,
        upload_started_date=datetime.utcnow(),
        file_display_name=filename,
        path_display_name=pretty_path,
    )
    document_info.save()
    response = make_response(jsonify(document_manager_guid=document_guid), 201)
    response.headers['Tus-Resumable'] = TUS_API_VERSION
    response.headers['Tus-Version'] = TUS_API_SUPPORTED_VERSIONS
    response.headers[
        'Location'] = f'{current_app.config["DOCUMENT_MANAGER_URL"]}/documents/{document_guid}'
    response.headers['Upload-Offset'] = 0
    response.headers[
        'Access-Control-Expose-Headers'] = "Tus-Resumable,Tus-Version,Location,Upload-Offset"
    response.autocorrect_location_header = False
    return response
def parse(self, stream, mimetype, content_length, options=None):
    """Parse the request body read from *stream*.

    :param stream: an input stream
    :param mimetype: the mimetype of the data
    :param content_length: the content length of the incoming data
    :param options: optional mimetype parameters (used for the multipart
                    boundary for instance)
    :return: A tuple in the form ``(stream, form, files)``.
    """
    limit = self.max_content_length
    if limit is not None and content_length > limit:
        raise RequestEntityTooLarge()
    options = {} if options is None else options
    wrapped = LimitedStream(stream, content_length)
    handler = self.get_parse_func(mimetype, options)
    if handler is not None:
        try:
            return handler(self, wrapped, mimetype, content_length, options)
        except ValueError:
            # Swallow parse errors unless configured to propagate them.
            if not self.silent:
                raise
    return wrapped, self.cls(), self.cls()
def _parse_urlencoded(self, stream, mimetype, content_length, options):
    """Parse url-encoded form data, enforcing the in-memory size limit."""
    memory_limit = self.max_form_memory_size
    if memory_limit is not None and content_length > memory_limit:
        raise RequestEntityTooLarge()
    decoded_form = url_decode_stream(stream, self.charset,
                                     errors=self.errors, cls=self.cls)
    return _empty_stream, decoded_form, self.cls()
def __call__(self, req):
    """WSGI dispatch: route *req* to the XML-RPC endpoint, its HTML docs,
    or a URL-mapped handler.

    HTTP errors raised during handling are returned as responses rather
    than propagated.
    """
    try:
        # Limit request data in all cases.
        if req.max_content_length is not None and \
           req.content_length > req.max_content_length:
            raise RequestEntityTooLarge()
        if req.path in ('/', '/RPC2', '/server'):
            if req.method == 'POST':
                # XML-RPC
                if req.mimetype != 'text/xml':
                    return BadRequest('XML-RPC requests must be text/xml')
                result = self.xmlrpc_dispatcher._marshaled_dispatch(req.data)
                return Response(response=result, content_type='text/xml')
            elif req.method in ('GET', 'HEAD'):
                # XML-RPC docs
                return Response(
                    response=self.xmlrpc_dispatcher.generate_html_documentation(),
                    content_type='text/html')
            else:
                return MethodNotAllowed()
        else:
            # Dispatch any other path through the URL map.
            (obj, attr), args = self.url_map.bind_to_environ(
                req.environ).match()
            if obj is self.proxy:
                # pseudo-XML-RPC
                result = getattr(obj, attr)(**args)
                return Response(response=repr(result),
                                content_type='text/plain')
            else:
                return getattr(obj, attr)(req, **args)
    except HTTPException, e:
        return e
def upload():
    """Upload a zip of Last.fm backup listens and import it for the current
    user, then redirect to the import page.

    NOTE(review): the placement of the final redirect relative to the POST
    branch is ambiguous in the collapsed source — confirm against history.
    """
    if request.method == 'POST':
        try:
            f = request.files['file']
            if f.filename == '':
                flash.warning('No file selected.')
                return redirect(request.url)
        except RequestEntityTooLarge:
            raise RequestEntityTooLarge('Maximum filesize upload limit exceeded. File must be <=' +
                                        sizeof_readable(current_app.config['MAX_CONTENT_LENGTH']))
        except:
            raise InternalServerError("Something went wrong. Could not upload the file")

        # Check upload folder
        if 'UPLOAD_FOLDER' not in current_app.config:
            raise InternalServerError("Could not upload the file. Upload folder not specified")
        upload_path = path.join(path.abspath(current_app.config['UPLOAD_FOLDER']),
                                current_user.musicbrainz_id)
        if not path.isdir(upload_path):
            makedirs(upload_path)

        # Write to a file
        filename = path.join(upload_path, secure_filename(f.filename))
        f.save(filename)

        if not zipfile.is_zipfile(filename):
            raise BadRequest('Not a valid zip file.')

        success = failure = 0
        regex = re.compile('json/scrobbles/scrobbles-*')
        try:
            zf = zipfile.ZipFile(filename, 'r')
            files = zf.namelist()
            # Iterate over file that match the regex
            for f in [f for f in files if regex.match(f)]:
                try:
                    # Load listens file
                    jsonlist = ujson.loads(zf.read(f))
                    if not isinstance(jsonlist, list):
                        raise ValueError
                except ValueError:
                    failure += 1
                    continue
                payload = convert_backup_to_native_format(jsonlist)
                for listen in payload:
                    validate_listen(listen, LISTEN_TYPE_IMPORT)
                insert_payload(payload, current_user)
                success += 1
        except Exception:
            raise BadRequest('Not a valid lastfm-backup-file.')
        finally:
            # Always remove the uploaded archive, even on failure.
            os.remove(filename)

        # reset listen count for user
        db_connection = webserver.influx_connection._influx
        db_connection.reset_listen_count(current_user.musicbrainz_id)
        flash.info('Congratulations! Your listens from %d files have been uploaded successfully.' % success)
    return redirect(url_for("profile.import_data"))
def append(self, data: bytes) -> None:
    """Buffer *data*, flagging RequestEntityTooLarge once the size limit
    is exceeded; empty chunks and post-error appends are ignored."""
    if self._must_raise is not None or data == b"":
        return
    self._data.extend(data)
    self._has_data.set()
    limit = self._max_content_length
    if limit is not None and len(self._data) > limit:
        # Store the error for the application-side reader and stop accepting.
        self._must_raise = RequestEntityTooLarge()
        self.set_complete()
def uploadPicture(fieType, uuid):
    """Upload one or more pictures submitted as form ``file`` fields.

    Files are first written to a temporary directory and only moved to the
    real upload directory once every file has validated and saved cleanly;
    on any failure all files for this upload are removed.

    Fixes: the original had a duplicate (unreachable) ``except
    FileTypeException`` clause — only the cleanup variant is kept — and
    wrote the binary blob through a text-mode append handle (``'a'``);
    binary write mode (``'wb'``) is used instead.

    :param fieType: file-type subfolder name used to build the storage path.
    :param uuid: per-upload identifier used to build the storage path.
    :return: dict mapping each form field name to its generated file name.
    :raises FileTypeException: if a file has a disallowed extension.
    :raises RequestEntityTooLarge: if a file exceeds the size limit.
    """
    saveNameDict = {}
    tempPath = Config.FULL_UPLOAD_FOLDER_TEMP + fieType + "/" + uuid + "/"
    path = Config.FULL_UPLOAD_FOLDER + fieType + "/" + uuid + "/"
    try:
        # All file fields present in the submitted form.
        fileList = request.files.lists()
        for uploadFile in fileList:
            key = uploadFile[0]  # the field name may be user-defined
            file = request.files.getlist(key)[0]
            if file and allowedFileSuffix(file.filename):
                # Read the upload as bytes so its size can be checked.
                blob = file.read()
                size = len(blob)
                if size > Config.MAX_CONTENT_LENGTH_VERIFY:
                    raise RequestEntityTooLarge()
                # Create the temporary directory if it does not exist yet.
                if not os.path.exists(tempPath):
                    os.makedirs(tempPath)
                filename = secure_filename(file.filename)
                suffix = filename.rsplit('.', 1)[1]
                # File-name policy: generated name, original suffix.
                filename = str(generateFileName()) + "." + suffix
                fullPath = os.path.join(tempPath, filename)
                # Save the file (binary mode — ``blob`` is bytes).
                with open(fullPath, 'wb') as fileHandle:
                    fileHandle.write(blob)
                file.close()
                saveNameDict[key] = filename
            else:
                raise FileTypeException()
    except FileTypeException as e:
        Loger.error(e, __file__)
        removeAllUploadFile(fieType, uuid)
        raise e
    except Exception as e:
        Loger.error(e, __file__)
        removeAllUploadFile(fieType, uuid)
        raise e
    else:
        # Create the real directory if missing, then move every saved file
        # out of the temporary directory into it.
        if not os.path.exists(path):
            os.makedirs(path)
        for filename in saveNameDict.values():
            fullPath = os.path.join(path, filename)
            tempFullPath = os.path.join(tempPath, filename)
            shutil.move(tempFullPath, fullPath)
    return saveNameDict
def patch(self, document_guid=None):
    """Apply a TUS PATCH chunk to an in-progress upload.

    Writes the request body at the cached upload offset, then either
    finalizes the document (offset reached declared size) or updates the
    cached offset. Returns a 204 response with TUS headers.
    """
    if document_guid is None:
        raise BadRequest('Must specify document GUID in PATCH')
    file_path = cache.get(FILE_UPLOAD_PATH(document_guid))
    if file_path is None or not os.path.lexists(file_path):
        raise NotFound('PATCH sent for a upload that does not exist')
    request_offset = int(request.headers.get('Upload-Offset', 0))
    file_offset = cache.get(FILE_UPLOAD_OFFSET(document_guid))
    # The client's offset must match ours exactly (TUS protocol).
    if request_offset != file_offset:
        raise Conflict("Offset in request does not match uploaded file's offset")
    chunk_size = request.headers.get('Content-Length')
    if chunk_size is None:
        raise BadRequest('No Content-Length header in request')
    chunk_size = int(chunk_size)
    new_offset = file_offset + chunk_size
    file_size = cache.get(FILE_UPLOAD_SIZE(document_guid))
    if new_offset > file_size:
        raise RequestEntityTooLarge(
            'The uploaded chunk would put the file above its declared file size.'
        )
    try:
        # Write the chunk in place at the current offset.
        with open(file_path, "r+b") as f:
            f.seek(file_offset)
            f.write(request.data)
    except IOError as e:
        raise InternalServerError('Unable to write to file')
    if new_offset == file_size:
        # File transfer complete.
        doc = DocumentManager.find_by_document_manager_guid(document_guid)
        doc.upload_completed_date = datetime.utcnow()
        doc.save()
        cache.delete(FILE_UPLOAD_SIZE(document_guid))
        cache.delete(FILE_UPLOAD_OFFSET(document_guid))
        cache.delete(FILE_UPLOAD_PATH(document_guid))
    else:
        # File upload still in progress
        cache.set(FILE_UPLOAD_OFFSET(document_guid), new_offset, TIMEOUT_24_HOURS)
    response = make_response('', 204)
    response.headers['Tus-Resumable'] = TUS_API_VERSION
    response.headers['Tus-Version'] = TUS_API_SUPPORTED_VERSIONS
    response.headers['Upload-Offset'] = new_offset
    response.headers[
        'Access-Control-Expose-Headers'] = "Tus-Resumable,Tus-Version,Upload-Offset"
    return response
def _parse_json(parser: FormDataParser, stream, mimetype, content_length, options):
    """Decode the stream body as JSON, enforcing the parser's size limit."""
    limit = parser.max_content_length
    if limit is not None and content_length is not None and content_length > limit:
        raise RequestEntityTooLarge()
    # json loads the stream and return it
    # todo: handle encoding
    return stream, json.loads(stream.read().decode()), {}
def filter_size(cls, i):
    """Validate an advertised size and return it as an int.

    :raises BadRequest: if the value is not convertible to an integer.
    :raises RequestEntityTooLarge: if it exceeds MAX_ALLOWED_FILE_SIZE.
    """
    try:
        size = int(i)
    except (ValueError, TypeError):
        raise BadRequest(description='Size is invalid')
    if size > current_app.config['MAX_ALLOWED_FILE_SIZE']:
        raise RequestEntityTooLarge()
    return size
def validate_file_size(file_size):
    """Validate an Upload-Length value and return it as an int.

    Fixes a typo in the user-facing error message ("positve" -> "positive").

    :param file_size: raw size value (string or int) from the request.
    :raises BadRequest: if the size is missing or not positive.
    :raises RequestEntityTooLarge: if it exceeds MAX_CONTENT_LENGTH.
    """
    if not file_size:
        raise BadRequest('Received file upload of unspecified size')
    size = int(file_size)
    if size <= 0:
        raise BadRequest('File size must be a positive number')
    max_file_size = current_app.config['MAX_CONTENT_LENGTH']
    if size > max_file_size:
        raise RequestEntityTooLarge(f'The maximum file upload size is {max_file_size/1024/1024}MB.')
    return size
def __init__(self, expected_content_length: Optional[int],
             max_content_length: Optional[int]) -> None:
    """Initialize the body buffer, pre-flagging an oversize declared length."""
    self._data = bytearray()
    self._complete: asyncio.Event = asyncio.Event()
    self._has_data: asyncio.Event = asyncio.Event()
    self._max_content_length = max_content_length
    # Exceptions must be raised within application (not ASGI) calls; the
    # ASGI methods achieve that by storing the error here on failure.
    self._must_raise: Optional[Exception] = None
    oversize = (
        expected_content_length is not None
        and max_content_length is not None
        and expected_content_length > max_content_length
    )
    if oversize:
        self._must_raise = RequestEntityTooLarge()
def add_file(upload_id):
    """Mock implementation of file upload route."""
    upload_status = _get_upload(upload_id)
    if 'file' not in request.files:
        raise BadRequest('{"error": "No file"}')
    content = request.files['file'].read()
    if len(content) > 80000:  # Arbitrary limit.
        raise RequestEntityTooLarge('{"error": "Nope!"}')
    if 'Authorization' not in request.headers:
        raise Unauthorized('{"error": "No chance"}')
    if request.headers['Authorization'] != '!':  # Arbitrary value.
        raise Forbidden('{"error": "No sir!"}')
    # Not sure what the response will look like yet.
    parsed = json.loads(content)
    upload_status = _add_file(upload_id, parsed)
    return jsonify(upload_status), status.CREATED
def upload_package():
    """Mock implementation of upload route."""
    if 'file' not in request.files:
        raise BadRequest('No file')
    content = request.files['file'].read()
    if len(content) > 80000:  # Arbitrary limit.
        raise RequestEntityTooLarge('Nope!')
    if 'Authorization' not in request.headers:
        raise Unauthorized('Nope!')
    if request.headers['Authorization'] != '!':  # Arbitrary value.
        raise Forbidden('No sir!')
    payload = json.loads(content)
    # This is specific to the mock.
    # Not sure what the response will look like yet.
    next_id = max(UPLOADS.keys()) + 1
    return jsonify(_set_upload(next_id, payload)), status.CREATED
def get_data(
    self,
    cache: bool = True,
    as_text: bool = True,
    parse_form_data: bool = False,
) -> Union[str, bytes]:
    """Overwritten method that retrieves request data.

    Difference is that by default it fetches data as text. For complete
    description of call arguments see Werkzeug documentation for
    :meth:`~werkzeug.wrappers.BaseRequest.get_data`.

    This method raises :exc:`~werkzeug.exceptions.RequestEntityTooLarge`
    if content length exceeds allowed size (default is 4 megabytes).
    """
    # ``content_length`` is ``None`` when no Content-Length header is sent
    # (e.g. chunked or body-less requests); treat that as zero instead of
    # letting the comparison raise ``TypeError``.
    if (self.content_length or 0) > self.MAX_CONTENT_LENGTH:
        raise RequestEntityTooLarge(
            f'Request size exceeds allowed {self.MAX_CONTENT_LENGTH} bytes'
        )
    return super().get_data(cache, as_text, parse_form_data)
def patch(self, document_guid):
    """Apply a TUS PATCH chunk to an in-progress upload.

    Depending on configuration the chunk is either proxied through TUSD to
    the object store or written directly into the local file at the cached
    upload offset. Returns a 204 response with TUS headers.
    """
    # Get and validate the file path (not required if object store is enabled)
    file_path = cache.get(FILE_UPLOAD_PATH(document_guid))
    if not Config.OBJECT_STORE_ENABLED and (
            file_path is None or not os.path.lexists(file_path)):
        raise NotFound('File does not exist')
    # Get and validate the upload offset
    request_offset = int(request.headers.get('Upload-Offset', 0))
    file_offset = cache.get(FILE_UPLOAD_OFFSET(document_guid))
    if request_offset != file_offset:
        raise Conflict(
            'Upload offset in request does not match the file\'s upload offset'
        )
    # Get and validate the content length and the expected new upload offset
    chunk_size = request.headers.get('Content-Length')
    if chunk_size is None:
        raise BadRequest('No Content-Length header in request')
    chunk_size = int(chunk_size)
    new_offset = file_offset + chunk_size
    file_size = cache.get(FILE_UPLOAD_SIZE(document_guid))
    if new_offset > file_size:
        raise RequestEntityTooLarge(
            'The uploaded chunk would put the file above its declared file size'
        )
    # If the object store is enabled, send the patch request through to TUSD to the object store
    if Config.OBJECT_STORE_ENABLED:
        object_store_upload_resource = cache.get(
            OBJECT_STORE_UPLOAD_RESOURCE(document_guid))
        url = f'{Config.TUSD_URL}{object_store_upload_resource}'
        # Forward all headers except Host so TUSD sees the original request.
        headers = {
            key: value
            for (key, value) in request.headers if key != 'Host'
        }
        resp = requests.patch(url=url, headers=headers, data=request.data)
        if resp.status_code not in [
                requests.codes.ok, requests.codes.no_content
        ]:
            message = f'Cannot upload file. Object store responded with {resp.status_code} ({resp.reason}): {resp._content}'
            current_app.logger.error(
                f'PATCH resp.request:\n{resp.request.__dict__}')
            current_app.logger.error(f'PATCH resp:\n{resp.__dict__}')
            current_app.logger.error(message)
            raise BadGateway(message)
    # Else, write the content to the file in the file system
    else:
        try:
            with open(file_path, 'r+b') as f:
                f.seek(file_offset)
                f.write(request.data)
        except IOError as e:
            current_app.logger.error(e)
            raise InternalServerError('Unable to write to file')
    # If the file upload is complete, set the upload completion date and delete cached data
    if new_offset == file_size:
        document = Document.find_by_document_guid(document_guid)
        document.upload_completed_date = datetime.utcnow()
        document.save()
        cache.delete(FILE_UPLOAD_SIZE(document_guid))
        cache.delete(FILE_UPLOAD_OFFSET(document_guid))
        cache.delete(FILE_UPLOAD_PATH(document_guid))
        cache.delete(OBJECT_STORE_PATH(document_guid))
        cache.delete(OBJECT_STORE_UPLOAD_RESOURCE(document_guid))
    # Else, the file upload is still in progress, update its upload offset in the cache
    else:
        cache.set(FILE_UPLOAD_OFFSET(document_guid), new_offset, TIMEOUT_24_HOURS)
    response = make_response('', 204)
    response.headers['Tus-Resumable'] = TUS_API_VERSION
    response.headers['Tus-Version'] = TUS_API_SUPPORTED_VERSIONS
    response.headers['Upload-Offset'] = new_offset
    response.headers[
        'Access-Control-Expose-Headers'] = 'Tus-Resumable,Tus-Version,Upload-Offset'
    return response
def wrapped_route(*args, **kwargs):
    """Reject requests whose declared Content-Length exceeds the configured
    limit before invoking the wrapped route.

    ``request.content_length`` is ``None`` when the client sends no
    Content-Length header (e.g. chunked bodies); previously that made the
    comparison raise ``TypeError`` instead of passing the request through.
    """
    if (request.content_length is not None
            and request.content_length > request.max_content_length):
        raise RequestEntityTooLarge(f"Content-Length exceeded {request.max_content_length} bytes")
    return route(*args, **kwargs)
def in_memory_threshold_reached(self, bytes):
    """Hook invoked when the in-memory form-data buffer grows past the
    configured threshold; aborts with :exc:`RequestEntityTooLarge`.

    :param bytes: number of bytes currently buffered in memory.
    """
    raise RequestEntityTooLarge()
content_length, stream_factory, charset, errors, max_form_memory_size=max_form_memory_size) except ValueError, e: if not silent: raise form = cls() else: form = cls(form) elif content_type == 'application/x-www-form-urlencoded' or \ content_type == 'application/x-url-encoded': if max_form_memory_size is not None and \ content_length > max_form_memory_size: raise RequestEntityTooLarge() form = url_decode(environ['wsgi.input'].read(content_length), charset, errors=errors, cls=cls) else: form = cls() stream = LimitedStream(environ['wsgi.input'], content_length) return stream, form, cls(files) def _fix_ie_filename(filename): """Internet Explorer 6 transmits the full file name if a file is uploaded. This function strips the full path if it thinks the filename is Windows-like absolute.
def on_exhausted(self):
    """Hook invoked when the limited stream is exhausted; treats exhaustion
    as an over-limit request and aborts with :exc:`RequestEntityTooLarge`."""
    # TODO: detect whether size is outer of limit
    raise RequestEntityTooLarge()
def parse_multipart(file, boundary, content_length, stream_factory=None,
                    charset='utf-8', errors='ignore', buffer_size=10 * 1024,
                    max_form_memory_size=None):
    """Parse a multipart/form-data stream.  This is invoked by
    :func:`utils.parse_form_data` if the content type matches.  Currently it
    exists for internal usage only, but could be exposed as separate
    function if it turns out to be useful and if we consider the API stable.

    :param file: the input stream containing the multipart body.
    :param boundary: the multipart boundary string from the content type.
    :param content_length: declared length of the body in bytes.
    :param stream_factory: callable producing a writable container for file
        parts; defaults to ``default_stream_factory``.
    :param charset: charset used to decode non-file form values.
    :param errors: decoding error policy for *charset*.
    :param buffer_size: line-iteration buffer; must be a multiple of 4 and
        at least 1KB.
    :param max_form_memory_size: limit on in-memory (non-file) form data;
        exceeding it raises :exc:`RequestEntityTooLarge`.
    :return: a ``(form, files)`` pair of key/value lists.
    """
    # XXX: this function does not support multipart/mixed.  I don't know of
    #      any browser that supports this, but it should be implemented
    #      nonetheless.

    # make sure the buffer size is divisible by four so that we can base64
    # decode chunk by chunk
    assert buffer_size % 4 == 0, 'buffer size has to be divisible by 4'
    # also the buffer size has to be at least 1024 bytes long or long headers
    # will freak out the system
    assert buffer_size >= 1024, 'buffer size has to be at least 1KB'

    if stream_factory is None:
        stream_factory = default_stream_factory

    if not boundary:
        raise ValueError('Missing boundary')
    if not is_valid_multipart_boundary(boundary):
        raise ValueError('Invalid boundary: %s' % boundary)
    if len(boundary) > buffer_size:
        raise ValueError('Boundary longer than buffer size')

    total_content_length = content_length
    next_part = '--' + boundary
    last_part = next_part + '--'

    form = []
    files = []
    in_memory = 0

    # convert the file into a limited stream with iteration capabilities
    file = LimitedStream(file, content_length)
    iterator = chain(make_line_iter(file, buffer_size=buffer_size),
                     _empty_string_iter)

    try:
        terminator = _find_terminator(iterator)
        if terminator != next_part:
            raise ValueError('Expected boundary at start of multipart data')

        while terminator != last_part:
            headers = parse_multipart_headers(iterator)
            disposition = headers.get('content-disposition')
            if disposition is None:
                raise ValueError('Missing Content-Disposition header')
            disposition, extra = parse_options_header(disposition)
            name = extra.get('name')
            transfer_encoding = headers.get('content-transfer-encoding')
            try_decode = transfer_encoding is not None and \
                transfer_encoding in _supported_multipart_encodings

            filename = extra.get('filename')

            # if no content type is given we stream into memory.  A list is
            # used as a temporary container.
            if filename is None:
                is_file = False
                container = []
                _write = container.append
                guard_memory = max_form_memory_size is not None

            # otherwise we parse the rest of the headers and ask the stream
            # factory for something we can write in.
            else:
                content_type = headers.get('content-type')
                content_type = parse_options_header(content_type)[0] \
                    or 'text/plain'
                is_file = True
                guard_memory = False
                if filename is not None:
                    filename = _fix_ie_filename(
                        _decode_unicode(filename, charset, errors))
                try:
                    content_length = int(headers['content-length'])
                except (KeyError, ValueError):
                    content_length = 0
                container = stream_factory(total_content_length, content_type,
                                           filename, content_length)
                _write = container.write

            buf = ''
            for line in iterator:
                if not line:
                    raise ValueError('unexpected end of stream')
                if line[:2] == '--':
                    terminator = line.rstrip()
                    if terminator in (next_part, last_part):
                        break
                if try_decode:
                    try:
                        line = line.decode(transfer_encoding)
                    except:
                        raise ValueError('could not decode transfer '
                                         'encoded chunk')
                # we have something in the buffer from the last iteration.
                # this is usually a newline delimiter.
                if buf:
                    _write(buf)
                    buf = ''
                # If the line ends with windows CRLF we write everything except
                # the last two bytes. In all other cases however we write
                # everything except the last byte. If it was a newline, that's
                # fine, otherwise it does not matter because we will write it
                # the next iteration. this ensures we do not write the
                # final newline into the stream. That way we do not have to
                # truncate the stream.
                if line[-2:] == '\r\n':
                    buf = '\r\n'
                    cutoff = -2
                else:
                    buf = line[-1]
                    cutoff = -1
                _write(line[:cutoff])

                # if we write into memory and there is a memory size limit we
                # count the number of bytes in memory and raise an exception if
                # there is too much data in memory.
                if guard_memory:
                    in_memory += len(line)
                    if in_memory > max_form_memory_size:
                        from werkzeug.exceptions import RequestEntityTooLarge
                        raise RequestEntityTooLarge()
            else:
                raise ValueError('unexpected end of part')

            if is_file:
                container.seek(0)
                files.append((name, FileStorage(container, filename, name,
                                                content_type, content_length,
                                                headers)))
            else:
                form.append((name, _decode_unicode(''.join(container),
                                                   charset, errors)))
    finally:
        # make sure the whole input stream is read
        file.exhaust()

    return form, files
def parse_form_data(environ, stream_factory=None, charset='utf-8',
                    errors='ignore', max_form_memory_size=None,
                    max_content_length=None, cls=None, silent=True):
    """Parse the form data in the environ and return it as tuple in the form
    ``(stream, form, files)``.  You should only call this method if the
    transport method is `POST` or `PUT`.

    If the mimetype of the data transmitted is `multipart/form-data` the
    files multidict will be filled with `FileStorage` objects.  If the
    mimetype is unknown the input stream is wrapped and returned as first
    argument, else the stream is empty.

    This function does not raise exceptions, even if the input data is
    malformed.

    Have a look at :ref:`dealing-with-request-data` for more details.

    .. versionadded:: 0.5
       The `max_form_memory_size`, `max_content_length` and
       `cls` parameters were added.

    .. versionadded:: 0.5.1
       The optional `silent` flag was added.

    :param environ: the WSGI environment to be used for parsing.
    :param stream_factory: An optional callable that returns a new read and
                           writeable file descriptor.  This callable works
                           the same as :meth:`~BaseResponse._get_file_stream`.
    :param charset: The character set for URL and url encoded form data.
    :param errors: The encoding error behavior.
    :param max_form_memory_size: the maximum number of bytes to be accepted for
                           in-memory stored form data.  If the data
                           exceeds the value specified an
                           :exc:`~exceptions.RequestURITooLarge`
                           exception is raised.
    :param max_content_length: If this is provided and the transmitted data
                               is longer than this value an
                               :exc:`~exceptions.RequestEntityTooLarge`
                               exception is raised.
    :param cls: an optional dict class to use.  If this is not specified
                       or `None` the default :class:`MultiDict` is used.
    :param silent: If set to False parsing errors will not be caught.
    :return: A tuple in the form ``(stream, form, files)``.
    """
    content_type, extra = parse_options_header(environ.get('CONTENT_TYPE', ''))
    # A missing or malformed Content-Length header is treated as an empty body.
    try:
        content_length = int(environ['CONTENT_LENGTH'])
    except (KeyError, ValueError):
        content_length = 0
    if cls is None:
        cls = MultiDict
    if max_content_length is not None and content_length > max_content_length:
        raise RequestEntityTooLarge()
    stream = _empty_stream
    files = ()
    if content_type == 'multipart/form-data':
        try:
            form, files = parse_multipart(
                environ['wsgi.input'], extra.get('boundary'), content_length,
                stream_factory, charset, errors,
                max_form_memory_size=max_form_memory_size)
        except ValueError as e:
            if not silent:
                raise
            form = cls()
        else:
            form = cls(form)
    elif content_type == 'application/x-www-form-urlencoded' or content_type == 'application/x-url-encoded':
        if max_form_memory_size is not None and content_length > max_form_memory_size:
            raise RequestEntityTooLarge()
        form = url_decode(environ['wsgi.input'].read(content_length),
                          charset, errors=errors, cls=cls)
    else:
        # Unknown content type: expose the (length-limited) raw input stream.
        form = cls()
        stream = LimitedStream(environ['wsgi.input'], content_length)
    return (stream, form, cls(files))
def parse_multipart(file, boundary, content_length, stream_factory=None,
                    charset='utf-8', errors='ignore', buffer_size=10 * 1024,
                    max_form_memory_size=None):
    """Parse a multipart/form-data stream.  This is invoked by
    :func:`utils.parse_form_data` if the content type matches.  Currently it
    exists for internal usage only, but could be exposed as separate
    function if it turns out to be useful and if we consider the API stable.

    :param file: the input stream containing the multipart body.
    :param boundary: the multipart boundary string from the content type.
    :param content_length: declared length of the body in bytes.
    :param stream_factory: callable producing a writable stream for file
        parts; defaults to ``default_stream_factory``.
    :param charset: charset used to decode non-file form values.
    :param errors: decoding error policy for *charset*.
    :param buffer_size: line-iteration buffer; must be a multiple of 4 and
        at least 1KB.
    :param max_form_memory_size: limit on in-memory (non-file) form data;
        exceeding it raises :exc:`RequestEntityTooLarge`.
    :return: a ``(form, files)`` pair of key/value lists.
    """
    # XXX: this function does not support multipart/mixed.  I don't know of
    #      any browser that supports this, but it should be implemented
    #      nonetheless.

    # make sure the buffer size is divisible by four so that we can base64
    # decode chunk by chunk
    assert buffer_size % 4 == 0, 'buffer size has to be divisible by 4'
    # also the buffer size has to be at least 1024 bytes long or long headers
    # will freak out the system
    assert buffer_size >= 1024, 'buffer size has to be at least 1KB'

    if stream_factory is None:
        stream_factory = default_stream_factory
    else:
        stream_factory = _make_stream_factory(stream_factory)

    if not boundary:
        raise ValueError('Missing boundary')
    if not is_valid_multipart_boundary(boundary):
        raise ValueError('Invalid boundary: %s' % boundary)
    if len(boundary) > buffer_size:
        raise ValueError('Boundary longer than buffer size')

    total_content_length = content_length
    next_part = '--' + boundary
    last_part = next_part + '--'

    form = []
    files = []
    in_memory = 0

    # convert the file into a limited stream with iteration capabilities
    file = LimitedStream(file, content_length)
    iterator = chain(make_line_iter(file, buffer_size=buffer_size),
                     repeat(''))

    def _find_terminator():
        """The terminator might have some additional newlines before it.
        There is at least one application that sends additional newlines
        before headers (the python setuptools package).
        """
        for line in iterator:
            if not line:
                break
            line = line.strip()
            if line:
                return line
        return ''

    try:
        terminator = _find_terminator()
        if terminator != next_part:
            raise ValueError('Expected boundary at start of multipart data')

        while terminator != last_part:
            headers = parse_multipart_headers(iterator)
            disposition = headers.get('content-disposition')
            if disposition is None:
                raise ValueError('Missing Content-Disposition header')
            disposition, extra = parse_options_header(disposition)
            filename = extra.get('filename')
            name = extra.get('name')
            transfer_encoding = headers.get('content-transfer-encoding')

            content_type = headers.get('content-type')
            if content_type is None:
                is_file = False
            else:
                content_type = parse_options_header(content_type)[0]
                is_file = True

            if is_file:
                if filename is not None:
                    filename = _fix_ie_filename(_decode_unicode(filename,
                                                                charset,
                                                                errors))
                try:
                    content_length = int(headers['content-length'])
                except (KeyError, ValueError):
                    content_length = 0
                stream = stream_factory(total_content_length, content_type,
                                        filename, content_length)
            else:
                stream = StringIO()

            buf = ''
            for line in iterator:
                if not line:
                    raise ValueError('unexpected end of stream')
                if line[:2] == '--':
                    terminator = line.rstrip()
                    if terminator in (next_part, last_part):
                        break
                if transfer_encoding in _supported_multipart_encodings:
                    try:
                        line = line.decode(transfer_encoding)
                    except:
                        raise ValueError('could not base 64 decode chunk')
                # we have something in the buffer from the last iteration.
                # write that value to the output stream now and clear the buffer.
                if buf:
                    stream.write(buf)
                    buf = ''
                # If the line ends with windows CRLF we write everything except
                # the last two bytes. In all other cases however we write everything
                # except the last byte. If it was a newline, that's fine, otherwise
                # it does not matter because we write it the last iteration. If the
                # loop aborts early because the end of a part was reached, the last
                # newline is not written which is exactly what we want.
                newline_length = line[-2:] == '\r\n' and 2 or 1
                stream.write(line[:-newline_length])
                buf = line[-newline_length:]
                if not is_file and max_form_memory_size is not None:
                    in_memory += len(line)
                    if in_memory > max_form_memory_size:
                        from werkzeug.exceptions import RequestEntityTooLarge
                        raise RequestEntityTooLarge()
            else:
                raise ValueError('unexpected end of part')

            # rewind the stream
            stream.seek(0)
            if is_file:
                files.append((name, FileStorage(stream, filename, name,
                                                content_type,
                                                content_length)))
            else:
                form.append((name, _decode_unicode(stream.read(),
                                                   charset, errors)))
    finally:
        # make sure the whole input stream is read
        file.exhaust()

    return form, files
def post(self):
    """Initiate a TUS resumable upload and create its Document record.

    Validates the TUS protocol header and upload metadata, allocates
    storage for the incoming file (object store via TUSD when enabled,
    otherwise a pre-sized file on local disk), caches upload state for
    subsequent PATCH requests, persists a ``Document`` row, and returns
    a 201 response carrying the TUS response headers.

    :raises BadRequest: missing Tus-Resumable header, missing size,
        empty filename, or forbidden file type.
    :raises RequestEntityTooLarge: declared size exceeds the configured
        maximum.
    :raises BadGateway: the object store (TUSD) rejected the upload.
    :raises InternalServerError: the local placeholder file could not be
        created.
    """
    if request.headers.get('Tus-Resumable') is None:
        raise BadRequest(
            'Received file upload for unsupported file transfer protocol')

    # Validate the file size
    file_size = request.headers.get('Upload-Length')
    if not file_size:
        raise BadRequest('Received file upload of unspecified size')
    # NOTE(review): int() raises an uncaught ValueError on a non-numeric
    # Upload-Length header — confirm upstream validation exists.
    file_size = int(file_size)
    max_file_size = Config.MAX_CONTENT_LENGTH
    if file_size > max_file_size:
        raise RequestEntityTooLarge(
            f'The maximum file upload size is {max_file_size/1024/1024}MB.')

    # Validate the file name and file type
    data = self.parser.parse_args()
    filename = data.get('filename') or request.headers.get('Filename')
    if not filename:
        raise BadRequest('File name cannot be empty')
    if filename.endswith(FORBIDDEN_FILETYPES):
        raise BadRequest('File type is forbidden')

    # Create the path string for this file. The stored file is keyed by a
    # fresh GUID; the "pretty" path keeps the human-readable name.
    document_guid = str(uuid.uuid4())
    base_folder = Config.UPLOADED_DOCUMENT_DEST
    folder = data.get('folder') or request.headers.get('Folder')
    # NOTE(review): if neither the parsed args nor the headers supply a
    # folder, os.path.join receives None and raises TypeError — confirm
    # callers always send one.
    folder = os.path.join(base_folder, folder)
    file_path = os.path.join(folder, document_guid)
    pretty_folder = data.get('pretty_folder') or request.headers.get(
        'Pretty-Folder')
    pretty_path = os.path.join(base_folder, pretty_folder, filename)

    # If the object store is enabled, send the post request through to TUSD
    # to the object store
    object_store_path = None
    if Config.OBJECT_STORE_ENABLED:
        # Forward all request headers except Host so TUSD sees the original
        # TUS metadata.
        resp = requests.post(
            url=Config.TUSD_URL,
            headers={
                key: value
                for (key, value) in request.headers if key != 'Host'
            },
            data=request.data)
        if resp.status_code != requests.codes.created:
            message = f'Cannot upload file. Object store responded with {resp.status_code} ({resp.reason}): {resp._content}'
            current_app.logger.error(
                f'POST resp.request:\n{resp.request.__dict__}')
            current_app.logger.error(f'POST resp:\n{resp.__dict__}')
            current_app.logger.error(message)
            raise BadGateway(message)
        # TUSD returns the upload resource in the Location header; the part
        # before '+' is the object key under the configured S3 prefix.
        object_store_upload_resource = urlparse(
            resp.headers['Location']).path.split('/')[-1]
        object_store_path = Config.S3_PREFIX + object_store_upload_resource.split(
            '+')[0]
        cache.set(OBJECT_STORE_UPLOAD_RESOURCE(document_guid),
                  object_store_upload_resource, TIMEOUT_24_HOURS)
        cache.set(OBJECT_STORE_PATH(document_guid), object_store_path,
                  TIMEOUT_24_HOURS)
    # Else, create an empty file at this path in the file system
    else:
        try:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # Pre-size the file by seeking to the last byte and writing a
            # single NUL, so later PATCHes can write at their offsets.
            with open(file_path, 'wb') as f:
                f.seek(file_size - 1)
                f.write(b'\0')
        except IOError as e:
            current_app.logger.error(e)
            raise InternalServerError('Unable to create file')

    # Cache data to be used in future PATCH requests
    cache.set(FILE_UPLOAD_SIZE(document_guid), file_size, TIMEOUT_24_HOURS)
    cache.set(FILE_UPLOAD_OFFSET(document_guid), 0, TIMEOUT_24_HOURS)
    cache.set(FILE_UPLOAD_PATH(document_guid), file_path, TIMEOUT_24_HOURS)

    # Create document record
    document = Document(
        document_guid=document_guid,
        full_storage_path=file_path,
        upload_started_date=datetime.utcnow(),
        file_display_name=filename,
        path_display_name=pretty_path,
        object_store_path=object_store_path)
    document.save()

    # Create and send response
    response = make_response(jsonify(document_manager_guid=document_guid),
                             201)
    response.headers['Tus-Resumable'] = TUS_API_VERSION
    response.headers['Tus-Version'] = TUS_API_SUPPORTED_VERSIONS
    response.headers[
        'Location'] = f'{Config.DOCUMENT_MANAGER_URL}/documents/{document_guid}'
    response.headers['Upload-Offset'] = 0
    response.headers[
        'Access-Control-Expose-Headers'] = 'Tus-Resumable,Tus-Version,Location,Upload-Offset,Content-Type'
    # Location is returned verbatim; don't let Werkzeug rewrite it.
    response.autocorrect_location_header = False
    return response
def parse_multipart(file, boundary, content_length, stream_factory=None,
                    charset='utf-8', errors='ignore', buffer_size=10240,
                    max_form_memory_size=None):
    """Parse a ``multipart/form-data`` body into form fields and files.

    :param file: input stream carrying the multipart body.
    :param boundary: the multipart boundary (without the leading ``--``).
    :param content_length: number of body bytes to consume.
    :param stream_factory: callable allocating the target stream for file
        parts; defaults to ``default_stream_factory``.
    :param charset: charset used to decode text form fields and filenames.
    :param errors: codec error-handling scheme for that decoding.
    :param buffer_size: chunk size for the line iterator; must be at least
        as long as the boundary.
    :param max_form_memory_size: if set, raise ``RequestEntityTooLarge``
        once non-file form data exceeds this many bytes.
    :return: a ``(form, files)`` tuple of ``(name, value)`` lists.
    :raises ValueError: on a missing/invalid boundary or malformed body.
    """
    if stream_factory is None:
        stream_factory = default_stream_factory
    if not boundary:
        raise ValueError('Missing boundary')
    if not is_valid_multipart_boundary(boundary):
        raise ValueError('Invalid boundary: %s' % boundary)
    if len(boundary) > buffer_size:
        # the line iterator could otherwise split a boundary across reads
        raise ValueError('Boundary longer than buffer size')

    total_content_length = content_length
    next_part = '--' + boundary
    last_part = next_part + '--'
    form = []
    files = []
    in_memory = 0

    # never read past the declared content length
    file = LimitedStream(file, content_length)
    iterator = chain(make_line_iter(file, buffer_size=buffer_size),
                     _empty_string_iter)

    try:
        terminator = _find_terminator(iterator)
        if terminator != next_part:
            raise ValueError('Expected boundary at start of multipart data')

        while terminator != last_part:
            headers = parse_multipart_headers(iterator)
            disposition = headers.get('content-disposition')
            if disposition is None:
                raise ValueError('Missing Content-Disposition header')
            disposition, extra = parse_options_header(disposition)
            name = extra.get('name')
            transfer_encoding = headers.get('content-transfer-encoding')
            try_decode = transfer_encoding is not None and \
                transfer_encoding in _supported_multipart_encodings

            filename = extra.get('filename')

            # A part with a filename is a file upload written through the
            # stream factory; anything else is a plain form field buffered
            # in memory (subject to max_form_memory_size).
            if filename is None:
                is_file = False
                container = []
                _write = container.append
                guard_memory = max_form_memory_size is not None
            else:
                content_type = headers.get('content-type')
                content_type = parse_options_header(
                    content_type)[0] or 'text/plain'
                is_file = True
                guard_memory = False
                if filename is not None:
                    filename = _fix_ie_filename(
                        _decode_unicode(filename, charset, errors))
                try:
                    content_length = int(headers['content-length'])
                except (KeyError, ValueError):
                    content_length = 0
                container = stream_factory(total_content_length, content_type,
                                           filename, content_length)
                _write = container.write

            # ``buf`` holds back each line's trailing newline; it is only
            # written if another line of the same part follows, so the final
            # newline before a boundary never ends up in the part data.
            buf = ''
            for line in iterator:
                if not line:
                    raise ValueError('unexpected end of stream')

                if line[:2] == '--':
                    terminator = line.rstrip()
                    if terminator in (next_part, last_part):
                        break

                if try_decode:
                    try:
                        line = line.decode(transfer_encoding)
                    # BUGFIX: was a bare ``except:`` which also swallowed
                    # KeyboardInterrupt/SystemExit; only codec failures
                    # should become a ValueError.
                    except Exception:
                        raise ValueError(
                            'could not decode transfer encoded chunk')

                # flush the newline held back from the previous iteration
                if buf:
                    _write(buf)
                    buf = ''

                if line[-2:] == '\r\n':
                    buf = '\r\n'
                    cutoff = -2
                else:
                    buf = line[-1]
                    cutoff = -1
                _write(line[:cutoff])

                if guard_memory:
                    in_memory += len(line)
                    if in_memory > max_form_memory_size:
                        # imported lazily to avoid a circular import
                        from werkzeug.exceptions import RequestEntityTooLarge
                        raise RequestEntityTooLarge()
            else:
                raise ValueError('unexpected end of part')

            if is_file:
                # rewind so callers can read the stored file from the start
                container.seek(0)
                files.append((name,
                              FileStorage(container, filename, name,
                                          content_type, content_length,
                                          headers)))
            else:
                form.append((name,
                             _decode_unicode(''.join(container), charset,
                                             errors)))
    finally:
        # always drain the input so a keep-alive connection stays usable
        file.exhaust()
    return (form, files)