def test_mixed_content_varying_chunk_size(self):
    """Parse a mixed form delivered in two chunks, split at every offset.

    The form holds two plain fields and one file field. For each possible
    split position a fresh parser is fed the two halves and every target
    must end up with the complete value.
    """
    with open(data_file_path('file.txt'), 'rb') as source:
        expected_value = source.read()

    with open(data_file_path('file.txt'), 'rb') as source:
        encoder = MultipartEncoder(fields={
            'name': 'hello world',
            'age': '10',
            'cv.txt': ('file.txt', source, 'text/plain'),
        })
        # The body must be rendered while the file is still open.
        body = encoder.to_string()
        content_type = encoder.content_type

    for split_at in range(len(body)):
        targets = {
            'name': ValueTarget(),
            'age': ValueTarget(),
            'cv.txt': ValueTarget(),
        }

        parser = StreamingFormDataParser(
            headers={'Content-Type': content_type})
        for field, target in targets.items():
            parser.register(field, target)

        head, tail = body[:split_at], body[split_at:]
        parser.data_received(head)
        parser.data_received(tail)

        self.assertEqual(targets['name'].value, b'hello world')
        self.assertEqual(targets['age'].value, b'10')
        self.assertEqual(targets['cv.txt'].value, expected_value)
def test_multiple_files(self):
    """Parse one request carrying a text file and a PNG file.

    Each file is registered under its own filename and the value collected
    by each target must equal the bytes read straight from disk.
    """
    txt_filename = 'file.txt'
    png_filename = 'image-600x400.png'

    with open(data_file_path(txt_filename), 'rb') as source:
        expected_txt = source.read()
    with open(data_file_path(png_filename), 'rb') as source:
        expected_png = source.read()

    txt_target = ValueTarget()
    png_target = ValueTarget()

    with open(data_file_path(txt_filename), 'rb') as txt_file:
        with open(data_file_path(png_filename), 'rb') as png_file:
            form_fields = {
                txt_filename: (txt_filename, txt_file, 'application/plain'),
                png_filename: (png_filename, png_file, 'image/png')
            }
            encoder = MultipartEncoder(fields=form_fields)

            parser = StreamingFormDataParser(
                headers={'Content-Type': encoder.content_type})
            parser.register(txt_filename, txt_target)
            parser.register(png_filename, png_target)

            # Render the body while both files are still open.
            parser.data_received(encoder.to_string())

    self.assertEqual(txt_target.value, expected_txt)
    self.assertEqual(png_target.value, expected_png)
def test_directory_upload(tmp_path):
    """Upload two files under the same field name into a DirectoryTarget.

    After parsing, both files must exist in ``tmp_path`` with the expected
    content and the target must report both filenames in order.
    """
    payload = b'''\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

Foo
--1234
Content-Disposition: form-data; name="files"; filename="cd.txt"

Bar
--1234--'''.replace(b'\n', b'\r\n')

    target = DirectoryTarget(tmp_path)

    request_headers = {'Content-Type': 'multipart/form-data; boundary=1234'}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('files', target)

    parser.data_received(payload)

    expected_files = (('ab.txt', 'Foo'), ('cd.txt', 'Bar'))
    for stored_name, stored_text in expected_files:
        with open(tmp_path / stored_name) as stored:
            assert stored.read() == stored_text

    assert target.multipart_filenames == ['ab.txt', 'cd.txt']
    assert tmp_path
    assert target._started
    assert target._finished
def test_smoke():
    """Feed a minimal encoded form to a parser with no registered targets.

    There are no assertions; the test passes if parsing raises nothing.
    """
    encoder = MultipartEncoder(fields={'name': 'hello'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)

    payload = encoder.to_string()
    parser.data_received(payload)
def test_file_content_single():
    """Parse each dataset file in one chunk and compare with the raw bytes."""
    dataset_names = (
        'file.txt',
        'image-600x400.png',
        'image-2560x1600.png',
        'empty.html',
        'hyphen-hyphen.txt',
        'LF.txt',
        'CRLF.txt',
        '1M.dat',
        '1M-1.dat',
        '1M+1.dat',
    )

    for dataset_name in dataset_names:
        with open_dataset(dataset_name) as handle:
            original_bytes = handle.read()

        content_type, body = encoded_dataset(dataset_name)

        received = ValueTarget()

        request_headers = {'Content-Type': content_type}
        parser = StreamingFormDataParser(headers=request_headers)
        parser.register(dataset_name, received)

        parser.data_received(body)

        assert received.value == original_bytes
def test_multiple_files():
    """Parse one request carrying a text dataset and a PNG dataset.

    Each file is registered under its own filename; each target must end up
    holding exactly the bytes of the corresponding dataset.
    """
    txt_filename = 'file.txt'
    png_filename = 'image-600x400.png'

    with open_dataset(txt_filename) as handle:
        expected_txt = handle.read()
    with open_dataset(png_filename) as handle:
        expected_png = handle.read()

    txt_target = ValueTarget()
    png_target = ValueTarget()

    with open_dataset(txt_filename) as txt_file:
        with open_dataset(png_filename) as png_file:
            form_fields = {
                txt_filename: (txt_filename, txt_file, 'application/plain'),
                png_filename: (png_filename, png_file, 'image/png'),
            }
            encoder = MultipartEncoder(fields=form_fields)

            parser = StreamingFormDataParser(
                headers={'Content-Type': encoder.content_type})
            parser.register(txt_filename, txt_target)
            parser.register(png_filename, png_target)

            # Render the body while both datasets are still open.
            parser.data_received(encoder.to_string())

    assert txt_target.value == expected_txt
    assert png_target.value == expected_png
def test_break_chunk_at_boundary():
    """Deliver the body in two chunks with the split inside a boundary.

    The body holds two fields of 2500 bytes each. The split point is placed
    5 bytes past the start of the first boundary occurrence found at or
    after byte 50 (starting the search there skips an occurrence at the very
    beginning of the body), so the first chunk ends part-way through that
    boundary marker. Both targets must still receive their full values.
    """
    field_value = 'hello' * 500
    expected_first_value = field_value
    expected_second_value = field_value

    first = ValueTarget()
    second = ValueTarget()

    encoder = MultipartEncoder(fields={
        'first': field_value,
        'second': field_value,
    })

    body = encoder.to_string()
    boundary = encoder.boundary.encode('utf-8')

    parser = StreamingFormDataParser(
        headers={'Content-Type': encoder.content_type})
    parser.register('first', first)
    parser.register('second', second)

    # Search from offset 50 but keep the result as an absolute index into
    # ``body``. Slicing first (``body[50:].index(...)``) yields an index
    # relative to the slice, which would move the split 50 bytes too early
    # and out of the boundary altogether.
    index = body.index(boundary, 50) + 5

    parser.data_received(body[:index])
    parser.data_received(body[index:])

    assert first.value == expected_first_value.encode('utf-8')
    assert second.value == expected_second_value.encode('utf-8')
def subTest(self, test_idx, test_name, chunksize, original_data,
            content_type, multipart_data, multipart_filename):
    """Feed ``multipart_data`` to a parser in ``chunksize``-byte pieces.

    A single ValueTarget is registered under the field name ``file``. After
    all data is delivered the target must report ``multipart_filename``,
    be started and finished, and hold exactly ``original_data``.
    """
    print(test_idx, '; name: ', test_name, '; data_size: ',
          len(original_data), '; chunksize: ', chunksize)

    parser = StreamingFormDataParser(
        headers={'Content-Type': content_type})
    target = ValueTarget()
    parser.register('file', target)

    position = 0
    pending = len(multipart_data)
    while pending:
        # The final piece may be shorter than chunksize.
        step = min(pending, chunksize)
        piece = multipart_data[position:position + step]
        parser.data_received(piece)
        position += step
        pending -= step

    self.assertEqual(position, len(multipart_data))
    self.assertEqual(target.multipart_filename, multipart_filename)
    self.assertEqual(target._started, True)
    self.assertEqual(target._finished, True)

    received = target.value
    self.assertEqual(len(received), len(original_data))
    self.assertEqual(received, original_data)
def test_special_filenames():
    """Parse unquoted filenames containing semicolons, quotes, backslashes.

    Each filename is substituted into the Content-Disposition header as-is;
    the field content must be parsed as ``Foo`` every time.
    """
    template = '''\
--1234
Content-Disposition: form-data; name=files; filename={}

Foo
--1234--'''

    tricky_names = (
        'a;b.txt',
        'a"b.txt',
        'a";b.txt',
        'a;"b.txt',
        'a";";.txt',
        'a\\"b.txt',
        'a\\b.txt',
    )

    for tricky_name in tricky_names:
        rendered = template.format(tricky_name)
        payload = rendered.replace('\n', '\r\n').encode('utf-8')

        target = ValueTarget()

        parser = StreamingFormDataParser(
            headers={'Content-Type': 'multipart/form-data; boundary=1234'})
        parser.register('files', target)

        parser.data_received(payload)

        assert target.value == b'Foo'
def test_mixed_content_varying_chunk_size():
    """Parse a mixed form delivered in two chunks, split at every offset.

    The form holds two plain fields and one file field. For each possible
    split position a fresh parser is fed the two halves and every target
    must end up with the complete value.
    """
    with open_dataset('file.txt') as handle:
        expected_value = handle.read()

    with open_dataset('file.txt') as handle:
        encoder = MultipartEncoder(fields={
            'name': 'hello world',
            'age': '10',
            'cv.txt': ('file.txt', handle, 'text/plain'),
        })
        # The body must be rendered while the dataset is still open.
        body = encoder.to_string()
        content_type = encoder.content_type

    for split_at in range(len(body)):
        targets = {
            'name': ValueTarget(),
            'age': ValueTarget(),
            'cv.txt': ValueTarget(),
        }

        parser = StreamingFormDataParser(
            headers={'Content-Type': content_type})
        for field, target in targets.items():
            parser.register(field, target)

        head, tail = body[:split_at], body[split_at:]
        parser.data_received(head)
        parser.data_received(tail)

        assert targets['name'].value == b'hello world'
        assert targets['age'].value == b'10'
        assert targets['cv.txt'].value == expected_value
class FileUploadHandler(AuthorizedRequestHandler):
    """Handler that streams a multipart POST upload through a form parser.

    On POST the request body is parsed incrementally: the ``file`` part is
    written to a temporary path and hashed with SHA-256 at the same time,
    while the ``root``, ``print``, ``path`` and ``checksum`` parts are
    collected in memory. ``post`` then validates the optional checksum and
    hands the collected arguments to the file manager.
    """

    def initialize(self, max_upload_size):
        """Look up the file manager and remember the upload size limit."""
        super().initialize()
        self.file_manager = self.server.lookup_component('file_manager')
        self.max_upload_size = max_upload_size

    def prepare(self):
        """Set up the streaming parser and its targets for POST requests."""
        super().prepare()
        if self.request.method != "POST":
            return

        self.request.connection.set_max_body_size(self.max_upload_size)
        tmpname = self.file_manager.gen_temp_upload_path()

        self._targets = {
            field: ValueTarget()
            for field in ('root', 'print', 'path', 'checksum')
        }
        self._file = FileTarget(tmpname)
        self._sha256_target = SHA256Target()

        self._parser = StreamingFormDataParser(self.request.headers)
        # The uploaded file feeds both the on-disk target and the hasher.
        for file_target in (self._file, self._sha256_target):
            self._parser.register('file', file_target)
        for name, target in self._targets.items():
            self._parser.register(name, target)

    def data_received(self, chunk):
        """Forward a body chunk to the parser for POST requests."""
        if self.request.method != "POST":
            return
        self._parser.data_received(chunk)

    async def post(self):
        """Validate the checksum, then finalize the upload.

        Raises the server's error with status 422 when a checksum was sent
        and it does not match the calculated one. A ServerError raised while
        finalizing is re-raised as a tornado HTTPError.
        """
        chk_target = self._targets.pop('checksum')
        calc_chksum = self._sha256_target.value.lower()

        if chk_target.value:
            # Validate checksum
            recd_cksum = chk_target.value.decode().lower()
            if calc_chksum != recd_cksum:
                # remove temporary file
                try:
                    os.remove(self._file.filename)
                except Exception:
                    pass
                raise self.server.error(
                    f"File checksum mismatch: expected {recd_cksum}, "
                    f"calculated {calc_chksum}", 422)

        # Only form fields that actually carried a value are passed on.
        form_args = {
            name: target.value.decode()
            for name, target in self._targets.items()
            if target.value
        }
        form_args['filename'] = self._file.multipart_filename
        form_args['tmp_file_path'] = self._file.filename

        debug_lines = ["\nFile Upload Arguments:"]
        debug_lines.extend(
            f"\n{name}: {value}" for name, value in form_args.items())
        debug_lines.append(f"\nChecksum: {calc_chksum}")
        logging.debug("".join(debug_lines))

        try:
            result = await self.file_manager.finalize_upload(form_args)
        except ServerError as e:
            raise tornado.web.HTTPError(e.status_code, str(e))
        self.finish(result)
def test_case_insensitive_content_disposition_header():
    """Parse a part whose Content-Disposition header name varies in case.

    The header name is tried as written, lower-cased and upper-cased; the
    field content must be parsed as ``Foo`` each time.
    """
    content_disposition_header = 'Content-Disposition'
    header_variants = (
        content_disposition_header,
        content_disposition_header.lower(),
        content_disposition_header.upper(),
    )

    # Convert the line endings once; only the header name changes per run.
    template = b'''\
--1234
{header}: form-data; name="files"; filename="ab.txt"

Foo
--1234--'''.replace(b'\n', b'\r\n')

    for header in header_variants:
        payload = template.replace(b'{header}', header.encode('utf-8'))

        target = ValueTarget()

        parser = StreamingFormDataParser(
            headers={'Content-Type': 'multipart/form-data; boundary=1234'})
        parser.register('files', target)

        parser.data_received(payload)

        assert target.value == b'Foo'
def test_multiple_targets():
    """Register a ValueTarget and a SHA256Target for the same field.

    Before any data arrives the value target is empty and the hash target
    reports the digest of empty input; afterwards both reflect the file.
    """
    filename = 'image-600x400.png'

    with open_dataset(filename) as handle:
        expected_data = handle.read()

    empty_digest = hashlib.sha256(b'').hexdigest()
    expected_digest = hashlib.sha256(expected_data).hexdigest()

    value_target = ValueTarget()
    sha256_target = SHA256Target()

    with open_dataset(filename) as handle:
        encoder = MultipartEncoder(
            fields={filename: (filename, handle, 'image/png')})

        parser = StreamingFormDataParser(
            headers={'Content-Type': encoder.content_type})
        for target in (value_target, sha256_target):
            parser.register(filename, target)

        assert not value_target.value
        assert sha256_target.value == empty_digest

        # Render the body while the dataset is still open.
        parser.data_received(encoder.to_string())

    assert value_target.value == expected_data
    assert sha256_target.value == expected_digest
def send():
    """Post the file with its context and save the multipart response.

    Uses ``host``, ``file_path``, ``folder_name``, ``media_type``,
    ``context``, ``session`` and ``chunk_size`` from the enclosing scope.
    The request body is a multipart form with a ``context`` part and a
    ``file`` part. The response is parsed as a multipart stream: its
    ``file`` part is written to ``<folder_name>/masked<ext>`` and its
    ``results`` part to ``<folder_name>/results.json``.

    Only the ``Content-Type`` header is set explicitly on the request.

    Raises:
        Exception: if the response status code is 300 or above.
    """
    url = f'{host}/files/fileSearchContext.mask'
    extension = os.path.splitext(file_path)[1]
    os.makedirs(folder_name, exist_ok=True)

    with open(file_path, 'rb') as f:
        # Attach an explicit media type to the file part only when one was
        # supplied; otherwise leave it out of the tuple.
        file_part = ('file', f, media_type) if media_type else ('file', f)
        encoder = MultipartEncoder(fields={
            'context': ('context', context, 'application/json'),
            'file': file_part,
        })

        # The post must happen while the file is open: the encoder is
        # passed as the request body and reads from ``f``.
        with session.post(url, data=encoder, stream=True,
                          headers={'Content-Type': encoder.content_type}) as r:
            if r.status_code >= 300:
                raise Exception(f"Failed with status {r.status_code}:\n\n{r.json()}")

            parser = StreamingFormDataParser(headers=r.headers)
            parser.register('file', FileTarget(f'{folder_name}/masked{extension}'))
            parser.register('results', FileTarget(f'{folder_name}/results.json'))

            for chunk in r.iter_content(chunk_size):
                parser.data_received(chunk)
def test_register_after_data_received():
    """Registering a target after data was fed raises ParseFailedException."""
    encoder = MultipartEncoder(fields={'name': 'hello'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)
    parser.data_received(encoder.to_string())

    late_target = ValueTarget()
    with pytest.raises(ParseFailedException):
        parser.register('name', late_target)
def test_parameter_contains_crlf():
    """A field value with CRLF in the middle is delivered unchanged."""
    target = ValueTarget()

    encoder = MultipartEncoder(fields={'value': 'hello\r\nworld'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('value', target)

    payload = encoder.to_string()
    parser.data_received(payload)

    assert target.value == b'hello\r\nworld'
def test_parameter_starts_with_crlf(self):
    """A field value that begins with CRLF is delivered unchanged."""
    target = ValueTarget()

    encoder = MultipartEncoder(fields={'value': '\r\nworld'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('value', target)

    payload = encoder.to_string()
    parser.data_received(payload)

    self.assertEqual(target.value, b'\r\nworld')
def test_basic_single(self):
    """A single plain field is parsed into its registered target."""
    target = ValueTarget()

    encoder = MultipartEncoder(fields={'value': 'hello world'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('value', target)

    payload = encoder.to_string()
    parser.data_received(payload)

    self.assertEqual(target.value, b'hello world')
def test_custom_target_exception():
    """Feeding data with a CustomTarget registered raises ValueError."""
    target = CustomTarget()

    encoder = MultipartEncoder(fields={'value': 'hello world'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('value', target)

    # Encode outside the raises block so only parsing is under test.
    payload = encoder.to_string()

    with pytest.raises(ValueError):
        parser.data_received(payload)
def test_filename_passed_to_target():
    """The target's ``multipart_filename`` is set once the file is parsed."""
    filename = 'file.txt'

    content_type, body = encoded_dataset(filename)

    target = ValueTarget()

    # Nothing has been parsed yet, so no filename is known.
    assert not target.multipart_filename

    request_headers = {'Content-Type': content_type}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register(filename, target)
    parser.data_received(body)

    assert target.multipart_filename == filename
def test_basic_single():
    """A single plain field is parsed and its target is started/finished."""
    target = ValueTarget()

    encoder = MultipartEncoder(fields={'value': 'hello world'})
    request_headers = {'Content-Type': encoder.content_type}

    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('value', target)

    payload = encoder.to_string()
    parser.data_received(payload)

    assert target.value == b'hello world'
    assert target._started
    assert target._finished
def test_line_does_not_end_with_correct_linebreak(self):
    """Parse a part whose content runs straight into the closing boundary.

    There is no line break between ``Foo`` and ``--1234--``; the target
    must still receive ``Foo``.
    """
    payload = b'''\
--1234
Content-Disposition: form-data; name="files"; filename="ab.txt"

Foo--1234--'''.replace(b'\n', b'\r\n')

    target = ValueTarget()

    request_headers = {'Content-Type': 'multipart/form-data; boundary=1234'}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('files', target)

    parser.data_received(payload)

    self.assertEqual(target.value, b'Foo')
def test_missing_headers():
    """A part without any headers leaves the registered target empty."""
    raw_text = '''\
--1234

Foo
--1234--'''
    payload = raw_text.replace('\n', '\r\n').encode('utf-8')

    target = ValueTarget()

    request_headers = {'Content-Type': 'multipart/form-data; boundary=1234'}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('files', target)

    parser.data_received(payload)

    assert target.value == b''
def test_case_insensitive_content_type(self):
    """The request's Content-Type header key is accepted in any casing."""
    content_type_header = 'Content-Type'
    header_keys = (
        content_type_header,
        content_type_header.lower(),
        content_type_header.upper(),
        'cOnTeNt-tYPe',
    )

    for header_key in header_keys:
        target = ValueTarget()

        encoder = MultipartEncoder(fields={'value': 'hello world'})
        request_headers = {header_key: encoder.content_type}

        parser = StreamingFormDataParser(headers=request_headers)
        parser.register('value', target)

        parser.data_received(encoder.to_string())

        self.assertEqual(target.value, b'hello world')
def test_target_raises_exception():
    """An exception raised in a target's data_received reaches the caller."""

    class RaisingTarget(BaseTarget):
        """Target whose data_received always raises ValueError."""

        def data_received(self, data):
            raise ValueError()

    filename = 'file.txt'
    content_type, body = encoded_dataset(filename)

    target = RaisingTarget()

    request_headers = {'Content-Type': content_type}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register(filename, target)

    with pytest.raises(ValueError):
        parser.data_received(body)
def cgi_upload() -> MethodResponse:
    """Receive an uploaded ``file`` form field and return ``'OK'``.

    When the configured web server is not ``wsgiref`` the raw request body
    is read in 64 KiB chunks and pushed through a streaming multipart
    parser. Otherwise the upload is taken from bottle's already-parsed
    ``request.files`` and copied chunk by chunk to a storage writer.

    Raises:
        Exception: in the ``wsgiref`` path, if no ``file`` field was sent.
    """
    LOGGER.info('Upload file begin')

    read_size = 64 * 1024

    # wsgiref does not support async reading from environ['wsgi.input']
    # It blocks forever in read(size) call.
    if config.WEB_SERVER != 'wsgiref':
        received = 0
        if config.DISABLE_STORAGE:
            sink = NullTarget()
        else:
            sink = StorageFileTarget()

        parser = StreamingFormDataParser(headers=bottle.request.headers)
        parser.register('file', sink)

        while True:
            LOGGER.debug('Read async chunk...')
            stream = bottle.request.environ['wsgi.input']
            chunk = stream.read(read_size)
            if not chunk:
                break
            LOGGER.debug('Got async chunk from network: %d bytes', len(chunk))
            parser.data_received(chunk)
            received += len(chunk)

        LOGGER.info('Uploaded request size: %s bytes', received)
        return 'OK'

    received = 0
    files: bottle.FormsDict = bottle.request.files
    upload = files.file  # pylint: disable=no-member
    if upload is None:
        raise Exception('ERROR! "file" multipart field was not found')

    original_filename = upload.raw_filename
    body = upload.file
    with STORAGE.open_file_writer(original_filename) as writer:
        while True:
            LOGGER.debug('Read synchronous chunk...')
            chunk = body.read(read_size)
            if not chunk:
                break
            LOGGER.debug('Got synchronous chunk from network: %d bytes', len(chunk))
            # Bytes are counted even when nothing is written to storage.
            if not config.DISABLE_STORAGE:
                writer.write(chunk)
            received += len(chunk)

    LOGGER.info('Uploaded file size: %d bytes', received)
    return 'OK'
def test_without_name_parameter():
    """A part whose Content-Disposition has no ``name`` reaches no target."""
    payload = b'''\
--1234
Content-Disposition: form-data; filename="ab.txt"

Foo
--1234--'''.replace(b'\n', b'\r\n')

    target = ValueTarget()

    request_headers = {'Content-Type': 'multipart/form-data; boundary=1234'}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('files', target)

    parser.data_received(payload)

    assert target.value == b''
def test_unquoted_names(self):
    """Unquoted ``name`` and ``filename`` parameters are parsed correctly."""
    payload = b'''\
--1234
Content-Disposition: form-data; name=files; filename=ab.txt

Foo
--1234--'''.replace(b'\n', b'\r\n')

    target = ValueTarget()

    request_headers = {'Content-Type': 'multipart/form-data; boundary=1234'}
    parser = StreamingFormDataParser(headers=request_headers)
    parser.register('files', target)

    parser.data_received(payload)

    self.assertEqual(target.value, b'Foo')
def test_file_content_varying_chunk_size():
    """Parse an encoded file in two chunks, split at every byte offset."""
    with open_dataset('file.txt') as handle:
        expected_value = handle.read()

    content_type, body = encoded_dataset('file.txt')
    request_headers = {'Content-Type': content_type}

    for split_at in range(len(body)):
        received = ValueTarget()

        parser = StreamingFormDataParser(headers=request_headers)
        parser.register('file.txt', received)

        head, tail = body[:split_at], body[split_at:]
        parser.data_received(head)
        parser.data_received(tail)

        assert received.value == expected_value
def test_large_file(self):
    """Parse several image datasets in one chunk each and compare bytes."""
    image_names = (
        'image-500k.png',
        'image-2560x1600.png',
        'image-600x400.png',
        'image-high-res.jpg',
    )

    for image_name in image_names:
        with open_dataset(image_name) as handle:
            expected_value = handle.read()

        content_type, body = encoded_dataset(image_name)

        received = ValueTarget()

        request_headers = {'Content-Type': content_type}
        parser = StreamingFormDataParser(headers=request_headers)
        parser.register(image_name, received)

        parser.data_received(body)

        self.assertEqual(received.value, expected_value)
def cgi_upload():
    """Receive an uploaded ``file`` form field and return ``'OK'``.

    The streaming path reads the raw request body in 64 KiB chunks and
    pushes it through a streaming multipart parser. The alternative path
    takes the upload from bottle's parsed ``request.files`` and copies it
    chunk by chunk to a storage writer; it only runs if the local flag
    below is switched off.

    Raises:
        Exception: in the alternative path, if no ``file`` field was sent.
    """
    log('Upload file begin')

    read_size = 64 * 1024
    use_async_implementation = True

    if use_async_implementation:
        received = 0
        if config.DISABLE_STORAGE:
            sink = NullTarget()
        else:
            sink = StorageFileTarget()

        parser = StreamingFormDataParser(headers=bottle.request.headers)
        parser.register('file', sink)

        while True:
            chunk = bottle.request.environ['wsgi.input'].read(read_size)
            if not chunk:
                break
            parser.data_received(chunk)
            received += len(chunk)

        log('Uploaded request size: ' + str(received))
        return 'OK'

    received = 0
    upload = bottle.request.files.get('file')
    if upload is None:
        raise Exception('ERROR! "file" multipart field was not found')

    original_filename = upload.raw_filename
    body = upload.file
    with storage.open_file_writer(original_filename) as writer:
        while True:
            chunk = body.read(read_size)
            if not chunk:
                break
            # Bytes are counted even when nothing is written to storage.
            if not config.DISABLE_STORAGE:
                writer.write(chunk)
            received += len(chunk)

    log('Uploaded file size: ' + str(received))
    return 'OK'