def test_multipart_upload_by_presigned_requests(self, send_delta):
    """Test presign_upload_part _and_ complete_multipart_upload."""
    # Integration test: use `urllib3` to send the presigned requests.
    # See `test_minio` for canonical usage.
    user = User.objects.create(username='******', email='*****@*****.**')
    workflow = Workflow.create_and_init(owner=user)
    uuid = str(uuidgen.uuid4())
    key = f'wf-123/wfm-234/{uuid}.csv'
    upload_id = minio.create_multipart_upload(minio.UserFilesBucket, key,
                                              'file.csv')
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        module_id_name='x',
        inprogress_file_upload_id=upload_id,
        inprogress_file_upload_key=key,
        inprogress_file_upload_last_accessed_at=timezone.now()
    )
    data = b'1234567' * 1024 * 1024  # 7MB => 5MB+2MB parts
    data1 = data[:5 * 1024 * 1024]
    data2 = data[5 * 1024 * 1024:]
    md5sum1 = _base64_md5sum(data1)
    md5sum2 = _base64_md5sum(data2)
    response1 = self.run_handler(presign_upload_part, user=user,
                                 workflow=workflow, wfModuleId=wf_module.id,
                                 uploadId=upload_id, partNumber=1,
                                 nBytes=len(data1), base64Md5sum=md5sum1)
    self.assertEqual(response1.error, '')
    response2 = self.run_handler(presign_upload_part, user=user,
                                 workflow=workflow, wfModuleId=wf_module.id,
                                 uploadId=upload_id, partNumber=2,
                                 nBytes=len(data2), base64Md5sum=md5sum2)
    self.assertEqual(response2.error, '')
    http = urllib3.PoolManager()
    s3response1 = http.request('PUT', response1.data['url'], body=data1,
                               headers=response1.data['headers'])
    self.assertEqual(s3response1.status, 200)
    s3response2 = http.request('PUT', response2.data['url'], body=data2,
                               headers=response2.data['headers'])
    self.assertEqual(s3response2.status, 200)
    etag1 = s3response1.headers['ETag'][1:-1]  # un-wrap quotes
    etag2 = s3response2.headers['ETag'][1:-1]  # un-wrap quotes
    send_delta.side_effect = async_noop
    response3 = self.run_handler(complete_multipart_upload, user=user,
                                 workflow=workflow, wfModuleId=wf_module.id,
                                 uploadId=upload_id, etags=[etag1, etag2])
    self.assertResponse(response3, data={'uuid': uuid})
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, key)['Body'],
        data
    )
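# The tests here call `_base64_md5sum`, which is not shown in this section.
# A minimal sketch, assuming it returns the base64-encoded binary MD5 digest
# that S3 expects in the Content-MD5 header; the real helper may differ.
import base64
import hashlib


def _base64_md5sum(data: bytes) -> str:
    # base64 of the raw 16-byte digest, not of the hex string.
    return base64.b64encode(hashlib.md5(data).digest()).decode('ascii')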
def test_delete_inprogress_file_upload(self):
    workflow = Workflow.create_and_init()
    upload_id = minio.create_multipart_upload(minio.UserFilesBucket, 'key',
                                              'file.csv')
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        inprogress_file_upload_id=upload_id,
        inprogress_file_upload_key='key',
        inprogress_file_upload_last_accessed_at=timezone.now(),
    )
    wf_module.delete()
    # Assert the upload is gone
    with self.assertRaises(minio.error.NoSuchUpload):
        minio.client.list_parts(Bucket=minio.UserFilesBucket, Key='key',
                                UploadId=upload_id)
def test_delete_ignore_inprogress_file_upload_not_on_s3(self):
    workflow = Workflow.create_and_init()
    upload_id = minio.create_multipart_upload(minio.UserFilesBucket, 'key',
                                              'file.csv')
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        inprogress_file_upload_id=upload_id,
        inprogress_file_upload_key='key',
        inprogress_file_upload_last_accessed_at=timezone.now(),
    )
    # Abort the upload on S3, then delete the WfModule.
    #
    # This mimics a behavior we want: upload timeouts. We can set up an
    # S3-side policy to delete old uploaded data, so we need to expect that
    # the data might already be gone when we delete the WfModule.
    minio.abort_multipart_upload(minio.UserFilesBucket, 'key', upload_id)
    wf_module.delete()  # must not crash
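# A minimal sketch of the delete-time cleanup these two tests exercise,
# assuming WfModule.delete() invokes a helper like this. The name and
# placement are hypothetical; the tests only pin down the behavior: abort the
# in-progress upload, and tolerate it already being gone from S3.
def _abort_inprogress_upload_if_any(wf_module) -> None:
    if wf_module.inprogress_file_upload_id is not None:
        try:
            minio.abort_multipart_upload(
                minio.UserFilesBucket,
                wf_module.inprogress_file_upload_key,
                wf_module.inprogress_file_upload_id,
            )
        except minio.error.NoSuchUpload:
            # An S3-side expiry policy may have deleted the upload already.
            pass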
def test_multipart_upload_by_presigned_requests(self):
    upload_id = minio.create_multipart_upload(Bucket, 'key', 'file.csv')
    data = b'1234567' * 1024 * 1024  # 7MB => 5MB+2MB parts
    data1 = data[:5 * 1024 * 1024]
    data2 = data[5 * 1024 * 1024:]
    md5sum1 = _base64_md5sum(data1)
    md5sum2 = _base64_md5sum(data2)
    url1, headers1 = minio.presign_upload_part(Bucket, 'key', upload_id, 1,
                                               len(data1), md5sum1)
    url2, headers2 = minio.presign_upload_part(Bucket, 'key', upload_id, 2,
                                               len(data2), md5sum2)
    http = urllib3.PoolManager()
    response1 = http.request('PUT', url1, body=data1, headers=headers1)
    self.assertEqual(response1.status, 200)
    etag1 = response1.headers['ETag'][1:-1]  # un-wrap quotes
    response2 = http.request('PUT', url2, body=data2, headers=headers2)
    self.assertEqual(response2.status, 200)
    etag2 = response2.headers['ETag'][1:-1]  # un-wrap quotes
    minio.complete_multipart_upload(Bucket, 'key', upload_id,
                                    [etag1, etag2])
    self.assertEqual(
        minio.get_object_with_data(Bucket, 'key')['Body'],
        data
    )
def _do_create_multipart_upload(
    workflow: Workflow,
    wf_module: WfModule,
    filename: str
) -> Dict[str, str]:
    key = _generate_key(wf_module, filename)
    with workflow.cooperative_lock():
        wf_module.refresh_from_db()
        wf_module.abort_inprogress_upload()  # in case there is one already
        upload_id = minio.create_multipart_upload(minio.UserFilesBucket, key,
                                                  filename)
        wf_module.inprogress_file_upload_id = upload_id
        wf_module.inprogress_file_upload_key = key
        wf_module.inprogress_file_upload_last_accessed_at = timezone.now()
        wf_module.save(
            update_fields=['inprogress_file_upload_id',
                           'inprogress_file_upload_key',
                           'inprogress_file_upload_last_accessed_at']
        )
    return {'key': key, 'uploadId': upload_id}
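# `_generate_key` is not shown in this section. A minimal sketch, inferred
# from the key shape the first test uses ('wf-123/wfm-234/<uuid>.csv'). How
# the workflow id is reached from the WfModule is an assumption.
from pathlib import PurePath


def _generate_key(wf_module: WfModule, filename: str) -> str:
    # Hypothetical: 'wf-<workflow id>/wfm-<wf_module id>/<uuid><ext>'
    ext = PurePath(filename).suffix
    return (
        f'wf-{wf_module.workflow.id}/wfm-{wf_module.id}'
        f'/{uuidgen.uuid4()}{ext}'
    )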
def test_abort_multipart_upload_upload_already_aborted(self):
    user = User.objects.create(username='******', email='*****@*****.**')
    workflow = Workflow.create_and_init(owner=user)
    upload_id = minio.create_multipart_upload(minio.UserFilesBucket, 'key',
                                              'file.csv')
    minio.abort_multipart_upload(minio.UserFilesBucket, 'key', upload_id)
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        module_id_name='x',
        inprogress_file_upload_id=upload_id,
        inprogress_file_upload_key='key',
        inprogress_file_upload_last_accessed_at=timezone.now()
    )
    response = self.run_handler(abort_multipart_upload, user=user,
                                workflow=workflow, wfModuleId=wf_module.id,
                                uploadId=upload_id)
    self.assertResponse(response, data=None)
    # Must remove data from the DB even if the file isn't in minio.
    wf_module.refresh_from_db()
    self.assertIsNone(wf_module.inprogress_file_upload_id)
    self.assertIsNone(wf_module.inprogress_file_upload_key)
    self.assertIsNone(wf_module.inprogress_file_upload_last_accessed_at)
def test_abort_multipart_upload_happy_path(self):
    user = User.objects.create(username='******', email='*****@*****.**')
    workflow = Workflow.create_and_init(owner=user)
    upload_id = minio.create_multipart_upload(minio.UserFilesBucket, 'key',
                                              'file.csv')
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        module_id_name='x',
        inprogress_file_upload_id=upload_id,
        inprogress_file_upload_key='key',
        inprogress_file_upload_last_accessed_at=timezone.now()
    )
    response = self.run_handler(abort_multipart_upload, user=user,
                                workflow=workflow, wfModuleId=wf_module.id,
                                uploadId=upload_id)
    self.assertResponse(response, data=None)
    wf_module.refresh_from_db()
    self.assertIsNone(wf_module.inprogress_file_upload_id)
    self.assertIsNone(wf_module.inprogress_file_upload_key)
    self.assertIsNone(wf_module.inprogress_file_upload_last_accessed_at)
    # The upload must be gone from minio, too: aborting it again raises.
    with self.assertRaises(minio.error.NoSuchUpload):
        minio.abort_multipart_upload(minio.UserFilesBucket, 'key', upload_id)
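# Both abort tests assert the same DB cleanup: all three
# inprogress_file_upload_* columns end up NULL. A minimal sketch of that
# cleanup, assuming the handler runs something like this (the helper name is
# hypothetical):
def _clear_inprogress_file_upload(wf_module: WfModule) -> None:
    wf_module.inprogress_file_upload_id = None
    wf_module.inprogress_file_upload_key = None
    wf_module.inprogress_file_upload_last_accessed_at = None
    wf_module.save(
        update_fields=['inprogress_file_upload_id',
                       'inprogress_file_upload_key',
                       'inprogress_file_upload_last_accessed_at']
    )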