def _s3_upload(
    object_client, project_id, datasets_path, content, upload_id, chunk_size
):
    """Upload ``content`` as the parts of an S3 multipart upload.

    The chunks yielded by ``_rechunk_data(content, chunk_size)`` are sent
    one at a time, each with an HTTP PUT to a URL presigned by
    ``object_client``. Once every part has been sent, the multipart upload
    is completed with the collected part numbers and ETags.

    Parameters
    ----------
    object_client
        Client providing ``presign_upload_part`` and
        ``complete_multipart_upload``.
    project_id
        Project identifier, forwarded unchanged to ``object_client``.
    datasets_path
        Destination path, forwarded unchanged to ``object_client``.
    content
        Data to upload; passed to ``_rechunk_data``.
    upload_id
        Identifier of the already-initiated multipart upload.
    chunk_size
        Chunk size passed to ``_rechunk_data``.

    Raises
    ------
    requests.HTTPError
        If a part upload still returns an error status after retries.
    KeyError
        If an upload response carries no ``ETag`` header.
    """
    # Retry part uploads that fail with one of these 5xx statuses. See
    # https://aws.amazon.com/premiumsupport/knowledge-center/http-5xx-errors-s3
    retries = Retry(
        backoff_factor=0.1,
        status=10,
        status_forcelist=[500, 502, 503, 504],
    )

    completed_parts = []

    # Use the session as a context manager so its pooled connections are
    # released even when one of the part uploads raises.
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retries))

        # Parts are numbered starting from 1.
        for part_number, chunk in enumerate(
            _rechunk_data(content, chunk_size), start=1
        ):
            chunk_url = object_client.presign_upload_part(
                project_id, datasets_path, upload_id, part_number
            )
            upload_response = session.put(chunk_url, data=chunk)
            upload_response.raise_for_status()
            completed_parts.append(
                CompletedUploadPart(
                    part_number=part_number,
                    etag=upload_response.headers["ETag"],
                )
            )

    object_client.complete_multipart_upload(
        project_id, datasets_path, upload_id, completed_parts
    )
def _s3_upload(
    object_client, project_id, datasets_path, content, upload_id, chunk_size
):
    """Send ``content`` to S3 part by part, then complete the upload.

    Each chunk produced by ``_rechunk_data(content, chunk_size)`` is PUT to
    a URL obtained from ``object_client.presign_upload_part``. The part
    number and the ``ETag`` response header of every part are recorded and
    finally handed to ``object_client.complete_multipart_upload``.

    A failed PUT raises through ``raise_for_status()``; a response without
    an ``ETag`` header raises ``KeyError``. In both cases the multipart
    upload is not completed.
    """
    finished = []

    # Part numbers start at 1, in the order the chunks are produced.
    for number, piece in enumerate(
        _rechunk_data(content, chunk_size), start=1
    ):
        presigned_url = object_client.presign_upload_part(
            project_id, datasets_path, upload_id, number
        )

        response = requests.put(presigned_url, data=piece)
        response.raise_for_status()

        part = CompletedUploadPart(
            part_number=number, etag=response.headers["ETag"]
        )
        finished.append(part)

    object_client.complete_multipart_upload(
        project_id, datasets_path, upload_id, finished
    )
from uuid import uuid4

import pytest

from faculty.clients.object import CloudStorageProvider, CompletedUploadPart
from faculty.datasets import transfer

# Shared test inputs: a random project ID, a dataset path, and two distinct
# URLs standing in for presigned URLs.
PROJECT_ID = uuid4()
TEST_PATH = "/path/to/file"
TEST_URL = "https://example.com/presigned/url"
OTHER_URL = "https://example.com/other-presigned/url"

TEST_S3_UPLOAD_ID = 123

# Two completed-part values, each built from a number and an ETag string
# (presumably part number then ETag -- positional order not shown here).
TEST_ETAG = "5d24e152bcdfa5a0357f46471be3be6c"
TEST_COMPLETED_PART = CompletedUploadPart(1, TEST_ETAG)
OTHER_ETAG = "d084dd881a190aa5ffdf0ce21cff9509"
OTHER_COMPLETED_PART = CompletedUploadPart(2, OTHER_ETAG)

# 2000 randomly chosen printable characters, encoded as UTF-8 bytes.
# NOTE(review): `random` and `string` are used here but their imports are not
# visible in this excerpt -- confirm they are imported above.
TEST_CONTENT = "".join(
    random.choice(string.printable) for _ in range(2000)
).encode("utf8")


@pytest.fixture
def mock_client_download(mocker, requests_mock):
    # Mock object client whose presign_download() returns TEST_URL.
    object_client = mocker.Mock()
    object_client.presign_download.return_value = TEST_URL
    # Register a mocked GET on TEST_URL that responds with TEST_CONTENT.
    # NOTE(review): the fixture may continue beyond this excerpt.
    requests_mock.get(TEST_URL, content=TEST_CONTENT)
# Test data comes in pairs: a Python-side value and a `*_BODY` dict holding
# the same data under camelCase keys (presumably the JSON payload exchanged
# with the API -- confirm against the tests that use them).

# Body for the S3 presign-upload response; PRESIGN_UPLOAD_RESPONSE_S3 itself
# is defined outside this excerpt.
PRESIGN_UPLOAD_RESPONSE_S3_BODY = {
    "provider": "S3",
    "uploadId": PRESIGN_UPLOAD_RESPONSE_S3.upload_id,
}

# GCS presign-upload response: carries a URL and no upload ID.
PRESIGN_UPLOAD_RESPONSE_GCS = PresignUploadResponse(
    provider=CloudStorageProvider.GCS,
    upload_id=None,
    url="https://example.com",
)
PRESIGN_UPLOAD_RESPONSE_GCS_BODY = {
    "provider": "GCS",
    "url": PRESIGN_UPLOAD_RESPONSE_GCS.url,
}

# A single completed upload part and its body representation.
COMPLETED_UPLOAD_PART = CompletedUploadPart(123, "etag-123")
COMPLETED_UPLOAD_PART_BODY = {
    "partNumber": COMPLETED_UPLOAD_PART.part_number,
    "etag": COMPLETED_UPLOAD_PART.etag,
}

# Values describing a completed multipart upload (snake_case keys), and the
# corresponding body, which reuses the part body defined above.
COMPLETED_MULTIPART_UPLOAD = {
    "path": "/path",
    "upload_id": "upload-id",
    "parts": [COMPLETED_UPLOAD_PART],
}
COMPLETED_MULTIPART_UPLOAD_BODY = {
    "path": COMPLETED_MULTIPART_UPLOAD["path"],
    "uploadId": COMPLETED_MULTIPART_UPLOAD["upload_id"],
    "parts": [COMPLETED_UPLOAD_PART_BODY],
}