Exemple #1
0
def _s3_upload(object_client, project_id, datasets_path, content, upload_id,
               chunk_size):
    """Upload ``content`` as the parts of a multipart upload and complete it.

    The chunks produced by ``_rechunk_data(content, chunk_size)`` are sent
    one at a time: for each chunk a presigned URL is requested from
    ``object_client`` and the chunk is PUT to that URL. When every part has
    been sent, the upload is completed with the collected part numbers and
    ETags.

    Parameters
    ----------
    object_client
        Client providing ``presign_upload_part`` and
        ``complete_multipart_upload``.
    project_id
        Project identifier, passed through to ``object_client``.
    datasets_path
        Destination path, passed through to ``object_client``.
    content
        Data to upload; handed to ``_rechunk_data`` unchanged.
    upload_id
        Identifier of the multipart upload the parts belong to.
    chunk_size
        Chunk size handed to ``_rechunk_data``.

    Raises
    ------
    requests.HTTPError
        If a part upload response has an error status.
    KeyError
        If a part upload response carries no ``ETag`` header.
    """

    #  Retry part uploads that fail with a 5xx status, backing off between
    #  attempts. See
    #  https://aws.amazon.com/premiumsupport/knowledge-center/http-5xx-errors-s3
    retries = Retry(
        backoff_factor=0.1,
        status=10,
        status_forcelist=[500, 502, 503, 504],
    )

    completed_parts = []

    # The context manager closes the session (and its pooled connections)
    # on exit, including when a part upload raises.
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retries))

        chunks = _rechunk_data(content, chunk_size)
        # Part numbers are 1-based.
        for part_number, chunk in enumerate(chunks, start=1):
            chunk_url = object_client.presign_upload_part(
                project_id, datasets_path, upload_id, part_number
            )

            upload_response = session.put(chunk_url, data=chunk)
            upload_response.raise_for_status()
            completed_parts.append(
                CompletedUploadPart(part_number=part_number,
                                    etag=upload_response.headers["ETag"]))

    object_client.complete_multipart_upload(project_id, datasets_path,
                                            upload_id, completed_parts)
Exemple #2
0
def _s3_upload(object_client, project_id, datasets_path, content, upload_id,
               chunk_size):
    """Send ``content`` part by part and then complete the multipart upload.

    Each chunk produced by ``_rechunk_data(content, chunk_size)`` is PUT to
    a presigned URL obtained from ``object_client.presign_upload_part``.
    The part number and the ``ETag`` response header of every part are
    recorded and handed to ``object_client.complete_multipart_upload`` once
    all parts have been sent.

    Raises ``requests.HTTPError`` when a part upload response has an error
    status, and ``KeyError`` when a response has no ``ETag`` header.
    """
    parts = []
    pieces = _rechunk_data(content, chunk_size)

    # Part numbers are 1-based.
    for number, payload in enumerate(pieces, start=1):
        url = object_client.presign_upload_part(
            project_id, datasets_path, upload_id, number
        )

        response = requests.put(url, data=payload)
        response.raise_for_status()

        etag = response.headers["ETag"]
        parts.append(CompletedUploadPart(part_number=number, etag=etag))

    object_client.complete_multipart_upload(
        project_id, datasets_path, upload_id, parts
    )
Exemple #3
0
import random
import string
from uuid import uuid4

import pytest

from faculty.clients.object import CloudStorageProvider, CompletedUploadPart
from faculty.datasets import transfer


# Fixed identifiers, paths and URLs used as test inputs. PROJECT_ID is a
# fresh random UUID each time the module is imported.
PROJECT_ID = uuid4()
TEST_PATH = "/path/to/file"
TEST_URL = "https://example.com/presigned/url"
OTHER_URL = "https://example.com/other-presigned/url"
TEST_S3_UPLOAD_ID = 123

# Completed upload part number 1 and the ETag it carries.
TEST_ETAG = "5d24e152bcdfa5a0357f46471be3be6c"
TEST_COMPLETED_PART = CompletedUploadPart(1, TEST_ETAG)

# Completed upload part number 2 and the ETag it carries.
OTHER_ETAG = "d084dd881a190aa5ffdf0ce21cff9509"
OTHER_COMPLETED_PART = CompletedUploadPart(2, OTHER_ETAG)

# 2000 randomly chosen printable characters, encoded as UTF-8 bytes. No seed
# is set here, so the content presumably differs between test runs.
TEST_CONTENT = "".join(
    random.choice(string.printable) for _ in range(2000)
).encode("utf8")


@pytest.fixture
def mock_client_download(mocker, requests_mock):
    """Set up a mock object client and a mocked download URL.

    ``presign_download`` on the mock client returns ``TEST_URL``, and GET
    requests to ``TEST_URL`` are answered with ``TEST_CONTENT``.
    """
    object_client = mocker.Mock()
    object_client.presign_download.return_value = TEST_URL

    requests_mock.get(TEST_URL, content=TEST_CONTENT)
    # NOTE(review): as shown, the fixture ends here without returning or
    # yielding ``object_client``, so dependent tests would receive ``None``.
    # The excerpt may be truncated — confirm against the full file.
Exemple #4
0
# Each ``*_BODY`` dict below is the dictionary counterpart of the object or
# dict it is paired with, using camelCase keys (presumably the JSON wire
# format exchanged with the service — confirm against the client schemas).

# Body for the S3 presign-upload response; PRESIGN_UPLOAD_RESPONSE_S3 itself
# is defined outside this excerpt.
PRESIGN_UPLOAD_RESPONSE_S3_BODY = {
    "provider": "S3",
    "uploadId": PRESIGN_UPLOAD_RESPONSE_S3.upload_id,
}

# GCS presign-upload response: carries a URL and no upload ID.
PRESIGN_UPLOAD_RESPONSE_GCS = PresignUploadResponse(
    provider=CloudStorageProvider.GCS,
    upload_id=None,
    url="https://example.com",
)
PRESIGN_UPLOAD_RESPONSE_GCS_BODY = {
    "provider": "GCS",
    "url": PRESIGN_UPLOAD_RESPONSE_GCS.url,
}

# A single completed upload part (part number 123) and its body form.
COMPLETED_UPLOAD_PART = CompletedUploadPart(123, "etag-123")
COMPLETED_UPLOAD_PART_BODY = {
    "partNumber": COMPLETED_UPLOAD_PART.part_number,
    "etag": COMPLETED_UPLOAD_PART.etag,
}

# Data describing a completed multipart upload (snake_case keys) and the
# matching body, which embeds the part body defined above.
COMPLETED_MULTIPART_UPLOAD = {
    "path": "/path",
    "upload_id": "upload-id",
    "parts": [COMPLETED_UPLOAD_PART],
}
COMPLETED_MULTIPART_UPLOAD_BODY = {
    "path": COMPLETED_MULTIPART_UPLOAD["path"],
    "uploadId": COMPLETED_MULTIPART_UPLOAD["upload_id"],
    "parts": [COMPLETED_UPLOAD_PART_BODY],
}