def test_gcs_upload_chunking(mocker, mock_client_upload_gcs, requests_mock):
    mocker.patch("faculty.datasets.transfer.DEFAULT_CHUNK_SIZE", 1000)

    chunk_headers = [
        {"Content-Length": "1000", "Content-Range": "bytes 0-999/*"},
        {"Content-Length": "1000", "Content-Range": "bytes 1000-1999/2000"},
    ]
    for headers in chunk_headers:
        requests_mock.put(TEST_URL, status_code=200, request_headers=headers)

    transfer.upload(mock_client_upload_gcs, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    history = requests_mock.request_history
    assert len(history) == 2
    _assert_contains(history[0].headers, chunk_headers[0])
    _assert_contains(history[1].headers, chunk_headers[1])
    assert history[0].text.encode("utf8") == TEST_CONTENT[:1000]
    assert history[1].text.encode("utf8") == TEST_CONTENT[1000:]

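# A hypothetical helper (not part of faculty.datasets.transfer) sketching the
# Content-Range values asserted above for a GCS resumable upload: intermediate
# chunks leave the total size as "*" (still unknown), while the final chunk
# declares the full object size, e.g. "bytes 0-999/*" then "bytes 1000-1999/2000".
def _gcs_content_range(start, end, total_size, is_last_chunk):
    total_field = str(total_size) if is_last_chunk else "*"
    return "bytes {start}-{end}/{total}".format(
        start=start, end=end, total=total_field
    )
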
def test_s3_dynamic_chunk_upload(
    mocker, mock_client_upload_s3, requests_mock, max_chunks, expected_chunks
):
    mocker.patch("faculty.datasets.transfer.DEFAULT_CHUNK_SIZE", 100)
    mocker.patch("faculty.datasets.transfer.S3_MAX_CHUNKS", max_chunks)

    chunk_size = int(math.ceil(len(TEST_CONTENT) / expected_chunks))
    chunks = iter(
        [
            TEST_CONTENT[i * chunk_size : (i + 1) * chunk_size]
            for i in range(expected_chunks)
        ]
    )
    # Each matcher consumes the next expected chunk from the iterator, so the
    # comparisons stay aligned as long as the chunks are uploaded in order.
    chunk_matchers = [
        lambda x: next(chunks) == x.text.encode("utf-8")
        for i in range(max_chunks)
    ]
    urls = [
        "https://example.com/presigned-url-{i}/url".format(i=i)
        for i in range(max_chunks)
    ]
    etags = ["tag-{tagid}".format(tagid=i) for i in range(max_chunks)]

    for url, matcher, etag in zip(urls, chunk_matchers, etags):
        requests_mock.put(
            url,
            status_code=200,
            additional_matcher=matcher,
            headers={"ETag": etag},
        )
    mock_client_upload_s3.presign_upload_part.side_effect = urls

    transfer.upload(mock_client_upload_s3, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    history = requests_mock.request_history
    assert len(history) == expected_chunks

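# A minimal sketch of the chunk-size calculation this test exercises, assuming
# the uploader enlarges chunks once the content would otherwise need more than
# S3_MAX_CHUNKS parts; _dynamic_chunk_size is an illustrative name, not the
# function actually defined in faculty.datasets.transfer.
def _dynamic_chunk_size(content_length, default_chunk_size, max_chunks):
    # Keep the default size unless that would exceed max_chunks parts, in
    # which case spread the content evenly across exactly max_chunks chunks.
    return max(
        default_chunk_size, int(math.ceil(content_length / float(max_chunks)))
    )
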
def test_gcs_upload_empty_object(mock_client_upload_gcs, requests_mock):
    test_content = "".encode("utf8")
    requests_mock.put(
        TEST_URL, request_headers={"Content-Length": "0"}, status_code=200
    )

    transfer.upload(mock_client_upload_gcs, PROJECT_ID, TEST_PATH, test_content)

def test_gcs_upload(mock_client_upload_gcs, requests_mock):
    requests_mock.put(
        TEST_URL,
        request_headers={
            "Content-Length": "2000",
            "Content-Range": "bytes 0-1999/2000",
        },
        status_code=200,
    )

    transfer.upload(mock_client_upload_gcs, PROJECT_ID, TEST_PATH, TEST_CONTENT)

def test_s3_upload(mock_client_upload_s3, requests_mock):
    def chunk_request_matcher(request):
        return TEST_CONTENT == request.text.encode("utf-8")

    requests_mock.put(
        TEST_URL,
        additional_matcher=chunk_request_matcher,
        headers={"ETag": TEST_ETAG},
        status_code=200,
    )
    mock_client_upload_s3.presign_upload_part.return_value = TEST_URL

    transfer.upload(mock_client_upload_s3, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    mock_client_upload_s3.complete_multipart_upload.assert_called_once_with(
        PROJECT_ID, TEST_PATH, TEST_S3_UPLOAD_ID, [TEST_COMPLETED_PART]
    )

def test_s3_upload_chunks(mocker, mock_client_upload_s3, requests_mock):
    mocker.patch("faculty.datasets.transfer.DEFAULT_CHUNK_SIZE", 1000)

    def first_chunk_request_matcher(request):
        return TEST_CONTENT[0:1000] == request.text.encode("utf-8")

    def second_chunk_request_matcher(request):
        return TEST_CONTENT[1000:] == request.text.encode("utf-8")

    mock_client_upload_s3.presign_upload_part.side_effect = [
        TEST_URL,
        OTHER_URL,
    ]
    requests_mock.put(
        TEST_URL,
        additional_matcher=first_chunk_request_matcher,
        headers={"ETag": TEST_ETAG},
        status_code=200,
    )
    requests_mock.put(
        OTHER_URL,
        additional_matcher=second_chunk_request_matcher,
        headers={"ETag": OTHER_ETAG},
        status_code=200,
    )

    transfer.upload(mock_client_upload_s3, PROJECT_ID, TEST_PATH, TEST_CONTENT)

    history = requests_mock.request_history
    assert len(history) == 2
    mock_client_upload_s3.complete_multipart_upload.assert_called_once_with(
        PROJECT_ID,
        TEST_PATH,
        TEST_S3_UPLOAD_ID,
        [TEST_COMPLETED_PART, OTHER_COMPLETED_PART],
    )

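# A rough sketch (an assumption, not the library's confirmed internals) of the
# part bookkeeping the assertions above imply: every chunk PUT returns an ETag,
# which is paired with its 1-based part number, and the resulting list is
# passed to complete_multipart_upload. TEST_COMPLETED_PART and
# OTHER_COMPLETED_PART are assumed to be built along these lines.
def _completed_parts(etags):
    return [
        {"part_number": part_number, "etag": etag}
        for part_number, etag in enumerate(etags, start=1)
    ]
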