def testUploadFileHandle(self):
    with self.subTest("without optional parameters"):
        fobj = io.BytesIO(b"abcabcabc")
        dst_blob_name = infra.generate_test_key()

        self.handle.upload_file_handle(self.test_bucket, dst_blob_name, fobj)

        # should be able to get metadata for the file.
        self.assertFalse(self.handle.get_user_metadata(self.test_bucket, dst_blob_name))

    with self.subTest("with optional parameters"):
        fobj = io.BytesIO(b"abcabcabc")
        dst_blob_name = infra.generate_test_key()
        content_type = "test/content-type"
        metadata = {"stuff": "things"}

        self.handle.upload_file_handle(
            self.test_bucket,
            dst_blob_name,
            fobj,
            content_type=content_type,
            metadata=metadata,
        )

        # should be able to get metadata for the file.
        self.assertEqual(self.handle.get_user_metadata(self.test_bucket, dst_blob_name), metadata)
        self.assertEqual(self.handle.get_content_type(self.test_bucket, dst_blob_name), content_type)

def _test_gs_cache(self, src_data, content_type, checkout_bucket):
    replica = Replica.gcp
    checkout_bucket = checkout_bucket if checkout_bucket else replica.checkout_bucket
    test_src_key = infra.generate_test_key()
    gs_blobstore = Config.get_blobstore_handle(Replica.gcp)
    client = storage.Client()

    # upload
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        fh.seek(0)
        gs_blobstore.upload_file_handle(replica.bucket, test_src_key, fh, content_type)

    # checkout
    test_dst_key = infra.generate_test_key()
    event = gscopyclient.copy_sfn_event(replica.bucket, test_src_key, checkout_bucket, test_dst_key)
    event = gscopyclient.implementation.setup_copy_task(event, None)
    spoof_context = self.SpoofContext()
    # parameters of copy_worker are arbitrary, only passed because required.
    event = gscopyclient.implementation.copy_worker(event, spoof_context)

    # verify
    for retry in [1, 1, 1]:
        try:
            bucket = client.get_bucket(checkout_bucket)
            blob_class = bucket.get_blob(test_dst_key).storage_class
        except AttributeError:
            time.sleep(retry)
        else:
            break

    # cleanup
    gs_blobstore.delete(replica.bucket, test_src_key)
    gs_blobstore.delete(checkout_bucket, test_dst_key)
    return blob_class

def test_large_copy(self, num_parts=LAMBDA_PARALLELIZATION_FACTOR + 1):
    test_bucket = infra.get_env("DSS_S3_BUCKET_TEST")
    test_src_key = infra.generate_test_key()
    s3_client = boto3.client("s3")
    mpu = s3_client.create_multipart_upload(Bucket=test_bucket, Key=test_src_key)

    with ThreadPoolExecutor(max_workers=8) as tpe:
        parts_futures = tpe.map(
            lambda part_id: TestS3ParallelCopy.upload_part(test_bucket, test_src_key, mpu['UploadId'], part_id),
            range(1, num_parts + 1))

    parts = [dict(ETag=part_etag, PartNumber=part_id)
             for part_id, part_etag in parts_futures]

    src_etag = s3_client.complete_multipart_upload(
        Bucket=test_bucket,
        Key=test_src_key,
        MultipartUpload=dict(Parts=parts),
        UploadId=mpu['UploadId'],
    )['ETag'].strip('"')

    test_dst_key = infra.generate_test_key()
    state = s3copyclient.copy_sfn_event(test_bucket, test_src_key, test_bucket, test_dst_key)
    execution_id = str(uuid.uuid4())
    stepfunctions.step_functions_invoke("dss-s3-copy-sfn-{stage}", execution_id, state)

    self._check_dst_key_etag(test_bucket, test_dst_key, src_etag)

def _test_aws_cache(self, src_data, content_type, checkout_bucket):
    replica = Replica.aws
    checkout_bucket = checkout_bucket if checkout_bucket else replica.checkout_bucket
    test_src_key = infra.generate_test_key()
    s3_blobstore = Config.get_blobstore_handle(Replica.aws)

    # upload
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        fh.seek(0)
        s3_blobstore.upload_file_handle(replica.bucket, test_src_key, fh, content_type)

    # checkout
    test_dst_key = infra.generate_test_key()
    event = s3copyclient.copy_sfn_event(replica.bucket, test_src_key, checkout_bucket, test_dst_key)
    event = s3copyclient.implementation.setup_copy_task(event, None)
    spoof_context = self.SpoofContext()
    # parameters of copy_worker are arbitrary, only passed because required.
    event = s3copyclient.implementation.copy_worker(event, spoof_context, 10)

    # verify
    tagging = s3_blobstore.get_user_metadata(checkout_bucket, test_dst_key)

    # cleanup
    s3_blobstore.delete(replica.bucket, test_src_key)
    s3_blobstore.delete(checkout_bucket, test_dst_key)
    return tagging

def upload_file(self, contents):
    s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
    src_key = generate_test_key()
    s3 = boto3.resource('s3')
    with io.BytesIO(json.dumps(contents).encode()) as fh, ChecksummingSink() as sink:
        sink.write(fh.read())
        sums = sink.get_checksums()
        metadata = {
            'hca-dss-crc32c': sums['crc32c'].lower(),
            'hca-dss-s3_etag': sums['s3_etag'].lower(),
            'hca-dss-sha1': sums['sha1'].lower(),
            'hca-dss-sha256': sums['sha256'].lower(),
        }
        fh.seek(0)
        # TODO: consider switching to unmanaged uploader (putobject w/blob)
        s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(fh, ExtraArgs={"Metadata": metadata})

    source_url = f"s3://{s3_test_bucket}/{src_key}"
    file_uuid = str(uuid4())
    version = datetime_to_version_format(datetime.utcnow())
    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    urlbuilder.add_query("version", version)

    resp_obj = self.assertPutResponse(str(urlbuilder),
                                      requests.codes.created,
                                      json_request_body=dict(creator_uid=0, source_url=source_url))
    return file_uuid, resp_obj.json["version"]

def _test_file_put(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    src_key = generate_test_key()
    src_data = os.urandom(1024)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime_to_version_format(datetime.datetime.utcnow())

    # should be able to do this twice (i.e., same payload, different UUIDs)
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)
    self.upload_file(source_url, str(uuid.uuid4()))

    # should be able to do this twice (i.e., same payload, same UUIDs)
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version,
                     expected_code=requests.codes.ok)

    # should *NOT* be able to do this twice (i.e., different payload, same UUIDs)
    self.upload_file(source_url, file_uuid, version=version, expected_code=requests.codes.conflict)

def _test_file_get_invalid_token(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    src_key = generate_test_key()
    src_data = os.urandom(1024)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime_to_version_format(datetime.datetime.utcnow())

    # upload the file to the DSS
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version)
              .add_query("token", "{}"))

    @eventually(30, 0.1)
    def try_get():
        self.assertGetResponse(url, requests.codes.bad_request, headers=get_auth_header())

    try_get()

def testCopyTokenNotMatching(self):
    intermediate_blob_name = infra.generate_test_key()
    self.handle.copy(
        self.test_fixtures_bucket,
        "test_good_source_data/0",
        self.test_bucket,
        intermediate_blob_name,
    )
    cloud_checksum = self.handle.get_cloud_checksum(self.test_bucket, intermediate_blob_name)
    copy_token = self.handle.get_copy_token(self.test_bucket, intermediate_blob_name, cloud_checksum)

    self.handle.copy(
        self.test_fixtures_bucket,
        "test_good_source_data/1",
        self.test_bucket,
        intermediate_blob_name,
    )

    dst_blob_name = infra.generate_test_key()
    try:
        self.handle.copy(
            self.test_bucket,
            intermediate_blob_name,
            self.test_bucket,
            dst_blob_name,
            copy_token,
        )
    except BlobNotFoundError:
        return

    # either the file should be copied from the _previous_ contents, or it should not be present.
    try:
        dst_cloud_checksum = self.handle.get_cloud_checksum(self.test_bucket, dst_blob_name)
        self.assertEqual(dst_cloud_checksum, cloud_checksum)
    except BlobNotFoundError:
        pass

def test_zero_copy(self):
    test_bucket = infra.get_env("DSS_S3_BUCKET_TEST")
    test_src_key = infra.generate_test_key()
    s3_blobstore = Config.get_blobstore_handle(Replica.aws)

    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.seek(0)
        s3_blobstore.upload_file_handle(test_bucket, test_src_key, fh)
    src_etag = s3_blobstore.get_cloud_checksum(test_bucket, test_src_key)

    test_dst_key = infra.generate_test_key()
    state = s3copyclient.copy_sfn_event(test_bucket, test_src_key, test_bucket, test_dst_key)
    execution_id = str(uuid.uuid4())
    stepfunctions.step_functions_invoke("dss-s3-copy-sfn-{stage}", execution_id, state)

    self._check_dst_key_etag(test_bucket, test_dst_key, src_etag)

def testCopy(self):
    dst_blob_name = infra.generate_test_key()
    self.handle.copy(
        self.test_fixtures_bucket,
        "test_good_source_data/0",
        self.test_bucket,
        dst_blob_name,
    )

    # should be able to get metadata for the file.
    self.handle.get_user_metadata(self.test_bucket, dst_blob_name)

def testDelete(self):
    fobj = io.BytesIO(b"abcabcabc")
    dst_blob_name = infra.generate_test_key()
    self.handle.upload_file_handle(self.test_bucket, dst_blob_name, fobj)

    # should be able to get metadata for the file.
    self.handle.get_user_metadata(self.test_bucket, dst_blob_name)

    self.handle.delete(self.test_bucket, dst_blob_name)

    with self.assertRaises(BlobNotFoundError):
        self.handle.get_user_metadata(self.test_bucket, dst_blob_name)

def testUploadFileHandle(self):
    fobj = io.BytesIO(b"abcabcabc")
    dst_blob_name = infra.generate_test_key()
    self.handle.upload_file_handle(self.test_bucket, dst_blob_name, fobj)

    # should be able to get metadata for the file.
    self.handle.get_user_metadata(self.test_bucket, dst_blob_name)

def _test_file_put_cached(self, replica: Replica, scheme: str, test_bucket: str, test_checkout_bucket: str,
                          uploader: Uploader):
    stored_cache_criteria = os.environ.get('CHECKOUT_CACHE_CRITERIA')
    try:
        os.environ['CHECKOUT_CACHE_CRITERIA'] = '[{"type":"application/json","max_size":12314}]'
        handle = Config.get_blobstore_handle(replica)
        src_key = generate_test_key()
        src_data = b'{"status":"valid"}'
        source_url = f"{scheme}://{test_bucket}/{src_key}"
        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())

        # write dummy file and upload to upload area
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            fh.flush()
            uploader.checksum_and_upload_file(fh.name, src_key, "application/json")

        # upload file to DSS
        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

        metadata = handle.get_user_metadata(test_bucket, src_key)
        dst_key = ("blobs/" + ".".join([
            metadata['hca-dss-sha256'],
            metadata['hca-dss-sha1'],
            metadata['hca-dss-s3_etag'],
            metadata['hca-dss-crc32c'],
        ])).lower()

        for wait_to_upload_into_checkout_bucket in range(30):
            try:
                # get uploaded blob key from the checkout bucket
                file_metadata = json.loads(handle.get(test_checkout_bucket, dst_key).decode("utf-8"))
                break
            except BlobNotFoundError:
                time.sleep(1)
        else:
            file_metadata = json.loads(handle.get(test_checkout_bucket, dst_key).decode("utf-8"))
        assert file_metadata["status"] == "valid"  # the file exists in the checkout bucket
    finally:
        os.environ['CHECKOUT_CACHE_CRITERIA'] = stored_cache_criteria

def setUp(self, rounds=3):
    Config.set_config(BucketConfig.TEST)
    self.test_bucket = infra.get_env("DSS_GS_BUCKET_TEST")
    self.gs_blobstore = Config.get_blobstore_handle(Replica.gcp)

    test_src_keys = [infra.generate_test_key() for _ in range(rounds)]
    final_key = infra.generate_test_key()

    bucket_obj = self.gs_blobstore.gcp_client.bucket(self.test_bucket)

    self.gs_blobstore.upload_file_handle(self.test_bucket, test_src_keys[0], io.BytesIO(os.urandom(1024 * 1024)))

    for ix in range(len(test_src_keys) - 1):
        src_blob_obj = bucket_obj.get_blob(test_src_keys[ix])
        blobs = [src_blob_obj for _ in range(16)]
        dst_blob_obj = bucket_obj.blob(test_src_keys[ix + 1])
        dst_blob_obj.content_type = "application/octet-stream"
        dst_blob_obj.compose(blobs)

    # set the storage class to nearline.
    # NOTE: compose(…) does not seem to support setting a storage class.  The canonical way of changing the storage
    # class is to call update_storage_class(…), but Google's library does not seem to handle
    # update_storage_class(…) calls for large objects.
    final_blob_obj = bucket_obj.blob(final_key)
    final_blob_obj.storage_class = "NEARLINE"
    final_blob_src = bucket_obj.get_blob(test_src_keys[-1])
    token = None
    while True:
        result = final_blob_obj.rewrite(final_blob_src, token=token)
        if result[0] is None:
            # done!
            break
        token = result[0]

    self.src_key = final_key

def _test_put_auth_errors(self, scheme, test_bucket):
    src_key = generate_test_key()
    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    timestamp = datetime.datetime.utcnow()
    version = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")

    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    urlbuilder.add_query("version", version)
    self._test_auth_errors('put', str(urlbuilder),
                           json_request_body=dict(bundle_uuid=bundle_uuid,
                                                  creator_uid=0,
                                                  source_url=source_url))

def upload_file(app, contents, replica):
    src_key = generate_test_key()
    encoded = json.dumps(contents).encode()
    chunk_size = get_s3_multipart_chunk_size(len(encoded))
    with io.BytesIO(encoded) as fh, ChecksummingSink(write_chunk_size=chunk_size) as sink:
        sink.write(fh.read())
        sums = sink.get_checksums()
        metadata = {
            'hca-dss-crc32c': sums['crc32c'].lower(),
            'hca-dss-s3_etag': sums['s3_etag'].lower(),
            'hca-dss-sha1': sums['sha1'].lower(),
            'hca-dss-sha256': sums['sha256'].lower(),
        }
        fh.seek(0)

        if replica == 'gcp':
            gs_test_bucket = get_env("DSS_GS_BUCKET_TEST")
            gcp_client = gs_storage.Client.from_service_account_json(
                os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
            gs_bucket = gcp_client.bucket(gs_test_bucket)
            blob = gs_bucket.blob(src_key)
            blob.upload_from_file(fh, content_type="application/json")
            blob.metadata = metadata
            blob.patch()
            source_url = f"gs://{gs_test_bucket}/{src_key}"

        if replica == 'aws':
            # TODO: consider switching to unmanaged uploader (putobject w/blob)
            s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
            s3 = boto3.resource('s3')
            s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(fh, ExtraArgs={"Metadata": metadata})
            source_url = f"s3://{s3_test_bucket}/{src_key}"

    file_uuid = str(uuid4())
    version = datetime_to_version_format(datetime.utcnow())
    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    urlbuilder.add_query("version", version)

    resp_obj = app.put(str(urlbuilder),
                       json=dict(creator_uid=0, source_url=source_url),
                       headers=get_auth_header())
    resp_obj.raise_for_status()
    return file_uuid, resp_obj.json()["version"]

def _test_file_put_large(self, src_data: bytes) -> None:
    replicas: typing.Sequence[typing.Tuple[Replica, typing.Type[Uploader], str]] = [
        (Replica.aws, S3Uploader, self.s3_test_bucket),
        (Replica.gcp, GSUploader, self.gs_test_bucket),
    ]
    src_key = generate_test_key()
    for replica, uploader_class, bucket in replicas:
        self._upload_file_to_mock_ingest(uploader_class, bucket, src_key, src_data)

        expect_async_results: typing.Tuple[typing.Optional[bool], ...]
        if len(src_data) > ASYNC_COPY_THRESHOLD:
            if replica == Replica.aws:
                # We should be able to do this twice (i.e., same payload, different UUIDs).
                # First time should be asynchronous since it's new data.  Second time should be
                # synchronous since the data is present, but because S3 does not make
                # consistency guarantees, a second client might not see that the data is already
                # there.  Therefore, we do not mandate that it is done synchronously.
                expect_async_results = (True, None)
            else:
                # We should be able to do this twice (i.e., same payload, different UUIDs).
                # First time should be asynchronous since it's new data.  Second time should be
                # synchronous since the data is present.
                expect_async_results = (True, False)
        else:
            # We should be able to do this twice (i.e., same payload, different UUIDs).  Neither
            # time should be asynchronous.
            expect_async_results = (False, False)

        for ix, expect_async in enumerate(expect_async_results):
            with self.subTest(f"replica: {replica.name} size: {len(src_data)} round: {ix}"):
                resp_obj = self.upload_file_wait(
                    f"{replica.storage_schema}://{bucket}/{src_key}",
                    replica,
                    expect_async=expect_async)
                self.assertHeaders(
                    resp_obj.response,
                    {
                        'content-type': "application/json",
                    }
                )
                self.assertIn('version', resp_obj.json)

def testCopyTokenMatching(self):
    cloud_checksum = self.handle.get_cloud_checksum(self.test_fixtures_bucket, "test_good_source_data/0")
    copy_token = self.handle.get_copy_token(self.test_fixtures_bucket, "test_good_source_data/0", cloud_checksum)

    dst_blob_name = infra.generate_test_key()
    self.handle.copy(
        self.test_fixtures_bucket,
        "test_good_source_data/0",
        self.test_bucket,
        dst_blob_name,
        copy_token,
    )

    # should be able to get metadata for the file.
    self.handle.get_user_metadata(self.test_bucket, dst_blob_name)

def test_file_put_large_incorrect_s3_etag(self) -> None:
    bucket = self.s3_test_bucket
    src_key = generate_test_key()
    src_data = os.urandom(ASYNC_COPY_THRESHOLD + 1)

    # upload file with incompatible s3 part size
    self._upload_file_to_mock_ingest(S3Uploader, bucket, src_key, src_data, s3_part_size=6 * 1024 * 1024)

    file_uuid = str(uuid.uuid4())
    timestamp = datetime.datetime.utcnow()
    file_version = datetime_to_version_format(timestamp)
    url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
    url.add_query("version", file_version)
    source_url = f"s3://{bucket}/{src_key}"

    # put file into DSS, starting an async copy which will fail
    expected_codes = requests.codes.accepted,
    self.assertPutResponse(str(url),
                           expected_codes,
                           json_request_body=dict(
                               file_uuid=file_uuid,
                               creator_uid=0,
                               source_url=source_url,
                           ),
                           headers=get_auth_header())

    # should eventually get unprocessable after async copy fails
    @eventually(120, 1)
    def tryHead():
        self.assertHeadResponse(
            f"/v1/files/{file_uuid}?replica=aws&version={file_version}",
            requests.codes.unprocessable)

    tryHead()

    # should get unprocessable on GCP too
    self.assertHeadResponse(
        f"/v1/files/{file_uuid}?replica=gcp&version={file_version}",
        requests.codes.unprocessable)

def _test_file_put_large(self, replica: Replica, test_bucket: str, upload_func: typing.Callable[[str, str], None]):
    src_key = generate_test_key()
    upload_func(test_bucket, src_key)

    # We should be able to do this twice (i.e., same payload, different UUIDs).  First time should be asynchronous
    # since it's new data.  Second time should be synchronous since the data is present, but because S3 does not
    # make consistency guarantees, a second client might not see that the data is already there.  Therefore, we do
    # not mandate that it is done synchronously.
    for expect_async in [True, None]:
        resp_obj = self.upload_file_wait(
            f"{replica.storage_schema}://{test_bucket}/{src_key}",
            replica,
            expect_async=expect_async)
        self.assertHeaders(
            resp_obj.response,
            {
                'content-type': "application/json",
            }
        )
        self.assertIn('version', resp_obj.json)

def test_simple_copy(self):
    dest_key = infra.generate_test_key()

    state = copy_sfn_event(self.test_bucket, self.src_key, self.test_bucket, dest_key)
    execution_id = str(uuid.uuid4())
    stepfunctions.step_functions_invoke("dss-gs-copy-sfn-{stage}", execution_id, state)

    # verify that the destination has the same checksum.
    src_checksum = self.gs_blobstore.get_cloud_checksum(self.test_bucket, self.src_key)

    @eventually(30.0, 1.0, {BlobNotFoundError, AssertionError})
    def test_output():
        dst_checksum = self.gs_blobstore.get_cloud_checksum(self.test_bucket, dest_key)
        self.assertEqual(src_checksum, dst_checksum)

    test_output()

def _test_file_size(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    src_key = generate_test_key()
    src_size = 1024 + int.from_bytes(os.urandom(1), byteorder='little')
    src_data = os.urandom(src_size)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")

    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))

    for i in range(FILE_GET_RETRY_COUNT):
        with override_bucket_config(BucketConfig.TEST):
            resp_obj = self.assertGetResponse(
                url,
                [requests.codes.found, requests.codes.moved]
            )
            if resp_obj.response.status_code == requests.codes.found:
                url = resp_obj.response.headers['Location']
                data = requests.get(url)
                self.assertEqual(len(data.content), src_size)
                self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], str(src_size))
                return
            elif resp_obj.response.status_code == requests.codes.moved:
                retryAfter = int(resp_obj.response.headers['Retry-After'])
                self.assertEqual(retryAfter, RETRY_AFTER_INTERVAL)
                self.assertIn(url, resp_obj.response.headers['Location'])
    self.fail(f"Failed after {FILE_GET_RETRY_COUNT} retries.")

def test_file_put_bad_checksum(self):
    src_key = generate_test_key()
    uploader = S3Uploader(tempfile.gettempdir(), self.s3_test_bucket)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(os.urandom(1024))
        fh.flush()
        uploader.upload_file(fh.name, src_key, 'text/plain', metadata_keys=self.bad_checksums)

    source_url = f's3://{self.s3_test_bucket}/{src_key}'
    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime_to_version_format(datetime.datetime.utcnow())

    # catch AssertionError raised when upload returns 422 instead of 201
    with self.assertRaises(AssertionError):
        r = self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)
        self.assertEqual(r.json['code'], 'invalid_checksum')

def _test_file_size(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    src_key = generate_test_key()
    src_size = 1024 + int.from_bytes(os.urandom(1), byteorder='little')
    src_data = os.urandom(src_size)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")

    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))

    with override_bucket_config(BucketConfig.TEST):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1,
        )
        url = resp_obj.response.headers['Location']
        data = requests.get(url)
        self.assertEqual(len(data.content), src_size)
        self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], str(src_size))

def _test_file_put(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    src_key = generate_test_key()
    src_data = os.urandom(1024)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime_to_version_format(datetime.datetime.utcnow())

    self._test_put_auth_errors(scheme, test_bucket)

    with self.subTest(f"{replica}: Created returned when uploading a file with a unique payload, and FQID"):
        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

    with self.subTest(f"{replica}: Created returned when uploading a file with same payload, and different FQID"):
        self.upload_file(source_url, str(uuid.uuid4()))

    with self.subTest(f"{replica}: OK returned when uploading a file with the same payload, UUID, version"):
        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version,
                         expected_code=requests.codes.ok)

    with self.subTest(f"{replica}: Conflict returned when uploading a file with a different payload and same FQID"):
        src_key_temp = generate_test_key()
        src_data_temp = os.urandom(128)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data_temp)
            fh.flush()
            uploader.checksum_and_upload_file(fh.name, src_key_temp, "text/plain")

        source_url_temp = f"{scheme}://{test_bucket}/{src_key_temp}"
        self.upload_file(source_url_temp, file_uuid, version=version, expected_code=requests.codes.conflict)

    with self.subTest(f"{replica}: Bad returned when uploading a file with an invalid version"):
        self.upload_file(source_url, file_uuid, version='', expected_code=requests.codes.bad_request)

    invalid_version = 'ABCD'
    with self.subTest(f"{replica}: bad_request returned "
                      f"when uploading a file with invalid version {invalid_version}"):
        self.upload_file(source_url, file_uuid, version=invalid_version,
                         expected_code=requests.codes.bad_request)

    with self.subTest(f"{replica}: Bad returned when uploading a file without a version"):
        self.upload_file(source_url, file_uuid, version='missing', expected_code=requests.codes.bad_request)

    invalid_uuids = ['ABCD', '1234']
    for invalid_uuid in invalid_uuids:
        with self.subTest(f"{replica}: Bad returned "
                          f"when uploading a file with invalid UUID {invalid_uuid}"):
            self.upload_file(source_url, invalid_uuid, expected_code=requests.codes.bad_request)

        with self.subTest(f"{replica}: forbidden returned "
                          f"when uploading a file without UUID {invalid_uuid}"):
            self.upload_file(source_url, '', expected_code=requests.codes.forbidden)

def _test_file_get_checkout(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    handle = Config.get_blobstore_handle(replica)
    src_key = generate_test_key()
    src_data = os.urandom(1024)
    source_url = f"{scheme}://{test_bucket}/{src_key}"
    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime_to_version_format(datetime.datetime.utcnow())

    # write dummy file and upload to upload area
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    # upload file to DSS
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version))

    # get uploaded blob key
    file_metadata = json.loads(handle.get(test_bucket, f"files/{file_uuid}.{version}").decode("utf-8"))
    file_key = compose_blob_key(file_metadata)

    @eventually(20, 1)
    def test_checkout():
        # assert 302 and verify checksum on checkout completion
        api_get = self.assertGetResponse(url,
                                         requests.codes.found,
                                         headers=get_auth_header(),
                                         redirect_follow_retries=0)
        file_get = requests.get(api_get.response.headers['Location'])
        self.assertTrue(file_get.ok)
        self.assertEqual(file_get.content, src_data)

    with self.subTest(f"{replica}: Initiates checkout and returns 301 for GET on 'uncheckedout' file."):
        # assert 301 redirect on first GET
        self.assertGetResponse(url,
                               requests.codes.moved,
                               headers=get_auth_header(),
                               redirect_follow_retries=0)
        test_checkout()

    with self.subTest(f"{replica}: Initiates checkout and returns 301 for GET on nearly expired checkout file."):
        now = datetime.datetime.now(datetime.timezone.utc)
        creation_date_fn = ("cloud_blobstore.s3.S3BlobStore.get_creation_date"
                            if replica.name == "aws"
                            else "cloud_blobstore.gs.GSBlobStore.get_creation_date")
        with mock.patch(creation_date_fn) as mock_creation_date:
            blob_ttl_days = int(os.environ['DSS_BLOB_TTL_DAYS'])
            mock_creation_date.return_value = now - datetime.timedelta(days=blob_ttl_days, hours=1, minutes=5)
            self.assertGetResponse(url,
                                   requests.codes.moved,
                                   headers=get_auth_header(),
                                   redirect_follow_retries=0)
        test_checkout()

    with self.subTest(f"{replica}: Initiates checkout and returns 302 immediately for GET on stale checkout file."):
        now = datetime.datetime.now(datetime.timezone.utc)
        creation_date = handle.get_creation_date(replica.checkout_bucket, file_key)
        creation_date_fn = ("cloud_blobstore.s3.S3BlobStore.get_creation_date"
                            if replica.name == "aws"
                            else "cloud_blobstore.gs.GSBlobStore.get_creation_date")
        with mock.patch(creation_date_fn) as mock_creation_date:
            # assert 302 found on stale file and that last modified refreshes
            blob_ttl_days = int(os.environ['DSS_BLOB_PUBLIC_TTL_DAYS'])
            mock_creation_date.return_value = now - datetime.timedelta(days=blob_ttl_days + 1)
            self.assertGetResponse(url,
                                   requests.codes.found,
                                   headers=get_auth_header(),
                                   redirect_follow_retries=0)
            self.assertTrue(
                creation_date > handle.get_creation_date(replica.checkout_bucket, file_key),
                f'\ncurr_creation_date: {creation_date}'
                f'\nprev_creation_date: {handle.get_creation_date(replica.checkout_bucket, file_key)}')

    handle.delete(test_bucket, f"files/{file_uuid}.{version}")
    handle.delete(replica.checkout_bucket, file_key)