def _test_file_get_not_found(self, replica: Replica):
    """
    Verify that GET /v1/files for an unknown file UUID returns 404 with a
    stacktrace-bearing error body, both for the latest-version lookup and
    for an explicit-version lookup.
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"
    version = "2017-06-16T193604.240704Z"
    # The two original copies of this request/assert sequence differed only
    # in the presence of the "version" query parameter; loop over both.
    for query_version in (None, version):
        builder = UrlBuilder().set(path="/v1/files/" + file_uuid).add_query(
            "replica", replica.name)
        if query_version is not None:
            builder = builder.add_query("version", query_version)
        url = str(builder)
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertGetResponse(url,
                                   requests.codes.not_found,
                                   headers=get_auth_header(),
                                   expected_error=ExpectedErrorFields(
                                       code="not_found",
                                       status=requests.codes.not_found,
                                       expect_stacktrace=True))
def _test_bundle_get_not_found(self, replica: Replica):
    """
    Verify that GET /v1/bundles for a random (nonexistent) bundle UUID
    returns 404, both with and without an explicit version parameter.
    """
    bundle_uuid = str(uuid.uuid4())
    version = "2017-06-16T193604.240704Z"
    # Deduplicated: exercise the latest-version and explicit-version paths
    # with a single request/assert sequence.
    for query_version in (None, version):
        builder = (UrlBuilder()
                   .set(path="/v1/bundles/" + bundle_uuid)
                   .add_query("replica", replica.name))
        if query_version is not None:
            builder = builder.add_query("version", query_version)
        url = str(builder)
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertGetResponse(
                url,
                requests.codes.not_found,
                expected_error=ExpectedErrorFields(
                    code="not_found",
                    status=requests.codes.not_found)
            )
def _test_file_head(self, replica: Replica):
    """HEAD on a fixture file must report the full set of X-DSS-* metadata headers."""
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    version = "2017-06-16T193604.240704Z"
    expected_headers = {
        'X-DSS-CREATOR-UID': '4321',
        'X-DSS-VERSION': version,
        'X-DSS-CONTENT-TYPE': 'text/plain',
        'X-DSS-SIZE': '11358',
        'X-DSS-CRC32C': 'e16e07b9',
        'X-DSS-S3-ETAG': '3b83ef96387f14655fc854ddc3c6bd57',
        'X-DSS-SHA1': '2b8b815229aa8a61e483fb4ba0588b8b6c491890',
        'X-DSS-SHA256': 'cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30',
    }
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertHeadResponse(url,
                                           [requests.codes.ok],
                                           headers=get_auth_header())
        self.assertHeaders(resp_obj.response, expected_headers)
def _test_file_get_latest(self, replica: Replica):
    """
    Verify we can successfully fetch the latest version of a file UUID,
    and that the redirect target serves bytes matching the advertised
    SHA1 checksum and size.
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1,
        )

    # TODO: (ttung) verify more of the headers
    location = resp_obj.response.headers['Location']
    expected_sha1 = resp_obj.response.headers['X-DSS-SHA1']
    payload = requests.get(location)
    self.assertEqual(len(payload.content), 8685)
    self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], '8685')

    # verify that the downloaded data matches the stated checksum
    self.assertEqual(hashlib.sha1(payload.content).hexdigest(), expected_sha1)
def _test_bundle_get(self, replica: Replica):
    """GET a fixture bundle and check every manifest field against known values."""
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.ok)
    # Hoist the repeated subscripting into locals for readability.
    bundle = resp_obj.json['bundle']
    first_file = bundle['files'][0]
    self.assertEqual(bundle['uuid'], bundle_uuid)
    self.assertEqual(bundle['version'], version)
    self.assertEqual(bundle['creator_uid'], 12345)
    self.assertEqual(first_file['content-type'], "text/plain")
    self.assertEqual(first_file['size'], 11358)
    self.assertEqual(first_file['crc32c'], "e16e07b9")
    self.assertEqual(first_file['name'], "LICENSE")
    self.assertEqual(first_file['s3_etag'], "3b83ef96387f14655fc854ddc3c6bd57")
    self.assertEqual(first_file['sha1'], "2b8b815229aa8a61e483fb4ba0588b8b6c491890")
    self.assertEqual(first_file['sha256'],
                     "cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30")
    self.assertEqual(first_file['uuid'], "ce55fd51-7833-469b-be0b-5da88ebebfcd")
    self.assertEqual(first_file['version'], "2017-06-16T193604.240704Z")
def _test_file_get_latest(self, replica: Replica):
    """
    Verify we can successfully fetch the latest version of a file UUID.

    The server may answer 301 (moved) while the checkout is still in
    progress; in that case we validate the Retry-After header and poll
    again, failing after FILE_GET_RETRY_COUNT attempts.
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))
    for i in range(FILE_GET_RETRY_COUNT):
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(
                url,
                [requests.codes.found, requests.codes.moved]
            )
        if resp_obj.response.status_code == requests.codes.found:
            url = resp_obj.response.headers['Location']
            sha1 = resp_obj.response.headers['X-DSS-SHA1']
            data = requests.get(url)
            self.assertEqual(len(data.content), 8685)
            self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], '8685')

            # verify that the downloaded data matches the stated checksum
            hasher = hashlib.sha1()
            hasher.update(data.content)
            self.assertEqual(hasher.hexdigest(), sha1)

            # TODO: (ttung) verify more of the headers
            return
        elif resp_obj.response.status_code == requests.codes.moved:
            # Checkout not finished yet; verify the advertised retry interval.
            # (renamed from camelCase `retryAfter` for PEP 8 compliance)
            retry_after = int(resp_obj.response.headers['Retry-After'])
            self.assertEqual(retry_after, RETRY_AFTER_INTERVAL)
            self.assertIn(url, resp_obj.response.headers['Location'])
    self.fail(f"Failed after {FILE_GET_RETRY_COUNT} retries.")
def _test_bundle_get_directaccess(self, replica: Replica):
    """
    Request a bundle with directurls=true and verify the returned native
    URL resolves (via the blobstore handle) to content with the expected SHA1.
    """
    schema = replica.storage_schema
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version)
              .add_query("directurls", "true"))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.ok)
    direct_url = resp_obj.json['bundle']['files'][0]['url']
    parsed = urllib.parse.urlparse(direct_url)
    self.assertEqual(parsed.scheme, schema)
    bucket = parsed.netloc
    key = parsed.path[1:]  # ignore the / part of the path.
    handle = Config.get_blobstore_handle(replica)
    contents = handle.get(bucket, key)
    # The blob fetched through the native URL must hash to the fixture's SHA1.
    digest = hashlib.sha1(contents).hexdigest()
    self.assertEqual(digest, "2b8b815229aa8a61e483fb4ba0588b8b6c491890")
def test_manifest_files(self):
    """Verify the fixture bundle's manifest yields exactly one file entry."""
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    replica = Replica.aws
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        # Count lazily instead of a manual counter loop; no list is materialized.
        file_count = sum(1 for _ in get_manifest_files(bundle_uuid, version, replica))
    self.assertEqual(file_count, 1)
def test_status_fail(self):
    """A checkout launched against a bogus destination must end RUNNING or FAILED."""
    for replica in Replica:
        exec_arn = self.launch_checkout('e47114c9-bb96-480f-b6f5-c3e07aae399f', replica)
        status_url = str(UrlBuilder().set(path="/v1/bundles/checkout/" + exec_arn))
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(
                status_url,
                requests.codes.ok
            )
        status = resp_obj.json.get('status')
        self.assertIsNotNone(status)
        self.assertIn(status, ['RUNNING', 'FAILED'])
def test_status_success(self):
    """A checkout launched against the real checkout bucket must end RUNNING or SUCCEEDED."""
    for replica in Replica:
        exec_arn = self.launch_checkout(replica.checkout_bucket, replica)
        status_url = str(UrlBuilder().set(path="/v1/bundles/checkout/" + exec_arn))
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(
                status_url,
                requests.codes.ok
            )
        status = resp_obj.json.get('status')
        self.assertIsNotNone(status)
        self.assertIn(status, ['RUNNING', 'SUCCEEDED'])
def _test_file_head(self, replica: Replica):
    """HEAD on a fixture file must answer 200 (ok) or 301 (checkout pending)."""
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    version = "2017-06-16T193604.240704Z"
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version))
    acceptable_codes = [requests.codes.ok, requests.codes.moved]
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        self.assertHeadResponse(url, acceptable_codes)
def setUpClass(cls):
    """
    Start the local test server and, for each replica, point the flashflood
    prefixes at a fresh namespace, then upload and event-record three bundles.
    """
    cls.app = ThreadedLocalServer()
    cls.app.start()
    cls.bundles = {replica.name: list() for replica in Replica}
    with override_bucket_config(BucketConfig.TEST):
        for replica in Replica:
            # Unique prefix per test run so replicas don't see stale events.
            pfx = f"flashflood-{replica.name}-{uuid4()}"
            os.environ[f'DSS_{replica.name.upper()}_FLASHFLOOD_PREFIX_READ'] = pfx
            os.environ[f'DSS_{replica.name.upper()}_FLASHFLOOD_PREFIX_WRITE'] = pfx
            for _ in range(3):
                # Renamed from `uuid`, which shadowed the stdlib uuid module.
                bundle_uuid, version = _upload_bundle(cls.app, replica)
                cls.bundles[replica.name].append((bundle_uuid, version))
                events.record_event_for_bundle(replica,
                                               f"bundles/{bundle_uuid}.{version}",
                                               use_version_for_timestamp=True)
def _test_bundle_get_deleted(self,
                             replica: Replica,
                             bundle_uuid: str,
                             version: typing.Optional[str],
                             expected_version: typing.Optional[str]):
    """
    Fetch a (possibly deleted) bundle directly from the bucket and check
    the version it reports; a DSSException (e.g. deleted/missing bundle)
    is treated as "no bundle", expected to match expected_version=None.
    """
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        try:
            response = get_bundle_from_bucket(
                uuid=bundle_uuid,
                replica=replica,
                version=version,
                bucket=None,
            )
        except DSSException:
            response = dict()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(
        response['bundle']['version'] if 'bundle' in response else None,
        expected_version)
def test_file_get_no_replica(self):
    """
    Verify we raise the correct error code when we provide no replica.
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"
    # Deliberately omit the "replica" query parameter.
    url = str(UrlBuilder().set(path="/v1/files/" + file_uuid))
    expected = ExpectedErrorFields(
        code="illegal_arguments",
        status=requests.codes.bad_request,
        expect_stacktrace=True)
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        self.assertGetResponse(url,
                               requests.codes.bad_request,
                               headers=get_auth_header(),
                               expected_error=expected)
def test_no_files(self):
    """
    Verify we raise the correct error code when we do not provide the list
    of files.
    """
    bundle_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"
    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid)
              .add_query("replica", "aws"))
    # PUT body omits the required "files" list on purpose.
    body = dict(creator_uid=12345, )
    expected = ExpectedErrorFields(
        code="illegal_arguments",
        status=requests.codes.bad_request,
        expect_stacktrace=True)
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        self.assertPutResponse(url,
                               requests.codes.bad_request,
                               json_request_body=body,
                               expected_error=expected)
def test_sanity_check_no_replica(self):
    """Checkout with an empty replica query parameter must be rejected with 400."""
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    for replica in Replica:
        request_body = {"destination": replica.checkout_bucket}
        # "replica" is intentionally set to the empty string.
        url = str(UrlBuilder()
                  .set(path="/v1/bundles/" + bundle_uuid + "/checkout")
                  .add_query("replica", "")
                  .add_query("version", version))
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertPostResponse(
                url,
                requests.codes.bad_request,
                request_body
            )
def test_pre_execution_check_doesnt_exist(self):
    """Checkout of a bundle UUID that does not exist must answer 404 / not_found."""
    version = "2017-06-20T214506.766634Z"
    missing_bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf111"
    for replica in Replica:
        request_body = {"destination": replica.checkout_bucket}
        url = str(UrlBuilder()
                  .set(path="/v1/bundles/" + missing_bundle_uuid + "/checkout")
                  .add_query("replica", replica.name)
                  .add_query("version", version))
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertPostResponse(
                url,
                requests.codes.not_found,
                request_body
            )
        self.assertEqual(resp_obj.json['code'], 'not_found')
def launch_checkout(self, dst_bucket: str, replica: Replica) -> str:
    """
    Start a checkout of the fixture bundle into dst_bucket and return the
    checkout job id reported by the service.
    """
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid + "/checkout")
              .add_query("replica", replica.name)
              .add_query("version", version))
    payload = {"destination": dst_bucket}
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertPostResponse(
            url,
            requests.codes.ok,
            payload
        )
    execution_arn = resp_obj.json["checkout_job_id"]
    self.assertIsNotNone(execution_arn)
    return execution_arn
def _test_file_size(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    """
    Upload a randomly-sized blob, register it as a file, then GET it and
    verify the served content length and X-DSS-SIZE header, polling
    through 301 (checkout pending) responses.
    """
    src_key = generate_test_key()
    src_size = 1024 + int.from_bytes(os.urandom(1), byteorder='little')
    src_data = os.urandom(src_size)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"
    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))
    for _ in range(FILE_GET_RETRY_COUNT):
        with override_bucket_config(BucketConfig.TEST):
            resp_obj = self.assertGetResponse(
                url,
                [requests.codes.found, requests.codes.moved]
            )
        status = resp_obj.response.status_code
        if status == requests.codes.found:
            redirect_url = resp_obj.response.headers['Location']
            payload = requests.get(redirect_url)
            self.assertEqual(len(payload.content), src_size)
            self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], str(src_size))
            return
        elif status == requests.codes.moved:
            # Checkout still running: verify the advertised retry interval.
            retry_after = int(resp_obj.response.headers['Retry-After'])
            self.assertEqual(retry_after, RETRY_AFTER_INTERVAL)
            self.assertIn(url, resp_obj.response.headers['Location'])
    self.fail(f"Failed after {FILE_GET_RETRY_COUNT} retries.")
def _test_file_size(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    """
    Upload a randomly-sized blob, register it as a file, then GET it
    (letting the test harness follow redirect retries) and verify the
    served content length and X-DSS-SIZE header.
    """
    src_key = generate_test_key()
    src_size = 1024 + int.from_bytes(os.urandom(1), byteorder='little')
    src_data = os.urandom(src_size)
    with tempfile.NamedTemporaryFile(delete=True) as fh:
        fh.write(src_data)
        fh.flush()
        uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

    source_url = f"{scheme}://{test_bucket}/{src_key}"
    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))
    with override_bucket_config(BucketConfig.TEST):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1,
        )
    redirect_url = resp_obj.response.headers['Location']
    payload = requests.get(redirect_url)
    self.assertEqual(len(payload.content), src_size)
    self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], str(src_size))
def _test_file_get_disposition(self, replica: Replica):
    """
    Verify that passing in "content_disposition" returns the expected
    "Content-Disposition" header when fetching the final presigned url.
    """
    disposition = 'attachment; filename=test-data.json'
    url = str(UrlBuilder()
              .set(path="/v1/files/ce55fd51-7833-469b-be0b-5da88ebebfcd")
              .add_query("replica", replica.name)
              .add_query("content_disposition", disposition))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1)
    presigned_url = resp_obj.response.headers['Location']
    response = requests.get(presigned_url)
    # The presigned URL must echo the requested disposition back verbatim.
    self.assertEqual(response.headers['Content-Disposition'], disposition)
def _test_file_get_direct(self, replica: Replica):
    """
    Verify that the direct URL option works for GET/ file
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    handle = Config.get_blobstore_handle(replica)
    direct_url_req = str(UrlBuilder()
                         .set(path="/v1/files/" + file_uuid)
                         .add_query("replica", replica.name)
                         .add_query("directurl", "True"))
    presigned_url_req = str(UrlBuilder()
                            .set(path="/v1/files/" + file_uuid)
                            .add_query("replica", replica.name))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        native_resp_obj = self.assertGetResponse(
            direct_url_req,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1,
        )
        resp_obj = self.assertGetResponse(
            presigned_url_req,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1,
        )

        # The direct and presigned responses must agree on every checksum
        # and metadata header.
        verify_headers = [
            'X-DSS-VERSION', 'X-DSS-CREATOR-UID', 'X-DSS-S3-ETAG',
            'X-DSS-SHA256', 'X-DSS-SHA1', 'X-DSS-CRC32C'
        ]
        native_headers_verify = {k: v
                                 for k, v in native_resp_obj.response.headers.items()
                                 if k in verify_headers}
        presigned_headers_verify = {k: v
                                    for k, v in resp_obj.response.headers.items()
                                    if k in verify_headers}
        self.assertDictEqual(native_headers_verify, presigned_headers_verify)

        with self.subTest(
            'Retry-After headers are not included in a successful response.'
        ):
            self.assertEqual(
                native_resp_obj.response.headers.get('Retry-After'), None)

        # The direct URL must point at the replica's native checkout bucket,
        # and the blob there must be non-empty and match the presigned size.
        location = native_resp_obj.response.headers['Location']
        self.assertTrue(location.split('//')[0].startswith(replica.storage_schema))
        self.assertTrue(location.split('//')[1].startswith(replica.checkout_bucket))
        blob_path = location.split('/blobs/')[1]
        native_size = handle.get_size(replica.checkout_bucket, f'blobs/{blob_path}')
        self.assertGreater(native_size, 0)
        self.assertEqual(native_size, int(resp_obj.response.headers['X-DSS-SIZE']))