def upload_file(self, contents):
    """Serialize *contents* as JSON, upload it to the S3 test bucket with
    checksum metadata, then register it with PUT /v1/files.

    Returns a (file_uuid, version) pair for the newly created file.
    """
    test_bucket = get_env("DSS_S3_BUCKET_TEST")
    key = generate_test_key()
    s3_resource = boto3.resource('s3')
    payload = json.dumps(contents).encode()
    with io.BytesIO(payload) as body, ChecksummingSink() as checksummer:
        # Run the whole payload through the sink to obtain all four checksums.
        checksummer.write(body.read())
        checksums = checksummer.get_checksums()
        metadata = {
            'hca-dss-crc32c': checksums['crc32c'].lower(),
            'hca-dss-s3_etag': checksums['s3_etag'].lower(),
            'hca-dss-sha1': checksums['sha1'].lower(),
            'hca-dss-sha256': checksums['sha256'].lower()
        }
        # Rewind: the sink consumed the stream, but the upload needs it from the start.
        body.seek(0)
        # TODO: consider switching to unmanaged uploader (putobject w/blob)
        s3_resource.Bucket(test_bucket).Object(key).upload_fileobj(
            body, ExtraArgs={"Metadata": metadata})
    source_url = f"s3://{test_bucket}/{key}"
    file_uuid = str(uuid4())
    version = datetime_to_version_format(datetime.utcnow())
    builder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    builder.add_query("version", version)
    resp_obj = self.assertPutResponse(
        str(builder),
        requests.codes.created,
        json_request_body=dict(creator_uid=0, source_url=source_url))
    return file_uuid, resp_obj.json["version"]
def put_bundle(self, replica: Replica, bundle_uuid: str, files: typing.Iterable[typing.Tuple[str, str, str]], bundle_version: typing.Optional[str] = None, expected_code: int = requests.codes.created):
    """PUT /v1/bundles/{uuid} referencing *files* (uuid, version, name triples).

    On a 2xx response, also checks the content type header and that a
    version was returned. Returns the assert-response object.
    """
    url_builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
    url_builder.add_query("replica", replica.name)
    if bundle_version:
        url_builder.add_query("version", bundle_version)
    # Expand the (uuid, version, name) triples into the request's file entries.
    file_entries = []
    for file_uuid, file_version, file_name in files:
        file_entries.append(dict(
            uuid=file_uuid,
            version=file_version,
            name=file_name,
            indexed=False,
        ))
    resp_obj = self.assertPutResponse(
        str(url_builder),
        expected_code,
        json_request_body=dict(
            files=file_entries,
            creator_uid=12345,
        ),
    )
    if 200 <= resp_obj.response.status_code < 300:
        self.assertHeaders(resp_obj.response, {
            'content-type': "application/json",
        })
        self.assertIn('version', resp_obj.json)
    return resp_obj
def build_url(self, url_params=None):
    """Build the /v1/search URL for this test's replica.

    Any entries in *url_params* are appended as extra query parameters.
    """
    builder = UrlBuilder().set(path="/v1/search")
    builder.add_query("replica", self.replica.name)
    if url_params:
        for name, value in url_params.items():
            builder.add_query(name, value)
    return str(builder)
def delete_bundle(
        self,
        replica: Replica,
        bundle_uuid: str,
        bundle_version: typing.Optional[str]=None,
        authorized: bool=True):
    """DELETE /v1/bundles/{uuid}, expecting 200 when authorized and 403 otherwise.

    Returns the assert-response object.
    """
    # make delete request
    builder = (UrlBuilder()
               .set(path="/v1/bundles/" + bundle_uuid)
               .add_query('replica', replica.name))
    body = dict(reason="reason")
    if bundle_version:
        builder = builder.add_query('version', bundle_version)
        body['version'] = bundle_version
    if authorized:
        expected_code = requests.codes.ok
    else:
        expected_code = requests.codes.forbidden
    # delete and check results
    return self.assertDeleteResponse(
        str(builder),
        expected_code,
        json_request_body=body,
        headers=get_auth_header(authorized=authorized),
    )
def _test_bundle_get_not_found(self, replica: Replica):
    """GET of a random (nonexistent) bundle UUID must 404, both with and
    without an explicit version."""
    bundle_uuid = str(uuid.uuid4())
    version = "2017-06-16T193604.240704Z"
    unversioned_url = str(UrlBuilder()
                          .set(path="/v1/bundles/" + bundle_uuid)
                          .add_query("replica", replica.name))
    versioned_url = str(UrlBuilder()
                        .set(path="/v1/bundles/" + bundle_uuid)
                        .add_query("replica", replica.name)
                        .add_query("version", version))
    for url in (unversioned_url, versioned_url):
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertGetResponse(
                url,
                requests.codes.not_found,
                expected_error=ExpectedErrorFields(
                    code="not_found",
                    status=requests.codes.not_found)
            )
def _test_file_get_not_found(self, replica: Replica):
    """GET of a nonexistent file UUID must 404 (with a stacktrace), both with
    and without an explicit version."""
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"
    version = "2017-06-16T193604.240704Z"
    urls = [
        str(UrlBuilder()
            .set(path="/v1/files/" + file_uuid)
            .add_query("replica", replica.name)),
        str(UrlBuilder()
            .set(path="/v1/files/" + file_uuid)
            .add_query("replica", replica.name)
            .add_query("version", version)),
    ]
    for url in urls:
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertGetResponse(url,
                                   requests.codes.not_found,
                                   headers=get_auth_header(),
                                   expected_error=ExpectedErrorFields(
                                       code="not_found",
                                       status=requests.codes.not_found,
                                       expect_stacktrace=True))
def test_get(self):
    """GET an existing subscription, then exercise the forbidden (403) and
    not-found (404) paths."""
    find_uuid = self._put_subscription()
    # Normal request
    url = str(UrlBuilder()
              .set(path="/v1/subscriptions/" + str(find_uuid))
              .add_query("replica", self.replica.name))
    json_response = self.assertGetResponse(url,
                                           requests.codes.okay,
                                           headers=get_auth_header()).json
    self.assertEqual(self.sample_percolate_query, json_response['es_query'])
    self.assertEqual(self.callback_url, json_response['callback_url'])
    # Forbidden request w/ previous url
    with self.throw_403():
        self.assertGetResponse(url, requests.codes.forbidden, headers=get_auth_header())
    # File not found request
    missing_url = str(UrlBuilder()
                      .set(path="/v1/subscriptions/" + str(uuid.uuid4()))
                      .add_query("replica", self.replica.name))
    self.assertGetResponse(missing_url, requests.codes.not_found, headers=get_auth_header())
def test_get(self):
    """GET an elasticsearch subscription and verify its fields, then check
    the not-found path for a random UUID.

    Bug fix: the original assigned ``find_uuid`` *inside* the ``try`` block,
    so if ``_put_subscription()`` raised, the ``finally`` clause crashed with
    UnboundLocalError instead of surfacing the real failure. Creation now
    happens before the try. Also replaces the deprecated ``assertEquals``
    alias with ``assertEqual``.
    """
    find_uuid = self._put_subscription()
    try:
        # Normal request
        url = str(UrlBuilder()
                  .set(path="/v1/subscriptions/" + str(find_uuid))
                  .add_query("replica", self.replica.name)
                  .add_query("subscription_type", "elasticsearch"))
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.okay,
            headers=get_auth_header())
        json_response = resp_obj.json
        self.assertEqual(self.sample_percolate_query, json_response['es_query'])
        self.assertEqual(self.endpoint, Endpoint.from_subscription(json_response))
        self.assertEqual(self.hmac_key_id, json_response['hmac_key_id'])
        # The secret key must never be echoed back to the client.
        self.assertNotIn('hmac_secret_key', json_response)
    finally:
        self._cleanup_subscription(find_uuid)
    # File not found request
    url = str(UrlBuilder()
              .set(path="/v1/subscriptions/" + str(uuid.uuid4()))
              .add_query("replica", self.replica.name)
              .add_query("subscription_type", "elasticsearch"))
    self.assertGetResponse(
        url,
        requests.codes.not_found,
        headers=get_auth_header())
def upload_file(
        self: typing.Any,
        source_url: str,
        file_uuid: str,
        bundle_uuid: str = None,
        version: str = None,
        expected_code: int = requests.codes.created,
):
    """PUT /v1/files/{uuid} referencing *source_url* and assert *expected_code*.

    A random bundle UUID and a current-time version are generated when not
    supplied. On a 201 the response headers and body are sanity-checked.
    """
    if bundle_uuid is None:
        bundle_uuid = str(uuid.uuid4())
    if version is None:
        version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")
    builder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    # The sentinel 'missing' lets callers exercise the no-version error path.
    if version != 'missing':
        builder.add_query("version", version)
    resp_obj = self.assertPutResponse(str(builder),
                                      expected_code,
                                      json_request_body=dict(
                                          bundle_uuid=bundle_uuid,
                                          creator_uid=0,
                                          source_url=source_url,
                                      ),
                                      headers=get_auth_header())
    if resp_obj.response.status_code == requests.codes.created:
        self.assertHeaders(resp_obj.response, {
            'content-type': "application/json",
        })
        self.assertIn('version', resp_obj.json)
def upload_file_wait(
        self: typing.Any,
        source_url: str,
        replica: Replica,
        file_uuid: str = None,
        file_version: str = None,
        bundle_uuid: str = None,
        timeout_seconds: int = 120,
        expect_async: typing.Optional[bool] = None,
) -> DSSAssertResponse:
    """
    Upload a file. If the request is being handled asynchronously, wait until the
    file has landed in the data store.

    :param source_url: cloud URL of the data to ingest.
    :param replica: replica polled via HEAD /v1/files once a 202 is returned.
    :param file_uuid: file UUID; a random one is generated when omitted.
    :param file_version: file version; current UTC time is used when omitted.
    :param bundle_uuid: bundle UUID; a random one is generated when omitted.
    :param timeout_seconds: how long to poll before failing the test.
    :param expect_async: True requires a 202 (accepted), False requires a
        201 (created), None accepts either.
    :return: the assert-response object from the PUT request.
    """
    file_uuid = str(uuid.uuid4()) if file_uuid is None else file_uuid
    bundle_uuid = str(uuid.uuid4()) if bundle_uuid is None else bundle_uuid
    if expect_async is True:
        expected_codes = requests.codes.accepted
    elif expect_async is False:
        expected_codes = requests.codes.created
    else:
        # No expectation from the caller: either outcome is acceptable.
        expected_codes = requests.codes.created, requests.codes.accepted
    if file_version is None:
        timestamp = datetime.datetime.utcnow()
        file_version = datetime_to_version_format(timestamp)
    url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
    url.add_query("version", file_version)
    resp_obj = self.assertPutResponse(
        str(url),
        expected_codes,
        json_request_body=dict(
            bundle_uuid=bundle_uuid,
            creator_uid=0,
            source_url=source_url,
        ),
    )
    if resp_obj.response.status_code == requests.codes.accepted:
        # hit the GET /files endpoint until we succeed.
        start_time = time.time()
        timeout_time = start_time + timeout_seconds
        while time.time() < timeout_time:
            try:
                self.assertHeadResponse(
                    f"/v1/files/{file_uuid}?replica={replica.name}",
                    requests.codes.ok)
                break
            except AssertionError:
                # Not landed yet; keep polling until the deadline.
                pass
            time.sleep(1)
        else:
            # while loop ran to the deadline without a successful HEAD.
            self.fail("Could not find the output file")
    return resp_obj
def test_has_query(self):
    """has_query() must report presence of a query key (even when repeated)
    and absence of keys that were never added."""
    builder = UrlBuilder().set(
        scheme="https",
        netloc="humancellatlas.org",
        path="/abc",
        query=[
            ("ghi", "1"),
            ("ghi", "2"),
        ],
        fragment="def")
    self.assertTrue(builder.has_query("ghi"))
    self.assertFalse(builder.has_query("abc"))
def _tombstone_bundle(self, replica: Replica, bundle_uuid: str, bundle_version: str = None):
    """Tombstone (DELETE) a bundle, optionally targeting a specific version."""
    tombstone_url = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
    tombstone_url.add_query("replica", replica.name)
    if bundle_version:
        tombstone_url.add_query("version", bundle_version)
    self.assertDeleteResponse(
        str(tombstone_url),
        requests.codes.ok,
        json_request_body={'reason': "notification test"},
        headers=get_auth_header())
def test_subscription_registration_succeeds_when_query_does_not_match_mappings(self):
    """Registering a query over a field absent from the index mappings must succeed."""
    # A subscription query may be registered before the referenced field
    # exists in the mappings (the field may in fact never exist there).
    es_query = {
        "query": {
            "bool": {
                "must": [{
                    "match": {
                        "assay.fake_field": "this is a negative test"
                    }
                }],
            }
        }
    }
    url = str(UrlBuilder()
              .set(path="/v1/subscriptions")
              .add_query("replica", self.replica.name)
              .add_query("subscription_type", "elasticsearch"))
    resp_obj = self.assertPutResponse(
        url,
        requests.codes.created,
        json_request_body=dict(es_query=es_query, **self.endpoint.to_dict()),
        headers=get_auth_header()
    )
    self.assertIn('uuid', resp_obj.json)
def _test_file_get_invalid_token(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
    """GET /v1/files with a malformed ``token`` query parameter must return 400."""
    src_key = generate_test_key()
    random_payload = os.urandom(1024)
    with tempfile.NamedTemporaryFile(delete=True) as tmp:
        tmp.write(random_payload)
        tmp.flush()
        # Upload while the temp file still exists (delete=True removes it on close).
        uploader.checksum_and_upload_file(tmp.name, src_key, "text/plain")
    source_url = f"{scheme}://{test_bucket}/{src_key}"
    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime_to_version_format(datetime.datetime.utcnow())
    # should be able to do this twice (i.e., same payload, different UUIDs)
    self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version)
              .add_query("token", "{}"))

    @eventually(30, 0.1)
    def try_get():
        self.assertGetResponse(url, requests.codes.bad_request, headers=get_auth_header())

    try_get()
def _test_file_get_latest(self, replica: Replica):
    """
    Verify we can successfully fetch the latest version of a file UUID.
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.found,
            headers=get_auth_header(),
            redirect_follow_retries=FILE_GET_RETRY_COUNT,
            min_retry_interval_header=RETRY_AFTER_INTERVAL,
            override_retry_interval=1,
        )
        # TODO: (ttung) verify more of the headers
        location = resp_obj.response.headers['Location']
        expected_sha1 = resp_obj.response.headers['X-DSS-SHA1']
        downloaded = requests.get(location)
        self.assertEqual(len(downloaded.content), 8685)
        self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], '8685')
        # verify that the downloaded data matches the stated checksum
        digest = hashlib.sha1(downloaded.content).hexdigest()
        self.assertEqual(digest, expected_sha1)
def _test_put_auth_errors(self, scheme, test_bucket):
    """Exercise the authentication failure modes of PUT /v1/files."""
    src_key = generate_test_key()
    source_url = f"{scheme}://{test_bucket}/{src_key}"
    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")
    builder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    builder.add_query("version", version)
    request_body = dict(bundle_uuid=bundle_uuid,
                        creator_uid=0,
                        source_url=source_url)
    self._test_auth_errors('put', str(builder), json_request_body=request_body)
def _test_file_get_latest(self, replica: Replica):
    """
    Verify we can successfully fetch the latest version of a file UUID.
    """
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name))
    for _ in range(FILE_GET_RETRY_COUNT):
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(
                url,
                [requests.codes.found, requests.codes.moved]
            )
        status = resp_obj.response.status_code
        if status == requests.codes.found:
            location = resp_obj.response.headers['Location']
            expected_sha1 = resp_obj.response.headers['X-DSS-SHA1']
            downloaded = requests.get(location)
            self.assertEqual(len(downloaded.content), 8685)
            self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], '8685')
            # verify that the downloaded data matches the stated checksum
            digest = hashlib.sha1(downloaded.content).hexdigest()
            self.assertEqual(digest, expected_sha1)
            # TODO: (ttung) verify more of the headers
            return
        elif status == requests.codes.moved:
            # Server asked us to retry; check the retry contract, then loop.
            retry_after = int(resp_obj.response.headers['Retry-After'])
            self.assertEqual(retry_after, RETRY_AFTER_INTERVAL)
            self.assertIn(url, resp_obj.response.headers['Location'])
    self.fail(f"Failed after {FILE_GET_RETRY_COUNT} retries.")
def _build_scroll_url(_scroll_id: str, per_page: int, replica: Replica, output_format: str) -> str:
    """Build the absolute URL a client follows to fetch the next page of search results."""
    builder = (UrlBuilder()
               .set(path="v1/search")
               .add_query('per_page', str(per_page))
               .add_query("replica", replica.name)
               .add_query("_scroll_id", _scroll_id)
               .add_query("output_format", output_format))
    # host_url supplies the scheme/host prefix for the relative search path.
    return request.host_url + str(builder)
def _get_subscription(self, uuid: str, replica: Replica):
    """Fetch one subscription by UUID and return its decoded JSON body."""
    subscription_url = str(UrlBuilder()
                           .set(path=f"/v1/subscriptions/{uuid}")
                           .add_query("replica", replica.name))
    resp = self.assertGetResponse(subscription_url, requests.codes.ok, headers=get_auth_header())
    return json.loads(resp.body)
def test_subscription_update(self, replica=Replica.aws):
    """
    Verify that enumeration returns each stored subscription with all of
    the fields it was created with.

    Fixes: the original docstring was copy-pasted from the enumeration
    test and did not describe this test; ``assertEquals`` is a deprecated
    alias of ``assertEqual``.
    """
    subscription_1 = self._put_subscription(
        {
            'callback_url': "https://nonsense.or.whatever",
            'method': "PUT",
        }, replica)
    subscription_2 = self._put_subscription(
        {
            'callback_url': "https://nonsense.or.whatever",
            'method': "PUT",
        }, replica)
    url = str(UrlBuilder().set(path="/v1/subscriptions").add_query(
        "replica", replica.name))
    resp = self.assertGetResponse(url, requests.codes.ok, headers=get_auth_header())
    subs = {
        sub['uuid']: sub
        for sub in json.loads(resp.body)['subscriptions']
    }
    self.assertIn(subscription_1['uuid'], subs)
    self.assertIn(subscription_2['uuid'], subs)
    # Every field we stored must round-trip through enumeration unchanged.
    for key in subscription_1:
        self.assertEqual(subscription_1[key], subs[subscription_1['uuid']][key])
    for key in subscription_2:
        self.assertEqual(subscription_2[key], subs[subscription_2['uuid']][key])
def test_subscription_enumerate(self, replica=Replica.aws):
    """
    Test recovery of subscriptions during enumeration.

    Fixes: typo "shuold" in the subTest label; ``assertEquals`` is a
    deprecated alias of ``assertEqual``.
    """
    subscription_1 = self._put_subscription(
        {
            'callback_url': "https://nonsense.or.whatever",
            'method': "PUT",
        }, replica)
    subscription_2 = self._put_subscription(
        {
            'callback_url': "https://nonsense.or.whatever",
            'method': "PUT",
        }, replica)
    url = str(UrlBuilder().set(path="/v1/subscriptions").add_query(
        "replica", replica.name))
    resp = self.assertGetResponse(url, requests.codes.ok, headers=get_auth_header())
    subs = {
        sub['uuid']: sub
        for sub in json.loads(resp.body)['subscriptions']
    }
    with self.subTest("Test user should own every returned subscription"):
        for sub in subs.values():
            self.assertEqual(self.owner, sub['owner'])
    with self.subTest("Test subscriptions should have been returned"):
        self.assertIn(subscription_1['uuid'], subs)
        self.assertIn(subscription_2['uuid'], subs)
def _test_file_head(self, replica: Replica):
    """HEAD /v1/files must expose the fixture file's metadata via X-DSS-* headers."""
    file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
    version = "2017-06-16T193604.240704Z"
    expected_headers = {
        'X-DSS-CREATOR-UID': '4321',
        'X-DSS-VERSION': version,
        'X-DSS-CONTENT-TYPE': 'text/plain',
        'X-DSS-SIZE': '11358',
        'X-DSS-CRC32C': 'e16e07b9',
        'X-DSS-S3-ETAG': '3b83ef96387f14655fc854ddc3c6bd57',
        'X-DSS-SHA1': '2b8b815229aa8a61e483fb4ba0588b8b6c491890',
        'X-DSS-SHA256': 'cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30',
    }
    url = str(UrlBuilder()
              .set(path="/v1/files/" + file_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertHeadResponse(url, [requests.codes.ok], headers=get_auth_header())
        self.assertHeaders(resp_obj.response, expected_headers)
def _test_bundle_get_directaccess(self, replica: Replica):
    """With directurls=true the bundle's file URLs must point straight at
    the replica's blob store, and the blob content must match its checksum."""
    schema = replica.storage_schema
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version)
              .add_query("directurls", "true"))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.ok)
        direct_url = resp_obj.json['bundle']['files'][0]['url']
        parsed = urllib.parse.urlparse(direct_url)
        self.assertEqual(parsed.scheme, schema)
        bucket = parsed.netloc
        key = parsed.path[1:]  # ignore the / part of the path.
        handle = Config.get_blobstore_handle(replica)
        contents = handle.get(bucket, key)
        # verify the blob content against the fixture's known SHA1
        digest = hashlib.sha1(contents).hexdigest()
        self.assertEqual(digest, "2b8b815229aa8a61e483fb4ba0588b8b6c491890")
def _test_bundle_get(self, replica: Replica):
    """GET a fixture bundle and verify every field of its single file entry."""
    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"
    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version))
    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(
            url,
            requests.codes.ok)
    bundle = resp_obj.json['bundle']
    self.assertEqual(bundle['uuid'], bundle_uuid)
    self.assertEqual(bundle['version'], version)
    self.assertEqual(bundle['creator_uid'], 12345)
    file_entry = bundle['files'][0]
    self.assertEqual(file_entry['content-type'], "text/plain")
    self.assertEqual(file_entry['size'], 11358)
    self.assertEqual(file_entry['crc32c'], "e16e07b9")
    self.assertEqual(file_entry['name'], "LICENSE")
    self.assertEqual(file_entry['s3_etag'], "3b83ef96387f14655fc854ddc3c6bd57")
    self.assertEqual(file_entry['sha1'], "2b8b815229aa8a61e483fb4ba0588b8b6c491890")
    self.assertEqual(file_entry['sha256'],
                     "cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30")
    self.assertEqual(file_entry['uuid'], "ce55fd51-7833-469b-be0b-5da88ebebfcd")
    self.assertEqual(file_entry['version'], "2017-06-16T193604.240704Z")
def _cleanup_subscription(self, uuid, subscription_type=None):
    """Delete the given subscription; the type defaults to 'elasticsearch'.

    Fix: the original passed the ``UrlBuilder`` object itself to
    ``assertDeleteResponse`` instead of its string form, inconsistent with
    every other call site in this file, which always passes ``str(url)``.
    """
    if not subscription_type:
        subscription_type = 'elasticsearch'
    url = str(UrlBuilder()
              .set(path=f"/v1/subscriptions/{uuid}")
              .add_query("replica", self.replica.name)
              .add_query('subscription_type', subscription_type))
    self.assertDeleteResponse(url, requests.codes.okay, headers=get_auth_header())
def upload_file(app, contents, replica):
    """Checksum *contents* (JSON-serialized), upload it to the given replica's
    test bucket with checksum metadata, then register it via PUT /v1/files.

    *replica* selects the destination: 'gcp' uploads to GCS, 'aws' to S3.
    Returns a (file_uuid, version) pair.
    """
    src_key = generate_test_key()
    payload = json.dumps(contents).encode()
    chunk_size = get_s3_multipart_chunk_size(len(payload))
    with io.BytesIO(payload) as body, ChecksummingSink(write_chunk_size=chunk_size) as sink:
        # Stream the payload through the sink to compute all four checksums.
        sink.write(body.read())
        sums = sink.get_checksums()
        metadata = {
            'hca-dss-crc32c': sums['crc32c'].lower(),
            'hca-dss-s3_etag': sums['s3_etag'].lower(),
            'hca-dss-sha1': sums['sha1'].lower(),
            'hca-dss-sha256': sums['sha256'].lower()
        }
        # Rewind so the upload re-reads the stream from the beginning.
        body.seek(0)
        if replica == 'gcp':
            gs_test_bucket = get_env("DSS_GS_BUCKET_TEST")
            gcp_client = gs_storage.Client.from_service_account_json(
                os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
            blob = gcp_client.bucket(gs_test_bucket).blob(src_key)
            blob.upload_from_file(body, content_type="application/json")
            # Metadata must be patched after the upload for GCS blobs.
            blob.metadata = metadata
            blob.patch()
            source_url = f"gs://{gs_test_bucket}/{src_key}"
        if replica == 'aws':
            # TODO: consider switching to unmanaged uploader (putobject w/blob)
            s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
            s3 = boto3.resource('s3')
            s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(
                body, ExtraArgs={"Metadata": metadata})
            source_url = f"s3://{s3_test_bucket}/{src_key}"
    file_uuid = str(uuid4())
    version = datetime_to_version_format(datetime.utcnow())
    builder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    builder.add_query("version", version)
    resp_obj = app.put(str(builder),
                       json=dict(creator_uid=0, source_url=source_url),
                       headers=get_auth_header())
    resp_obj.raise_for_status()
    return file_uuid, resp_obj.json()["version"]
def check_files_are_associated_with_bundle(self: typing.Any, bundle: TestBundle, replica: Replica):
    """Every file in *bundle* must be retrievable (302) and report its expected version."""
    for bundle_file in bundle.files:
        file_url = str(UrlBuilder()
                       .set(path='/v1/files/' + bundle_file.uuid)
                       .add_query('replica', replica.name))
        response = self.assertGetResponse(
            file_url,
            requests.codes.found,
            headers=get_auth_header()
        )
        self.assertEqual(bundle_file.version, response[0].headers['X-DSS-VERSION'])
def strip_next_url(next_url: str) -> str:
    """
    The API returns a fully-qualified url, but hitting self.assert* requires just
    the path. This method just strips the scheme and the host from the url.
    """
    pieces = urlsplit(next_url)
    relative = UrlBuilder().set(path=pieces.path,
                                query=parse_qsl(pieces.query),
                                fragment=pieces.fragment)
    return str(relative)
def get_bundle_and_check_files(self: typing.Any, bundle: TestBundle, replica: Replica):
    """GET the bundle, then verify its file listing and each file's accessibility."""
    bundle_url = str(UrlBuilder()
                     .set(path='/v1/bundles/' + bundle.uuid)
                     .add_query('replica', replica.name))
    response = self.assertGetResponse(bundle_url, requests.codes.ok)
    response_data = json.loads(response[1])
    self.check_bundle_contains_same_files(bundle, response_data['bundle']['files'])
    self.check_files_are_associated_with_bundle(bundle, replica)
def create_bundle(self: typing.Any, bundle: TestBundle, replica: Replica):
    """PUT the bundle and record the server-assigned version on *bundle*."""
    bundle_url = str(UrlBuilder()
                     .set(path='/v1/bundles/' + bundle.uuid)
                     .add_query('replica', replica.name))
    response = self.assertPutResponse(bundle_url,
                                      requests.codes.created,
                                      json_request_body=self.put_bundle_payload(bundle))
    response_data = json.loads(response[1])
    self.assertIs(type(response_data), dict)
    self.assertIn('version', response_data)
    bundle.version = response_data['version']