def put_bundle(self,
               replica: Replica,
               bundle_uuid: str,
               files: typing.Iterable[typing.Tuple[str, str, str]],
               bundle_version: typing.Optional[str] = None,
               expected_code: int = requests.codes.created):
    builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid).add_query("replica", replica.name)
    if bundle_version:
        builder.add_query("version", bundle_version)
    url = str(builder)

    resp_obj = self.assertPutResponse(
        url,
        expected_code,
        json_request_body=dict(
            files=[
                dict(
                    uuid=file_uuid,
                    version=file_version,
                    name=file_name,
                    indexed=False,
                )
                for file_uuid, file_version, file_name in files
            ],
            creator_uid=12345,
        ),
    )

    if 200 <= resp_obj.response.status_code < 300:
        self.assertHeaders(
            resp_obj.response,
            {
                'content-type': "application/json",
            })
        self.assertIn('version', resp_obj.json)

    return resp_obj
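# A minimal usage sketch (hypothetical test method, not part of the original
# suite): upload a fixture file with upload_file_wait (defined below), then
# reference it in a new bundle via put_bundle.
def _example_put_bundle(self):
    fixtures_bucket = self.get_test_fixture_bucket(Replica.aws.name)
    file_uuid = str(uuid.uuid4())
    file_version = datetime_to_version_format(datetime.datetime.utcnow())
    self.upload_file_wait(
        f"s3://{fixtures_bucket}/test_good_source_data/0",
        Replica.aws, file_uuid, file_version=file_version)
    self.put_bundle(
        Replica.aws,
        str(uuid.uuid4()),
        files=[(file_uuid, file_version, "example.json")],
        bundle_version=datetime_to_version_format(datetime.datetime.utcnow()))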
def upload_file(
        self: typing.Any,
        source_url: str,
        file_uuid: str,
        bundle_uuid: typing.Optional[str] = None,
        version: typing.Optional[str] = None,
        expected_code: int = requests.codes.created,
):
    bundle_uuid = str(uuid.uuid4()) if bundle_uuid is None else bundle_uuid
    if version is None:
        timestamp = datetime.datetime.utcnow()
        version = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")

    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    if version != 'missing':
        urlbuilder.add_query("version", version)

    resp_obj = self.assertPutResponse(
        str(urlbuilder),
        expected_code,
        json_request_body=dict(
            bundle_uuid=bundle_uuid,
            creator_uid=0,
            source_url=source_url,
        ),
        headers=get_auth_header())
    if resp_obj.response.status_code == requests.codes.created:
        self.assertHeaders(
            resp_obj.response,
            {
                'content-type': "application/json",
            })
        self.assertIn('version', resp_obj.json)
def upload_file(self, contents):
    s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
    src_key = generate_test_key()
    s3 = boto3.resource('s3')
    with io.BytesIO(json.dumps(contents).encode()) as fh, ChecksummingSink() as sink:
        sink.write(fh.read())
        sums = sink.get_checksums()
        metadata = {
            'hca-dss-crc32c': sums['crc32c'].lower(),
            'hca-dss-s3_etag': sums['s3_etag'].lower(),
            'hca-dss-sha1': sums['sha1'].lower(),
            'hca-dss-sha256': sums['sha256'].lower(),
        }
        fh.seek(0)
        # TODO: consider switching to unmanaged uploader (putobject w/blob)
        s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(
            fh, ExtraArgs={"Metadata": metadata})
    source_url = f"s3://{s3_test_bucket}/{src_key}"

    file_uuid = str(uuid4())
    version = datetime_to_version_format(datetime.utcnow())
    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    urlbuilder.add_query("version", version)

    resp_obj = self.assertPutResponse(
        str(urlbuilder),
        requests.codes.created,
        json_request_body=dict(creator_uid=0, source_url=source_url))
    return file_uuid, resp_obj.json["version"]
def upload_file_wait(
        self: typing.Any,
        source_url: str,
        replica: Replica,
        file_uuid: typing.Optional[str] = None,
        file_version: typing.Optional[str] = None,
        bundle_uuid: typing.Optional[str] = None,
        timeout_seconds: int = 120,
        expect_async: typing.Optional[bool] = None,
) -> DSSAssertResponse:
    """
    Upload a file.  If the request is handled asynchronously, wait until the
    file has landed in the data store.
    """
    file_uuid = str(uuid.uuid4()) if file_uuid is None else file_uuid
    bundle_uuid = str(uuid.uuid4()) if bundle_uuid is None else bundle_uuid
    if expect_async is True:
        expected_codes = requests.codes.accepted
    elif expect_async is False:
        expected_codes = requests.codes.created
    else:
        expected_codes = requests.codes.created, requests.codes.accepted

    if file_version is None:
        timestamp = datetime.datetime.utcnow()
        file_version = datetime_to_version_format(timestamp)
    url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
    url.add_query("version", file_version)

    resp_obj = self.assertPutResponse(
        str(url),
        expected_codes,
        json_request_body=dict(
            bundle_uuid=bundle_uuid,
            creator_uid=0,
            source_url=source_url,
        ),
    )

    if resp_obj.response.status_code == requests.codes.accepted:
        # hit the GET /files endpoint until we succeed.
        start_time = time.time()
        timeout_time = start_time + timeout_seconds

        while time.time() < timeout_time:
            try:
                self.assertHeadResponse(
                    f"/v1/files/{file_uuid}?replica={replica.name}",
                    requests.codes.ok)
                break
            except AssertionError:
                pass
            time.sleep(1)
        else:
            self.fail("Could not find the output file")

    return resp_obj
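# A minimal usage sketch (hypothetical): expect_async=False makes the helper
# assert a synchronous 201 Created; with the default of None it accepts
# either 201 or 202 and polls HEAD /files until an async copy lands.
def _example_upload_file_wait(self):
    fixtures_bucket = self.get_test_fixture_bucket(Replica.aws.name)
    resp = self.upload_file_wait(
        f"s3://{fixtures_bucket}/test_good_source_data/0",
        Replica.aws,
        expect_async=False)
    self.assertEqual(resp.response.status_code, requests.codes.created)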
def _tombstone_bundle(self,
                      replica: Replica,
                      bundle_uuid: str,
                      bundle_version: typing.Optional[str] = None):
    builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid).add_query("replica", replica.name)
    if bundle_version:
        builder.add_query("version", bundle_version)
    url = str(builder)

    self.assertDeleteResponse(
        url,
        requests.codes.ok,
        json_request_body={'reason': "notification test"},
        headers=get_auth_header())
def delete_bundle(
        self,
        replica: Replica,
        bundle_uuid: str,
        bundle_version: typing.Optional[str] = None,
        authorized: bool = True):
    # make delete request
    url_builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid).add_query('replica', replica.name)
    if bundle_version:
        url_builder = url_builder.add_query('version', bundle_version)
    url = str(url_builder)

    json_request_body = dict(reason="reason")
    if bundle_version:
        json_request_body['version'] = bundle_version

    expected_code = requests.codes.ok if authorized else requests.codes.forbidden

    # delete and check results
    return self.assertDeleteResponse(
        url,
        expected_code,
        json_request_body=json_request_body,
        headers=get_auth_header(authorized=authorized),
    )
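# A minimal usage sketch (hypothetical): an unauthorized delete is expected
# to fail with 403 Forbidden, after which an authorized delete of the same
# bundle succeeds with 200 OK.
def _example_delete_bundle(self, bundle_uuid: str, bundle_version: str):
    self.delete_bundle(Replica.aws, bundle_uuid, bundle_version, authorized=False)
    self.delete_bundle(Replica.aws, bundle_uuid, bundle_version, authorized=True)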
def _test_put_auth_errors(self, scheme, test_bucket):
    src_key = generate_test_key()
    source_url = f"{scheme}://{test_bucket}/{src_key}"

    file_uuid = str(uuid.uuid4())
    bundle_uuid = str(uuid.uuid4())
    timestamp = datetime.datetime.utcnow()
    version = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")

    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    urlbuilder.add_query("version", version)
    self._test_auth_errors(
        'put',
        str(urlbuilder),
        json_request_body=dict(
            bundle_uuid=bundle_uuid,
            creator_uid=0,
            source_url=source_url))
def build_url(self, url_params=None):
    url = UrlBuilder().set(path="/v1/search").add_query("replica", self.replica.name)
    if url_params:
        for param, value in url_params.items():
            url = url.add_query(param, value)
    return str(url)
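# A minimal usage sketch (hypothetical): build_url always includes the
# replica query parameter; anything passed in url_params is appended after it.
def _example_build_url(self):
    url = self.build_url({"per_page": "10"})
    self.assertIn("replica=", url)
    self.assertIn("per_page=10", url)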
def upload_file(app, contents, replica):
    src_key = generate_test_key()
    encoded = json.dumps(contents).encode()
    chunk_size = get_s3_multipart_chunk_size(len(encoded))
    with io.BytesIO(encoded) as fh, ChecksummingSink(write_chunk_size=chunk_size) as sink:
        sink.write(fh.read())
        sums = sink.get_checksums()
        metadata = {
            'hca-dss-crc32c': sums['crc32c'].lower(),
            'hca-dss-s3_etag': sums['s3_etag'].lower(),
            'hca-dss-sha1': sums['sha1'].lower(),
            'hca-dss-sha256': sums['sha256'].lower(),
        }
        fh.seek(0)

        if replica == 'gcp':
            gs_test_bucket = get_env("DSS_GS_BUCKET_TEST")
            gcp_client = gs_storage.Client.from_service_account_json(
                os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
            gs_bucket = gcp_client.bucket(gs_test_bucket)
            blob = gs_bucket.blob(src_key)
            blob.upload_from_file(fh, content_type="application/json")
            blob.metadata = metadata
            blob.patch()
            source_url = f"gs://{gs_test_bucket}/{src_key}"

        if replica == 'aws':
            # TODO: consider switching to unmanaged uploader (putobject w/blob)
            s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
            s3 = boto3.resource('s3')
            s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(
                fh, ExtraArgs={"Metadata": metadata})
            source_url = f"s3://{s3_test_bucket}/{src_key}"

    file_uuid = str(uuid4())
    version = datetime_to_version_format(datetime.utcnow())
    urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
    urlbuilder.add_query("version", version)

    resp_obj = app.put(
        str(urlbuilder),
        json=dict(creator_uid=0, source_url=source_url),
        headers=get_auth_header())
    resp_obj.raise_for_status()
    return file_uuid, resp_obj.json()["version"]
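# A minimal usage sketch (hypothetical; 'app' is assumed to be a
# requests-style test client): upload a small JSON document to one replica
# and get back the new file's UUID and server-assigned version.
def _example_upload_to_replica(app):
    file_uuid, version = upload_file(app, {"hello": "world"}, 'aws')
    assert file_uuid and version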
def fetch_collection_paging_response(self, codes, replica: str, per_page: int):
    """
    GET /collections and iterate through the paging responses containing all
    of a user's collections, following the Link header until it is absent.
    Returns True if at least one 206 paging response was seen.
    """
    url = UrlBuilder().set(path="/v1/collections/")
    url.add_query("replica", replica)
    url.add_query("per_page", str(per_page))
    resp_obj = self.assertGetResponse(
        str(url), codes, headers=get_auth_header(authorized=True))
    if codes == requests.codes.bad_request:
        return True

    link_header = resp_obj.response.headers.get('Link')
    paging_response = False
    while link_header:
        # Make sure we're getting the expected response status code
        self.assertEqual(resp_obj.response.status_code, requests.codes.partial)
        paging_response = True

        link = parse_header_links(link_header)[0]
        self.assertEqual(link['rel'], 'next')
        parsed = urlsplit(link['url'])
        url = UrlBuilder().set(path=parsed.path,
                               query=parse_qsl(parsed.query),
                               fragment=parsed.fragment)

        self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'], 'true')
        self.assertEqual(resp_obj.response.headers['X-OpenAPI-Paginated-Content-Key'], 'collections')
        resp_obj = self.assertGetResponse(
            str(url), expected_code=codes, headers=get_auth_header(authorized=True))
        link_header = resp_obj.response.headers.get('Link')

    self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'], 'false')
    self.assertEqual(resp_obj.response.status_code, requests.codes.ok)
    return paging_response
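# A minimal usage sketch (hypothetical): with per_page smaller than the
# number of collections owned by the test user, the helper should follow at
# least one 206 Partial Content page and return True.
def _example_collection_paging(self):
    saw_paging = self.fetch_collection_paging_response(
        (requests.codes.ok, requests.codes.partial), "aws", per_page=10)
    self.assertTrue(saw_paging)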
def test_file_put_large_incorrect_s3_etag(self) -> None:
    bucket = self.s3_test_bucket
    src_key = generate_test_key()
    src_data = os.urandom(ASYNC_COPY_THRESHOLD + 1)

    # upload file with incompatible s3 part size
    self._upload_file_to_mock_ingest(S3Uploader, bucket, src_key, src_data,
                                     s3_part_size=6 * 1024 * 1024)

    file_uuid = str(uuid.uuid4())
    timestamp = datetime.datetime.utcnow()
    file_version = datetime_to_version_format(timestamp)
    url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
    url.add_query("version", file_version)
    source_url = f"s3://{bucket}/{src_key}"

    # put file into DSS, starting an async copy which will fail
    expected_codes = (requests.codes.accepted,)
    self.assertPutResponse(str(url),
                           expected_codes,
                           json_request_body=dict(
                               file_uuid=file_uuid,
                               creator_uid=0,
                               source_url=source_url,
                           ),
                           headers=get_auth_header())

    # should eventually get unprocessable after async copy fails
    @eventually(120, 1)
    def tryHead():
        self.assertHeadResponse(
            f"/v1/files/{file_uuid}?replica=aws&version={file_version}",
            requests.codes.unprocessable)
    tryHead()

    # should get unprocessable on GCP too
    self.assertHeadResponse(
        f"/v1/files/{file_uuid}?replica=gcp&version={file_version}",
        requests.codes.unprocessable)
def put_bundles_response(self, path, replica, expected_code):
    """
    Uploads a file from fixtures to the DSS, then adds it to a bundle under
    the member name 'path'.  Asserts that the expected codes were received
    at each step.
    """
    fixtures_bucket = self.get_test_fixture_bucket(replica.name)

    # source a file to upload
    file_version = datetime_to_version_format(datetime.datetime.utcnow())
    bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
    bundle_uuid = str(uuid.uuid4())
    file_uuid = str(uuid.uuid4())
    storage_scheme = 's3' if replica.name == 'aws' else 'gs'

    # upload a file from test fixtures
    self.upload_file_wait(
        f"{storage_scheme}://{fixtures_bucket}/test_good_source_data/0",
        replica,
        file_uuid,
        file_version=file_version,
        bundle_uuid=bundle_uuid)

    # add that file to a bundle
    builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
    builder.add_query("replica", replica.name)
    builder.add_query("version", bundle_version)
    url = str(builder)
    self.assertPutResponse(
        url,
        expected_code,
        json_request_body=dict(
            files=[
                dict(uuid=file_uuid,
                     version=file_version,
                     name=path,
                     indexed=False)
            ],
            creator_uid=0,
        ),
        headers=get_auth_header())
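# A minimal usage sketch (hypothetical): register a bundle containing a
# single fixture file under the given member name and expect 201 Created.
def _example_put_bundles_response(self):
    self.put_bundles_response("data/example.json", Replica.aws,
                              requests.codes.created)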
def test_add_query(self):
    builder = UrlBuilder().set(
        scheme="https",
        netloc="humancellatlas.org",
        path="/abc",
        query=[
            ("ghi", "1"),
            ("ghi", "2"),
        ],
        fragment="def")
    self.assertTrue(builder.has_query("ghi"))
    self.assertFalse(builder.has_query("abc"))
    self.assertEqual("https://humancellatlas.org/abc?ghi=1&ghi=2#def", str(builder))

    builder.add_query("abc", "3")
    self.assertTrue(builder.has_query("ghi"))
    self.assertTrue(builder.has_query("abc"))
    self.assertEqual("https://humancellatlas.org/abc?ghi=1&ghi=2&abc=3#def", str(builder))