def enumerate(replica: str, prefix: typing.Optional[str] = None, token: typing.Optional[str] = None,
              per_page: int = PerPageBounds.per_page_max, search_after: typing.Optional[str] = None):
    """
    Enumerate bundles within a replica, one page at a time.

    :param replica: replica name to enumerate against
    :param prefix: uuid prefix used to filter enumeration
    :param token: used to page searches, should not be set by the user.
    :param per_page: max items per page to show, 10 <= per_page <= 500
    :param search_after: used to page searches, should not be set by the user.
    """
    # Restrict enumeration to the bundle key space, optionally narrowed by a uuid prefix.
    search_prefix = f'{BUNDLE_PREFIX}/{prefix.lower()}' if prefix else f'{BUNDLE_PREFIX}/'
    api_domain_name = f'https://{os.environ.get("API_DOMAIN_NAME")}'
    payload = {
        'dss_api': api_domain_name,
        'object': 'list',
        'per_page': per_page,
        'search_prefix': search_prefix,
        'event_timestamp': datetime_to_version_format(datetime.datetime.utcnow()),
    }  # type: typing.Any

    kwargs = {'replica': Replica[replica].name, 'prefix': search_prefix, 'per_page': per_page}
    if search_after:
        kwargs['search_after'] = search_after
    if token:
        kwargs['token'] = token
    payload.update(enumerate_available_bundles(**kwargs))  # type: ignore

    if payload['token'] is None:
        payload['token'] = ""

    if payload['page_count'] < per_page:
        # enumeration is complete: strip paging keys and answer 200.
        payload['has_more'] = False
        del payload['token']
        del payload['search_after']
        response = make_response(jsonify(payload), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        # More pages remain: answer 206 with a Link header pointing at the next page.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("search_after", payload['search_after'])
        next_url.replace_query("token", payload['token'])
        payload['has_more'] = True
        payload['link'] = f'{next_url}'
        response = make_response(jsonify(payload), requests.codes.partial)
        response.headers['Link'] = f"<{next_url}>; rel='next'"
        response.headers['X-OpenAPI-Pagination'] = 'true'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'bundles'
    return response
def list_events(replica: str,
                from_date: typing.Optional[str] = None,
                to_date: typing.Optional[str] = None,
                per_page: int = 1,
                token: typing.Optional[str] = None):
    """
    List flashflood event streams for a replica, paged by date range.

    :param replica: replica name whose event journal is read
    :param from_date: timestamp lower bound for the listing (presumably inclusive — the
        filter below only bounds against to_date; confirm against flashflood semantics)
    :param to_date: timestamp upper bound for the listing (exclusive, per the `< tdate` check)
    :param per_page: max event streams returned per page
    :param token: paging cursor (a stream's from_date); should not be set by the user
    :raises DSSException: 400 when the effective from_date is after to_date
    """
    if token:
        fdate = datetime_from_timestamp(token)
    else:
        fdate = datetime_from_timestamp(from_date) if from_date else datetime.min
    tdate = datetime_from_timestamp(to_date) if to_date else datetime.max
    if fdate > tdate:
        raise DSSException(400, "bad_request", "to_date must be greater than from_date")
    ff = Config.get_flashflood_handle(Replica[replica].flashflood_prefix_read)
    event_streams = list()
    # NOTE: this module defines an `enumerate` endpoint handler that shadows the builtin,
    # so an explicit counter is used here instead of builtins.enumerate.
    index = 0
    for event_stream in ff.list_event_streams(fdate, tdate):
        if datetime_from_timestamp(event_stream['from_date']) < tdate:
            event_streams.append(event_stream)
        else:
            break
        # Collect one stream beyond per_page so the paged/complete decision below works.
        if index == per_page:
            break
        index += 1
    if len(event_streams) <= per_page:
        # Everything fits on one page.
        response = make_response(jsonify(dict(event_streams=event_streams)), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        # The extra stream is withheld from this page; its from_date becomes the next cursor.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("token", event_streams[-1]['from_date'])
        link = f"<{next_url}>; rel='next'"
        response = make_response(
            jsonify(dict(event_streams=event_streams[:-1])), requests.codes.partial)
        response.headers['Link'] = link
        response.headers['X-OpenAPI-Pagination'] = 'true'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'event_streams'
    return response
def list_collections(per_page: int, start_at: int = 0):
    """
    Return a paged list of the authenticated user's collections.

    Collection uuids are indexed and called by the user's email in a dynamoDB table.

    :param int per_page: # of collections returned per paged response.
    :param int start_at: Where the next chunk of paged response should start at.
    :return: A dictionary containing a list of dictionaries looking like:
        {'collections': [{'uuid': uuid, 'version': version}, {'uuid': uuid, 'version': version}, ... , ...]}
    """
    # TODO: Replica is unused, so this does not use replica. Appropriate?
    owner = security.get_token_email(request.token_info)
    fqids = (CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{key}')
             for key in owner_lookup.get_collection_fqids_for_owner(owner))
    collections = [{'uuid': fqid.uuid, 'version': fqid.version} for fqid in fqids]

    remaining = len(collections) - start_at
    if remaining > per_page:
        # paged response: more collections remain beyond this chunk
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(start_at + per_page))
        page = collections[start_at:start_at + per_page]
        response = make_response(jsonify({'collections': page}), requests.codes.partial)
        response.headers['Link'] = f"<{next_url}>; rel='next'"
        response.headers['X-OpenAPI-Pagination'] = 'true'
    else:
        # single response returning all collections (or those remaining)
        page = collections[start_at:]
        response = make_response(jsonify({'collections': page}), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'collections'
    return response
def get(
        uuid: str,
        replica: str,
        per_page: int,
        version: typing.Optional[str] = None,
        directurls: bool = False,
        presignedurls: bool = False,
        token: typing.Optional[str] = None,
        start_at: int = 0,
):
    """
    Return a bundle's manifest, paged over its file list.

    :param uuid: bundle uuid
    :param replica: replica name to read the bundle from
    :param per_page: max number of files returned per page
    :param version: bundle version; the latest version is used when omitted
    :param directurls: attach a direct storage URL to each file (mutually exclusive with presignedurls)
    :param presignedurls: attach a presigned GET URL to each file (mutually exclusive with directurls)
    :param token: checkout polling token; should not be set by the user
    :param start_at: index into the bundle's file list where this page starts
    :raises DSSException: 400 for conflicting URL flags or a bad token, 404 when the
        bundle is missing, 500 when checkout fails
    """
    if directurls and presignedurls:
        raise DSSException(
            requests.codes.bad_request, "only_one_urltype",
            "only enable one of `directurls` or `presignedurls`")

    _replica = Replica[replica]
    bundle_metadata = get_bundle_manifest(uuid, _replica, version)
    if bundle_metadata is None:
        raise DSSException(404, "not_found", "Cannot find bundle!")
    if version is None:
        version = bundle_metadata[BundleMetadata.VERSION]

    if directurls or presignedurls:
        # URL generation requires a completed checkout; until it is ready, redirect the
        # caller back here with the polling token and a Retry-After hint.
        try:
            token, ready = verify_checkout(_replica, uuid, version, token)
        except TokenError as ex:
            raise DSSException(requests.codes.bad_request, "illegal_token", "Could not understand token", ex)
        except CheckoutError as ex:
            raise DSSException(requests.codes.server_error, "checkout_error", "Could not complete checkout", ex)
        if not ready:
            builder = UrlBuilder(request.url)
            builder.replace_query("token", token)
            response = redirect(str(builder), code=requests.codes.moved)
            headers = response.headers
            headers['Retry-After'] = RETRY_AFTER_INTERVAL
            return response

    all_files = bundle_metadata[BundleMetadata.FILES]

    link = None
    if len(all_files) - start_at > per_page:
        # More files remain after this page: build the next-page link, carrying the
        # resolved version and checkout token forward.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(start_at + per_page))
        next_url.replace_query("version", version)
        next_url.replace_query("token", token)
        link = f"<{next_url}>; rel='next'"

    # The blobstore handle is loop-invariant; fetch it once rather than per file.
    handle = Config.get_blobstore_handle(_replica) if presignedurls else None

    files = all_files[start_at:start_at + per_page]
    filesresponse = []  # type: typing.List[dict]
    for _file in files:
        file_version = {
            'name': _file[BundleFileMetadata.NAME],
            'content-type': _file[BundleFileMetadata.CONTENT_TYPE],
            'size': _file[BundleFileMetadata.SIZE],
            'uuid': _file[BundleFileMetadata.UUID],
            'version': _file[BundleFileMetadata.VERSION],
            'crc32c': _file[BundleFileMetadata.CRC32C],
            's3_etag': _file[BundleFileMetadata.S3_ETAG],
            'sha1': _file[BundleFileMetadata.SHA1],
            'sha256': _file[BundleFileMetadata.SHA256],
            'indexed': _file[BundleFileMetadata.INDEXED],
        }
        if directurls:
            file_version['url'] = str(UrlBuilder().set(
                scheme=_replica.storage_schema,
                netloc=_replica.checkout_bucket,
                path="{}/{}".format(
                    get_dst_bundle_prefix(uuid, bundle_metadata[BundleMetadata.VERSION]),
                    _file[BundleFileMetadata.NAME],
                ),
            ))
        elif presignedurls:
            file_version['url'] = handle.generate_presigned_GET_url(
                _replica.checkout_bucket,
                "{}/{}".format(
                    get_dst_bundle_prefix(uuid, bundle_metadata[BundleMetadata.VERSION]),
                    _file[BundleFileMetadata.NAME],
                ),
            )
        filesresponse.append(file_version)

    response_body = dict(bundle=dict(
        uuid=uuid,
        version=bundle_metadata[BundleMetadata.VERSION],
        files=filesresponse,
        creator_uid=bundle_metadata[BundleMetadata.CREATOR_UID],
    ))
    if link is None:
        response = make_response(jsonify(response_body), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        response = make_response(jsonify(response_body), requests.codes.partial)
        response.headers['X-OpenAPI-Pagination'] = 'true'
        response.headers['Link'] = link
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'bundle.files'
    return response
def test_replace_query_mulitple(self):
    """replace_query collapses repeated occurrences of a query param into one replaced value."""
    # NOTE(review): "mulitple" typo kept in the method name to avoid churning test IDs.
    url = UrlBuilder("https://humancellatlas.org/abc?def=2&def=boo#ghi")
    url.replace_query("def", "4")
    expected = "https://humancellatlas.org/abc?def=4#ghi"
    self.assertEqual(expected, str(url))
def get_helper(uuid: str, replica: Replica, version: typing.Optional[str] = None,
               token: typing.Optional[str] = None, directurl: bool = False,
               content_disposition: typing.Optional[str] = None):
    """
    Shared implementation for GET/HEAD of a file.

    Resolves the latest version when none is given, loads the file metadata, and
    for GET redirects the caller to the checked-out blob (direct or presigned URL),
    retrying via self-redirect until checkout is ready. HEAD gets an empty 200.
    Both paths attach X-DSS-* metadata headers to the final response.

    :param uuid: file uuid
    :param replica: replica to read from
    :param version: file version; latest is resolved when omitted
    :param token: checkout polling token; should not be set by the user
    :param directurl: redirect to a direct storage URL instead of a presigned one
    :param content_disposition: forwarded to the presigned URL so browsers download
        rather than open the content
    :raises DSSException: 404 when the file cannot be found, 422 on a checksum error
    """
    with tracing.Subsegment('parameterization'):
        handle = Config.get_blobstore_handle(replica)
        bucket = replica.bucket

    if version is None:
        with tracing.Subsegment('find_latest_version'):
            # list the files and find the one that is the most recent.
            # Versions sort lexicographically, so max-by-string picks the latest.
            prefix = "files/{}.".format(uuid)
            for matching_file in handle.list(bucket, prefix):
                matching_file = matching_file[len(prefix):]
                if version is None or matching_file > version:
                    version = matching_file

    if version is None:
        # no matches!
        raise DSSException(404, "not_found", "Cannot find file!")

    # retrieve the file metadata.
    try:
        with tracing.Subsegment('load_file'):
            file_metadata = json.loads(
                handle.get(
                    bucket,
                    f"files/{uuid}.{version}"
                ).decode("utf-8"))
    except BlobNotFoundError:
        # The blob is missing; check async upload state to distinguish an in-flight
        # failure (surface it) from a genuinely absent file (404).
        key = f"files/{uuid}.{version}"
        item = AsyncStateItem.get(key)
        if isinstance(item, S3CopyEtagError):
            raise DSSException(
                requests.codes.unprocessable,
                "missing_checksum",
                "Incorrect s3-etag"
            )
        elif isinstance(item, AsyncStateError):
            raise item
        else:
            raise DSSException(404, "not_found", "Cannot find file!")

    with tracing.Subsegment('make_path'):
        blob_path = compose_blob_key(file_metadata)

    if request.method == "GET":
        token, ready = _verify_checkout(replica, token, file_metadata, blob_path)
        if ready:
            if directurl:
                # Direct URL into the checkout bucket (e.g. s3:// / gs:// scheme).
                response = redirect(str(UrlBuilder().set(
                    scheme=replica.storage_schema,
                    netloc=replica.checkout_bucket,
                    path=get_dst_key(blob_path)
                )))
            else:
                if content_disposition:
                    # can tell a browser to treat the response link as a download rather than open a new tab
                    response = redirect(handle.generate_presigned_GET_url(
                        replica.checkout_bucket,
                        get_dst_key(blob_path),
                        response_content_disposition=content_disposition))
                else:
                    response = redirect(handle.generate_presigned_GET_url(
                        replica.checkout_bucket,
                        get_dst_key(blob_path)))
        else:
            # Checkout not finished: bounce the caller back here with the token so
            # they poll again after Retry-After. (301 == requests.codes.moved.)
            with tracing.Subsegment('make_retry'):
                builder = UrlBuilder(request.url)
                builder.replace_query("token", token)
                response = redirect(str(builder), code=301)
                headers = response.headers
                headers['Retry-After'] = RETRY_AFTER_INTERVAL
                return response
    else:
        # HEAD: no body, metadata is conveyed entirely via headers below.
        response = make_response('', 200)

    with tracing.Subsegment('set_headers'):
        headers = response.headers
        headers['X-DSS-CREATOR-UID'] = file_metadata[FileMetadata.CREATOR_UID]
        headers['X-DSS-VERSION'] = version
        headers['X-DSS-CONTENT-TYPE'] = file_metadata[FileMetadata.CONTENT_TYPE]
        headers['X-DSS-SIZE'] = file_metadata[FileMetadata.SIZE]
        headers['X-DSS-CRC32C'] = file_metadata[FileMetadata.CRC32C]
        headers['X-DSS-S3-ETAG'] = file_metadata[FileMetadata.S3_ETAG]
        headers['X-DSS-SHA1'] = file_metadata[FileMetadata.SHA1]
        headers['X-DSS-SHA256'] = file_metadata[FileMetadata.SHA256]

    return response