Beispiel #1
0
def enumerate(replica: str,
              prefix: typing.Optional[str] = None,
              token: typing.Optional[str] = None,
              per_page: int = PerPageBounds.per_page_max,
              search_after: typing.Optional[str] = None):
    """
    Return one page of the bundle listing for a replica.

    :param replica: name of the replica to enumerate against
    :param prefix: uuid prefix used to narrow the enumeration; lower-cased before use
    :param token: paging cursor, should not be set by the user.
    :param per_page: max items per page to show, 10 <= per_page <= 500
    :param search_after: paging cursor, should not be set by the user.
    :return: a ``requests.codes.ok`` response when the reported ``page_count`` is
        below ``per_page``; otherwise a ``requests.codes.partial`` response whose
        ``Link`` header and ``link`` field point at the next page.
    """
    search_prefix = f'{BUNDLE_PREFIX}/{prefix.lower()}' if prefix else f'{BUNDLE_PREFIX}/'

    payload = {
        'dss_api': f'https://{os.environ.get("API_DOMAIN_NAME")}',
        'object': 'list',
        'per_page': per_page,
        'search_prefix': search_prefix,
        'event_timestamp': datetime_to_version_format(datetime.datetime.utcnow()),
    }  # type: typing.Any

    # Only forward the paging cursors that were actually supplied.
    lookup_args = {
        'replica': Replica[replica].name,
        'prefix': search_prefix,
        'per_page': per_page,
    }
    if search_after:
        lookup_args['search_after'] = search_after
    if token:
        lookup_args['token'] = token

    payload.update(enumerate_available_bundles(**lookup_args))  # type: ignore
    if payload['token'] is None:
        payload['token'] = ""

    more_pages = not (payload['page_count'] < per_page)
    if more_pages:
        next_url = UrlBuilder(request.url)
        next_url.replace_query("search_after", payload['search_after'])
        next_url.replace_query("token", payload['token'])
        link = f"<{next_url}>; rel='next'"
        payload['has_more'] = True
        payload['link'] = f'{next_url}'
        response = make_response(jsonify(payload), requests.codes.partial)
        response.headers['Link'] = link
        response.headers['X-OpenAPI-Pagination'] = 'true'
    else:
        # Enumeration is complete: the paging cursors are stripped from the body.
        payload['has_more'] = False
        for cursor_key in ('token', 'search_after'):
            del payload[cursor_key]
        response = make_response(jsonify(payload), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'

    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'bundles'
    return response
Beispiel #2
0
def list_events(replica: str,
                from_date: str = None,
                to_date: str = None,
                per_page: int = 1,
                token: str = None):
    """
    Return one page of event streams for a replica within a time window.

    :param replica: replica name, looked up in ``Replica``
    :param from_date: timestamp for the start of the window; ``datetime.min`` when omitted
    :param to_date: timestamp for the end of the window; ``datetime.max`` when omitted
    :param per_page: number of event streams returned per page
    :param token: paging cursor; when given it replaces ``from_date`` as the window start
    :return: a ``requests.codes.ok`` response when at most ``per_page`` streams were
        collected, otherwise a ``requests.codes.partial`` response with a ``Link``
        header pointing at the next page
    :raises DSSException: 400 when the window start is later than the window end
    """
    if token:
        window_start = datetime_from_timestamp(token)
    elif from_date:
        window_start = datetime_from_timestamp(from_date)
    else:
        window_start = datetime.min
    window_end = datetime_from_timestamp(to_date) if to_date else datetime.max

    if window_start > window_end:
        raise DSSException(400, "bad_request",
                           "to_date must be greater than from_date")

    ff = Config.get_flashflood_handle(Replica[replica].flashflood_prefix_read)

    # Collect up to per_page + 1 streams: the extra one only signals that
    # another page exists and supplies the cursor for it.
    collected = []
    for index, stream in enumerate(ff.list_event_streams(window_start, window_end)):
        if not (datetime_from_timestamp(stream['from_date']) < window_end):
            break
        collected.append(stream)
        if index == per_page:
            break

    if len(collected) > per_page:
        next_url = UrlBuilder(request.url)
        next_url.replace_query("token", collected[-1]['from_date'])
        link = f"<{next_url}>; rel='next'"
        body = dict(event_streams=collected[:-1])
        response = make_response(jsonify(body), requests.codes.partial)
        response.headers['Link'] = link
        response.headers['X-OpenAPI-Pagination'] = 'true'
    else:
        body = dict(event_streams=collected)
        response = make_response(jsonify(body), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'

    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'event_streams'
    return response
Beispiel #3
0
def list_collections(per_page: int, start_at: int = 0):
    """
    Return a page of the collections owned by the requesting user.

    The owner is the email taken from the request's token info; the owner's
    collection keys come from ``owner_lookup`` (per the original notes, a
    DynamoDB table indexed by that email).

    :param int per_page: # of collections returned per paged response.
    :param int start_at: Index at which this page of the response starts.
    :return: A response whose JSON body looks like:
        {'collections': [{'uuid': uuid, 'version': version}, {'uuid': uuid, 'version': version}, ... , ...]}
        It is ``requests.codes.partial`` with a ``Link`` header while more
        collections remain after this page, and ``requests.codes.ok`` otherwise.
    """
    # TODO: Replica is unused, so this does not use replica.  Appropriate?
    owner = security.get_token_email(request.token_info)

    fqids = (CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{key}')
             for key in owner_lookup.get_collection_fqids_for_owner(owner))
    collections = [{'uuid': fqid.uuid, 'version': fqid.version} for fqid in fqids]

    page_end = start_at + per_page
    remaining = len(collections) - start_at

    if remaining > per_page:
        # Paged response: more collections remain beyond this page.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(page_end))
        page = collections[start_at:page_end]
        response = make_response(jsonify({'collections': page}),
                                 requests.codes.partial)
        response.headers['Link'] = f"<{next_url}>; rel='next'"
        response.headers['X-OpenAPI-Pagination'] = 'true'
    else:
        # Final (or only) response: everything from start_at onwards.
        page = collections[start_at:]
        response = make_response(jsonify({'collections': page}),
                                 requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'

    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'collections'
    return response
Beispiel #4
0
def get(
    uuid: str,
    replica: str,
    per_page: int,
    version: str = None,
    directurls: bool = False,
    presignedurls: bool = False,
    token: str = None,
    start_at: int = 0,
):
    """
    Return one page of a bundle's file listing, optionally with per-file URLs.

    :param uuid: uuid of the bundle
    :param replica: replica name, looked up in ``Replica``
    :param per_page: maximum number of files listed in one response
    :param version: bundle version; taken from the bundle manifest when omitted
    :param directurls: add a storage URL (replica scheme + checkout bucket) to each file
    :param presignedurls: add a presigned GET URL for the checkout bucket to each file
    :param token: checkout token; passed to ``verify_checkout`` when URLs are requested
    :param start_at: index of the first file of this page
    :return: ``requests.codes.ok`` when this page reaches the end of the file list,
        ``requests.codes.partial`` with a ``Link`` header when more files remain,
        or a ``requests.codes.moved`` redirect carrying ``Retry-After`` while the
        checkout is not ready yet
    :raises DSSException: bad_request when both URL flavours are requested or the
        token is rejected, 404 when the bundle is not found, server_error when
        the checkout fails
    """
    if directurls and presignedurls:
        raise DSSException(
            requests.codes.bad_request, "only_one_urltype",
            "only enable one of `directurls` or `presignedurls`")

    _replica = Replica[replica]
    bundle_metadata = get_bundle_manifest(uuid, _replica, version)
    if bundle_metadata is None:
        raise DSSException(404, "not_found", "Cannot find bundle!")
    if version is None:
        version = bundle_metadata[BundleMetadata.VERSION]

    if directurls or presignedurls:
        try:
            token, ready = verify_checkout(_replica, uuid, version, token)
        except TokenError as ex:
            raise DSSException(requests.codes.bad_request, "illegal_token",
                               "Could not understand token", ex)
        except CheckoutError as ex:
            raise DSSException(requests.codes.server_error, "checkout_error",
                               "Could not complete checkout", ex)
        if not ready:
            # Send the client back to the same URL, now carrying the checkout token.
            builder = UrlBuilder(request.url)
            builder.replace_query("token", token)
            response = redirect(str(builder), code=requests.codes.moved)
            headers = response.headers
            headers['Retry-After'] = RETRY_AFTER_INTERVAL
            return response

    all_files = bundle_metadata[BundleMetadata.FILES]

    link = None
    if len(all_files) - start_at > per_page:
        # Pin version and token in the next-page URL so every page is served
        # with the same values as this one.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(start_at + per_page))
        next_url.replace_query("version", version)
        next_url.replace_query("token", token)
        link = f"<{next_url}>; rel='next'"

    files = all_files[start_at:start_at + per_page]

    # The destination prefix and the blobstore handle do not depend on the
    # individual file, so they are resolved once per request rather than once
    # per file. Nothing is resolved for an empty page.
    url_for = None
    if files:
        url_for = _bundle_file_url_builder(
            _replica,
            uuid,
            bundle_metadata[BundleMetadata.VERSION],
            directurls,
            presignedurls,
        )

    filesresponse = []  # type: typing.List[dict]
    for _file in files:
        file_version = {
            'name': _file[BundleFileMetadata.NAME],
            'content-type': _file[BundleFileMetadata.CONTENT_TYPE],
            'size': _file[BundleFileMetadata.SIZE],
            'uuid': _file[BundleFileMetadata.UUID],
            'version': _file[BundleFileMetadata.VERSION],
            'crc32c': _file[BundleFileMetadata.CRC32C],
            's3_etag': _file[BundleFileMetadata.S3_ETAG],
            'sha1': _file[BundleFileMetadata.SHA1],
            'sha256': _file[BundleFileMetadata.SHA256],
            'indexed': _file[BundleFileMetadata.INDEXED],
        }
        if url_for is not None:
            file_version['url'] = url_for(_file[BundleFileMetadata.NAME])
        filesresponse.append(file_version)

    response_body = dict(bundle=dict(
        uuid=uuid,
        version=bundle_metadata[BundleMetadata.VERSION],
        files=filesresponse,
        creator_uid=bundle_metadata[BundleMetadata.CREATOR_UID],
    ))

    if link is None:
        response = make_response(jsonify(response_body), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        response = make_response(jsonify(response_body),
                                 requests.codes.partial)
        response.headers['X-OpenAPI-Pagination'] = 'true'
        response.headers['Link'] = link

    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'bundle.files'
    return response


def _bundle_file_url_builder(replica, uuid: str, version: str,
                             directurls: bool, presignedurls: bool):
    """Return a ``file name -> url`` callable for the requested URL flavour, or ``None`` if no URLs were requested."""
    if not (directurls or presignedurls):
        return None

    dst_prefix = get_dst_bundle_prefix(uuid, version)

    if directurls:
        def direct_url(name: str) -> str:
            return str(UrlBuilder().set(
                scheme=replica.storage_schema,
                netloc=replica.checkout_bucket,
                path="{}/{}".format(dst_prefix, name),
            ))
        return direct_url

    handle = Config.get_blobstore_handle(replica)

    def presigned_url(name: str) -> str:
        return handle.generate_presigned_GET_url(
            replica.checkout_bucket,
            "{}/{}".format(dst_prefix, name),
        )
    return presigned_url
Beispiel #5
0
 def test_replace_query_mulitple(self):
     """Replacing a query key that occurs several times leaves one entry with the new value."""
     original_url = "https://humancellatlas.org/abc?def=2&def=boo#ghi"
     expected_url = "https://humancellatlas.org/abc?def=4#ghi"

     url = UrlBuilder(original_url)
     url.replace_query("def", "4")

     self.assertEqual(expected_url, str(url))
Beispiel #6
0
def get_helper(uuid: str, replica: Replica, version: str = None, token: str = None, directurl: bool = False,
               content_disposition: str = None):
    """
    Load a file's metadata and build the response for a file request.

    When ``version`` is omitted, the greatest version suffix among the keys
    listed under ``files/<uuid>.`` is used. For GET requests the checkout is
    verified first: once ready, the response redirects to either a direct
    storage URL or a presigned GET URL; until then it is a 301 redirect back to
    the request URL (with the token added) carrying ``Retry-After``. Any other
    request method gets an empty 200 response. Every response except the retry
    redirect carries the ``X-DSS-*`` metadata headers.

    :param uuid: uuid of the file
    :param replica: replica whose bucket and checkout bucket are used
    :param version: file version; the latest listed version when omitted
    :param token: checkout token passed to ``_verify_checkout``
    :param directurl: redirect to a direct storage URL instead of a presigned one
    :param content_disposition: forwarded as ``response_content_disposition``
        when the presigned URL is generated
    :raises DSSException: 404 when no version or metadata can be found,
        ``requests.codes.unprocessable`` when the async state reports an
        s3-etag error
    :raises AsyncStateError: re-raised when the async state item is such an error
    """
    with tracing.Subsegment('parameterization'):
        handle = Config.get_blobstore_handle(replica)
        bucket = replica.bucket

    if version is None:
        with tracing.Subsegment('find_latest_version'):
            # The version is whatever follows the prefix; keep the greatest one.
            prefix = "files/{}.".format(uuid)
            version = max(
                (listed_key[len(prefix):] for listed_key in handle.list(bucket, prefix)),
                default=None,
            )
    if version is None:
        # no matches!
        raise DSSException(404, "not_found", "Cannot find file!")

    # retrieve the file metadata.
    metadata_key = f"files/{uuid}.{version}"
    try:
        with tracing.Subsegment('load_file'):
            raw_metadata = handle.get(bucket, metadata_key)
            file_metadata = json.loads(raw_metadata.decode("utf-8"))
    except BlobNotFoundError:
        # The blob is missing: consult the async state to see whether a
        # recorded error explains it.
        item = AsyncStateItem.get(metadata_key)
        if isinstance(item, S3CopyEtagError):
            raise DSSException(
                requests.codes.unprocessable,
                "missing_checksum",
                "Incorrect s3-etag"
            )
        if isinstance(item, AsyncStateError):
            raise item
        raise DSSException(404, "not_found", "Cannot find file!")

    with tracing.Subsegment('make_path'):
        blob_path = compose_blob_key(file_metadata)

    if request.method != "GET":
        response = make_response('', 200)
    else:
        token, ready = _verify_checkout(replica, token, file_metadata, blob_path)
        if not ready:
            with tracing.Subsegment('make_retry'):
                builder = UrlBuilder(request.url)
                builder.replace_query("token", token)
                response = redirect(str(builder), code=301)
                headers = response.headers
                headers['Retry-After'] = RETRY_AFTER_INTERVAL
                return response

        if directurl:
            target = str(UrlBuilder().set(
                scheme=replica.storage_schema,
                netloc=replica.checkout_bucket,
                path=get_dst_key(blob_path)
            ))
        else:
            # A content disposition can tell a browser to treat the link as a
            # download rather than open a new tab; it is passed only when set.
            signing_options = {}
            if content_disposition:
                signing_options['response_content_disposition'] = content_disposition
            target = handle.generate_presigned_GET_url(
                replica.checkout_bucket,
                get_dst_key(blob_path),
                **signing_options)
        response = redirect(target)

    with tracing.Subsegment('set_headers'):
        headers = response.headers
        metadata_headers = (
            ('X-DSS-CREATOR-UID', file_metadata[FileMetadata.CREATOR_UID]),
            ('X-DSS-VERSION', version),
            ('X-DSS-CONTENT-TYPE', file_metadata[FileMetadata.CONTENT_TYPE]),
            ('X-DSS-SIZE', file_metadata[FileMetadata.SIZE]),
            ('X-DSS-CRC32C', file_metadata[FileMetadata.CRC32C]),
            ('X-DSS-S3-ETAG', file_metadata[FileMetadata.S3_ETAG]),
            ('X-DSS-SHA1', file_metadata[FileMetadata.SHA1]),
            ('X-DSS-SHA256', file_metadata[FileMetadata.SHA256]),
        )
        for header_name, header_value in metadata_headers:
            headers[header_name] = header_value

    return response