Example #1
0
async def get_kge_knowledge_graph_catalog(
        request: web.Request) -> web.Response:
    """Return the catalog of available KGE File Sets as a JSON response.

    Catalog entries are only looked up when the request carries a
    non-empty user session; otherwise an empty JSON object is returned.
    In both cases the response status is 200.

    :param request: incoming HTTP request
    :type request: web.Request
    :return: JSON response wrapping the (possibly empty) catalog
    :rtype: web.Response
    """
    # Paranoia: the catalog is only visible from within a user session
    user_session = await get_session(request)

    entries: Dict = (
        dict() if user_session.empty
        else KnowledgeGraphCatalog.catalog().get_kg_entries()
    )

    # The user session does not need to be propagated to the output
    return web.json_response(entries, status=200)
Example #2
0
    def threaded_upload():
        """
        Threaded upload process.

        Transfers 'source' into the default S3 bucket through
        'transfer_function', using a client built from a freshly assumed
        role, then records the uploaded file in the catalog file set
        identified by the tracker's 'kg_id' and 'fileset_version'.

        Reads 'tracker', 'filename', 'source' and 'transfer_function' from
        the enclosing scope.

        :return: None
        :raises RuntimeError: if either the transfer or the subsequent catalog
                              registration fails; the triggering exception is
                              chained as the cause.
        """

        local_role = AssumeRole()
        client = s3_client(assumed_role=local_role, config=_s3_transfer_cfg)

        # The tracker may override the name under which the content is stored
        content_name = tracker['content_name'] if 'content_name' in tracker else filename

        progress_monitor = ProgressPercentage(filename=content_name,
                                              transfer_tracker=tracker)

        try:
            object_key = get_object_key(tracker['file_set_location'],
                                        content_name)
            transfer_function(bucket=default_s3_bucket,
                              object_key=object_key,
                              source=source,
                              client=client,
                              callback=progress_monitor)
        except Exception as exc:
            # f-string formatting (rather than '+' concatenation) ensures that a
            # non-string tracker value cannot raise a TypeError here and thereby
            # mask the original transfer failure.
            exc_msg: str = (
                f"threaded_file_transfer("
                f"kg_id: {tracker['kg_id']}, "
                f"fileset_version: {tracker['fileset_version']}, "
                f"file_type: {tracker['file_type']!s}) threw exception: {exc!s}"
            )
            logger.error(exc_msg)
            raise RuntimeError(exc_msg) from exc

        # TODO: we could check for and unpack tar.gz archives here, rather than in the KgxArchiver.worker() task?

        # Assuming success, the new file should be
        # added into the file set in the Catalog.
        #
        # RMB (15-Oct-2021): long term persistence of a presigned
        # s3_file_url in the file set is deprecated, hence only
        # the object key is recorded here.
        try:
            # This action adds a file to the given knowledge graph,
            # identified by the 'kg_id', initiating or continuing
            # the assembly process for the 'fileset_version' KGE file set.
            # May raise an Exception if something goes wrong.
            #
            # Note: aside from general file "type" (i.e. metadata, nodes, edges, archive)
            #       this operation is agnostic as to KGX file format and content.
            KnowledgeGraphCatalog.catalog().add_to_kge_file_set(
                kg_id=tracker["kg_id"],
                fileset_version=tracker["fileset_version"],
                file_type=tracker["file_type"],
                file_name=content_name,
                file_size=int(progress_monitor.get_file_size()),
                object_key=object_key)

        except Exception as exc:
            exc_msg: str = (
                f"threaded_file_transfer("
                f"kg_id: {tracker['kg_id']}, "
                f"fileset_version: {tracker['fileset_version']}, "
                f"file_type: {tracker['file_type']!s}, "
                f"object_key: {object_key!s}) threw exception: {exc!s}"
            )
            logger.error(exc_msg)
            raise RuntimeError(exc_msg) from exc
Example #3
0
async def publish_kge_file_set(request: web.Request, kg_id: str,
                               fileset_version: str):
    """Publish a registered File Set

    Looks up the requested file set version of the knowledge graph and,
    if its status is still CREATED, triggers publication. The caller is
    then redirected to the submission confirmation page, unless the file
    set ended up in the ERROR state.

    :param request:
    :type request: web.Request
    :param kg_id: KGE Knowledge Graph Identifier for the knowledge graph from which data files are being accessed.
    :type kg_id: str
    :param fileset_version: specific version of KGE File Set published for the specified Knowledge Graph Identifier
    :type fileset_version: str
    """
    logger.debug("Entering publish_kge_file_set()")

    session = await get_session(request)
    if not user_permitted(session):
        # If session is not active, then just a redirect
        # directly back to unauthenticated landing page
        await redirect(request, LANDING_PAGE)
        return

    # NOTE(review): the report_*() and redirect() helpers are presumed to
    # abort the request (e.g. by raising); the code below relies on that.
    if not kg_id or not fileset_version:
        await report_not_found(
            request,
            "publish_kge_file_set(): knowledge graph id or file set version are null?"
        )

    graph: KgeKnowledgeGraph = \
        KnowledgeGraphCatalog.catalog().get_knowledge_graph(kg_id)
    if not graph:
        await report_not_found(
            request,
            f"publish_kge_file_set(): knowledge graph '{kg_id}' was not found in the catalog?",
            active_session=True)

    versioned_set: KgeFileSet = graph.get_file_set(fileset_version)
    if not versioned_set:
        await report_not_found(
            request,
            f"publish_kge_file_set() errors: unknown '{fileset_version}' for knowledge graph '{kg_id}'?"
        )

    if versioned_set.get_fileset_status() == KgeFileSetStatusCode.CREATED:
        # Assume that it still needs to be processed
        logger.debug(
            f"\tPublishing fileset version '{fileset_version}' of graph '{kg_id}'"
        )
        try:
            await versioned_set.publish()
        except Exception as err:
            # Only logged: the status check below decides what is reported
            logger.error(str(err))

    if versioned_set.get_fileset_status() == KgeFileSetStatusCode.ERROR:
        await report_bad_request(
            request,
            f"publish_kge_file_set() errors: file set version '{fileset_version}' "
            f"for knowledge graph '{kg_id}' has errors thus could not be published?"
        )

    # Should either be under PROCESSING or VALIDATED at this point.
    graph_name = graph.get_name()
    validated = str(
        versioned_set.get_fileset_status() == KgeFileSetStatusCode.VALIDATED)
    confirmation_url = (
        f"{SUBMISSION_CONFIRMATION}?"
        f"kg_name={graph_name}&"
        f"fileset_version={fileset_version}&"
        f"validated={validated}"
    )
    await redirect(request, confirmation_url, active_session=True)
Example #4
0
async def register_kge_file_set(request: web.Request):
    """Register core metadata for a distinctly versioned file set of a KGE Knowledge Graph

    Register core metadata for a newly persisted file set version of a
    KGE persisted Knowledge Graph. Since this endpoint assumes a web session
    authenticated user, this user is automatically designated
    as the 'owner' of the new versioned file set.

    Form fields read from the POST body: 'kg_id', 'biolink_model_release',
    'fileset_major_version', 'fileset_minor_version' and, optionally,
    'date_stamp'. On success the caller is redirected to the upload form;
    without a permitted session the caller is redirected to the landing page.

    :param request:
    :type request: web.Request
    """
    logger.debug("Entering register_kge_file_set()")

    session = await get_session(request)
    if not user_permitted(session):
        # If session is not active, then just a redirect
        # directly back to unauthenticated landing page
        await redirect(request, LANDING_PAGE)
        return

    # submitter: name & email of submitter of the KGE file set,
    # cached in session from user authentication
    submitter_name = session['name']
    submitter_email = session['email']

    data = await request.post()

    # The required fields are read with .get() so that a field missing from
    # the form reaches the explicit validation below rather than raising a
    # KeyError first.
    # NOTE(review): the report_*() and redirect() helpers are presumed to
    # abort the request (e.g. by raising); the code below relies on that.

    # Identifier of the knowledge graph to
    # which the new KGE File Set belongs
    kg_id = data.get('kg_id')
    if not kg_id:
        await report_not_found(
            request,
            "register_kge_file_set(): knowledge graph identifier parameter is empty?",
            active_session=True)

    # SemVer release of the Biolink Model associated with the file set
    biolink_model_release = data.get('biolink_model_release')
    if not biolink_model_release:
        await report_not_found(
            request,
            "register_kge_file_set(): missing Biolink Model SemVer release?",
            active_session=True)

    # SemVer major versioning of the new KGE File Set
    fileset_major_version = data.get('fileset_major_version')
    if not fileset_major_version:
        await report_not_found(
            request,
            "register_kge_file_set(): missing file set SemVer major version parameter?",
            active_session=True)

    # SemVer minor versioning of the new KGE File Set
    fileset_minor_version = data.get('fileset_minor_version')
    if not fileset_minor_version:
        await report_not_found(
            request,
            "register_kge_file_set(): missing file set SemVer minor version parameter?",
            active_session=True)

    # Consolidated version of new KGE File Set
    # TODO: Should the fileset_version include more than just the major and minor SemVer versioning?
    fileset_version = str(fileset_major_version) + "." + str(
        fileset_minor_version)

    # Date stamp of the new KGE File Set
    date_stamp = data['date_stamp'] if 'date_stamp' in data \
        else get_default_date_stamp()

    logger.debug("register_kge_file_set() form parameters:\n\t" +
                 "\n\tsubmitter_name: " + submitter_name +
                 "\n\tsubmitter_email: " + submitter_email +
                 "\n\tkg_id: " + kg_id + "\n\tbiolink_model_release: " +
                 biolink_model_release + "\n\tfileset version: " +
                 fileset_version + "\n\tdate_stamp: " + date_stamp)

    knowledge_graph: KgeKnowledgeGraph = \
        KnowledgeGraphCatalog.catalog().get_knowledge_graph(kg_id)

    if not knowledge_graph:
        await report_not_found(
            request,
            f"register_kge_file_set(): knowledge graph '{kg_id}' was not found in the catalog?",
            active_session=True)

    # TODO: reinstate the location_available(bucket_name, object_key) and
    #       'api_specification and url' checks, with a more graceful
    #       front end failure signal when they fail.

    # Here we start to track a specific
    # knowledge graph submission within KGE Archive
    file_set: KgeFileSet = knowledge_graph.get_file_set(fileset_version)

    if file_set is not None:
        # A file set already exists for the specified version. Only DEV_MODE
        # reports the duplicate; otherwise the existing file set is re-added
        # below.
        if DEV_MODE:
            # TODO: need to fail more gracefully here
            await report_bad_request(
                request,
                "register_kge_file_set(): encountered duplicate file set version '"
                + fileset_version + "' for knowledge graph '" +
                kg_id + "'?",
                active_session=True)
    else:
        # expected new instance of KGE File Set to be created and initialized
        file_set = KgeFileSet(
            kg_id=kg_id,
            biolink_model_release=biolink_model_release,
            fileset_version=fileset_version,
            date_stamp=date_stamp,
            submitter_name=submitter_name,
            submitter_email=submitter_email)

    logger.debug(
        f"knowledge_graph.add_file_set({knowledge_graph.get_name()}.{file_set.id()})"
    )

    # Add new versioned KGE File Set to the Catalog Knowledge Graph entry
    knowledge_graph.add_file_set(fileset_version, file_set)

    # NOTE(review): the query parameter values are interpolated without
    # URL-encoding - confirm that redirect() or the consumer handles this.
    await redirect(
        request,
        f"{UPLOAD_FORM}?kg_id={kg_id}&kg_name={knowledge_graph.get_name()}"
        f"&fileset_version={fileset_version}&submitter_name={submitter_name}",
        active_session=True)
Example #5
0
async def register_kge_knowledge_graph(request: web.Request):
    """Register core metadata for a distinct KGE Knowledge Graph

    Register core metadata for a new KGE persisted Knowledge Graph.
    Since this endpoint assumes a web session authenticated user,
    this user is automatically designated as the 'owner' of the new KGE graph.

    Form fields read from the POST body: 'kg_name', 'kg_description',
    'translator_component', 'translator_team', 'license_name',
    'terms_of_service' and, optionally, 'license_url'. On success the caller
    is redirected to the file set registration form; without a permitted
    session the caller is redirected to the landing page.

    :param request:
    :type request: web.Request
    """
    logger.debug("Entering register_kge_knowledge_graph()")

    session = await get_session(request)
    if not user_permitted(session):
        # If session is not active, then just a redirect
        # directly back to unauthenticated landing page
        await redirect(request, LANDING_PAGE)
        return

    # submitter: name & email of submitter of the KGE file set,
    # cached in session from user authentication
    submitter_name = session['name']
    submitter_email = session['email']

    data = await request.post()

    # kg_name: human readable name of the knowledge graph.
    # Read with .get() so that a missing field reaches the explicit
    # validation below rather than raising a KeyError first.
    # NOTE(review): the report_*() and redirect() helpers are presumed to
    # abort the request (e.g. by raising); the code below relies on that.
    kg_name = data.get('kg_name')

    if not kg_name:
        await report_bad_request(
            request,
            "register_kge_knowledge_graph(): knowledge graph name is unspecified?"
        )

    # NOTE(review): the fields below are read with data[...] and have no
    # validation of their own, so they raise KeyError if absent from the form.

    # kg_description: detailed description of knowledge graph (may be multi-lined with '\n')
    kg_description = data['kg_description']

    # translator_component: Translator component associated with the knowledge graph (e.g. KP, ARA or SRI)
    translator_component = data['translator_component']

    # translator_team: specific Translator team (affiliation)
    # contributing the file set, e.g. Clinical Data Provider
    translator_team = data['translator_team']

    # license_name: Open Source license name, e.g. MIT, Apache 2.0, etc.
    license_name = data['license_name']

    # license_url: web site link to project license (optional form field)
    license_url = data.get('license_url', '').strip()

    # url may be empty or unavailable - fall back onto the
    # default url of a known license, if there is one
    if not license_url:
        if license_name in _known_licenses:
            license_url = _known_licenses[license_name]
        elif license_name != "Other":
            await report_bad_request(
                request,
                "register_kge_knowledge_graph(): unknown licence_name: '" +
                license_name + "'?")

    # terms_of_service: specifically relating to the project, beyond the licensing
    terms_of_service = data['terms_of_service']

    logger.debug("register_kge_knowledge_graph() form parameters:\n\t" +
                 "\n\tkg_name: " + kg_name + "\n\tkg_description: " +
                 kg_description + "\n\ttranslator_component: " +
                 translator_component + "\n\ttranslator_team: " +
                 translator_team + "\n\tsubmitter_name: " +
                 submitter_name + "\n\tsubmitter_email: " +
                 submitter_email + "\n\tlicense_name: " + license_name +
                 "\n\tlicense_url: " + license_url +
                 "\n\tterms_of_service: " + terms_of_service)

    # Use a normalized version of the knowledge
    # graph name as the KGE File Set identifier.
    kg_id = KgeKnowledgeGraph.normalize_name(kg_name)

    # TODO: reinstate the location_available(bucket_name, object_key) and
    #       'api_specification and url' checks, with a more graceful
    #       front end failure signal when they fail.

    # Here we start to track a specific
    # knowledge graph submission within KGE Archive
    knowledge_graph = KnowledgeGraphCatalog.catalog().add_knowledge_graph(
        kg_id=kg_id,
        kg_name=kg_name,
        kg_description=kg_description,
        translator_component=translator_component,
        translator_team=translator_team,
        submitter_name=submitter_name,
        submitter_email=submitter_email,
        license_name=license_name,
        license_url=license_url,
        terms_of_service=terms_of_service,
    )

    # Also publish a new 'provider.yaml' metadata file to the KGE Archive
    provider_metadata_key = knowledge_graph.publish_provider_metadata()

    if not provider_metadata_key:
        await report_not_found(
            request,
            "register_kge_knowledge_graph(): provider metadata could not be published?",
            active_session=True)

    await redirect(
        request,
        f"{FILESET_REGISTRATION_FORM}?kg_id={kg_id}&kg_name={knowledge_graph.get_name()}",
        active_session=True)
Example #6
0
async def kge_meta_knowledge_graph(request: web.Request,
                                   kg_id: str,
                                   fileset_version: str,
                                   downloading: bool = True):
    """Get supported relationships by source and target

    Resolves the content metadata file of the given file set version and
    either triggers its download (when 'downloading' is set) or returns a
    plain text response carrying the presigned download URL, or the text
    "unavailable" if the file does not exist.

    :param request:
    :type request: web.Request
    :param kg_id: KGE File Set identifier for the knowledge graph for which graph metadata is being accessed.
    :type kg_id: str
    :param fileset_version: Version of KGE File Set for a given knowledge graph.
    :type fileset_version: str
    :param downloading: flag set 'True' if file downloading in progress.
    :type downloading: bool

    :rtype: web.Response( Dict[str, Dict[str, List[str]]] )
    """
    # NOTE(review): the report_*(), redirect() and download() helpers are
    # presumed to abort the request (e.g. by raising); the code relies on that.
    if not kg_id or not fileset_version:
        await report_not_found(
            request,
            f"kge_meta_knowledge_graph(): KGE File Set 'kg_id' has value {kg_id!s}"
            f" and 'fileset_version' has value {fileset_version!s}"
            "... both must be non-null.")

    logger.debug(
        "Entering kge_meta_knowledge_graph(kg_id: " + kg_id
        + ", fileset_version: " + fileset_version + ")"
    )

    session = await get_session(request)
    if session.empty:
        # If session is not active, then just a redirect
        # directly back to unauthenticated landing page
        await redirect(request, LANDING_PAGE)
        return

    graph: KgeKnowledgeGraph = \
        KnowledgeGraphCatalog.catalog().get_knowledge_graph(kg_id)
    if not graph:
        await report_not_found(
            request,
            f"kge_meta_knowledge_graph(): knowledge graph '{kg_id}' was not found in the catalog?",
            active_session=True)

    # Only the location is needed here; the assigned version is ignored
    locate = with_version(func=get_object_location, version=fileset_version)
    location, _ = locate(kg_id)

    metadata_key = location + CONTENT_METADATA_FILE

    if not object_key_exists(object_key=metadata_key):
        if not downloading:
            return await with_session(request,
                                      web.Response(text="unavailable"))
        await redirect(
            request,
            f"{DATA_UNAVAILABLE}?fileset_version={fileset_version}"
            f"&kg_name={graph.get_name()}&data_type=meta%20knowledge%20graph",
            active_session=True)

    # Current implementation of this handler triggers a
    # download of the KGX content metadata file, if available
    download_url = create_presigned_url(object_key=metadata_key)
    logger.debug(
        f"kge_meta_knowledge_graph() download_url: '{download_url}'")

    if not downloading:
        return await with_session(request, web.Response(text=download_url))

    # An alternate version could directly return the JSON
    # of the Content Metadata as a direct response?
    await download(request, download_url)
Example #7
0
async def get_kge_file_set_metadata(request: web.Request, kg_id: str,
                                    fileset_version: str) -> web.Response:
    """Get KGE File Set provider metadata.

    Without an active session the caller is redirected to the landing page.
    Otherwise the metadata of the requested file set version is converted to
    a dictionary, sanitized and returned as a JSON response.

    :param request:
    :type request: web.Request
    :param kg_id: KGE File Set identifier for the knowledge graph for which data files are being accessed
    :type kg_id: str
    :param fileset_version: Specific version of KGE File Set for the knowledge graph for which metadata are accessed
    :type fileset_version: str

    :return:  KgeMetadata including provider and content metadata with an annotated list of KgeFile entries
    """
    logger.debug("Entering get_kge_file_set_metadata()...")

    session = await get_session(request)
    if session.empty:
        # If session is not active, then just
        # redirect back to unauthenticated landing page
        await redirect(request, LANDING_PAGE)
        return

    # NOTE(review): the report_*() and redirect() helpers are presumed to
    # abort the request (e.g. by raising); the code below relies on that.
    if not (kg_id and fileset_version):
        await report_not_found(
            request,
            "get_kge_file_set_metadata(): Knowledge Graph identifier and File Set version is not specified?"
        )

    logger.debug("...of file set version '" + fileset_version +
                 "' for knowledge graph '" + kg_id + "'")

    knowledge_graph: KgeKnowledgeGraph = \
        KnowledgeGraphCatalog.catalog().get_knowledge_graph(kg_id)

    if not knowledge_graph:
        await report_not_found(
            request,
            f"get_kge_file_set_metadata(): knowledge graph '{kg_id}' was not found in the catalog?",
            active_session=True)

    try:
        file_set_metadata: KgeMetadata = knowledge_graph.get_metadata(
            fileset_version)

        if not file_set_metadata:
            # Message prefix names this handler (it previously
            # referred to a non-existent 'file_set_metadata()')
            await report_not_found(
                request,
                f"get_kge_file_set_metadata(): file set metadata for knowledge graph '{kg_id}' is not available?",
                active_session=True)

        file_set_status_as_dict = file_set_metadata.to_dict()

        _sanitize_metadata(file_set_status_as_dict)

        response = web.json_response(file_set_status_as_dict, status=200)

        return await with_session(request, response)

    except RuntimeError as rte:
        # Note the space after the closing quote of the knowledge graph id,
        # which was missing from the original message.
        await report_bad_request(
            request,
            "get_kge_file_set_metadata() errors: file set version '" +
            fileset_version + "' for knowledge graph '" + kg_id + "' " +
            "could not be accessed. Error: " + str(rte))