Esempio n. 1
0
 def from_dict(self, query_graph_dict):
     query_graph = QueryGraph()
     query_graph.nodes = []
     query_graph.edges = []
     if "nodes" in query_graph_dict:
         for node in query_graph_dict["nodes"]:
             qnode = QNode().from_dict(node)
             query_graph.nodes.append(qnode)
     if "edges" in query_graph_dict:
         for edge in query_graph_dict["edges"]:
             qedge = QEdge().from_dict(edge)
             query_graph.edges.append(qedge)
     return query_graph
Esempio n. 2
0
    def _get_query_graph_for_edge(qedge, query_graph, dict_kg):
        # This function creates a query graph for the specified qedge, updating its qnodes' curies as needed
        edge_query_graph = QueryGraph(nodes=[], edges=[])
        qnodes = [
            eu.get_query_node(query_graph, qedge.source_id),
            eu.get_query_node(query_graph, qedge.target_id)
        ]

        # Add (a copy of) this qedge to our edge query graph
        edge_query_graph.edges.append(eu.copy_qedge(qedge))

        # Update this qedge's qnodes as appropriate and add (copies of) them to the edge query graph
        qedge_has_already_been_expanded = qedge.id in dict_kg['edges']
        qnodes_using_curies_from_prior_step = set()
        for qnode in qnodes:
            qnode_copy = eu.copy_qnode(qnode)

            # Handle case where we need to feed curies from a prior Expand() step as the curie for this qnode
            qnode_already_fulfilled = qnode_copy.id in dict_kg['nodes']
            if qnode_already_fulfilled and not qnode_copy.curie and not qedge_has_already_been_expanded:
                qnode_copy.curie = list(dict_kg['nodes'][qnode_copy.id].keys())
                qnodes_using_curies_from_prior_step.add(qnode_copy.id)

            edge_query_graph.nodes.append(qnode_copy)

        return edge_query_graph, qnodes_using_curies_from_prior_step
Esempio n. 3
0
    def _get_query_graph_for_edge(self, qedge: QEdge, query_graph: QueryGraph,
                                  dict_kg: DictKnowledgeGraph,
                                  use_synonyms: bool, kp_to_use: str,
                                  log: Response) -> QueryGraph:
        # This function creates a query graph for the specified qedge, updating its qnodes' curies as needed
        edge_query_graph = QueryGraph(nodes=[], edges=[])
        qnodes = [
            eu.get_query_node(query_graph, qedge.source_id),
            eu.get_query_node(query_graph, qedge.target_id)
        ]

        # Add (a copy of) this qedge to our edge query graph
        edge_query_graph.edges.append(eu.copy_qedge(qedge))

        # Update this qedge's qnodes as appropriate and add (copies of) them to the edge query graph
        qedge_has_already_been_expanded = qedge.id in dict_kg.edges_by_qg_id
        for qnode in qnodes:
            qnode_copy = eu.copy_qnode(qnode)

            # Feed in curies from a prior Expand() step as the curie for this qnode as necessary
            qnode_already_fulfilled = qnode_copy.id in dict_kg.nodes_by_qg_id
            if qnode_already_fulfilled and not qnode_copy.curie and not qedge_has_already_been_expanded:
                qnode_copy.curie = list(
                    dict_kg.nodes_by_qg_id[qnode_copy.id].keys())

            edge_query_graph.nodes.append(qnode_copy)

        if use_synonyms:
            self._add_curie_synonyms_to_query_nodes(
                qnodes=edge_query_graph.nodes, log=log, kp=kp_to_use)
        # Consider both protein and gene if qnode's type is one of those (since KP's handle these differently)
        for qnode in edge_query_graph.nodes:
            if qnode.type in ['protein', 'gene']:
                qnode.type = ['protein', 'gene']
        return edge_query_graph
Esempio n. 4
0
    def __get_next_free_edge_id(self):

        #### Set up local references to the message and verify the query_graph nodes
        message = self.message
        if message.query_graph is None:
            message.query_graph = QueryGraph()
            message.query_graph.nodes = []
            message.query_graph.edges = []
        if message.query_graph.edges is None:
            message.query_graph.edges = []
        qedges = message.query_graph.edges

        #### Loop over the nodes making a dict of the ids
        ids = {}
        for qedge in qedges:
            id = qedge.id
            ids[id] = 1

        #### Find the first unused id
        index = 0
        while 1:
            pad = '0'
            if index > 9:
                pad = ''
            potential_edge_id = f"e{pad}{str(index)}"
            if potential_edge_id not in ids:
                return potential_edge_id
            index += 1
Esempio n. 5
0
    def create_message(self, describe=False):
        """
        Creates a basic empty Message object with basic boilerplate metadata
        :return: Response object with execution information and the new message object inside the data envelope
        :rtype: Response
        """

        # Internal documentation setup
        #allowable_parameters = { 'action': { 'None' } }
        allowable_parameters = {
            'dsl_command': '`create_message()`'
        }  # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
        if describe:
            allowable_parameters[
                'brief_description'] = """The `create_message` method creates a basic empty Message object with basic boilerplate metadata
            such as reasoner_id, schema_version, etc. filled in. This DSL command takes no arguments"""
            return allowable_parameters

        #### Define a default response
        response = Response()
        self.response = response

        #### Create the top-level message
        response.info("Creating an empty template ARAX Message")
        message = Message()
        self.message = message

        #### Fill it with default information
        message.id = None
        message.type = "translator_reasoner_message"
        message.reasoner_id = "ARAX"
        message.tool_version = RTXConfiguration().version
        message.schema_version = "0.9.3"
        message.message_code = "OK"
        message.code_description = "Created empty template Message"
        message.context = "https://raw.githubusercontent.com/biolink/biolink-model/master/context.jsonld"

        #### Why is this _datetime ?? FIXME
        message._datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        #### Create an empty master knowledge graph
        message.knowledge_graph = KnowledgeGraph()
        message.knowledge_graph.nodes = []
        message.knowledge_graph.edges = []

        #### Create an empty query graph
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []

        #### Create empty results
        message.results = []
        message.n_results = 0

        #### Return the response
        response.data['message'] = message
        return response
Esempio n. 6
0
    def _extract_query_subgraph(qedge_ids_to_expand: List[str],
                                query_graph: QueryGraph,
                                log: Response) -> QueryGraph:
        # This function extracts a sub-query graph containing the provided qedge IDs from a larger query graph
        sub_query_graph = QueryGraph(nodes=[], edges=[])

        for qedge_id in qedge_ids_to_expand:
            # Make sure this query edge actually exists in the query graph
            if not any(qedge.id == qedge_id for qedge in query_graph.edges):
                log.error(
                    f"An edge with ID '{qedge_id}' does not exist in Message.QueryGraph",
                    error_code="UnknownValue")
                return None
            qedge = next(qedge for qedge in query_graph.edges
                         if qedge.id == qedge_id)

            # Make sure this qedge's qnodes actually exist in the query graph
            if not eu.get_query_node(query_graph, qedge.source_id):
                log.error(
                    f"Qedge {qedge.id}'s source_id refers to a qnode that does not exist in the query graph: "
                    f"{qedge.source_id}",
                    error_code="InvalidQEdge")
                return None
            if not eu.get_query_node(query_graph, qedge.target_id):
                log.error(
                    f"Qedge {qedge.id}'s target_id refers to a qnode that does not exist in the query graph: "
                    f"{qedge.target_id}",
                    error_code="InvalidQEdge")
                return None
            qnodes = [
                eu.get_query_node(query_graph, qedge.source_id),
                eu.get_query_node(query_graph, qedge.target_id)
            ]

            # Add (copies of) this qedge and its two qnodes to our new query sub graph
            qedge_copy = eu.copy_qedge(qedge)
            if not any(qedge.id == qedge_copy.id
                       for qedge in sub_query_graph.edges):
                sub_query_graph.edges.append(qedge_copy)
            for qnode in qnodes:
                qnode_copy = eu.copy_qnode(qnode)
                if not any(qnode.id == qnode_copy.id
                           for qnode in sub_query_graph.nodes):
                    sub_query_graph.nodes.append(qnode_copy)

        return sub_query_graph
Esempio n. 7
0
    def from_dict(self, message):

        if str(message.__class__
               ) != "<class 'swagger_server.models.message.Message'>":
            message = Message().from_dict(message)
        message.query_graph = QueryGraph().from_dict(message.query_graph)
        message.knowledge_graph = KnowledgeGraph().from_dict(
            message.knowledge_graph)

        #### This is an unfortunate hack that fixes qnode.curie entries
        #### Officially a curie can be a str or a list. But Swagger 2.0 only permits one type and we set it to str
        #### so when it gets converted from_dict, the list gets converted to a str because that's its type
        #### Here we force it back. This should no longer be needed when we are properly on OpenAPI 3.0
        if message.query_graph is not None and message.query_graph.nodes is not None:
            for qnode in message.query_graph.nodes:
                if qnode.curie is not None and isinstance(qnode.curie, str):
                    if qnode.curie[0:2] == "['":
                        try:
                            qnode.curie = ast.literal_eval(qnode.curie)
                        except:
                            pass

        #new_nodes = []
        #for qnode in message.query_graph.nodes:
        #    print(type(qnode))
        #    new_nodes.append(QNode().from_dict(qnode))
        #message.query_graph.nodes = new_nodes
        #for qedge in message.query_graph.edges:
        #    new_edges.append(QEdge().from_dict(qedge))
        #message.query_graph.edges = new_edges

        if message.results is not None:
            for result in message.results:
                if result.result_graph is not None:
                    #eprint(str(result.result_graph.__class__))
                    if str(
                            result.result_graph.__class__
                    ) != "<class 'swagger_server.models.knowledge_graph.KnowledgeGraph'>":
                        result.result_graph = KnowledgeGraph().from_dict(
                            result.result_graph)

        return message
Esempio n. 8
0
    def reassign_curies(self, message, input_parameters, describe=False):
        """
        Reassigns CURIEs to the target Knowledge Provider
        :param message: Translator standard Message object
        :type message: Message
        :param input_parameters: Dict of input parameters to control the method
        :type input_parameters: Message
        :return: Response object with execution information
        :rtype: Response
        """

        # #### Internal documentation setup
        allowable_parameters = {
            'knowledge_provider': {
                'Name of the Knowledge Provider CURIE space to map to. Default=KG1. Also currently supported KG2'
            },
            'mismap_result': {
                'Desired action when mapping fails: ERROR or WARNING. Default is ERROR'
            },
        }
        if describe:
            allowable_parameters[
                'dsl_command'] = '`reassign_curies()`'  # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
            allowable_parameters[
                'brief_description'] = """The `reassign_curies` method reassigns all the CURIEs in the Message QueryGraph to the specified
                knowledge provider. Allowed values are KG1 or KG2. Default is KG1 if not specified."""
            return allowable_parameters

        #### Define a default response
        response = Response()
        self.response = response
        self.message = message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'knowledge_provider': 'KG1',
            'mismap_result': 'ERROR',
        }

        #### Loop through the input_parameters and override the defaults and make sure they are allowed
        for key, value in input_parameters.items():
            if key not in parameters:
                response.error(f"Supplied parameter {key} is not permitted",
                               error_code="UnknownParameter")
            else:
                parameters[key] = value
        #### Return if any of the parameters generated an error (showing not just the first one)
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        # Check that the knowledge_provider is valid:
        if parameters['knowledge_provider'] != 'KG1' and parameters[
                'knowledge_provider'] != 'KG2':
            response.error(
                f"Specified knowledge provider must be 'KG1' or 'KG2', not '{parameters['knowledge_provider']}'",
                error_code="UnknownKP")
            return response

        #### Now try to assign the CURIEs
        response.info(
            f"Reassigning the CURIEs in QueryGraph to {parameters['knowledge_provider']} space"
        )

        #### Make sure there's a query_graph already here
        if message.query_graph is None:
            message.query_graph = QueryGraph()
            message.query_graph.nodes = []
            message.query_graph.edges = []
        if message.query_graph.nodes is None:
            message.query_graph.nodes = []

        #### Set up the KGNodeIndex
        kgNodeIndex = KGNodeIndex()

        # Loops through the QueryGraph nodes and adjust them
        for qnode in message.query_graph.nodes:

            # If the CURIE is None, then there's nothing to do
            curie = qnode.curie
            if curie is None:
                continue

            # Map the CURIE to the desired Knowledge Provider
            if parameters['knowledge_provider'] == 'KG1':
                if kgNodeIndex.is_curie_present(curie) is True:
                    mapped_curies = [curie]
                else:
                    mapped_curies = kgNodeIndex.get_KG1_curies(curie)
            elif parameters['knowledge_provider'] == 'KG2':
                if kgNodeIndex.is_curie_present(curie, kg_name='KG2'):
                    mapped_curies = [curie]
                else:
                    mapped_curies = kgNodeIndex.get_curies_and_types(
                        curie, kg_name='KG2')
            else:
                response.error(
                    f"Specified knowledge provider must be 'KG1' or 'KG2', not '{parameters['knowledge_provider']}'",
                    error_code="UnknownKP")
                return response

            # Try to find a new CURIE
            new_curie = None
            if len(mapped_curies) == 0:
                if parameters['mismap_result'] == 'WARNING':
                    response.warning(
                        f"Did not find a mapping for {curie} to KP '{parameters['knowledge_provider']}'. Leaving as is"
                    )
                else:
                    response.error(
                        f"Did not find a mapping for {curie} to KP '{parameters['knowledge_provider']}'. This is an error"
                    )
            elif len(mapped_curies) == 1:
                new_curie = mapped_curies[0]
            else:
                original_curie_is_fine = False
                for potential_curie in mapped_curies:
                    if potential_curie == curie:
                        original_curie_is_fine = True
                if original_curie_is_fine:
                    new_curie = curie
                else:
                    new_curie = mapped_curies[0]
                    response.warning(
                        f"There are multiple possible CURIEs in KP '{parameters['knowledge_provider']}'. Selecting the first one {new_curie}"
                    )

            # If there's no CURIE, then nothing to do
            if new_curie is None:
                pass
            # If it's the same
            elif new_curie == curie:
                response.debug(
                    f"CURIE {curie} is fine for KP '{parameters['knowledge_provider']}'"
                )
            else:
                response.info(
                    f"Remapping CURIE {curie} to {new_curie} for KP '{parameters['knowledge_provider']}'"
                )

        #### Return the response
        return response
Esempio n. 9
0
    def add_qedge(self, message, input_parameters, describe=False):
        """
        Adds a new QEdge object to the QueryGraph inside the Message object
        :return: Response object with execution information
        :rtype: Response
        """

        # #### Internal documentation setup
        allowable_parameters = {
            'id': {
                'Any string that is unique among all QEdge id fields, with recommended format e00, e01, e02, etc.'
            },
            'source_id': {
                'id of the source QNode already present in the QueryGraph (e.g. n01, n02)'
            },
            'target_id': {
                'id of the target QNode already present in the QueryGraph (e.g. n01, n02)'
            },
            'type': {
                'Any valid Translator/BioLink relationship type (e.g. physically_interacts_with, participates_in)'
            },
        }
        if describe:
            #allowable_parameters['action'] = { 'None' }
            #allowable_parameters = dict()
            allowable_parameters[
                'dsl_command'] = '`add_qedge()`'  # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
            allowable_parameters[
                'brief_description'] = """The `add_qedge` method adds an additional QEdge to the QueryGraph in the Message object. Currently
                source_id and target_id QNodes must already be present in the QueryGraph. The specified type is not currently checked that it is a
                valid Translator/BioLink relationship type, but it should be."""
            return allowable_parameters

        #### Define a default response
        response = Response()
        self.response = response
        self.message = message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'id': None,
            'source_id': None,
            'target_id': None,
            'type': None,
        }

        #### Loop through the input_parameters and override the defaults and make sure they are allowed
        for key, value in input_parameters.items():
            if key not in parameters:
                response.error(f"Supplied parameter {key} is not permitted",
                               error_code="UnknownParameter")
            else:
                parameters[key] = value
        #### Return if any of the parameters generated an error (showing not just the first one)
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        #### Now apply the filters. Order of operations is probably quite important
        #### Scalar value filters probably come first like minimum_confidence, then complex logic filters
        #### based on edge or node properties, and then finally maximum_results
        response.info(
            f"Adding a QueryEdge to Message with parameters {parameters}")

        #### Make sure there's a query_graph already here
        if message.query_graph is None:
            message.query_graph = QueryGraph()
            message.query_graph.nodes = []
            message.query_graph.edges = []
        if message.query_graph.edges is None:
            message.query_graph.edges = []

        #### Create a QEdge
        qedge = QEdge()
        if parameters['id'] is not None:
            id = parameters['id']
        else:
            id = self.__get_next_free_edge_id()
        qedge.id = id

        #### Get the list of available node_ids
        qnodes = message.query_graph.nodes
        ids = {}
        for qnode in qnodes:
            id = qnode.id
            ids[id] = 1

        #### Add the source_id
        if parameters['source_id'] is not None:
            if parameters['source_id'] not in ids:
                response.error(
                    f"While trying to add QEdge, there is no QNode with id {parameters['source_id']}",
                    error_code="UnknownSourceId")
                return response
            qedge.source_id = parameters['source_id']
        else:
            response.error(
                f"While trying to add QEdge, source_id is a required parameter",
                error_code="MissingSourceId")
            return response

        #### Add the target_id
        if parameters['target_id'] is not None:
            if parameters['target_id'] not in ids:
                response.error(
                    f"While trying to add QEdge, there is no QNode with id {parameters['target_id']}",
                    error_code="UnknownTargetId")
                return response
            qedge.target_id = parameters['target_id']
        else:
            response.error(
                f"While trying to add QEdge, target_id is a required parameter",
                error_code="MissingTargetId")
            return response

        #### Add the type if any. Need to verify it's an allowed type. FIXME
        if parameters['type'] is not None:
            qedge.type = parameters['type']

        #### Add it to the query_graph edge list
        message.query_graph.edges.append(qedge)

        #### Return the response
        return response
Esempio n. 10
0
    def add_qnode(self, message, input_parameters, describe=False):
        """
        Adds a new QNode object to the QueryGraph inside the Message object
        :return: Response object with execution information
        :rtype: Response
        """

        # #### Internal documentation setup
        allowable_parameters = {
            'id': {
                'Any string that is unique among all QNode id fields, with recommended format n00, n01, n02, etc.'
            },
            'curie': {
                'Any compact URI (CURIE) (e.g. DOID:9281) (May also be a list like [UniProtKB:P12345,UniProtKB:Q54321])'
            },
            'name': {
                'Any name of a bioentity that will be resolved into a CURIE if possible or result in an error if not (e.g. hypertension, insulin)'
            },
            'type': {
                'Any valid Translator bioentity type (e.g. protein, chemical_substance, disease)'
            },
            'is_set': {
                'If set to true, this QNode represents a set of nodes that are all in common between the two other linked QNodes'
            },
        }
        if describe:
            allowable_parameters[
                'dsl_command'] = '`add_qnode()`'  # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
            allowable_parameters[
                'brief_description'] = """The `add_qnode` method adds an additional QNode to the QueryGraph in the Message object. Currently
                when a curie or name is specified, this method will only return success if a matching node is found in the KG1/KG2 KGNodeIndex."""
            return allowable_parameters

        #### Define a default response
        response = Response()
        self.response = response
        self.message = message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'id': None,
            'curie': None,
            'name': None,
            'type': None,
            'is_set': None,
        }

        #### Loop through the input_parameters and override the defaults and make sure they are allowed
        for key, value in input_parameters.items():
            if key not in parameters:
                response.error(f"Supplied parameter {key} is not permitted",
                               error_code="UnknownParameter")
            else:
                parameters[key] = value
        #### Return if any of the parameters generated an error (showing not just the first one)
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        #### Now apply the filters. Order of operations is probably quite important
        #### Scalar value filters probably come first like minimum_confidence, then complex logic filters
        #### based on edge or node properties, and then finally maximum_results
        response.info(
            f"Adding a QueryNode to Message with parameters {parameters}")

        #### Make sure there's a query_graph already here
        if message.query_graph is None:
            message.query_graph = QueryGraph()
            message.query_graph.nodes = []
            message.query_graph.edges = []
        if message.query_graph.nodes is None:
            message.query_graph.nodes = []

        #### Set up the KGNodeIndex
        kgNodeIndex = KGNodeIndex()

        # Create the QNode and set the id
        qnode = QNode()
        if parameters['id'] is not None:
            id = parameters['id']
        else:
            id = self.__get_next_free_node_id()
        qnode.id = id

        # Set the is_set parameter to what the user selected
        if parameters['is_set'] is not None:
            qnode.is_set = (parameters['is_set'].lower() == 'true')

        #### If the CURIE is specified, try to find that
        if parameters['curie'] is not None:

            # If the curie is a scalar then treat it here as a list of one
            if isinstance(parameters['curie'], str):
                curie_list = [parameters['curie']]
                is_curie_a_list = False
                if parameters['is_set'] is not None and qnode.is_set is True:
                    response.error(
                        f"Specified CURIE '{parameters['curie']}' is a scalar, but is_set=true, which doesn't make sense",
                        error_code="CurieScalarButIsSetTrue")
                    return response

            # Or else set it up as a list
            elif isinstance(parameters['curie'], list):
                curie_list = parameters['curie']
                is_curie_a_list = True
                qnode.curie = []
                if parameters['is_set'] is None:
                    response.warning(
                        f"Specified CURIE '{parameters['curie']}' is a list, but is_set was not set to true. It must be true in this context, so automatically setting to true. Avoid this warning by explictly setting to true."
                    )
                    qnode.is_set = True
                else:
                    if qnode.is_set == False:
                        response.warning(
                            f"Specified CURIE '{parameters['curie']}' is a list, but is_set=false, which doesn't make sense, so automatically setting to true. Avoid this warning by explictly setting to true."
                        )
                        qnode.is_set = True

            # Or if it's neither a list or a string, then error out. This cannot be handled at present
            else:
                response.error(
                    f"Specified CURIE '{parameters['curie']}' is neither a string nor a list. This cannot to handled",
                    error_code="CurieNotListOrScalar")
                return response

            # Loop over the available curies and create the list
            for curie in curie_list:
                response.debug(f"Looking up CURIE {curie} in KgNodeIndex")
                nodes = kgNodeIndex.get_curies_and_types(curie, kg_name='KG2')

                # If nothing was found, we won't bail out, but rather just issue a warning
                if len(nodes) == 0:
                    response.warning(
                        f"A node with CURIE {curie} is not in our knowledge graph KG2, but will continue"
                    )
                    if is_curie_a_list:
                        qnode.curie.append(curie)
                    else:
                        qnode.curie = curie

                else:

                    # FIXME. This is just always taking the first result. This could cause problems for CURIEs with multiple types. Is that possible?
                    # In issue #623 on 2020-06-15 we concluded that we should not specify the type here
                    #qnode.type = nodes[0]['type']

                    # Either append or set the found curie
                    if is_curie_a_list:
                        qnode.curie.append(nodes[0]['curie'])
                    else:
                        qnode.curie = nodes[0]['curie']

                if 'type' in parameters and parameters['type'] is not None:
                    if isinstance(parameters['type'], str):
                        qnode.type = parameters['type']
                    else:
                        qnode.type = parameters['type'][0]

            message.query_graph.nodes.append(qnode)
            return response

        #### If the name is specified, try to find that
        if parameters['name'] is not None:
            response.debug(
                f"Looking up CURIE {parameters['name']} in KgNodeIndex")
            nodes = kgNodeIndex.get_curies_and_types(parameters['name'])
            if len(nodes) == 0:
                nodes = kgNodeIndex.get_curies_and_types(parameters['name'],
                                                         kg_name='KG2')
                if len(nodes) == 0:
                    response.error(
                        f"A node with name '{parameters['name']}'' is not in our knowledge graph",
                        error_code="UnknownCURIE")
                    return response
            qnode.curie = nodes[0]['curie']
            qnode.type = nodes[0]['type']
            message.query_graph.nodes.append(qnode)
            return response

        #### If the type is specified, just add that type. There should be checking that it is legal. FIXME
        if parameters['type'] is not None:
            qnode.type = parameters['type']
            if parameters['is_set'] is not None:
                qnode.is_set = (parameters['is_set'].lower() == 'true')
            message.query_graph.nodes.append(qnode)
            return response

        #### If we get here, it means that all three main parameters are null. Just a generic node with no type or anything. This is okay.
        message.query_graph.nodes.append(qnode)
        return response
Esempio n. 11
0
    def answer(self, entity, use_json=False):
        """
		Answer a question of the type "What is X" but is general:
		:param entity: KG neo4j node name (eg "carbetocin")
		:param use_json: If the answer should be in Translator standardized API output format
		:return: a description and type of the node
		"""

        #### See if this entity is in the KG via the index
        eprint("Looking up '%s' in KgNodeIndex" % entity)
        kgNodeIndex = KGNodeIndex()
        curies = kgNodeIndex.get_curies(entity)

        #### If not in the KG, then return no information
        if not curies:
            if not use_json:
                return None
            else:
                error_code = "TermNotFound"
                error_message = "This concept is not in our knowledge graph"
                response = FormatOutput.FormatResponse(0)
                response.add_error_message(error_code, error_message)
                return response.message

        # Get label/kind of node the source is
        eprint("Getting properties for '%s'" % curies[0])
        properties = RU.get_node_properties(curies[0])
        eprint("Properties are:")
        eprint(properties)

        #### By default, return the results just as a plain simple list of data structures
        if not use_json:
            return properties

        #### Or, if requested, format the output as the standardized API output format
        else:
            #### Create a stub Message object
            response = FormatOutput.FormatResponse(0)
            response.message.table_column_names = [
                "id", "type", "name", "description", "uri"
            ]
            response.message.code_description = None

            #### Create a Node object and fill it
            node1 = Node()
            node1.id = properties["id"]
            node1.uri = properties["uri"]
            node1.type = [properties["category"]]
            node1.name = properties["name"]
            node1.description = properties["description"]

            #### Create the first result (potential answer)
            result1 = Result()
            result1.id = "http://arax.ncats.io/api/v1/result/0000"
            result1.description = "The term %s is in our knowledge graph and is defined as %s" % (
                properties["name"], properties["description"])
            result1.confidence = 1.0
            result1.essence = properties["name"]
            result1.essence_type = properties["category"]
            node_types = ",".join(node1.type)
            result1.row_data = [
                node1.id, node_types, node1.name, node1.description, node1.uri
            ]

            #### Create a KnowledgeGraph object and put the list of nodes and edges into it
            result_graph = KnowledgeGraph()
            result_graph.nodes = [node1]
            result_graph.edges = []

            #### Put the ResultGraph into the first result (potential answer)
            result1.result_graph = result_graph

            #### Put the first result (potential answer) into the message
            results = [result1]
            response.message.results = results

            #### Also put the union of all result_graph components into the top Message KnowledgeGraph
            #### Normally the knowledge_graph will be much more complex than this, but take a shortcut for this single-node result
            response.message.knowledge_graph = result_graph

            #### Also manufacture a query_graph post hoc
            qnode1 = QNode()
            qnode1.id = "n00"
            qnode1.curie = properties["id"]
            qnode1.type = None
            query_graph = QueryGraph()
            query_graph.nodes = [qnode1]
            query_graph.edges = []
            response.message.query_graph = query_graph

            #### Create the corresponding knowledge_map
            node_binding = NodeBinding(qg_id="n00", kg_id=properties["id"])
            result1.node_bindings = [node_binding]
            result1.edge_bindings = []

            #eprint(response.message)
            return response.message
    def answer(source_node_ID,
               target_node_type,
               association_node_type,
               use_json=False,
               threshold=0.2,
               n=20):
        """
		Answers the question what X are similar to Y based on overlap of common Z nodes. X is target_node_type,
		Y is source_node_ID, Z is association_node_type. The relationships are automatically determined in
		SimilarNodesInCommon by looking for 1 hop relationships and poping the FIRST one (you are warned).
		:param source_node_ID: actual name in the KG
		:param target_node_type: kinds of nodes you want returned
		:param association_node_type: kind of node you are computing the Jaccard overlap on
		:param use_json: print the results in standardized format
		:param threshold: only return results where jaccard is >= this threshold
		:param n: number of results to return (default 20)
		:return: reponse (or printed text)
		"""

        # Initialize the response class
        response = FormatOutput.FormatResponse(5)
        # add the column names for the row data
        response.message.table_column_names = [
            "source name", "source ID", "target name", "target ID",
            "Jaccard index"
        ]

        # Initialize the similar nodes class
        similar_nodes_in_common = SimilarNodesInCommon.SimilarNodesInCommon()

        # get the description
        source_node_description = RU.get_node_property(source_node_ID, 'name')

        # get the source node label
        source_node_label = RU.get_node_property(source_node_ID, 'label')

        # Get the nodes in common
        node_jaccard_tuples_sorted, error_code, error_message = similar_nodes_in_common.get_similar_nodes_in_common_source_target_association(
            source_node_ID, target_node_type, association_node_type, threshold)

        # reduce to top 100
        if len(node_jaccard_tuples_sorted) > n:
            node_jaccard_tuples_sorted = node_jaccard_tuples_sorted[0:n]

        # make sure that the input node isn't in the list
        node_jaccard_tuples_sorted = [
            i for i in node_jaccard_tuples_sorted if i[0] != source_node_ID
        ]

        # check for an error
        if error_code is not None or error_message is not None:
            if not use_json:
                print(error_message)
                return
            else:
                response.add_error_message(error_code, error_message)
                response.print()
                return

        #### If use_json not specified, then return results as a fairly plain list
        if not use_json:
            to_print = "The %s's involving similar %ss as %s are: \n" % (
                target_node_type, association_node_type,
                source_node_description)
            for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
                to_print += "%s\t%s\tJaccard %f\n" % (
                    other_disease_ID,
                    RU.get_node_property(other_disease_ID, 'name'), jaccard)
            print(to_print)

        #### Else if use_json requested, return the results in the Translator standard API JSON format
        else:

            #### Create the QueryGraph for this type of question
            query_graph = QueryGraph()
            source_node = QNode()
            source_node.id = "n00"
            source_node.curie = source_node_ID
            source_node.type = source_node_label
            association_node = QNode()
            association_node.id = "n01"
            association_node.type = association_node_type
            association_node.is_set = True
            target_node = QNode()
            target_node.id = "n02"
            target_node.type = target_node_type
            query_graph.nodes = [source_node, association_node, target_node]

            #source_association_relationship_type = "unknown1"
            edge1 = QEdge()
            edge1.id = "en00-n01"
            edge1.source_id = "n00"
            edge1.target_id = "n01"
            #edge1.type = source_association_relationship_type

            #association_target_relationship_type = "unknown2"
            edge2 = QEdge()
            edge2.id = "en01-n02"
            edge2.source_id = "n01"
            edge2.target_id = "n02"
            #edge2.type = association_target_relationship_type

            query_graph.edges = [edge1, edge2]

            #### DONT Suppress the query_graph because we can now do the knowledge_map with v0.9.1
            response.message.query_graph = query_graph

            #### Create a mapping dict with the source curie and node types and edge types. This dict is used for reverse lookups by type
            #### for mapping to the QueryGraph. There is a potential point of failure here if there are duplicate node or edge types. FIXME
            response._type_map = dict()
            response._type_map[source_node.curie] = source_node.id
            response._type_map[association_node.type] = association_node.id
            response._type_map[target_node.type] = target_node.id
            response._type_map["e" + edge1.source_id + "-" +
                               edge1.target_id] = edge1.id
            response._type_map["e" + edge2.source_id + "-" +
                               edge2.target_id] = edge2.id

            #### Extract the sorted IDs from the list of tuples
            node_jaccard_ID_sorted = [
                id for id, jac in node_jaccard_tuples_sorted
            ]

            # print(RU.return_subgraph_through_node_labels(source_node_ID, source_node_label, node_jaccard_ID_sorted, target_node_type,
            #										[association_node_type], with_rel=[], directed=True, debug=True))

            # get the entire subgraph
            g = RU.return_subgraph_through_node_labels(source_node_ID,
                                                       source_node_label,
                                                       node_jaccard_ID_sorted,
                                                       target_node_type,
                                                       [association_node_type],
                                                       with_rel=[],
                                                       directed=False,
                                                       debug=False)

            # extract the source_node_number
            for node, data in g.nodes(data=True):
                if data['properties']['id'] == source_node_ID:
                    source_node_number = node
                    break

            # Get all the target numbers
            target_id2numbers = dict()
            node_jaccard_ID_sorted_set = set(node_jaccard_ID_sorted)
            for node, data in g.nodes(data=True):
                if data['properties']['id'] in node_jaccard_ID_sorted_set:
                    target_id2numbers[data['properties']['id']] = node

            for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
                target_name = RU.get_node_property(other_disease_ID, 'name')
                to_print = "The %s %s involves similar %ss as %s with similarity value %f" % (
                    target_node_type, target_name, association_node_type,
                    source_node_description, jaccard)

                # get all the shortest paths between source and target
                all_paths = nx.all_shortest_paths(
                    g, source_node_number, target_id2numbers[other_disease_ID])

                # get all the nodes on these paths
                #try:
                if 1 == 1:
                    rel_nodes = set()
                    for path in all_paths:
                        for node in path:
                            rel_nodes.add(node)

                    if rel_nodes:
                        # extract the relevant subgraph
                        sub_g = nx.subgraph(g, rel_nodes)

                        # add it to the response
                        res = response.add_subgraph(sub_g.nodes(data=True),
                                                    sub_g.edges(data=True),
                                                    to_print,
                                                    jaccard,
                                                    return_result=True)
                        res.essence = "%s" % target_name  # populate with essence of question result
                        res.essence_type = target_node_type
                        row_data = []  # initialize the row data
                        row_data.append("%s" % source_node_description)
                        row_data.append("%s" % source_node_ID)
                        row_data.append("%s" % target_name)
                        row_data.append("%s" % other_disease_ID)
                        row_data.append("%f" % jaccard)
                        res.row_data = row_data


#				except:
#					pass
            response.print()
Esempio n. 13
0
    def answer(self,
               source_name,
               target_label,
               relationship_type,
               use_json=False,
               directed=False):
        """
		Answer a question of the type "What proteins does drug X target" but is general:
		 what <node X type> does <node Y grounded> <relatioship Z> that can be answered in one hop in the KG (increasing the step size if necessary).
		:param query_terms: a triple consisting of a source node name (KG neo4j node name, the target label (KG neo4j
		"node label") and the relationship type (KG neo4j "Relationship type")
		:param source_name: KG neo4j node name (eg "carbetocin")
		:param target_label: KG node label (eg. "protein")
		:param relationship_type: KG relationship type (eg. "physically_interacts_with")
		:param use_json: If the answer should be in Eric's Json standardized API output format
		:return: list of dictionaries containing the nodes that are one hop (along relationship type) that connect source to target.
		"""
        # Get label/kind of node the source is
        source_label = RU.get_node_property(source_name, "label")

        # Get the subgraph (all targets along relationship)
        has_intermediate_node = False
        try:
            g = RU.return_subgraph_paths_of_type(source_name,
                                                 source_label,
                                                 None,
                                                 target_label,
                                                 [relationship_type],
                                                 directed=directed)
        except CustomExceptions.EmptyCypherError:
            try:
                has_intermediate_node = True
                g = RU.return_subgraph_paths_of_type(
                    source_name,
                    source_label,
                    None,
                    target_label, ['subclass_of', relationship_type],
                    directed=directed)
            except CustomExceptions.EmptyCypherError:
                error_message = "No path between %s and %s via relationship %s" % (
                    source_name, target_label, relationship_type)
                error_code = "NoPathsFound"
                response = FormatOutput.FormatResponse(3)
                response.add_error_message(error_code, error_message)
                return response

        # extract the source_node_number
        for node, data in g.nodes(data=True):
            if data['properties']['id'] == source_name:
                source_node_number = node
                break

        # Get all the target numbers
        target_numbers = []
        for node, data in g.nodes(data=True):
            if data['properties']['id'] != source_name:
                target_numbers.append(node)

        # if there's an intermediate node, get the name
        if has_intermediate_node:
            neighbors = list(g.neighbors(source_node_number))
            if len(neighbors) > 1:
                error_message = "More than one intermediate node"
                error_code = "AmbiguousPath"
                response = FormatOutput.FormatResponse(3)
                response.add_error_message(error_code, error_message)
                return response
            else:
                intermediate_node = neighbors.pop()

        #### If use_json not specified, then return results as a fairly plain list
        if not use_json:
            results_list = list()
            for target_number in target_numbers:
                data = g.nodes[target_number]
                results_list.append({
                    'type':
                    list(set(data['labels']) - {'Base'}).pop(),
                    'name':
                    data['properties']['name'],
                    'desc':
                    data['properties']['name'],
                    'prob':
                    1
                })  # All these are known to be true
            return results_list

        #### Else if use_json requested, return the results in the Translator standard API JSON format
        else:
            response = FormatOutput.FormatResponse(3)  # it's a Q3 question
            response.message.table_column_names = [
                "source name", "source ID", "target name", "target ID"
            ]
            source_description = g.nodes[source_node_number]['properties'][
                'name']

            #### Create the QueryGraph for this type of question
            query_graph = QueryGraph()
            source_node = QNode()
            source_node.id = "n00"
            source_node.curie = g.nodes[source_node_number]['properties']['id']
            source_node.type = g.nodes[source_node_number]['properties'][
                'category']
            target_node = QNode()
            target_node.id = "n01"
            target_node.type = target_label
            query_graph.nodes = [source_node, target_node]
            edge1 = QEdge()
            edge1.id = "e00"
            edge1.source_id = "n00"
            edge1.target_id = "n01"
            edge1.type = relationship_type
            query_graph.edges = [edge1]
            response.message.query_graph = query_graph

            #### Create a mapping dict with the source curie and the target type. This dict is used for reverse lookups by type
            #### for mapping to the QueryGraph.
            response._type_map = dict()
            response._type_map[source_node.curie] = source_node.id
            response._type_map[target_node.type] = target_node.id
            response._type_map[edge1.type] = edge1.id

            #### Loop over all the returned targets and put them into the response structure
            for target_number in target_numbers:
                target_description = g.nodes[target_number]['properties'][
                    'name']
                if not has_intermediate_node:
                    subgraph = g.subgraph([source_node_number, target_number])
                else:
                    subgraph = g.subgraph(
                        [source_node_number, intermediate_node, target_number])
                res = response.add_subgraph(
                    subgraph.nodes(data=True),
                    subgraph.edges(data=True),
                    "%s and %s are connected by the relationship %s" %
                    (source_description, target_description,
                     relationship_type),
                    1,
                    return_result=True)
                res.essence = "%s" % target_description  # populate with essence of question result
                res.essence_type = g.nodes[target_number]['properties'][
                    'category']  # populate with the type of the essence of question result
                row_data = []  # initialize the row data
                row_data.append("%s" % source_description)
                row_data.append(
                    "%s" % g.nodes[source_node_number]['properties']['id'])
                row_data.append("%s" % target_description)
                row_data.append("%s" %
                                g.nodes[target_number]['properties']['id'])
                res.row_data = row_data
            return response
Esempio n. 14
0
    def queryTerm(self, term):
        method = "queryTerm"
        attributes = self.findTermAttributesAndTypeByName(term)
        message = self.createMessage()
        if ( attributes["status"] == 'OK' ):
            message.code_description = "1 result found"
            message.table_column_names = [ "id", "type", "name", "description", "uri" ]

            #### Create a Node object and fill it
            node1 = Node()
            node1.id = "MESH:" + attributes["id"]
            node1.uri = "http://purl.obolibrary.org/obo/MESH_" + attributes["id"]
            node1.type = [ attributes["type"] ]
            node1.name = attributes["name"]
            node1.description = attributes["description"]

            #### Create the first result (potential answer)
            result1 = Result()
            result1.id = "http://rtx.ncats.io/api/v1/result/0000"
            result1.description = "The term " + attributes["name"] + " refers to " + attributes["description"]
            result1.confidence = 1.0
            result1.essence = attributes["name"]
            result1.essence_type = attributes["type"]
            node_types = ",".join(node1.type)
            result1.row_data = [ node1.id, node_types, node1.name, node1.description, node1.uri ]

            #### Create a KnowledgeGraph object and put the list of nodes and edges into it
            result_graph = KnowledgeGraph()
            result_graph.nodes = [ node1 ]

            #### Put the ResultGraph into the first result (potential answer)
            result1.result_graph = result_graph

            #### Put the first result (potential answer) into the message
            results = [ result1 ]
            message.results = results

            #### Also put the union of all result_graph components into the top Message KnowledgeGraph
            #### Normally the knowledge_graph will be much more complex than this, but take a shortcut for this single-node result
            message.knowledge_graph = result_graph

            #### Also manufacture a query_graph post hoc
            qnode1 = QNode()
            qnode1.node_id = "n00"
            qnode1.curie = "MESH:" + attributes["id"]
            qnode1.type = None
            query_graph = QueryGraph()
            query_graph.nodes = [ qnode1 ]
            query_graph.edges = []
            message.query_graph = query_graph

            #### Create the corresponding knowledge_map
            knowledge_map = { "n00": "MESH:" + attributes["id"] }
            result1.knowledge_map = knowledge_map

        else:
            message.message_code = "TermNotFound"
            message.code_description = "Unable to find this term in MeSH. No further information is available at this time."
            message.id = None

        return message