def from_dict(self, query_graph_dict):
    """
    Build a QueryGraph from a dict-like description of its nodes and edges

    :param query_graph_dict: container that may hold a "nodes" and/or an "edges" entry; each entry
        is iterated and every item is handed to QNode().from_dict() / QEdge().from_dict()
    :return: a new QueryGraph whose nodes and edges lists hold the converted items (an entry that
        is absent leaves the corresponding list empty)
    """
    graph = QueryGraph()
    graph.nodes = []
    graph.edges = []

    #### Convert the nodes first, then the edges, preserving their input order
    if "nodes" in query_graph_dict:
        graph.nodes.extend(QNode().from_dict(raw_node) for raw_node in query_graph_dict["nodes"])
    if "edges" in query_graph_dict:
        graph.edges.extend(QEdge().from_dict(raw_edge) for raw_edge in query_graph_dict["edges"])

    return graph
def _get_query_graph_for_edge(qedge, query_graph, dict_kg):
    """
    Create a query graph holding copies of the given qedge and its two qnodes

    A copied qnode without a curie is given the keys of dict_kg['nodes'][<qnode id>] as its curie
    when that qnode id is already present in dict_kg['nodes'] and this qedge's id is not yet
    present in dict_kg['edges'] (i.e. curies found by a prior Expand() step are fed forward).

    :param qedge: the query edge to build a query graph for
    :param query_graph: the query graph the qedge's source and target qnodes are looked up in
    :param dict_kg: dict with 'nodes' and 'edges' entries keyed by query graph id
    :return: tuple of (the new edge query graph, set of qnode ids whose curie was taken from dict_kg)
    """
    single_edge_qg = QueryGraph(nodes=[], edges=[])
    endpoint_qnodes = [
        eu.get_query_node(query_graph, qedge.source_id),
        eu.get_query_node(query_graph, qedge.target_id),
    ]

    # The edge query graph gets its own copy of the qedge
    single_edge_qg.edges.append(eu.copy_qedge(qedge))

    # Copy both endpoints, borrowing curies from the prior step where applicable
    edge_not_yet_expanded = qedge.id not in dict_kg['edges']
    borrowed_curie_qnode_ids = set()
    for endpoint in endpoint_qnodes:
        endpoint_copy = eu.copy_qnode(endpoint)
        if endpoint_copy.id in dict_kg['nodes'] and not endpoint_copy.curie and edge_not_yet_expanded:
            endpoint_copy.curie = list(dict_kg['nodes'][endpoint_copy.id].keys())
            borrowed_curie_qnode_ids.add(endpoint_copy.id)
        single_edge_qg.nodes.append(endpoint_copy)

    return single_edge_qg, borrowed_curie_qnode_ids
def _get_query_graph_for_edge(self, qedge: QEdge, query_graph: QueryGraph, dict_kg: DictKnowledgeGraph,
                              use_synonyms: bool, kp_to_use: str, log: Response) -> QueryGraph:
    """
    Create a query graph holding copies of the given qedge and its two qnodes

    A copied qnode without a curie is given the keys of dict_kg.nodes_by_qg_id[<qnode id>] as its
    curie when that qnode id is already present in dict_kg.nodes_by_qg_id and this qedge's id is not
    yet present in dict_kg.edges_by_qg_id. Afterwards curie synonyms are optionally added, and any
    qnode typed 'protein' or 'gene' is widened to ['protein', 'gene'].

    :param qedge: the query edge to build a query graph for
    :param query_graph: the query graph the qedge's source and target qnodes are looked up in
    :param dict_kg: knowledge graph exposing nodes_by_qg_id and edges_by_qg_id
    :param use_synonyms: whether to call self._add_curie_synonyms_to_query_nodes() on the copied qnodes
    :param kp_to_use: passed through as the kp argument of the synonym step
    :param log: passed through as the log argument of the synonym step
    :return: the new edge query graph
    """
    single_edge_qg = QueryGraph(nodes=[], edges=[])
    endpoint_qnodes = [
        eu.get_query_node(query_graph, qedge.source_id),
        eu.get_query_node(query_graph, qedge.target_id),
    ]

    # The edge query graph gets its own copy of the qedge
    single_edge_qg.edges.append(eu.copy_qedge(qedge))

    # Copy both endpoints, feeding in curies from a prior Expand() step where applicable
    edge_not_yet_expanded = qedge.id not in dict_kg.edges_by_qg_id
    for endpoint in endpoint_qnodes:
        endpoint_copy = eu.copy_qnode(endpoint)
        if endpoint_copy.id in dict_kg.nodes_by_qg_id and not endpoint_copy.curie and edge_not_yet_expanded:
            endpoint_copy.curie = list(dict_kg.nodes_by_qg_id[endpoint_copy.id].keys())
        single_edge_qg.nodes.append(endpoint_copy)

    if use_synonyms:
        self._add_curie_synonyms_to_query_nodes(qnodes=single_edge_qg.nodes, log=log, kp=kp_to_use)

    # KPs handle proteins and genes differently, so ask for both whenever either one is requested
    for copied_qnode in single_edge_qg.nodes:
        if copied_qnode.type in ('protein', 'gene'):
            copied_qnode.type = ['protein', 'gene']  # fresh list per qnode, never shared

    return single_edge_qg
def __get_next_free_edge_id(self):
    """
    Find the lowest-numbered QEdge id of the form e00, e01, ... that is not used in the query graph

    Reads self.message; a missing query_graph (or a missing edges list) is created empty on it
    as a side effect.

    :return: the first id "e<NN>" (zero-padded to at least two digits) not held by any existing qedge
    :rtype: str
    """
    #### Make sure the message has a query_graph with an edges list to inspect
    message = self.message
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.edges is None:
        message.query_graph.edges = []

    #### Collect the edge ids already in use
    taken_ids = {existing_qedge.id for existing_qedge in message.query_graph.edges}

    #### Count upwards until an unused id turns up
    candidate_number = 0
    while f"e{candidate_number:02d}" in taken_ids:
        candidate_number += 1
    return f"e{candidate_number:02d}"
def create_message(self, describe=False):
    """
    Creates a basic empty Message object with basic boilerplate metadata

    The new message is stored on self.message and the response on self.response.

    :param describe: when true, no message is created; a dict documenting this DSL command is
        returned instead
    :return: Response object with execution information and the new message object inside the
        data envelope (or the documentation dict when describe is true)
    :rtype: Response
    """
    #### Documentation-only mode. The command name can't be discovered at run-time, so it is
    #### spelled out by hand per https://www.python.org/dev/peps/pep-3130/
    if describe:
        return {
            'dsl_command': '`create_message()`',
            'brief_description': (
                "The `create_message` method creates a basic empty Message object with basic boilerplate metadata "
                "such as reasoner_id, schema_version, etc. filled in. This DSL command takes no arguments"
            ),
        }

    #### Set up the response and the top-level message, remembering both on the instance
    response = Response()
    self.response = response
    response.info("Creating an empty template ARAX Message")
    message = Message()
    self.message = message

    #### Constant boilerplate metadata
    constant_fields = (
        ('id', None),
        ('type', "translator_reasoner_message"),
        ('reasoner_id', "ARAX"),
        ('schema_version', "0.9.3"),
        ('message_code', "OK"),
        ('code_description', "Created empty template Message"),
        ('context', "https://raw.githubusercontent.com/biolink/biolink-model/master/context.jsonld"),
    )
    for field_name, field_value in constant_fields:
        setattr(message, field_name, field_value)

    #### Metadata computed at call time
    message.tool_version = RTXConfiguration().version
    #### Why is this _datetime ?? FIXME
    message._datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    #### Empty master knowledge graph
    knowledge_graph = KnowledgeGraph()
    knowledge_graph.nodes = []
    knowledge_graph.edges = []
    message.knowledge_graph = knowledge_graph

    #### Empty query graph
    query_graph = QueryGraph()
    query_graph.nodes = []
    query_graph.edges = []
    message.query_graph = query_graph

    #### No results yet
    message.results = []
    message.n_results = 0

    response.data['message'] = message
    return response
def _extract_query_subgraph(qedge_ids_to_expand: List[str], query_graph: QueryGraph, log: Response) -> QueryGraph:
    """
    Extract a sub-query graph containing the given qedge IDs (and their qnodes) from a query graph

    Copies of each requested qedge and of its source and target qnodes are added to the result;
    a qedge or qnode whose id has already been copied is not added again.

    :param qedge_ids_to_expand: ids of the query edges to pull out of query_graph
    :param query_graph: the larger query graph to extract from
    :param log: receives an error if a requested qedge, or a qnode it refers to, does not exist
    :return: the new sub-query graph, or None (after logging an error) when a requested qedge id
        is unknown ("UnknownValue") or a qedge refers to a missing qnode ("InvalidQEdge")
    """
    sub_query_graph = QueryGraph(nodes=[], edges=[])
    # Ids already copied into the sub graph; it starts empty and only this loop adds to it,
    # so these sets always mirror its contents
    copied_qedge_ids = set()
    copied_qnode_ids = set()

    for qedge_id in qedge_ids_to_expand:
        # Make sure this query edge actually exists in the query graph (single scan)
        qedge = next((candidate for candidate in query_graph.edges if candidate.id == qedge_id), None)
        if qedge is None:
            log.error(f"An edge with ID '{qedge_id}' does not exist in Message.QueryGraph",
                      error_code="UnknownValue")
            return None

        # Make sure this qedge's qnodes actually exist in the query graph (source is checked first)
        source_qnode = eu.get_query_node(query_graph, qedge.source_id)
        if not source_qnode:
            log.error(f"Qedge {qedge.id}'s source_id refers to a qnode that does not exist in the query graph: "
                      f"{qedge.source_id}", error_code="InvalidQEdge")
            return None
        target_qnode = eu.get_query_node(query_graph, qedge.target_id)
        if not target_qnode:
            log.error(f"Qedge {qedge.id}'s target_id refers to a qnode that does not exist in the query graph: "
                      f"{qedge.target_id}", error_code="InvalidQEdge")
            return None

        # Add (copies of) this qedge and its two qnodes to our new query sub graph
        qedge_copy = eu.copy_qedge(qedge)
        if qedge_copy.id not in copied_qedge_ids:
            copied_qedge_ids.add(qedge_copy.id)
            sub_query_graph.edges.append(qedge_copy)
        for endpoint in (source_qnode, target_qnode):
            qnode_copy = eu.copy_qnode(endpoint)
            if qnode_copy.id not in copied_qnode_ids:
                copied_qnode_ids.add(qnode_copy.id)
                sub_query_graph.nodes.append(qnode_copy)

    return sub_query_graph
def from_dict(self, message):
    """
    Convert a message (and the graphs nested inside it) into model objects

    :param message: either a Message object or something Message().from_dict() accepts
    :return: the Message, with query_graph and knowledge_graph passed through
        QueryGraph().from_dict() / KnowledgeGraph().from_dict(), stringified qnode curie lists
        restored to real lists, and each non-None result_graph that is not already a
        KnowledgeGraph converted to one
    """
    if not isinstance(message, Message):
        message = Message().from_dict(message)
    message.query_graph = QueryGraph().from_dict(message.query_graph)
    message.knowledge_graph = KnowledgeGraph().from_dict(message.knowledge_graph)

    #### This is an unfortunate hack that fixes qnode.curie entries
    #### Officially a curie can be a str or a list. But Swagger 2.0 only permits one type and we set it to str
    #### so when it gets converted from_dict, the list gets converted to a str because that's its type
    #### Here we force it back. This should no longer be needed when we are properly on OpenAPI 3.0
    if message.query_graph is not None and message.query_graph.nodes is not None:
        for qnode in message.query_graph.nodes:
            if isinstance(qnode.curie, str) and qnode.curie.startswith("['"):
                try:
                    qnode.curie = ast.literal_eval(qnode.curie)
                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                    # Best effort: these are the errors literal_eval raises on malformed input.
                    # Leave the curie as the original string rather than failing the conversion.
                    pass

    #### Make sure every result_graph is a KnowledgeGraph object
    if message.results is not None:
        for result in message.results:
            result_graph = result.result_graph
            if result_graph is not None and not isinstance(result_graph, KnowledgeGraph):
                result.result_graph = KnowledgeGraph().from_dict(result_graph)

    return message
def reassign_curies(self, message, input_parameters, describe=False):
    """
    Reassigns CURIEs to the target Knowledge Provider

    Every QueryGraph node that has a CURIE is looked up in the KGNodeIndex. If the CURIE is not
    present for the requested provider, a mapped CURIE is chosen (the original one if it is among
    the candidates, otherwise the first candidate) and written back to the qnode.

    :param message: Translator standard Message object
    :type message: Message
    :param input_parameters: Dict of input parameters to control the method
        (knowledge_provider: 'KG1' or 'KG2', default 'KG1'; mismap_result: 'ERROR' or 'WARNING',
        default 'ERROR')
    :type input_parameters: dict
    :param describe: when true, nothing is reassigned; a dict documenting this DSL command is
        returned instead
    :return: Response object with execution information
    :rtype: Response
    """
    #### Internal documentation setup
    allowable_parameters = {
        'knowledge_provider': {
            'Name of the Knowledge Provider CURIE space to map to. Default=KG1. Also currently supported KG2'
        },
        'mismap_result': {
            'Desired action when mapping fails: ERROR or WARNING. Default is ERROR'
        },
    }
    if describe:
        # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
        allowable_parameters['dsl_command'] = '`reassign_curies()`'
        allowable_parameters['brief_description'] = (
            "The `reassign_curies` method reassigns all the CURIEs in the Message QueryGraph to the specified "
            "knowledge provider. Allowed values are KG1 or KG2. Default is KG1 if not specified."
        )
        return allowable_parameters

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'knowledge_provider': 'KG1',
        'mismap_result': 'ERROR',
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value

    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    #### Check that the knowledge_provider is valid; everything below relies on it being KG1 or KG2
    knowledge_provider = parameters['knowledge_provider']
    if knowledge_provider not in ('KG1', 'KG2'):
        response.error(
            f"Specified knowledge provider must be 'KG1' or 'KG2', not '{knowledge_provider}'",
            error_code="UnknownKP")
        return response

    #### Now try to assign the CURIEs
    response.info(f"Reassigning the CURIEs in QueryGraph to {knowledge_provider} space")

    #### Make sure there's a query_graph already here
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.nodes is None:
        message.query_graph.nodes = []

    #### Set up the KGNodeIndex
    kgNodeIndex = KGNodeIndex()

    #### Loop through the QueryGraph nodes and adjust them
    for qnode in message.query_graph.nodes:

        # If the CURIE is None, then there's nothing to do
        curie = qnode.curie
        if curie is None:
            continue

        # Map the CURIE to the desired Knowledge Provider
        if knowledge_provider == 'KG1':
            if kgNodeIndex.is_curie_present(curie):
                mapped_curies = [curie]
            else:
                mapped_curies = kgNodeIndex.get_KG1_curies(curie)
        else:
            if kgNodeIndex.is_curie_present(curie, kg_name='KG2'):
                mapped_curies = [curie]
            else:
                # get_curies_and_types() entries are indexed by 'curie' elsewhere in this code,
                # so reduce each entry to its CURIE before comparing or assigning it
                mapped_curies = [
                    entry['curie'] if isinstance(entry, dict) else entry
                    for entry in kgNodeIndex.get_curies_and_types(curie, kg_name='KG2')
                ]

        # Try to find a new CURIE
        new_curie = None
        if len(mapped_curies) == 0:
            if parameters['mismap_result'] == 'WARNING':
                response.warning(
                    f"Did not find a mapping for {curie} to KP '{knowledge_provider}'. Leaving as is")
            else:
                response.error(
                    f"Did not find a mapping for {curie} to KP '{knowledge_provider}'. This is an error")
        elif len(mapped_curies) == 1:
            new_curie = mapped_curies[0]
        elif curie in mapped_curies:
            # Several candidates, but the original CURIE is one of them, so keep it
            new_curie = curie
        else:
            new_curie = mapped_curies[0]
            response.warning(
                f"There are multiple possible CURIEs in KP '{knowledge_provider}'. Selecting the first one {new_curie}")

        # If there's no CURIE, then nothing to do
        if new_curie is None:
            continue

        if new_curie == curie:
            response.debug(f"CURIE {curie} is fine for KP '{knowledge_provider}'")
        else:
            response.info(f"Remapping CURIE {curie} to {new_curie} for KP '{knowledge_provider}'")
            # Actually perform the remapping that was just announced
            qnode.curie = new_curie

    #### Return the response
    return response
def add_qedge(self, message, input_parameters, describe=False):
    """
    Adds a new QEdge object to the QueryGraph inside the Message object

    :param message: Message object whose query_graph receives the new QEdge (a missing query_graph,
        nodes list or edges list is created empty)
    :param input_parameters: dict with any of the keys id, source_id, target_id, type; source_id and
        target_id are required and must be ids of QNodes already in the QueryGraph
    :param describe: when true, nothing is added; a dict documenting this DSL command is returned
        instead
    :return: Response object with execution information
    :rtype: Response
    """
    #### Internal documentation setup
    allowable_parameters = {
        'id': {
            'Any string that is unique among all QEdge id fields, with recommended format e00, e01, e02, etc.'
        },
        'source_id': {
            'id of the source QNode already present in the QueryGraph (e.g. n01, n02)'
        },
        'target_id': {
            'id of the target QNode already present in the QueryGraph (e.g. n01, n02)'
        },
        'type': {
            'Any valid Translator/BioLink relationship type (e.g. physically_interacts_with, participates_in)'
        },
    }
    if describe:
        # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
        allowable_parameters['dsl_command'] = '`add_qedge()`'
        allowable_parameters['brief_description'] = (
            "The `add_qedge` method adds an additional QEdge to the QueryGraph in the Message object. "
            "Currently source_id and target_id QNodes must already be present in the QueryGraph. "
            "The specified type is not currently checked that it is a valid Translator/BioLink relationship type, "
            "but it should be."
        )
        return allowable_parameters

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'id': None,
        'source_id': None,
        'target_id': None,
        'type': None,
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value

    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    response.info(f"Adding a QueryEdge to Message with parameters {parameters}")

    #### Make sure there's a query_graph already here, with both of its lists usable
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.edges is None:
        message.query_graph.edges = []
    if message.query_graph.nodes is None:
        # Without this an edges-only query graph would fail when its nodes are iterated below
        message.query_graph.nodes = []

    #### Create a QEdge, generating an id if none was supplied
    qedge = QEdge()
    if parameters['id'] is not None:
        qedge.id = parameters['id']
    else:
        qedge.id = self.__get_next_free_edge_id()

    #### Get the set of available node ids
    known_qnode_ids = {qnode.id for qnode in message.query_graph.nodes}

    #### Add the source_id
    if parameters['source_id'] is None:
        response.error("While trying to add QEdge, source_id is a required parameter",
                       error_code="MissingSourceId")
        return response
    if parameters['source_id'] not in known_qnode_ids:
        response.error(f"While trying to add QEdge, there is no QNode with id {parameters['source_id']}",
                       error_code="UnknownSourceId")
        return response
    qedge.source_id = parameters['source_id']

    #### Add the target_id
    if parameters['target_id'] is None:
        response.error("While trying to add QEdge, target_id is a required parameter",
                       error_code="MissingTargetId")
        return response
    if parameters['target_id'] not in known_qnode_ids:
        response.error(f"While trying to add QEdge, there is no QNode with id {parameters['target_id']}",
                       error_code="UnknownTargetId")
        return response
    qedge.target_id = parameters['target_id']

    #### Add the type if any. Need to verify it's an allowed type. FIXME
    if parameters['type'] is not None:
        qedge.type = parameters['type']

    #### Add it to the query_graph edge list
    message.query_graph.edges.append(qedge)

    #### Return the response
    return response
def add_qnode(self, message, input_parameters, describe=False):
    """
    Adds a new QNode object to the QueryGraph inside the Message object

    The first of curie, name and type that is specified decides how the node is filled in:
    a curie (string or list) is looked up in KG2 and kept even if not found; a name must resolve
    to a node (KG1 first, then KG2) or an error is returned; otherwise only the type (if any) is set.

    :param message: Message object whose query_graph receives the new QNode (a missing query_graph
        or nodes list is created empty)
    :param input_parameters: dict with any of the keys id, curie, name, type, is_set; is_set may be
        a string ('true', case-insensitive, means true) or a bool
    :param describe: when true, nothing is added; a dict documenting this DSL command is returned
        instead
    :return: Response object with execution information
    :rtype: Response
    """
    #### Internal documentation setup
    allowable_parameters = {
        'id': {
            'Any string that is unique among all QNode id fields, with recommended format n00, n01, n02, etc.'
        },
        'curie': {
            'Any compact URI (CURIE) (e.g. DOID:9281) (May also be a list like [UniProtKB:P12345,UniProtKB:Q54321])'
        },
        'name': {
            'Any name of a bioentity that will be resolved into a CURIE if possible or result in an error if not (e.g. hypertension, insulin)'
        },
        'type': {
            'Any valid Translator bioentity type (e.g. protein, chemical_substance, disease)'
        },
        'is_set': {
            'If set to true, this QNode represents a set of nodes that are all in common between the two other linked QNodes'
        },
    }
    if describe:
        # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
        allowable_parameters['dsl_command'] = '`add_qnode()`'
        allowable_parameters['brief_description'] = (
            "The `add_qnode` method adds an additional QNode to the QueryGraph in the Message object. "
            "Currently when a curie or name is specified, this method will only return success if a matching node "
            "is found in the KG1/KG2 KGNodeIndex."
        )
        return allowable_parameters

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'id': None,
        'curie': None,
        'name': None,
        'type': None,
        'is_set': None,
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value

    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    response.info(f"Adding a QueryNode to Message with parameters {parameters}")

    #### Make sure there's a query_graph already here
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.nodes is None:
        message.query_graph.nodes = []

    #### Set up the KGNodeIndex
    kgNodeIndex = KGNodeIndex()

    #### Create the QNode and set the id, generating one if none was supplied
    qnode = QNode()
    if parameters['id'] is not None:
        qnode.id = parameters['id']
    else:
        qnode.id = self.__get_next_free_node_id()

    #### Set the is_set parameter to what the user selected. The value may arrive as text
    #### or as a real boolean, and a boolean has no .lower()
    is_set_param = parameters['is_set']
    if is_set_param is not None:
        if isinstance(is_set_param, str):
            qnode.is_set = (is_set_param.lower() == 'true')
        else:
            qnode.is_set = bool(is_set_param)

    #### If the CURIE is specified, try to find that
    if parameters['curie'] is not None:

        # If the curie is a scalar then treat it here as a list of one
        if isinstance(parameters['curie'], str):
            curie_list = [parameters['curie']]
            is_curie_a_list = False
            if is_set_param is not None and qnode.is_set is True:
                response.error(
                    f"Specified CURIE '{parameters['curie']}' is a scalar, but is_set=true, which doesn't make sense",
                    error_code="CurieScalarButIsSetTrue")
                return response

        # Or else set it up as a list
        elif isinstance(parameters['curie'], list):
            curie_list = parameters['curie']
            is_curie_a_list = True
            qnode.curie = []
            if is_set_param is None:
                response.warning(
                    f"Specified CURIE '{parameters['curie']}' is a list, but is_set was not set to true. It must be true in this context, so automatically setting to true. Avoid this warning by explictly setting to true.")
                qnode.is_set = True
            elif not qnode.is_set:
                response.warning(
                    f"Specified CURIE '{parameters['curie']}' is a list, but is_set=false, which doesn't make sense, so automatically setting to true. Avoid this warning by explictly setting to true.")
                qnode.is_set = True

        # Or if it's neither a list or a string, then error out. This cannot be handled at present
        else:
            response.error(
                f"Specified CURIE '{parameters['curie']}' is neither a string nor a list. This cannot to handled",
                error_code="CurieNotListOrScalar")
            return response

        # Loop over the available curies and create the list
        for curie in curie_list:
            response.debug(f"Looking up CURIE {curie} in KgNodeIndex")
            nodes = kgNodeIndex.get_curies_and_types(curie, kg_name='KG2')

            if len(nodes) == 0:
                # If nothing was found, we won't bail out, but rather just issue a warning
                response.warning(
                    f"A node with CURIE {curie} is not in our knowledge graph KG2, but will continue")
                resolved_curie = curie
            else:
                # FIXME. This is just always taking the first result. This could cause problems for CURIEs with multiple types. Is that possible?
                # In issue #623 on 2020-06-15 we concluded that we should not specify the type here
                resolved_curie = nodes[0]['curie']

            # Either append or set the resolved curie
            if is_curie_a_list:
                qnode.curie.append(resolved_curie)
            else:
                qnode.curie = resolved_curie

        # The requested type does not depend on the lookups, so it is applied once after them
        # (which also covers an empty curie list)
        if parameters['type'] is not None:
            if isinstance(parameters['type'], str):
                qnode.type = parameters['type']
            else:
                qnode.type = parameters['type'][0]

        message.query_graph.nodes.append(qnode)
        return response

    #### If the name is specified, try to find that
    if parameters['name'] is not None:
        response.debug(f"Looking up CURIE {parameters['name']} in KgNodeIndex")
        nodes = kgNodeIndex.get_curies_and_types(parameters['name'])
        if len(nodes) == 0:
            # Not found with the default lookup, so fall back to KG2
            nodes = kgNodeIndex.get_curies_and_types(parameters['name'], kg_name='KG2')
            if len(nodes) == 0:
                response.error(
                    f"A node with name '{parameters['name']}'' is not in our knowledge graph",
                    error_code="UnknownCURIE")
                return response
        qnode.curie = nodes[0]['curie']
        qnode.type = nodes[0]['type']
        message.query_graph.nodes.append(qnode)
        return response

    #### If the type is specified, just add that type. There should be checking that it is legal. FIXME
    #### (is_set has already been applied above)
    if parameters['type'] is not None:
        qnode.type = parameters['type']

    #### With neither curie nor name given, the node is added as is; a generic node with no type
    #### or anything is okay
    message.query_graph.nodes.append(qnode)
    return response
def answer(self, entity, use_json=False):
    """
    Answer a question of the type "What is X" but is general:

    :param entity: KG neo4j node name (eg "carbetocin")
    :param use_json: If the answer should be in Translator standardized API output format
    :return: the properties of the first matching node (or None if the entity is unknown) when
        use_json is false; otherwise a message object holding either a single result describing
        the node or a "TermNotFound" error
    """
    #### See if this entity is in the KG via the index
    eprint(f"Looking up '{entity}' in KgNodeIndex")
    matching_curies = KGNodeIndex().get_curies(entity)

    #### If not in the KG, then return no information
    if not matching_curies:
        if not use_json:
            return None
        not_found_response = FormatOutput.FormatResponse(0)
        not_found_response.add_error_message("TermNotFound", "This concept is not in our knowledge graph")
        return not_found_response.message

    #### Fetch the properties of the first matching node
    first_curie = matching_curies[0]
    eprint(f"Getting properties for '{first_curie}'")
    properties = RU.get_node_properties(first_curie)
    eprint("Properties are:")
    eprint(properties)

    #### By default, return the results just as a plain simple list of data structures
    if not use_json:
        return properties

    #### Otherwise format the output as the standardized API output, starting from a stub Message
    response = FormatOutput.FormatResponse(0)
    response.message.table_column_names = ["id", "type", "name", "description", "uri"]
    response.message.code_description = None

    #### Describe the node itself
    node = Node()
    node.id = properties["id"]
    node.uri = properties["uri"]
    node.type = [properties["category"]]
    node.name = properties["name"]
    node.description = properties["description"]

    #### The single result (potential answer)
    result = Result()
    result.id = "http://arax.ncats.io/api/v1/result/0000"
    result.description = (
        f"The term {properties['name']} is in our knowledge graph and is defined as {properties['description']}"
    )
    result.confidence = 1.0
    result.essence = properties["name"]
    result.essence_type = properties["category"]
    result.row_data = [node.id, ",".join(node.type), node.name, node.description, node.uri]

    #### A one-node KnowledgeGraph serves as the result graph of that result
    single_node_graph = KnowledgeGraph()
    single_node_graph.nodes = [node]
    single_node_graph.edges = []
    result.result_graph = single_node_graph
    response.message.results = [result]

    #### Normally the top-level knowledge_graph is the union of all result graphs and much more complex,
    #### but for this single-node result the very same graph is reused as a shortcut
    response.message.knowledge_graph = single_node_graph

    #### Also manufacture a query_graph post hoc
    query_node = QNode()
    query_node.id = "n00"
    query_node.curie = properties["id"]
    query_node.type = None
    query_graph = QueryGraph()
    query_graph.nodes = [query_node]
    query_graph.edges = []
    response.message.query_graph = query_graph

    #### Create the corresponding knowledge_map
    result.node_bindings = [NodeBinding(qg_id="n00", kg_id=properties["id"])]
    result.edge_bindings = []

    return response.message
def answer(source_node_ID, target_node_type, association_node_type, use_json=False, threshold=0.2, n=20):
    """
    Answers the question what X are similar to Y based on overlap of common Z nodes. X is target_node_type,
    Y is source_node_ID, Z is association_node_type. The relationships are automatically determined in
    SimilarNodesInCommon by looking for 1 hop relationships and popping the FIRST one (you are warned).

    Nothing is returned; the answer (or the error) is printed, either as plain text or via response.print().

    :param source_node_ID: actual name in the KG
    :param target_node_type: kinds of nodes you want returned
    :param association_node_type: kind of node you are computing the Jaccard overlap on
    :param use_json: print the results in standardized format
    :param threshold: only return results where jaccard is >= this threshold
    :param n: maximum number of results to report (default 20); the source node itself is never reported
    :return: None
    """
    # Initialize the response class
    response = FormatOutput.FormatResponse(5)
    # add the column names for the row data
    response.message.table_column_names = [
        "source name", "source ID", "target name", "target ID", "Jaccard index"
    ]

    # Initialize the similar nodes class
    similar_nodes_in_common = SimilarNodesInCommon.SimilarNodesInCommon()

    # get the description
    source_node_description = RU.get_node_property(source_node_ID, 'name')

    # get the source node label
    source_node_label = RU.get_node_property(source_node_ID, 'label')

    # Get the nodes in common
    node_jaccard_tuples_sorted, error_code, error_message = \
        similar_nodes_in_common.get_similar_nodes_in_common_source_target_association(
            source_node_ID, target_node_type, association_node_type, threshold)

    # check for an error BEFORE touching the result list, which may not be usable when one is reported
    if error_code is not None or error_message is not None:
        if not use_json:
            print(error_message)
        else:
            response.add_error_message(error_code, error_message)
            response.print()
        return

    # make sure that the input node isn't in the list, and only then reduce to the top n,
    # so that dropping the source node does not cost one of the n result slots
    node_jaccard_tuples_sorted = [
        pair for pair in node_jaccard_tuples_sorted if pair[0] != source_node_ID
    ]
    if len(node_jaccard_tuples_sorted) > n:
        node_jaccard_tuples_sorted = node_jaccard_tuples_sorted[0:n]

    #### If use_json not specified, then return results as a fairly plain list
    if not use_json:
        to_print = "The %s's involving similar %ss as %s are: \n" % (
            target_node_type, association_node_type, source_node_description)
        for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
            to_print += "%s\t%s\tJaccard %f\n" % (
                other_disease_ID, RU.get_node_property(other_disease_ID, 'name'), jaccard)
        print(to_print)
        return

    #### Else if use_json requested, return the results in the Translator standard API JSON format

    #### Create the QueryGraph for this type of question
    query_graph = QueryGraph()
    source_node = QNode()
    source_node.id = "n00"
    source_node.curie = source_node_ID
    source_node.type = source_node_label
    association_node = QNode()
    association_node.id = "n01"
    association_node.type = association_node_type
    association_node.is_set = True
    target_node = QNode()
    target_node.id = "n02"
    target_node.type = target_node_type
    query_graph.nodes = [source_node, association_node, target_node]

    # The relationship types of the two edges are not known here, so edge.type is left unset
    edge1 = QEdge()
    edge1.id = "en00-n01"
    edge1.source_id = "n00"
    edge1.target_id = "n01"
    edge2 = QEdge()
    edge2.id = "en01-n02"
    edge2.source_id = "n01"
    edge2.target_id = "n02"
    query_graph.edges = [edge1, edge2]

    #### DONT Suppress the query_graph because we can now do the knowledge_map with v0.9.1
    response.message.query_graph = query_graph

    #### Create a mapping dict with the source curie and node types and edge types. This dict is used for
    #### reverse lookups by type for mapping to the QueryGraph. There is a potential point of failure here
    #### if there are duplicate node or edge types. FIXME
    response._type_map = dict()
    response._type_map[source_node.curie] = source_node.id
    response._type_map[association_node.type] = association_node.id
    response._type_map[target_node.type] = target_node.id
    response._type_map["e" + edge1.source_id + "-" + edge1.target_id] = edge1.id
    response._type_map["e" + edge2.source_id + "-" + edge2.target_id] = edge2.id

    #### Extract the sorted IDs from the list of tuples
    node_jaccard_ID_sorted = [node_id for node_id, _ in node_jaccard_tuples_sorted]

    # get the entire subgraph
    g = RU.return_subgraph_through_node_labels(source_node_ID, source_node_label, node_jaccard_ID_sorted,
                                               target_node_type, [association_node_type],
                                               with_rel=[], directed=False, debug=False)

    # extract the source_node_number
    for node, data in g.nodes(data=True):
        if data['properties']['id'] == source_node_ID:
            source_node_number = node
            break

    # Get all the target numbers
    target_id2numbers = dict()
    node_jaccard_ID_sorted_set = set(node_jaccard_ID_sorted)
    for node, data in g.nodes(data=True):
        if data['properties']['id'] in node_jaccard_ID_sorted_set:
            target_id2numbers[data['properties']['id']] = node

    for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
        target_name = RU.get_node_property(other_disease_ID, 'name')
        to_print = "The %s %s involves similar %ss as %s with similarity value %f" % (
            target_node_type, target_name, association_node_type, source_node_description, jaccard)

        # get all the shortest paths between source and target
        all_paths = nx.all_shortest_paths(g, source_node_number, target_id2numbers[other_disease_ID])

        # get all the nodes on these paths
        rel_nodes = set()
        for path in all_paths:
            rel_nodes.update(path)

        if rel_nodes:
            # extract the relevant subgraph
            sub_g = nx.subgraph(g, rel_nodes)

            # add it to the response
            res = response.add_subgraph(sub_g.nodes(data=True), sub_g.edges(data=True), to_print, jaccard,
                                        return_result=True)
            res.essence = "%s" % target_name  # populate with essence of question result
            res.essence_type = target_node_type
            res.row_data = [
                "%s" % source_node_description,
                "%s" % source_node_ID,
                "%s" % target_name,
                "%s" % other_disease_ID,
                "%f" % jaccard,
            ]

    response.print()
def answer(self, source_name, target_label, relationship_type, use_json=False, directed=False):
    """
    Answer a question of the type "What proteins does drug X target", in its general form:
    what <target node type> does <grounded source node> <relationship>?

    The direct one-hop path along ``relationship_type`` is tried first. If the knowledge graph
    has no such path, a two-hop path (a 'subclass_of' edge followed by ``relationship_type``)
    is tried instead; in that case the single intermediate node is kept in each result
    subgraph but is not itself reported as a target.

    :param source_name: identifier of the source node (eg "carbetocin"); it is matched against
        the 'id' property of the nodes in the returned subgraph
    :param target_label: KG node label (eg. "protein")
    :param relationship_type: KG relationship type (eg. "physically_interacts_with")
    :param use_json: If the answer should be in the Translator standard API JSON format
    :param directed: forwarded unchanged to RU.return_subgraph_paths_of_type
    :return: if use_json is False, a list of dictionaries (keys 'type', 'name', 'desc', 'prob'),
        one per target node. If use_json is True, a FormatOutput.FormatResponse holding one
        result per target node. On failure (no path found, or more than one intermediate node)
        a FormatOutput.FormatResponse carrying the error is returned regardless of use_json.
    """

    def _error_response(error_code, error_message):
        # All failure paths report through a Q3 FormatResponse, whatever use_json says
        error_response = FormatOutput.FormatResponse(3)
        error_response.add_error_message(error_code, error_message)
        return error_response

    def _no_paths_response():
        return _error_response(
            "NoPathsFound",
            "No path between %s and %s via relationship %s" % (
                source_name, target_label, relationship_type))

    # Get label/kind of node the source is
    source_label = RU.get_node_property(source_name, "label")

    # Get the subgraph (all targets along relationship); fall back to a path that goes
    # through one 'subclass_of' hop when the direct relationship yields nothing
    has_intermediate_node = False
    try:
        g = RU.return_subgraph_paths_of_type(
            source_name, source_label, None, target_label,
            [relationship_type], directed=directed)
    except CustomExceptions.EmptyCypherError:
        has_intermediate_node = True
        try:
            g = RU.return_subgraph_paths_of_type(
                source_name, source_label, None, target_label,
                ['subclass_of', relationship_type], directed=directed)
        except CustomExceptions.EmptyCypherError:
            return _no_paths_response()

    # extract the source_node_number; without it nothing below can work, so report
    # the missing source as "no path" instead of failing on an unbound variable
    source_node_number = next(
        (node for node, data in g.nodes(data=True)
         if data['properties']['id'] == source_name),
        None)
    if source_node_number is None:
        return _no_paths_response()

    # if there's an intermediate node, identify it (exactly one is supported)
    intermediate_node = None
    if has_intermediate_node:
        neighbors = list(g.neighbors(source_node_number))
        if len(neighbors) > 1:
            return _error_response("AmbiguousPath", "More than one intermediate node")
        if not neighbors:
            # The source has no neighbor to hop through, so there is no usable path
            return _no_paths_response()
        intermediate_node = neighbors[0]

    # Get all the target numbers: every node that is neither the source nor the
    # intermediate hop (the intermediate is only reached via 'subclass_of', so
    # reporting it as connected by relationship_type would be wrong)
    target_numbers = [
        node for node, data in g.nodes(data=True)
        if data['properties']['id'] != source_name
        and not (has_intermediate_node and node == intermediate_node)
    ]

    #### If use_json not specified, then return results as a fairly plain list
    if not use_json:
        results_list = []
        for target_number in target_numbers:
            data = g.nodes[target_number]
            results_list.append({
                'type': list(set(data['labels']) - {'Base'}).pop(),
                'name': data['properties']['name'],
                'desc': data['properties']['name'],
                'prob': 1  # All these are known to be true
            })
        return results_list

    #### Else if use_json requested, return the results in the Translator standard API JSON format
    response = FormatOutput.FormatResponse(3)  # it's a Q3 question
    response.message.table_column_names = [
        "source name", "source ID", "target name", "target ID"
    ]
    source_properties = g.nodes[source_node_number]['properties']
    source_description = source_properties['name']

    #### Create the QueryGraph for this type of question
    query_graph = QueryGraph()
    source_node = QNode()
    source_node.id = "n00"
    source_node.curie = source_properties['id']
    source_node.type = source_properties['category']
    target_node = QNode()
    target_node.id = "n01"
    target_node.type = target_label
    query_graph.nodes = [source_node, target_node]
    edge1 = QEdge()
    edge1.id = "e00"
    edge1.source_id = "n00"
    edge1.target_id = "n01"
    edge1.type = relationship_type
    query_graph.edges = [edge1]
    response.message.query_graph = query_graph

    #### Create a mapping dict with the source curie and the target type. This dict is used
    #### for reverse lookups by type for mapping to the QueryGraph.
    response._type_map = dict()
    response._type_map[source_node.curie] = source_node.id
    response._type_map[target_node.type] = target_node.id
    response._type_map[edge1.type] = edge1.id

    #### Loop over all the returned targets and put them into the response structure
    for target_number in target_numbers:
        target_properties = g.nodes[target_number]['properties']
        target_description = target_properties['name']
        if has_intermediate_node:
            subgraph = g.subgraph(
                [source_node_number, intermediate_node, target_number])
        else:
            subgraph = g.subgraph([source_node_number, target_number])
        res = response.add_subgraph(
            subgraph.nodes(data=True), subgraph.edges(data=True),
            "%s and %s are connected by the relationship %s" % (
                source_description, target_description, relationship_type),
            1, return_result=True)
        res.essence = "%s" % target_description  # populate with essence of question result
        res.essence_type = target_properties['category']  # type of the essence of question result
        res.row_data = [
            "%s" % source_description,
            "%s" % source_properties['id'],
            "%s" % target_description,
            "%s" % target_properties['id'],
        ]
    return response
def queryTerm(self, term):
    """
    Look up ``term`` via self.findTermAttributesAndTypeByName() and wrap the outcome in a
    message obtained from self.createMessage().

    When the lookup status is 'OK', the message is filled with a single result: one MESH
    node, a one-node knowledge graph, a post hoc one-node query graph and the matching
    knowledge_map. Otherwise the message is marked "TermNotFound" and its id is cleared.

    :param term: the term to look up
    :return: the populated message object
    """
    attributes = self.findTermAttributesAndTypeByName(term)
    message = self.createMessage()

    #### Nothing usable came back: flag the message and stop here
    if attributes["status"] != 'OK':
        message.message_code = "TermNotFound"
        message.code_description = "Unable to find this term in MeSH. No further information is available at this time."
        message.id = None
        return message

    message.code_description = "1 result found"
    message.table_column_names = ["id", "type", "name", "description", "uri"]

    #### Build the single node describing the term
    mesh_node = Node()
    mesh_curie = "MESH:" + attributes["id"]
    mesh_node.id = mesh_curie
    mesh_node.uri = "http://purl.obolibrary.org/obo/MESH_" + attributes["id"]
    mesh_node.type = [attributes["type"]]
    mesh_node.name = attributes["name"]
    mesh_node.description = attributes["description"]

    #### Build the one and only result (potential answer)
    result = Result()
    result.id = "http://rtx.ncats.io/api/v1/result/0000"
    result.description = "The term " + attributes["name"] + " refers to " + attributes["description"]
    result.confidence = 1.0
    result.essence = attributes["name"]
    result.essence_type = attributes["type"]
    joined_types = ",".join(mesh_node.type)
    result.row_data = [
        mesh_node.id,
        joined_types,
        mesh_node.name,
        mesh_node.description,
        mesh_node.uri,
    ]

    #### A knowledge graph holding just that node, attached to the result
    single_node_graph = KnowledgeGraph()
    single_node_graph.nodes = [mesh_node]
    result.result_graph = single_node_graph

    #### Attach the result to the message
    message.results = [result]

    #### The message-level knowledge graph is normally the union of all result graphs;
    #### with a single-node result the very same graph object is reused as a shortcut
    message.knowledge_graph = single_node_graph

    #### Manufacture a query_graph post hoc
    # NOTE(review): this sets `node_id`, not `id` -- confirm which attribute this QNode model exposes
    query_node = QNode()
    query_node.node_id = "n00"
    query_node.curie = mesh_curie
    query_node.type = None
    post_hoc_query_graph = QueryGraph()
    post_hoc_query_graph.nodes = [query_node]
    post_hoc_query_graph.edges = []
    message.query_graph = post_hoc_query_graph

    #### Map the query node onto the knowledge graph node
    result.knowledge_map = {"n00": mesh_curie}

    return message