def answer(source_node_ID, target_node_type, association_node_type, use_json=False, threshold=0.2, n=20):
    """
    Answer the question "what X are similar to Y", based on the overlap of common Z nodes.

    X is target_node_type, Y is source_node_ID, Z is association_node_type. The relationships are
    automatically determined in SimilarNodesInCommon by looking for 1 hop relationships and
    popping the FIRST one (you are warned).

    :param source_node_ID: actual name in the KG
    :param target_node_type: kinds of nodes you want returned
    :param association_node_type: kind of node you are computing the Jaccard overlap on
    :param use_json: print the results in standardized format
    :param threshold: only return results where jaccard is >= this threshold
    :param n: number of results to return (default 20)
    :return: None. The results (or the error message) are printed, not returned.
    """
    # Initialize the response class
    response = FormatOutput.FormatResponse(5)

    # add the column names for the row data
    response.message.table_column_names = [
        "source name", "source ID", "target name", "target ID", "Jaccard index"
    ]

    # Initialize the similar nodes class
    similar_nodes_in_common = SimilarNodesInCommon.SimilarNodesInCommon()

    # get the description and the label of the source node
    source_node_description = RU.get_node_property(source_node_ID, 'name')
    source_node_label = RU.get_node_property(source_node_ID, 'label')

    # Get the nodes in common
    node_jaccard_tuples_sorted, error_code, error_message = \
        similar_nodes_in_common.get_similar_nodes_in_common_source_target_association(
            source_node_ID, target_node_type, association_node_type, threshold)

    # Check for an error BEFORE touching the result list: on failure the list
    # cannot be relied upon, and the error must be reported rather than masked.
    if error_code is not None or error_message is not None:
        if not use_json:
            print(error_message)
        else:
            response.add_error_message(error_code, error_message)
            response.print()
        return

    # Make sure that the input node isn't in the list, THEN reduce to the top n.
    # (Truncating first would leave only n-1 results whenever the source node
    # is among the top n matches.)
    node_jaccard_tuples_sorted = [
        pair for pair in node_jaccard_tuples_sorted if pair[0] != source_node_ID
    ][:n]

    #### If use_json not specified, then return results as a fairly plain list
    if not use_json:
        lines = [
            "The %s's involving similar %ss as %s are: \n" % (
                target_node_type, association_node_type, source_node_description)
        ]
        for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
            lines.append("%s\t%s\tJaccard %f\n" % (
                other_disease_ID, RU.get_node_property(other_disease_ID, 'name'), jaccard))
        print("".join(lines))
        return

    #### Else if use_json requested, return the results in the Translator standard API JSON format

    #### Create the QueryGraph for this type of question
    query_graph = QueryGraph()

    source_node = QNode()
    source_node.id = "n00"
    source_node.curie = source_node_ID
    source_node.type = source_node_label

    association_node = QNode()
    association_node.id = "n01"
    association_node.type = association_node_type
    association_node.is_set = True

    target_node = QNode()
    target_node.id = "n02"
    target_node.type = target_node_type

    query_graph.nodes = [source_node, association_node, target_node]

    # The edge types are left unset: the relationship types are not known here.
    edge1 = QEdge()
    edge1.id = "en00-n01"
    edge1.source_id = "n00"
    edge1.target_id = "n01"

    edge2 = QEdge()
    edge2.id = "en01-n02"
    edge2.source_id = "n01"
    edge2.target_id = "n02"

    query_graph.edges = [edge1, edge2]

    #### DONT Suppress the query_graph because we can now do the knowledge_map with v0.9.1
    response.message.query_graph = query_graph

    #### Create a mapping dict with the source curie and node types and edge types. This dict is used for
    #### reverse lookups by type for mapping to the QueryGraph. There is a potential point of failure here
    #### if there are duplicate node or edge types. FIXME
    response._type_map = dict()
    response._type_map[source_node.curie] = source_node.id
    response._type_map[association_node.type] = association_node.id
    response._type_map[target_node.type] = target_node.id
    response._type_map["e" + edge1.source_id + "-" + edge1.target_id] = edge1.id
    response._type_map["e" + edge2.source_id + "-" + edge2.target_id] = edge2.id

    #### Extract the sorted IDs from the list of tuples
    node_jaccard_ID_sorted = [node_id for node_id, _ in node_jaccard_tuples_sorted]

    # get the entire subgraph
    g = RU.return_subgraph_through_node_labels(
        source_node_ID, source_node_label, node_jaccard_ID_sorted, target_node_type,
        [association_node_type], with_rel=[], directed=False, debug=False)

    # extract the source_node_number (first graph node carrying the source ID)
    for node, data in g.nodes(data=True):
        if data['properties']['id'] == source_node_ID:
            source_node_number = node
            break

    # Get all the target numbers
    target_id2numbers = dict()
    node_jaccard_ID_sorted_set = set(node_jaccard_ID_sorted)
    for node, data in g.nodes(data=True):
        if data['properties']['id'] in node_jaccard_ID_sorted_set:
            target_id2numbers[data['properties']['id']] = node

    for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
        target_name = RU.get_node_property(other_disease_ID, 'name')
        to_print = "The %s %s involves similar %ss as %s with similarity value %f" % (
            target_node_type, target_name, association_node_type, source_node_description, jaccard)

        # get all the shortest paths between source and target
        all_paths = nx.all_shortest_paths(
            g, source_node_number, target_id2numbers[other_disease_ID])

        # get all the nodes on these paths
        rel_nodes = {node for path in all_paths for node in path}
        if not rel_nodes:
            continue

        # extract the relevant subgraph and add it to the response
        sub_g = nx.subgraph(g, rel_nodes)
        res = response.add_subgraph(
            sub_g.nodes(data=True), sub_g.edges(data=True), to_print, jaccard, return_result=True)
        res.essence = "%s" % target_name  # populate with essence of question result
        res.essence_type = target_node_type
        res.row_data = [
            "%s" % source_node_description,
            "%s" % source_node_ID,
            "%s" % target_name,
            "%s" % other_disease_ID,
            "%f" % jaccard,
        ]

    response.print()
def add_qedge(self, message, input_parameters, describe=False):
    """
    Add a new QEdge object to the QueryGraph inside the Message object.

    :param message: Message object whose query_graph receives the new QEdge. If it has no
        query_graph, a new QueryGraph with empty node and edge lists is created on it.
    :param input_parameters: dict that may contain the keys 'id', 'source_id', 'target_id' and
        'type'. 'source_id' and 'target_id' are required and must match the id of a QNode already
        present in the query_graph. Any other key is reported as an error.
    :param describe: if True, do nothing except return a dict describing the allowable parameters
    :return: Response object with execution information (or, when describe is True, the dict
        describing the allowable parameters)
    :rtype: Response
    """
    #### Internal documentation setup
    allowable_parameters = {
        'id': {
            'Any string that is unique among all QEdge id fields, with recommended format e00, e01, e02, etc.'
        },
        'source_id': {
            'id of the source QNode already present in the QueryGraph (e.g. n01, n02)'
        },
        'target_id': {
            'id of the target QNode already present in the QueryGraph (e.g. n01, n02)'
        },
        'type': {
            'Any valid Translator/BioLink relationship type (e.g. physically_interacts_with, participates_in)'
        },
    }
    if describe:
        # can't get this name at run-time, need to manually put it in per
        # https://www.python.org/dev/peps/pep-3130/
        allowable_parameters['dsl_command'] = '`add_qedge()`'
        allowable_parameters['brief_description'] = (
            "The `add_qedge` method adds an additional QEdge to the QueryGraph in the Message object. "
            "Currently source_id and target_id QNodes must already be present in the QueryGraph. \n"
            "The specified type is not currently checked that it is a valid Translator/BioLink "
            "relationship type, but it should be."
        )
        return allowable_parameters

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'id': None,
        'source_id': None,
        'target_id': None,
        'type': None,
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value

    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    response.info(f"Adding a QueryEdge to Message with parameters {parameters}")

    #### Make sure there's a query_graph already here
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.edges is None:
        message.query_graph.edges = []

    #### Create a QEdge, using the supplied id or else the next free one
    qedge = QEdge()
    if parameters['id'] is not None:
        edge_id = parameters['id']
    else:
        edge_id = self.__get_next_free_edge_id()
    qedge.id = edge_id

    #### Get the set of available node ids. A query_graph whose node list is None is
    #### treated like one with no nodes, so the caller gets a proper error response
    #### below instead of a TypeError from iterating None.
    known_node_ids = {qnode.id for qnode in (message.query_graph.nodes or [])}

    #### Add the source_id
    source_id = parameters['source_id']
    if source_id is None:
        response.error(
            "While trying to add QEdge, source_id is a required parameter",
            error_code="MissingSourceId")
        return response
    if source_id not in known_node_ids:
        response.error(
            f"While trying to add QEdge, there is no QNode with id {source_id}",
            error_code="UnknownSourceId")
        return response
    qedge.source_id = source_id

    #### Add the target_id
    target_id = parameters['target_id']
    if target_id is None:
        response.error(
            "While trying to add QEdge, target_id is a required parameter",
            error_code="MissingTargetId")
        return response
    if target_id not in known_node_ids:
        response.error(
            f"While trying to add QEdge, there is no QNode with id {target_id}",
            error_code="UnknownTargetId")
        return response
    qedge.target_id = target_id

    #### Add the type if any. Need to verify it's an allowed type. FIXME
    if parameters['type'] is not None:
        qedge.type = parameters['type']

    #### Add it to the query_graph edge list
    message.query_graph.edges.append(qedge)

    #### Return the response
    return response
def answer(self, source_name, target_label, relationship_type, use_json=False, directed=False):
    """
    Answer a question of the type "What proteins does drug X target" but is general:
    what <node X type> does <node Y grounded> <relationship Z> that can be answered in one hop in the KG
    (increasing the step size if necessary).

    A direct one-hop path is tried first. If none exists, a two-hop path that first follows a
    'subclass_of' relationship and then relationship_type is tried.

    :param source_name: KG neo4j node name (eg "carbetocin")
    :param target_label: KG node label (eg. "protein")
    :param relationship_type: KG relationship type (eg. "physically_interacts_with")
    :param use_json: If the answer should be in Eric's Json standardized API output format
    :param directed: passed through to RU.return_subgraph_paths_of_type
    :return: if use_json is False, a list of dictionaries (keys 'type', 'name', 'desc', 'prob')
        describing the target nodes; if use_json is True, a FormatResponse holding one result per
        target. When no path is found, or the intermediate node is ambiguous, a FormatResponse
        carrying the error message is returned regardless of use_json.
    """
    def error_response(error_code, error_message):
        # Build the Q3 response object that carries an error back to the caller
        failed = FormatOutput.FormatResponse(3)
        failed.add_error_message(error_code, error_message)
        return failed

    no_path_message = "No path between %s and %s via relationship %s" % (
        source_name, target_label, relationship_type)

    # Get label/kind of node the source is
    source_label = RU.get_node_property(source_name, "label")

    # Get the subgraph (all targets along relationship)
    has_intermediate_node = False
    try:
        g = RU.return_subgraph_paths_of_type(
            source_name, source_label, None, target_label, [relationship_type], directed=directed)
    except CustomExceptions.EmptyCypherError:
        # No direct hop: retry going through one subclass_of step first
        try:
            has_intermediate_node = True
            g = RU.return_subgraph_paths_of_type(
                source_name, source_label, None, target_label,
                ['subclass_of', relationship_type], directed=directed)
        except CustomExceptions.EmptyCypherError:
            return error_response("NoPathsFound", no_path_message)

    # extract the source_node_number
    for node, data in g.nodes(data=True):
        if data['properties']['id'] == source_name:
            source_node_number = node
            break

    # if there's an intermediate node, identify it (it must be the only neighbor of the source)
    intermediate_node = None
    if has_intermediate_node:
        neighbors = list(g.neighbors(source_node_number))
        if len(neighbors) > 1:
            return error_response("AmbiguousPath", "More than one intermediate node")
        if not neighbors:
            # Nothing hangs off the source node, so there is no usable path
            # (previously this crashed with IndexError on an empty pop()).
            return error_response("NoPathsFound", no_path_message)
        intermediate_node = neighbors[0]

    # Get all the target numbers. The intermediate node is only a stepping stone
    # on the way to the targets, so it must not be reported as a target itself.
    target_numbers = [
        node for node, data in g.nodes(data=True)
        if data['properties']['id'] != source_name
        and not (has_intermediate_node and node == intermediate_node)
    ]

    #### If use_json not specified, then return results as a fairly plain list
    if not use_json:
        results_list = list()
        for target_number in target_numbers:
            data = g.nodes[target_number]
            results_list.append({
                'type': list(set(data['labels']) - {'Base'}).pop(),
                'name': data['properties']['name'],
                'desc': data['properties']['name'],
                'prob': 1
            })  # All these are known to be true
        return results_list

    #### Else if use_json requested, return the results in the Translator standard API JSON format
    response = FormatOutput.FormatResponse(3)  # it's a Q3 question
    response.message.table_column_names = [
        "source name", "source ID", "target name", "target ID"
    ]
    source_properties = g.nodes[source_node_number]['properties']
    source_description = source_properties['name']

    #### Create the QueryGraph for this type of question
    query_graph = QueryGraph()

    source_node = QNode()
    source_node.id = "n00"
    source_node.curie = source_properties['id']
    source_node.type = source_properties['category']

    target_node = QNode()
    target_node.id = "n01"
    target_node.type = target_label

    query_graph.nodes = [source_node, target_node]

    edge1 = QEdge()
    edge1.id = "e00"
    edge1.source_id = "n00"
    edge1.target_id = "n01"
    edge1.type = relationship_type
    query_graph.edges = [edge1]

    response.message.query_graph = query_graph

    #### Create a mapping dict with the source curie and the target type. This dict is used for
    #### reverse lookups by type for mapping to the QueryGraph.
    response._type_map = dict()
    response._type_map[source_node.curie] = source_node.id
    response._type_map[target_node.type] = target_node.id
    response._type_map[edge1.type] = edge1.id

    #### Loop over all the returned targets and put them into the response structure
    for target_number in target_numbers:
        target_properties = g.nodes[target_number]['properties']
        target_description = target_properties['name']

        if not has_intermediate_node:
            subgraph = g.subgraph([source_node_number, target_number])
        else:
            subgraph = g.subgraph([source_node_number, intermediate_node, target_number])

        res = response.add_subgraph(
            subgraph.nodes(data=True), subgraph.edges(data=True),
            "%s and %s are connected by the relationship %s" % (
                source_description, target_description, relationship_type),
            1, return_result=True)
        res.essence = "%s" % target_description  # populate with essence of question result
        res.essence_type = target_properties['category']  # populate with the type of the essence of question result
        res.row_data = [
            "%s" % source_description,
            "%s" % source_properties['id'],
            "%s" % target_description,
            "%s" % target_properties['id'],
        ]
    return response