def _convert_kg2_edge_to_swagger_edge(self, neo4j_edge):
    """
    Translate one KG2 edge (a mapping of Neo4j edge properties) into a swagger Edge.

    :param neo4j_edge: mapping of KG2 edge property names to values (read with .get())
    :return: the populated swagger Edge, flagged as defined by "ARAX/KG2"
    """
    kg2_edge = Edge()
    kg2_edge.type = neo4j_edge.get('simplified_edge_label')
    kg2_edge.source_id = neo4j_edge.get('subject')
    kg2_edge.target_id = neo4j_edge.get('object')
    # The ID helper is handed the edge itself, so type/source/target are filled in first
    kg2_edge.id = self._create_edge_id(kg2_edge)
    kg2_edge.relation = neo4j_edge.get('relation')

    # 'publications' and 'negated' go through ast.literal_eval before being stored
    raw_publications = neo4j_edge.get('publications')
    kg2_edge.publications = ast.literal_eval(raw_publications)
    # Temporary hack until provided_by is fixed in KG2
    raw_provided_by = neo4j_edge.get('provided_by')
    kg2_edge.provided_by = self._convert_strange_provided_by_field_to_list(raw_provided_by)
    raw_negated = neo4j_edge.get('negated')
    kg2_edge.negated = ast.literal_eval(raw_negated)
    kg2_edge.is_defined_by = "ARAX/KG2"

    # Add additional properties on KG2 edges as swagger EdgeAttribute objects
    # TODO: fix issues coming from strange characters in 'publications_info'! (EOF error)
    kg2_edge.edge_attributes = []
    extra_property_names = [
        'relation_curie',
        'simplified_relation_curie',
        'simplified_relation',
        'edge_label',
    ]
    kg2_edge.edge_attributes += self._create_swagger_attributes("edge", extra_property_names, neo4j_edge)

    return kg2_edge
def _convert_kg1_edge_to_swagger_edge(self, neo4j_edge: Dict[str, any], node_uuid_to_curie_dict: Dict[str, str]) -> Edge:
    """
    Translate one KG1 edge (a mapping of Neo4j edge properties) into a swagger Edge.

    :param neo4j_edge: mapping of KG1 edge property names to values
    :param node_uuid_to_curie_dict: lookup from node UUID to node CURIE, used to resolve the
        edge's 'source_node_uuid' and 'target_node_uuid' (a UUID missing from it raises KeyError)
    :return: the populated swagger Edge, flagged as defined by "ARAX/KG1"
    """
    kg1_edge = Edge()
    kg1_edge.type = neo4j_edge.get("predicate")

    # KG1 edges point at their endpoints by UUID; the swagger edge carries CURIEs instead
    source_uuid = neo4j_edge.get("source_node_uuid")
    kg1_edge.source_id = node_uuid_to_curie_dict[source_uuid]
    target_uuid = neo4j_edge.get("target_node_uuid")
    kg1_edge.target_id = node_uuid_to_curie_dict[target_uuid]

    neo4j_id = neo4j_edge.get('id')
    kg1_edge.id = f"KG1:{neo4j_id}"
    kg1_edge.relation = neo4j_edge.get("relation")
    kg1_edge.provided_by = neo4j_edge.get("provided_by")
    kg1_edge.is_defined_by = "ARAX/KG1"

    # Edge attributes are only attached when the edge has a truthy 'probability'
    if not neo4j_edge.get("probability"):
        return kg1_edge
    kg1_edge.edge_attributes = self._create_swagger_attributes("edge", ["probability"], neo4j_edge)
    return kg1_edge
def _add_answers_to_kg(self, answer_kg, reasoner_std_response, input_qnode_id, output_qnode_id, qedge_id):
    """
    Copy the nodes and edges of a BTE response into answer_kg, keyed by our own query graph IDs.

    :param answer_kg: knowledge graph being accumulated; handed to eu.add_node_to_kg/eu.add_edge_to_kg
    :param reasoner_std_response: BTE response with 'results' and 'knowledge_graph' ('nodes', 'edges')
    :param input_qnode_id: our qnode ID corresponding to BTE's "n0"
    :param output_qnode_id: our qnode ID corresponding to BTE's "n1"
    :param qedge_id: our qedge ID corresponding to BTE's "e1"
    :return: answer_kg (returned early, after logging an error, if a BTE qg_id cannot be mapped)
    """
    kg_to_qg_ids_dict = self._build_kg_to_qg_id_dict(reasoner_std_response['results'])
    bte_kg = reasoner_std_response['knowledge_graph']
    bte_edges = bte_kg['edges']

    # Nothing is added (nodes included) when BTE returned no edges
    if not bte_edges:
        return answer_kg

    self.response.debug(f"Got results back from BTE for this query "
                        f"({len(bte_edges)} edges)")

    # BTE node ID -> preferred equivalent CURIE, filled in for 'output' nodes only
    preferred_ids = dict()

    for bte_node in bte_kg['nodes']:
        swagger_node = Node()
        bte_node_id = bte_node.get('id')
        swagger_node.name = bte_node.get('name')
        swagger_node.type = eu.convert_string_to_snake_case(bte_node.get('type'))

        # Map the returned BTE qg_ids back to the original qnode_ids in our query graph
        bte_qg_id = kg_to_qg_ids_dict['nodes'].get(bte_node_id)
        if bte_qg_id not in ("n0", "n1"):
            self.response.error("Could not map BTE qg_id to ARAX qnode_id", error_code="UnknownQGID")
            return answer_kg
        qnode_id = input_qnode_id if bte_qg_id == "n0" else output_qnode_id

        # Find and use the preferred equivalent identifier for this node (if it's an 'output' node)
        if qnode_id != output_qnode_id:
            swagger_node.id = bte_node_id
        elif bte_node_id in preferred_ids:
            swagger_node.id = preferred_ids.get(bte_node_id)
        else:
            equivalent_curies = []
            for prefix, local_ids in bte_node.get('equivalent_identifiers').items():
                for local_id in local_ids:
                    equivalent_curies.append(f"{prefix}:{eu.get_curie_local_id(local_id)}")
            swagger_node.id = eu.get_best_equivalent_curie(equivalent_curies, swagger_node.type)
            preferred_ids[bte_node_id] = swagger_node.id

        eu.add_node_to_kg(answer_kg, swagger_node, qnode_id)

    for bte_edge in bte_edges:
        swagger_edge = Edge()
        swagger_edge.id = bte_edge.get("id")
        swagger_edge.type = bte_edge.get('type')
        # Endpoints follow any node ID remapping done above; otherwise the BTE ID is kept
        bte_source_id = bte_edge.get('source_id')
        swagger_edge.source_id = preferred_ids.get(bte_source_id, bte_source_id)
        bte_target_id = bte_edge.get('target_id')
        swagger_edge.target_id = preferred_ids.get(bte_target_id, bte_target_id)
        swagger_edge.is_defined_by = "BTE"
        swagger_edge.provided_by = bte_edge.get('edge_source')

        # Map the returned BTE qg_id back to the original qedge_id in our query graph
        bte_qg_id = kg_to_qg_ids_dict['edges'].get(swagger_edge.id)
        if bte_qg_id != "e1":
            self.response.error("Could not map BTE qg_id to ARAX qedge_id", error_code="UnknownQGID")
            return answer_kg

        eu.add_edge_to_kg(answer_kg, swagger_edge, qedge_id)

    return answer_kg
def _create_ngd_edge(self, ngd_value: float, source_id: str, target_id: str) -> Edge:
    """
    Build the swagger Edge that carries an NGD value between two nodes.

    :param ngd_value: value stored in the edge's single EdgeAttribute
    :param source_id: ID of the edge's source node
    :param target_id: ID of the edge's target node
    :return: an Edge of type self.ngd_edge_type, provided/defined by "ARAX"
    """
    edge = Edge()
    edge.type = self.ngd_edge_type
    edge.source_id = source_id
    edge.target_id = target_id
    # The ID is derived from the endpoints and the edge type
    edge.id = f"NGD:{source_id}--{edge.type}--{target_id}"
    edge.provided_by = "ARAX"
    edge.is_defined_by = "ARAX"

    # Name/type/url of the attribute come from instance-level settings
    ngd_attribute = EdgeAttribute(
        name=self.ngd_edge_attribute_name,
        type=self.ngd_edge_attribute_type,
        value=ngd_value,
        url=self.ngd_edge_attribute_url,
    )
    edge.edge_attributes = [ngd_attribute]
    return edge
def _convert_kg1_edge_to_swagger_edge(self, neo4j_edge, node_uuid_to_curie_dict):
    """
    Translate one KG1 edge (a mapping of Neo4j edge properties) into a swagger Edge.

    :param neo4j_edge: mapping of KG1 edge property names to values
    :param node_uuid_to_curie_dict: lookup from node UUID to node CURIE, used to resolve the
        edge's 'source_node_uuid' and 'target_node_uuid' (a UUID missing from it raises KeyError)
    :return: the populated swagger Edge, flagged as defined by "ARAX/KG1"
    """
    kg1_edge = Edge()
    kg1_edge.type = neo4j_edge.get('predicate')

    # KG1 edges point at their endpoints by UUID; the swagger edge carries CURIEs instead
    source_uuid = neo4j_edge.get('source_node_uuid')
    kg1_edge.source_id = node_uuid_to_curie_dict[source_uuid]
    target_uuid = neo4j_edge.get('target_node_uuid')
    kg1_edge.target_id = node_uuid_to_curie_dict[target_uuid]

    # The ID helper is handed the edge itself, so type/source/target are filled in first
    kg1_edge.id = self._create_edge_id(kg1_edge)
    kg1_edge.relation = neo4j_edge.get('relation')
    kg1_edge.provided_by = neo4j_edge.get('provided_by')
    kg1_edge.is_defined_by = "ARAX/KG1"

    # Edge attributes are only attached when the edge has a truthy 'probability'
    if not neo4j_edge.get('probability'):
        return kg1_edge
    kg1_edge.edge_attributes = self._create_swagger_attributes("edge", ['probability'], neo4j_edge)
    return kg1_edge
def _convert_kg2_edge_to_swagger_edge(self, neo4j_edge: Dict[str, any]) -> Edge:
    """
    Translate one KG2 edge (a mapping of Neo4j edge properties) into a swagger Edge.

    :param neo4j_edge: mapping of KG2 edge property names to values (read with .get())
    :return: the populated swagger Edge, flagged as defined by "ARAX/KG2"
    """
    kg2_edge = Edge()
    neo4j_id = neo4j_edge.get('id')
    kg2_edge.id = f"KG2:{neo4j_id}"
    kg2_edge.type = neo4j_edge.get("simplified_edge_label")
    kg2_edge.source_id = neo4j_edge.get("subject")
    kg2_edge.target_id = neo4j_edge.get("object")
    kg2_edge.relation = neo4j_edge.get("relation")

    # 'publications' and 'negated' go through ast.literal_eval before being stored
    raw_publications = neo4j_edge.get("publications")
    kg2_edge.publications = ast.literal_eval(raw_publications)
    # Temporary hack until provided_by is fixed in KG2
    raw_provided_by = neo4j_edge.get("provided_by")
    kg2_edge.provided_by = self._convert_strange_provided_by_field_to_list(raw_provided_by)
    raw_negated = neo4j_edge.get("negated")
    kg2_edge.negated = ast.literal_eval(raw_negated)
    kg2_edge.is_defined_by = "ARAX/KG2"

    # Add additional properties on KG2 edges as swagger EdgeAttribute objects
    # TODO: fix issues coming from strange characters in 'publications_info'! (EOF error)
    kg2_edge.edge_attributes = []
    extra_property_names = [
        "relation_curie",
        "simplified_relation_curie",
        "simplified_relation",
        "edge_label",
    ]
    kg2_edge.edge_attributes += self._create_swagger_attributes("edge", extra_property_names, neo4j_edge)

    return kg2_edge
def add_subgraph(self, nodes, edges, plain_text, confidence, return_result=False):
    """
    Populate the object model using networkx neo4j subgraph

    A new Result wrapping a ResultGraph of the given nodes and edges is appended to
    self._result_list, and the response's result list and "N result(s) found" message
    are refreshed.

    :param nodes: nodes in the subgraph (g.nodes(data=True)); each data dict must have
        'labels' and a 'properties' dict with 'name', 'UUID', 'accession', 'uri' and 'id'
        ('description' is optional)
    :param edges: edges in the subgraph (g.edges(data=True)); each data dict must have
        'type' and a 'properties' dict with 'provided_by', 'source_node_uuid' and
        'target_node_uuid'
    :param plain_text: text stored on the result
    :param confidence: confidence stored on the result
    :param return_result: when truthy, the newly created Result is returned
    :return: the new Result if return_result is truthy, otherwise None
    """
    # Build one Node per input node and remember UUID -> CURIE for resolving edge endpoints
    node_objects = []
    curie_by_uuid = dict()
    for _node_key, data in nodes:
        properties = data['properties']

        # Take the first non-'Base' label in the order given (going through a set made
        # the choice arbitrary when a node carries several labels)
        label = [candidate for candidate in data['labels'] if candidate != 'Base'][0]

        # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)
        curie_id = properties['id']
        curie_parts = curie_id.split(':')
        if curie_parts[0].upper() == "CHEMBL":
            curie_id = "CHEMBL:CHEMBL" + curie_parts[1]
        curie_by_uuid[properties['UUID']] = curie_id

        node = Node()
        node.id = curie_id
        node.type = label
        node.name = properties['name']
        node.uri = properties['uri']
        node.accession = properties['accession']
        node.description = properties.get('description', "None")
        node_objects.append(node)

    # Build one Edge per input edge. Working from each edge's own data (instead of dicts
    # keyed by (u, v)) keeps parallel edges between the same node pair distinct, and
    # resolving endpoints by UUID does not depend on node URIs being unique.
    edge_objects = []
    for _source_key, _target_key, data in edges:
        properties = data['properties']
        edge = Edge()
        edge.type = data['type']
        edge.source_id = curie_by_uuid[properties['source_node_uuid']]
        edge.target_id = curie_by_uuid[properties['target_node_uuid']]
        edge.is_defined_by = "RTX"
        # TODO: check with eric if this really should be a list and if it should contain the source DB('s)
        edge.provided_by = properties['provided_by']
        edge_objects.append(edge)

    # Create the result (potential answer)
    result1 = Result()
    result1.text = plain_text
    result1.confidence = confidence

    # Create a ResultGraph object and put the list of nodes and edges into it
    result_graph = ResultGraph()
    result_graph.node_list = node_objects
    result_graph.edge_list = edge_objects
    result1.result_graph = result_graph

    # Put the result (potential answer) into the response
    self._result_list.append(result1)
    self.response.result_list = self._result_list

    # Increment the number of results and refresh the summary message
    self._num_results += 1
    if self._num_results == 1:
        self.response.message = f"{self._num_results} result found"
    else:
        self.response.message = f"{self._num_results} results found"

    return result1 if return_result else None
def add_neighborhood_graph(self, nodes, edges, confidence=None):
    """
    Populate the object model using networkx neo4j subgraph

    A Result of type "neighborhood graph", wrapping a ResultGraph of the given nodes and
    edges, is appended to self._result_list and the response's result list is refreshed.

    :param nodes: nodes in the subgraph (g.nodes(data=True)); each data dict must have
        'labels' and a 'properties' dict with 'name', 'UUID', 'accession', 'uri' and 'id'
        ('description' is optional)
    :param edges: edges in the subgraph (g.edges(data=True)); each data dict must have
        'type' and a 'properties' dict with 'provided_by', 'source_node_uuid' and
        'target_node_uuid'
    :param confidence: confidence stored on the result
    :return: none
    """
    # Build one Node per input node and remember UUID -> CURIE for resolving edge endpoints
    node_objects = []
    curie_by_uuid = dict()
    for _node_key, data in nodes:
        properties = data['properties']

        # Take the first non-'Base' label in the order given (going through a set made
        # the choice arbitrary when a node carries several labels)
        label = [candidate for candidate in data['labels'] if candidate != 'Base'][0]

        # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)
        curie_id = properties['id']
        curie_parts = curie_id.split(':')
        if curie_parts[0].upper() == "CHEMBL":
            curie_id = "CHEMBL:CHEMBL" + curie_parts[1]
        curie_by_uuid[properties['UUID']] = curie_id

        node = Node()
        node.id = curie_id
        node.type = label
        node.name = properties['name']
        node.uri = properties['uri']
        node.accession = properties['accession']
        node.description = properties.get('description', "None")
        node_objects.append(node)

    # Build one Edge per input edge. Working from each edge's own data (instead of dicts
    # keyed by (u, v)) keeps parallel edges between the same node pair distinct, and
    # resolving endpoints by UUID does not depend on node URIs being unique.
    edge_objects = []
    for _source_key, _target_key, data in edges:
        properties = data['properties']
        edge = Edge()
        edge.type = data['type']
        edge.source_id = curie_by_uuid[properties['source_node_uuid']]
        edge.target_id = curie_by_uuid[properties['target_node_uuid']]
        # TODO: check with eric if this really should be a list and if it should contain the source DB('s)
        edge.provided_by = properties['provided_by']
        edge.is_defined_by = "RTX"
        edge_objects.append(edge)

    # Create the result (potential answer)
    result1 = Result()
    result1.text = (
        "This is a subgraph extracted from the full RTX knowledge graph, including nodes and edges relevant to the query."
        " This is not an answer to the query per se, but rather an opportunity to examine a small region of the RTX knowledge graph for further study. "
        "Formal answers to the query are below."
    )
    result1.confidence = confidence
    result1.result_type = "neighborhood graph"

    # Create a ResultGraph object and put the list of nodes and edges into it
    result_graph = ResultGraph()
    result_graph.node_list = node_objects
    result_graph.edge_list = edge_objects
    result1.result_graph = result_graph

    # Put the result (potential answer) into the response
    self._result_list.append(result1)
    self.response.result_list = self._result_list
def add_subgraph(self, nodes, edges, description, confidence, return_result=False, suppress_bindings=False):
    """
    Populate the object model using networkx neo4j subgraph

    Nodes and edges not yet seen are added to the master knowledge graph
    (self.message.knowledge_graph), and a new Result is appended to self._results with
    either node/edge bindings or an embedded result graph.

    :param nodes: nodes in the subgraph (g.nodes(data=True)); each data dict must have
        'labels' and a 'properties' dict with 'name', 'UUID', 'accession', 'uri' and 'id'
        ('description' is optional)
    :param edges: edges in the subgraph (g.edges(data=True)); each data dict must have
        'type' and a 'properties' dict with 'provided_by', 'source_node_uuid' and
        'target_node_uuid'
    :param description: description stored on the result
    :param confidence: confidence stored on the result
    :param return_result: when truthy, the newly created Result is returned
    :param suppress_bindings: when exactly False the bindings are attached to the result;
        when exactly True a KnowledgeGraph of this subgraph is attached as result_graph instead
    :return: the new Result if return_result is truthy, otherwise None
    :raises Exception: if the query graph key found for an edge endpoint is empty
    """
    #### Phase 1: read the nodes (nothing on self is touched until all input is parsed)
    parsed_nodes = []
    curie_by_uuid = dict()
    for _node_key, data in nodes:
        properties = data['properties']

        # Take the first non-'Base' label in the order given (going through a set made
        # the choice arbitrary when a node carries several labels)
        label = [candidate for candidate in data['labels'] if candidate != 'Base'][0]

        # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)
        curie_id = properties['id']
        curie_parts = curie_id.split(':')
        if curie_parts[0].upper() == "CHEMBL":
            curie_id = "CHEMBL:CHEMBL" + curie_parts[1]
        curie_by_uuid[properties['UUID']] = curie_id

        parsed_nodes.append({
            'id': curie_id,
            'label': label,
            'name': properties['name'],
            'uri': properties['uri'],
            'accession': properties['accession'],
            'description': properties.get('description', "None"),
        })

    #### Phase 2: read the edges. One record per input edge (instead of dicts keyed by
    #### (u, v)) keeps parallel edges between the same node pair distinct, and resolving
    #### endpoints by UUID does not depend on node URIs being unique.
    parsed_edges = []
    for _source_key, _target_key, data in edges:
        properties = data['properties']
        parsed_edges.append((
            data['type'],
            curie_by_uuid[properties['source_node_uuid']],
            curie_by_uuid[properties['target_node_uuid']],
            properties['provided_by'],
        ))

    #### Phase 3: build the Node objects
    node_objects = []
    for record in parsed_nodes:
        node = Node()
        node.id = record['id']
        node.type = [record['label']]
        node.name = record['name']
        node.uri = record['uri']
        node.accession = record['accession']
        node.description = record['description']
        node_objects.append(node)

        #### Add this node to the master knowledge graph
        if node.id not in self._node_ids:
            self.message.knowledge_graph.nodes.append(node)
            self._node_ids[node.id] = node.type[0]  # Just take the first of potentially several FIXME

    #### Create the bindings lists
    node_bindings = list()
    edge_bindings = list()

    #### Phase 4: build the Edge objects and work out their bindings
    edge_objects = []
    for edge_type, source_curie, target_curie, provided_by in parsed_edges:
        edge = Edge()
        # edge.id is set below, when the edge is matched against the master knowledge graph
        edge.type = edge_type
        edge.source_id = source_curie
        edge.target_id = target_curie
        edge.is_defined_by = "RTX"
        edge.provided_by = provided_by
        edge_objects.append(edge)

        #### Add this edge to the master knowledge graph
        edge_str = "%s -%s- %s" % (edge.source_id, edge.type, edge.target_id)
        if edge_str not in self._edge_ids:
            self.message.knowledge_graph.edges.append(edge)
            edge.id = "%d" % self._edge_counter
            self._edge_ids[edge_str] = edge.id
            self._edge_counter += 1
        else:
            edge.id = self._edge_ids[edge_str]

        #### Try to figure out how the source fits into the query_graph for the bindings
        # NOTE(review): if _type_map is a plain dict, a type missing from it raises KeyError
        # on the lookup itself, before the emptiness check below is reached - confirm intended
        source_type = self._node_ids[edge.source_id]
        if edge.source_id in self._type_map:
            source_knowledge_map_key = self._type_map[edge.source_id]
        else:
            source_knowledge_map_key = self._type_map[source_type]
        if not source_knowledge_map_key:
            eprint("Expected to find '%s' in the response._type_map, but did not" % source_type)
            raise Exception("Expected to find '%s' in the response._type_map, but did not" % source_type)
        node_bindings.append(NodeBinding(qg_id=source_knowledge_map_key, kg_id=edge.source_id))

        #### Try to figure out how the target fits into the query_graph for the knowledge map
        target_type = self._node_ids[edge.target_id]
        if edge.target_id in self._type_map:
            target_knowledge_map_key = self._type_map[edge.target_id]
        else:
            target_knowledge_map_key = self._type_map[target_type]
        if not target_knowledge_map_key:
            eprint("ERROR: Expected to find '%s' in the response._type_map, but did not" % target_type)
            raise Exception("Expected to find '%s' in the response._type_map, but did not" % target_type)
        node_bindings.append(NodeBinding(qg_id=target_knowledge_map_key, kg_id=edge.target_id))

        #### Try to figure out how the edge fits into the query_graph for the knowledge map
        # Preference order: the edge type itself, then "e<source>-<target>", then "e<target>-<source>"
        source_target_key = "e" + source_knowledge_map_key + "-" + target_knowledge_map_key
        target_source_key = "e" + target_knowledge_map_key + "-" + source_knowledge_map_key
        if edge.type in self._type_map:
            knowledge_map_key = self._type_map[edge.type]
        elif source_target_key in self._type_map:
            knowledge_map_key = source_target_key
        elif target_source_key in self._type_map:
            knowledge_map_key = target_source_key
        else:
            # Unlike the node case this is not fatal: the edge is bound to the key "ERROR"
            eprint("ERROR: Expected to find '%s' or '%s' or '%s' in the response._type_map, but did not"
                   % (edge.type, source_target_key, target_source_key))
            knowledge_map_key = "ERROR"
        edge_bindings.append(EdgeBinding(qg_id=knowledge_map_key, kg_id=edge.id))

    # Create the result (potential answer)
    result1 = Result()
    result1.reasoner_id = "RTX"
    result1.description = description
    result1.confidence = confidence
    if suppress_bindings is False:
        result1.node_bindings = node_bindings
        result1.edge_bindings = edge_bindings

    # Create a KnowledgeGraph object and put the list of nodes and edges into it
    #### This is still legal, then is redundant with the knowledge map, so leave it out maybe
    knowledge_graph = KnowledgeGraph()
    knowledge_graph.nodes = node_objects
    knowledge_graph.edges = edge_objects
    if suppress_bindings is True:
        result1.result_graph = knowledge_graph

    # Put the result (potential answer) into the message
    self._results.append(result1)
    self.message.results = self._results

    # Increment the number of results and refresh the summary text
    self._num_results += 1
    if self._num_results == 1:
        self.message.code_description = "%s result found" % self._num_results
    else:
        self.message.code_description = "%s results found" % self._num_results

    #### Finish and return the result if requested
    return result1 if return_result else None