def get_concordance(body=None):  # noqa: E501
    """Calculate Post text to generate concordance  # noqa: E501

    :param body: Text to be analyzed
    :type body: dict | bytes

    :rtype: Result
    """
    # if connexion.request.is_json:
    #     body = str.from_dict(connexion.request.get_json())  # noqa: E501

    # Keep the original decoded string: it is both the cache key and the
    # second argument to Result.  (Fixes the 'orignal' typo and avoids
    # decoding the payload twice.)
    original_body = body.decode()

    # Best-effort cache lookup; 'except Exception' instead of a bare except
    # so KeyboardInterrupt/SystemExit are not swallowed.
    try:
        test_key = Database.check_key(original_body, "concordance")
        if "Item" in test_key:
            item = Database.get_item(original_body, "concordance")
            return Result(item, original_body)
    except Exception:
        print("database not found")

    # Tokenize: split on whitespace, strip surrounding punctuation, lowercase.
    words = [w.strip(",:;.!?").lower() for w in original_body.split()]

    # Count occurrences per token; emitting tokens in sorted order preserves
    # the alphabetical ordering of the original implementation.
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1
    conc_list = [{"count": counts[token], "token": token}
                 for token in sorted(counts)]

    result = Result(conc_list, original_body)

    # Best-effort cache write; failure is non-fatal.
    try:
        Database.put(original_body, conc_list, "concordance")
    except Exception:
        print("database not found")
    return result
def add_text(self, description, confidence=1):
    """Append a new Result carrying *description*/*confidence* and refresh the
    message's result list and summary line."""
    new_result = Result()
    new_result.description = description
    new_result.confidence = confidence
    self._results.append(new_result)
    self.message.results = self._results

    # Bump the running tally and keep the summary grammatically correct.
    self._num_results += 1
    template = "%s result found" if self._num_results == 1 else "%s results found"
    self.message.code_description = template % self._num_results
def add_text(self, plain_text, confidence=1):
    """Append a new Result carrying *plain_text*/*confidence* and refresh the
    response's result list and summary message."""
    new_result = Result()
    new_result.text = plain_text
    new_result.confidence = confidence
    self._result_list.append(new_result)
    self.response.result_list = self._result_list

    # Bump the running tally and keep the summary grammatically correct.
    self._num_results += 1
    template = "%s result found" if self._num_results == 1 else "%s results found"
    self.response.message = template % self._num_results
def add_text(self, plain_text, confidence=1):
    """Append a new Result carrying *plain_text*/*confidence* and refresh the
    response's result list and summary message."""
    new_result = Result()
    new_result.text = plain_text
    new_result.confidence = confidence
    self._result_list.append(new_result)
    self.response.result_list = self._result_list

    # Bump the running tally and keep the summary grammatically correct.
    self._num_results += 1
    template = "%s result found" if self._num_results == 1 else "%s results found"
    self.response.message = template % self._num_results
def _make_result_from_node_set(dict_kg: KnowledgeGraph,
                               result_node_ids_by_qnode_id: Dict[str, Set[str]],
                               kg_edge_ids_by_qedge_id: Dict[str, Set[str]],
                               qg: QueryGraph,
                               ignore_edge_direction: bool) -> Result:
    """Build a Result binding the given KG nodes to their qnodes, plus every
    candidate KG edge that actually connects nodes in this result."""
    # Bind every selected KG node to the query node it fulfills.
    node_bindings = [
        NodeBinding(qg_id=qnode_id, kg_id=node_id)
        for qnode_id, node_ids in result_node_ids_by_qnode_id.items()
        for node_id in node_ids
    ]

    edge_bindings = []
    for qedge_id, candidate_edge_ids in kg_edge_ids_by_qedge_id.items():
        qedge = next(q for q in qg.edges if q.id == qedge_id)
        source_nodes = result_node_ids_by_qnode_id[qedge.source_id]
        target_nodes = result_node_ids_by_qnode_id[qedge.target_id]
        for edge_id in candidate_edge_ids:
            edge = dict_kg.edges.get(edge_id)
            # An edge belongs if it links this result's nodes in the qedge's
            # direction — or in either direction when direction is ignored.
            belongs = (edge.source_id in source_nodes
                       and edge.target_id in target_nodes)
            if not belongs and ignore_edge_direction:
                belongs = (edge.source_id in target_nodes
                           and edge.target_id in source_nodes)
            if belongs:
                edge_bindings.append(EdgeBinding(qg_id=qedge_id, kg_id=edge.id))

    return Result(node_bindings=node_bindings, edge_bindings=edge_bindings)
def get_concordance_v2(body=None):  # noqa: E501
    """Calculate and locate Post text to generate concordance and location  # noqa: E501

    :param body: Text to be analyzed and located
    :type body: dict | bytes

    :rtype: Result2
    """
    # if connexion.request.is_json:
    #     body = str.from_dict(connexion.request.get_json())  # noqa: E501

    # Keep the original decoded string (cache key and Result2 payload);
    # fixes the 'orignal' typo and the duplicated decode.
    original_body = body.decode()

    # Best-effort cache lookup; failures fall through to recomputing.
    try:
        test_key = Database.check_key(original_body, "location")
        if "Item" in test_key:
            item = Database.get_item(original_body, "location")
            # BUG FIX: this endpoint's declared rtype is Result2, but the
            # cached path previously wrapped the item in Result.
            return Result2(item, original_body)
    except Exception:
        print("database not found")

    # Tokenize: split on whitespace, strip surrounding punctuation, lowercase.
    words = [w.strip(",:;.!?").lower() for w in original_body.split()]

    # Record every position at which each token occurs (was misleadingly
    # looping over 'number'; these are words, not numbers).
    location = {}
    for index, word in enumerate(words):
        location.setdefault(word, []).append(index)

    # One entry per distinct token, in alphabetical order (matches the
    # original sort-then-dedupe behavior).
    conc_list = [{"location": location[word], "token": word}
                 for word in sorted(location)]

    result = Result2(conc_list, original_body)

    # Best-effort cache write; failure is non-fatal.
    try:
        Database.put(original_body, conc_list, "location")
    except Exception:
        print("database not found")
    return result
def get_concordance(body=None):  # noqa: E501
    """Calculate Post text to generate concordance  # noqa: E501

    :param body: Text to be analyzed
    :type body: dict | bytes

    :rtype: Result
    """
    code = 200
    ConcordanceTableOperations.create_concordance_table()
    # Initialized up front so the except path always has valid values.
    # Previously, a failure before assignment made `input` resolve to the
    # *builtin* input function, which then leaked into Result(...).
    text = ""
    concordance = []
    try:
        if connexion.request.is_json:
            body = str.from_dict(connexion.request.get_json())  # noqa: E501
        # Keep only letters, apostrophes, hyphens and spaces.  Raw string so
        # the backslash before '-' is literal, not a (deprecated) escape.
        regex = r"[^a-zA-Z'\- ]+"
        text = body.decode('utf-8')

        dynamodb_resource = boto3.resource('dynamodb')
        table = dynamodb_resource.Table('concordance')
        response = table.get_item(Key={'input': text})
        # BUG FIX: DynamoDB get_item returns the stored row under the 'Item'
        # key; the old 'Input'/'input' lookups never matched, so the cache
        # was never hit.
        if 'Item' in response:
            concordance = response['Item']['concordance']
        else:
            # Parse only alpha characters using the regular expression above.
            words = re.sub(regex, '', text.lower()).split()
            # Single-pass counting; the old list.count() per word was O(n^2).
            counts = {}
            for word in words:
                counts[word] = counts.get(word, 0) + 1
            concordance = [ResultConcordance(word, count)
                           for word, count in counts.items()]
            concordance.sort(key=operator.attrgetter('token'))
    except Exception:
        concordance = []
        code = 400
    result = Result(concordance, text)
    ConcordanceTableOperations.upload_concordance_data(result)
    return result, code
def queryTerm(self, term):
    """Look *term* up in MeSH and return a response describing the concept,
    or a TermNotFound response when the lookup fails."""
    method = "queryTerm"  # kept from the original (unused here)
    attributes = self.findTermAttributesAndTypeByName(term)
    response = self.createResponse()

    # Guard clause: lookup failed — report and bail out early.
    if attributes["status"] != 'OK':
        response.response_code = "TermNotFound"
        response.message = "Unable to find term '" + term + "' in MeSH. No further information is available at this time."
        response.id = None
        return response

    # Describe the matched MeSH concept as a graph node.
    mesh_node = Node()
    mesh_node.id = "MESH:" + attributes["id"]
    mesh_node.uri = "http://purl.obolibrary.org/obo/MESH_" + attributes["id"]
    mesh_node.type = attributes["type"]
    mesh_node.name = attributes["name"]
    mesh_node.description = attributes["description"]

    #### Create the first result (potential answer)
    answer = Result()
    answer.id = "http://rtx.ncats.io/api/v1/result/0000"
    answer.text = "The term " + attributes["name"] + " refers to " + attributes["description"]
    answer.confidence = 1.0

    #### Wrap the node in a ResultGraph and attach it to the result
    result_graph = ResultGraph()
    result_graph.node_list = [mesh_node]
    answer.result_graph = result_graph

    #### Put the result (potential answer) into the response
    response.result_list = [answer]
    return response
def add_subgraph(self, nodes, edges, description, confidence, return_result=False, suppress_bindings=False):
    """
    Populate the object model using networkx neo4j subgraph

    Mutates self._node_ids, self._edge_ids, self._edge_counter, self._results
    and self.message as a side effect.

    :param nodes: nodes in the subgraph (g.nodes(data=True))
    :param edges: edges in the subgraph (g.edges(data=True))
    :param description: human-readable description stored on the Result
    :param confidence: confidence value stored on the Result
    :param return_result: when True, return the created Result object
    :param suppress_bindings: when True, attach a result_graph instead of node/edge bindings
    :return: the Result if return_result is True, otherwise None
    """
    # Get the relevant info from the nodes and edges
    node_keys = []
    node_descriptions = dict()
    node_names = dict()
    node_labels = dict()
    node_uuids = dict()
    node_accessions = dict()
    node_iris = dict()
    node_uuids2iri = dict()
    node_curies = dict()
    node_uuids2curie = dict()
    for u, data in nodes:
        node_keys.append(u)
        if 'description' in data['properties']:
            node_descriptions[u] = data['properties']['description']
        else:
            node_descriptions[u] = "None"
        node_names[u] = data['properties']['name']
        # Drop the generic 'Base' label and take whichever specific label remains
        node_labels[u] = list(set(data['labels']).difference({'Base'}))[0]
        node_uuids[u] = data['properties']['UUID']
        node_accessions[u] = data['properties']['accession']
        node_iris[u] = data['properties']['uri']
        node_uuids2iri[data['properties']['UUID']] = data['properties']['uri']
        curie_id = data['properties']['id']
        # Normalize ChEMBL ids to the 'CHEMBL:CHEMBLnnn' form
        if curie_id.split(':')[0].upper() == "CHEMBL":
            curie_id = "CHEMBL:CHEMBL" + curie_id.split(':')[1]
        node_uuids2curie[data['properties']['UUID']] = curie_id
        node_curies[u] = curie_id  # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)
    edge_keys = []
    edge_types = dict()
    edge_source_db = dict()
    edge_source_iri = dict()
    edge_target_iri = dict()
    edge_source_curie = dict()
    edge_target_curie = dict()
    edge_ids = dict()
    for u, v, data in edges:
        edge_keys.append((u, v))
        edge_types[(u, v)] = data['type']
        edge_source_db[(u, v)] = data['properties']['provided_by']
        edge_source_iri[(u, v)] = node_uuids2iri[data['properties']['source_node_uuid']]
        edge_target_iri[(u, v)] = node_uuids2iri[data['properties']['target_node_uuid']]
        edge_source_curie[(u, v)] = node_uuids2curie[data['properties']['source_node_uuid']]
        edge_target_curie[(u, v)] = node_uuids2curie[data['properties']['target_node_uuid']]
        edge_ids[(u, v)] = data['properties']['provided_by']  # FIXME

    # For each node, populate the relevant information
    node_objects = []
    node_iris_to_node_object = dict()
    for node_key in node_keys:
        node = Node()
        node.id = node_curies[node_key]
        node.type = [node_labels[node_key]]
        node.name = node_names[node_key]
        node.uri = node_iris[node_key]
        node.accession = node_accessions[node_key]
        node.description = node_descriptions[node_key]
        node_objects.append(node)
        node_iris_to_node_object[node_iris[node_key]] = node

        #### Add this node to the master knowledge graph (deduplicated by CURIE id)
        if node.id not in self._node_ids:
            self.message.knowledge_graph.nodes.append(node)
            self._node_ids[node.id] = node.type[0]  # Just take the first of potentially several FIXME

    #### Create the bindings lists
    node_bindings = list()
    edge_bindings = list()

    # for each edge, create an edge between them
    edge_objects = []
    for u, v in edge_keys:
        edge = Edge()
        #edge.id is set below when building the bindings
        edge.type = edge_types[(u, v)]
        edge.source_id = node_iris_to_node_object[edge_source_iri[(u, v)]].id
        edge.target_id = node_iris_to_node_object[edge_target_iri[(u, v)]].id
        edge_objects.append(edge)
        #edge.attribute_list
        #edge.confidence
        #edge.evidence_type
        edge.is_defined_by = "RTX"
        edge.provided_by = edge_source_db[(u, v)]
        #edge.publications
        #edge.qualifiers
        #edge.relation
        #edge.source_id
        #edge.target_id
        #edge.type

        #### Add this edge to the master knowledge graph, deduplicated by a
        #### "source -type- target" string key; ids are assigned from a counter
        edge_str = "%s -%s- %s" % (edge.source_id, edge.type, edge.target_id)
        if edge_str not in self._edge_ids:
            self.message.knowledge_graph.edges.append(edge)
            edge.id = "%d" % self._edge_counter
            self._edge_ids[edge_str] = edge.id
            self._edge_counter += 1
        else:
            edge.id = self._edge_ids[edge_str]

        #### Try to figure out how the source fits into the query_graph for the bindings
        #### (lookup by node id first, falling back to lookup by node type)
        source_type = self._node_ids[edge.source_id]
        if edge.source_id in self._type_map:
            source_knowledge_map_key = self._type_map[edge.source_id]
        else:
            source_knowledge_map_key = self._type_map[source_type]
        if not source_knowledge_map_key:
            eprint("Expected to find '%s' in the response._type_map, but did not" % source_type)
            raise Exception("Expected to find '%s' in the response._type_map, but did not" % source_type)
        node_bindings.append(NodeBinding(qg_id=source_knowledge_map_key, kg_id=edge.source_id))
        # if source_knowledge_map_key not in node_bindings:
        #     node_bindings[source_knowledge_map_key] = list()
        #     node_bindings_dict[source_knowledge_map_key] = dict()
        # if edge.source_id not in node_bindings_dict[source_knowledge_map_key]:
        #     node_bindings[source_knowledge_map_key].append(edge.source_id)
        #     node_bindings_dict[source_knowledge_map_key][edge.source_id] = 1

        #### Try to figure out how the target fits into the query_graph for the knowledge map
        target_type = self._node_ids[edge.target_id]
        if edge.target_id in self._type_map:
            target_knowledge_map_key = self._type_map[edge.target_id]
        else:
            target_knowledge_map_key = self._type_map[target_type]
        if not target_knowledge_map_key:
            eprint("ERROR: Expected to find '%s' in the response._type_map, but did not" % target_type)
            raise Exception("Expected to find '%s' in the response._type_map, but did not" % target_type)
        node_bindings.append(NodeBinding(qg_id=target_knowledge_map_key, kg_id=edge.target_id))
        # if target_knowledge_map_key not in node_bindings:
        #     node_bindings[target_knowledge_map_key] = list()
        #     node_bindings_dict[target_knowledge_map_key] = dict()
        # if edge.target_id not in node_bindings_dict[target_knowledge_map_key]:
        #     node_bindings[target_knowledge_map_key].append(edge.target_id)
        #     node_bindings_dict[target_knowledge_map_key][edge.target_id] = 1

        #### Try to figure out how the edge fits into the query_graph for the knowledge map
        #### (by edge type first, then by "e<source>-<target>" key in either direction)
        source_target_key = "e" + source_knowledge_map_key + "-" + target_knowledge_map_key
        target_source_key = "e" + target_knowledge_map_key + "-" + source_knowledge_map_key
        if edge.type in self._type_map:
            knowledge_map_key = self._type_map[edge.type]
        elif source_target_key in self._type_map:
            knowledge_map_key = source_target_key
        elif target_source_key in self._type_map:
            knowledge_map_key = target_source_key
        else:
            # NOTE(review): unlike the node lookups above this does not raise —
            # the binding is recorded under the sentinel key "ERROR" instead
            eprint("ERROR: Expected to find '%s' or '%s' or '%s' in the response._type_map, but did not" % (edge.type, source_target_key, target_source_key))
            knowledge_map_key = "ERROR"
        edge_bindings.append(EdgeBinding(qg_id=knowledge_map_key, kg_id=edge.id))
        # if knowledge_map_key not in edge_bindings:
        #     edge_bindings[knowledge_map_key] = list()
        #     edge_bindings_dict[knowledge_map_key] = dict()
        # if edge.id not in edge_bindings_dict[knowledge_map_key]:
        #     edge_bindings[knowledge_map_key].append(edge.id)
        #     edge_bindings_dict[knowledge_map_key][edge.id] = 1

    # Create the result (potential answer)
    result1 = Result()
    result1.reasoner_id = "RTX"
    result1.description = description
    result1.confidence = confidence
    if suppress_bindings is False:
        result1.node_bindings = node_bindings
        result1.edge_bindings = edge_bindings

    # Create a KnowledgeGraph object and put the list of nodes and edges into it
    #### This is still legal, then is redundant with the knowledge map, so leave it out maybe
    knowledge_graph = KnowledgeGraph()
    knowledge_graph.nodes = node_objects
    knowledge_graph.edges = edge_objects
    if suppress_bindings is True:
        result1.result_graph = knowledge_graph

    # Put the first result (potential answer) into the message
    self._results.append(result1)
    self.message.results = self._results

    # Increment the number of results
    self._num_results += 1
    if self._num_results == 1:
        self.message.code_description = "%s result found" % self._num_results
    else:
        self.message.code_description = "%s results found" % self._num_results

    #### Finish and return the result if requested
    if return_result:
        return result1
    else:
        pass
def add_neighborhood_graph(self, nodes, edges, confidence=None):
    """
    Populate the object model using networkx neo4j subgraph

    Builds a single "neighborhood graph" Result (not a formal answer) and
    appends it to self._result_list / self.response.result_list.

    :param nodes: nodes in the subgraph (g.nodes(data=True))
    :param edges: edges in the subgraph (g.edges(data=True))
    :param confidence: confidence value stored on the Result (may be None)
    :return: none
    """
    # Get the relevant info from the nodes and edges
    node_keys = []
    node_descriptions = dict()
    node_names = dict()
    node_labels = dict()
    node_uuids = dict()
    node_accessions = dict()
    node_iris = dict()
    node_uuids2iri = dict()
    node_curies = dict()
    node_uuids2curie = dict()
    for u, data in nodes:
        node_keys.append(u)
        if 'description' in data['properties']:
            node_descriptions[u] = data['properties']['description']
        else:
            node_descriptions[u] = "None"
        node_names[u] = data['properties']['name']
        # Drop the generic 'Base' label and take whichever specific label remains
        node_labels[u] = list(set(data['labels']).difference({'Base'}))[0]
        node_uuids[u] = data['properties']['UUID']
        node_accessions[u] = data['properties']['accession']
        node_iris[u] = data['properties']['uri']
        node_uuids2iri[data['properties']['UUID']] = data['properties']['uri']
        curie_id = data['properties']['id']
        # Normalize ChEMBL ids to the 'CHEMBL:CHEMBLnnn' form
        if curie_id.split(':')[0].upper() == "CHEMBL":
            curie_id = "CHEMBL:CHEMBL" + curie_id.split(':')[1]
        node_uuids2curie[data['properties']['UUID']] = curie_id
        node_curies[u] = curie_id  # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)
    edge_keys = []
    edge_types = dict()
    edge_source_db = dict()
    edge_source_iri = dict()
    edge_target_iri = dict()
    edge_source_curie = dict()
    edge_target_curie = dict()
    for u, v, data in edges:
        edge_keys.append((u, v))
        edge_types[(u, v)] = data['type']
        edge_source_db[(u, v)] = data['properties']['provided_by']
        edge_source_iri[(u, v)] = node_uuids2iri[data['properties']['source_node_uuid']]
        edge_target_iri[(u, v)] = node_uuids2iri[data['properties']['target_node_uuid']]
        edge_source_curie[(u, v)] = node_uuids2curie[data['properties']['source_node_uuid']]
        edge_target_curie[(u, v)] = node_uuids2curie[data['properties']['target_node_uuid']]

    # For each node, populate the relevant information
    node_objects = []
    node_iris_to_node_object = dict()
    for node_key in node_keys:
        node = Node()
        node.id = node_curies[node_key]
        node.type = node_labels[node_key]
        node.name = node_names[node_key]
        node.uri = node_iris[node_key]
        node.accession = node_accessions[node_key]
        node.description = node_descriptions[node_key]
        node_objects.append(node)
        node_iris_to_node_object[node_iris[node_key]] = node

    # for each edge, create an edge between them
    edge_objects = []
    for u, v in edge_keys:
        edge = Edge()
        edge.type = edge_types[(u, v)]
        edge.source_id = node_iris_to_node_object[edge_source_iri[(u, v)]].id
        edge.target_id = node_iris_to_node_object[edge_target_iri[(u, v)]].id
        #edge.origin_list = []
        #edge.origin_list.append(edge_source_db[(u, v)])  # TODO: check with eric if this really should be a list and if it should contain the source DB('s)
        edge.provided_by = edge_source_db[(u, v)]
        edge.is_defined_by = "RTX"
        edge_objects.append(edge)

    # Create the result (potential answer)
    result1 = Result()
    text = "This is a subgraph extracted from the full RTX knowledge graph, including nodes and edges relevant to the query." \
        " This is not an answer to the query per se, but rather an opportunity to examine a small region of the RTX knowledge graph for further study. " \
        "Formal answers to the query are below."
    result1.text = text
    result1.confidence = confidence
    result1.result_type = "neighborhood graph"

    # Create a ResultGraph object and put the list of nodes and edges into it
    result_graph = ResultGraph()
    result_graph.node_list = node_objects
    result_graph.edge_list = edge_objects

    # Put the ResultGraph into the first result (potential answer)
    result1.result_graph = result_graph

    # Put the first result (potential answer) into the response
    self._result_list.append(result1)
    self.response.result_list = self._result_list
def add_split_results(self, knowledge_graph, result_bindings):
    """
    Populate the object model with the resulting raw knowledge_graph and
    result_bindings (initially from QueryGraphReasoner)

    :param knowledge_graph: knowledge_graph in native RTX KG dump
    :param result_bindings: result_bindings in a native format from QueryGraphReasoner
    :return: empty tuple (callers appear not to use the return value)
    """
    #### Add the knowledge_graph nodes
    # Attributes copied verbatim when present in the input node dict
    regular_node_attributes = ["id", "uri", "name", "description", "symbol"]
    for input_node in knowledge_graph["nodes"]:
        node = Node()
        for attribute in regular_node_attributes:
            if attribute in input_node:
                setattr(node, attribute, input_node[attribute])
        # 'category' becomes a one-element type list
        node.type = [input_node["category"]]
        #node.node_attributes = FIXME
        self.message.knowledge_graph.nodes.append(node)

    #### Add the knowledge_graph edges
    # NOTE(review): the two trailing "" entries look like leftover
    # placeholders; they can never match a real key
    regular_edge_attributes = ["id", "type", "relation", "source_id", "target_id", "is_defined_by", "defined_datetime", "provided_by", "weight", "evidence_type", "qualifiers", "negated", "", ""]
    for input_edge in knowledge_graph["edges"]:
        edge = Edge()
        for attribute in regular_edge_attributes:
            if attribute in input_edge:
                setattr(edge, attribute, input_edge[attribute])
        # Input 'probability' maps onto the edge's confidence
        if "probability" in input_edge:
            edge.confidence = input_edge["probability"]
        # missing edge properties: defined_datetime, weight, publications, evidence_type, qualifiers, negated
        # extra edge properties: predicate,
        #edge.edge_attributes = FIXME
        #edge.publications = FIXME
        self.message.knowledge_graph.edges.append(edge)

    #### Add each result
    self.message.results = []
    for input_result in result_bindings:
        result = Result()
        result.description = "No description available"
        result.essence = "?"
        #result.essence_type = "?"
        #result.row_data = "?"
        #result.score = 0
        #result.score_name = "?"
        #result.score_direction = "?"
        result.confidence = 1.0
        result.result_type = "individual query answer"
        result.reasoner_id = "RTX"
        result.result_graph = None
        result.node_bindings = input_result["nodes"]
        # #### Convert each binding value to a list because the viewer requires it
        # for binding in result.node_bindings:
        #     result.node_bindings[binding] = [ result.node_bindings[binding] ]
        result.edge_bindings = input_result["edges"]
        self.message.results.append(result)

    #### Set the code_description
    n_results = len(result_bindings)
    plural = "s"
    if n_results == 1:
        plural = ""
    self.message.code_description = f"{n_results} result{plural} found"

    #### Complete normally
    return ()
def add_subgraph(self, nodes, edges, plain_text, confidence):
    """
    Populate the object model using networkx neo4j subgraph

    Appends one Result (with an attached ResultGraph) to self._result_list /
    self.response.result_list and updates the response summary message.

    :param nodes: nodes in the subgraph (g.nodes(data=True))
    :param edges: edges in the subgraph (g.edges(data=True))
    :param plain_text: text stored on the Result
    :param confidence: confidence value stored on the Result
    :return: none
    """
    # Get the relevant info from the nodes and edges
    node_keys = []
    node_descriptions = dict()
    node_names = dict()
    node_labels = dict()
    node_uuids = dict()
    node_accessions = dict()
    node_iris = dict()
    node_uuids2iri = dict()
    node_curies = dict()
    node_uuids2curie = dict()
    for u, data in nodes:
        node_keys.append(u)
        # NOTE(review): unlike the other add_subgraph variant, this assumes
        # 'description' is always present on the node properties
        node_descriptions[u] = data['properties']['description']
        node_names[u] = data['properties']['name']
        # Drop the generic 'Base' label and take whichever specific label remains
        node_labels[u] = list(set(data['labels']).difference({'Base'}))[0]
        node_uuids[u] = data['properties']['UUID']
        node_accessions[u] = data['properties']['accession']
        node_iris[u] = data['properties']['iri']
        node_uuids2iri[data['properties']['UUID']] = data['properties']['iri']
        node_curies[u] = data['properties']['curie_id']
        node_uuids2curie[data['properties']['UUID']] = data['properties']['curie_id']
    edge_keys = []
    edge_types = dict()
    edge_source_db = dict()
    edge_source_iri = dict()
    edge_target_iri = dict()
    edge_source_curie = dict()
    edge_target_curie = dict()
    for u, v, data in edges:
        edge_keys.append((u, v))
        edge_types[(u, v)] = data['type']
        edge_source_db[(u, v)] = data['properties']['sourcedb']
        edge_source_iri[(u, v)] = node_uuids2iri[data['properties']['source_node_uuid']]
        edge_target_iri[(u, v)] = node_uuids2iri[data['properties']['target_node_uuid']]
        edge_source_curie[(u, v)] = node_uuids2curie[data['properties']['source_node_uuid']]
        edge_target_curie[(u, v)] = node_uuids2curie[data['properties']['target_node_uuid']]

    # For each node, populate the relevant information
    node_objects = []
    node_iris_to_node_object = dict()
    for node_key in node_keys:
        node = Node()
        node.id = node_curies[node_key]
        node.type = node_labels[node_key]
        node.name = node_names[node_key]
        node.accession = node_accessions[node_key]
        node.description = node_descriptions[node_key]
        node_objects.append(node)
        node_iris_to_node_object[node_iris[node_key]] = node

    # for each edge, create an edge between them
    edge_objects = []
    for u, v in edge_keys:
        edge = Edge()
        edge.type = edge_types[(u, v)]
        edge.source_id = node_iris_to_node_object[edge_source_iri[(u, v)]].id
        edge.target_id = node_iris_to_node_object[edge_target_iri[(u, v)]].id
        edge.origin_list = []
        edge.origin_list.append(edge_source_db[(u, v)])  # TODO: check with eric if this really should be a list and if it should contain the source DB('s)
        edge_objects.append(edge)

    # Create the result (potential answer)
    result1 = Result()
    #result1.id = "http://rtx.ncats.io/api/v1/response/1234/result/2345"
    #result1.id = "-1"
    result1.text = plain_text
    result1.confidence = confidence

    # Create a ResultGraph object and put the list of nodes and edges into it
    result_graph = ResultGraph()
    result_graph.node_list = node_objects
    result_graph.edge_list = edge_objects

    # Put the ResultGraph into the first result (potential answer)
    result1.result_graph = result_graph

    # Put the first result (potential answer) into the response
    self._result_list.append(result1)
    self.response.result_list = self._result_list

    # Increment the number of results
    self._num_results += 1
    if self._num_results == 1:
        self.response.message = "%s result found" % self._num_results
    else:
        self.response.message = "%s results found" % self._num_results
def get_concordance(body=None, save=None, compute=None):  # noqa: E501
    """Calculate Post text to generate concordance  # noqa: E501

    :param body: Text to be analyzed
    :type body: dict | bytes
    :param save: store the computed concordance in DynamoDB (default: yes)
    :type save: bool
    :param compute: force recomputation even when a cached item exists
    :type compute: bool

    :rtype: Result
    """
    if connexion.request.is_json:
        body = str.from_dict(connexion.request.get_json())  # noqa: E501

    # Body was in bytes, decoding into a string
    body = body.decode('UTF-8')

    # Look the input up in the database.  item stays None when the lookup
    # fails for any reason (table missing, item absent) so we fall through
    # to computing.  table is tracked so the save step below cannot hit a
    # NameError when DynamoDB was unreachable (previous behavior).
    item = None
    table = None
    try:
        dynamodb = boto3.resource('dynamodb')
        table = dynamodb.Table('Analyze')
        response = table.get_item(Key={'input': body, })
        item = response['Item']
    except Exception:
        item = None

    # Cached and no forced recompute: return the stored item as-is.
    if item is not None and not compute:
        return item

    # Count occurrences of each lower-cased word; sorting first keeps the
    # dict (and therefore the result list) in alphabetical order.
    words = sorted(word.lower() for word in body.split())
    concordance_dict = dict()
    for word in words:
        concordance_dict[word] = concordance_dict.get(word, 0) + 1

    # A list of ResultConcordance objects built from the filled-in dictionary
    concordance_result = [ResultConcordance(word, count)
                          for word, count in concordance_dict.items()]

    # Put the input and result into the db if save is true or was left unset
    if (save or save is None) and table is not None:
        table.put_item(Item={'input': body, 'concordance': concordance_dict}, )

    # Return a Result object, providing the list of ResultConcordance
    # objects and the original message
    return Result(concordance_result, body)
def _create_results(kg: KnowledgeGraph, qg: QueryGraph, ignore_edge_direction: bool = True) -> List[Result]:
    """Expand the KG into one Result per way of fulfilling the query graph.

    Walks the qnodes in order, growing partial "result graphs" (dicts with
    'nodes' and 'edges' keyed by qg id), then attaches matching KG edges,
    filters unfulfilled graphs, and converts the survivors into Result
    objects with node/edge bindings and an essence.

    :param kg: the knowledge graph whose nodes/edges carry qnode_ids/qedge_ids
    :param qg: the query graph being fulfilled
    :param ignore_edge_direction: when True, a KG edge may match a qedge in either direction
    :return: list of Result objects
    :raises ValueError: if a result graph holds more than one essence candidate
    """
    result_graphs = []
    kg_node_ids_by_qg_id = _get_kg_node_ids_by_qg_id(kg)
    kg_node_adj_map_by_qg_id = _get_kg_node_adj_map_by_qg_id(kg_node_ids_by_qg_id, kg, qg)
    kg_node_lookup = {node.id: node for node in kg.nodes}
    qnodes_in_order = _get_qnodes_in_order(qg)

    # First create result graphs with only the nodes filled out
    for qnode in qnodes_in_order:
        # The previously processed qnode (None for the first one); new nodes
        # must be adjacent to the nodes already chosen for it
        prior_qnode = qnodes_in_order[qnodes_in_order.index(qnode) - 1] if qnodes_in_order.index(qnode) > 0 else None
        if not result_graphs:
            # Seed the result graphs from the first qnode's KG nodes
            all_node_ids_in_kg_for_this_qnode_id = kg_node_ids_by_qg_id.get(qnode.id)
            if qnode.is_set:
                # 'is_set' qnodes keep all their nodes in one result graph
                new_result_graph = _create_new_empty_result_graph(qg)
                new_result_graph['nodes'][qnode.id] = all_node_ids_in_kg_for_this_qnode_id
                result_graphs.append(new_result_graph)
            else:
                # Otherwise fan out: one result graph per node
                for node_id in all_node_ids_in_kg_for_this_qnode_id:
                    new_result_graph = _create_new_empty_result_graph(qg)
                    new_result_graph['nodes'][qnode.id] = {node_id}
                    result_graphs.append(new_result_graph)
        else:
            # Extend each existing result graph with nodes connected to the
            # prior qnode's chosen nodes
            new_result_graphs = []
            for result_graph in result_graphs:
                node_ids_for_prior_qnode_id = result_graph['nodes'][prior_qnode.id]
                connected_node_ids = set()
                for node_id in node_ids_for_prior_qnode_id:
                    connected_node_ids = connected_node_ids.union(kg_node_adj_map_by_qg_id[prior_qnode.id][node_id][qnode.id])
                if qnode.is_set:
                    new_result_graph = _copy_result_graph(result_graph)
                    new_result_graph['nodes'][qnode.id] = connected_node_ids
                    new_result_graphs.append(new_result_graph)
                else:
                    for node_id in connected_node_ids:
                        new_result_graph = _copy_result_graph(result_graph)
                        new_result_graph['nodes'][qnode.id] = {node_id}
                        new_result_graphs.append(new_result_graph)
            result_graphs = new_result_graphs

    # Then add edges to our result graphs as appropriate
    # Index KG edge ids by "source--target" node pair (and the reverse pair
    # when edge direction is ignored), per qedge
    edges_by_node_pairs = {qedge.id: dict() for qedge in qg.edges}
    for edge in kg.edges:
        if edge.qedge_ids:
            for qedge_id in edge.qedge_ids:
                edge_node_pair = f"{edge.source_id}--{edge.target_id}"
                if edge_node_pair not in edges_by_node_pairs[qedge_id]:
                    edges_by_node_pairs[qedge_id][edge_node_pair] = set()
                edges_by_node_pairs[qedge_id][edge_node_pair].add(edge.id)
                if ignore_edge_direction:
                    node_pair_in_other_direction = f"{edge.target_id}--{edge.source_id}"
                    if node_pair_in_other_direction not in edges_by_node_pairs[qedge_id]:
                        edges_by_node_pairs[qedge_id][node_pair_in_other_direction] = set()
                    edges_by_node_pairs[qedge_id][node_pair_in_other_direction].add(edge.id)
    for result_graph in result_graphs:
        for qedge_id in result_graph['edges']:
            qedge = _get_query_edge(qedge_id, qg)
            potential_nodes_1 = result_graph['nodes'][qedge.source_id]
            potential_nodes_2 = result_graph['nodes'][qedge.target_id]
            # Every node pair this qedge could connect within this result graph
            possible_node_pairs = set()
            for node_1 in potential_nodes_1:
                for node_2 in potential_nodes_2:
                    node_pair_key = f"{node_1}--{node_2}"
                    possible_node_pairs.add(node_pair_key)
            for node_pair in possible_node_pairs:
                ids_of_matching_edges = edges_by_node_pairs[qedge_id].get(node_pair, set())
                result_graph['edges'][qedge_id] = result_graph['edges'][qedge_id].union(ids_of_matching_edges)

    # Discard result graphs that do not fulfill the whole query graph
    final_result_graphs = [result_graph for result_graph in result_graphs if _result_graph_is_fulfilled(result_graph, qg)]

    # Convert these into actual object model results
    results = []
    for result_graph in final_result_graphs:
        node_bindings = []
        for qnode_id, node_ids in result_graph['nodes'].items():
            for node_id in node_ids:
                node_bindings.append(NodeBinding(qg_id=qnode_id, kg_id=node_id))
        edge_bindings = []
        for qedge_id, edge_ids in result_graph['edges'].items():
            for edge_id in edge_ids:
                edge_bindings.append(EdgeBinding(qg_id=qedge_id, kg_id=edge_id))
        result = Result(node_bindings=node_bindings, edge_bindings=edge_bindings)

        # Fill out the essence for the result (the single "headline" node,
        # falling back to its id when it has no name)
        essence_qnode_id = _get_essence_node_for_qg(qg)
        essence_qnode = _get_query_node(essence_qnode_id, qg)
        essence_kg_node_id_set = result_graph['nodes'].get(essence_qnode_id, set())
        if len(essence_kg_node_id_set) == 1:
            essence_kg_node_id = next(iter(essence_kg_node_id_set))
            essence_kg_node = kg_node_lookup[essence_kg_node_id]
            result.essence = essence_kg_node.name
            if result.essence is None:
                result.essence = essence_kg_node_id
            assert result.essence is not None
            if essence_kg_node.symbol is not None:
                result.essence += " (" + str(essence_kg_node.symbol) + ")"
            result.essence_type = str(essence_qnode.type) if essence_qnode else None
        elif len(essence_kg_node_id_set) == 0:
            result.essence = cast(str, None)
            result.essence_type = cast(str, None)
        else:
            raise ValueError(f"Result contains more than one node that is a candidate for the essence: {essence_kg_node_id_set}")

        # Programmatically generating an informative description for each result
        # seems difficult, but having something non-None is required by the
        # database. Just put in a placeholder for now, as is done by the
        # QueryGraphReasoner
        result.description = "No description available"  # see issue 642
        results.append(result)
    return results
def _prune_dead_ends_from_result(result: Result, query_graph: QueryGraph,
                                 kg_edges_map: Dict[str, Edge],
                                 qg_adj_map: Dict[str, Set[str]]) -> Result:
    """Remove dead-end nodes (and the edges they orphan) from a result.

    A node is a "dead end" when it is no longer connected, via edges still in
    the result, to at least one node for every query node it is supposed to be
    adjacent to (per qg_adj_map). Removal is iterative because deleting one
    node can strand its neighbors.

    :param result: The result to prune (its bindings are read, not mutated; a
        new Result is returned).
    :param query_graph: The query graph the result was built against.
    :param kg_edges_map: Knowledge-graph edges keyed by edge id.
    :param qg_adj_map: For each query node id, the set of query node ids it
        must be connected to.
    :return: A new, pruned Result.
    """
    # Group the result's bound KG node ids by the query node they fulfill
    result_nodes_by_qg_id = {
        qnode.id: {
            node_binding.kg_id
            for node_binding in result.node_bindings
            if node_binding.qg_id == qnode.id
        }
        for qnode in query_graph.nodes
    }
    # Likewise group the bound KG edge ids by query edge
    result_edges_by_qg_id = {
        qedge.id: {
            edge_binding.kg_id
            for edge_binding in result.edge_bindings
            if edge_binding.qg_id == qedge.id
        }
        for qedge in query_graph.edges
    }
    if not result_edges_by_qg_id:
        # No pruning needed for edge-less queries
        return result

    # Create a map of which edges use which nodes in which position (e.g., 'n00') for this result
    # Example node_usages_by_edges_map: {'e00': {'KG1:111221': {'n00': 'CUI:122', 'n01': 'CUI:124'}}}
    node_usages_by_edges_map = dict()
    for qedge in query_graph.edges:
        node_usages_by_edges_map[qedge.id] = dict()
        for edge_id in result_edges_by_qg_id[qedge.id]:
            edge = kg_edges_map.get(edge_id)
            if edge_id not in node_usages_by_edges_map[qedge.id]:
                node_usages_by_edges_map[qedge.id][edge_id] = dict()
            qnode_id_1 = qedge.source_id
            qnode_id_2 = qedge.target_id
            # Orient the KG edge onto the query edge: if its source/target line
            # up with the qedge's source/target use them as-is, otherwise the
            # edge must be fulfilling the qedge in the reverse direction
            if edge.source_id in result_nodes_by_qg_id[
                    qnode_id_1] and edge.target_id in result_nodes_by_qg_id[
                        qnode_id_2]:
                node_usages_by_edges_map[
                    qedge.id][edge_id][qnode_id_1] = edge.source_id
                node_usages_by_edges_map[
                    qedge.id][edge_id][qnode_id_2] = edge.target_id
            else:
                node_usages_by_edges_map[
                    qedge.id][edge_id][qnode_id_1] = edge.target_id
                node_usages_by_edges_map[
                    qedge.id][edge_id][qnode_id_2] = edge.source_id

    # Create a map of which nodes each node is connected to in this result (organized by the qnode_id they're fulfilling)
    # Example node_connections_map: {'n01': {'CUI:1222': {'n00': {'DOID:122'}, 'n02': {'UniProtKB:22', 'UniProtKB:333'}}}}
    node_connections_map = dict()
    for qedge_id, edges_to_nodes_dict in node_usages_by_edges_map.items():
        current_qedge = next(qedge for qedge in query_graph.edges
                             if qedge.id == qedge_id)
        qnode_ids = [current_qedge.source_id, current_qedge.target_id]
        for edge_id, node_usages_dict in edges_to_nodes_dict.items():
            # Record the connection from each end of the edge to the other end
            for current_qnode_id in qnode_ids:
                connected_qnode_id = next(qnode_id for qnode_id in qnode_ids
                                          if qnode_id != current_qnode_id)
                current_node_id = node_usages_dict[current_qnode_id]
                connected_node_id = node_usages_dict[connected_qnode_id]
                if current_qnode_id not in node_connections_map:
                    node_connections_map[current_qnode_id] = dict()
                if current_node_id not in node_connections_map[
                        current_qnode_id]:
                    node_connections_map[current_qnode_id][
                        current_node_id] = dict()
                if connected_qnode_id not in node_connections_map[
                        current_qnode_id][current_node_id]:
                    node_connections_map[current_qnode_id][current_node_id][
                        connected_qnode_id] = set()
                node_connections_map[current_qnode_id][current_node_id][
                    connected_qnode_id].add(connected_node_id)

    # Iteratively remove any nodes that are missing a neighbor until there are no such nodes left
    # NOTE(review): node_connections_map[qnode_id] will raise KeyError for a
    # query node that has no edge usages in this result — confirm upstream
    # guarantees every qnode participates in at least one bound edge.
    found_dead_end = True
    while found_dead_end:
        found_dead_end = False
        for qnode_id in [qnode.id for qnode in query_graph.nodes]:
            qnode_ids_should_be_connected_to = qg_adj_map[qnode_id]
            for node_id, node_mappings_dict in node_connections_map[
                    qnode_id].items():
                # Check if any mappings are even entered for all qnode_ids this node should be connected to
                if set(node_mappings_dict.keys()
                       ) != qnode_ids_should_be_connected_to:
                    if node_id in result_nodes_by_qg_id[qnode_id]:
                        result_nodes_by_qg_id[qnode_id].remove(node_id)
                        found_dead_end = True
                else:
                    # Verify that at least one of the entered connections still exists (for each connected qnode_id)
                    for connected_qnode_id, connected_node_ids in node_mappings_dict.items(
                    ):
                        if not connected_node_ids.intersection(
                                result_nodes_by_qg_id[connected_qnode_id]):
                            if node_id in result_nodes_by_qg_id[qnode_id]:
                                result_nodes_by_qg_id[qnode_id].remove(node_id)
                                found_dead_end = True

    # Then remove all orphaned edges (which were created when the dead-end nodes were removed above)
    for qedge_id, edges_dict in node_usages_by_edges_map.items():
        for edge_key, node_mappings in edges_dict.items():
            # Drop the edge if either endpoint node was pruned
            for qnode_id, used_node_id in node_mappings.items():
                if used_node_id not in result_nodes_by_qg_id[qnode_id]:
                    if edge_key in result_edges_by_qg_id[qedge_id]:
                        result_edges_by_qg_id[qedge_id].remove(edge_key)

    # Then create a new pruned result
    revised_node_bindings = []
    for qnode_id, node_ids in result_nodes_by_qg_id.items():
        for node_id in node_ids:
            revised_node_bindings.append(
                NodeBinding(qg_id=qnode_id, kg_id=node_id))
    revised_edge_bindings = []
    for qedge_id, edge_ids in result_edges_by_qg_id.items():
        for edge_id in edge_ids:
            revised_edge_bindings.append(
                EdgeBinding(qg_id=qedge_id, kg_id=edge_id))
    return Result(node_bindings=revised_node_bindings,
                  edge_bindings=revised_edge_bindings)
def queryTerm(self, term):
    """Look up a term in MeSH and return a message describing it.

    :param term: The term name to look up (e.g. "acetaminophen")
    :return: A message object; on success it carries one result built around
        the matched MeSH node, otherwise a TermNotFound error description.
    """
    method = "queryTerm"
    attrs = self.findTermAttributesAndTypeByName(term)
    message = self.createMessage()

    # Guard: lookup failed — return an informative "not found" message
    if attrs["status"] != 'OK':
        message.message_code = "TermNotFound"
        message.code_description = "Unable to find this term in MeSH. No further information is available at this time."
        message.id = None
        return message

    curie = "MESH:" + attrs["id"]
    message.code_description = "1 result found"
    message.table_column_names = ["id", "type", "name", "description", "uri"]

    # Node describing the matched MeSH term
    mesh_node = Node()
    mesh_node.id = curie
    mesh_node.uri = "http://purl.obolibrary.org/obo/MESH_" + attrs["id"]
    mesh_node.type = [attrs["type"]]
    mesh_node.name = attrs["name"]
    mesh_node.description = attrs["description"]

    # Single result (potential answer) wrapping that node
    answer = Result()
    answer.id = "http://rtx.ncats.io/api/v1/result/0000"
    answer.description = "The term " + attrs["name"] + " refers to " + attrs["description"]
    answer.confidence = 1.0
    answer.essence = attrs["name"]
    answer.essence_type = attrs["type"]
    answer.row_data = [
        mesh_node.id,
        ",".join(mesh_node.type),
        mesh_node.name,
        mesh_node.description,
        mesh_node.uri,
    ]

    # For this single-node answer the result graph doubles as the whole
    # knowledge graph of the message
    kg = KnowledgeGraph()
    kg.nodes = [mesh_node]
    answer.result_graph = kg
    message.results = [answer]
    message.knowledge_graph = kg

    # Manufacture a matching one-node query graph post hoc
    query_node = QNode()
    query_node.node_id = "n00"
    query_node.curie = curie
    query_node.type = None
    qg = QueryGraph()
    qg.nodes = [query_node]
    qg.edges = []
    message.query_graph = qg

    # Knowledge map binding the query node to the matched term
    answer.knowledge_map = {"n00": curie}
    return message
def answer(self, entity, use_json=False):
    """
    Answer a question of the type "What is X" but is general:
    :param entity: KG neo4j node name (eg "carbetocin")
    :param use_json: If the answer should be in Translator standardized API output format
    :return: a description and type of the node
    """
    # Resolve the entity name to CURIEs via the KG node index
    eprint("Looking up '%s' in KgNodeIndex" % entity)
    node_index = KGNodeIndex()
    curies = node_index.get_curies(entity)

    # Unknown entity: plain None, or a structured error message in JSON mode
    if not curies:
        if not use_json:
            return None
        envelope = FormatOutput.FormatResponse(0)
        envelope.add_error_message("TermNotFound",
                                   "This concept is not in our knowledge graph")
        return envelope.message

    # Fetch the properties of the first matching node
    eprint("Getting properties for '%s'" % curies[0])
    props = RU.get_node_properties(curies[0])
    eprint("Properties are:")
    eprint(props)

    # Plain mode: hand back the raw property structure as-is
    if not use_json:
        return props

    # JSON mode: wrap the properties in the standardized API output format
    envelope = FormatOutput.FormatResponse(0)
    envelope.message.table_column_names = [
        "id", "type", "name", "description", "uri"
    ]
    envelope.message.code_description = None

    # Node carrying the looked-up properties
    kg_node = Node()
    kg_node.id = props["id"]
    kg_node.uri = props["uri"]
    kg_node.type = [props["category"]]
    kg_node.name = props["name"]
    kg_node.description = props["description"]

    # The one result (potential answer)
    answer_result = Result()
    answer_result.id = "http://arax.ncats.io/api/v1/result/0000"
    answer_result.description = "The term %s is in our knowledge graph and is defined as %s" % (
        props["name"], props["description"])
    answer_result.confidence = 1.0
    answer_result.essence = props["name"]
    answer_result.essence_type = props["category"]
    answer_result.row_data = [
        kg_node.id,
        ",".join(kg_node.type),
        kg_node.name,
        kg_node.description,
        kg_node.uri,
    ]

    # A one-node knowledge graph serves as both the result graph and the
    # top-level message knowledge graph
    graph = KnowledgeGraph()
    graph.nodes = [kg_node]
    graph.edges = []
    answer_result.result_graph = graph
    envelope.message.results = [answer_result]
    envelope.message.knowledge_graph = graph

    # Manufacture the corresponding single-node query graph post hoc
    q_node = QNode()
    q_node.id = "n00"
    q_node.curie = props["id"]
    q_node.type = None
    qg = QueryGraph()
    qg.nodes = [q_node]
    qg.edges = []
    envelope.message.query_graph = qg

    # Bind the query node to the KG node; there are no edges to bind
    answer_result.node_bindings = [NodeBinding(qg_id="n00", kg_id=props["id"])]
    answer_result.edge_bindings = []
    return envelope.message
def add_subgraph(self, nodes, edges, plain_text, confidence, return_result=False):
    """
    Populate the object model using networkx neo4j subgraph
    :param nodes: nodes in the subgraph (g.nodes(data=True))
    :param edges: edges in the subgraph (g.edges(data=True))
    :param plain_text: free-text description stored on the created result
    :param confidence: confidence value stored on the created result
    :param return_result: if True, return the created Result object
    :return: the created Result when return_result is True, else None
    """
    # Get the relevant info from the nodes and edges
    node_keys = []
    node_descriptions = dict()
    node_names = dict()
    node_labels = dict()
    node_uuids = dict()
    node_accessions = dict()
    node_iris = dict()
    node_uuids2iri = dict()
    node_curies = dict()
    node_uuids2curie = dict()
    for u, data in nodes:
        node_keys.append(u)
        # Description is optional on the neo4j node; default to the string "None"
        if 'description' in data['properties']:
            node_descriptions[u] = data['properties']['description']
        else:
            node_descriptions[u] = "None"
        node_names[u] = data['properties']['name']
        # Pick the first non-'Base' label as the node type.
        # NOTE(review): set order is arbitrary, so with multiple non-Base
        # labels the choice is nondeterministic, and a node with ONLY the
        # 'Base' label would raise IndexError — confirm labels are always
        # exactly {'Base', <one type>}.
        node_labels[u] = list(set(data['labels']).difference({'Base'}))[0]
        node_uuids[u] = data['properties']['UUID']
        node_accessions[u] = data['properties']['accession']
        node_iris[u] = data['properties']['uri']
        node_uuids2iri[data['properties']
                       ['UUID']] = data['properties']['uri']
        curie_id = data['properties']['id']
        # Normalize ChEMBL identifiers to the "CHEMBL:CHEMBL<num>" CURIE form
        if curie_id.split(':')[0].upper() == "CHEMBL":
            curie_id = "CHEMBL:CHEMBL" + curie_id.split(':')[1]
        node_uuids2curie[data['properties']['UUID']] = curie_id
        node_curies[
            u] = curie_id  # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)
    edge_keys = []
    edge_types = dict()
    edge_source_db = dict()
    edge_source_iri = dict()
    edge_target_iri = dict()
    edge_source_curie = dict()
    edge_target_curie = dict()
    for u, v, data in edges:
        edge_keys.append((u, v))
        edge_types[(u, v)] = data['type']
        edge_source_db[(u, v)] = data['properties']['provided_by']
        # Resolve endpoint UUIDs to IRIs/CURIEs via the maps built above
        edge_source_iri[(
            u, v)] = node_uuids2iri[data['properties']['source_node_uuid']]
        edge_target_iri[(
            u, v)] = node_uuids2iri[data['properties']['target_node_uuid']]
        edge_source_curie[(
            u, v)] = node_uuids2curie[data['properties']['source_node_uuid']]
        edge_target_curie[(
            u, v)] = node_uuids2curie[data['properties']['target_node_uuid']]
    # For each node, populate the relevant information
    node_objects = []
    node_iris_to_node_object = dict()
    for node_key in node_keys:
        node = Node()
        node.id = node_curies[node_key]
        node.type = node_labels[node_key]
        node.name = node_names[node_key]
        node.uri = node_iris[node_key]
        node.accession = node_accessions[node_key]
        node.description = node_descriptions[node_key]
        node_objects.append(node)
        node_iris_to_node_object[node_iris[node_key]] = node
    # for each edge, create an edge between them
    edge_objects = []
    for u, v in edge_keys:
        edge = Edge()
        edge.type = edge_types[(u, v)]
        edge.source_id = node_iris_to_node_object[edge_source_iri[(u,
                                                                   v)]].id
        edge.target_id = node_iris_to_node_object[edge_target_iri[(u,
                                                                   v)]].id
        #edge.origin_list = []
        #edge.origin_list.append(edge_source_db[(u, v)])  # TODO: check with eric if this really should be a list and if it should contain the source DB('s)
        # Edge is appended before the remaining attributes are set; the same
        # object is referenced by the list, so the later assignments still apply
        edge_objects.append(edge)
        #edge.attribute_list
        #edge.confidence
        #edge.evidence_type
        edge.is_defined_by = "RTX"
        #edge.provided_by = node_iris_to_node_object[edge_source_iri[(u, v)]].uri
        edge.provided_by = edge_source_db[(u, v)]
        #edge.publications
        #edge.qualifiers
        #edge.relation
        #edge.source_id
        #edge.target_id
        #edge.type
    # Create the result (potential answer)
    result1 = Result()
    result1.text = plain_text
    result1.confidence = confidence
    # Create a ResultGraph object and put the list of nodes and edges into it
    result_graph = ResultGraph()
    result_graph.node_list = node_objects
    result_graph.edge_list = edge_objects
    # Put the ResultGraph into the first result (potential answer)
    result1.result_graph = result_graph
    # Put the first result (potential answer) into the response
    self._result_list.append(result1)
    self.response.result_list = self._result_list
    # Increment the number of results
    self._num_results += 1
    if self._num_results == 1:
        self.response.message = "%s result found" % self._num_results
    else:
        self.response.message = "%s results found" % self._num_results
    if return_result:
        return result1
    else:
        pass
def test1(self):
    """Assemble a complete sample response by hand and print it.

    Exercises the object model end to end: response metadata, two nodes
    (disease and protein), one edge with provenance, and one result.
    """
    # Top-level response envelope with metadata about this answer
    response = Response()
    response.context = "http://translator.ncats.io"
    response.id = "http://rtx.ncats.io/api/v1/response/1234"
    response.type = "medical_translator_query_response"
    response.tool_version = "RTX 0.4"
    response.schema_version = "0.5"
    response.datetime = datetime.datetime.now().strftime(
        "%Y-%m-%d %H:%M:%S")
    response.original_question_text = "what proteins are affected by sickle cell anemia"
    response.restated_question_text = "Which proteins are affected by sickle cell anemia?"
    response.result_code = "OK"
    response.message = "1 result found"

    # Disease node
    disease_node = Node()
    disease_node.id = "http://omim.org/entry/603903"
    disease_node.type = "disease"
    disease_node.name = "sickle cell anemia"
    disease_node.accession = "OMIM:603903"
    disease_node.description = "A disease characterized by chronic hemolytic anemia..."

    # Protein node with one attached comment attribute
    protein_node = Node()
    protein_node.id = "https://www.uniprot.org/uniprot/P00738"
    protein_node.type = "protein"
    protein_node.name = "Haptoglobin"
    protein_node.symbol = "HP"
    protein_node.accession = "UNIPROT:P00738"
    protein_node.description = "Haptoglobin captures, and combines with free plasma hemoglobin..."

    complex_attr = NodeAttribute()
    complex_attr.type = "comment"
    complex_attr.name = "Complex_description"
    complex_attr.value = "The Hemoglobin/haptoglobin complex is composed of a haptoglobin dimer bound to two hemoglobin alpha-beta dimers"
    protein_node.node_attributes = [complex_attr]

    # Edge linking the disease to the protein
    causal_edge = Edge()
    causal_edge.type = "is_caused_by_a_defect_in"
    causal_edge.source_id = disease_node.id
    causal_edge.target_id = protein_node.id
    causal_edge.confidence = 1.0

    # Provenance for the edge, carrying one PubMed attribute
    provenance = Origin()
    provenance.id = "https://api.monarchinitiative.org/api/bioentity/disease/OMIM:603903/genes/"
    provenance.type = "Monarch_BioLink_API_Relationship"

    pubmed_attr = EdgeAttribute()
    pubmed_attr.type = "PubMed_article"
    pubmed_attr.name = "Orthopaedic Manifestations of Sickle Cell Disease"
    pubmed_attr.value = None
    pubmed_attr.url = "https://www.ncbi.nlm.nih.gov/pubmed/29309293"
    provenance.attribute_list = [pubmed_attr]
    causal_edge.origin_list = [provenance]

    # One result (potential answer) wrapping a graph of both nodes and the edge
    sample_result = Result()
    sample_result.id = "http://rtx.ncats.io/api/v1/response/1234/result/2345"
    sample_result.text = "A free text description of this result"
    sample_result.confidence = 0.932
    graph = ResultGraph()
    graph.node_list = [disease_node, protein_node]
    graph.edge_list = [causal_edge]
    sample_result.result_graph = graph

    # Attach the result to the response and dump it
    response.result_list = [sample_result]
    print(response)