def answer_single_node_query(self, single_node_qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Answer a query graph made up of a single query node.

    The qnode's curies are first replaced with synonyms (KG1, when synonym use is enabled) or
    with canonical curies (KG2c). KG2c queries are then sent to Plover, backing up to Neo4j if
    Plover does not return a 200; every other KG is answered using Neo4j.

    :param single_node_qg: The query graph; its first qnode is the one that gets looked up.
    :return: The knowledge graph produced by whichever backend answered the query.
    """
    log = self.response
    use_synonyms = self.use_synonyms
    kg_name = self.kg_name
    querying_kg1 = kg_name == "KG1"
    querying_kg2c = kg_name == "KG2c"

    if querying_kg1:
        single_node_qg = eu.make_qg_use_old_snake_case_types(single_node_qg)
    qnode_key = next(iter(single_node_qg.nodes))
    qnode = single_node_qg.nodes[qnode_key]

    # Swap the qnode's curies for synonyms (KG1) or canonical versions (KG2c)
    if qnode.id:
        if use_synonyms and querying_kg1:
            qnode.id = eu.get_curie_synonyms(qnode.id, log)
            qnode.category = []  # Has to be cleared, otherwise results are limited (#889)
        elif querying_kg2c:
            qnode.id = eu.get_canonical_curies_list(qnode.id, log)
            qnode.category = []  # Has to be cleared to avoid discrepancies in types for particular concepts

    # KG2c is answered by Plover when possible; everything else (or a failed Plover call) uses Neo4j
    if querying_kg2c:
        plover_answer, response_status = self._answer_query_using_plover(single_node_qg, log)
        if response_status == 200:
            return self._grab_nodes_and_edges_from_sqlite(plover_answer, kg_name, log)
        log.warning(f"Plover returned a {response_status} response, so I'm backing up to Neo4j..")
    return self._answer_single_node_query_using_neo4j(qnode_key, single_node_qg, kg_name, log)
def answer_single_node_query(self, single_node_qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Answer a query graph made up of a single query node by running a Cypher query.

    The qnode's curies are first replaced with synonyms (KG1, when synonym use is enabled) or
    with canonical curies (KG2c). A Cypher query matching the qnode on its curie(s) is then run
    and every returned node is added to the answer knowledge graph under the qnode's key.

    :param single_node_qg: The query graph; its first qnode is the one that gets looked up.
    :return: A knowledge graph (organized by QG IDs) containing the nodes found for the qnode.
    """
    kg_name = self.kg_name
    use_synonyms = self.use_synonyms
    log = self.response
    final_kg = QGOrganizedKnowledgeGraph()

    single_node_qg = eu.make_qg_use_old_types(single_node_qg)  # Temporary patch until we switch to KG2.5.1
    qnode_key = next(iter(single_node_qg.nodes))
    qnode = single_node_qg.nodes[qnode_key]

    # Convert qnode curies as needed (either to synonyms or to canonical versions)
    if qnode.id:
        if use_synonyms and kg_name == "KG1":
            qnode.id = eu.get_curie_synonyms(qnode.id, log)
            qnode.category = []  # Important to clear this, otherwise results are limited (#889)
        elif kg_name == "KG2c":
            qnode.id = eu.get_canonical_curies_list(qnode.id, log)
            qnode.category = []  # Important to clear this to avoid discrepancies in types for particular concepts

    # Build and run a cypher query to get this node/nodes
    if isinstance(qnode.id, str):
        # Escape backslashes and single quotes so the curie cannot end the Cypher string literal early
        escaped_curie = qnode.id.replace("\\", "\\\\").replace("'", "\\'")
        where_clause = f"{qnode_key}.id='{escaped_curie}'"
    else:
        # The Python repr of a list of strings is used directly as the Cypher list literal.
        # NOTE(review): a qnode with no id renders as "<key>.id in None" here - confirm callers always
        # supply at least one curie for single-node queries.
        where_clause = f"{qnode_key}.id in {qnode.id}"
    cypher_query = (f"MATCH {self._get_cypher_for_query_node(qnode_key, single_node_qg, kg_name)} "
                    f"WHERE {where_clause} RETURN {qnode_key}")
    log.info(f"Sending cypher query for node {qnode_key} to {kg_name} neo4j")
    results = self._run_cypher_query(cypher_query, kg_name, log)

    # Load the results into swagger object model and add to our answer knowledge graph
    for result in results:
        neo4j_node = result.get(qnode_key)
        swagger_node_key, swagger_node = self._convert_neo4j_node_to_swagger_node(neo4j_node, kg_name)
        final_kg.add_node(swagger_node_key, swagger_node, qnode_key)

    # TODO: remove this patch once we switch to KG2.5.0!
    eu.convert_node_and_edge_types_to_new_format(final_kg)

    return final_kg
def _pre_process_query_graph(self, query_graph: QueryGraph, log: ARAXResponse) -> QueryGraph:
    """
    Adapt the qnodes of a query graph (in place) to the categories and curie prefixes this KP accepts.

    For each qnode, its categories are mapped through the KP's category overrides and narrowed to
    a single category the KP accepts; if none is accepted, an error is logged and the query graph
    is returned immediately. Any curies on the qnode are then replaced by their synonyms that use
    the KP's preferred prefix for that category, when such synonyms exist.

    :param query_graph: The query graph to adapt; its qnodes are modified in place.
    :param log: The response object that errors, warnings and debug messages are logged to.
    :return: The same query graph object that was passed in.
    """
    for qnode_key, qnode in query_graph.nodes.items():
        # Convert node types to preferred format and verify we can do this query
        formatted_qnode_categories = [self.node_category_overrides_for_kp.get(qnode_category, qnode_category)
                                      for qnode_category in eu.convert_string_or_list_to_list(qnode.category)]
        accepted_qnode_categories = [qnode_category for qnode_category in formatted_qnode_categories
                                     if qnode_category in self.accepted_node_categories]
        if not accepted_qnode_categories:
            log.error(f"{self.kp_name} can only be used for queries involving {self.accepted_node_categories} "
                      f"and QNode {qnode_key} has category '{qnode.category}'",
                      error_code="UnsupportedQueryForKP")
            return query_graph
        # Take the first accepted category in the qnode's own order; indexing into a set here would
        # make the choice depend on set iteration order, which can vary between runs
        qnode.category = accepted_qnode_categories[0]

        # Convert curies to equivalent curies accepted by the KP (depending on qnode type)
        if qnode.id:
            equivalent_curies = eu.get_curie_synonyms(qnode.id, log)
            preferred_prefix = self.kp_preferred_prefixes[qnode.category]
            desired_curies = [curie for curie in equivalent_curies if curie.startswith(f"{preferred_prefix}:")]
            if desired_curies:
                # A single curie is stored as a plain string rather than a one-item list
                qnode.id = desired_curies if len(desired_curies) > 1 else desired_curies[0]
                log.debug(f"Converted qnode {qnode_key} curie to {qnode.id}")
            else:
                log.warning(f"Could not convert qnode {qnode_key} curie(s) to preferred prefix "
                            f"({preferred_prefix})")
    return query_graph
def _validate_and_pre_process_input(qg: QueryGraph, valid_bte_inputs_dict: Dict[str, Set[str]],
                                    enforce_directionality: bool, use_synonyms: bool,
                                    log: ARAXResponse) -> Tuple[str, str]:
    """
    Check that a query graph is something BTE can answer and prepare its qnodes for BTE.

    Validates that the query graph is one-hop, that at least one qnode has a curie, and that the
    predicate and qnode categories are accepted by BTE. Along the way it rewrites both qnodes'
    categories, optionally swaps in curie synonyms, and converts the input qnode's curies to a
    list in BTE's format. The qnodes are modified in place.

    :param qg: The query graph to validate and pre-process.
    :param valid_bte_inputs_dict: Accepted values, keyed by 'predicates' and 'node_categories'.
    :param enforce_directionality: If True, the qedge's subject is the input qnode and its object
                                   is the output; otherwise a qnode that has a curie is the input.
    :param use_synonyms: If True, curies on both qnodes are replaced with their synonyms.
    :param log: The response object that errors and warnings are logged to.
    :return: The (input qnode key, output qnode key) pair, or ("", "") if validation failed.
    """
    failure = ("", "")

    # Only one-hop query graphs are supported
    if len(qg.edges) != 1 or len(qg.nodes) != 2:
        log.error(f"BTE can only accept one-hop query graphs (your QG has {len(qg.nodes)} nodes and "
                  f"{len(qg.edges)} edges)",
                  error_code="InvalidQueryGraph")
        return failure
    qedge_key = next(iter(qg.edges))
    qedge = qg.edges[qedge_key]

    # At least one of the two qnodes needs a curie
    if not any(qnode.id for qnode in qg.nodes.values()):
        log.error(f"Neither qnode for qedge {qedge_key} has a curie specified. BTE requires that at least one of "
                  f"them has a curie. Your query graph is: {qg.to_dict()}",
                  error_code="UnsupportedQueryForKP")
        return failure

    # Decide which qnode is the input and which is the output
    if enforce_directionality:
        input_qnode_key, output_qnode_key = qedge.subject, qedge.object
    else:
        input_qnode_key = next(key for key, qnode in qg.nodes.items() if qnode.id)
        output_qnode_key = next(key for key in qg.nodes if key != input_qnode_key)
        log.warning(f"BTE cannot do bidirectional queries; the query for this edge will be directed, going: "
                    f"{input_qnode_key}-->{output_qnode_key}")
    input_qnode = qg.nodes[input_qnode_key]
    output_qnode = qg.nodes[output_qnode_key]
    both_qnodes = (input_qnode, output_qnode)

    # A predicate of None is allowed; anything else has to be one BTE knows
    if not (qedge.predicate in valid_bte_inputs_dict['predicates'] or qedge.predicate is None):
        log.error(f"BTE does not accept predicate '{qedge.predicate}'. Valid options are "
                  f"{valid_bte_inputs_dict['predicates']}",
                  error_code="InvalidInput")
        return failure

    # Put qnode categories into pascal case, then make sure each qnode has one and all are allowed
    for qnode in both_qnodes:
        qnode.category = [eu.convert_string_to_pascal_case(category)
                          for category in eu.convert_string_or_list_to_list(qnode.category)]
    keys_lacking_category = [key for key in (input_qnode_key, output_qnode_key) if not qg.nodes[key].category]
    if keys_lacking_category:
        log.error(f"BTE requires every query node to have a category. QNode(s) missing a category: "
                  f"{', '.join(keys_lacking_category)}",
                  error_code="InvalidInput")
        return failure
    allowed_categories = valid_bte_inputs_dict['node_categories']
    rejected_categories = []
    for qnode in both_qnodes:
        rejected_categories.extend(category for category in qnode.category
                                   if category not in allowed_categories)
    if rejected_categories:
        log.error(f"BTE does not accept QNode category(s): {', '.join(rejected_categories)}. Valid options are "
                  f"{valid_bte_inputs_dict['node_categories']}",
                  error_code="InvalidInput")
        return failure

    # Replace curies with their synonyms where requested
    if use_synonyms:
        for qnode in both_qnodes:
            if qnode.id:
                qnode.id = eu.get_curie_synonyms(qnode.id, log)

    # The input qnode's curies end up as a list, each converted to BTE's curie format
    input_qnode.id = [eu.convert_curie_to_bte_format(curie)
                      for curie in eu.convert_string_or_list_to_list(input_qnode.id)]

    return input_qnode_key, output_qnode_key
def answer_one_hop_query(
        self, query_graph: QueryGraph
) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    This function answers a one-hop (single-edge) query against the configured KG (KG1, KG2 or KG2c)
    using Neo4j.
    :param query_graph: A Reasoner API standard query graph.
    :return: A tuple containing:
        1. an (almost) Reasoner API standard knowledge graph containing all of the nodes and edges returned as
       results for the query. (Dictionary version, organized by QG IDs.)
        2. a map of which nodes fulfilled which qnode_keys for each edge. Example:
          {'KG1:111221': {'n00': 'DOID:111', 'n01': 'HP:124'}, 'KG1:111223': {'n00': 'DOID:111', 'n01': 'HP:126'}}
       If the query graph is invalid or a step logs a non-OK status, whatever has been built so far
       (possibly an empty knowledge graph and an empty map) is returned.
    """
    log = self.response
    enforce_directionality = self.enforce_directionality
    use_synonyms = self.use_synonyms
    kg_name = self.kg_name
    final_kg = QGOrganizedKnowledgeGraph()
    edge_to_nodes_map = {}

    query_graph = eu.make_qg_use_old_types(query_graph)  # Temporary patch until we switch to KG2.5.1

    # Verify this is a valid one-hop query graph
    if len(query_graph.edges) != 1:
        log.error(f"answer_one_hop_query() was passed a query graph that is not one-hop: "
                  f"{query_graph.to_dict()}",
                  error_code="InvalidQuery")
        return final_kg, edge_to_nodes_map
    if len(query_graph.nodes) != 2:
        # The check rejects graphs with fewer than two nodes as well, so the message says so
        log.error(f"answer_one_hop_query() was passed a query graph that does not have exactly two nodes: "
                  f"{query_graph.to_dict()}",
                  error_code="InvalidQuery")
        return final_kg, edge_to_nodes_map
    qedge_key = next(iter(query_graph.edges))

    # Convert qnode curies as needed (either to synonyms or to canonical versions)
    qnode_keys_with_curies = [qnode_key for qnode_key, qnode in query_graph.nodes.items() if qnode.id]
    for qnode_key in qnode_keys_with_curies:
        qnode = query_graph.nodes[qnode_key]
        if use_synonyms and kg_name == "KG1":
            qnode.id = eu.get_curie_synonyms(qnode.id, log)
        elif kg_name == "KG2c":
            canonical_curies = eu.get_canonical_curies_list(qnode.id, log)
            log.debug(f"Using {len(canonical_curies)} curies as canonical curies for qnode {qnode_key}")
            qnode.id = canonical_curies
        # NOTE(review): assumed to apply to every qnode with curies (not only the KG2c branch) - confirm
        qnode.category = []  # Important to clear this, otherwise results are limited (#889)

    # Run the actual query and process results, bailing out as soon as any step logs a failure
    cypher_query = self._convert_one_hop_query_graph_to_cypher_query(query_graph, enforce_directionality,
                                                                     kg_name, log)
    if log.status != 'OK':
        return final_kg, edge_to_nodes_map
    neo4j_results = self._answer_query_using_neo4j(cypher_query, qedge_key, kg_name, log)
    if log.status != 'OK':
        return final_kg, edge_to_nodes_map
    final_kg, edge_to_nodes_map = self._load_answers_into_kg(neo4j_results, kg_name, query_graph, log)
    if log.status != 'OK':
        return final_kg, edge_to_nodes_map

    # TODO: remove this patch once we switch to KG2.5.0!
    eu.convert_node_and_edge_types_to_new_format(final_kg)

    return final_kg, edge_to_nodes_map
def answer_one_hop_query(self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    This function answers a one-hop (single-edge) query against the configured KG. KG2c queries are
    sent to Plover (backing up to Neo4j if Plover does not return a 200); all other KGs use Neo4j.
    :param query_graph: A TRAPI query graph.
    :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as
            results for the query. (Organized by QG IDs.) An empty knowledge graph is returned if
            the query graph does not have exactly one edge and two nodes.
    """
    log = self.response
    enforce_directionality = self.enforce_directionality
    use_synonyms = self.use_synonyms
    kg_name = self.kg_name
    if kg_name == "KG1":
        query_graph = eu.make_qg_use_old_snake_case_types(query_graph)
    final_kg = QGOrganizedKnowledgeGraph()

    # Verify this is a valid one-hop query graph
    if len(query_graph.edges) != 1:
        log.error(f"answer_one_hop_query() was passed a query graph that is not one-hop: "
                  f"{query_graph.to_dict()}",
                  error_code="InvalidQuery")
        return final_kg
    if len(query_graph.nodes) != 2:
        # The check rejects graphs with fewer than two nodes as well, so the message says so
        log.error(f"answer_one_hop_query() was passed a query graph that does not have exactly two nodes: "
                  f"{query_graph.to_dict()}",
                  error_code="InvalidQuery")
        return final_kg
    qedge_key = next(iter(query_graph.edges))

    # Consider any inverses of our predicate(s) as well
    query_graph = self._add_inverted_predicates(query_graph, log)

    # Convert qnode curies as needed (either to synonyms or to canonical versions)
    qnode_keys_with_curies = [qnode_key for qnode_key, qnode in query_graph.nodes.items() if qnode.id]
    for qnode_key in qnode_keys_with_curies:
        qnode = query_graph.nodes[qnode_key]
        if use_synonyms and kg_name == "KG1":
            qnode.id = eu.get_curie_synonyms(qnode.id, log)
        elif kg_name == "KG2c":
            canonical_curies = eu.get_canonical_curies_list(qnode.id, log)
            log.debug(f"Using {len(canonical_curies)} curies as canonical curies for qnode {qnode_key}")
            qnode.id = canonical_curies
        # NOTE(review): assumed to apply to every qnode with curies (not only the KG2c branch) - confirm
        qnode.category = []  # Important to clear this, otherwise results are limited (#889)

    if kg_name == "KG2c":
        # Use Plover to answer KG2c queries
        plover_answer, response_status = self._answer_query_using_plover(query_graph, log)
        if response_status == 200:
            return self._grab_nodes_and_edges_from_sqlite(plover_answer, kg_name, log)
        # Backup to using neo4j in the event plover failed
        log.warning(f"Plover returned a {response_status} response, so I'm backing up to Neo4j..")

    # Neo4j answers KG2 and KG1 queries, and KG2c queries that Plover could not answer
    return self._answer_query_using_neo4j(query_graph, kg_name, qedge_key, enforce_directionality, log)