def _check_node_categories(nodes: Dict[str, Node], query_graph: QueryGraph):
    """
    Assert that each node carries every category requested by the qnodes it fulfills.

    :param nodes: Mapping of string keys to nodes; each node's 'qnode_keys' and 'category' are read.
    :param query_graph: Query graph whose 'nodes' mapping is indexed by each node's qnode keys.
    :raises AssertionError: If a qnode's category(s) are not a subset of the node's categories.
    """
    for node in nodes.values():
        for qnode_key in node.qnode_keys:
            qnode = query_graph.nodes[qnode_key]
            if not qnode.category:
                continue  # This qnode places no category constraint, so there is nothing to check
            requested_categories = set(eu.convert_to_list(qnode.category))
            # The node could have additional categories if it has multiple qnode keys
            node_categories = set(node.category)
            assert requested_categories.issubset(node_categories)
async def _answer_query_using_kp_async(self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Send the given query graph to this KP and load its response into a QGOrganizedKnowledgeGraph.

    Progress is reported through self.log.update_query_plan ("Waiting", then one of "Done", "Error" or
    "Timed out"), keyed on the first qedge in the query graph.

    :param query_graph: The query graph to send; its first qedge key identifies the query plan entry.
    :return: The KP's answer organized by QG IDs, or an empty QGOrganizedKnowledgeGraph if the KP returned
            a non-200 status, the request timed out, or the request raised any other exception.
    """
    # Imported locally so this method does not rely on a module-level 'asyncio' import
    import asyncio

    request_body = self._get_prepped_request_body(query_graph)
    query_sent = copy.deepcopy(request_body)
    query_timeout = self._get_query_timeout_length()
    qedge_key = next(qedge_key for qedge_key in query_graph.edges)

    num_input_curies = max(len(eu.convert_to_list(qnode.ids)) for qnode in query_graph.nodes.values())
    waiting_message = f"Query with {num_input_curies} curies sent: waiting for response"
    self.log.update_query_plan(qedge_key, self.kp_name, "Waiting", waiting_message, query=query_sent)
    start = time.time()

    # Avoid calling the KG2 TRAPI endpoint if the 'force_local' flag is set (used only for testing/dev work)
    if self.force_local and self.kp_name == 'infores:rtx-kg2':
        json_response = self._answer_query_force_local(request_body)
    # Otherwise send the query graph to the KP's TRAPI API
    else:
        self.log.debug(f"{self.kp_name}: Sending query to {self.kp_name} API")
        # NOTE(review): TLS certificate verification is disabled for this connection (verify_ssl=False)
        connector = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=connector) as session:
            try:
                async with session.post(f"{self.kp_endpoint}/query",
                                        json=request_body,
                                        headers={'accept': 'application/json'},
                                        timeout=query_timeout) as response:
                    if response.status == 200:
                        json_response = await response.json()
                    else:
                        wait_time = round(time.time() - start)
                        http_error_message = f"Returned HTTP error {response.status} after {wait_time} seconds"
                        self.log.warning(f"{self.kp_name}: {http_error_message}. Query sent to KP was: {request_body}")
                        self.log.update_query_plan(qedge_key, self.kp_name, "Error", http_error_message)
                        return QGOrganizedKnowledgeGraph()
            # aiohttp signals timeouts with asyncio.TimeoutError, which is a different class from
            # concurrent.futures.TimeoutError on Python 3.8-3.10; catch both so timeouts are reported
            # as "Timed out" rather than falling through to the generic handler below
            except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
                timeout_message = f"Query timed out after {query_timeout} seconds"
                self.log.warning(f"{self.kp_name}: {timeout_message}")
                self.log.update_query_plan(qedge_key, self.kp_name, "Timed out", timeout_message)
                return QGOrganizedKnowledgeGraph()
            # Deliberately broad: any failure talking to the KP yields an empty answer, not a crash
            except Exception as ex:
                wait_time = round(time.time() - start)
                exception_message = f"Request threw exception after {wait_time} seconds: {type(ex)}"
                self.log.warning(f"{self.kp_name}: {exception_message}")
                self.log.update_query_plan(qedge_key, self.kp_name, "Error", exception_message)
                return QGOrganizedKnowledgeGraph()

    wait_time = round(time.time() - start)
    answer_kg = self._load_kp_json_response(json_response)
    done_message = f"Returned {len(answer_kg.edges_by_qg_id.get(qedge_key, dict()))} edges in {wait_time} seconds"
    self.log.update_query_plan(qedge_key, self.kp_name, "Done", done_message)
    return answer_kg
def _verify_qg_is_accepted_by_kp(self, query_graph: QueryGraph):
    """
    Check the query graph against the KP's /predicates endpoint and log an error if it is unsupported.

    For each qedge, verifies in turn that the KP accepts at least one of the subject qnode's categories,
    at least one of the object qnode's categories (given the accepted subject categories), and at least
    one of the qedge's predicates (given the accepted subject/object categories). The first failure is
    logged via self.log.error with error_code "UnsupportedQueryForKP" and the method returns.

    If the predicates endpoint does not return HTTP 200, a warning is logged and no checks are made.

    :param query_graph: The query graph to validate; only read, never modified.
    :return: None. The outcome is communicated through self.log.
    """
    # NOTE(review): no timeout is passed to requests.get, so an unresponsive endpoint blocks this call
    kp_predicates_response = requests.get(f"{self.kp_endpoint}/predicates", headers={'accept': 'application/json'})
    if kp_predicates_response.status_code != 200:
        self.log.warning(f"Unable to access {self.kp_name}'s predicates endpoint "
                         f"(returned status of {kp_predicates_response.status_code})")
        return

    # Shape used below: {subject_category: {object_category: [predicate, ...]}}
    predicates_dict = kp_predicates_response.json()

    # TEMPORARY patch until new KG2 is rolled out where no predicates have commas in them
    if self.kp_name.endswith("KG2"):
        for subject_category in predicates_dict:
            for object_category in predicates_dict[subject_category]:
                commaless_predicates = [predicate.replace(",", "")
                                        for predicate in predicates_dict[subject_category][object_category]]
                predicates_dict[subject_category][object_category] = commaless_predicates

    qnodes = query_graph.nodes
    # Iterate over the edges themselves so that error messages name the qedge actually being checked
    for qedge_key, qedge in query_graph.edges.items():
        query_subject_categories = set(eu.convert_to_list(qnodes[qedge.subject].category))
        query_predicates = set(eu.convert_to_list(qedge.predicate))
        query_object_categories = set(eu.convert_to_list(qnodes[qedge.object].category))

        # Make sure the subject qnode's category(s) are accepted by the KP
        allowed_subj_categories = set(predicates_dict)
        accepted_query_subj_categories = query_subject_categories.intersection(allowed_subj_categories)
        if not query_subject_categories and self.kp_supports_none_for_category:
            # If this KP supports None for category, we'll pretend we have all supported categories
            accepted_query_subj_categories = allowed_subj_categories
        if not accepted_query_subj_categories:
            self.log.error(f"{self.kp_name} doesn't support {qedge.subject}'s category. Supported categories "
                           f"for subject qnodes are: {allowed_subj_categories}",
                           error_code="UnsupportedQueryForKP")
            return

        # Make sure that, given the subject qnode's category(s), >=1 of the object's categories are accepted
        allowed_object_categories = {category
                                     for subj_category in accepted_query_subj_categories
                                     for category in predicates_dict[subj_category]}
        accepted_query_obj_categories = query_object_categories.intersection(allowed_object_categories)
        if not query_object_categories and self.kp_supports_none_for_category:
            # If this KP supports None for category, we'll pretend we have all nested categories on this qnode
            accepted_query_obj_categories = allowed_object_categories
        if not accepted_query_obj_categories:
            self.log.error(f"{self.kp_name} doesn't support {qedge.object}'s category. When subject "
                           f"category is {query_subject_categories}, supported object categories are: "
                           f"{allowed_object_categories}",
                           error_code="UnsupportedQueryForKP")
            return

        # Make sure that, given the subject/object categories, at least one of the predicates is accepted
        allowed_predicates = set()
        for subj_category in accepted_query_subj_categories:
            for obj_category in accepted_query_obj_categories:
                if obj_category in predicates_dict[subj_category]:
                    allowed_predicates.update(predicates_dict[subj_category][obj_category])
        accepted_query_predicates = query_predicates.intersection(allowed_predicates)
        if not query_predicates and self.kp_supports_none_for_predicate:
            # If this KP supports None for predicate, we'll pretend we have all nested predicates
            accepted_query_predicates = allowed_predicates
        if not accepted_query_predicates:
            # Tagged with the same error code as the category rejections above
            self.log.error(f"{self.kp_name} doesn't support {qedge_key}'s predicate. For "
                           f"{query_subject_categories}-->{query_object_categories} qedges, supported "
                           f"predicates are: {allowed_predicates}",
                           error_code="UnsupportedQueryForKP")
            return
def _override_qnode_types_as_needed(self, query_graph: QueryGraph) -> QueryGraph:
    """
    Replace each qnode's categories with this KP's overrides, where an override exists.

    Categories without an entry in self.node_category_overrides_for_kp are kept as they are. Each
    qnode's 'category' is reassigned to a de-duplicated list.

    :param query_graph: The query graph whose qnodes are updated in place.
    :return: The same query graph object that was passed in.
    """
    for qnode in query_graph.nodes.values():
        mapped_categories = set()
        for original_category in eu.convert_to_list(qnode.category):
            # Fall back to the original category when the KP has no override for it
            mapped_categories.add(self.node_category_overrides_for_kp.get(original_category, original_category))
        qnode.category = list(mapped_categories)
    return query_graph
def answer_one_hop_query(self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Answer a one-hop (single-edge) query using the specified KP.

    :param query_graph: A TRAPI query graph.
    :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as
            results for the query, organized by QG IDs. An empty knowledge graph is returned if the
            log status is no longer 'OK' after any validation/preprocessing step.
    """
    log = self.log
    final_kg = QGOrganizedKnowledgeGraph()
    # Work on a copy so the caller's query graph is never modified
    qg_copy = eu.copy_qg(query_graph)

    # Validate the query graph, adapt it to the KP's needs, and confirm the KP can answer it
    self._verify_is_one_hop_query_graph(qg_copy)
    if log.status != 'OK':
        return final_kg
    qg_copy = self._preprocess_query_graph(qg_copy)
    if log.status != 'OK':
        return final_kg
    is_kg2 = self.kp_name.endswith("KG2")
    if not is_kg2:
        # Skipped for KG2 for now since predicates/ isn't symmetric yet
        self._verify_qg_is_accepted_by_kp(qg_copy)
        if log.status != 'OK':
            return final_kg

    # Our KPs can handle batch queries (where qnode.id is a list of curies)
    if is_kg2:
        return self._answer_query_using_kp(qg_copy)

    # Otherwise we need to search for curies one-by-one (until TRAPI includes a batch querying method)
    qedge = next(iter(qg_copy.edges.values()))
    # A qnode without curies contributes a single None, so pairs look like ("curie1", None)
    subject_qnode_curies = eu.convert_to_list(qg_copy.nodes[qedge.subject].id) or [None]
    object_qnode_curies = eu.convert_to_list(qg_copy.nodes[qedge.object].id) or [None]

    # Query the KP for every subject/object curie pair and fold each answer into the final KG
    for subject_curie in subject_qnode_curies:
        for object_curie in object_qnode_curies:
            qg_copy.nodes[qedge.subject].id = subject_curie
            qg_copy.nodes[qedge.object].id = object_curie
            log.debug(f"Current curie pair is: subject: {subject_curie}, object: {object_curie}")
            if self.kp_supports_category_lists and self.kp_supports_predicate_lists:
                sub_kg = self._answer_query_using_kp(qg_copy)
            else:
                sub_kg = self._answer_query_for_kps_who_dont_like_lists(qg_copy)
            final_kg = eu.merge_two_kgs(sub_kg, final_kg)

    return final_kg