예제 #1
0
def _check_node_categories(nodes: Dict[str, Node], query_graph: QueryGraph):
    """
    Asserts that every node carries all of the categories requested by each qnode it fulfills.
    :param nodes: Knowledge graph nodes, keyed by node identifier; each node lists the qnode keys it fulfills.
    :param query_graph: The query graph whose qnodes supply the expected categories.
    """
    for node in nodes.values():
        for qnode_key in node.qnode_keys:
            qnode = query_graph.nodes[qnode_key]
            if not qnode.category:
                # Nothing to check when the qnode places no constraint on category
                continue
            requested_categories = set(eu.convert_to_list(qnode.category))
            actual_categories = set(node.category)
            # A node may have extra categories beyond these if it fulfills multiple qnode keys
            assert requested_categories <= actual_categories
예제 #2
0
    async def _answer_query_using_kp_async(
            self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        This function sends the given query graph to the KP (asynchronously) and loads the KP's JSON response
        into our object model. Progress is recorded in the query plan under the query graph's first qedge key.
        :param query_graph: A TRAPI query graph.
        :return: The knowledge graph built from the KP's response (organized by QG IDs), or an empty knowledge
                graph if the KP returned a non-200 status, timed out, or the request threw an exception.
        """
        # Imported locally so the timeout handler below doesn't depend on the file's import block
        import asyncio

        request_body = self._get_prepped_request_body(query_graph)
        query_sent = copy.deepcopy(request_body)
        query_timeout = self._get_query_timeout_length()
        qedge_key = next(qedge_key for qedge_key in query_graph.edges)

        # Report the size of the largest curie list among the qnodes (0 if the query graph has no qnodes)
        num_input_curies = max(
            (len(eu.convert_to_list(qnode.ids))
             for qnode in query_graph.nodes.values()),
            default=0)
        waiting_message = f"Query with {num_input_curies} curies sent: waiting for response"
        self.log.update_query_plan(qedge_key,
                                   self.kp_name,
                                   "Waiting",
                                   waiting_message,
                                   query=query_sent)
        start = time.time()
        # Avoid calling the KG2 TRAPI endpoint if the 'force_local' flag is set (used only for testing/dev work)
        if self.force_local and self.kp_name == 'infores:rtx-kg2':
            json_response = self._answer_query_force_local(request_body)
        # Otherwise send the query graph to the KP's TRAPI API
        else:
            self.log.debug(
                f"{self.kp_name}: Sending query to {self.kp_name} API")
            # NOTE(review): 'verify_ssl' appears to be deprecated in aiohttp 3.x in favor of 'ssl=False' - confirm
            # the installed aiohttp version before switching
            async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(
                    verify_ssl=False)) as session:
                try:
                    async with session.post(
                            f"{self.kp_endpoint}/query",
                            json=request_body,
                            headers={'accept': 'application/json'},
                            timeout=query_timeout) as response:
                        if response.status == 200:
                            json_response = await response.json()
                        else:
                            wait_time = round(time.time() - start)
                            http_error_message = f"Returned HTTP error {response.status} after {wait_time} seconds"
                            self.log.warning(
                                f"{self.kp_name}: {http_error_message}. Query sent to KP was: {request_body}"
                            )
                            self.log.update_query_plan(qedge_key, self.kp_name,
                                                       "Error",
                                                       http_error_message)
                            return QGOrganizedKnowledgeGraph()
                # aiohttp signals timeouts with asyncio.TimeoutError, which is a different class than
                # concurrent.futures.TimeoutError on Python 3.8-3.10; catch both so that timeouts are always
                # reported as "Timed out" rather than falling through to the generic handler below
                except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
                    timeout_message = f"Query timed out after {query_timeout} seconds"
                    self.log.warning(f"{self.kp_name}: {timeout_message}")
                    self.log.update_query_plan(qedge_key, self.kp_name,
                                               "Timed out", timeout_message)
                    return QGOrganizedKnowledgeGraph()
                # Deliberately broad: any failure talking to a KP is logged and treated as "no answers"
                except Exception as ex:
                    wait_time = round(time.time() - start)
                    exception_message = f"Request threw exception after {wait_time} seconds: {type(ex)}"
                    self.log.warning(f"{self.kp_name}: {exception_message}")
                    self.log.update_query_plan(qedge_key, self.kp_name,
                                               "Error", exception_message)
                    return QGOrganizedKnowledgeGraph()

        wait_time = round(time.time() - start)
        answer_kg = self._load_kp_json_response(json_response)
        done_message = f"Returned {len(answer_kg.edges_by_qg_id.get(qedge_key, dict()))} edges in {wait_time} seconds"
        self.log.update_query_plan(qedge_key, self.kp_name, "Done",
                                   done_message)
        return answer_kg
예제 #3
0
    def _verify_qg_is_accepted_by_kp(self, query_graph: QueryGraph):
        """
        This function checks each qedge's subject category / predicate / object category combination against
        what the KP lists at its predicates endpoint. On the first unsupported piece it logs an error (with
        error code "UnsupportedQueryForKP") and returns; if the predicates endpoint doesn't return a 200 status,
        it only logs a warning and skips the check.
        :param query_graph: A TRAPI query graph.
        :return: None (problems are reported through self.log).
        """
        # NOTE(review): no timeout is passed here, so this call can block for as long as the KP takes to respond
        kp_predicates_response = requests.get(f"{self.kp_endpoint}/predicates", headers={'accept': 'application/json'})
        if kp_predicates_response.status_code != 200:
            self.log.warning(f"Unable to access {self.kp_name}'s predicates endpoint "
                             f"(returned status of {kp_predicates_response.status_code})")
            return

        # Nested mapping: subject category --> object category --> collection of predicates
        predicates_dict = kp_predicates_response.json()
        # TEMPORARY patch until new KG2 is rolled out where no predicates have commas in them
        if self.kp_name.endswith("KG2"):
            for subject_category in predicates_dict:
                for object_category in predicates_dict[subject_category]:
                    commaless_predicates = [predicate.replace(",", "")
                                            for predicate in predicates_dict[subject_category][object_category]]
                    predicates_dict[subject_category][object_category] = commaless_predicates

        qnodes = query_graph.nodes
        # Check each qedge in turn so that any error message names the qedge/qnodes that actually failed
        for qedge_key, qedge in query_graph.edges.items():
            query_subject_categories = set(eu.convert_to_list(qnodes[qedge.subject].category))
            query_predicates = set(eu.convert_to_list(qedge.predicate))
            query_object_categories = set(eu.convert_to_list(qnodes[qedge.object].category))

            # Make sure the subject qnode's category(s) are accepted by the KP
            allowed_subj_categories = set(predicates_dict)
            accepted_query_subj_categories = query_subject_categories.intersection(allowed_subj_categories)
            if not query_subject_categories and self.kp_supports_none_for_category:
                # If this KP supports None for category, we'll pretend we have all supported categories
                accepted_query_subj_categories = allowed_subj_categories
            if not accepted_query_subj_categories:
                self.log.error(f"{self.kp_name} doesn't support {qedge.subject}'s category. Supported categories "
                               f"for subject qnodes are: {allowed_subj_categories}",
                               error_code="UnsupportedQueryForKP")
                return

            # Make sure that, given the subject qnode's category(s), >=1 of the object's categories are accepted
            allowed_object_categories = {category for subj_category in accepted_query_subj_categories
                                         for category in predicates_dict[subj_category]}
            accepted_query_obj_categories = query_object_categories.intersection(allowed_object_categories)
            if not query_object_categories and self.kp_supports_none_for_category:
                # If this KP supports None for category, we'll pretend we have all nested categories on this qnode
                accepted_query_obj_categories = allowed_object_categories
            if not accepted_query_obj_categories:
                self.log.error(f"{self.kp_name} doesn't support {qedge.object}'s category. When subject "
                               f"category is {query_subject_categories}, supported object categories are: "
                               f"{allowed_object_categories}", error_code="UnsupportedQueryForKP")
                return

            # Make sure that, given the subject/object categories, at least one of the predicates is accepted
            allowed_predicates = set()
            for subj_category in accepted_query_subj_categories:
                for obj_category in accepted_query_obj_categories:
                    # Not every accepted object category is necessarily nested under every subject category
                    if obj_category in predicates_dict[subj_category]:
                        allowed_predicates.update(set(predicates_dict[subj_category][obj_category]))
            accepted_query_predicates = query_predicates.intersection(allowed_predicates)
            if not query_predicates and self.kp_supports_none_for_predicate:
                # If this KP supports None for predicate, we'll pretend we have all nested predicates
                accepted_query_predicates = allowed_predicates
            if not accepted_query_predicates:
                self.log.error(f"{self.kp_name} doesn't support {qedge_key}'s predicate. For "
                               f"{query_subject_categories}-->{query_object_categories} qedges, supported "
                               f"predicates are: {allowed_predicates}", error_code="UnsupportedQueryForKP")
                return
예제 #4
0
 def _override_qnode_types_as_needed(self, query_graph: QueryGraph) -> QueryGraph:
     """
     Swaps each qnode category for its KP-specific override, where one is defined.
     :param query_graph: A TRAPI query graph (its qnodes are modified in place).
     :return: The same query graph, with every qnode's category set to a de-duplicated list.
     """
     for qnode in query_graph.nodes.values():
         remapped_categories = set()
         for original_category in eu.convert_to_list(qnode.category):
             # Categories without an override entry are kept as they are
             remapped_categories.add(
                 self.node_category_overrides_for_kp.get(original_category, original_category))
         qnode.category = list(remapped_categories)
     return query_graph
예제 #5
0
    def answer_one_hop_query(
            self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        Answers a single-edge (one-hop) query by asking this object's KP.
        :param query_graph: A TRAPI query graph; it is copied first, so the caller's graph is left untouched.
        :return: A knowledge graph (organized by QG IDs) holding the nodes and edges the KP returned; it is
                empty if the log's status is no longer 'OK' after any of the validation/preprocessing steps.
        """
        log = self.log
        final_kg = QGOrganizedKnowledgeGraph()
        # All modifications below are made to this copy rather than to the caller's query graph
        qg_copy = eu.copy_qg(query_graph)

        # Step 1: the query graph must be a valid one-hop query
        self._verify_is_one_hop_query_graph(qg_copy)
        if log.status != 'OK':
            return final_kg

        # Step 2: adapt the query graph to what the KP needs
        qg_copy = self._preprocess_query_graph(qg_copy)
        if log.status != 'OK':
            return final_kg

        # Step 3: confirm the KP can answer it (skipped for KG2 for now since predicates/ isn't symmetric yet)
        if not self.kp_name.endswith("KG2"):
            self._verify_qg_is_accepted_by_kp(qg_copy)
        if log.status != 'OK':
            return final_kg

        # Step 4: query the KP and load its answers into our object model
        if self.kp_name.endswith("KG2"):
            # Our KPs accept batch queries (qnode.id may be a list of curies), so one request is enough
            return self._answer_query_using_kp(qg_copy)

        # Other KPs are queried one curie pair at a time (until TRAPI includes a batch querying method)
        qedge = next(iter(qg_copy.edges.values()))
        subject_qnode = qg_copy.nodes[qedge.subject]
        subject_curies = eu.convert_to_list(subject_qnode.id) or [None]
        object_qnode = qg_copy.nodes[qedge.object]
        object_curies = eu.convert_to_list(object_qnode.id) or [None]

        # Every subject/object pairing; a side with no curies contributes a single None (e.g., ("curie1", None))
        curie_pairs = [(subj, obj) for subj in subject_curies for obj in object_curies]
        for subject_curie, object_curie in curie_pairs:
            qg_copy.nodes[qedge.subject].id = subject_curie
            qg_copy.nodes[qedge.object].id = object_curie
            self.log.debug(
                f"Current curie pair is: subject: {subject_curie}, object: {object_curie}"
            )
            if self.kp_supports_category_lists and self.kp_supports_predicate_lists:
                sub_kg = self._answer_query_using_kp(qg_copy)
            else:
                sub_kg = self._answer_query_for_kps_who_dont_like_lists(qg_copy)
            final_kg = eu.merge_two_kgs(sub_kg, final_kg)

        return final_kg