Beispiel #1
0
    def answer_single_node_query(
            self, single_node_qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        Answer a query graph that consists of a single query node.

        KG2c queries are sent to Plover first, falling back to Neo4j when Plover does not
        respond with a 200 status; queries for any other KG go straight to Neo4j.
        :param single_node_qg: A query graph; the first node it lists is treated as the query node.
        :return: The knowledge graph produced by whichever backend handled the query.
        """
        kg_name = self.kg_name
        use_synonyms = self.use_synonyms
        log = self.response
        querying_kg1 = kg_name == "KG1"
        querying_kg2c = kg_name == "KG2c"

        if querying_kg1:
            single_node_qg = eu.make_qg_use_old_snake_case_types(single_node_qg)
        qnode_key = next(iter(single_node_qg.nodes))
        qnode = single_node_qg.nodes[qnode_key]

        # Swap the qnode's curies for synonyms (KG1, when enabled) or canonical versions (KG2c)
        if qnode.id and ((use_synonyms and querying_kg1) or querying_kg2c):
            convert_curies = (eu.get_curie_synonyms
                              if querying_kg1 else eu.get_canonical_curies_list)
            qnode.id = convert_curies(qnode.id, log)
            # Important to clear the category: otherwise results are limited (#889) and the
            # types recorded for particular concepts can be discrepant
            qnode.category = []

        if querying_kg2c:
            # Plover is the first choice for KG2c queries
            plover_answer, response_status = self._answer_query_using_plover(
                single_node_qg, log)
            if response_status == 200:
                return self._grab_nodes_and_edges_from_sqlite(
                    plover_answer, kg_name, log)
            log.warning(
                f"Plover returned a {response_status} response, so I'm backing up to Neo4j.."
            )

        # Neo4j handles KG1/KG2 queries and serves as the backup when Plover failed
        return self._answer_single_node_query_using_neo4j(
            qnode_key, single_node_qg, kg_name, log)
Beispiel #2
0
    def answer_single_node_query(
            self, single_node_qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        Answer a single-node query by fetching the matching node(s) from the selected KG's Neo4j.
        :param single_node_qg: A query graph; the first node it lists is treated as the query node.
        :return: A knowledge graph (organized by QG IDs) holding the nodes returned for the query.
        """
        kg_name = self.kg_name
        use_synonyms = self.use_synonyms
        log = self.response
        final_kg = QGOrganizedKnowledgeGraph()
        single_node_qg = eu.make_qg_use_old_types(
            single_node_qg)  # Temporary patch until we switch to KG2.5.1
        qnode_key = next(iter(single_node_qg.nodes))
        qnode = single_node_qg.nodes[qnode_key]

        # Convert qnode curies as needed (either to synonyms or to canonical versions)
        if qnode.id:
            if use_synonyms and kg_name == "KG1":
                qnode.id = eu.get_curie_synonyms(qnode.id, log)
                qnode.category = [
                ]  # Important to clear this, otherwise results are limited (#889)
            elif kg_name == "KG2c":
                qnode.id = eu.get_canonical_curies_list(qnode.id, log)
                qnode.category = [
                ]  # Important to clear this to avoid discrepancies in types for particular concepts

        # Build and run a cypher query to get this node/nodes
        if isinstance(qnode.id, str):
            # Escape backslashes and single quotes so a curie containing them cannot end the
            # Cypher string literal early (which would break, or inject into, the query)
            escaped_curie = qnode.id.replace("\\", "\\\\").replace("'", "\\'")
            where_clause = f"{qnode_key}.id='{escaped_curie}'"
        else:
            # The Python repr of the curie list doubles as the Cypher list literal
            where_clause = f"{qnode_key}.id in {qnode.id}"
        cypher_query = f"MATCH {self._get_cypher_for_query_node(qnode_key, single_node_qg, kg_name)} WHERE {where_clause} RETURN {qnode_key}"
        log.info(
            f"Sending cypher query for node {qnode_key} to {kg_name} neo4j")
        results = self._run_cypher_query(cypher_query, kg_name, log)

        # Load the results into swagger object model and add to our answer knowledge graph
        for result in results:
            neo4j_node = result.get(qnode_key)
            swagger_node_key, swagger_node = self._convert_neo4j_node_to_swagger_node(
                neo4j_node, kg_name)
            final_kg.add_node(swagger_node_key, swagger_node, qnode_key)

        # TODO: remove this patch once we switch to KG2.5.0!
        eu.convert_node_and_edge_types_to_new_format(final_kg)

        return final_kg
Beispiel #3
0
 def _pre_process_query_graph(self, query_graph: QueryGraph,
                              log: ARAXResponse) -> QueryGraph:
     """
     Adapt the query graph's nodes (in place) to what this KP accepts.

     Each qnode's category is narrowed to a single category the KP accepts, and any curies
     it has are swapped for equivalent curies that use the KP's preferred prefix for that
     category. At the first qnode with no accepted category an "UnsupportedQueryForKP"
     error is logged and processing stops.
     :param query_graph: The query graph to adapt; its qnodes are modified in place.
     :param log: Response object that error/warning/debug messages are logged to.
     :return: The same query graph object that was passed in.
     """
     for qnode_key, qnode in query_graph.nodes.items():
         # Convert node types to preferred format and verify we can do this query
         formatted_qnode_categories = [
             self.node_category_overrides_for_kp.get(qnode_category,
                                                     qnode_category)
             for qnode_category in eu.convert_string_or_list_to_list(
                 qnode.category)
         ]
         accepted_qnode_categories = set(
             formatted_qnode_categories).intersection(
                 self.accepted_node_categories)
         if not accepted_qnode_categories:
             log.error(
                 f"{self.kp_name} can only be used for queries involving {self.accepted_node_categories} "
                 f"and QNode {qnode_key} has category '{qnode.category}'",
                 error_code="UnsupportedQueryForKP")
             return query_graph
         # Take the first accepted category in the order the query listed them; indexing
         # into the set instead would make the choice depend on arbitrary set ordering
         qnode.category = next(
             category for category in formatted_qnode_categories
             if category in accepted_qnode_categories)
         # Convert curies to equivalent curies accepted by the KP (depending on qnode type)
         if qnode.id:
             equivalent_curies = eu.get_curie_synonyms(qnode.id, log)
             # Looked up once per qnode rather than once per candidate curie
             preferred_prefix = self.kp_preferred_prefixes[qnode.category]
             curie_prefix = f"{preferred_prefix}:"
             desired_curies = [
                 curie for curie in equivalent_curies
                 if curie.startswith(curie_prefix)
             ]
             if desired_curies:
                 # A lone curie is stored as a plain string rather than a one-item list
                 qnode.id = desired_curies if len(
                     desired_curies) > 1 else desired_curies[0]
                 log.debug(
                     f"Converted qnode {qnode_key} curie to {qnode.id}")
             else:
                 log.warning(
                     f"Could not convert qnode {qnode_key} curie(s) to preferred prefix ({preferred_prefix})"
                 )
     return query_graph
Beispiel #4
0
    def _validate_and_pre_process_input(qg: QueryGraph,
                                        valid_bte_inputs_dict: Dict[str,
                                                                    Set[str]],
                                        enforce_directionality: bool,
                                        use_synonyms: bool,
                                        log: ARAXResponse) -> Tuple[str, str]:
        """
        Check that a query graph is one BTE can answer and normalize its qnodes in place.

        Whenever a check fails, an error is logged and a pair of empty strings is returned.
        :param qg: The query graph to check; categories and curies of its qnodes are rewritten in place.
        :param valid_bte_inputs_dict: Allowed values, read from its 'predicates' and 'node_categories' entries.
        :param enforce_directionality: If True the qedge's subject is the input qnode and its object the
            output; otherwise a qnode that has a curie is used as the input.
        :param use_synonyms: Whether qnode curies should be replaced by their synonyms.
        :param log: Response object that errors and warnings are logged to.
        :return: (input qnode key, output qnode key), or ("", "") if validation failed.
        """
        invalid_result = ("", "")

        # BTE only handles one-hop query graphs
        edge_count = len(qg.edges)
        node_count = len(qg.nodes)
        if edge_count != 1 or node_count != 2:
            log.error(
                f"BTE can only accept one-hop query graphs (your QG has {node_count} nodes and "
                f"{edge_count} edges)",
                error_code="InvalidQueryGraph")
            return invalid_result
        qedge_key = next(iter(qg.edges))
        qedge = qg.edges[qedge_key]

        # At least one of the two qnodes needs a curie
        if not any(qnode.id for qnode in qg.nodes.values()):
            log.error(
                f"Neither qnode for qedge {qedge_key} has a curie specified. BTE requires that at least one of "
                f"them has a curie. Your query graph is: {qg.to_dict()}",
                error_code="UnsupportedQueryForKP")
            return invalid_result

        # Decide which qnode acts as the input and which as the output
        if enforce_directionality:
            input_qnode_key, output_qnode_key = qedge.subject, qedge.object
        else:
            input_qnode_key = next(key for key, qnode in qg.nodes.items()
                                   if qnode.id)
            # With exactly two qnodes, the output is simply the other one
            output_qnode_key = next(key for key in qg.nodes
                                    if key != input_qnode_key)
            log.warning(
                f"BTE cannot do bidirectional queries; the query for this edge will be directed, going: "
                f"{input_qnode_key}-->{output_qnode_key}")
        input_qnode = qg.nodes[input_qnode_key]
        output_qnode = qg.nodes[output_qnode_key]
        both_qnodes = (input_qnode, output_qnode)

        # A predicate is optional, but when present it must be one BTE knows
        allowed_predicates = valid_bte_inputs_dict['predicates']
        predicate = qedge.predicate
        if predicate is not None and predicate not in allowed_predicates:
            log.error(
                f"BTE does not accept predicate '{predicate}'. Valid options are "
                f"{allowed_predicates}",
                error_code="InvalidInput")
            return invalid_result

        # Put qnode categories into pascal case, then make sure each qnode has allowed categories
        for qnode in both_qnodes:
            qnode.category = [
                eu.convert_string_to_pascal_case(category)
                for category in eu.convert_string_or_list_to_list(
                    qnode.category)
            ]
        qnodes_missing_type = [
            key for key in (input_qnode_key, output_qnode_key)
            if not qg.nodes[key].category
        ]
        if qnodes_missing_type:
            log.error(
                f"BTE requires every query node to have a category. QNode(s) missing a category: "
                f"{', '.join(qnodes_missing_type)}",
                error_code="InvalidInput")
            return invalid_result
        allowed_categories = valid_bte_inputs_dict['node_categories']
        invalid_qnode_categories = []
        for qnode in both_qnodes:
            invalid_qnode_categories.extend(
                category for category in qnode.category
                if category not in allowed_categories)
        if invalid_qnode_categories:
            log.error(
                f"BTE does not accept QNode category(s): {', '.join(invalid_qnode_categories)}. Valid options are "
                f"{allowed_categories}",
                error_code="InvalidInput")
            return invalid_result

        # Sub in curie synonyms as appropriate
        if use_synonyms:
            # Decide which qnodes have curies before any of them is rewritten
            curie_qnodes = [qnode for qnode in both_qnodes if qnode.id]
            for qnode in curie_qnodes:
                qnode.id = eu.get_curie_synonyms(qnode.id, log)

        # The input qnode's curies always end up as a list, in the format BTE prefers
        input_qnode.id = [
            eu.convert_curie_to_bte_format(curie)
            for curie in eu.convert_string_or_list_to_list(input_qnode.id)
        ]

        return input_qnode_key, output_qnode_key
Beispiel #5
0
    def answer_one_hop_query(
        self, query_graph: QueryGraph
    ) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
        """
        This function answers a one-hop (single-edge) query using either KG1 or KG2.
        If the query graph is not one-hop, or a later step leaves the log status other than 'OK',
        it returns early with whatever has been built so far.
        :param query_graph: A Reasoner API standard query graph.
        :return: A tuple containing:
            1. an (almost) Reasoner API standard knowledge graph containing all of the nodes and edges returned as
           results for the query. (Dictionary version, organized by QG IDs.)
            2. a map of which nodes fulfilled which qnode_keys for each edge. Example:
              {'KG1:111221': {'n00': 'DOID:111', 'n01': 'HP:124'}, 'KG1:111223': {'n00': 'DOID:111', 'n01': 'HP:126'}}
        """
        log = self.response
        enforce_directionality = self.enforce_directionality
        use_synonyms = self.use_synonyms
        kg_name = self.kg_name
        final_kg = QGOrganizedKnowledgeGraph()
        edge_to_nodes_map = {}
        query_graph = eu.make_qg_use_old_types(
            query_graph)  # Temporary patch until we switch to KG2.5.1

        # Verify this is a valid one-hop query graph
        if len(query_graph.edges) != 1:
            log.error(
                f"answer_one_hop_query() was passed a query graph that is not one-hop: "
                f"{query_graph.to_dict()}",
                error_code="InvalidQuery")
            return final_kg, edge_to_nodes_map
        if len(query_graph.nodes) != 2:
            # The check also rejects graphs with fewer than two nodes, so the message says so
            log.error(
                f"answer_one_hop_query() was passed a query graph that does not have exactly two nodes: "
                f"{query_graph.to_dict()}",
                error_code="InvalidQuery")
            return final_kg, edge_to_nodes_map
        qedge_key = next(iter(query_graph.edges))

        # Convert qnode curies as needed (either to synonyms or to canonical versions)
        qnode_keys_with_curies = [
            qnode_key for qnode_key, qnode in query_graph.nodes.items()
            if qnode.id
        ]
        for qnode_key in qnode_keys_with_curies:
            qnode = query_graph.nodes[qnode_key]
            if use_synonyms and kg_name == "KG1":
                qnode.id = eu.get_curie_synonyms(qnode.id, log)
            elif kg_name == "KG2c":
                canonical_curies = eu.get_canonical_curies_list(qnode.id, log)
                log.debug(
                    f"Using {len(canonical_curies)} curies as canonical curies for qnode {qnode_key}"
                )
                qnode.id = canonical_curies
            # Cleared for every qnode that has a curie, whichever KG is being queried
            qnode.category = [
            ]  # Important to clear this, otherwise results are limited (#889)

        # Run the actual query and process results, stopping as soon as a step reports a problem
        cypher_query = self._convert_one_hop_query_graph_to_cypher_query(
            query_graph, enforce_directionality, kg_name, log)
        if log.status != 'OK':
            return final_kg, edge_to_nodes_map
        neo4j_results = self._answer_query_using_neo4j(cypher_query, qedge_key,
                                                       kg_name, log)
        if log.status != 'OK':
            return final_kg, edge_to_nodes_map
        final_kg, edge_to_nodes_map = self._load_answers_into_kg(
            neo4j_results, kg_name, query_graph, log)
        if log.status != 'OK':
            return final_kg, edge_to_nodes_map

        # TODO: remove this patch once we switch to KG2.5.0!
        eu.convert_node_and_edge_types_to_new_format(final_kg)

        return final_kg, edge_to_nodes_map
Beispiel #6
0
    def answer_one_hop_query(
            self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        This function answers a one-hop (single-edge) query using either KG1 or KG2.
        KG2c queries go to Plover first and fall back to Neo4j if Plover does not return a 200
        status; queries for any other KG go straight to Neo4j.
        :param query_graph: A TRAPI query graph.
        :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as
                results for the query. (Organized by QG IDs.) Empty if the query graph is not one-hop.
        """
        log = self.response
        enforce_directionality = self.enforce_directionality
        use_synonyms = self.use_synonyms
        kg_name = self.kg_name
        if kg_name == "KG1":
            query_graph = eu.make_qg_use_old_snake_case_types(query_graph)
        final_kg = QGOrganizedKnowledgeGraph()

        # Verify this is a valid one-hop query graph
        if len(query_graph.edges) != 1:
            log.error(
                f"answer_one_hop_query() was passed a query graph that is not one-hop: "
                f"{query_graph.to_dict()}",
                error_code="InvalidQuery")
            return final_kg
        if len(query_graph.nodes) != 2:
            # The check also rejects graphs with fewer than two nodes, so the message says so
            log.error(
                f"answer_one_hop_query() was passed a query graph that does not have exactly two nodes: "
                f"{query_graph.to_dict()}",
                error_code="InvalidQuery")
            return final_kg
        qedge_key = next(iter(query_graph.edges))

        # Consider any inverses of our predicate(s) as well
        query_graph = self._add_inverted_predicates(query_graph, log)

        # Convert qnode curies as needed (either to synonyms or to canonical versions)
        qnode_keys_with_curies = [
            qnode_key for qnode_key, qnode in query_graph.nodes.items()
            if qnode.id
        ]
        for qnode_key in qnode_keys_with_curies:
            qnode = query_graph.nodes[qnode_key]
            if use_synonyms and kg_name == "KG1":
                qnode.id = eu.get_curie_synonyms(qnode.id, log)
            elif kg_name == "KG2c":
                canonical_curies = eu.get_canonical_curies_list(qnode.id, log)
                log.debug(
                    f"Using {len(canonical_curies)} curies as canonical curies for qnode {qnode_key}"
                )
                qnode.id = canonical_curies
            # Cleared for every qnode that has a curie, whichever KG is being queried
            qnode.category = [
            ]  # Important to clear this, otherwise results are limited (#889)

        if kg_name == "KG2c":
            # Use Plover to answer KG2c queries
            plover_answer, response_status = self._answer_query_using_plover(
                query_graph, log)
            if response_status == 200:
                return self._grab_nodes_and_edges_from_sqlite(
                    plover_answer, kg_name, log)
            # Backup to using neo4j in the event plover failed
            log.warning(
                f"Plover returned a {response_status} response, so I'm backing up to Neo4j.."
            )

        # Neo4j answers KG2 and KG1 queries, and KG2c queries that Plover could not
        return self._answer_query_using_neo4j(query_graph, kg_name, qedge_key,
                                              enforce_directionality, log)