예제 #1
0
 def _get_edge_with_curie_node(query_graph):
     for edge in query_graph.edges:
         source_node = eu.get_query_node(query_graph, edge.source_id)
         target_node = eu.get_query_node(query_graph, edge.target_id)
         if source_node.curie or target_node.curie:
             return edge
     return None
예제 #2
0
    def _get_query_graph_for_edge(qedge, query_graph, dict_kg):
        """Build a stand-alone, single-edge query graph for ``qedge``.

        The new graph holds a copy of ``qedge`` plus copies of its source and
        target qnodes (looked up in ``query_graph``). A qnode copy gets its
        curie replaced by the list of keys stored under
        ``dict_kg['nodes'][<qnode id>]`` when all of the following hold:

        * the qnode ID is present in ``dict_kg['nodes']``,
        * the qnode has no (truthy) curie of its own, and
        * ``qedge.id`` is not yet present in ``dict_kg['edges']``.

        Returns:
            A ``(edge_query_graph, qnode_ids)`` tuple, where ``qnode_ids`` is
            the set of IDs of the qnodes whose curie was filled in from
            ``dict_kg``.
        """
        single_edge_graph = QueryGraph(nodes=[], edges=[])
        endpoint_qnodes = (
            eu.get_query_node(query_graph, qedge.source_id),
            eu.get_query_node(query_graph, qedge.target_id),
        )

        # The edge graph owns a copy of the qedge, never the original
        single_edge_graph.edges.append(eu.copy_qedge(qedge))

        # Curies are only inherited while dict_kg holds no entry for this qedge
        edge_is_new = qedge.id not in dict_kg['edges']
        inherited_curie_qnode_ids = set()
        for endpoint in endpoint_qnodes:
            endpoint_copy = eu.copy_qnode(endpoint)
            has_prior_results = endpoint_copy.id in dict_kg['nodes']

            # Feed the node keys found by a prior Expand() step in as this qnode's curie
            if has_prior_results and not endpoint_copy.curie and edge_is_new:
                prior_nodes = dict_kg['nodes'][endpoint_copy.id]
                endpoint_copy.curie = list(prior_nodes.keys())
                inherited_curie_qnode_ids.add(endpoint_copy.id)

            single_edge_graph.nodes.append(endpoint_copy)

        return single_edge_graph, inherited_curie_qnode_ids
예제 #3
0
    def _get_query_graph_for_edge(self, qedge: QEdge, query_graph: QueryGraph,
                                  dict_kg: DictKnowledgeGraph,
                                  use_synonyms: bool, kp_to_use: str,
                                  log: Response) -> QueryGraph:
        """Build a stand-alone, single-edge query graph for ``qedge``.

        The new graph holds a copy of ``qedge`` plus copies of its source and
        target qnodes (looked up in ``query_graph``). A qnode copy gets its
        curie replaced by the list of keys stored under
        ``dict_kg.nodes_by_qg_id[<qnode id>]`` when the qnode ID is present
        there, the qnode has no (truthy) curie of its own, and ``qedge.id`` is
        not yet present in ``dict_kg.edges_by_qg_id``.

        When ``use_synonyms`` is true, the copied qnodes are then handed to
        ``self._add_curie_synonyms_to_query_nodes`` together with ``log`` and
        ``kp_to_use``. Finally, any copied qnode whose type is ``'protein'`` or
        ``'gene'`` has its type widened to ``['protein', 'gene']``.

        Returns:
            The newly built single-edge query graph.
        """
        single_edge_graph = QueryGraph(nodes=[], edges=[])
        endpoint_qnodes = (
            eu.get_query_node(query_graph, qedge.source_id),
            eu.get_query_node(query_graph, qedge.target_id),
        )

        # The edge graph owns a copy of the qedge, never the original
        single_edge_graph.edges.append(eu.copy_qedge(qedge))

        # Curies are only inherited while dict_kg holds no entry for this qedge
        edge_is_new = qedge.id not in dict_kg.edges_by_qg_id
        for endpoint in endpoint_qnodes:
            endpoint_copy = eu.copy_qnode(endpoint)
            has_prior_results = endpoint_copy.id in dict_kg.nodes_by_qg_id

            # Feed the node keys found by a prior Expand() step in as this qnode's curie
            if has_prior_results and not endpoint_copy.curie and edge_is_new:
                prior_nodes = dict_kg.nodes_by_qg_id[endpoint_copy.id]
                endpoint_copy.curie = list(prior_nodes.keys())

            single_edge_graph.nodes.append(endpoint_copy)

        if use_synonyms:
            self._add_curie_synonyms_to_query_nodes(
                qnodes=single_edge_graph.nodes, log=log, kp=kp_to_use)

        # Consider both protein and gene if a qnode's type is one of those (since KPs handle these differently)
        for graph_qnode in single_edge_graph.nodes:
            if graph_qnode.type in ('protein', 'gene'):
                # A fresh list per qnode, so the qnodes never share one list object
                graph_qnode.type = ['protein', 'gene']

        return single_edge_graph
예제 #4
0
    def _convert_query_graph_to_cypher_query(self, enforce_directionality):
        if len(self.query_graph.edges) > 1:
            self.response.error(
                f"KGQuerier requires a single-edge query graph",
                error_code="InvalidQuery")
        else:
            self.response.debug(
                f"Generating cypher for edge {self.query_graph.edges[0].id} query graph"
            )
            try:
                # Build the match clause
                edge = self.query_graph.edges[0]
                source_node = eu.get_query_node(self.query_graph,
                                                edge.source_id)
                target_node = eu.get_query_node(self.query_graph,
                                                edge.target_id)
                edge_cypher = self._get_cypher_for_query_edge(
                    edge, enforce_directionality)
                source_node_cypher = self._get_cypher_for_query_node(
                    source_node)
                target_node_cypher = self._get_cypher_for_query_node(
                    target_node)
                match_clause = f"MATCH {source_node_cypher}{edge_cypher}{target_node_cypher}"

                # Build the where clause
                where_fragments = []
                for node in [source_node, target_node]:
                    if node.curie:
                        if type(node.curie) is str:
                            where_fragment = f"{node.id}.id='{node.curie}'"
                        else:
                            where_fragment = f"{node.id}.id in {node.curie}"
                        where_fragments.append(where_fragment)
                if where_fragments:
                    where_clause = "WHERE "
                    where_clause += " AND ".join(where_fragments)
                else:
                    where_clause = ""

                # Build the with clause
                source_node_col_name = f"nodes_{source_node.id}"
                target_node_col_name = f"nodes_{target_node.id}"
                edge_col_name = f"edges_{edge.id}"
                extra_edge_properties = "{.*, " + f"id:ID({edge.id}), {source_node.id}:{source_node.id}.id, {target_node.id}:{target_node.id}.id" + "}"
                with_clause = f"WITH collect(distinct {source_node.id}) as {source_node_col_name}, " \
                              f"collect(distinct {target_node.id}) as {target_node_col_name}, " \
                              f"collect(distinct {edge.id}{extra_edge_properties}) as {edge_col_name}"

                # Build the return clause
                return_clause = f"RETURN {source_node_col_name}, {target_node_col_name}, {edge_col_name}"

                self.cypher_query = f"{match_clause} {where_clause} {with_clause} {return_clause}"
            except Exception:
                tb = traceback.format_exc()
                error_type, error, _ = sys.exc_info()
                self.response.error(
                    f"Problem generating cypher for query. {tb}",
                    error_code=error_type.__name__)
예제 #5
0
    def _convert_one_hop_query_graph_to_cypher_query(self, query_graph: QueryGraph, enforce_directionality: bool,
                                                     kp: str, log: Response) -> str:
        log.debug(f"Generating cypher for edge {query_graph.edges[0].id} query graph")
        try:
            # Build the match clause
            qedge = query_graph.edges[0]
            source_qnode = eu.get_query_node(query_graph, qedge.source_id)
            target_qnode = eu.get_query_node(query_graph, qedge.target_id)
            qedge_cypher = self._get_cypher_for_query_edge(qedge, enforce_directionality)
            source_qnode_cypher = self._get_cypher_for_query_node(source_qnode)
            target_qnode_cypher = self._get_cypher_for_query_node(target_qnode)
            match_clause = f"MATCH {source_qnode_cypher}{qedge_cypher}{target_qnode_cypher}"

            # Build the where clause
            where_fragments = []
            for qnode in [source_qnode, target_qnode]:
                if qnode.curie:
                    if type(qnode.curie) is str:
                        node_id_where_fragment = f"{qnode.id}.id='{qnode.curie}'"
                    else:
                        node_id_where_fragment = f"{qnode.id}.id in {qnode.curie}"
                    where_fragments.append(node_id_where_fragment)
                if qnode.type and isinstance(qnode.type, list):
                    if "KG2" in kp:
                        node_type_property = "category_label"
                    else:
                        node_type_property = "category"
                    where_fragments.append(f"{qnode.id}.{node_type_property} in {qnode.type}")
            if where_fragments:
                where_clause = f"WHERE {' AND '.join(where_fragments)}"
            else:
                where_clause = ""

            # Build the with clause
            source_qnode_col_name = f"nodes_{source_qnode.id}"
            target_qnode_col_name = f"nodes_{target_qnode.id}"
            qedge_col_name = f"edges_{qedge.id}"
            # This line grabs the edge's ID and a record of which of its nodes correspond to which qnode ID
            extra_edge_properties = "{.*, " + f"id:ID({qedge.id}), {source_qnode.id}:{source_qnode.id}.id, {target_qnode.id}:{target_qnode.id}.id" + "}"
            with_clause = f"WITH collect(distinct {source_qnode.id}) as {source_qnode_col_name}, " \
                          f"collect(distinct {target_qnode.id}) as {target_qnode_col_name}, " \
                          f"collect(distinct {qedge.id}{extra_edge_properties}) as {qedge_col_name}"

            # Build the return clause
            return_clause = f"RETURN {source_qnode_col_name}, {target_qnode_col_name}, {qedge_col_name}"

            cypher_query = f"{match_clause} {where_clause} {with_clause} {return_clause}"
            return cypher_query
        except Exception:
            tb = traceback.format_exc()
            error_type, error, _ = sys.exc_info()
            log.error(f"Problem generating cypher for query. {tb}", error_code=error_type.__name__)
            return ""
예제 #6
0
    def _expand_node(self, qnode_id, kp_to_use, continue_if_no_results,
                     query_graph):
        """Expand a single query node using the specified knowledge provider.

        Only ``'ARAX/KG1'`` and ``'ARAX/KG2'`` are answered (through
        ``KGQuerier.answer_single_node_query``); ``'BTE'`` and any other value
        are reported as errors on ``self.response``.

        Unless ``continue_if_no_results`` is true, an answer whose ``'nodes'``
        entry for the qnode is missing or empty is also reported as an error
        (code ``UnfulfilledQGID``); the answer is still returned in that case.

        Returns:
            The answer produced by the KG querier, or None when the response
            status was not ``'OK'`` after the qnode lookup or the knowledge
            provider cannot be used.
        """
        self.response.debug(f"Expanding node {qnode_id} using {kp_to_use}")

        target_qnode = eu.get_query_node(query_graph, qnode_id)
        if self.response.status != 'OK':
            return None

        # Reject knowledge providers that cannot answer single-node queries
        if kp_to_use == 'BTE':
            self.response.error(
                "Cannot use BTE to answer single node queries",
                error_code="InvalidQuery")
            return None
        if kp_to_use not in ('ARAX/KG2', 'ARAX/KG1'):
            self.response.error(
                f"Invalid knowledge provider: {kp_to_use}. Valid options are ARAX/KG1 or ARAX/KG2"
            )
            return None

        # Imported here rather than at module level, as in the original code
        from Expand.kg_querier import KGQuerier
        answer_kg = KGQuerier(self.response, kp_to_use).answer_single_node_query(target_qnode)

        # Make sure the qnode has been fulfilled (unless we're continuing if no results)
        if (self.response.status == 'OK' and not continue_if_no_results
                and (target_qnode.id not in answer_kg['nodes']
                     or not answer_kg['nodes'][target_qnode.id])):
            self.response.error(
                f"Returned answer KG does not contain any results for QNode {target_qnode.id}",
                error_code="UnfulfilledQGID")
        return answer_kg
예제 #7
0
    def _extract_query_subgraph(qedge_ids_to_expand: List[str],
                                query_graph: QueryGraph,
                                log: Response) -> QueryGraph:
        """Extract the sub-query graph made up of the given qedges and their qnodes.

        For every ID in ``qedge_ids_to_expand`` the first qedge of
        ``query_graph`` with that ID is located, together with its source and
        target qnodes. Copies of the qedge and of both qnodes are added to the
        result; an ID that is already present in the result is not added again.

        Returns:
            The new query graph, or None when a requested qedge ID does not
            exist in ``query_graph`` (logged with code ``UnknownValue``) or a
            qedge refers to a qnode that cannot be found (logged with code
            ``InvalidQEdge``).
        """
        subgraph = QueryGraph(nodes=[], edges=[])

        for wanted_qedge_id in qedge_ids_to_expand:
            # Locate the requested qedge (first match wins); give up if there is none
            matching_qedges = (candidate for candidate in query_graph.edges
                               if candidate.id == wanted_qedge_id)
            found_qedge = next(matching_qedges, None)
            if found_qedge is None:
                log.error(
                    f"An edge with ID '{wanted_qedge_id}' does not exist in Message.QueryGraph",
                    error_code="UnknownValue")
                return None

            # Both endpoints of the qedge have to exist in the query graph
            # (each lookup result is reused below; presumably the lookup is a pure search - verify)
            source_qnode = eu.get_query_node(query_graph, found_qedge.source_id)
            if not source_qnode:
                log.error(
                    f"Qedge {found_qedge.id}'s source_id refers to a qnode that does not exist in the query graph: "
                    f"{found_qedge.source_id}",
                    error_code="InvalidQEdge")
                return None
            target_qnode = eu.get_query_node(query_graph, found_qedge.target_id)
            if not target_qnode:
                log.error(
                    f"Qedge {found_qedge.id}'s target_id refers to a qnode that does not exist in the query graph: "
                    f"{found_qedge.target_id}",
                    error_code="InvalidQEdge")
                return None

            # Add (copies of) this qedge and its two qnodes to the sub graph, skipping known IDs
            qedge_copy = eu.copy_qedge(found_qedge)
            if qedge_copy.id not in [known_qedge.id for known_qedge in subgraph.edges]:
                subgraph.edges.append(qedge_copy)
            for endpoint in (source_qnode, target_qnode):
                endpoint_copy = eu.copy_qnode(endpoint)
                if endpoint_copy.id not in [known_qnode.id for known_qnode in subgraph.nodes]:
                    subgraph.nodes.append(endpoint_copy)

        return subgraph
예제 #8
0
    def _expand_node(self, qnode_id: str, kp_to_use: str,
                     continue_if_no_results: bool, query_graph: QueryGraph,
                     use_synonyms: bool, synonym_handling: str,
                     log: Response) -> DictKnowledgeGraph:
        """Expand a single query node using the specified knowledge provider.

        The qnode must have a curie. A copy of it is queried: when
        ``use_synonyms`` is true the copy is first passed to
        ``self._add_curie_synonyms_to_query_nodes``, and a type of ``"protein"``
        or ``"gene"`` is widened to ``["protein", "gene"]``. Only ``"ARAX/KG1"``
        and ``"ARAX/KG2"`` can answer single-node queries.

        Unless ``continue_if_no_results`` is true, an answer without any nodes
        for the qnode is reported as an error (code ``UnfulfilledQGID``) and
        returned as is. Otherwise the answer's nodes are passed through
        ``self._deduplicate_nodes`` unless ``synonym_handling`` is ``'add_all'``.

        Returns:
            The answer knowledge graph. A newly created ``DictKnowledgeGraph()``
            is returned when the log status is not ``'OK'``, the qnode has no
            curie, or the knowledge provider is invalid; each of those cases
            except the first is also logged as an error.
        """
        log.debug(f"Expanding node {qnode_id} using {kp_to_use}")
        original_qnode = eu.get_query_node(query_graph, qnode_id)
        answer_kg = DictKnowledgeGraph()
        if log.status != 'OK':
            return answer_kg
        if not original_qnode.curie:
            log.error(
                "Cannot expand a single query node if it doesn't have a curie",
                error_code="InvalidQuery")
            return answer_kg

        # Work on a copy so the caller's query node is left untouched
        working_qnode = eu.copy_qnode(original_qnode)
        if use_synonyms:
            self._add_curie_synonyms_to_query_nodes(qnodes=[working_qnode],
                                                    log=log,
                                                    kp=kp_to_use)
        if working_qnode.type in ("protein", "gene"):
            working_qnode.type = ["protein", "gene"]
        log.debug(f"Modified query node is: {working_qnode.to_dict()}")

        # Only some KPs can answer single-node queries
        valid_kps_for_single_node_queries = ["ARAX/KG1", "ARAX/KG2"]
        if kp_to_use not in valid_kps_for_single_node_queries:
            log.error(
                f"Invalid knowledge provider: {kp_to_use}. Valid options for single-node queries are "
                f"{', '.join(valid_kps_for_single_node_queries)}",
                error_code="InvalidKP")
            return answer_kg

        # Imported here rather than at module level, as in the original code
        from Expand.kg_querier import KGQuerier
        answer_kg = KGQuerier(log, kp_to_use).answer_single_node_query(working_qnode)
        log.info(
            f"Query for node {working_qnode.id} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})"
        )

        # Make sure the qnode has been fulfilled (unless we're continuing if no results)
        if log.status == 'OK' and not continue_if_no_results:
            nodes_by_qg_id = answer_kg.nodes_by_qg_id
            if working_qnode.id not in nodes_by_qg_id or not nodes_by_qg_id[working_qnode.id]:
                log.error(
                    f"Returned answer KG does not contain any results for QNode {working_qnode.id}",
                    error_code="UnfulfilledQGID")
                return answer_kg

        if synonym_handling != 'add_all':
            # The second element of the returned pair is not needed for single-node queries
            answer_kg, _unused_usage_map = self._deduplicate_nodes(
                dict_kg=answer_kg, edge_to_nodes_map={}, log=log)
        return answer_kg