예제 #1
0
    def _prune_answers_to_achieve_curie_to_curie_query(
            kg: QGOrganizedKnowledgeGraph, output_qnode_key: str,
            qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        Drop answers whose 'output' node is not one of the curies requested on the output qnode.

        This is a way of hacking around BTE's limitation where it can only do
        (node with curie)-->(non-specific node) kinds of queries. The non-specific query is run first, and this
        function then removes every answer node (and every edge pointing at such a node) that does not correspond
        to a curie listed on the 'output' query node.

        :param kg: Answer knowledge graph organized by query-graph key. It is modified in place.
        :param output_qnode_key: Key of the query node that acts as the 'output' end of the query.
        :param qg: The query graph that was answered; the first qedge key found in it is the one that is pruned.
        :return: The same `kg` object that was passed in, after pruning.
        """
        output_qnode = qg.nodes[output_qnode_key]
        qedge_key = next(iter(qg.edges))
        desired_output_curies = set(
            eu.convert_string_or_list_to_list(output_qnode.id))

        # Remove 'output' nodes in the KG that aren't actually the ones we were looking for
        output_nodes = kg.nodes_by_qg_id[output_qnode_key]
        unwanted_node_keys = set(output_nodes).difference(
            desired_output_curies)
        for node_key in unwanted_node_keys:
            del output_nodes[node_key]

        # And remove any edges that used them. Keys are collected first so the dict is not resized while it is
        # being iterated.
        answer_edges = kg.edges_by_qg_id[qedge_key]
        orphaned_edge_keys = [
            edge_key for edge_key, edge in answer_edges.items()
            if edge.object in unwanted_node_keys  # Edge object always contains output node ID for BTE
        ]
        for edge_key in orphaned_edge_keys:
            del answer_edges[edge_key]

        return kg
예제 #2
0
 def _send_query_to_kp(self, query_graph: QueryGraph,
                       log: ARAXResponse) -> Dict[str, any]:
     """
     Query the KP once per input curie and merge all answers into a single response dictionary.

     The query graph is reduced to the few properties this code sends to the KP ('id', 'type' and, when the
     qnode has an id, 'curie' for nodes; 'id', 'source_id', 'target_id' and 'type' for the single edge). The
     edge 'type' sent is the first element of `self.accepted_edge_types`.

     :param query_graph: Single-edge query graph; only its first qedge is used.
     :param log: Response object used for debug/warning logging.
     :return: The first KP response that contained results, extended with the knowledge graph nodes/edges and
         results of every later response that contained results. An empty dict if no response had results.

     NOTE(review): `requests.post` is called without a timeout, and the subject qnode is expected to carry a
     curie (a `KeyError` is raised otherwise).
     """
     # Strip the qnodes/qedge down to only the properties the KP's API is sent
     kp_qnodes = []
     for node_key, node in query_graph.nodes.items():
         kp_qnode = {'id': node_key, 'type': node.category}
         if node.id:
             kp_qnode['curie'] = node.id
         kp_qnodes.append(kp_qnode)

     qedge_key = next(iter(query_graph.edges))  # Our query graph is single-edge
     qedge = query_graph.edges[qedge_key]
     kp_qedge = {
         'id': qedge_key,
         'source_id': qedge.subject,
         'target_id': qedge.object,
         'type': list(self.accepted_edge_types)[0]
     }

     subject_kp_qnode = next(candidate for candidate in kp_qnodes
                             if candidate['id'] == qedge.subject)
     curies_to_query = eu.convert_string_or_list_to_list(
         subject_kp_qnode['curie'])

     # The request body is assembled once. It holds a reference to `subject_kp_qnode`, so overwriting that
     # dict's 'curie' inside the loop changes what is serialized for each request.
     request_body = {
         'message': {
             'query_graph': {
                 'nodes': kp_qnodes,
                 'edges': [kp_qedge]
             }
         }
     }
     request_headers = {'accept': 'application/json'}

     merged_response = {}
     for curie in curies_to_query:  # Until we have batch querying, ping them one-by-one for each input curie
         log.debug(
             f"Sending {qedge_key} query to {self.kp_name} for {curie}")
         subject_kp_qnode['curie'] = curie
         response = requests.post(self.kp_query_endpoint,
                                  json=request_body,
                                  headers=request_headers)
         if response.status_code != 200:
             log.warning(
                 f"{self.kp_name} KP API returned response of {response.status_code}"
             )
             continue

         response_json = response.json()
         if not response_json.get('results'):
             continue  # Nothing to merge for this curie
         if not merged_response:
             # First response with results becomes the base that later ones are appended to
             merged_response = response_json
             continue
         for element_kind in ('nodes', 'edges'):
             merged_response['knowledge_graph'][
                 element_kind] += response_json['knowledge_graph'][
                     element_kind]
         merged_response['results'] += response_json['results']
     return merged_response
예제 #3
0
 def _convert_kg1_node_to_swagger_node(
         self, neo4j_node: Dict[str, any]) -> Tuple[str, Node]:
     """
     Build a swagger `Node` from a KG1 neo4j node dictionary.

     The node's name comes from 'name', its category from 'category' (normalized to a list), and the
     'symbol', 'description' and 'uri' properties are handed to `self._create_swagger_attributes`.

     :param neo4j_node: Dictionary of node properties.
     :return: Tuple of (value of the node's 'id' property, the populated `Node`).
     """
     attribute_property_names = ["symbol", "description", "uri"]

     node = Node()
     node.name = neo4j_node.get('name')
     node.category = eu.convert_string_or_list_to_list(
         neo4j_node.get('category'))
     node.attributes = self._create_swagger_attributes(
         attribute_property_names, neo4j_node)

     node_key = neo4j_node.get('id')
     return node_key, node
예제 #4
0
 def _build_kg_to_qg_id_dict(
         results: Dict[str, any]) -> Dict[str, Dict[str, List[str]]]:
     """
     Build lookup tables from knowledge-graph IDs to the query-graph IDs they are bound to.

     :param results: Mapping with 'node_bindings' and 'edge_bindings' entries; every binding must provide
         'kg_id' and 'qg_id'.
     :return: `{'nodes': {kg_id: qg_id}, 'edges': {kg_id: qg_id}}`. The `qg_id` values are stored exactly as
         they appear in the bindings. An edge binding's 'kg_id' is first normalized with
         `eu.convert_string_or_list_to_list`, and each resulting ID is mapped to that binding's `qg_id`.
     """
     node_qg_ids = {
         binding['kg_id']: binding['qg_id']
         for binding in results['node_bindings']
     }

     edge_qg_ids = {}
     for binding in results['edge_bindings']:
         bound_edge_keys = eu.convert_string_or_list_to_list(
             binding['kg_id'])
         # Every edge listed in this binding maps to the same qg_id
         edge_qg_ids.update(
             dict.fromkeys(bound_edge_keys, binding['qg_id']))

     return {'nodes': node_qg_ids, 'edges': edge_qg_ids}
예제 #5
0
 def _convert_kg2_node_to_swagger_node(
         self, neo4j_node: Dict[str, any]) -> Tuple[str, Node]:
     """
     Build a swagger `Node` from a KG2 neo4j node dictionary.

     The node's name comes from 'name' and its category from 'category_label' (normalized to a list). The
     remaining KG2 properties listed below are handed to `self._create_swagger_attributes`.

     :param neo4j_node: Dictionary of node properties.
     :return: Tuple of (value of the node's 'id' property, the populated `Node`).
     """
     # All additional properties on KG2 nodes that get turned into swagger Attribute objects
     attribute_property_names = [
         "full_name", "description", "iri", "publications", "synonym",
         "category", "provided_by", "deprecated", "update_date"
     ]

     node = Node()
     node.name = neo4j_node.get('name')
     node.category = eu.convert_string_or_list_to_list(
         neo4j_node.get('category_label'))
     node.attributes = self._create_swagger_attributes(
         attribute_property_names, neo4j_node)

     node_key = neo4j_node.get('id')
     return node_key, node
예제 #6
0
 def _pre_process_query_graph(self, query_graph: QueryGraph,
                              log: ARAXResponse) -> QueryGraph:
     """
     Rewrite each qnode's category and curies into the form this KP accepts.

     For every qnode, its categories are mapped through `self.node_category_overrides_for_kp` and the first
     one (in the order given on the qnode) that appears in `self.accepted_node_categories` becomes the qnode's
     single category. If a qnode has no accepted category, an error is logged and processing stops.

     For qnodes that have an id, the id is replaced by those of its synonyms (from `eu.get_curie_synonyms`)
     that start with the prefix in `self.kp_preferred_prefixes` for the chosen category: a list when there
     are several, a single string when there is exactly one. When none match, a warning is logged and the id
     is left untouched.

     :param query_graph: Query graph whose qnodes are modified in place.
     :param log: Response object used for logging.
     :return: The same `query_graph` object (possibly only partially processed if an error was logged).
     """
     for qnode_key, qnode in query_graph.nodes.items():
         # Convert node types to preferred format and verify we can do this query
         formatted_qnode_categories = [
             self.node_category_overrides_for_kp.get(
                 qnode_category, qnode_category)
             for qnode_category in eu.convert_string_or_list_to_list(
                 qnode.category)
         ]
         # Filter in the order the categories were supplied (rather than intersecting sets) so the category
         # picked below does not depend on set iteration order.
         accepted_qnode_categories = [
             qnode_category
             for qnode_category in formatted_qnode_categories
             if qnode_category in self.accepted_node_categories
         ]
         if not accepted_qnode_categories:
             log.error(
                 f"{self.kp_name} can only be used for queries involving {self.accepted_node_categories} "
                 f"and QNode {qnode_key} has category '{qnode.category}'",
                 error_code="UnsupportedQueryForKP")
             return query_graph
         qnode.category = accepted_qnode_categories[0]

         # Convert curies to equivalent curies accepted by the KP (depending on qnode type)
         if not qnode.id:
             continue
         equivalent_curies = eu.get_curie_synonyms(qnode.id, log)
         preferred_prefix = self.kp_preferred_prefixes[qnode.category]
         curie_start = f"{preferred_prefix}:"
         desired_curies = [
             curie for curie in equivalent_curies
             if curie.startswith(curie_start)
         ]
         if desired_curies:
             qnode.id = desired_curies if len(
                 desired_curies) > 1 else desired_curies[0]
             log.debug(
                 f"Converted qnode {qnode_key} curie to {qnode.id}")
         else:
             log.warning(
                 f"Could not convert qnode {qnode_key} curie(s) to preferred prefix ({preferred_prefix})"
             )
     return query_graph
예제 #7
0
    def _validate_and_pre_process_input(qg: QueryGraph,
                                        valid_bte_inputs_dict: Dict[str,
                                                                    Set[str]],
                                        enforce_directionality: bool,
                                        use_synonyms: bool,
                                        log: ARAXResponse) -> Tuple[str, str]:
        """
        Check that a query graph can be sent to BTE and normalize its qnodes for that purpose.

        The qnodes in `qg` are modified in place: categories are converted to pascal-case lists, curies are
        optionally replaced by their synonyms, and the input qnode's curies are converted to BTE's format.

        :param qg: Query graph to validate; must have exactly one edge and two nodes.
        :param valid_bte_inputs_dict: Accepted values, read under the keys 'predicates' and 'node_categories'.
        :param enforce_directionality: When true, the qedge's subject is the input and its object the output.
            Otherwise the first qnode that has a curie is used as the input.
        :param use_synonyms: When true, each qnode's curies are replaced by `eu.get_curie_synonyms` output.
        :param log: Response object used for logging errors and warnings.
        :return: Tuple of (input qnode key, output qnode key), or ("", "") if a validation error was logged.
        """
        # Make sure we have a valid one-hop query graph
        if len(qg.edges) != 1 or len(qg.nodes) != 2:
            node_count = len(qg.nodes)
            edge_count = len(qg.edges)
            log.error(
                f"BTE can only accept one-hop query graphs (your QG has {node_count} nodes and "
                f"{edge_count} edges)",
                error_code="InvalidQueryGraph")
            return "", ""
        qedge_key = next(iter(qg.edges))
        qedge = qg.edges[qedge_key]

        # Make sure at least one of our qnodes has a curie
        keys_of_qnodes_with_curies = [
            key for key, node in qg.nodes.items() if node.id
        ]
        if not keys_of_qnodes_with_curies:
            log.error(
                f"Neither qnode for qedge {qedge_key} has a curie specified. BTE requires that at least one of "
                f"them has a curie. Your query graph is: {qg.to_dict()}",
                error_code="UnsupportedQueryForKP")
            return "", ""

        # Figure out which query node is input vs. output
        if enforce_directionality:
            input_qnode_key, output_qnode_key = qedge.subject, qedge.object
        else:
            # The first qnode with a curie is the input; the other of the two qnodes is the output
            input_qnode_key = keys_of_qnodes_with_curies[0]
            output_qnode_key = next(key for key in qg.nodes
                                    if key != input_qnode_key)
            log.warning(
                f"BTE cannot do bidirectional queries; the query for this edge will be directed, going: "
                f"{input_qnode_key}-->{output_qnode_key}")
        input_qnode = qg.nodes[input_qnode_key]
        output_qnode = qg.nodes[output_qnode_key]

        # Make sure predicate is allowed
        allowed_predicates = valid_bte_inputs_dict['predicates']
        if qedge.predicate not in allowed_predicates and qedge.predicate is not None:
            log.error(
                f"BTE does not accept predicate '{qedge.predicate}'. Valid options are "
                f"{allowed_predicates}",
                error_code="InvalidInput")
            return "", ""

        # Process qnode types (convert to preferred format, make sure allowed)
        for qnode in (input_qnode, output_qnode):
            qnode.category = [
                eu.convert_string_to_pascal_case(category) for category in
                eu.convert_string_or_list_to_list(qnode.category)
            ]
        keys_missing_category = [
            key for key in (input_qnode_key, output_qnode_key)
            if not qg.nodes[key].category
        ]
        if keys_missing_category:
            log.error(
                f"BTE requires every query node to have a category. QNode(s) missing a category: "
                f"{', '.join(keys_missing_category)}",
                error_code="InvalidInput")
            return "", ""
        allowed_categories = valid_bte_inputs_dict['node_categories']
        rejected_categories = []
        for qnode in (input_qnode, output_qnode):
            for category in qnode.category:
                if category not in allowed_categories:
                    rejected_categories.append(category)
        if rejected_categories:
            log.error(
                f"BTE does not accept QNode category(s): {', '.join(rejected_categories)}. Valid options are "
                f"{allowed_categories}",
                error_code="InvalidInput")
            return "", ""

        # Sub in curie synonyms as appropriate
        if use_synonyms:
            # Decide which qnodes have curies up front, before any of them is overwritten
            qnodes_to_synonymize = [
                qnode for qnode in (input_qnode, output_qnode) if qnode.id
            ]
            for qnode in qnodes_to_synonymize:
                qnode.id = eu.get_curie_synonyms(qnode.id, log)

        # Make sure our input node curies are in list form and use prefixes BTE prefers
        input_qnode.id = [
            eu.convert_curie_to_bte_format(curie)
            for curie in eu.convert_string_or_list_to_list(input_qnode.id)
        ]

        return input_qnode_key, output_qnode_key
예제 #8
0
    def _add_answers_to_kg(self, answer_kg: QGOrganizedKnowledgeGraph,
                           reasoner_std_response: Dict[str, any],
                           input_qnode_key: str, output_qnode_key: str,
                           qedge_key: str,
                           log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
        """
        Copy the nodes and edges of a BTE response into `answer_kg`, keyed by our own query graph keys.

        BTE's query-graph IDs are translated as follows: "n0" is our input qnode, "n1" is our output qnode and
        "e1" is our qedge. Any other ID results in an error being logged and the (possibly partially filled)
        `answer_kg` being returned immediately. Output nodes are re-keyed with the curie chosen by
        `self._get_best_equivalent_bte_curie`, and edges are rewired to use those new keys.

        :param answer_kg: Knowledge graph to add to; modified in place.
        :param reasoner_std_response: BTE response with 'results' and a 'knowledge_graph' holding 'nodes' and
            'edges'.
        :param input_qnode_key: Our key for the qnode BTE calls "n0".
        :param output_qnode_key: Our key for the qnode BTE calls "n1".
        :param qedge_key: Our key for the qedge BTE calls "e1".
        :param log: Response object used for logging.
        :return: The same `answer_kg` object.
        """
        kg_to_qg_ids_dict = self._build_kg_to_qg_id_dict(
            reasoner_std_response['results'])
        bte_kg = reasoner_std_response['knowledge_graph']
        bte_edges = bte_kg['edges']
        if not bte_edges:
            return answer_kg  # Nothing came back, so there is nothing to add

        log.debug(f"Got results back from BTE for this query "
                  f"({len(bte_edges)} edges)")

        # Maps BTE's key for each output node to the preferred equivalent curie chosen for it
        preferred_key_by_bte_key = dict()

        for bte_node in bte_kg['nodes']:
            swagger_node = Node()
            bte_node_key = bte_node.get('id')
            swagger_node.name = bte_node.get('name')
            snake_case_type = eu.convert_string_to_snake_case(
                bte_node.get('type'))
            swagger_node.category = eu.convert_string_or_list_to_list(
                snake_case_type)

            # Map the returned BTE qg_ids back to the original qnode_keys in our query graph
            bte_qg_id = kg_to_qg_ids_dict['nodes'].get(bte_node_key)
            if bte_qg_id == "n0":
                qnode_key = input_qnode_key
            elif bte_qg_id == "n1":
                qnode_key = output_qnode_key
            else:
                log.error("Could not map BTE qg_id to ARAX qnode_key",
                          error_code="UnknownQGID")
                return answer_kg

            # Find and use the preferred equivalent identifier for this node (if it's an output node)
            if qnode_key != output_qnode_key:
                swagger_node_key = bte_node_key
            elif bte_node_key in preferred_key_by_bte_key:
                swagger_node_key = preferred_key_by_bte_key[bte_node_key]
            else:
                equivalent_curies = []
                identifiers_by_prefix = bte_node.get('equivalent_identifiers')
                for prefix, identifiers in identifiers_by_prefix.items():
                    for identifier in identifiers:
                        equivalent_curies.append(
                            f"{prefix}:{eu.get_curie_local_id(identifier)}")
                swagger_node_key = self._get_best_equivalent_bte_curie(
                    equivalent_curies, swagger_node.category[0])
                preferred_key_by_bte_key[bte_node_key] = swagger_node_key

            answer_kg.add_node(swagger_node_key, swagger_node, qnode_key)

        for bte_edge in bte_edges:
            source_key = bte_edge.get('source_id')
            target_key = bte_edge.get('target_id')

            swagger_edge = Edge()
            swagger_edge_key = bte_edge.get("id")
            swagger_edge.predicate = bte_edge.get('type')
            # Point the edge at the re-keyed node when its endpoint was remapped above
            swagger_edge.subject = preferred_key_by_bte_key.get(
                source_key, source_key)
            swagger_edge.object = preferred_key_by_bte_key.get(
                target_key, target_key)
            provided_by_attribute = Attribute(
                name="provided_by",
                value=bte_edge.get('edge_source'),
                type=eu.get_attribute_type("provided_by"))
            defined_by_attribute = Attribute(
                name="is_defined_by",
                value="BTE",
                type=eu.get_attribute_type("is_defined_by"))
            swagger_edge.attributes = [
                provided_by_attribute, defined_by_attribute
            ]

            # Map the returned BTE qg_id back to the original qedge_key in our query graph
            if kg_to_qg_ids_dict['edges'].get(swagger_edge_key) != "e1":
                log.error("Could not map BTE qg_id to ARAX qedge_key",
                          error_code="UnknownQGID")
                return answer_kg
            answer_kg.add_edge(swagger_edge_key, swagger_edge, qedge_key)

        return answer_kg
예제 #9
0
    def _convert_one_hop_query_graph_to_cypher_query(
            self, qg: QueryGraph, enforce_directionality: bool, kg_name: str,
            log: ARAXResponse) -> str:
        """
        Generate the cypher query text that answers a one-hop query graph.

        The query has the shape MATCH ... [WHERE ...] WITH ... RETURN ..., returning three columns: the
        distinct nodes matching the qedge's subject qnode, the distinct nodes matching its object qnode, and
        the distinct edges (each annotated with its ID and the IDs of the nodes filling each qnode).

        :param qg: Query graph; only its first qedge and that qedge's subject/object qnodes are used.
        :param enforce_directionality: Passed through to `self._get_cypher_for_query_edge`.
        :param kg_name: Name of the knowledge graph being queried. "KG2c" filters categories on the `types`
            property, "KG2" on `category_label`, and anything else on `category`.
        :param log: Response object used for logging.
        :return: The cypher query, or an empty string if generating it raised an exception (which is logged).

        NOTE(review): qnode ids and categories are interpolated directly into the query text rather than being
        passed as query parameters.
        """
        qedge_key = next(iter(qg.edges))
        qedge = qg.edges[qedge_key]
        log.debug(f"Generating cypher for edge {qedge_key} query graph")
        try:
            # Build the match clause
            subject_key = qedge.subject
            object_key = qedge.object
            edge_pattern = self._get_cypher_for_query_edge(
                qedge_key, qg, enforce_directionality)
            subject_pattern = self._get_cypher_for_query_node(
                subject_key, qg, kg_name)
            object_pattern = self._get_cypher_for_query_node(
                object_key, qg, kg_name)
            match_clause = f"MATCH {subject_pattern}{edge_pattern}{object_pattern}"

            # Build the where clause
            conditions = []
            for qnode_key in (subject_key, object_key):
                qnode = qg.nodes[qnode_key]
                if qnode.id and isinstance(qnode.id,
                                           list) and len(qnode.id) > 1:
                    conditions.append(f"{qnode_key}.id in {qnode.id}")
                if not qnode.category:
                    continue
                if kg_name == "KG2c":
                    # A node matches if any one of the requested categories is among its types
                    per_category_conditions = [
                        f"'{category}' in {qnode_key}.types"
                        for category in eu.convert_string_or_list_to_list(
                            qnode.category)
                    ]
                    any_category_condition = " OR ".join(
                        per_category_conditions)
                    if len(per_category_conditions) >= 2:
                        any_category_condition = f"({any_category_condition})"
                    conditions.append(any_category_condition)
                elif isinstance(qnode.category, list):
                    category_property = "category_label" if kg_name == "KG2" else "category"
                    conditions.append(
                        f"{qnode_key}.{category_property} in {qnode.category}"
                    )
            where_clause = f"WHERE {' AND '.join(conditions)}" if conditions else ""

            # Build the with clause
            subject_column = f"nodes_{subject_key}"
            object_column = f"nodes_{object_key}"
            edge_column = f"edges_{qedge_key}"
            # This grabs the edge's ID and a record of which of its nodes correspond to which qnode ID
            edge_projection = (
                f"{{.*, id:ID({qedge_key}), {subject_key}:{subject_key}.id, "
                f"{object_key}:{object_key}.id}}")
            with_clause = "WITH " + ", ".join([
                f"collect(distinct {subject_key}) as {subject_column}",
                f"collect(distinct {object_key}) as {object_column}",
                f"collect(distinct {qedge_key}{edge_projection}) as {edge_column}",
            ])

            # Build the return clause
            return_clause = f"RETURN {subject_column}, {object_column}, {edge_column}"

            return " ".join(
                [match_clause, where_clause, with_clause, return_clause])
        except Exception as exc:
            tb = traceback.format_exc()
            log.error(f"Problem generating cypher for query. {tb}",
                      error_code=type(exc).__name__)
            return ""