Beispiel #1
0
 def _add_inverted_predicates(qg: QueryGraph,
                              log: ARAXResponse) -> QueryGraph:
     """
     Add the Biolink inverse of every predicate on the query graph's first qedge.

     The Biolink Model yaml file is fetched from GitHub and, for each predicate on the qedge whose slot
     entry has an "inverse", that inverse is added to the qedge's predicates. The qedge is modified in
     place (its predicate ends up as a de-duplicated list).

     :param qg: The query graph whose first qedge should be expanded.
     :param log: Response object that debug/warning messages are written to.
     :return: The same query graph. It is returned untouched if it has no qedges or if the Biolink Model
              could not be downloaded (a warning is logged in the latter case).
     """
     # For now, we'll consider BOTH predicates in an inverse pair (TODO: later tailor to what we know is in KG2)
     qedge = next(iter(qg.edges.values()), None)
     if qedge is None:
         return qg  # No qedge to add inverse predicates to

     try:
         # Use a timeout so that a stalled connection can't hang the whole query
         response = requests.get(
             "https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml",
             timeout=30)
     except requests.exceptions.RequestException as error:
         # Inverse predicates are a best-effort addition, so a network failure only merits a warning
         log.warning(
             f"Cannot check for inverse predicates: Failed to load Biolink Model yaml file. "
             f"({type(error).__name__}: {error})")
         return qg
     if response.status_code != 200:
         log.warning(
             f"Cannot check for inverse predicates: Failed to load Biolink Model yaml file. "
             f"(Page gave status {response.status_code}.)")
         return qg

     qedge.predicate = eu.convert_to_list(qedge.predicate)
     biolink_model = yaml.safe_load(response.text)
     inverse_predicates = set()
     for predicate in qedge.predicate:
         # Biolink slots are keyed in 'subclass of' format rather than 'biolink:subclass_of' format
         english_predicate = predicate.split(":")[-1].replace("_", " ")
         biolink_predicate_info = biolink_model["slots"].get(english_predicate)
         if biolink_predicate_info and "inverse" in biolink_predicate_info:
             english_inverse_predicate = biolink_predicate_info["inverse"]
             machine_inverse_predicate = f"biolink:{english_inverse_predicate.replace(' ', '_')}"
             inverse_predicates.add(machine_inverse_predicate)
             log.debug(f"Found inverse predicate for {predicate}: {machine_inverse_predicate}")
     qedge.predicate = list(set(qedge.predicate).union(inverse_predicates))
     return qg
Beispiel #2
0
    def _prune_answers_to_achieve_curie_to_curie_query(kg: QGOrganizedKnowledgeGraph, output_qnode_key: str, qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
        """
        This is a way of hacking around BTE's limitation where it can only do (node with curie)-->(non-specific node)
        kinds of queries. We do the non-specific query, and then use this function to remove all of the answer nodes
        that do not correspond to the curie we wanted for the 'output' node.

        :param kg: The answer knowledge graph to prune; it is modified in place.
        :param output_qnode_key: Key of the qnode whose curie(s) the answer nodes must match.
        :param qg: The query graph that was answered; its first qedge is the one whose edges get pruned.
        :return: The same (pruned) knowledge graph.
        """
        # Remove 'output' nodes in the KG that aren't actually the ones we were looking for
        output_qnode = qg.nodes[output_qnode_key]
        qedge_key = next(iter(qg.edges))
        desired_output_curies = set(eu.convert_to_list(output_qnode.id))
        output_nodes = kg.nodes_by_qg_id[output_qnode_key]
        output_node_keys_to_remove = set(output_nodes).difference(desired_output_curies)
        for node_key in output_node_keys_to_remove:
            output_nodes.pop(node_key)

        # And remove any edges that used them (collect the keys first so we don't mutate while iterating)
        answer_edges = kg.edges_by_qg_id[qedge_key]
        edge_keys_to_remove = {edge_key for edge_key, edge in answer_edges.items()
                               if edge.object in output_node_keys_to_remove}  # Edge object always contains output node ID for BTE
        for edge_key in edge_keys_to_remove:
            answer_edges.pop(edge_key)

        return kg
Beispiel #3
0
    def _convert_one_hop_query_graph_to_cypher_query(
            self, qg: QueryGraph, enforce_directionality: bool,
            log: ARAXResponse) -> str:
        """
        Build the cypher query string for the first (only) qedge of a one-hop query graph.

        The query has the shape "MATCH ... [WHERE ...] WITH ... RETURN ..." and collects the distinct subject
        nodes, object nodes and edges into one column each. As a side effect, any qnode category is converted
        to list form. If building the query fails, the traceback is logged as an error and "" is returned.
        """
        qedge_key = next(iter(qg.edges))
        qedge = qg.edges[qedge_key]
        log.debug(f"Generating cypher for edge {qedge_key} query graph")
        try:
            subject_key, object_key = qedge.subject, qedge.object

            # MATCH: (subject)-[edge]-(object)
            edge_pattern = self._get_cypher_for_query_edge(
                qedge_key, qg, enforce_directionality)
            subject_pattern = self._get_cypher_for_query_node(subject_key, qg)
            object_pattern = self._get_cypher_for_query_node(object_key, qg)
            match_clause = f"MATCH {subject_pattern}{edge_pattern}{object_pattern}"

            # WHERE: only needed for qnodes that have multiple curies and/or multiple categories
            conditions = []
            for key in (subject_key, object_key):
                qnode = qg.nodes[key]
                if qnode.id and isinstance(qnode.id, list) and len(qnode.id) > 1:
                    conditions.append(f"{key}.id in {qnode.id}")
                if not qnode.category:
                    continue
                qnode.category = eu.convert_to_list(qnode.category)
                if len(qnode.category) > 1:
                    # Looks like '(n00:`biolink:Disease` OR n00:`biolink:PhenotypicFeature`..)'
                    label_checks = " OR ".join(f"{key}:`{category}`" for category in qnode.category)
                    conditions.append(f"({label_checks})")
            where_clause = ""
            if conditions:
                where_clause = f"WHERE {' AND '.join(conditions)}"

            # WITH: one collected column per qnode/qedge
            subject_column = f"nodes_{subject_key}"
            object_column = f"nodes_{object_key}"
            edge_column = f"edges_{qedge_key}"
            # Each edge also carries its ID and a record of which of its nodes correspond to which qnode ID
            edge_fields = ", ".join([".*",
                                     f"id:ID({qedge_key})",
                                     f"{subject_key}:{subject_key}.id",
                                     f"{object_key}:{object_key}.id"])
            edge_projection = "{" + edge_fields + "}"
            collected_columns = ", ".join([
                f"collect(distinct {subject_key}) as {subject_column}",
                f"collect(distinct {object_key}) as {object_column}",
                f"collect(distinct {qedge_key}{edge_projection}) as {edge_column}",
            ])
            with_clause = f"WITH {collected_columns}"

            # RETURN
            return_clause = f"RETURN {subject_column}, {object_column}, {edge_column}"

            return f"{match_clause} {where_clause} {with_clause} {return_clause}"
        except Exception as error:
            tb = traceback.format_exc()
            log.error(f"Problem generating cypher for query. {tb}",
                      error_code=type(error).__name__)
            return ""
Beispiel #4
0
    def _add_answers_to_kg(self, answer_kg: QGOrganizedKnowledgeGraph, reasoner_std_response: Dict[str, any],
                           input_qnode_key: str, output_qnode_key: str, qedge_key: str, log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
        """
        Load the nodes and edges of a BTE response into answer_kg under our own qnode/qedge keys.

        BTE's qg_ids "n0"/"n1" are mapped back to input_qnode_key/output_qnode_key and "e1" to qedge_key.
        Output nodes are re-keyed to the curie picked by self._get_best_equivalent_bte_curie, and edges are
        re-pointed at those new keys. Nothing is added if the response contains no edges. If a node or edge
        can't be mapped to a known qg_id, an error is logged and answer_kg is returned as filled in so far.
        """
        kg_to_qg_ids_dict = self._build_kg_to_qg_id_dict(reasoner_std_response['results'])
        bte_edges = reasoner_std_response['knowledge_graph']['edges']
        if not bte_edges:
            return answer_kg

        preferred_key_by_bte_key = dict()
        log.debug(f"Got results back from BTE for this query "
                  f"({len(bte_edges)} edges)")

        for bte_node in reasoner_std_response['knowledge_graph']['nodes']:
            trapi_node = Node()
            bte_node_key = bte_node.get('id')
            trapi_node.name = bte_node.get('name')
            snake_case_type = eu.convert_string_to_snake_case(bte_node.get('type'))
            trapi_node.category = eu.convert_to_list(snake_case_type)

            # Map the returned BTE qg_ids back to the original qnode_keys in our query graph
            bte_qg_id = kg_to_qg_ids_dict['nodes'].get(bte_node_key)
            if bte_qg_id == "n0":
                qnode_key = input_qnode_key
            elif bte_qg_id == "n1":
                qnode_key = output_qnode_key
            else:
                log.error("Could not map BTE qg_id to ARAX qnode_key", error_code="UnknownQGID")
                return answer_kg

            # Only output nodes are re-keyed to their preferred equivalent identifier
            if qnode_key != output_qnode_key:
                node_key = bte_node_key
            elif bte_node_key in preferred_key_by_bte_key:
                node_key = preferred_key_by_bte_key[bte_node_key]
            else:
                # NOTE(review): assumes every output node has an 'equivalent_identifiers' dict of
                # prefix -> list of local IDs and at least one category - confirm against BTE's output
                equivalent_curies = []
                for prefix, local_ids in bte_node.get('equivalent_identifiers').items():
                    for local_id in local_ids:
                        equivalent_curies.append(f"{prefix}:{eu.get_curie_local_id(local_id)}")
                node_key = self._get_best_equivalent_bte_curie(equivalent_curies, trapi_node.category[0])
                preferred_key_by_bte_key[bte_node_key] = node_key

            answer_kg.add_node(node_key, trapi_node, qnode_key)

        for bte_edge in bte_edges:
            trapi_edge = Edge()
            edge_key = bte_edge.get("id")
            trapi_edge.predicate = bte_edge.get('type')
            # Point the edge at the re-keyed nodes (falling back to BTE's own keys for nodes that weren't re-keyed)
            source_key = bte_edge.get('source_id')
            target_key = bte_edge.get('target_id')
            trapi_edge.subject = preferred_key_by_bte_key.get(source_key, source_key)
            trapi_edge.object = preferred_key_by_bte_key.get(target_key, target_key)
            provided_by = Attribute(name="provided_by", value=bte_edge.get('edge_source'),
                                    type=eu.get_attribute_type("provided_by"))
            is_defined_by = Attribute(name="is_defined_by", value="BTE",
                                      type=eu.get_attribute_type("is_defined_by"))
            trapi_edge.attributes = [provided_by, is_defined_by]
            # Map the returned BTE qg_id back to the original qedge_key in our query graph
            if kg_to_qg_ids_dict['edges'].get(edge_key) != "e1":
                log.error("Could not map BTE qg_id to ARAX qedge_key", error_code="UnknownQGID")
                return answer_kg
            answer_kg.add_edge(edge_key, trapi_edge, qedge_key)

        return answer_kg
Beispiel #5
0
 def _convert_kg1_node_to_swagger_node(
         self, neo4j_node: Dict[str, any]) -> Tuple[str, Node]:
     """
     Convert a KG1 neo4j node (a dict of its properties) into a (node key, swagger Node) pair.

     The node's 'id' becomes the key; 'name' and 'category' are set directly on the Node, and the 'symbol',
     'description' and 'uri' properties are handed to self._create_swagger_attributes.
     """
     node = Node()
     node_key = neo4j_node.get('id')
     node.name = neo4j_node.get('name')
     node.category = eu.convert_to_list(neo4j_node.get('category'))
     attribute_properties = ["symbol", "description", "uri"]
     node.attributes = self._create_swagger_attributes(attribute_properties, neo4j_node)
     return node_key, node
Beispiel #6
0
 def _get_supported_prefixes(self, categories: List[str],
                             kp: str) -> Set[str]:
     """
     Return the uppercased curie prefixes that self.meta_map lists for the given KP under any of the given
     categories or their descendants (as reported by BiolinkHelper.get_descendants with include_mixins=False).
     """
     biolink_helper = BiolinkHelper()
     category_list = eu.convert_to_list(categories)
     relevant_categories = biolink_helper.get_descendants(category_list, include_mixins=False)
     uppercase_prefixes = set()
     for category in relevant_categories:
         # Categories the KP has no prefix info for simply contribute nothing
         prefixes_for_category = self.meta_map[kp]["prefixes"].get(category, set())
         for prefix in prefixes_for_category:
             uppercase_prefixes.add(prefix.upper())
     return uppercase_prefixes
Beispiel #7
0
 def _build_kg_to_qg_id_dict(results: Dict[str, any]) -> Dict[str, Dict[str, List[str]]]:
     """
     Build a lookup from knowledge graph keys to the qg_id they are bound to.

     The returned dict has a 'nodes' entry and an 'edges' entry, each mapping a binding's 'kg_id' to its
     'qg_id'. An edge binding's 'kg_id' is passed through eu.convert_to_list, and every resulting key is
     mapped to that binding's 'qg_id'.
     """
     qg_id_by_node_key = {binding['kg_id']: binding['qg_id'] for binding in results['node_bindings']}
     qg_id_by_edge_key = dict()
     for binding in results['edge_bindings']:
         bound_edge_keys = eu.convert_to_list(binding['kg_id'])
         qg_id = binding['qg_id']
         qg_id_by_edge_key.update((edge_key, qg_id) for edge_key in bound_edge_keys)
     return {'nodes': qg_id_by_node_key, 'edges': qg_id_by_edge_key}
Beispiel #8
0
 def get_desirable_equivalent_curies(self, curies: List[str],
                                     categories: Optional[List[str]],
                                     kp: str) -> List[str]:
     """
     Swap each input curie for its equivalent curie(s) that use a prefix the given KP supports.

     Equivalents sharing the input curie's own prefix are preferred; otherwise the equivalents under one of
     the other supported prefixes are used. Input curies with no supported equivalent are left out of the
     result (a warning lists them). If self.meta_map has no info (or no prefix info) for the KP, the input
     curies are returned as they are.
     """
     self.log.debug(
         f"{kp}: Converting curies in the QG to kinds that {kp} can answer")

     # Without prefix info for this KP there is nothing to tailor the curies to
     kp_meta_info = self.meta_map.get(kp)
     if not kp_meta_info:
         self.log.warning(
             f"{kp}: Somehow missing meta info for {kp}. Cannot do curie prefix conversion; will send "
             f"curies as they are.")
         return curies
     if not kp_meta_info.get("prefixes"):
         self.log.warning(
             f"{kp}: No supported prefix info is available for {kp}. Will send curies as they are."
         )
         return curies

     supported_prefixes = self._get_supported_prefixes(
         eu.convert_to_list(categories), kp)
     self.log.debug(
         f"{kp}: Prefixes {kp} supports for categories {categories} (and descendants) are: "
         f"{supported_prefixes}")

     curies_for_kp = set()
     curies_without_match = set()
     synonyms_dict = eu.get_curie_synonyms_dict(curies)
     for input_curie, equivalent_curies in synonyms_dict.items():
         input_prefix = self._get_uppercase_prefix(input_curie)
         # Group this curie's equivalents by prefix, keeping only prefixes the KP supports
         supported_by_prefix = defaultdict(set)
         for equivalent_curie in equivalent_curies:
             equivalent_prefix = self._get_uppercase_prefix(equivalent_curie)
             if equivalent_prefix in supported_prefixes:
                 supported_by_prefix[equivalent_prefix].add(equivalent_curie)

         if not supported_by_prefix:
             curies_without_match.add(input_curie)
             continue

         if input_prefix in supported_by_prefix:
             # Stick with the input curie's own prefix when the KP supports it
             chosen_curies = supported_by_prefix[input_prefix]
         else:
             # Otherwise go with the first supported prefix that turned up
             chosen_curies = next(iter(supported_by_prefix.values()))
         curies_for_kp = curies_for_kp | chosen_curies

     if curies_without_match:
         self.log.warning(
             f"{kp}: Could not find curies with prefixes {kp} prefers for these curies: "
             f"{curies_without_match}; will not send these to KP")
     return list(curies_for_kp)
Beispiel #9
0
 def _convert_kg2c_node_to_trapi_node(
         self, neo4j_node: Dict[str, any]) -> Tuple[str, Node]:
     """
     Convert a KG2c neo4j node (a dict of its properties) into a (node key, TRAPI Node) pair.

     The node's 'id' becomes the key; 'name' and 'category' are set directly on the Node, and the other
     listed KG2c properties are handed to self._create_trapi_attributes.
     """
     trapi_node = Node()
     trapi_node_key = neo4j_node.get('id')
     trapi_node.name = neo4j_node.get('name')
     category = neo4j_node.get('category')
     trapi_node.category = eu.convert_to_list(category)
     # Every other KG2c property of interest becomes a TRAPI Attribute
     attribute_properties = ["iri", "description", "all_names", "all_categories",
                             "expanded_categories", "equivalent_curies", "publications"]
     trapi_node.attributes = self._create_trapi_attributes(attribute_properties, neo4j_node)
     return trapi_node_key, trapi_node
Beispiel #10
0
 def _convert_kg2_node_to_trapi_node(
         self, neo4j_node: Dict[str, any]) -> Tuple[str, Node]:
     """
     Convert a KG2 neo4j node (a dict of its properties) into a (node key, TRAPI Node) pair.

     The node's 'id' becomes the key; 'name' and 'category' are set directly on the Node, and the other
     listed KG2 properties are handed to self._create_trapi_attributes.
     """
     trapi_node = Node()
     trapi_node_key = neo4j_node.get('id')
     trapi_node.name = neo4j_node.get('name')
     category = neo4j_node.get('category')
     trapi_node.category = eu.convert_to_list(category)
     # Every other KG2 property of interest becomes a TRAPI Attribute
     attribute_properties = ["iri", "full_name", "description", "publications", "synonym",
                             "provided_by", "deprecated", "update_date"]
     trapi_node.attributes = self._create_trapi_attributes(attribute_properties, neo4j_node)
     return trapi_node_key, trapi_node
Beispiel #11
0
 def _convert_kg2_node_to_swagger_node(
         self, neo4j_node: Dict[str, any]) -> Tuple[str, Node]:
     """
     Convert a KG2 neo4j node (a dict of its properties) into a (node key, swagger Node) pair.

     The node's 'id' becomes the key and 'name' is copied over. Note that the Node's category comes from the
     'category_label' property, while the 'category' property is passed along with the other listed
     properties to self._create_swagger_attributes.
     """
     node = Node()
     node_key = neo4j_node.get('id')
     node.name = neo4j_node.get('name')
     category_label = neo4j_node.get('category_label')
     node.category = eu.convert_to_list(category_label)
     # Every other KG2 property of interest becomes a swagger Attribute
     attribute_properties = ["full_name", "description", "iri", "publications", "synonym",
                             "category", "provided_by", "deprecated", "update_date"]
     node.attributes = self._create_swagger_attributes(attribute_properties, neo4j_node)
     return node_key, node
Beispiel #12
0
 def make_qg_use_supported_prefixes(
         self, qg: QueryGraph, kp_name: str,
         log: ARAXResponse) -> Optional[QueryGraph]:
     """
     Rewrite the curies on each qnode (in place) so that they use prefixes the given KP supports.

     For "infores:rtx-kg2" the curies are simply canonicalized. For any other KP, a qnode's curies are only
     converted if at least one of them uses a prefix the KP doesn't support. Returns the query graph, or
     None if some qnode ends up with no curies the KP supports (meaning the KP can't answer the query).
     """
     # NOTE(review): messages below go to a mix of self.log and the log parameter, as in the original -
     # confirm whether those are meant to be the same object
     for qnode_key, qnode in qg.nodes.items():
         if not qnode.ids:
             continue  # Qnodes without curies need no conversion

         if kp_name == "infores:rtx-kg2":
             # Just convert them into canonical curies
             qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
             continue

         # Otherwise figure out which kind of curies the KP wants
         kp_prefixes = self._get_supported_prefixes(
             eu.convert_to_list(qnode.categories), kp_name)
         prefixes_in_use = set()
         for curie in qnode.ids:
             prefixes_in_use.add(self._get_uppercase_prefix(curie))

         # Leave the curies alone when every one of them already uses a supported prefix
         if prefixes_in_use.issubset(kp_prefixes):
             self.log.debug(
                 f"{kp_name}: All {qnode_key} curies use prefix(es) {kp_name} supports; no "
                 f"conversion necessary")
             continue

         self.log.debug(
             f"{kp_name}: One or more {qnode_key} curies use a prefix {kp_name} doesn't "
             f"support; will convert these")
         converted_curies = self.get_desirable_equivalent_curies(
             qnode.ids, qnode.categories, kp_name)
         if not converted_curies:
             log.info(
                 f"{kp_name} cannot answer the query because no equivalent curies were found "
                 f"with prefixes it supports for qnode {qnode_key}. Original curies were: "
                 f"{qnode.ids}")
             return None
         log.debug(
             f"{kp_name}: Converted {qnode_key}'s {len(qnode.ids)} curies to a list of "
             f"{len(converted_curies)} curies tailored for {kp_name}"
         )
         qnode.ids = converted_curies
     return qg
Beispiel #13
0
    def _validate_and_pre_process_input(qg: QueryGraph, valid_bte_inputs_dict: Dict[str, Set[str]],
                                        enforce_directionality: bool, use_synonyms: bool, log: ARAXResponse) -> Tuple[str, str]:
        """
        Check that the query graph is something BTE can answer and put it into the form BTE expects.

        The query graph is modified in place: predicates BTE doesn't accept are dropped from the qedge, qnode
        categories are run through eu.convert_string_to_pascal_case, curies are (optionally) replaced by
        their synonyms, and the input qnode's curies are run through eu.convert_curie_to_bte_format.

        :param qg: The query graph to validate; must have exactly one qedge and two qnodes.
        :param valid_bte_inputs_dict: The values BTE accepts, under the keys 'predicates' and 'node_categories'.
        :param enforce_directionality: If True, the qedge's subject is the input qnode and its object the
                                       output qnode; otherwise the first qnode with a curie is the input.
        :param use_synonyms: Whether to replace qnode curies with the result of eu.get_curie_synonyms.
        :param log: Response object that errors/warnings are written to.
        :return: The (input qnode key, output qnode key) pair, or ("", "") if validation failed (the reason
                 is logged as an error).
        """
        # Make sure we have a valid one-hop query graph
        if len(qg.edges) != 1 or len(qg.nodes) != 2:
            log.error(f"BTE can only accept one-hop query graphs (your QG has {len(qg.nodes)} nodes and "
                      f"{len(qg.edges)} edges)", error_code="InvalidQueryGraph")
            return "", ""
        qedge_key = next(iter(qg.edges))
        qedge = qg.edges[qedge_key]

        # Make sure at least one of our qnodes has a curie
        qnode_keys_with_curies = [qnode_key for qnode_key, qnode in qg.nodes.items() if qnode.id]
        if not qnode_keys_with_curies:
            log.error(f"Neither qnode for qedge {qedge_key} has a curie specified. BTE requires that at least one of "
                      f"them has a curie. Your query graph is: {qg.to_dict()}", error_code="UnsupportedQueryForKP")
            return "", ""

        # Figure out which query node is input vs. output
        if enforce_directionality:
            input_qnode_key = qedge.subject
            output_qnode_key = qedge.object
        else:
            input_qnode_key = qnode_keys_with_curies[0]
            output_qnode_key = next(qnode_key for qnode_key in qg.nodes if qnode_key != input_qnode_key)
            log.warning(f"BTE cannot do bidirectional queries; the query for this edge will be directed, going: "
                        f"{input_qnode_key}-->{output_qnode_key}")
        input_qnode = qg.nodes[input_qnode_key]
        output_qnode = qg.nodes[output_qnode_key]

        # Make sure predicate is allowed
        if qedge.predicate:
            # The predicate may be a single string rather than a list; normalize it first so that a string
            # isn't split up into its individual characters when it's turned into a set
            requested_predicates = set(eu.convert_to_list(qedge.predicate))
            accepted_predicates = requested_predicates.intersection(valid_bte_inputs_dict['predicates'])
            # Throw an error if none of the predicates are supported
            if not accepted_predicates:
                log.error(f"BTE does not accept predicate(s) {qedge.predicate}. Valid options are "
                          f"{valid_bte_inputs_dict['predicates']}", error_code="UnsupportedQueryForKP")
                return "", ""
            # Give a warning if only some of the predicates are supported
            elif len(accepted_predicates) < len(requested_predicates):
                unaccepted_predicates = requested_predicates.difference(accepted_predicates)
                log.warning(f"Some of qedge {qedge_key}'s predicates are not accepted by BTE: {unaccepted_predicates}."
                            f" Valid options are: {valid_bte_inputs_dict['predicates']}")
                qedge.predicate = list(accepted_predicates)

        # Process qnode types (convert to preferred format, make sure allowed)
        for qnode in (input_qnode, output_qnode):
            qnode.category = [eu.convert_string_to_pascal_case(node_category)
                              for node_category in eu.convert_to_list(qnode.category)]
        qnodes_missing_type = [qnode_key for qnode_key in [input_qnode_key, output_qnode_key] if not qg.nodes[qnode_key].category]
        if qnodes_missing_type:
            log.error(f"BTE requires every query node to have a category. QNode(s) missing a category: "
                      f"{', '.join(qnodes_missing_type)}", error_code="InvalidInput")
            return "", ""
        invalid_qnode_categories = [node_category for qnode in [input_qnode, output_qnode] for node_category in qnode.category
                                    if node_category not in valid_bte_inputs_dict['node_categories']]
        if invalid_qnode_categories:
            log.error(f"BTE does not accept QNode category(s): {', '.join(invalid_qnode_categories)}. Valid options are "
                      f"{valid_bte_inputs_dict['node_categories']}", error_code="InvalidInput")
            return "", ""

        # Sub in curie synonyms as appropriate
        if use_synonyms:
            for qnode in (input_qnode, output_qnode):
                if qnode.id:
                    qnode.id = eu.get_curie_synonyms(qnode.id, log)

        # Make sure our input node curies are in list form and use prefixes BTE prefers
        input_curie_list = eu.convert_to_list(input_qnode.id)
        input_qnode.id = [eu.convert_curie_to_bte_format(curie) for curie in input_curie_list]

        return input_qnode_key, output_qnode_key
Beispiel #14
0
 def _convert_kg2c_plover_node_to_trapi_node(node_tuple: list) -> Node:
     """Build a TRAPI Node whose name is node_tuple[0] and whose categories are node_tuple[1] in list form."""
     name = node_tuple[0]
     categories = eu.convert_to_list(node_tuple[1])
     return Node(name=name, categories=categories)