Example #1
0
    def _get_best_equivalent_bte_curie(equivalent_curies: List[str], node_category: str) -> str:
        """
        Choose one curie out of a group of equivalent curies, favoring the prefixes preferred for the node's category.

        The curies are considered in sorted order, so the choice is deterministic; when several curies share the
        winning prefix, the one that sorts first is returned. The caller's list is not modified.

        :param equivalent_curies: Curies that all refer to the same node; must contain at least one item.
        :param node_category: Category of the node; it is passed through eu.convert_string_to_snake_case() before
                              being looked up in the table of preferred prefixes.
        :return: The curie with the most preferred prefix; if no curie uses a preferred prefix, the first curie whose
                 prefix isn't 'NAME'; failing that, the first curie.
        :raises IndexError: If equivalent_curies is empty.
        """
        # Curie prefixes in order of preference for different node types (not all-inclusive)
        preferred_node_prefixes_dict = {'chemical_substance': ['CHEMBL.COMPOUND', 'CHEBI'],
                                        'protein': ['UNIPROTKB', 'PR'],
                                        'gene': ['NCBIGENE', 'ENSEMBL', 'HGNC', 'GO'],
                                        'disease': ['DOID', 'MONDO', 'OMIM', 'MESH'],
                                        'phenotypic_feature': ['HP', 'OMIM'],
                                        'anatomical_entity': ['UBERON', 'FMA', 'CL'],
                                        'pathway': ['REACTOME'],
                                        'biological_process': ['GO'],
                                        'cellular_component': ['GO']}
        prefixes_in_order_of_preference = preferred_node_prefixes_dict.get(eu.convert_string_to_snake_case(node_category), [])

        # Sort a copy (rather than sorting in place) so the caller's list isn't reordered as a side effect
        sorted_curies = sorted(equivalent_curies)
        uppercase_prefixes = [eu.get_curie_prefix(curie).upper() for curie in sorted_curies]

        # Pick the curie that uses the (relatively) most preferred prefix
        ranked_curies = [(prefixes_in_order_of_preference.index(prefix), curie)
                         for prefix, curie in zip(uppercase_prefixes, sorted_curies)
                         if prefix in prefixes_in_order_of_preference]
        if ranked_curies:
            # min() keeps the first of equally ranked items, i.e., the curie that sorts first
            return min(ranked_curies, key=lambda ranked_curie: ranked_curie[0])[1]

        # Otherwise, just try to pick one that isn't 'NAME:___'
        for prefix, curie in zip(uppercase_prefixes, sorted_curies):
            if prefix != 'NAME':
                return curie

        # Every curie is a 'NAME' curie (or the list is empty, in which case this raises IndexError)
        return sorted_curies[0]
Example #2
0
    def _add_answers_to_kg(self, answer_kg: QGOrganizedKnowledgeGraph, reasoner_std_response: Dict[str, any],
                           input_qnode_key: str, output_qnode_key: str, qedge_key: str, log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
        """
        Copy the nodes and edges of a BTE response into answer_kg, filed under the ARAX query graph keys.

        Nothing is added when the response's knowledge graph has no edges. Nodes bound to BTE's "n0" are filed under
        input_qnode_key and nodes bound to "n1" under output_qnode_key; edges must be bound to "e1" and are filed
        under qedge_key. Nodes filed under output_qnode_key are re-keyed to their preferred equivalent curie, and
        edge subjects/objects are rewritten to use those new keys. If a node or edge carries any other binding, an
        error is logged and answer_kg is returned as it stands at that point.

        :param answer_kg: Knowledge graph to add the nodes and edges to.
        :param reasoner_std_response: BTE's response; its 'results' and 'knowledge_graph' entries are read.
        :param input_qnode_key: ARAX qnode key that BTE's "n0" corresponds to.
        :param output_qnode_key: ARAX qnode key that BTE's "n1" corresponds to.
        :param qedge_key: ARAX qedge key that BTE's "e1" corresponds to.
        :param log: Response object used for debug/error logging.
        :return: The same answer_kg that was passed in.
        """
        qg_ids_by_kg_id = self._build_kg_to_qg_id_dict(reasoner_std_response['results'])
        bte_edges = reasoner_std_response['knowledge_graph']['edges']
        if not bte_edges:
            return answer_kg

        log.debug(f"Got results back from BTE for this query "
                  f"({len(bte_edges)} edges)")
        # Tracks which BTE node keys were swapped for a preferred equivalent curie (BTE key -> new key)
        preferred_keys_by_bte_key = dict()

        for bte_node in reasoner_std_response['knowledge_graph']['nodes']:
            arax_node = Node()
            bte_node_key = bte_node.get('id')
            arax_node.name = bte_node.get('name')
            snake_case_type = eu.convert_string_to_snake_case(bte_node.get('type'))
            arax_node.category = eu.convert_to_list(snake_case_type)

            # Translate BTE's qg_id for this node into the matching qnode_key of our own query graph
            bte_qg_id = qg_ids_by_kg_id['nodes'].get(bte_node_key)
            fulfills_input = bte_qg_id == "n0"
            fulfills_output = bte_qg_id == "n1"
            if not (fulfills_input or fulfills_output):
                log.error("Could not map BTE qg_id to ARAX qnode_key", error_code="UnknownQGID")
                return answer_kg
            qnode_key = input_qnode_key if fulfills_input else output_qnode_key

            # Only output nodes are re-keyed to their preferred equivalent identifier
            if qnode_key != output_qnode_key:
                arax_node_key = bte_node_key
            elif bte_node_key in preferred_keys_by_bte_key:
                arax_node_key = preferred_keys_by_bte_key[bte_node_key]
            else:
                equivalent_curies = []
                for prefix, local_ids in bte_node.get('equivalent_identifiers').items():
                    for local_id in local_ids:
                        equivalent_curies.append(f"{prefix}:{eu.get_curie_local_id(local_id)}")
                arax_node_key = self._get_best_equivalent_bte_curie(equivalent_curies, arax_node.category[0])
                preferred_keys_by_bte_key[bte_node_key] = arax_node_key

            answer_kg.add_node(arax_node_key, arax_node, qnode_key)

        for bte_edge in bte_edges:
            arax_edge = Edge()
            arax_edge_key = bte_edge.get("id")
            arax_edge.predicate = bte_edge.get('type')
            # Point the edge at the re-keyed nodes where applicable; otherwise keep BTE's original node keys
            source_key = bte_edge.get('source_id')
            target_key = bte_edge.get('target_id')
            arax_edge.subject = preferred_keys_by_bte_key.get(source_key, source_key)
            arax_edge.object = preferred_keys_by_bte_key.get(target_key, target_key)
            provided_by_attribute = Attribute(name="provided_by", value=bte_edge.get('edge_source'),
                                              type=eu.get_attribute_type("provided_by"))
            defined_by_attribute = Attribute(name="is_defined_by", value="BTE",
                                             type=eu.get_attribute_type("is_defined_by"))
            arax_edge.attributes = [provided_by_attribute, defined_by_attribute]

            # Translate BTE's qg_id for this edge into the qedge_key of our own query graph
            if qg_ids_by_kg_id['edges'].get(arax_edge_key) != "e1":
                log.error("Could not map BTE qg_id to ARAX qedge_key", error_code="UnknownQGID")
                return answer_kg
            answer_kg.add_edge(arax_edge_key, arax_edge, qedge_key)

        return answer_kg