Esempio n. 1
0
def get_canonical_curies_dict(curie: Union[str, List[str]],
                              log: ARAXResponse) -> Dict[str, Dict[str, str]]:
    """Fetch canonical curie info for one or more curies from the NodeSynonymizer.

    Args:
        curie: A single curie or a list of curies.
        log: Response object that receives debug/warning/error messages.

    Returns:
        Whatever ``NodeSynonymizer.get_canonical_curies()`` returned, or an
        empty dict when the synonymizer raised an exception or returned None.
    """
    curies = convert_string_or_list_to_list(curie)
    try:
        synonymizer = NodeSynonymizer()
        log.debug(f"Sending NodeSynonymizer.get_canonical_curies() a list of {len(curies)} curies")
        canonical_info_by_curie = synonymizer.get_canonical_curies(curies)
        log.debug("Got response back from NodeSynonymizer")
    except Exception as problem:
        # Record the full traceback; the exception class name doubles as the error code
        log.error(f"Encountered a problem using NodeSynonymizer: {traceback.format_exc()}",
                  error_code=type(problem).__name__)
        return {}

    if canonical_info_by_curie is None:
        log.error("NodeSynonymizer returned None",
                  error_code="NodeNormalizationIssue")
        return {}

    # Keys whose value is empty/None are curies the synonymizer could not resolve
    unrecognized_curies = {
        input_curie
        for input_curie, canonical_info in canonical_info_by_curie.items()
        if not canonical_info
    }
    if unrecognized_curies:
        log.warning(f"NodeSynonymizer did not return canonical info for: {unrecognized_curies}")
    return canonical_info_by_curie
Esempio n. 2
0
 def _add_inverted_predicates(qg: QueryGraph,
                              log: ARAXResponse) -> QueryGraph:
     """Extend the query edge's predicates with their Biolink inverse predicates.

     The Biolink Model yaml file is downloaded from GitHub and, for every
     predicate on the (first) query edge that has an ``inverse`` listed under
     ``slots``, that inverse is added to the edge's predicate list. If the yaml
     file cannot be loaded, a warning is logged and the query graph is returned
     as it was.

     Args:
         qg: Query graph whose first edge is updated in place.
         log: Response object that receives debug/warning messages.

     Returns:
         The same query graph object that was passed in.
     """
     # For now, we'll consider BOTH predicates in an inverse pair (TODO: later tailor to what we know is in KG2)
     qedge = next(iter(qg.edges.values()))
     biolink_yaml_url = "https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml"
     try:
         # Without a timeout an unresponsive server would block this call indefinitely
         response = requests.get(biolink_yaml_url, timeout=30)
     except requests.exceptions.RequestException as error:
         # Treat a failed connection like a bad status code: inverse lookup is best-effort
         log.warning(
             f"Cannot check for inverse predicates: Failed to load Biolink Model yaml file. "
             f"(Request failed with {type(error).__name__}: {error}.)")
         return qg
     if response.status_code != 200:
         log.warning(
             f"Cannot check for inverse predicates: Failed to load Biolink Model yaml file. "
             f"(Page gave status {response.status_code}.)")
         return qg

     qedge.predicate = eu.convert_to_list(qedge.predicate)
     biolink_model = yaml.safe_load(response.text)
     inverse_predicates = []
     for predicate in qedge.predicate:
         english_predicate = predicate.split(":")[-1].replace(
             "_", " ")  # Converts to 'subclass of' format
         biolink_predicate_info = biolink_model["slots"].get(english_predicate)
         if biolink_predicate_info and "inverse" in biolink_predicate_info:
             english_inverse_predicate = biolink_predicate_info["inverse"]
             machine_inverse_predicate = f"biolink:{english_inverse_predicate.replace(' ', '_')}"
             inverse_predicates.append(machine_inverse_predicate)
             log.debug(
                 f"Found inverse predicate for {predicate}: {machine_inverse_predicate}"
             )
     # dict.fromkeys() removes duplicates while keeping a stable order
     # (original predicates first, then newly found inverses)
     qedge.predicate = list(
         dict.fromkeys([*qedge.predicate, *inverse_predicates]))
     return qg
Esempio n. 3
0
def get_preferred_categories(curie: Union[str, List[str]],
                             log: ARAXResponse) -> Optional[List[str]]:
    """Return the distinct preferred categories of the given curie(s).

    Args:
        curie: A single curie or a list of curies.
        log: Response object that receives debug/warning/error messages.

    Returns:
        A list of the distinct preferred categories the NodeSynonymizer reports
        for the recognized curies; ``["biolink:NamedThing"]`` when no category
        could be found; an empty list when the synonymizer returned None.
    """
    curies = convert_to_list(curie)
    synonymizer = NodeSynonymizer()
    log.debug(
        f"Sending NodeSynonymizer.get_canonical_curies() a list of {len(curies)} curies"
    )
    canonical_curies_dict = synonymizer.get_canonical_curies(curies)
    log.debug("Got response back from NodeSynonymizer")
    if canonical_curies_dict is None:
        log.error("NodeSynonymizer returned None",
                  error_code="NodeNormalizationIssue")
        return []

    recognized_input_curies = {
        input_curie
        for input_curie, canonical_info in canonical_curies_dict.items()
        if canonical_info
    }
    unrecognized_curies = set(curies).difference(recognized_input_curies)
    if unrecognized_curies:
        log.warning(
            f"NodeSynonymizer did not recognize: {unrecognized_curies}")

    # A recognized curie may still lack a 'preferred_category'; dropping those
    # keeps None out of the result and lets the NamedThing fallback kick in
    reported_categories = (
        canonical_curies_dict[recognized_curie].get('preferred_category')
        for recognized_curie in recognized_input_curies
    )
    preferred_categories = {category for category in reported_categories if category}
    if preferred_categories:
        return list(preferred_categories)
    log.warning(
        "Unable to find any preferred categories; will default to biolink:NamedThing"
    )
    return ["biolink:NamedThing"]
Esempio n. 4
0
 def _answer_query_using_bte(self, input_qnode_key: str, output_qnode_key: str, qg: QueryGraph,
                             answer_kg: QGOrganizedKnowledgeGraph, valid_bte_inputs_dict: Dict[str, Set[str]],
                             log: ARAXResponse) -> Tuple[QGOrganizedKnowledgeGraph, Set[str]]:
     """Answer a single-edge query graph by querying BTE once per input curie and category pair.

     Args:
         input_qnode_key: Key of the qnode whose curies are sent to BTE.
         output_qnode_key: Key of the qnode on the other end of the edge.
         qg: Query graph; its first edge is the one being answered.
         answer_kg: Knowledge graph that answers are added to.
         valid_bte_inputs_dict: Must contain 'curie_prefixes', the prefixes BTE accepts.
         log: Response object that receives debug/error messages.

     Returns:
         A tuple of the (updated) answer knowledge graph and the set of input
         curies whose prefix was accepted. If a BTE query raises, the error is
         logged and whatever has been gathered so far is returned.
     """
     accepted_curies = set()
     qedge_key = next(iter(qg.edges))
     qedge = qg.edges[qedge_key]
     input_qnode = qg.nodes[input_qnode_key]
     output_qnode = qg.nodes[output_qnode_key]
     # Send this single-edge query to BTE, input curie by input curie (adding findings to our answer KG as we go)
     for curie in input_qnode.id:
         # Consider all different combinations of qnode types (can be multiple if gene/protein)
         for input_qnode_category, output_qnode_category in itertools.product(input_qnode.category, output_qnode.category):
             curie_prefix = eu.get_curie_prefix(curie)
             if curie_prefix not in valid_bte_inputs_dict['curie_prefixes']:
                 # The prefix does not depend on the category pair, so no other pair can match either
                 break
             accepted_curies.add(curie)
             loop = None
             try:
                 loop = asyncio.new_event_loop()
                 seqd = SingleEdgeQueryDispatcher(input_cls=input_qnode_category,
                                                  output_cls=output_qnode_category,
                                                  pred=qedge.predicate,
                                                  input_id=curie_prefix,
                                                  values=eu.get_curie_local_id(curie),
                                                  loop=loop)
                 log.debug(f"Sending query to BTE: {curie}-{qedge.predicate if qedge.predicate else ''}->{output_qnode_category}")
                 seqd.query()
                 reasoner_std_response = seqd.to_reasoner_std()
             except Exception as error:
                 log.error(f"Encountered a problem while using BioThings Explorer. {traceback.format_exc()}",
                           error_code=type(error).__name__)
                 return answer_kg, accepted_curies
             finally:
                 # Every query gets its own event loop; close it so its resources
                 # are released now rather than whenever it is garbage collected
                 if loop is not None and not loop.is_running():
                     loop.close()
             answer_kg = self._add_answers_to_kg(answer_kg, reasoner_std_response, input_qnode_key, output_qnode_key, qedge_key, log)
     return answer_kg, accepted_curies
Esempio n. 5
0
    def _load_answers_into_kg(self, neo4j_results: List[Dict[str,
                                                             List[Dict[str,
                                                                       any]]]],
                              kg_name: str, qg: QueryGraph,
                              log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
        """Convert the first neo4j results table into a QG-organized knowledge graph.

        Columns whose name starts with 'nodes' are loaded as nodes and columns
        whose name starts with 'edges' are loaded as edges; the remainder of
        the column name (after 'nodes_' / 'edges_') is used as the qnode/qedge
        key. Any other column is ignored.

        Args:
            neo4j_results: Query results; only the first element is read.
            kg_name: Name of the knowledge graph the results came from.
            qg: Query graph that was answered (used for the debug message).
            log: Response object that receives debug messages.

        Returns:
            A new QGOrganizedKnowledgeGraph holding the converted nodes and edges.
        """
        answered_qedge_key = next(qedge_key for qedge_key in qg.edges)
        log.debug(f"Processing query results for edge {answered_qedge_key}")
        final_kg = QGOrganizedKnowledgeGraph()

        results_table = neo4j_results[0]
        # Only KG1 results need the uuid -> curie lookup table
        if kg_name == "KG1":
            node_uuid_to_curie_dict = self._build_node_uuid_to_curie_dict(results_table)
        else:
            node_uuid_to_curie_dict = dict()

        for column_name in list(results_table):
            if column_name.startswith('nodes'):
                # Example column name: 'nodes_n00'
                qnode_key = column_name.replace("nodes_", "", 1)
                for neo4j_node in results_table.get(column_name):
                    node_key, node = self._convert_neo4j_node_to_trapi_node(neo4j_node, kg_name)
                    final_kg.add_node(node_key, node, qnode_key)
                continue
            if column_name.startswith('edges'):
                # Example column name: 'edges_e01'
                qedge_key = column_name.replace("edges_", "", 1)
                for neo4j_edge in results_table.get(column_name):
                    edge_key, edge = self._convert_neo4j_edge_to_trapi_edge(
                        neo4j_edge, node_uuid_to_curie_dict, kg_name)
                    final_kg.add_edge(edge_key, edge, qedge_key)

        return final_kg
def QGI_test5():
    """Translate a forked (non-linear) query graph into ARAXi commands and print them.

    Returns the response when the translation does not finish with status
    'OK' (after printing it at DEBUG level); otherwise returns None.
    """
    # This is to test forked/non-linear queries (currently not working properly)
    qnodes = {
        "n0": {"categories": ["biolink:Gene"]},
        "n1": {"ids": ["CHEBI:45783"], "categories": ["biolink:ChemicalEntity"]},
        "n2": {"ids": ["MONDO:0005301"], "categories": ["biolink:Disease"]},
        "n3": {"categories": ["biolink:ChemicalEntity"]},
    }
    # All three edges fan out from n0
    qedges = {
        qedge_key: {
            "subject": "n0",
            "object": object_key,
            "predicates": ["biolink:related_to"],
        }
        for qedge_key, object_key in (("e01", "n1"), ("e02", "n2"), ("e03", "n3"))
    }
    input_query_graph = {"message": {"query_graph": {"nodes": qnodes, "edges": qedges}}}

    #### Create a template Message
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)
    message = ARAXMessenger().from_dict(input_query_graph['message'])
    response.envelope.message.query_graph = message.query_graph

    interpreter = ARAXQueryGraphInterpreter()
    interpreter.translate_to_araxi(response)
    if response.status != 'OK':
        print(response.show(level=ARAXResponse.DEBUG))
        return response

    for araxi_command in response.data['araxi_commands']:
        print(f"  - {araxi_command}")
Esempio n. 7
0
    def _convert_one_hop_query_graph_to_cypher_query(
            self, qg: QueryGraph, enforce_directionality: bool,
            log: ARAXResponse) -> str:
        """Build the cypher query text for a one-hop query graph.

        The query has the shape ``MATCH ... [WHERE ...] WITH ... RETURN ...``
        and returns three collected columns: 'nodes_<subject key>',
        'nodes_<object key>' and 'edges_<qedge key>'.

        Side effect: a qnode's ``category`` is converted to a list in place.

        Args:
            qg: Query graph; its first edge is the one converted.
            enforce_directionality: Passed through to the edge-cypher helper.
            log: Response object that receives debug/error messages.

        Returns:
            The cypher query string, or "" if anything raised while building
            it (the traceback is logged as an error).
        """
        qedge_key = next(iter(qg.edges))
        qedge = qg.edges[qedge_key]
        log.debug(f"Generating cypher for edge {qedge_key} query graph")
        try:
            subject_key = qedge.subject
            object_key = qedge.object

            # MATCH: (subject)-[edge]-(object)
            edge_pattern = self._get_cypher_for_query_edge(qedge_key, qg, enforce_directionality)
            subject_pattern = self._get_cypher_for_query_node(subject_key, qg)
            object_pattern = self._get_cypher_for_query_node(object_key, qg)
            match_clause = f"MATCH {subject_pattern}{edge_pattern}{object_pattern}"

            # WHERE: only needed for qnodes with several IDs and/or several categories
            conditions = []
            for qnode_key in (subject_key, object_key):
                qnode = qg.nodes[qnode_key]
                if isinstance(qnode.id, list) and len(qnode.id) > 1:
                    # NOTE(review): the Python repr of the ID list is interpolated
                    # straight into the query text - confirm IDs cannot contain quotes
                    conditions.append(f"{qnode_key}.id in {qnode.id}")
                if not qnode.category:
                    continue
                qnode.category = eu.convert_to_list(qnode.category)
                if len(qnode.category) > 1:
                    # Looks like '(n00:`biolink:Disease` OR n00:`biolink:PhenotypicFeature`)'
                    label_alternatives = " OR ".join(
                        f"{qnode_key}:`{category}`" for category in qnode.category)
                    conditions.append(f"({label_alternatives})")
            where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""

            # WITH: collect distinct nodes/edges into one column each
            subject_column = f"nodes_{subject_key}"
            object_column = f"nodes_{object_key}"
            edge_column = f"edges_{qedge_key}"
            # Grabs the edge's ID and a record of which of its nodes correspond to which qnode ID
            edge_projection = (f"{{.*, id:ID({qedge_key}), {subject_key}:{subject_key}.id, "
                               f"{object_key}:{object_key}.id}}")
            with_clause = (f"WITH collect(distinct {subject_key}) as {subject_column}, "
                           f"collect(distinct {object_key}) as {object_column}, "
                           f"collect(distinct {qedge_key}{edge_projection}) as {edge_column}")

            # RETURN: the three collected columns
            return_clause = f"RETURN {subject_column}, {object_column}, {edge_column}"

            # An empty WHERE clause still occupies its slot between the separators
            return " ".join([match_clause, where_clause, with_clause, return_clause])
        except Exception as problem:
            log.error(f"Problem generating cypher for query. {traceback.format_exc()}",
                      error_code=type(problem).__name__)
            return ""
Esempio n. 8
0
 def _send_query_to_kp(self, query_graph: QueryGraph,
                       log: ARAXResponse) -> Dict[str, any]:
     """Post a single-edge query to the KP once per input curie and merge the answers.

     Args:
         query_graph: Single-edge query graph to send.
         log: Response object that receives debug/warning messages.

     Returns:
         The first KP response that contained results, with the knowledge
         graph nodes/edges and the results of every later such response
         appended to it; an empty dict if no response contained results.
     """
     # Send query to their API (stripping down qnode/qedges to only the properties they like)
     stripped_qnodes = []
     for qnode_key, qnode in query_graph.nodes.items():
         stripped_qnode = {'id': qnode_key, 'type': qnode.category}
         if qnode.id:
             stripped_qnode['curie'] = qnode.id
         stripped_qnodes.append(stripped_qnode)

     qedge_key = next(iter(query_graph.edges))  # Our query graph is single-edge
     qedge = query_graph.edges[qedge_key]
     stripped_qedge = {
         'id': qedge_key,
         'source_id': qedge.subject,
         'target_id': qedge.object,
         'type': list(self.accepted_edge_types)[0]
     }
     source_stripped_qnode = next(stripped for stripped in stripped_qnodes
                                  if stripped['id'] == qedge.subject)
     input_curies = eu.convert_string_or_list_to_list(source_stripped_qnode['curie'])

     # The body refers to the stripped qnode dicts, so updating the source
     # qnode's curie below changes what is sent on the next request
     request_body = {
         'message': {
             'query_graph': {
                 'nodes': stripped_qnodes,
                 'edges': [stripped_qedge]
             }
         }
     }
     combined_response = dict()
     for input_curie in input_curies:  # Until we have batch querying, ping them one-by-one for each input curie
         log.debug(f"Sending {qedge_key} query to {self.kp_name} for {input_curie}")
         source_stripped_qnode['curie'] = input_curie
         kp_response = requests.post(self.kp_query_endpoint,
                                     json=request_body,
                                     headers={'accept': 'application/json'})
         if kp_response.status_code != 200:
             log.warning(f"{self.kp_name} KP API returned response of {kp_response.status_code}")
             continue
         kp_response_json = kp_response.json()
         if not kp_response_json.get('results'):
             continue
         if not combined_response:
             # The first answer with results becomes the base everything else is merged into
             combined_response = kp_response_json
             continue
         combined_kg = combined_response['knowledge_graph']
         new_kg = kp_response_json['knowledge_graph']
         combined_kg['nodes'] += new_kg['nodes']
         combined_kg['edges'] += new_kg['edges']
         combined_response['results'] += kp_response_json['results']
     return combined_response
Esempio n. 9
0
 def _run_arax_query(request_body: dict,
                     log: ARAXResponse) -> Tuple[ARAXResponse, Message]:
     """Run a sub-query through ARAXQuery in "RTXKG2" mode.

     Args:
         request_body: Request passed to ``ARAXQuery.query()``.
         log: Response object that receives an error message if the sub-query fails.

     Returns:
         A tuple of the sub-query's response and the ARAXQuery's message.
     """
     araxq = ARAXQuery()
     sub_query_response = araxq.query(request_body, mode="RTXKG2")
     if sub_query_response.status == 'OK':
         return sub_query_response, araxq.message
     # Surface the sub-query's full log on the caller's response
     sub_query_dump = sub_query_response.show(level=sub_query_response.DEBUG)
     log.error(f"Encountered an error running ARAXQuery within Expand: {sub_query_dump}")
     return sub_query_response, araxq.message
Esempio n. 10
0
def test_example1():
    """Assess a two-edge query graph with QueryGraphInfo and print what it found.

    Returns the response when the assessment does not finish with status 'OK'
    (after printing it at DEBUG level); otherwise returns None.
    """
    qnodes = {
        "n00": {"id": "MONDO:0001627", "category": "biolink:Disease"},
        "n01": {"category": "biolink:ChemicalSubstance"},
    }
    qedges = {
        "e00": {"subject": "n00", "object": "n01"},
        "e01": {
            "subject": "n00",
            "object": "n01",
            "predicate": "biolink:contraindicated_for",
            "exclude": True,
        },
    }
    query_graph = {"edges": qedges, "nodes": qnodes}

    from ARAX_messenger import ARAXMessenger
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)

    response.envelope.message.query_graph = QueryGraph().from_dict(query_graph)

    query_graph_info = QueryGraphInfo()
    result = query_graph_info.assess(response.envelope.message)
    response.merge(result)
    if result.status != 'OK':
        print(response.show(level=ARAXResponse.DEBUG))
        return response

    # Attributes of the assessment that get dumped for inspection
    reported_attributes = (
        'n_nodes',
        'n_edges',
        'is_bifurcated_graph',
        'start_node',
        'node_info',
        'edge_info',
        'node_order',
        'edge_order',
        'node_category_map',
        'edge_predicate_map',
    )
    query_graph_info_dict = {
        attribute: getattr(query_graph_info, attribute)
        for attribute in reported_attributes
    }
    # Round-trip through repr()/literal_eval() before dumping as JSON
    printable_info = ast.literal_eval(repr(query_graph_info_dict))
    print(json.dumps(printable_info, sort_keys=True, indent=2))
Esempio n. 11
0
 def _run_arax_query(actions_list: List[str],
                     log: ARAXResponse) -> Tuple[ARAXResponse, Message]:
     """Run a list of actions as a sub-query through ARAXQuery.

     Args:
         actions_list: Actions sent as the request's operations.
         log: Response object that receives an error message if the sub-query fails.

     Returns:
         A tuple of the sub-query's response and the ARAXQuery's message.
     """
     araxq = ARAXQuery()
     request_body = {"operations": {"actions": actions_list}}
     sub_query_response = araxq.query(request_body)
     if sub_query_response.status == 'OK':
         return sub_query_response, araxq.message
     # Surface the sub-query's full log on the caller's response
     sub_query_dump = sub_query_response.show(level=sub_query_response.DEBUG)
     log.error(f"Encountered an error running ARAXQuery within Expand: {sub_query_dump}")
     return sub_query_response, araxq.message
def QGI_test3():
    """Translate a linear two-hop query graph into ARAXi commands and print the outcome.

    Returns the response when the translation does not finish with status
    'OK' (after printing it at DEBUG level); otherwise prints the commands,
    the response log and the message as JSON, and returns None.
    """
    qnodes = {
        "n00": {"id": "MONDO:0002715"},
        "n01": {"category": "biolink:ChemicalSubstance"},
        "n02": {"category": "biolink:Gene"},
    }
    qedges = {
        "e00": {
            "predicate": "biolink:correlated_with",
            "subject": "n00",
            "object": "n01",
        },
        "e01": {
            "predicate": "biolink:related_to",
            "subject": "n01",
            "object": "n02",
        },
    }
    input_query_graph = {"message": {"query_graph": {"nodes": qnodes, "edges": qedges}}}

    #### Create a template Message
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)
    message = ARAXMessenger().from_dict(input_query_graph['message'])
    response.envelope.message.query_graph = message.query_graph

    interpreter = ARAXQueryGraphInterpreter()
    interpreter.translate_to_araxi(response)
    if response.status != 'OK':
        print(response.show(level=ARAXResponse.DEBUG))
        return response

    for araxi_command in response.data['araxi_commands']:
        print(f"  - {araxi_command}")

    #### Show the final result
    print('-------------------------')
    print(response.show(level=ARAXResponse.DEBUG))
    message_as_json = json.dumps(message.to_dict(), sort_keys=True, indent=2)
    print(message_as_json)
Esempio n. 13
0
    def apply(self, input_message, input_parameters):
        """Apply the filter described by ``input_parameters`` to ``input_message``.

        Allowed parameters are 'maximum_results', 'minimum_confidence' and
        'start_node'; only 'maximum_results' currently triggers any filtering.

        Args:
            input_message: Message to filter; stored on ``self.message``.
            input_parameters: Dict of parameter overrides.

        Returns:
            An ARAXResponse (also stored on ``self.response``). It carries an
            error if ``input_parameters`` is not a dict or names a parameter
            that is not permitted.
        """
        #### Define a default response
        response = ARAXResponse()
        self.response = response
        self.message = input_message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'maximum_results': None,
            'minimum_confidence': None,
            'start_node': 1
        }

        #### Override the defaults with the supplied values, rejecting unknown names.
        #### Every offending parameter is reported, not just the first one
        for name, supplied_value in input_parameters.items():
            if name in parameters:
                parameters[name] = supplied_value
                continue
            response.error(f"Supplied parameter {name} is not permitted",
                           error_code="UnknownParameter")
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        #### Now apply the filters. Order of operations is probably quite important
        #### Scalar value filters probably come first like minimum_confidence, then complex logic filters
        #### based on edge or node properties, and then finally maximum_results
        response.debug(f"Applying filter to Message with parameters {parameters}")

        #### Apply scalar value filters first to do easy things and reduce the problem
        # TODO

        #### Complex logic filters probably come next. These may be hard
        # TODO

        #### Finally, if the maximum_results parameter is set, then limit the number of results to that last
        maximum_results = parameters['maximum_results']
        if maximum_results is not None:
            self.__apply_maximum_results_filter(maximum_results)

        #### Return the response
        return response
Esempio n. 14
0
def determine_virtual_qedge_option_group(subject_qnode_key: str, object_qnode_key: str, query_graph: QueryGraph, log: ARAXResponse) -> Optional[str]:
    """Determine the option group ID a virtual qedge between two qnodes should have.

    Args:
        subject_qnode_key: Key of the qedge's subject qnode.
        object_qnode_key: Key of the qedge's object qnode.
        query_graph: Query graph holding the qnodes.
        log: Response object that receives an error if the qnodes conflict.

    Returns:
        The option group ID shared by the two qnodes (or set on just one of
        them); None if neither has one, or if they belong to different option
        groups (in which case an "InvalidQEdge" error is logged).
    """
    endpoint_keys = {subject_qnode_key, object_qnode_key}
    qnode_option_group_ids = set()
    for qnode_key, qnode in query_graph.nodes.items():
        if qnode_key in endpoint_keys and qnode.option_group_id:
            qnode_option_group_ids.add(qnode.option_group_id)

    if not qnode_option_group_ids:
        return None
    if len(qnode_option_group_ids) > 1:
        log.error(f"Cannot add a virtual qedge between two qnodes that belong to different option groups {qnode_option_group_ids}",
                  error_code="InvalidQEdge")
        return None
    # Exactly one distinct option group ID remains
    return next(iter(qnode_option_group_ids))
Esempio n. 15
0
 def _answer_query_using_neo4j(
         self, cypher_query: str, qedge_key: str, kg_name: str,
         log: ARAXResponse) -> List[Dict[str, List[Dict[str, any]]]]:
     """Run the cypher query for one query edge against the named knowledge graph.

     Args:
         cypher_query: Cypher query text to run.
         qedge_key: Key of the query edge being answered (used for logging).
         kg_name: Name of the knowledge graph whose neo4j is queried.
         log: Response object that receives log messages.

     Returns:
         Whatever ``self._run_cypher_query()`` returned, unmodified.
     """
     log.info(
         f"Sending cypher query for edge {qedge_key} to {kg_name} neo4j")
     # No post-processing happens here: the results are handed back as-is,
     # whatever status the query left on `log`
     return self._run_cypher_query(cypher_query, kg_name, log)
Esempio n. 16
0
def test_add_qnode_bad_parameters():
    """Check that add_qnode() rejects malformed parameters with the expected error code."""
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)
    assert response.status == 'OK'

    # (parameters handed to add_qnode, error code it should produce)
    bad_cases = [
        (['ids', 'PICKLES:123'], 'ParametersNotDict'),
        ({'pickles': 'on the side'}, 'UnknownParameter'),
        ({'ids': 'n2', 'category': 'biolink:Disease'}, 'UnknownParameter'),
    ]
    template_response = copy.deepcopy(response)
    for parameters, expected_error_code in bad_cases:
        # Start every case from a pristine copy so that errors do not carry over
        response = copy.deepcopy(template_response)
        message = response.envelope.message
        print({'parameters': parameters, 'error_code': expected_error_code})
        messenger.add_qnode(response, parameters)
        assert response.status == 'ERROR'
        assert len(message.query_graph.nodes) == 0
        assert response.error_code == expected_error_code
Esempio n. 17
0
def test_add_qedge_duplicate_key():
    """Check that adding a second qedge under an existing key is rejected."""
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)
    assert response.status == 'OK'
    message = response.envelope.message

    # Two qnodes joined by one qedge: this part must succeed
    subject_qnode = {'key': 'n00', 'ids': ['CHEMBL.COMPOUND:CHEMBL112']}
    object_qnode = {'key': 'n01', 'categories': ['biolink:Protein']}
    first_qedge = {'key': 'e00', 'subject': 'n00', 'object': 'n01'}
    messenger.add_qnode(response, subject_qnode)
    messenger.add_qnode(response, object_qnode)
    messenger.add_qedge(response, first_qedge)
    assert response.status == 'OK'

    # Re-using key 'e00' for another qedge must fail
    duplicate_qedge = {
        'key': 'e00',
        'subject': 'n00',
        'object': 'n01',
        'predicates': ['biolink:treats'],
    }
    messenger.add_qedge(response, duplicate_qedge)
    printable_edges = ast.literal_eval(repr(message.query_graph.edges))
    print(json.dumps(printable_edges, sort_keys=True, indent=2))
    assert response.status == 'ERROR'
    assert isinstance(message.query_graph.nodes, dict)
    assert len(message.query_graph.edges) == 1
    assert response.error_code == 'QEdgeDuplicateKey'
Esempio n. 18
0
def QGI_test4():
    """Translate a one-hop gene-disease query graph into ARAXi commands and print them.

    Returns the response when the translation does not finish with status
    'OK' (after printing it at DEBUG level); otherwise returns None.
    """
    qnodes = {
        "n00": {
            "categories": ["biolink:Gene"],
            "is_set": False,
        },
        "n01": {
            "ids": ["MONDO:0018177"],
            "categories": ["biolink:Disease"],
            "is_set": False,
        },
    }
    qedges = {
        "e00": {
            "subject": "n00",
            "object": "n01",
            "exclude": False,
        },
    }
    input_query_graph = {"message": {"query_graph": {"nodes": qnodes, "edges": qedges}}}

    #### Create a template Message
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)
    message = ARAXMessenger().from_dict(input_query_graph['message'])
    response.envelope.message.query_graph = message.query_graph

    interpreter = ARAXQueryGraphInterpreter()
    interpreter.translate_to_araxi(response)
    if response.status != 'OK':
        print(response.show(level=ARAXResponse.DEBUG))
        return response

    for araxi_command in response.data['araxi_commands']:
        print(f"  - {araxi_command}")
Esempio n. 19
0
 def __init__(self, log: ARAXResponse = None):
     """Set up file paths, the log, the list of KPs, the timeout record and the meta map.

     Args:
         log: Response object used for logging. When omitted, a fresh
             ARAXResponse is created for this instance.
     """
     module_dir = os.path.dirname(os.path.abspath(__file__))
     self.meta_map_path = f"{module_dir}/meta_map_v2.pickle"
     self.timeout_record_path = f"{module_dir}/kp_timeout_record.pickle"
     # The default is built here rather than in the signature: a default
     # written as ARAXResponse() would be evaluated once, when the function is
     # defined, and that single object would be shared by every instance
     self.log = ARAXResponse() if log is None else log
     self.all_kps = eu.get_all_kps()
     self.timeout_record = self._load_timeout_record()
     self.meta_map = self._load_meta_map()
     self.biolink_helper = BiolinkHelper()
Esempio n. 20
0
def test_create_message_basic():
    """Check that create_envelope() produces an OK response with the expected type and schema version."""
    response = ARAXResponse()
    ARAXMessenger().create_envelope(response)
    assert response.status == 'OK'

    envelope = response.envelope
    # Reading the attribute is part of the check: the envelope must carry a message
    message = envelope.message
    assert envelope.type == 'translator_reasoner_response'
    assert envelope.schema_version == '1.0.0'
Esempio n. 21
0
def check_for_canonical_predicates(
        kg: QGOrganizedKnowledgeGraph, kp_name: str,
        log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
    """Flip every edge in the knowledge graph that uses a non-canonical predicate.

    Args:
        kg: Knowledge graph whose edges are checked (and flipped via ``flip_edge``).
        kp_name: Name of the KP the answer came from (used in the warning).
        log: Response object that receives a warning if anything was corrected.

    Returns:
        The same knowledge graph object that was passed in.
    """
    non_canonical_predicates_used = set()
    biolink_helper = BiolinkHelper()
    for edges in kg.edges_by_qg_id.values():
        for edge in edges.values():
            reported_predicate = edge.predicate
            canonical_predicate = biolink_helper.get_canonical_predicates(reported_predicate)[0]
            if canonical_predicate == reported_predicate:
                continue
            non_canonical_predicates_used.add(reported_predicate)
            flip_edge(edge, canonical_predicate)

    if non_canonical_predicates_used:
        log.warning(
            f"{kp_name}: Found edges in {kp_name}'s answer that use non-canonical "
            f"predicates: {non_canonical_predicates_used}. I corrected these.")
    return kg
Esempio n. 22
0
def update_results_with_overlay_edge(subject_knode_key: str, object_knode_key: str, kedge_key: str, message: Message, log: ARAXResponse):
    """
    Bind an overlay edge to every result that contains both of its endpoint nodes.

    For each result, and each qedge key present in that result's edge bindings, the knowledge graph edge
    `kedge_key` is appended to that qedge's bindings when:
      * it is not already bound under that qedge key, and
      * both `subject_knode_key` and `object_knode_key` appear among the nodes bound to the qedge's
        subject and object qnodes.
    Qedge keys that are absent from the query graph are skipped with a warning.

    The same EdgeBinding object is appended to every qualifying result. Nothing is returned; `message.results`
    is modified in place. Any exception raised along the way is logged as an error (code "UncaughtError")
    rather than propagated.
    """
    try:
        new_edge_binding = EdgeBinding(id=kedge_key)
        for result in message.results:
            for qedge_key, edge_bindings in result.edge_bindings.items():
                if any(binding.id == kedge_key for binding in edge_bindings):
                    continue  # This overlay edge is already bound under this qedge
                if qedge_key not in message.query_graph.edges:
                    log.warning("Encountered a result edge binding which does not exist in the query graph")
                    continue
                qedge = message.query_graph.edges[qedge_key]
                subject_nodes = {binding.id for binding in result.node_bindings[qedge.subject]}
                object_nodes = {binding.id for binding in result.node_bindings[qedge.object]}
                result_nodes = subject_nodes | object_nodes
                if subject_knode_key in result_nodes and object_knode_key in result_nodes:
                    edge_bindings.append(new_edge_binding)
    except Exception:
        # Deliberately best-effort: report the problem through the log instead of raising.
        # (KeyboardInterrupt/SystemExit are no longer swallowed, unlike with a bare `except:`.)
        tb = traceback.format_exc()
        log.error(f"Error encountered when modifying results with overlay edge "
                  f"({subject_knode_key})-{kedge_key}-({object_knode_key}):\n{tb}",
                  error_code="UncaughtError")
Esempio n. 23
0
    def _load_answers_into_kg(
        self, neo4j_results: List[Dict[str, List[Dict[str, any]]]],
        kg_name: str, qg: QueryGraph, log: ARAXResponse
    ) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
        """
        Convert the first neo4j result table into a QG-organized knowledge graph.

        Only `neo4j_results[0]` is read. Columns whose names start with 'nodes'
        (e.g. 'nodes_n00') or 'edges' (e.g. 'edges_e01') are converted to
        swagger nodes/edges and added to the knowledge graph under the
        qnode/qedge key that follows the 'nodes_'/'edges_' prefix; other
        columns are ignored.

        Returns the knowledge graph plus a map of
        edge key -> {qnode key -> value stored under that qnode key on the neo4j edge}.
        """
        first_qedge_key = next(qedge_key for qedge_key in qg.edges)
        log.debug(f"Processing query results for edge {first_qedge_key}")

        answer_kg = QGOrganizedKnowledgeGraph()
        edge_to_nodes_map = {}
        results_table = neo4j_results[0]
        # The uuid -> curie lookup is only built for KG1
        if kg_name == "KG1":
            node_uuid_to_curie_dict = self._build_node_uuid_to_curie_dict(results_table)
        else:
            node_uuid_to_curie_dict = {}

        for column_name in list(results_table):
            column_rows = results_table.get(column_name)
            if column_name.startswith('nodes'):
                # Answer nodes for one qnode
                qnode_key_for_column = column_name.replace("nodes_", "", 1)
                for neo4j_node in column_rows:
                    node_key, node = self._convert_neo4j_node_to_swagger_node(neo4j_node, kg_name)
                    answer_kg.add_node(node_key, node, qnode_key_for_column)
            elif column_name.startswith('edges'):
                # Answer edges for one qedge
                qedge_key_for_column = column_name.replace("edges_", "", 1)
                for neo4j_edge in column_rows:
                    edge_key, edge = self._convert_neo4j_edge_to_swagger_edge(
                        neo4j_edge, node_uuid_to_curie_dict, kg_name)

                    # Remember what the neo4j edge holds under each qnode key
                    qnode_values = edge_to_nodes_map.setdefault(edge_key, {})
                    for qnode_key in qg.nodes:
                        qnode_values[qnode_key] = neo4j_edge.get(qnode_key)

                    answer_kg.add_edge(edge_key, edge, qedge_key_for_column)

        return answer_kg, edge_to_nodes_map
Esempio n. 24
0
 def _prune_highly_connected_nodes(kg: QGOrganizedKnowledgeGraph, qedge_key: str, input_curies: Set[str],
                                   input_qnode_key: str, max_edges_per_input_curie: int, log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
     # First create a lookup of which edges belong to which input curies
     input_nodes_to_edges_dict = defaultdict(set)
     for edge_key, edge in kg.edges_by_qg_id[qedge_key].items():
         if edge.subject in input_curies:
             input_nodes_to_edges_dict[edge.subject].add(edge_key)
         if edge.object in input_curies:
             input_nodes_to_edges_dict[edge.object].add(edge_key)
     # Then prune down highly-connected nodes (delete edges per input curie in excess of some set limit)
     for node_key, connected_edge_keys in input_nodes_to_edges_dict.items():
         connected_edge_keys_list = list(connected_edge_keys)
         if len(connected_edge_keys_list) > max_edges_per_input_curie:
             random.shuffle(connected_edge_keys_list)  # Make it random which edges we keep for this input curie
             edge_keys_to_remove = connected_edge_keys_list[max_edges_per_input_curie:]
             log.debug(f"Randomly removing {len(edge_keys_to_remove)} edges from answer for input curie {node_key}")
             for edge_key in edge_keys_to_remove:
                 kg.edges_by_qg_id[qedge_key].pop(edge_key, None)
             # Document that not all answers for this input curie are included
             node = kg.nodes_by_qg_id[input_qnode_key].get(node_key)
             if node:
                 if not node.attributes:
                     node.attributes = []
                 if not any(attribute.attribute_type_id == "biolink:incomplete_result_set"
                            for attribute in node.attributes):
                     node.attributes.append(Attribute(attribute_type_id="biolink:incomplete_result_set",  # TODO: request this as actual biolink item?
                                                      value_type_id="metatype:Boolean",
                                                      value=True,
                                                      attribute_source="infores:rtx-kg2",
                                                      description=f"This attribute indicates that not all "
                                                                  f"nodes/edges returned as answers for this input "
                                                                  f"curie were included in the final answer due to "
                                                                  f"size limitations. {max_edges_per_input_curie} "
                                                                  f"edges for this input curie were kept."))
     # Then delete any nodes orphaned by removal of edges
     node_keys_used_by_edges = kg.get_all_node_keys_used_by_edges()
     for qnode_key, nodes in kg.nodes_by_qg_id.items():
         orphan_node_keys = set(nodes).difference(node_keys_used_by_edges)
         if orphan_node_keys:
             log.debug(f"Removing {len(orphan_node_keys)} {qnode_key} nodes orphaned by the above step")
             for orphan_node_key in orphan_node_keys:
                 del kg.nodes_by_qg_id[qnode_key][orphan_node_key]
     return kg
Esempio n. 25
0
def test_create_message_node_edge_types():
    """A new envelope's knowledge graph and query graph both hold nodes and edges as dicts."""
    arax_response = ARAXResponse()
    ARAXMessenger().create_envelope(arax_response)
    assert arax_response.status == 'OK'

    message = arax_response.envelope.message
    knowledge_graph = message.knowledge_graph
    assert isinstance(knowledge_graph.nodes, dict)
    assert isinstance(knowledge_graph.edges, dict)
    query_graph = message.query_graph
    assert isinstance(query_graph.nodes, dict)
    assert isinstance(query_graph.edges, dict)
Esempio n. 26
0
def test_add_qnode_type():
    """Adding a qnode with only a category creates node 'n00' carrying that category."""
    arax_response = ARAXResponse()
    arax_messenger = ARAXMessenger()
    arax_messenger.create_envelope(arax_response)
    assert arax_response.status == 'OK'
    message = arax_response.envelope.message

    qnode_parameters = {'category': 'biolink:Protein'}
    arax_messenger.add_qnode(arax_response, qnode_parameters)
    assert arax_response.status == 'OK'

    qnodes = message.query_graph.nodes
    assert isinstance(qnodes, dict)
    assert len(qnodes) == 1
    assert qnodes['n00'].category == 'biolink:Protein'
Esempio n. 27
0
def test_add_qnode_name():
    """Adding a qnode by name ('acetaminophen') creates node 'n00' with the expected curie as its id."""
    arax_response = ARAXResponse()
    arax_messenger = ARAXMessenger()
    arax_messenger.create_envelope(arax_response)
    assert arax_response.status == 'OK'
    message = arax_response.envelope.message

    qnode_parameters = {'name': 'acetaminophen'}
    arax_messenger.add_qnode(arax_response, qnode_parameters)
    assert arax_response.status == 'OK'

    qnodes = message.query_graph.nodes
    assert isinstance(qnodes, dict)
    assert len(qnodes) == 1
    assert qnodes['n00'].id == 'CHEMBL.COMPOUND:CHEMBL112'
Esempio n. 28
0
def test_add_qnode_curie_list():
    """Adding a qnode with a list of two curies creates one node 'n00' whose id has length 2."""
    arax_response = ARAXResponse()
    arax_messenger = ARAXMessenger()
    arax_messenger.create_envelope(arax_response)
    assert arax_response.status == 'OK'
    message = arax_response.envelope.message

    qnode_parameters = {'id': ['UniProtKB:P14136', 'UniProtKB:P35579']}
    arax_messenger.add_qnode(arax_response, qnode_parameters)
    assert arax_response.status == 'OK'

    qnodes = message.query_graph.nodes
    assert isinstance(qnodes, dict)
    assert len(qnodes) == 1
    assert len(qnodes['n00'].id) == 2
Esempio n. 29
0
def test_add_qnode_basic():
    """Adding a qnode with no parameters creates a single node 'n00' whose id is None."""
    response = ARAXResponse()
    messenger = ARAXMessenger()
    messenger.create_envelope(response)
    assert response.status == 'OK'
    message = response.envelope.message
    messenger.add_qnode(response, {})
    assert response.status == 'OK'
    assert isinstance(message.query_graph.nodes, dict)
    assert len(message.query_graph.nodes) == 1
    # Identity check: `== None` could be satisfied by an object with a permissive __eq__
    assert message.query_graph.nodes['n00'].id is None
Esempio n. 30
0
def test_add_qnode_bad_name():
    """Adding a qnode with the name 'Big Bird' sets an ERROR status with code 'UnresolvableNodeName' and adds no node."""
    arax_response = ARAXResponse()
    arax_messenger = ARAXMessenger()
    arax_messenger.create_envelope(arax_response)
    assert arax_response.status == 'OK'
    message = arax_response.envelope.message

    qnode_parameters = {'name': 'Big Bird'}
    arax_messenger.add_qnode(arax_response, qnode_parameters)
    assert arax_response.status == 'ERROR'

    qnodes = message.query_graph.nodes
    assert isinstance(qnodes, dict)
    assert len(qnodes) == 0
    assert arax_response.error_code == 'UnresolvableNodeName'