def get_preferred_curies(curie: Union[str, List[str]], log: Response) -> Dict[str, Dict[str, str]]:
    """
    Ask the NodeSynonymizer for canonical curie info for one or more curies.

    :param curie: A single curie or a list of curies (normalized to a list before the lookup).
    :param log: Response object that receives debug/warning/error messages.
    :return: The dict returned by NodeSynonymizer.get_canonical_curies(), or an empty dict if the
             synonymizer raised an exception or returned None (an error is logged in both cases).
    """
    curie_list = convert_string_or_list_to_list(curie)
    try:
        node_synonymizer = NodeSynonymizer()
        log.debug(f"Sending NodeSynonymizer.get_canonical_curies() a list of {len(curie_list)} curies")
        canonical_info = node_synonymizer.get_canonical_curies(curie_list)
        log.debug("Got response back from NodeSynonymizer")
    except Exception as problem:
        # Report the failure through the log (with the exception class as the error code) rather than raising
        trace = traceback.format_exc()
        log.error(f"Encountered a problem using NodeSynonymizer: {trace}", error_code=type(problem).__name__)
        return {}

    if canonical_info is None:
        log.error("NodeSynonymizer returned None", error_code="NodeNormalizationIssue")
        return {}

    # Any input whose entry is empty/None was not recognized by the synonymizer
    curies_without_info = {input_curie for input_curie, info in canonical_info.items() if not info}
    if curies_without_info:
        log.warning(f"NodeSynonymizer did not return canonical info for: {curies_without_info}")
    return canonical_info
def _remove_self_edges(kg: DictKnowledgeGraph, edge_to_nodes_map: Dict[str, Dict[str, str]], qedge_id: QEdge,
                       qnodes: List[QNode], log: Response) -> DictKnowledgeGraph:
    """
    Drop edges whose source and target are the same node, then drop any nodes left without an edge.

    :param kg: Answer knowledge graph; it is modified in place and also returned.
    :param edge_to_nodes_map: For each edge ID, which node ID fulfills each qnode ID.
    :param qedge_id: Key into kg.edges_by_qg_id for the edges to inspect.
                     NOTE(review): annotated as QEdge but used as a dict key - presumably a str ID; confirm.
    :param qnodes: The query nodes whose answer nodes should be checked for orphans.
    :param log: Response object that receives debug messages.
    :return: The same (now pruned) knowledge graph.
    """
    log.debug("Removing any self-edges from the answer KG")

    # Collect the keys first so the dict isn't mutated while it's being iterated
    edges_for_qedge = kg.edges_by_qg_id[qedge_id]
    self_edge_keys = [key for key, edge in edges_for_qedge.items() if edge.source_id == edge.target_id]
    for key in self_edge_keys:
        edges_for_qedge.pop(key)

    # Removing self-edges may have orphaned some nodes; remove those as well
    for qnode in qnodes:
        node_ids_in_use = {edge_to_nodes_map[edge.id][qnode.id] for edge in edges_for_qedge.values()}
        nodes_for_qnode = kg.nodes_by_qg_id[qnode.id]
        for orphan_id in set(nodes_for_qnode.keys()) - node_ids_in_use:
            nodes_for_qnode.pop(orphan_id)

    log.debug(f"After removing self-edges, answer KG counts are: {eu.get_printable_counts_by_qg_id(kg)}")
    return kg
def _load_answers_into_kg(self, neo4j_results: List[Dict[str, List[Dict[str, any]]]], kp: str,
                          query_graph: QueryGraph,
                          log: Response) -> Tuple[DictKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Convert the first row of neo4j results into a DictKnowledgeGraph.

    Columns whose names start with 'nodes' (e.g. 'nodes_n00') are loaded as nodes and columns whose
    names start with 'edges' (e.g. 'edges_e01') as edges; the remainder of the column name is used
    as the qnode/qedge ID.

    :param neo4j_results: Query results; only element 0 is read.
    :param kp: Knowledge provider name; "KG2" selects the KG2 edge converter, anything else the KG1 one.
    :param query_graph: The one-hop query graph that produced the results.
    :param log: Response object that receives debug messages.
    :return: The answer KG and a map of edge ID -> {qnode ID: value stored under that qnode ID on the raw edge}.
    """
    log.debug(f"Processing query results for edge {query_graph.edges[0].id}")
    answer_kg = DictKnowledgeGraph()
    node_usages_by_edge = dict()
    if kp == "KG1":
        uuid_to_curie = self._build_node_uuid_to_curie_dict(neo4j_results[0])
    else:
        uuid_to_curie = dict()
    table = neo4j_results[0]

    for column in list(table):
        if column.startswith('nodes'):
            # Example column name: 'nodes_n00'
            qnode_id = column.replace("nodes_", "", 1)
            for raw_node in table.get(column):
                converted_node = self._convert_neo4j_node_to_swagger_node(raw_node, kp)
                answer_kg.add_node(converted_node, qnode_id)
        elif column.startswith('edges'):
            # Example column name: 'edges_e01'
            qedge_id = column.replace("edges_", "", 1)
            for raw_edge in table.get(column):
                if kp == "KG2":
                    converted_edge = self._convert_kg2_edge_to_swagger_edge(raw_edge)
                else:
                    converted_edge = self._convert_kg1_edge_to_swagger_edge(raw_edge, uuid_to_curie)

                # Remember which of this edge's nodes fulfills which qnode ID
                usages = node_usages_by_edge.setdefault(converted_edge.id, dict())
                for qnode in query_graph.nodes:
                    usages[qnode.id] = raw_edge.get(qnode.id)

                answer_kg.add_edge(converted_edge, qedge_id)

    return answer_kg, node_usages_by_edge
def apply(self, input_message: Message, input_parameters: dict) -> Response:
    """
    Run the resultifier on a Message.

    :param input_message: The Message to operate on; stored on self.message.
    :param input_parameters: Dict of parameters; stored on self.parameters and in response.data.
    :return: A new Response object (also stored on self.response) describing the outcome.
    """
    result = Response()
    self.response = result
    self.message = input_message

    # The parameters must arrive as a dict
    if not isinstance(input_parameters, dict):
        result.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return result

    # Bail out if anything so far has put the response into a non-OK state
    if result.status != 'OK':
        return result

    # Keep the parameters around for convenience
    result.data['parameters'] = input_parameters
    self.parameters = input_parameters

    result.debug(f"Applying Resultifier to Message with parameters {input_parameters}")

    # Build the results, then clean up the KG (should only contain nodes used in the results)
    self._resultify(describe=False)
    self._clean_up_kg()

    return result
def apply(self, input_message, input_parameters, response=None):
    """
    Validate the requested overlay action and dispatch to the matching private method.

    :param input_message: The Message to operate on; stored on self.message.
    :param input_parameters: Dict of parameters; must contain an 'action' listed in self.allowable_actions.
    :param response: Optional existing Response to log into; a new one is created when None.
    :return: The Response (passed through self.report_response_stats() when self.report_stats is truthy).
    """
    if response is None:
        response = Response()
    self.response = response
    self.message = input_message

    # The parameters must arrive as a dict
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    # Make sure an action was supplied and that it is one of the actions created so far
    permitted_actions = self.allowable_actions
    if 'action' not in input_parameters:
        response.error(f"Must supply an action. Allowable actions are: action={permitted_actions}",
                       error_code="MissingAction")
    elif input_parameters['action'] not in permitted_actions:
        response.error(f"Supplied action {input_parameters['action']} is not permitted. "
                       f"Allowable actions are: {permitted_actions}",
                       error_code="UnknownAction")

    # Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    # Work on a shallow copy of the supplied parameters and keep it around for convenience
    parameters = dict(input_parameters.items())
    response.data['parameters'] = parameters
    self.parameters = parameters

    # Turn the action string into a call to the name-mangled private method (avoids a long if/elif chain)
    # See https://stackoverflow.com/questions/11649848/call-methods-by-string
    method_name = f"_{self.__class__.__name__}__{parameters['action']}"
    getattr(self, method_name)()

    # TODO: re-write this to be more specific about the actual action
    response.debug(f"Applying Overlay to Message with parameters {parameters}")

    # TODO: add_pubmed_ids
    # TODO: compute_confidence_scores
    # TODO: finish COHD
    # TODO: Jaccard

    # Helper to report information in debug if self.report_stats is set
    if self.report_stats:
        response = self.report_response_stats(response)
    return response
def apply(self, input_message, input_parameters):
    """
    Apply result filters to a Message; currently only 'maximum_results' has any effect.

    :param input_message: The Message to operate on; stored on self.message.
    :param input_parameters: Dict that may override 'maximum_results', 'minimum_confidence' and
                             'start_node'; any other key is reported as an error.
    :return: A new Response object (also stored on self.response) describing the outcome.
    """
    response = Response()
    self.response = response
    self.message = input_message

    # The parameters must arrive as a dict
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    # The complete set of allowed parameters and their defaults
    parameters = {
        'maximum_results': None,
        'minimum_confidence': None,
        'start_node': 1
    }

    # Override the defaults with the supplied values, flagging every unknown parameter
    for name, supplied_value in input_parameters.items():
        if name in parameters:
            parameters[name] = supplied_value
        else:
            response.error(f"Supplied parameter {name} is not permitted", error_code="UnknownParameter")

    # Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    # Keep the final parameters around for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    # Order of operations is probably quite important: scalar value filters (like minimum_confidence)
    # would come first, then complex logic filters based on edge or node properties, and maximum_results last
    response.debug(f"Applying filter to Message with parameters {parameters}")

    # TODO: scalar value filters
    # TODO: complex logic filters

    # Finally, limit the number of results if maximum_results was set
    result_limit = parameters['maximum_results']
    if result_limit is not None:
        self.__apply_maximum_results_filter(result_limit)

    return response
def _expand_node(self, qnode_id: str, kp_to_use: str, continue_if_no_results: bool, query_graph: QueryGraph,
                 use_synonyms: bool, synonym_handling: str, log: Response) -> DictKnowledgeGraph:
    """
    Expand a single query node using the specified knowledge provider.

    :param qnode_id: ID of the query node to expand; it must have a curie.
    :param kp_to_use: Knowledge provider; only "ARAX/KG1" and "ARAX/KG2" support single-node queries.
    :param continue_if_no_results: When False, an empty answer for the qnode is logged as an error.
    :param query_graph: The query graph containing the qnode.
    :param use_synonyms: Whether to replace the qnode's curie with its synonyms before querying.
    :param synonym_handling: Answer nodes are deduplicated unless this is 'add_all'.
    :param log: Response object that receives messages; its status is used for error detection.
    :return: The answer KG (empty if a problem was encountered before the KP was queried).
    """
    log.debug(f"Expanding node {qnode_id} using {kp_to_use}")
    query_node = eu.get_query_node(query_graph, qnode_id)
    answer_kg = DictKnowledgeGraph()
    if log.status != 'OK':
        return answer_kg
    if not query_node.curie:
        log.error(f"Cannot expand a single query node if it doesn't have a curie", error_code="InvalidQuery")
        return answer_kg

    # Work on a copy so the caller's query graph isn't modified
    copy_of_qnode = eu.copy_qnode(query_node)

    if use_synonyms:
        self._add_curie_synonyms_to_query_nodes(qnodes=[copy_of_qnode], log=log, kp=kp_to_use)
        # Don't query the KP if the synonym lookup reported an error; the qnode's curie may have been
        # left empty, which would turn this into an unconstrained query
        if log.status != 'OK':
            return answer_kg
    if copy_of_qnode.type in ["protein", "gene"]:
        copy_of_qnode.type = ["protein", "gene"]
    log.debug(f"Modified query node is: {copy_of_qnode.to_dict()}")

    # Answer the query using the proper KP
    valid_kps_for_single_node_queries = ["ARAX/KG1", "ARAX/KG2"]
    if kp_to_use not in valid_kps_for_single_node_queries:
        log.error(f"Invalid knowledge provider: {kp_to_use}. Valid options for single-node queries are "
                  f"{', '.join(valid_kps_for_single_node_queries)}",
                  error_code="InvalidKP")
        return answer_kg

    from Expand.kg_querier import KGQuerier
    kg_querier = KGQuerier(log, kp_to_use)
    answer_kg = kg_querier.answer_single_node_query(copy_of_qnode)
    log.info(f"Query for node {copy_of_qnode.id} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")

    # Make sure all qnodes have been fulfilled (unless we're continuing if no results)
    if log.status == 'OK' and not continue_if_no_results:
        if copy_of_qnode.id not in answer_kg.nodes_by_qg_id or not answer_kg.nodes_by_qg_id[copy_of_qnode.id]:
            log.error(f"Returned answer KG does not contain any results for QNode {copy_of_qnode.id}",
                      error_code="UnfulfilledQGID")
            return answer_kg

    if synonym_handling != 'add_all':
        # A single-node answer has no edge-to-node usages to track, so the returned map is discarded
        answer_kg, _ = self._deduplicate_nodes(dict_kg=answer_kg, edge_to_nodes_map={}, log=log)
    return answer_kg
def _merge_answer_into_message_kg(answer_dict_kg: DictKnowledgeGraph, dict_kg: DictKnowledgeGraph, log: Response):
    """
    Merge an answer KG (from the current edge/node expansion) into the overarching KG.

    :param answer_dict_kg: The KG whose nodes and edges should be copied over.
    :param dict_kg: The overarching KG; every node/edge is added to it under its qnode/qedge ID.
    :param log: Response object that receives debug messages.
    """
    log.debug("Merging answer into Message.KnowledgeGraph")
    for qnode_id, answer_nodes in answer_dict_kg.nodes_by_qg_id.items():
        for answer_node in answer_nodes.values():
            dict_kg.add_node(answer_node, qnode_id)
    for qedge_id, answer_edges in answer_dict_kg.edges_by_qg_id.items():
        for answer_edge in answer_edges.values():
            dict_kg.add_edge(answer_edge, qedge_id)
def _convert_one_hop_query_graph_to_cypher_query(self, query_graph: QueryGraph, enforce_directionality: bool,
                                                 kp: str, log: Response) -> str:
    """
    Build the cypher query that answers a one-hop (single-edge) query graph.

    :param query_graph: Query graph; only its first edge and that edge's two nodes are used.
    :param enforce_directionality: Passed through to the helper that renders the edge pattern.
    :param kp: Knowledge provider name; if it contains "KG2" node types are matched on 'category_label',
               otherwise on 'category'.
    :param log: Response object that receives debug/error messages.
    :return: The cypher query, or an empty string if anything went wrong while building it (an error
             is logged in that case).
    """
    log.debug(f"Generating cypher for edge {query_graph.edges[0].id} query graph")
    try:
        # Build the match clause
        qedge = query_graph.edges[0]
        source_qnode = eu.get_query_node(query_graph, qedge.source_id)
        target_qnode = eu.get_query_node(query_graph, qedge.target_id)
        qedge_cypher = self._get_cypher_for_query_edge(qedge, enforce_directionality)
        source_qnode_cypher = self._get_cypher_for_query_node(source_qnode)
        target_qnode_cypher = self._get_cypher_for_query_node(target_qnode)
        match_clause = f"MATCH {source_qnode_cypher}{qedge_cypher}{target_qnode_cypher}"

        # Build the where clause
        where_fragments = []
        for qnode in [source_qnode, target_qnode]:
            if qnode.curie:
                if isinstance(qnode.curie, str):
                    # Escape backslashes and single quotes so the curie can't terminate the string literal
                    escaped_curie = qnode.curie.replace("\\", "\\\\").replace("'", "\\'")
                    node_id_where_fragment = f"{qnode.id}.id='{escaped_curie}'"
                else:
                    # A list of curies is rendered using Python's list repr
                    node_id_where_fragment = f"{qnode.id}.id in {qnode.curie}"
                where_fragments.append(node_id_where_fragment)
            if qnode.type and isinstance(qnode.type, list):
                node_type_property = "category_label" if "KG2" in kp else "category"
                where_fragments.append(f"{qnode.id}.{node_type_property} in {qnode.type}")
        where_clause = f"WHERE {' AND '.join(where_fragments)}" if where_fragments else ""

        # Build the with clause
        source_qnode_col_name = f"nodes_{source_qnode.id}"
        target_qnode_col_name = f"nodes_{target_qnode.id}"
        qedge_col_name = f"edges_{qedge.id}"
        # This grabs the edge's ID and a record of which of its nodes correspond to which qnode ID
        extra_edge_properties = "{.*, " + f"id:ID({qedge.id}), {source_qnode.id}:{source_qnode.id}.id, " \
                                          f"{target_qnode.id}:{target_qnode.id}.id" + "}"
        with_clause = f"WITH collect(distinct {source_qnode.id}) as {source_qnode_col_name}, " \
                      f"collect(distinct {target_qnode.id}) as {target_qnode_col_name}, " \
                      f"collect(distinct {qedge.id}{extra_edge_properties}) as {qedge_col_name}"

        # Build the return clause
        return_clause = f"RETURN {source_qnode_col_name}, {target_qnode_col_name}, {qedge_col_name}"

        cypher_query = f"{match_clause} {where_clause} {with_clause} {return_clause}"
        return cypher_query
    except Exception as error:
        # Any failure is reported via the log (exception class as the error code) instead of being raised
        tb = traceback.format_exc()
        log.error(f"Problem generating cypher for query. {tb}", error_code=type(error).__name__)
        return ""
def get_curie_synonyms(curie: Union[str, List[str]], log: Response) -> List[str]:
    """
    Find all curies the NodeSynonymizer considers equivalent to the given curie(s).

    :param curie: A single curie or a list of curies (normalized to a list before the lookup).
    :param log: Response object that receives debug/warning/error messages.
    :return: Sorted list containing the input curies plus every equivalent curie found, or an empty
             list if the synonymizer raised an exception or returned None (an error is logged in
             both cases).
    """
    input_curies = convert_string_or_list_to_list(curie)
    try:
        node_synonymizer = NodeSynonymizer()
        log.debug(f"Sending NodeSynonymizer.get_equivalent_nodes() a list of {len(input_curies)} curies")
        synonyms_by_curie = node_synonymizer.get_equivalent_nodes(input_curies, kg_name="KG2")
        log.debug("Got response back from NodeSynonymizer")
    except Exception as problem:
        # Report the failure through the log (with the exception class as the error code) rather than raising
        trace = traceback.format_exc()
        log.error(f"Encountered a problem using NodeSynonymizer: {trace}", error_code=type(problem).__name__)
        return []

    if synonyms_by_curie is None:
        log.error("NodeSynonymizer returned None", error_code="NodeNormalizationIssue")
        return []

    curies_missing_info = {key for key in synonyms_by_curie if not synonyms_by_curie.get(key)}
    if curies_missing_info:
        log.warning(f"NodeSynonymizer did not find any equivalent curies for: {curies_missing_info}")

    # Gather every equivalent curie, skipping inputs for which nothing was found
    gathered_curies = set()
    for synonyms in synonyms_by_curie.values():
        if synonyms:
            gathered_curies.update(synonyms)
    # Make sure even curies without synonyms are included
    gathered_curies.update(set(input_curies))
    return sorted(gathered_curies)
def _answer_query_using_bte(self, input_qnode: QNode, output_qnode: QNode, qedge: QEdge,
                            answer_kg: DictKnowledgeGraph, valid_bte_inputs_dict: Dict[str, Set[str]],
                            log: Response) -> Tuple[DictKnowledgeGraph, Set[str]]:
    """
    Send a single-edge query to BTE, one input curie at a time, adding findings to the answer KG.

    :param input_qnode: Query node whose curies are used as query inputs; its curie and type
                        attributes are iterated, so both are expected to be lists.
    :param output_qnode: Query node on the other end of the edge; its type is iterated as well.
    :param qedge: The query edge; its type is used as the predicate.
    :param answer_kg: Knowledge graph that the answers are accumulated into.
    :param valid_bte_inputs_dict: Must contain 'curie_prefixes', the set of prefixes accepted as inputs.
    :param log: Response object that receives debug/error messages.
    :return: The answer KG and the set of input curies whose prefix was accepted. If a query fails,
             an error is logged and whatever was gathered so far is returned immediately.
    """
    accepted_curies = set()
    for curie in input_qnode.curie:
        # Consider all different combinations of qnode types (can be multiple if gene/protein)
        for input_qnode_type, output_qnode_type in itertools.product(input_qnode.type, output_qnode.type):
            if eu.get_curie_prefix(curie) in valid_bte_inputs_dict['curie_prefixes']:
                accepted_curies.add(curie)
                loop = None
                try:
                    loop = asyncio.new_event_loop()
                    seqd = SingleEdgeQueryDispatcher(input_cls=input_qnode_type,
                                                     output_cls=output_qnode_type,
                                                     pred=qedge.type,
                                                     input_id=eu.get_curie_prefix(curie),
                                                     values=eu.get_curie_local_id(curie),
                                                     loop=loop)
                    log.debug(f"Sending query to BTE: {curie}-{qedge.type if qedge.type else ''}->{output_qnode_type}")
                    seqd.query()
                    reasoner_std_response = seqd.to_reasoner_std()
                except Exception as error:
                    trace_back = traceback.format_exc()
                    log.error(f"Encountered a problem while using BioThings Explorer. {trace_back}",
                              error_code=type(error).__name__)
                    return answer_kg, accepted_curies
                else:
                    answer_kg = self._add_answers_to_kg(answer_kg, reasoner_std_response, input_qnode.id,
                                                        output_qnode.id, qedge.id, log)
                finally:
                    # A new loop is created for every query; close it so its resources aren't leaked
                    if loop is not None:
                        loop.close()
    return answer_kg, accepted_curies
def _add_curie_synonyms_to_query_nodes(qnodes: List[QNode], log: Response, kp: str):
    """
    Replace the curie of every query node that has one with the list of its equivalent curies.

    :param qnodes: Query nodes to update in place; nodes without a curie are left alone.
    :param log: Response object that receives debug messages (and is passed to the synonym lookup).
    :param kp: Knowledge provider name; unless it contains "BTE", the qnode's type is cleared whenever
               its curie is replaced.
    """
    log.debug("Looking for query nodes to use curie synonyms for")
    for qnode in qnodes:
        if not qnode.curie:
            continue
        log.debug(f"Getting curie synonyms for qnode {qnode.id} using the NodeSynonymizer")
        synonymized_curies = eu.get_curie_synonyms(qnode.curie, log)
        if not synonymized_curies:
            # Nothing came back (presumably the lookup failed and already logged an error - confirm
            # against eu.get_curie_synonyms); keep the original curie and type rather than wiping them
            continue
        log.debug(f"Using {len(synonymized_curies)} equivalent curies for qnode {qnode.id}")
        qnode.curie = synonymized_curies
        if "BTE" not in kp:
            qnode.type = None  # Important to clear when using synonyms; otherwise we're limited #889
def check_for_query_graph_tags(self, message, query_graph_info):
    """
    Tally how many KnowledgeGraph nodes and edges are tagged with a qnode_id/qedge_id.

    Populates self.n_nodes, self.n_edges, self.node_map, self.edge_map,
    self.query_graph_id_node_status and self.query_graph_id_edge_status.

    :param message: Message whose knowledge_graph is inspected; stored on self.message.
    :param query_graph_info: Not used by this method.
    :return: A new Response object (also stored on self.response) with the logged findings.
    """
    response = Response()
    self.response = response
    self.message = message
    response.debug(f"Checking KnowledgeGraph for QueryGraph tags")

    # Shorter handles
    knowledge_graph = message.knowledge_graph
    kg_nodes = knowledge_graph.nodes
    kg_edges = knowledge_graph.edges

    # Record the number of nodes and edges
    self.n_nodes = len(kg_nodes)
    self.n_edges = len(kg_edges)
    response.debug(f"Found {self.n_nodes} nodes and {self.n_edges} edges")

    # Start from empty maps
    self.node_map = {'by_qnode_id': {}}
    self.edge_map = {'by_qedge_id': {}}

    # Count the tagged nodes, indexing each one by its qnode_id
    tagged_node_count = 0
    nodes_by_qnode_id = self.node_map['by_qnode_id']
    for node in kg_nodes:
        node_id = node.id
        if node.qnode_id is None:
            continue
        tagged_node_count += 1
        nodes_by_qnode_id.setdefault(node.qnode_id, {})[node_id] = 1

    if tagged_node_count == self.n_nodes:
        self.query_graph_id_node_status = 'all nodes have query_graph_ids'
    elif tagged_node_count == 0:
        self.query_graph_id_node_status = 'no nodes have query_graph_ids'
    else:
        self.query_graph_id_node_status = 'only some nodes have query_graph_ids'
    response.info(f"In the KnowledgeGraph, {self.query_graph_id_node_status}")

    # Count the tagged edges, indexing each one by its qedge_id
    tagged_edge_count = 0
    edges_by_qedge_id = self.edge_map['by_qedge_id']
    for edge in kg_edges:
        edge_id = edge.id
        if edge.qedge_id is None:
            continue
        tagged_edge_count += 1
        edges_by_qedge_id.setdefault(edge.qedge_id, {})[edge_id] = 1

    if tagged_edge_count == self.n_edges:
        self.query_graph_id_edge_status = 'all edges have query_graph_ids'
    elif tagged_edge_count == 0:
        self.query_graph_id_edge_status = 'no edges have query_graph_ids'
    else:
        self.query_graph_id_edge_status = 'only some edges have query_graph_ids'
    response.info(f"In the KnowledgeGraph, {self.query_graph_id_edge_status}")

    return response
def _deduplicate_nodes(dict_kg: DictKnowledgeGraph, edge_to_nodes_map: Dict[str, Dict[str, str]],
                       log: Response) -> Tuple[DictKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Collapse synonymous nodes in a KG onto their preferred curies and re-point the edges accordingly.

    Nodes and edges of the input KG are modified in place (their IDs are rewritten) and added to a
    new KG, which is what gets returned.

    :param dict_kg: The KG to deduplicate.
    :param edge_to_nodes_map: For each edge ID, which node ID fulfills each qnode ID.
    :param log: Response object that receives messages; its status is used for error detection.
    :return: The deduplicated KG and an edge-to-nodes map that uses the preferred curies. If an error
             occurs part-way through, whatever has been built so far is returned.
    """
    log.debug(f"Deduplicating nodes")
    deduplicated_kg = DictKnowledgeGraph(nodes={qnode_id: dict() for qnode_id in dict_kg.nodes_by_qg_id},
                                         edges={qedge_id: dict() for qedge_id in dict_kg.edges_by_qg_id})
    updated_edge_to_nodes_map = {edge_id: dict() for edge_id in edge_to_nodes_map}
    curie_mappings = dict()  # Original node ID -> preferred curie
    canonical_info_by_node_id = dict()  # Original node ID -> canonical info (None if unrecognized)

    # First deduplicate the nodes
    for qnode_id, nodes in dict_kg.nodes_by_qg_id.items():
        # Load preferred curie info from NodeSynonymizer for nodes we haven't seen before
        unmapped_node_ids = set(nodes).difference(curie_mappings)
        log.debug(f"Getting preferred curies for {qnode_id} nodes returned in this step")
        canonicalized_nodes = eu.get_preferred_curies(list(unmapped_node_ids), log) if unmapped_node_ids else dict()
        if log.status != 'OK':
            return deduplicated_kg, updated_edge_to_nodes_map

        for node_id in unmapped_node_ids:
            canonicalized_node = canonicalized_nodes.get(node_id)
            canonical_info_by_node_id[node_id] = canonicalized_node
            if canonicalized_node:
                curie_mappings[node_id] = canonicalized_node.get('preferred_curie', node_id)
            else:
                # Means the NodeSynonymizer didn't recognize this curie
                curie_mappings[node_id] = node_id

        # Go through ALL nodes for this qnode ID, including those whose mapping was already worked out
        # under an earlier qnode ID; otherwise a node fulfilling several qnode IDs would only be
        # kept under the first of them
        for node_id, node in nodes.items():
            preferred_curie = curie_mappings[node_id]
            # TODO: merge certain fields, like uri?
            if preferred_curie in deduplicated_kg.nodes_by_qg_id[qnode_id]:
                continue
            canonicalized_node = canonical_info_by_node_id.get(node_id)
            if canonicalized_node:
                preferred_name = canonicalized_node.get('preferred_name', node.name)
                preferred_type = eu.convert_string_or_list_to_list(
                    canonicalized_node.get('preferred_type', node.type))
            else:
                preferred_name = node.name
                preferred_type = node.type
            node.id = preferred_curie
            node.name = preferred_name
            node.type = preferred_type
            deduplicated_kg.add_node(node, qnode_id)

    # Then update the edges to reflect changes made to the nodes
    for qedge_id, edges in dict_kg.edges_by_qg_id.items():
        for edge_id, edge in edges.items():
            edge.source_id = curie_mappings.get(edge.source_id)
            edge.target_id = curie_mappings.get(edge.target_id)
            if not edge.source_id or not edge.target_id:
                log.error(f"Could not find preferred curie mappings for edge {edge_id}'s node(s)")
                return deduplicated_kg, updated_edge_to_nodes_map
            deduplicated_kg.add_edge(edge, qedge_id)

            # Update the edge-to-node map for this edge (used down the line for pruning)
            for mapped_qnode_id, corresponding_node_id in edge_to_nodes_map[edge_id].items():
                updated_edge_to_nodes_map[edge_id][mapped_qnode_id] = curie_mappings.get(corresponding_node_id)

    log.debug(f"After deduplication, answer KG counts are: {eu.get_printable_counts_by_qg_id(deduplicated_kg)}")
    return deduplicated_kg, updated_edge_to_nodes_map
def _expand_edge(self, qedge: QEdge, kp_to_use: str, dict_kg: DictKnowledgeGraph, continue_if_no_results: bool,
                 query_graph: QueryGraph, use_synonyms: bool, synonym_handling: str,
                 log: Response) -> Tuple[DictKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Answer a single-edge (one-hop) query using the specified knowledge provider.

    :param qedge: The query edge to expand.
    :param kp_to_use: One of "ARAX/KG1", "ARAX/KG2", "BTE", "COHD" or "NGD".
    :param dict_kg: The KG built up by prior expansion steps.
    :param continue_if_no_results: When True an unfulfilled query is only a warning, otherwise an error.
    :param query_graph: The full query graph the edge belongs to.
    :param use_synonyms: Passed through when building the query graph for this edge.
    :param synonym_handling: Answer nodes are deduplicated unless this is 'add_all'.
    :param log: Response object that receives messages; its status is used for error detection.
    :return: The answer KG and its edge-to-nodes map (both empty if a problem occurred before the
             KP returned an answer).
    """
    log.info(f"Expanding edge {qedge.id} using {kp_to_use}")
    answer_kg = DictKnowledgeGraph()
    edge_to_nodes_map = dict()

    # Create a query graph for this edge (that uses synonyms as well as curies found in prior steps)
    edge_query_graph = self._get_query_graph_for_edge(qedge, query_graph, dict_kg, use_synonyms, kp_to_use, log)
    if log.status != 'OK':
        return answer_kg, edge_to_nodes_map
    if not any(qnode for qnode in edge_query_graph.nodes if qnode.curie):
        log.error(f"Cannot expand an edge for which neither end has any curies. (Could not find curies to use from "
                  f"a prior expand step, and neither qnode has a curie specified.)",
                  error_code="InvalidQuery")
        return answer_kg, edge_to_nodes_map

    valid_kps = ["ARAX/KG1", "ARAX/KG2", "BTE", "COHD", "NGD"]
    if kp_to_use not in valid_kps:
        log.error(f"Invalid knowledge provider: {kp_to_use}. Valid options are {', '.join(valid_kps)}",
                  error_code="InvalidKP")
        return answer_kg, edge_to_nodes_map

    # Pick the querier for this KP (each is imported only when it's actually needed)
    if kp_to_use == 'BTE':
        from Expand.bte_querier import BTEQuerier
        kp_querier = BTEQuerier(log)
    elif kp_to_use == 'COHD':
        from Expand.COHD_querier import COHDQuerier
        kp_querier = COHDQuerier(log)
    elif kp_to_use == 'NGD':
        from Expand.ngd_querier import NGDQuerier
        kp_querier = NGDQuerier(log)
    else:
        from Expand.kg_querier import KGQuerier
        kp_querier = KGQuerier(log, kp_to_use)

    answer_kg, edge_to_nodes_map = kp_querier.answer_one_hop_query(edge_query_graph)
    if log.status != 'OK':
        return answer_kg, edge_to_nodes_map
    log.debug(f"Query for edge {qedge.id} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")

    # Do some post-processing (deduplicate nodes, remove self-edges..)
    if synonym_handling != 'add_all':
        answer_kg, edge_to_nodes_map = self._deduplicate_nodes(answer_kg, edge_to_nodes_map, log)
    if eu.qg_is_fulfilled(edge_query_graph, answer_kg):
        answer_kg = self._remove_self_edges(answer_kg, edge_to_nodes_map, qedge.id, edge_query_graph.nodes, log)

    # Make sure our query has been fulfilled (unless we're continuing if no results)
    query_is_fulfilled = eu.qg_is_fulfilled(edge_query_graph, answer_kg)
    if not query_is_fulfilled and continue_if_no_results:
        log.warning(f"No paths were found in {kp_to_use} satisfying this query graph")
    elif not query_is_fulfilled:
        log.error(f"No paths were found in {kp_to_use} satisfying this query graph", error_code="NoResults")

    return answer_kg, edge_to_nodes_map
def parse(self, input_actions):
    """
    Parse a list of action strings into a list of command dicts.

    Each non-blank, non-comment item must be either a bare command (e.g. `command`) or a command with
    a parenthesized, comma-separated parameter list (e.g. `command(key=value, other=[a,b], flag)`).
    A parameter without '=' gets the string value 'true'; a value wrapped in [...] becomes a list.

    :param input_actions: List of action strings.
    :return: A Response object. On success, response.data["actions"] holds one dict per parsed action
             with the keys "line", "command" and "parameters" (None for a bare command). Problems
             are reported through response.error().
    """
    #### Define a default response
    response = Response()
    response.info(f"Parsing input actions list")

    #### Basic error checking of the input_actions
    if not isinstance(input_actions, list):
        response.error("Provided input actions is not a list", error_code="ActionsNotList")
        return response
    if len(input_actions) == 0:
        response.error("Provided input actions is an empty list", error_code="ActionsListEmpty")
        return response

    #### Iterate through the list, checking the items
    actions = []
    # NOTE(review): this counter is not advanced for the blank/comment lines skipped via `continue`
    # below, so "line" counts parsed items rather than input positions - confirm that's intended
    n_lines = 1
    for action in input_actions:
        response.debug(f"Parsing action: {action}")

        # If this line is empty, then skip
        match = re.match(r"\s*$", action)
        if match:
            continue

        # If this line begins with a # (no leading whitespace allowed), it is a comment, then skip
        match = re.match(r"#", action)
        if match:
            continue

        #### First look for a naked command without parentheses
        match = re.match(r"\s*([A-Za-z_]+)\s*$", action)
        if match is not None:
            action = {
                "line": n_lines,
                "command": match.group(1),
                "parameters": None
            }
            actions.append(action)

        #### Then look for and parse a command with parentheses and a comma-separated parameter list
        if match is None:
            match = re.match(r"\s*([A-Za-z_]+)\((.*)\)\s*$", action)
            if match is not None:
                command = match.group(1)
                param_string = match.group(2)

                #### Split the parameters on comma and process those
                param_string_list = re.split(",", param_string)
                parameters = {}

                #### If a value is of the form key=[value1,value2] special code is needed to recompose that
                #### (the split on comma above breaks such a list into separate items)
                mode = 'normal'
                list_buffer = []
                key = ''
                for param_item in param_string_list:
                    param_item = param_item.strip()
                    if mode == 'normal':

                        #### Split on the first = only (might be = in the value)
                        values = re.split("=", param_item, 1)
                        key = values[0]
                        #### If there isn't a value after an =, then just set to string true
                        value = 'true'
                        if len(values) > 1:
                            value = values[1]
                        key = key.strip()
                        value = value.strip()

                        #### If the value begins with a "[", then this is a list
                        match = re.match(r"\[(.+)$", value)
                        if match:
                            #### If it also ends with a "]", then this is a list of one element (or an empty list)
                            match2 = re.match(r"\[(.*)\]$", value)
                            if match2:
                                if match2.group(1) == '':
                                    parameters[key] = []
                                else:
                                    parameters[key] = [match2.group(1)]
                            else:
                                #### Otherwise the list continues in the following items
                                mode = 'in_list'
                                list_buffer = [match.group(1)]
                        else:
                            parameters[key] = value

                    #### Special processing if we're in the middle of a list
                    elif mode == 'in_list':
                        #### An item ending with "]" closes the list
                        match = re.match(r"(.*)\]$", param_item)
                        if match:
                            mode = 'normal'
                            list_buffer.append(match.group(1))
                            parameters[key] = list_buffer
                        else:
                            list_buffer.append(param_item)
                    else:
                        eprint("Inconceivable!")
                #### A list that was never closed with "]" is still stored with what was collected
                if mode == 'in_list':
                    parameters[key] = list_buffer

                #### Store the parsed result in a dict and add to the list
                action = {
                    "line": n_lines,
                    "command": command,
                    "parameters": parameters
                }
                actions.append(action)
            else:
                # NOTE(review): this reuses the "ActionsListEmpty" error code for an unparseable
                # action - looks like a copy/paste; confirm whether a dedicated code is wanted
                response.error(f"Unable to parse action {action}", error_code="ActionsListEmpty")
        n_lines += 1

    #### Put the actions in the response data envelope and return
    response.data["actions"] = actions
    return response
def _prune_dead_end_paths(dict_kg: DictKnowledgeGraph, full_query_graph: QueryGraph,
                          node_usages_by_edges_map: Dict[str, Dict[str, Dict[str, str]]], log: Response):
    """
    Remove any 'dead-end' paths from the KG, in place.

    Because edges are expanded one-by-one, not all edges found in the last expansion will connect to
    edges in the next one. Nodes that lack a connection to every already-expanded qnode ID they should
    be connected to are removed (repeatedly, until nothing changes), then edges using a removed node.

    :param dict_kg: The KG to prune; its nodes_by_qg_id and edges_by_qg_id dicts are modified in place.
    :param full_query_graph: The complete query graph (used to work out which qnodes are connected).
    :param node_usages_by_edges_map: qedge ID -> edge ID -> {qnode ID: node ID fulfilling it}.
    :param log: Response object that receives debug messages.
    """
    log.debug(f"Pruning any paths that are now dead ends")

    # Create a map of which qnodes are connected to which other qnodes
    # Example qnode_connections_map: {'n00': {'n01'}, 'n01': {'n00', 'n02'}, 'n02': {'n01'}}
    qnode_connections_map = dict()
    for qnode in full_query_graph.nodes:
        qnode_connections_map[qnode.id] = set()
        for qedge in full_query_graph.edges:
            if qedge.source_id == qnode.id or qedge.target_id == qnode.id:
                connected_qnode_id = qedge.target_id if qedge.target_id != qnode.id else qedge.source_id
                qnode_connections_map[qnode.id].add(connected_qnode_id)

    # Create a map of which nodes each node is connected to (organized by the qnode_id they're fulfilling)
    # Example node_usages_by_edges_map: {'e00': {'KG1:111221': {'n00': 'CUI:122', 'n01': 'CUI:124'}}}
    # Example node_connections_map (qnode ID -> node ID -> connected qnode ID -> connected node IDs):
    #   {'n01': {'CUI:1222': {'n00': {'DOID:122'}, 'n02': {'UniProtKB:22', 'UniProtKB:333'}}}}
    node_connections_map = dict()
    for qedge_id, edges_to_nodes_dict in node_usages_by_edges_map.items():
        current_qedge = next(qedge for qedge in full_query_graph.edges if qedge.id == qedge_id)
        qnode_ids = [current_qedge.source_id, current_qedge.target_id]
        for edge_id, node_usages_dict in edges_to_nodes_dict.items():
            # Record the connection in both directions (once from each end of the edge)
            for current_qnode_id in qnode_ids:
                connected_qnode_id = next(qnode_id for qnode_id in qnode_ids if qnode_id != current_qnode_id)
                current_node_id = node_usages_dict[current_qnode_id]
                connected_node_id = node_usages_dict[connected_qnode_id]
                if current_qnode_id not in node_connections_map:
                    node_connections_map[current_qnode_id] = dict()
                if current_node_id not in node_connections_map[current_qnode_id]:
                    node_connections_map[current_qnode_id][current_node_id] = dict()
                if connected_qnode_id not in node_connections_map[current_qnode_id][current_node_id]:
                    node_connections_map[current_qnode_id][current_node_id][connected_qnode_id] = set()
                node_connections_map[current_qnode_id][current_node_id][connected_qnode_id].add(connected_node_id)

    # Iteratively remove all disconnected nodes until there are none left
    # (removing one node can turn its neighbors into dead ends, hence the repeated passes)
    qnode_ids_already_expanded = list(node_connections_map.keys())
    found_dead_end = True
    while found_dead_end:
        found_dead_end = False
        for qnode_id in qnode_ids_already_expanded:
            qnode_ids_should_be_connected_to = qnode_connections_map[qnode_id].intersection(qnode_ids_already_expanded)
            for node_id, node_mappings_dict in node_connections_map[qnode_id].items():
                # Check if any mappings are even entered for all qnode_ids this node should be connected to
                if set(node_mappings_dict.keys()) != qnode_ids_should_be_connected_to:
                    # Only flag a dead end if the node is still present, so the loop can terminate
                    if node_id in dict_kg.nodes_by_qg_id[qnode_id]:
                        dict_kg.nodes_by_qg_id[qnode_id].pop(node_id)
                        found_dead_end = True
                else:
                    # Verify that at least one of the entered connections still exists (for each connected qnode_id)
                    for connected_qnode_id, connected_node_ids in node_mappings_dict.items():
                        if not connected_node_ids.intersection(set(dict_kg.nodes_by_qg_id[connected_qnode_id].keys())):
                            if node_id in dict_kg.nodes_by_qg_id[qnode_id]:
                                dict_kg.nodes_by_qg_id[qnode_id].pop(node_id)
                                found_dead_end = True

    # Then remove all orphaned edges (those using at least one node that is no longer in the KG)
    for qedge_id, edges_dict in node_usages_by_edges_map.items():
        for edge_key, node_mappings in edges_dict.items():
            for qnode_id, used_node_id in node_mappings.items():
                if used_node_id not in dict_kg.nodes_by_qg_id[qnode_id]:
                    if edge_key in dict_kg.edges_by_qg_id[qedge_id]:
                        dict_kg.edges_by_qg_id[qedge_id].pop(edge_key)
def reassign_curies(self, message, input_parameters, describe=False):
    """
    Reassigns the CURIEs of the QueryGraph nodes to the CURIE space of the target Knowledge Provider

    :param message: Translator standard Message object
    :type message: Message
    :param input_parameters: Dict of input parameters to control the method
    :type input_parameters: dict
    :param describe: If True, only return the documentation of the allowable parameters
    :type describe: bool
    :return: Response object with execution information (or the documentation dict if describe is True)
    :rtype: Response
    """

    # #### Internal documentation setup
    allowable_parameters = {
        'knowledge_provider': {
            'Name of the Knowledge Provider CURIE space to map to. Default=KG1. Also currently supported KG2'
        },
        'mismap_result': {
            'Desired action when mapping fails: ERROR or WARNING. Default is ERROR'
        },
    }
    if describe:
        # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
        allowable_parameters['dsl_command'] = '`reassign_curies()`'
        allowable_parameters['brief_description'] = """The `reassign_curies` method reassigns all the CURIEs in the Message QueryGraph to the specified knowledge provider. Allowed values are KG1 or KG2. Default is KG1 if not specified."""
        return allowable_parameters

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'knowledge_provider': 'KG1',
        'mismap_result': 'ERROR',
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value
    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    # Check that the knowledge_provider is valid:
    if parameters['knowledge_provider'] != 'KG1' and parameters['knowledge_provider'] != 'KG2':
        response.error(
            f"Specified knowledge provider must be 'KG1' or 'KG2', not '{parameters['knowledge_provider']}'",
            error_code="UnknownKP")
        return response

    #### Now try to assign the CURIEs
    response.info(f"Reassigning the CURIEs in QueryGraph to {parameters['knowledge_provider']} space")

    #### Make sure there's a query_graph already here
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.nodes is None:
        message.query_graph.nodes = []

    #### Set up the KGNodeIndex
    kgNodeIndex = KGNodeIndex()

    # Loops through the QueryGraph nodes and adjust them
    for qnode in message.query_graph.nodes:

        # If the CURIE is None, then there's nothing to do
        curie = qnode.curie
        if curie is None:
            continue

        # Map the CURIE to the desired Knowledge Provider
        if parameters['knowledge_provider'] == 'KG1':
            if kgNodeIndex.is_curie_present(curie) is True:
                mapped_curies = [curie]
            else:
                mapped_curies = kgNodeIndex.get_KG1_curies(curie)
        elif parameters['knowledge_provider'] == 'KG2':
            if kgNodeIndex.is_curie_present(curie, kg_name='KG2'):
                mapped_curies = [curie]
            else:
                mapped_curies = kgNodeIndex.get_curies_and_types(curie, kg_name='KG2')
        else:
            response.error(
                f"Specified knowledge provider must be 'KG1' or 'KG2', not '{parameters['knowledge_provider']}'",
                error_code="UnknownKP")
            return response

        # get_curies_and_types() results are records carrying a 'curie' key; reduce them to plain CURIE
        # strings so they can be compared with (and assigned in place of) the original CURIE.
        # NOTE(review): assumes get_KG1_curies() already returns plain CURIE strings - confirm
        mapped_curies = [entry['curie'] if isinstance(entry, dict) else entry for entry in mapped_curies]

        # Try to find a new CURIE
        new_curie = None
        if len(mapped_curies) == 0:
            if parameters['mismap_result'] == 'WARNING':
                response.warning(
                    f"Did not find a mapping for {curie} to KP '{parameters['knowledge_provider']}'. Leaving as is")
            else:
                response.error(
                    f"Did not find a mapping for {curie} to KP '{parameters['knowledge_provider']}'. This is an error")
        elif len(mapped_curies) == 1:
            new_curie = mapped_curies[0]
        elif curie in mapped_curies:
            # Several candidates, but the original CURIE is among them, so keep it
            new_curie = curie
        else:
            new_curie = mapped_curies[0]
            response.warning(
                f"There are multiple possible CURIEs in KP '{parameters['knowledge_provider']}'. Selecting the first one {new_curie}")

        # If there's no CURIE, then nothing to do
        if new_curie is None:
            pass
        # If it's the same
        elif new_curie == curie:
            response.debug(f"CURIE {curie} is fine for KP '{parameters['knowledge_provider']}'")
        else:
            response.info(f"Remapping CURIE {curie} to {new_curie} for KP '{parameters['knowledge_provider']}'")
            # Actually perform the remapping that is announced above
            qnode.curie = new_curie

    #### Return the response
    return response
def add_query_graph_tags(self, message, query_graph_info):
    """
    Adds temporary QueryGraph ids (qnode_id / qedge_id) to the KnowledgeGraph nodes and edges that lack them

    Nodes are matched through query_graph_info.node_type_map (exactly one of the node's types must be
    present in it) and edges through query_graph_info.edge_type_map (keyed by the edge type).

    :param message: Message whose knowledge_graph nodes and edges are tagged in place
    :param query_graph_info: Object providing the node_type_map and edge_type_map lookups
    :return: Response object with execution information
    :rtype: Response
    """

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message
    response.debug("Adding temporary QueryGraph ids to KnowledgeGraph")

    #### Get shorter handles
    knowledge_graph = message.knowledge_graph
    nodes = knowledge_graph.nodes
    edges = knowledge_graph.edges

    #### Loop through nodes adding qnode_ids
    for node in nodes:

        #### If there is already a qnode_id, leave the node alone
        if node.qnode_id is not None:
            continue
        node_id = node.id
        types = node.type

        #### Find a matching type in the QueryGraph for this node
        if types is None:
            response.error(f"KnowledgeGraph node {node_id} does not have a type. This should never be",
                           error_code="NodeMissingType")
            return response
        matching_types = [node_type for node_type in types if node_type in query_graph_info.node_type_map]

        #### If we did not find exactly one matching type, error out
        if len(matching_types) == 0:
            response.error(
                f"Tried to find types '{types}' for KnowledgeGraph node {node_id} in query_graph_info, but did not find it",
                error_code="NodeTypeMissingInQueryGraph")
            return response
        elif len(matching_types) > 1:
            response.error(
                f"Tried to find types '{types}' for KnowledgeGraph node {node_id} in query_graph_info, and found multiple matches. This is ambiguous",
                error_code="MultipleNodeTypesInQueryGraph")
            return response

        #### Else add it
        node.qnode_id = query_graph_info.node_type_map[matching_types[0]]

    #### Loop through the edges adding qedge_ids
    for edge in edges:
        edge_id = edge.id

        #### If there is already a qedge_id attribute on the edge, leave the edge alone
        if edge.qedge_id is not None:
            continue

        #### If there isn't a type or can't find it in the query_graph, error out
        if edge.type is None:
            response.error(f"KnowledgeGraph edge {edge_id} does not have a type. This should never be",
                           error_code="EdgeMissingType")
            return response
        if edge.type not in query_graph_info.edge_type_map:
            # This message concerns an edge (it previously said "node")
            response.error(
                f"Tried to find type '{edge.type}' for KnowledgeGraph edge {edge_id} in query_graph_info, but did not find it",
                error_code="EdgeTypeMissingInQueryGraph")
            return response

        #### Else add it
        edge.qedge_id = query_graph_info.edge_type_map[edge.type]

    #### Return the response
    return response
def add_qnode(self, message, input_parameters, describe=False):
    """
    Adds a new QNode object to the QueryGraph inside the Message object

    :param message: Message whose query_graph receives the new QNode (the query_graph is created if missing)
    :param input_parameters: Dict that may contain the keys id, curie, name, type and is_set
    :param describe: If True, only return the documentation of the allowable parameters
    :return: Response object with execution information (or the documentation dict if describe is True)
    :rtype: Response
    """

    # #### Internal documentation setup
    allowable_parameters = {
        'id': {
            'Any string that is unique among all QNode id fields, with recommended format n00, n01, n02, etc.'
        },
        'curie': {
            'Any compact URI (CURIE) (e.g. DOID:9281) (May also be a list like [UniProtKB:P12345,UniProtKB:Q54321])'
        },
        'name': {
            'Any name of a bioentity that will be resolved into a CURIE if possible or result in an error if not (e.g. hypertension, insulin)'
        },
        'type': {
            'Any valid Translator bioentity type (e.g. protein, chemical_substance, disease)'
        },
        'is_set': {
            'If set to true, this QNode represents a set of nodes that are all in common between the two other linked QNodes'
        },
    }
    if describe:
        # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
        allowable_parameters['dsl_command'] = '`add_qnode()`'
        allowable_parameters['brief_description'] = """The `add_qnode` method adds an additional QNode to the QueryGraph in the Message object. Currently when a curie or name is specified, this method will only return success if a matching node is found in the KG1/KG2 KGNodeIndex."""
        return allowable_parameters

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'id': None,
        'curie': None,
        'name': None,
        'type': None,
        'is_set': None,
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value
    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    response.info(f"Adding a QueryNode to Message with parameters {parameters}")

    #### Make sure there's a query_graph already here
    if message.query_graph is None:
        message.query_graph = QueryGraph()
        message.query_graph.nodes = []
        message.query_graph.edges = []
    if message.query_graph.nodes is None:
        message.query_graph.nodes = []

    #### Set up the KGNodeIndex
    kgNodeIndex = KGNodeIndex()

    # Create the QNode and set the id
    qnode = QNode()
    if parameters['id'] is not None:
        qnode.id = parameters['id']
    else:
        qnode.id = self.__get_next_free_node_id()

    # Set the is_set parameter to what the user selected. The value may arrive either as text
    # ('true'/'false') or as a real boolean; calling .lower() on a boolean would raise
    requested_is_set = parameters['is_set']
    if requested_is_set is not None:
        if isinstance(requested_is_set, bool):
            qnode.is_set = requested_is_set
        else:
            qnode.is_set = (str(requested_is_set).lower() == 'true')

    #### If the CURIE is specified, try to find that
    if parameters['curie'] is not None:

        # If the curie is a scalar then treat it here as a list of one
        if isinstance(parameters['curie'], str):
            curie_list = [parameters['curie']]
            is_curie_a_list = False
            if requested_is_set is not None and qnode.is_set is True:
                response.error(
                    f"Specified CURIE '{parameters['curie']}' is a scalar, but is_set=true, which doesn't make sense",
                    error_code="CurieScalarButIsSetTrue")
                return response

        # Or else set it up as a list
        elif isinstance(parameters['curie'], list):
            curie_list = parameters['curie']
            is_curie_a_list = True
            qnode.curie = []
            if requested_is_set is None:
                response.warning(
                    f"Specified CURIE '{parameters['curie']}' is a list, but is_set was not set to true. It must be true in this context, so automatically setting to true. Avoid this warning by explictly setting to true.")
                qnode.is_set = True
            elif qnode.is_set == False:
                response.warning(
                    f"Specified CURIE '{parameters['curie']}' is a list, but is_set=false, which doesn't make sense, so automatically setting to true. Avoid this warning by explictly setting to true.")
                qnode.is_set = True

        # Or if it's neither a list or a string, then error out. This cannot be handled at present
        else:
            response.error(
                f"Specified CURIE '{parameters['curie']}' is neither a string nor a list. This cannot to handled",
                error_code="CurieNotListOrScalar")
            return response

        # Loop over the available curies and create the list
        for curie in curie_list:
            response.debug(f"Looking up CURIE {curie} in KgNodeIndex")
            nodes = kgNodeIndex.get_curies_and_types(curie, kg_name='KG2')

            # If nothing was found, we won't bail out, but rather just issue a warning and keep the CURIE as given
            if len(nodes) == 0:
                response.warning(f"A node with CURIE {curie} is not in our knowledge graph KG2, but will continue")
                resolved_curie = curie
            else:
                # FIXME. This is just always taking the first result. This could cause problems for CURIEs with multiple types. Is that possible?
                # In issue #623 on 2020-06-15 we concluded that we should not specify the type here
                resolved_curie = nodes[0]['curie']

            # Either append or set the resolved curie
            if is_curie_a_list:
                qnode.curie.append(resolved_curie)
            else:
                qnode.curie = resolved_curie

            # The type comes only from the caller; a list of types is reduced to its first entry
            if 'type' in parameters and parameters['type'] is not None:
                if isinstance(parameters['type'], str):
                    qnode.type = parameters['type']
                else:
                    qnode.type = parameters['type'][0]

        message.query_graph.nodes.append(qnode)
        return response

    #### If the name is specified, try to find that
    if parameters['name'] is not None:
        response.debug(f"Looking up CURIE {parameters['name']} in KgNodeIndex")
        # Try the default index first, then fall back to KG2
        nodes = kgNodeIndex.get_curies_and_types(parameters['name'])
        if len(nodes) == 0:
            nodes = kgNodeIndex.get_curies_and_types(parameters['name'], kg_name='KG2')
            if len(nodes) == 0:
                response.error(f"A node with name '{parameters['name']}'' is not in our knowledge graph",
                               error_code="UnknownCURIE")
                return response
        qnode.curie = nodes[0]['curie']
        qnode.type = nodes[0]['type']
        message.query_graph.nodes.append(qnode)
        return response

    #### If the type is specified, just add that type. There should be checking that it is legal. FIXME
    if parameters['type'] is not None:
        qnode.type = parameters['type']
        message.query_graph.nodes.append(qnode)
        return response

    #### If we get here, it means that all three main parameters are null. Just a generic node with no type or anything. This is okay.
    message.query_graph.nodes.append(qnode)
    return response
def assess(self, message):
    """
    Assesses the QueryGraph of the Message and stores basic information about it on this object

    Sets self.n_nodes, self.n_edges, self.node_type_map, self.edge_type_map, self.node_info, self.edge_info,
    self.start_node, self.node_order, self.edge_order and self.query_graph_templates. Intermediate nodes of
    the linear path have is_set forced to True on their QNode objects.

    :param message: Message whose query_graph is assessed
    :return: Response object with execution information
    :rtype: Response
    """

    #### Define a default response
    response = Response()
    self.response = response
    self.message = message
    response.debug("Assessing the QueryGraph for basic information")

    #### Get shorter handles
    query_graph = message.query_graph
    nodes = query_graph.nodes
    edges = query_graph.edges

    #### Store number of nodes and edges
    self.n_nodes = len(nodes)
    self.n_edges = len(edges)
    response.debug(f"Found {self.n_nodes} nodes and {self.n_edges} edges")

    #### Handle impossible cases
    if self.n_nodes == 0:
        response.error("QueryGraph has 0 nodes. At least 1 node is required", error_code="QueryGraphZeroNodes")
        return response
    if self.n_nodes == 1 and self.n_edges > 0:
        response.error("QueryGraph may not have edges if there is only one node",
                       error_code="QueryGraphTooManyEdges")
        return response

    #### Loop through nodes computing some stats
    node_info = {}
    self.node_type_map = {}
    untyped_node_reported = False  # Kept outside the loop so that the debug note is only issued once
    for qnode in nodes:
        qnode_id = qnode.id
        if qnode_id is None:
            response.error("QueryGraph has a node with no id. This is not permitted",
                           error_code="QueryGraphNodeWithNoId")
            return response
        node_info[qnode_id] = {
            'id': qnode_id,
            'node_object': qnode,
            'has_curie': qnode.curie is not None,
            'type': qnode.type,
            'has_type': qnode.type is not None,
            'is_set': False,
            'n_edges': 0,
            'n_links': 0,
            'is_connected': False,
            'edges': [],
            'edge_dict': {}
        }

        #### Store lookup of types
        if qnode.type is None:
            if not untyped_node_reported:
                response.debug(
                    "QueryGraph has nodes with no type. This may cause problems with results inference later")
                untyped_node_reported = True
            self.node_type_map['unknown'] = qnode_id
        else:
            self.node_type_map[qnode.type] = qnode_id

    #### Loop through edges computing some stats
    #### Special informational edge types that are ignored for now
    virtual_edge_types = {
        'has_normalized_google_distance_with',
        'has_fisher_exact_test_p-value_with',
        'has_jaccard_index_with',
        'probably_treats',
        'has_paired_concept_frequency_with',
        'has_observed_expected_ratio_with',
        'has_chi_square_with',
    }
    edge_info = {}
    self.edge_type_map = {}
    unique_links = set()
    untyped_edge_reported = False  # Kept outside the loop so that the debug note is only issued once
    for qedge in edges:
        if qedge.type is not None and qedge.type in virtual_edge_types:
            continue

        qedge_id = qedge.id
        if qedge_id is None:
            response.error("QueryGraph has a edge with no id. This is not permitted",
                           error_code="QueryGraphEdgeWithNoId")
            return response
        this_edge_info = {
            'id': qedge_id,
            'has_type': qedge.type is not None,
            'source_id': qedge.source_id,
            'target_id': qedge.target_id,
            # The type of this edge itself (not the type of whichever node was processed last)
            'type': qedge.type
        }
        edge_info[qedge_id] = this_edge_info
        source_info = node_info[qedge.source_id]
        target_info = node_info[qedge.target_id]

        #### Create a unique node link string, so that parallel edges between two nodes count as one link
        link_string = ','.join(sorted([qedge.source_id, qedge.target_id]))
        if link_string not in unique_links:
            source_info['n_links'] += 1
            target_info['n_links'] += 1
            unique_links.add(link_string)

        source_info['n_edges'] += 1
        target_info['n_edges'] += 1
        source_info['is_connected'] = True
        target_info['is_connected'] = True
        source_info['edges'].append(this_edge_info)
        target_info['edges'].append(this_edge_info)
        source_info['edge_dict'][qedge_id] = this_edge_info
        target_info['edge_dict'][qedge_id] = this_edge_info

        #### Store lookup of types
        edge_type = 'any'
        if qedge.type is None:
            if not untyped_edge_reported:
                response.debug(
                    "QueryGraph has edges with no type. This may cause problems with results inference later")
                untyped_edge_reported = True
        else:
            edge_type = qedge.type

        #### It's not clear yet whether we need to store the whole sentence or just the type
        self.edge_type_map[edge_type] = qedge_id

    #### Loop through the nodes again, trying to identify the start_node and the end_node
    singletons = []
    for node_data in node_info.values():
        if node_data['n_links'] < 2:
            singletons.append(node_data)
        elif node_data['n_links'] > 2:
            self.is_bifurcated_graph = True
            response.warning("QueryGraph appears to have a fork in it. This might cause trouble")

    #### Try to identify the start_node and the end_node. A circular graph has no singletons at all, so
    #### fall back to the first node instead of failing on an empty list
    start_node = singletons[0] if singletons else next(iter(node_info.values()))
    if len(nodes) == 1:
        # Just a single node, fine
        pass
    elif len(singletons) < 2:
        response.warning("QueryGraph appears to be circular or has a strange geometry. This might cause trouble")
    elif len(singletons) > 2:
        response.warning("QueryGraph appears to have a fork in it. This might cause trouble")
    elif singletons[0]['has_curie'] is False and singletons[1]['has_curie'] is True:
        # Of the two path ends, prefer the one that is pinned to a CURIE
        start_node = singletons[1]

    #### Hmm, that's not very robust against odd graphs. This needs work. FIXME

    self.node_info = node_info
    self.edge_info = edge_info
    self.start_node = start_node

    #### Walk the path from the start_node, one edge at a time, recording the order of nodes and edges
    current_node = start_node
    node_order = [start_node]
    edge_order = []
    edges = current_node['edges']
    while len(edges) > 0:
        if len(edges) > 1:
            response.error("Help, two edges at A583. Don't know what to do", error_code="InteralErrorA583")
            return response
        edge_order.append(edges[0])
        previous_node = current_node
        if edges[0]['source_id'] == current_node['id']:
            current_node = node_info[edges[0]['target_id']]
        elif edges[0]['target_id'] == current_node['id']:
            current_node = node_info[edges[0]['source_id']]
        else:
            response.error("Help, edge error A584. Don't know what to do", error_code="InteralErrorA584")
            return response
        node_order.append(current_node)

        # Continue along the edges of the new node that do not lead back to the node we just came from
        edges = [edge for edge in current_node['edges'] if edge['id'] not in previous_node['edge_dict']]

    self.node_order = node_order
    self.edge_order = edge_order

    # Create a text rendering of the QueryGraph geometry for matching against a template
    self.query_graph_templates = {'simple': '', 'detailed': {'n_nodes': len(node_order), 'components': []}}
    node_index = 0
    edge_index = 0
    for node in node_order:
        component_id = f"n{node_index:02}"
        content = ''
        component = {
            'component_type': 'node',
            'component_id': component_id,
            'has_curie': node['has_curie'],
            'has_type': node['has_type'],
            'type_value': None
        }
        self.query_graph_templates['detailed']['components'].append(component)
        if node['has_curie']:
            content = 'curie'
        if node['has_type'] and node['node_object'].type is not None:
            content = f"type={node['node_object'].type}"
            component['type_value'] = node['node_object'].type
        elif node['has_type']:
            content = 'type'
        self.query_graph_templates['simple'] += f"{component_id}({content})"

        # Since queries with intermediate nodes that are not is_set=true tend to blow up, for now, make them is_set=true unless explicitly set to false
        if node_index > 0 and node_index < (self.n_nodes - 1):
            if 'is_set' not in node or node['is_set'] is None:
                node['node_object'].is_set = True
                response.warning(
                    f"Setting unspecified is_set to true for {node['id']} because this will probably lead to a happier result")
            elif node['is_set'] is True:
                response.debug(f"Value for is_set is already true for {node['id']} so that's good")
            elif node['is_set'] is False:
                response.info(
                    f"Value for is_set is false for intermediate node {node['id']}. Setting to true because this will probably lead to a happier result")
                node['node_object'].is_set = True

        node_index += 1
        # Every node except the last one is followed by an edge placeholder
        if node_index < self.n_nodes:
            component_id = f"e{edge_index:02}"
            self.query_graph_templates['simple'] += f"-{component_id}()-"
            component = {
                'component_type': 'edge',
                'component_id': component_id,
                'has_curie': False,
                'has_type': False
            }
            self.query_graph_templates['detailed']['components'].append(component)
            edge_index += 1

    response.debug(f"The QueryGraph reference template is: {self.query_graph_templates['simple']}")

    #### Return the response
    return response
def apply(self, input_message, input_parameters, response=None):
    """
    Expands the requested parts of the message's query graph and merges the answers into its knowledge graph

    :param input_message: Message holding the query_graph to expand and the knowledge_graph to extend
    :param input_parameters: Dict of parameter overrides; the text values 'true'/'false' (any case) become booleans
    :param response: Response object to log to (a new one is created if None)
    :return: The Response object (also returned early as soon as its status is no longer 'OK')
    """
    response = Response() if response is None else response
    self.message = input_message
    self.response = response

    # The parameters must come as a dict
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    # Reset the defaults on the stored parameter dict, then layer the caller's values on top
    parameters = self.parameters
    defaults = (
        ('kp', "ARAX/KG1"),
        ('enforce_directionality', False),
        ('use_synonyms', True),
        ('synonym_handling', 'map_back'),
        ('continue_if_no_results', False),
    )
    for default_name, default_value in defaults:
        parameters[default_name] = default_value
    for param_name, param_value in input_parameters.items():
        if param_name and param_name not in parameters:
            response.error(f"Supplied parameter {param_name} is not permitted", error_code="UnknownParameter")
            continue
        if type(param_value) is str:
            lowered_value = param_value.lower()
            if lowered_value == "true":
                param_value = True
            elif lowered_value == "false":
                param_value = False
        parameters[param_name] = param_value

    # When neither edges nor nodes were requested, expand the whole query graph
    if not (parameters['edge_id'] or parameters['node_id']):
        whole_query_graph = self.message.query_graph
        parameters['edge_id'] = [qedge.id for qedge in whole_query_graph.edges]
        parameters['node_id'] = self._get_orphan_query_node_ids(whole_query_graph)

    if response.status != 'OK':
        return response

    response.data['parameters'] = parameters
    self.parameters = parameters

    # Gather what the expansion needs
    response.debug(f"Applying Expand to Message with parameters {parameters}")
    edge_ids_to_expand = eu.convert_string_or_list_to_list(parameters['edge_id'])
    node_ids_to_expand = eu.convert_string_or_list_to_list(parameters['node_id'])
    kp_to_use = self.parameters['kp']
    continue_if_no_results = self.parameters['continue_if_no_results']

    # Work on the dictionary form of the knowledge graph, for faster processing
    dict_kg = eu.convert_standard_kg_to_dict_kg(self.message.knowledge_graph)

    # Edges first: one qedge at a time (much faster for neo4j queries, and allows easy integration with BTE)
    if edge_ids_to_expand:
        query_sub_graph = self._extract_query_subgraph(edge_ids_to_expand, self.message.query_graph)
        if response.status != 'OK':
            return response
        self.response.debug(f"Query graph for this Expand() call is: {query_sub_graph.to_dict()}")

        node_usages_by_edges_map = dict()
        for qedge in self._get_order_to_expand_edges_in(query_sub_graph):
            expansion = self._expand_edge(qedge, kp_to_use, dict_kg, continue_if_no_results,
                                          self.message.query_graph)
            answer_kg, edge_node_usage_map = expansion
            if response.status != 'OK':
                return response
            node_usages_by_edges_map[qedge.id] = edge_node_usage_map

            self._process_and_merge_answer(answer_kg, dict_kg)
            if response.status != 'OK':
                return response

            self._prune_dead_end_paths(dict_kg, query_sub_graph, node_usages_by_edges_map)
            if response.status != 'OK':
                return response

    # Then any individually requested nodes
    if node_ids_to_expand:
        for qnode_id in node_ids_to_expand:
            answer_kg = self._expand_node(qnode_id, kp_to_use, continue_if_no_results, self.message.query_graph)
            if response.status != 'OK':
                return response

            self._process_and_merge_answer(answer_kg, dict_kg)
            if response.status != 'OK':
                return response

    # Hand the knowledge graph back in API standard format and report its size
    self.message.knowledge_graph = eu.convert_dict_kg_to_standard_kg(dict_kg)
    final_kg = self.message.knowledge_graph
    response.info(
        f"After Expand, Message.KnowledgeGraph has {len(final_kg.nodes)} nodes and {len(final_kg.edges)} edges")
    return response
def _add_answers_to_kg(self, answer_kg: DictKnowledgeGraph, reasoner_std_response: Dict[str, any], input_qnode_id: str, output_qnode_id: str, qedge_id: str, log: Response) -> DictKnowledgeGraph: kg_to_qg_ids_dict = self._build_kg_to_qg_id_dict( reasoner_std_response['results']) if reasoner_std_response['knowledge_graph']['edges']: remapped_node_ids = dict() log.debug( f"Got results back from BTE for this query " f"({len(reasoner_std_response['knowledge_graph']['edges'])} edges)" ) for node in reasoner_std_response['knowledge_graph']['nodes']: swagger_node = Node() bte_node_id = node.get('id') swagger_node.name = node.get('name') swagger_node.type = eu.convert_string_or_list_to_list( eu.convert_string_to_snake_case(node.get('type'))) # Map the returned BTE qg_ids back to the original qnode_ids in our query graph bte_qg_id = kg_to_qg_ids_dict['nodes'].get(bte_node_id) if bte_qg_id == "n0": qnode_id = input_qnode_id elif bte_qg_id == "n1": qnode_id = output_qnode_id else: log.error("Could not map BTE qg_id to ARAX qnode_id", error_code="UnknownQGID") return answer_kg # Find and use the preferred equivalent identifier for this node (if it's an output node) if qnode_id == output_qnode_id: if bte_node_id in remapped_node_ids: swagger_node.id = remapped_node_ids.get(bte_node_id) else: equivalent_curies = [ f"{prefix}:{eu.get_curie_local_id(local_id)}" for prefix, local_ids in node.get( 'equivalent_identifiers').items() for local_id in local_ids ] swagger_node.id = self._get_best_equivalent_bte_curie( equivalent_curies, swagger_node.type[0]) remapped_node_ids[bte_node_id] = swagger_node.id else: swagger_node.id = bte_node_id answer_kg.add_node(swagger_node, qnode_id) for edge in reasoner_std_response['knowledge_graph']['edges']: swagger_edge = Edge() swagger_edge.id = edge.get("id") swagger_edge.type = edge.get('type') swagger_edge.source_id = remapped_node_ids.get( edge.get('source_id'), edge.get('source_id')) swagger_edge.target_id = remapped_node_ids.get( 
edge.get('target_id'), edge.get('target_id')) swagger_edge.is_defined_by = "BTE" swagger_edge.provided_by = edge.get('edge_source') # Map the returned BTE qg_id back to the original qedge_id in our query graph bte_qg_id = kg_to_qg_ids_dict['edges'].get(swagger_edge.id) if bte_qg_id != "e1": log.error("Could not map BTE qg_id to ARAX qedge_id", error_code="UnknownQGID") return answer_kg answer_kg.add_edge(swagger_edge, qedge_id) return answer_kg