def get_node_pairs_to_overlay(subject_qnode_key: str, object_qnode_key: str, query_graph: QueryGraph,
                              knowledge_graph: KnowledgeGraph, log: ARAXResponse) -> Set[Tuple[str, str]]:
    """
    Determine which combinations of subject/object nodes in the KG need to be overlayed (e.g., have a
    virtual edge added between them).

    Resultify is applied to the portion of the query graph that is already present in the KG, so that
    only subject/object nodes that actually appear together in the same result are paired (see issue
    #1069). If that yields no pairs for whatever reason, every possible combination of subject/object
    nodes is returned instead.

    :param subject_qnode_key: key of the query-graph node acting as the subject
    :param object_qnode_key: key of the query-graph node acting as the object
    :param query_graph: the query graph; only qnodes/qedges whose keys show up in the KG are considered
    :param knowledge_graph: the knowledge graph; its `nodes` and `edges` are copied (via `.copy()`)
                            into a separate KnowledgeGraph before Resultify is applied
    :param log: response object used for debug/warning messages
    :return: set of (subject node id, object node id) tuples
    """
    log.debug(f"Narrowing down {subject_qnode_key}--{object_qnode_key} node pairs to overlay")
    kg_nodes_by_qg_id = get_node_ids_by_qg_id(knowledge_graph)
    kg_edges_by_qg_id = get_edge_ids_by_qg_id(knowledge_graph)

    # Grab the portion of the QG already 'expanded' (aka, present in the KG).
    # The key sets are built once here rather than once per qnode/qedge inside the comprehensions.
    expanded_qnode_keys = set(kg_nodes_by_qg_id)
    expanded_qedge_keys = set(kg_edges_by_qg_id)
    sub_query_graph = QueryGraph(
        nodes={key: qnode for key, qnode in query_graph.nodes.items() if key in expanded_qnode_keys},
        edges={key: qedge for key, qedge in query_graph.edges.items() if key in expanded_qedge_keys},
    )

    # Compute results using Resultify so we can see which nodes appear in the same results
    resultifier = ARAXResultify()
    sub_response = ARAXResponse()
    sub_response.envelope = Response()
    sub_response.envelope.message = Message()
    sub_message = sub_response.envelope.message
    sub_message.query_graph = sub_query_graph
    sub_message.knowledge_graph = KnowledgeGraph(nodes=knowledge_graph.nodes.copy(),
                                                 edges=knowledge_graph.edges.copy())
    resultify_response = resultifier.apply(sub_response, {})

    # Figure out which node pairs appear together in one or more results
    if resultify_response.status == 'OK':
        node_pairs = set()
        for result in sub_message.results:
            # Look the bindings up by qnode key directly instead of scanning every (key, bindings) entry
            subject_curies_in_this_result = {node_binding.id for node_binding
                                             in result.node_bindings.get(subject_qnode_key, [])}
            object_curies_in_this_result = {node_binding.id for node_binding
                                            in result.node_bindings.get(object_qnode_key, [])}
            # In-place update avoids re-copying the accumulated set for every result
            node_pairs.update(itertools.product(subject_curies_in_this_result, object_curies_in_this_result))
        log.debug(f"Identified {len(node_pairs)} node pairs to overlay (with help of resultify)")
        if node_pairs:
            return node_pairs

    # Back up to using the old (O(n^2)) method of all combinations of subject/object nodes in the KG
    log.warning("Failed to narrow down node pairs to overlay; defaulting to all possible combinations")
    return set(itertools.product(kg_nodes_by_qg_id[subject_qnode_key], kg_nodes_by_qg_id[object_qnode_key]))
def apply_fetch_message(self, message, input_parameters, describe=False):
    """
    Fetch a remote Message via an HTTP GET and store it (together with its envelope) on this object.

    :param message: Message object stored on `self.message` until the fetched one replaces it
    :param input_parameters: dict of DSL parameters; the only permitted key is 'uri', the URL/URI of
                             the Message to fetch
    :param describe: when True, no work is done and the command definition dict is returned instead
    :return: ARAXResponse object with execution information (or the command definition dict when
             `describe` is True)
    :rtype: ARAXResponse
    """

    #### Command definition for autogenerated documentation
    # The documented parameter name must match the one actually accepted below ('uri').
    command_definition = {
        'dsl_command': 'fetch_message()',
        'description': """The `fetch_message` command fetches a remote Message by its id and can then allow further processing on it.""",
        'parameters': {
            'uri': {
                'is_required': True,
                'examples': ['https://arax.ncats.io/api/arax/v1.0/message/1'],
                'default': '',
                'type': 'string',
                'description': """A URL/URI that identifies the Message to be fetched""",
            },
        }
    }

    if describe:
        return command_definition

    #### Define a default response
    response = ARAXResponse()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict", error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'uri': None,
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted", error_code="UnknownParameter")
        else:
            parameters[key] = value

    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    #### Basic checks on arguments
    # Read from the validated `parameters` (which always has a 'uri' entry) rather than from
    # `input_parameters`, so an omitted 'uri' is reported as an error instead of raising KeyError.
    message_uri = parameters['uri']
    if not isinstance(message_uri, str):
        response.error("Provided parameter is not a string", error_code="ParameterNotString")
        return response

    response.info(f"Fetching Message via GET to '{message_uri}'")
    try:
        response_content = requests.get(message_uri, headers={'accept': 'application/json'})
    except requests.exceptions.RequestException as error:
        # Malformed URIs, DNS failures, refused connections etc. are reported like any other failed GET
        response.error(f"GET to '{message_uri}' failed: {error}", error_code="GETFailed")
        return response

    status_code = response_content.status_code
    if status_code != 200:
        response.error(
            f"GET to '{message_uri}' returned HTTP code {status_code} and content '{response_content.content}'",
            error_code="GETFailed")
        return response

    #### Unpack the response content into a dict and dump
    try:
        response_dict = response_content.json()
        envelope = Response().from_dict(response_dict)
    except Exception:
        response.error(f"Error converting response from '{message_uri}' to objects from content",
                       error_code="UnableToParseContent")
        return response

    #### Store the decoded message and return response
    message = envelope.message
    self.message = message
    self.envelope = envelope

    # Count the pieces of the fetched message; anything missing or of an unexpected type counts as 0.
    # NOTE(review): nodes are only counted when they are a list. Other code in this file treats
    # graph nodes as dicts; if that applies here too, these counts stay 0 and the recompute step
    # below never runs -- confirm which container type is expected.
    n_results = 0
    n_qg_nodes = 0
    n_kg_nodes = 0
    if isinstance(message.results, list):
        n_results = len(message.results)
    if isinstance(message.query_graph, QueryGraph) and isinstance(message.query_graph.nodes, list):
        n_qg_nodes = len(message.query_graph.nodes)
    if isinstance(message.knowledge_graph, KnowledgeGraph) and isinstance(message.knowledge_graph.nodes, list):
        n_kg_nodes = len(message.knowledge_graph.nodes)
    response.info(f"Retreived Message with {n_qg_nodes} QueryGraph nodes, {n_kg_nodes} KnowledgeGraph nodes, and {n_results} results")

    # Only a message that has all three parts is handed to Resultify for recomputing its qg keys
    if n_qg_nodes > 0 and n_kg_nodes > 0 and n_results > 0:
        resultifier = ARAXResultify()
        response.envelope = envelope
        resultifier.recompute_qg_keys(response)

    return response
def create_results(
    qg: QueryGraph,
    kg: QGOrganizedKnowledgeGraph,
    log: ARAXResponse,
    overlay_fet: bool = False,
    rank_results: bool = False,
    qnode_key_to_prune: Optional[str] = None,
) -> ARAXResponse:
    """
    Run Resultify on the given query graph / knowledge graph, optionally overlaying Fisher Exact Test
    (FET) edges beforehand and ranking the results afterwards.

    :param qg: query graph to create results for; it is attached to the returned response as-is, and
               FET qedges found in `qg.edges` are either tagged with an option group id or removed
    :param kg: QG-organized knowledge graph; converted to the standard KG format before use
    :param log: response object that receives debug/warning/error messages
    :param overlay_fet: when True, a `fisher_exact_test` overlay is attempted for every qnode that
                        shares a qedge with `qnode_key_to_prune`; failures are logged and skipped
    :param rank_results: when True, ARAXRanker.aggregate_scores_dmk is applied and any result whose
                         score is still None afterwards is given a score of 0
    :param qnode_key_to_prune: key of the qnode used as the object side of each FET overlay
    :return: the ARAXResponse built here; its `envelope.message` carries the query graph, the
             knowledge graph and whatever results Resultify produced
    """
    regular_format_kg = convert_qg_organized_kg_to_standard_kg(kg)
    resultifier = ARAXResultify()
    prune_response = ARAXResponse()
    prune_response.envelope = Response()
    prune_response.envelope.message = Message()
    prune_message = prune_response.envelope.message
    prune_message.query_graph = qg
    prune_message.knowledge_graph = regular_format_kg

    if overlay_fet:
        log.debug("Using FET to assess quality of intermediate answers in Expand")
        connected_qedges = [
            qedge for qedge in qg.edges.values()
            if qedge.subject == qnode_key_to_prune or qedge.object == qnode_key_to_prune
        ]
        # Each pair is (the qnode at the other end of the qedge, the qnode being pruned)
        qnode_pairs_to_overlay = {
            (qedge.subject if qedge.subject != qnode_key_to_prune else qedge.object, qnode_key_to_prune)
            for qedge in connected_qedges
        }
        for qnode_pair in qnode_pairs_to_overlay:
            pair_string_id = f"{qnode_pair[0]}-->{qnode_pair[1]}"
            log.debug(f"Overlaying FET for {pair_string_id} (from Expand)")
            fet_qedge_key = f"FET{pair_string_id}"
            try:
                overlayer = ARAXOverlay()
                params = {
                    "action": "fisher_exact_test",
                    "subject_qnode_key": qnode_pair[0],
                    "object_qnode_key": qnode_pair[1],
                    "virtual_relation_label": fet_qedge_key,
                }
                overlayer.apply(prune_response, params)
            except Exception as error:
                # Deliberately best-effort: an overlay failure must not stop the pruning
                exception_type, exception_value, exception_traceback = sys.exc_info()
                log.warning(
                    f"An uncaught error occurred when overlaying with FET during Expand's pruning: {error}: "
                    f"{repr(traceback.format_exception(exception_type, exception_value, exception_traceback))}"
                )

            if prune_response.status != "OK":
                log.warning(
                    f"FET produced an error when Expand tried to use it to prune the KG. "
                    f"Log was: {prune_response.show()}"
                )
                log.debug("Will continue pruning without overlaying FET")
                # Get rid of any FET edges that might be in the KG/QG, since this step failed
                remove_edges_with_qedge_key(prune_response.envelope.message.knowledge_graph, fet_qedge_key)
                qg.edges.pop(fet_qedge_key, None)
                prune_response.status = "OK"  # Clear this so we can continue without overlaying
            elif fet_qedge_key in qg.edges:
                qg.edges[fet_qedge_key].option_group_id = f"FET_VIRTUAL_GROUP_{pair_string_id}"
            else:
                log.warning("Attempted to overlay FET from Expand, but it didn't work. Pruning without it.")

    # Create results and rank them as appropriate
    log.debug("Calling Resultify from Expand for pruning")
    resultifier.apply(prune_response, {})
    if rank_results:
        try:
            log.debug("Ranking Expand's intermediate pruning results")
            ranker = ARAXRanker()
            ranker.aggregate_scores_dmk(prune_response)
        except Exception as error:
            exception_type, exception_value, exception_traceback = sys.exc_info()
            log.error(
                f"An uncaught error occurred when attempting to rank results during Expand's pruning: "
                f"{error}: {repr(traceback.format_exception(exception_type, exception_value, exception_traceback))}."
                f"Log was: {prune_response.show()}",
                error_code="UncaughtARAXiError",
            )
        # Give any unranked results a score of 0
        for result in prune_response.envelope.message.results:
            if result.score is None:
                result.score = 0

    return prune_response