def __remove_edges_by_attribute(self, describe=False):
    """
    Remove edges from the KG whose edge attribute lies above/below a threshold.

    Allowable parameters: {'edge_attribute': str, 'direction': {'above', 'below'},
    'threshold': float, 'remove_connected_nodes': str (boolean word), 'qnode_id': str}

    :param describe: when True, do no filtering and return the dictionary of
        allowable parameters with a 'brief_description' entry added.
    :return: the allowable-parameter dictionary (describe mode), ``self.response``
        if an error was recorded during validation, otherwise the value returned
        by ``RemoveEdges.remove_edges_by_attribute()``.
    """
    message = self.message
    parameters = self.parameters

    # Allowable parameters (keys) and their possible values (values). The action
    # name is always part of the allowable parameters.
    if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
        # Attribute names are collected from the KG itself, so any attribute
        # present on any edge is accepted.
        known_attributes = {
            attribute.name
            for edge in message.knowledge_graph.edges
            if getattr(edge, 'edge_attributes', None)
            for attribute in edge.edge_attributes
        }
        allowable_parameters = {
            'action': {'remove_edges_by_attribute'},
            'edge_attribute': known_attributes,
            'direction': {'above', 'below'},
            'threshold': {float()},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {
                t
                for x in message.knowledge_graph.nodes
                if x.qnode_ids is not None
                for t in x.qnode_ids
            },
        }
    else:
        # No usable KG (or no parameters): fall back to human-readable placeholders.
        allowable_parameters = {
            'action': {'remove_edges_by_attribute'},
            'edge_attribute': {'an edge attribute name'},
            'direction': {'above', 'below'},
            'threshold': {'a floating point number'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {'a specific query node id to remove'},
        }

    # Describe mode: return the parameter table plus a description, nothing else.
    if describe:
        brief_description = """
`remove_edges_by_attribute` removes edges from the knowledge graph (KG) based on a a certain edge attribute.
Edge attributes are a list of additional attributes for an edge.
This action interacts particularly well with `overlay()` as `overlay()` frequently adds additional edge attributes.
Use cases include:

* removing all edges that have a normalized google distance above/below a certain value `edge_attribute=ngd, direction=above, threshold=0.85` (i.e. remove edges that aren't represented well in the literature)
* removing all edges that Jaccard index above/below a certain value `edge_attribute=jaccard_index, direction=below, threshold=0.2` (i.e. all edges that have less than 20% of intermediate nodes in common)
* removing all edges with clinical information satisfying some condition `edge_attribute=chi_square, direction=above, threshold=.005` (i.e. all edges that have a chi square p-value above .005)
* etc. etc.

You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`

This can be applied to an arbitrary knowledge graph as possible edge attributes are computed dynamically (i.e. not just those created/recognized by the ARA Expander team).
"""
        allowable_parameters['brief_description'] = brief_description
        return allowable_parameters

    # NOTE: this is the same dict object as self.parameters; it is normalised in place.
    edge_params = self.parameters

    # Convert the threshold to a float before parameter checking. Only the
    # failures the lookup/conversion can produce are caught (missing key,
    # parameters not a dict, unparsable or overflowing value).
    try:
        edge_params['threshold'] = float(edge_params['threshold'])
    except (KeyError, TypeError, ValueError, OverflowError) as err:
        self.response.error(traceback.format_exc(), error_code=type(err).__name__)
        self.response.error("parameter 'threshold' must be a float")
    if self.response.status != 'OK':
        return self.response

    # Make sure only allowable parameters and values have been passed
    self.check_params(allowable_parameters)
    # return if bad parameters have been passed
    if self.response.status != 'OK':
        return self.response

    # Translate the boolean word into a real bool (default: False).
    if 'remove_connected_nodes' in edge_params:
        value = edge_params['remove_connected_nodes']
        if value in {'true', 'True', 't', 'T'}:
            edge_params['remove_connected_nodes'] = True
        elif value in {'false', 'False', 'f', 'F'}:
            edge_params['remove_connected_nodes'] = False
        else:
            self.response.error(
                f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                error_code="UnknownValue")
    else:
        edge_params['remove_connected_nodes'] = False

    # Both of these are required for attribute-based filtering.
    if 'direction' not in edge_params:
        self.response.error(
            f"Direction must be provided, allowable directions are: {list(allowable_parameters['direction'])}",
            error_code="UnknownValue")
    if 'edge_attribute' not in edge_params:
        self.response.error(
            f"Edge attribute must be provided, allowable attributes are: {list(allowable_parameters['edge_attribute'])}",
            error_code="UnknownValue")
    if self.response.status != 'OK':
        return self.response

    # Hand the validated parameters to the edge-removal implementation.
    from Filter_KG.remove_edges import RemoveEdges
    RE = RemoveEdges(self.response, self.message, edge_params)
    response = RE.remove_edges_by_attribute()
    return response
def __remove_edges_by_property(self, describe=False):
    """
    Remove edges from the KG whose given (string-valued) property has a given value.

    Allowable parameters: {'edge_property': str, 'property_value': str,
    'remove_connected_nodes': str (boolean word), 'qnode_id': str}

    :param describe: when True, return the dictionary of allowable parameters
        with a 'brief_description' entry added instead of filtering.
    :return: the allowable-parameter dictionary (describe mode), ``self.response``
        if an error was recorded during validation, otherwise the value returned
        by ``RemoveEdges.remove_edges_by_property()``.
    """
    message = self.message
    parameters = self.parameters

    # The original guard only looked at message.query_graph, yet everything
    # below reads message.knowledge_graph. Require the KG as well so that a
    # message without one falls back to the generic table instead of raising
    # AttributeError.
    has_query_edges = hasattr(message, 'query_graph') and hasattr(message.query_graph, 'edges')
    has_kg_edges = hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges')

    # Allowable parameters (keys) and their possible values (values). The action
    # name is always part of the allowable parameters.
    if message and parameters and has_query_edges and has_kg_edges:
        kg = message.knowledge_graph
        # Every key of any edge's dict form whose value is a string.
        string_properties = {
            key
            for x in kg.edges
            for key, val in x.to_dict().items()
            if isinstance(val, str)
        }

        # check if all required parameters are provided
        if 'edge_property' not in parameters:
            self.response.error(
                f"The parameter edge_property must be provided to remove edges by propery, allowable parameters include: {string_properties}"
            )
        if self.response.status != 'OK':
            return self.response

        # Collect the string values the requested property takes across the KG.
        known_values = set()
        if 'edge_property' in parameters:
            requested = parameters['edge_property']
            for edge in kg.edges:
                if hasattr(edge, requested):
                    # .get(): an attribute can exist on the object without
                    # being a key of its dict form (e.g. a method name).
                    value = edge.to_dict().get(requested)
                    if isinstance(value, str):
                        known_values.add(value)

        allowable_parameters = {
            'action': {'remove_edges_by_property'},
            'edge_property': string_properties,
            'property_value': known_values,
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {
                t
                for x in kg.nodes
                if x.qnode_ids is not None
                for t in x.qnode_ids
            },
        }
    else:
        # No usable graph (or no parameters): fall back to human-readable placeholders.
        allowable_parameters = {
            'action': {'remove_edges_by_property'},
            'edge_property': {'an edge property'},
            'property_value': {'a value for the edge property'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {'a specific query node id to remove'},
        }

    # Describe mode: return the parameter table plus a description, nothing else.
    if describe:
        brief_description = """
`remove_edges_by_property` removes edges from the knowledge graph (KG) based on a given edge property.
Use cases include:

* removing all edges that were provided by a certain knowledge provider (KP) via `edge_property=provided, property_value=Pharos` to remove all edges provided by the KP Pharos.
* removing all edges that connect to a certain node via `edge_property=source_id, property_value=DOID:8398`
* removing all edges with a certain relation via `edge_property=relation, property_value=upregulates`
* removing all edges provided by another ARA via `edge_property=is_defined_by, property_value=ARAX/RTX`
* etc. etc.

You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`

This can be applied to an arbitrary knowledge graph as possible edge properties are computed dynamically (i.e. not just those created/recognized by the ARA Expander team).
"""
        allowable_parameters['brief_description'] = brief_description
        return allowable_parameters

    # Make sure only allowable parameters and values have been passed
    self.check_params(allowable_parameters)
    # return if bad parameters have been passed
    if self.response.status != 'OK':
        return self.response

    # NOTE: this is the same dict object as self.parameters; it is normalised in place.
    edge_params = self.parameters

    # Translate the boolean word into a real bool (default: False).
    if 'remove_connected_nodes' in edge_params:
        value = edge_params['remove_connected_nodes']
        if value in {'true', 'True', 't', 'T'}:
            edge_params['remove_connected_nodes'] = True
        elif value in {'false', 'False', 'f', 'F'}:
            edge_params['remove_connected_nodes'] = False
        else:
            self.response.error(
                f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                error_code="UnknownValue")
    else:
        edge_params['remove_connected_nodes'] = False

    # Both of these are required for property-based filtering.
    if 'edge_property' not in edge_params:
        self.response.error(
            f"Edge property must be provided, allowable properties are: {list(allowable_parameters['edge_property'])}",
            error_code="UnknownValue")
    if 'property_value' not in edge_params:
        self.response.error(
            f"Property value must be provided, allowable values are: {list(allowable_parameters['property_value'])}",
            error_code="UnknownValue")
    if self.response.status != 'OK':
        return self.response

    # Hand the validated parameters to the edge-removal implementation.
    from Filter_KG.remove_edges import RemoveEdges
    RE = RemoveEdges(self.response, self.message, edge_params)
    response = RE.remove_edges_by_property()
    return response
def __remove_edges_by_stats(self, describe=False):
    """
    Remove edges from the KG based on a statistic of one edge attribute.

    Allowable parameters: {'edge_attribute': str, 'type': {'n', 'std', 'std_dev',
    'percentile', 'p'}, 'direction': {'above', 'below'}, 'threshold': float,
    'top': str (boolean word), 'remove_connected_nodes': str (boolean word),
    'qnode_id': str}

    Defaults are derived from 'type' (n -> 50, std -> 1, percentile -> 95) and
    from the attribute name (direction/top); explicitly supplied 'threshold',
    'direction' and 'top' values override the derived ones.

    :param describe: when True, return the dictionary of allowable parameters
        with a 'brief_description' entry added instead of filtering.
    :return: the allowable-parameter dictionary (describe mode), ``self.response``
        if an error was recorded during validation, otherwise the value returned
        by ``RemoveEdges.remove_edges_by_stats()``.
    """
    message = self.message
    parameters = self.parameters

    # Allowable parameters (keys) and their possible values (values). The action
    # name is always part of the allowable parameters.
    if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
        # Attribute names are collected from the KG itself.
        known_attributes = {
            attribute.name
            for edge in message.knowledge_graph.edges
            if getattr(edge, 'edge_attributes', None)
            for attribute in edge.edge_attributes
        }
        allowable_parameters = {
            'action': {'remove_edges_by_stats'},
            'edge_attribute': known_attributes,
            'type': {'n', 'std', 'std_dev', 'percentile', 'p'},
            'direction': {'above', 'below'},
            'threshold': {float()},
            'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {
                t
                for x in message.knowledge_graph.nodes
                if x.qnode_ids is not None
                for t in x.qnode_ids
            },
        }
    else:
        # No usable KG (or no parameters): placeholders. The 'type' values are
        # the ones this method actually maps to a statistic below (the previous
        # fallback advertised 'top_n'/'top_std', which are not handled here).
        allowable_parameters = {
            'action': {'remove_edges_by_stats'},
            'edge_attribute': {'an edge attribute name'},
            'type': {'n', 'std', 'std_dev', 'percentile', 'p'},
            'direction': {'above', 'below'},
            'threshold': {'a floating point number'},
            'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {'a specific query node id to remove'},
        }

    # Describe mode: return the parameter table plus a description, nothing else.
    if describe:
        brief_description = """
`remove_edges_by_stats` removes edges from the knowledge graph (KG) based on a certain edge attribute using default heuristics.
Edge attributes are a list of additional attributes for an edge.
This action interacts particularly well with `overlay()` as `overlay()` frequently adds additional edge attributes.
there are two heuristic options: `n` for removing all but the 50 best results, `std`/`std_dev` for removing all but the best results more than 1 standard deviation from the mean, or `percentile` to remove all but the best 5% of results. (if not supplied this defaults to `n`)
Use cases include:

* removing all edges with normalized google distance scores but the top 50 `edge_attribute=ngd, type=n` (i.e. remove edges that aren't represented well in the literature)
* removing all edges that Jaccard index leass than 1 standard deviation above the mean. `edge_attribute=jaccard_index, type=std` (i.e. all edges that have less than 20% of intermediate nodes in common)
* etc. etc.

You have the option (this defaults to false) to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`

You also have the option of specifying the direction to remove and location of the split by using the options
* `direction` with options `above`,`below`
* `threshold` specified by a floating point number
* `top` which is boolean specified by `t`, `true`, `T`, `True` and `f`, `false`, `F`, `False`
e.g. to remove all the edges with jaccard_index values greater than 0.25 standard deviations below the mean you can run the following:
`filter_kg(action=remove_edges_by_stats, edge_attribute=jaccard_index, type=std, remove_connected_nodes=f, threshold=0.25, top=f, direction=above)`
"""
        allowable_parameters['brief_description'] = brief_description
        return allowable_parameters

    # NOTE: this is the same dict object as self.parameters; it is normalised in place.
    edge_params = self.parameters

    # Bail out early if an earlier step already recorded an error.
    if self.response.status != 'OK':
        return self.response

    # Make sure only allowable parameters and values have been passed
    resp = self.check_params(allowable_parameters)
    # return if bad parameters have been passed
    if self.response.status != 'OK' or resp == -1:
        return self.response

    # Remember what the caller supplied explicitly; the defaults computed below
    # are overridden by these at the end.
    supplied_threshhold = None
    supplied_direction = None
    supplied_top = None
    if 'threshold' in edge_params:
        try:
            edge_params['threshold'] = float(edge_params['threshold'])
        except (TypeError, ValueError, OverflowError) as err:
            self.response.error(traceback.format_exc(), error_code=type(err).__name__)
            self.response.error("parameter 'threshold' must be a float")
        if self.response.status != 'OK':
            return self.response
        supplied_threshhold = edge_params['threshold']
    if 'direction' in edge_params:
        supplied_direction = edge_params['direction']
    if 'top' in edge_params:
        if edge_params['top'] in {'true', 'True', 't', 'T'}:
            supplied_top = True
        elif edge_params['top'] in {'false', 'False', 'f', 'F'}:
            supplied_top = False

    # Translate the boolean word into a real bool (default: False).
    if 'remove_connected_nodes' in edge_params:
        value = edge_params['remove_connected_nodes']
        if value in {'true', 'True', 't', 'T'}:
            edge_params['remove_connected_nodes'] = True
        elif value in {'false', 'False', 'f', 'F'}:
            edge_params['remove_connected_nodes'] = False
        else:
            self.response.error(
                f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                error_code="UnknownValue")
    else:
        edge_params['remove_connected_nodes'] = False

    # Map the requested heuristic onto a statistic and its default threshold.
    if 'type' in edge_params:
        if edge_params['type'] in {'n'}:
            edge_params['stat'] = 'n'
            edge_params['threshold'] = 50
        elif edge_params['type'] in {'std', 'std_dev'}:
            edge_params['stat'] = 'std'
            edge_params['threshold'] = 1
        elif edge_params['type'] in {'percentile', 'p'}:
            edge_params['stat'] = 'percentile'
            edge_params['threshold'] = 95
            if supplied_threshhold is not None:
                if supplied_threshhold > 100 or supplied_threshhold < 0:
                    self.response.error(
                        f"Supplied value {supplied_threshhold} is not permitted. In parameter threshold, when using the percentile type allowable values are real numbers between 0 and 100.",
                        error_code="UnknownValue")
    else:
        edge_params['stat'] = 'n'
        edge_params['threshold'] = 50

    # Pick the default direction from the attribute: for distance / p-value
    # style attributes the edges *above* the cut are removed, otherwise the
    # edges *below* it.
    if 'edge_attribute' not in edge_params:
        self.response.error(
            f"Edge attribute must be provided, allowable attributes are: {list(allowable_parameters['edge_attribute'])}",
            error_code="UnknownValue")
    else:
        if edge_params['edge_attribute'] in {'ngd', 'chi_square', 'fisher_exact', 'normalized_google_distance'}:
            edge_params['direction'] = 'above'
            edge_params['top'] = False
            # Percentiles are on a 0-100 scale here (default 95, validated as
            # 0..100 above), so mirroring the cut is 100 - p, not 1 - p.
            # NOTE(review): assumes RemoveEdges consumes a 0-100 percentile — confirm.
            if edge_params.get('stat') == 'percentile':
                edge_params['threshold'] = 100 - edge_params['threshold']
        elif edge_params['edge_attribute'] in {'jaccard_index', 'observed_expected_ratio', 'probability_treats'}:
            edge_params['direction'] = 'below'
            edge_params['top'] = True
        else:
            edge_params['direction'] = 'below'
            edge_params['top'] = True

    # Explicit caller choices win over the derived defaults.
    if supplied_threshhold is not None:
        edge_params['threshold'] = supplied_threshhold
    if supplied_direction is not None:
        edge_params['direction'] = supplied_direction
    if supplied_top is not None:
        edge_params['top'] = supplied_top

    if self.response.status != 'OK':
        return self.response

    # Hand the validated parameters to the edge-removal implementation.
    from Filter_KG.remove_edges import RemoveEdges
    RE = RemoveEdges(self.response, self.message, edge_params)
    response = RE.remove_edges_by_stats()
    return response
def __remove_edges_by_type(self, describe=False):
    """
    Remove edges of a given type from the KG.

    Allowable parameters: {'edge_type': str, 'remove_connected_nodes': str
    (boolean word), 'qnode_id': str}

    :param describe: when True, return the dictionary of allowable parameters
        with a 'brief_description' entry added instead of filtering.
    :return: the allowable-parameter dictionary (describe mode), ``self.response``
        if an error was recorded during validation, otherwise the value returned
        by ``RemoveEdges.remove_edges_by_type()``.
    """
    message = self.message
    parameters = self.parameters

    # The original guard only looked at message.query_graph, yet the table below
    # is built from message.knowledge_graph. Require the KG as well so that a
    # message without one falls back to the generic table instead of raising
    # AttributeError.
    has_query_edges = hasattr(message, 'query_graph') and hasattr(message.query_graph, 'edges')
    has_kg_edges = hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges')

    # Allowable parameters (keys) and their possible values (values). The action
    # name is always part of the allowable parameters.
    if message and parameters and has_query_edges and has_kg_edges:
        kg = message.knowledge_graph
        allowable_parameters = {
            'action': {'remove_edges_by_type'},
            'edge_type': {x.type for x in kg.edges},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {
                t
                for x in kg.nodes
                if x.qnode_ids is not None
                for t in x.qnode_ids
            },
        }
    else:
        # No usable graph (or no parameters): fall back to human-readable placeholders.
        allowable_parameters = {
            'action': {'remove_edges_by_type'},
            'edge_type': {'an edge type'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {'a specific query node id to remove'},
        }

    # Describe mode: return the parameter table plus a description, nothing else.
    if describe:
        brief_description = """
`remove_edges_by_type` removes edges from the knowledge graph (KG) based on a given edge type.
Use cases include:

* removing all edges that have `edge_type=contraindicated_for`.
* if virtual edges have been introduced with `overlay()` DSL commands, this action can remove all of them.
* etc.

You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`

This can be applied to an arbitrary knowledge graph as possible edge types are computed dynamically (i.e. not just those created/recognized by the ARA Expander team).
"""
        allowable_parameters['brief_description'] = brief_description
        return allowable_parameters

    # Make sure only allowable parameters and values have been passed
    self.check_params(allowable_parameters)
    # return if bad parameters have been passed
    if self.response.status != 'OK':
        return self.response

    # NOTE: this is the same dict object as self.parameters; it is normalised in place.
    edge_params = self.parameters

    # Translate the boolean word into a real bool (default: False).
    if 'remove_connected_nodes' in edge_params:
        value = edge_params['remove_connected_nodes']
        if value in {'true', 'True', 't', 'T'}:
            edge_params['remove_connected_nodes'] = True
        elif value in {'false', 'False', 'f', 'F'}:
            edge_params['remove_connected_nodes'] = False
        else:
            self.response.error(
                f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                error_code="UnknownValue")
    else:
        edge_params['remove_connected_nodes'] = False

    # 'edge_type' is the one required parameter; report its absence the same
    # way the sibling actions report their required parameters.
    if 'edge_type' not in edge_params:
        self.response.error(
            f"Edge type must be provided, allowable types are: {list(allowable_parameters['edge_type'])}",
            error_code="UnknownValue")

    # Do not call into RemoveEdges with parameters that failed validation.
    if self.response.status != 'OK':
        return self.response

    # Hand the validated parameters to the edge-removal implementation.
    from Filter_KG.remove_edges import RemoveEdges
    RE = RemoveEdges(self.response, self.message, edge_params)
    response = RE.remove_edges_by_type()
    return response
def __remove_edges_by_attribute_default(self, describe=False):
    """
    Remove edges from the KG based on an edge attribute, using default heuristics.

    Allowable parameters: {'edge_attribute': str, 'type': {'n', 'top_n', 'std',
    'top_std'}, 'remove_connected_nodes': str (boolean word), 'qnode_id': str}

    The statistic/threshold are derived from 'type' (n/top_n -> 50,
    std/top_std -> 1; default n) and the direction from the attribute name.
    The actual removal is delegated to ``RemoveEdges.remove_edges_by_stats()``.

    :param describe: when True, return the dictionary of allowable parameters
        with a 'brief_description' entry added instead of filtering.
    :return: the allowable-parameter dictionary (describe mode), ``self.response``
        if an error was recorded during validation, otherwise the value returned
        by ``RemoveEdges.remove_edges_by_stats()``.
    """
    message = self.message
    parameters = self.parameters

    # Allowable parameters (keys) and their possible values (values). The action
    # name is always part of the allowable parameters.
    if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
        # Attribute names are collected from the KG itself.
        known_attributes = {
            attribute.name
            for edge in message.knowledge_graph.edges
            if getattr(edge, 'edge_attributes', None)
            for attribute in edge.edge_attributes
        }
        allowable_parameters = {
            'action': {'remove_edges_by_attribute_default'},
            'edge_attribute': known_attributes,
            'type': {'n', 'top_n', 'std', 'top_std'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {
                t
                for x in message.knowledge_graph.nodes
                if x.qnode_ids is not None
                for t in x.qnode_ids
            },
        }
    else:
        # No usable KG (or no parameters): fall back to human-readable placeholders.
        allowable_parameters = {
            'action': {'remove_edges_by_attribute_default'},
            'edge_attribute': {'an edge attribute name'},
            'type': {'n', 'top_n', 'std', 'top_std'},
            'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
            'qnode_id': {'a specific query node id to remove'},
        }

    # Describe mode: return the parameter table plus a description, nothing else.
    if describe:
        brief_description = """
`remove_edges_by_attribute_default` removes edges from the knowledge graph (KG) based on a certain edge attribute using default heuristics.
Edge attributes are a list of additional attributes for an edge.
This action interacts particularly well with `overlay()` as `overlay()` frequently adds additional edge attributes.
there are two heuristic options: `n` for removing all but the top 50 results or `std` for removing all but the top results more than 1 standard deviation from the mean. (if not supplied this defaults to `top_n`)
Use cases include:

* removing all edges with normalized google distance scores but the top 50 `edge_attribute=ngd, type=n` (i.e. remove edges that aren't represented well in the literature)
* removing all edges that Jaccard index leass than 1 standard deviation above the mean. `edge_attribute=jaccard_index, type=std` (i.e. all edges that have less than 20% of intermediate nodes in common)
* etc. etc.

You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`
"""
        allowable_parameters['brief_description'] = brief_description
        return allowable_parameters

    # NOTE: this is the same dict object as self.parameters; it is normalised in place.
    edge_params = self.parameters

    # Bail out early if an earlier step already recorded an error.
    if self.response.status != 'OK':
        return self.response

    # Make sure only allowable parameters and values have been passed
    self.check_params(allowable_parameters)
    # return if bad parameters have been passed
    if self.response.status != 'OK':
        return self.response

    # Translate the boolean word into a real bool (default: False).
    if 'remove_connected_nodes' in edge_params:
        value = edge_params['remove_connected_nodes']
        if value in {'true', 'True', 't', 'T'}:
            edge_params['remove_connected_nodes'] = True
        elif value in {'false', 'False', 'f', 'F'}:
            edge_params['remove_connected_nodes'] = False
        else:
            self.response.error(
                f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                error_code="UnknownValue")
    else:
        edge_params['remove_connected_nodes'] = False

    # Map the requested heuristic onto a statistic and its default threshold.
    if 'type' in edge_params:
        if edge_params['type'] in {'n', 'top_n'}:
            edge_params['stat'] = 'n'
            edge_params['threshold'] = 50
        elif edge_params['type'] in {'std', 'top_std'}:
            edge_params['stat'] = 'std'
            edge_params['threshold'] = 1
    else:
        edge_params['stat'] = 'n'
        edge_params['threshold'] = 50

    # Pick the direction from the attribute: for distance / p-value style
    # attributes the edges *above* the cut are removed, otherwise the edges
    # *below* it. 'normalized_google_distance' is grouped with 'ngd', matching
    # the grouping used by __remove_edges_by_stats.
    if 'edge_attribute' not in edge_params:
        self.response.error(
            f"Edge attribute must be provided, allowable attributes are: {list(allowable_parameters['edge_attribute'])}",
            error_code="UnknownValue")
    else:
        if edge_params['edge_attribute'] in {'ngd', 'chi_square', 'fisher_exact', 'normalized_google_distance'}:
            edge_params['direction'] = 'above'
            edge_params['top'] = False
        elif edge_params['edge_attribute'] in {'jaccard_index', 'observed_expected_ratio', 'probability_treats'}:
            edge_params['direction'] = 'below'
            edge_params['top'] = True
        else:
            edge_params['direction'] = 'below'
            edge_params['top'] = True

    if self.response.status != 'OK':
        return self.response

    # Hand the validated parameters to the edge-removal implementation.
    from Filter_KG.remove_edges import RemoveEdges
    RE = RemoveEdges(self.response, self.message, edge_params)
    response = RE.remove_edges_by_stats()
    return response