예제 #1
0
    def __remove_edges_by_attribute(self, describe=False):
        """
        Removes edges from the KG.
        Allowable parameters: {'edge_type': str, 
                                'edge_attribute': str,
                                'direction': {'above', 'below'}}
        :return:
        """
        message = self.message
        parameters = self.parameters
        # make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
        if message and parameters and hasattr(
                message, 'knowledge_graph') and hasattr(
                    message.knowledge_graph, 'edges'):
            known_attributes = set()
            for edge in message.knowledge_graph.edges:
                if hasattr(edge, 'edge_attributes'):
                    if edge.edge_attributes:
                        for attribute in edge.edge_attributes:
                            known_attributes.add(attribute.name)
            # print(known_attributes)
            allowable_parameters = {
                'action': {'remove_edges_by_attribute'},
                'edge_attribute':
                known_attributes,
                'direction': {'above', 'below'},
                'threshold': {float()},
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id':
                set([
                    t for x in self.message.knowledge_graph.nodes
                    if x.qnode_ids is not None for t in x.qnode_ids
                ])
            }
        else:
            allowable_parameters = {
                'action': {'remove_edges_by_attribute'},
                'edge_attribute': {'an edge attribute name'},
                'direction': {'above', 'below'},
                'threshold': {'a floating point number'},
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id': {'a specific query node id to remove'}
            }

        # A little function to describe what this thing does
        if describe:
            brief_description = """
`remove_edges_by_attribute` removes edges from the knowledge graph (KG) based on a a certain edge attribute.
Edge attributes are a list of additional attributes for an edge.
This action interacts particularly well with `overlay()` as `overlay()` frequently adds additional edge attributes.
Use cases include:

* removing all edges that have a normalized google distance above/below a certain value `edge_attribute=ngd, direction=above, threshold=0.85` (i.e. remove edges that aren't represented well in the literature)
* removing all edges that Jaccard index above/below a certain value `edge_attribute=jaccard_index, direction=below, threshold=0.2` (i.e. all edges that have less than 20% of intermediate nodes in common)
* removing all edges with clinical information satisfying some condition `edge_attribute=chi_square, direction=above, threshold=.005` (i.e. all edges that have a chi square p-value above .005)
* etc. etc.
                
You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or
else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`
                
This can be applied to an arbitrary knowledge graph as possible edge attributes are computed dynamically (i.e. not just those created/recognized by the ARA Expander team).
"""
            allowable_parameters['brief_description'] = brief_description
            return allowable_parameters

        edge_params = self.parameters

        # try to convert the threshold to a float
        try:
            edge_params['threshold'] = float(edge_params['threshold'])
        except:
            tb = traceback.format_exc()
            error_type, error, _ = sys.exc_info()
            self.response.error(tb, error_code=error_type.__name__)
            self.response.error(f"parameter 'threshold' must be a float")
        if self.response.status != 'OK':
            return self.response

        # Make sure only allowable parameters and values have been passed
        self.check_params(allowable_parameters)
        # return if bad parameters have been passed
        if self.response.status != 'OK':
            return self.response

        if 'remove_connected_nodes' in edge_params:
            value = edge_params['remove_connected_nodes']
            if value in {'true', 'True', 't', 'T'}:
                edge_params['remove_connected_nodes'] = True
            elif value in {'false', 'False', 'f', 'F'}:
                edge_params['remove_connected_nodes'] = False
            else:
                self.response.error(
                    f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                    error_code="UnknownValue")
        else:
            edge_params['remove_connected_nodes'] = False

        if 'direction' not in edge_params:
            self.response.error(
                f"Direction must be provided, allowable directions are: {list(allowable_parameters['direction'])}",
                error_code="UnknownValue")
        if 'edge_attribute' not in edge_params:
            self.response.error(
                f"Edge attribute must be provided, allowable attributes are: {list(allowable_parameters['edge_attribute'])}",
                error_code="UnknownValue")
        if self.response.status != 'OK':
            return self.response

        # now do the call out to NGD
        from Filter_KG.remove_edges import RemoveEdges
        RE = RemoveEdges(self.response, self.message, edge_params)
        response = RE.remove_edges_by_attribute()
        return response
예제 #2
0
    def __remove_edges_by_property(self, describe=False):
        """
        Removes edges from the KG.
        Allowable parameters: {'edge_type': str, 
                                'edge_property': str,
                                'direction': {'above', 'below'}}
        :return:
        """
        message = self.message
        parameters = self.parameters

        # make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
        if message and parameters and hasattr(
                message, 'query_graph') and hasattr(message.query_graph,
                                                    'edges'):
            # check if all required parameters are provided
            if 'edge_property' not in parameters.keys():
                self.response.error(
                    f"The parameter edge_property must be provided to remove edges by propery, allowable parameters include: {set([key for x in self.message.knowledge_graph.edges for key, val in x.to_dict().items() if type(val) == str])}"
                )
            if self.response.status != 'OK':
                return self.response
            known_values = set()
            if 'edge_property' in parameters:
                for edge in message.knowledge_graph.edges:
                    if hasattr(edge, parameters['edge_property']):
                        value = edge.to_dict()[parameters['edge_property']]
                        if type(value) == str:
                            known_values.add(value)
            allowable_parameters = {
                'action': {'remove_edges_by_property'},
                'edge_property':
                set([
                    key for x in self.message.knowledge_graph.edges
                    for key, val in x.to_dict().items() if type(val) == str
                ]),
                'property_value':
                known_values,
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id':
                set([
                    t for x in self.message.knowledge_graph.nodes
                    if x.qnode_ids is not None for t in x.qnode_ids
                ])
            }
        else:
            allowable_parameters = {
                'action': {'remove_edges_by_property'},
                'edge_property': {'an edge property'},
                'property_value': {'a value for the edge property'},
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id': {'a specific query node id to remove'}
            }

        # A little function to describe what this thing does
        if describe:
            brief_description = """
`remove_edges_by_property` removes edges from the knowledge graph (KG) based on a given edge property.
Use cases include:
                
* removing all edges that were provided by a certain knowledge provider (KP) via `edge_property=provided, property_value=Pharos` to remove all edges provided by the KP Pharos.
* removing all edges that connect to a certain node via `edge_property=source_id, property_value=DOID:8398`
* removing all edges with a certain relation via `edge_property=relation, property_value=upregulates`
* removing all edges provided by another ARA via `edge_property=is_defined_by, property_value=ARAX/RTX`
* etc. etc.
                
You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or
else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`
                
This can be applied to an arbitrary knowledge graph as possible edge properties are computed dynamically (i.e. not just those created/recognized by the ARA Expander team).
"""
            allowable_parameters['brief_description'] = brief_description
            return allowable_parameters

        # Make sure only allowable parameters and values have been passed
        self.check_params(allowable_parameters)
        # return if bad parameters have been passed
        if self.response.status != 'OK':
            return self.response

        edge_params = self.parameters
        if 'remove_connected_nodes' in edge_params:
            value = edge_params['remove_connected_nodes']
            if value in {'true', 'True', 't', 'T'}:
                edge_params['remove_connected_nodes'] = True
            elif value in {'false', 'False', 'f', 'F'}:
                edge_params['remove_connected_nodes'] = False
            else:
                self.response.error(
                    f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                    error_code="UnknownValue")
        else:
            edge_params['remove_connected_nodes'] = False

        if 'edge_property' not in edge_params:
            self.response.error(
                f"Edge property must be provided, allowable properties are: {list(allowable_parameters['edge_property'])}",
                error_code="UnknownValue")
        if 'property_value' not in edge_params:
            self.response.error(
                f"Property value must be provided, allowable values are: {list(allowable_parameters['property_value'])}",
                error_code="UnknownValue")
        if self.response.status != 'OK':
            return self.response

        # now do the call out to NGD
        from Filter_KG.remove_edges import RemoveEdges
        RE = RemoveEdges(self.response, self.message, edge_params)
        response = RE.remove_edges_by_property()
        return response
예제 #3
0
    def __remove_edges_by_stats(self, describe=False):
        """
        Removes edges from the KG.
        Allowable parameters: {'edge_type': str, 
                                'edge_attribute': str,
                                'direction': {'above', 'below'}}
        :return:
        """
        message = self.message
        parameters = self.parameters
        # make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
        if message and parameters and hasattr(
                message, 'knowledge_graph') and hasattr(
                    message.knowledge_graph, 'edges'):
            known_attributes = set()
            for edge in message.knowledge_graph.edges:
                if hasattr(edge, 'edge_attributes'):
                    if edge.edge_attributes:
                        for attribute in edge.edge_attributes:
                            known_attributes.add(attribute.name)
            # print(known_attributes)
            allowable_parameters = {
                'action': {'remove_edges_by_stats'},
                'edge_attribute':
                known_attributes,
                'type': {'n', 'std', 'std_dev', 'percentile', 'p'},
                'direction': {'above', 'below'},
                'threshold': {float()},
                'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id':
                set([
                    t for x in self.message.knowledge_graph.nodes
                    if x.qnode_ids is not None for t in x.qnode_ids
                ])
            }
        else:
            allowable_parameters = {
                'action': {'remove_edges_by_stats'},
                'edge_attribute': {'an edge attribute name'},
                'type': {'n', 'top_n', 'std', 'top_std'},
                'direction': {'above', 'below'},
                'threshold': {'a floating point number'},
                'top': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id': {'a specific query node id to remove'}
            }

        # A little function to describe what this thing does
        if describe:
            brief_description = """
`remove_edges_by_stats` removes edges from the knowledge graph (KG) based on a certain edge attribute using default heuristics.
Edge attributes are a list of additional attributes for an edge.
This action interacts particularly well with `overlay()` as `overlay()` frequently adds additional edge attributes.
there are two heuristic options: `n` for removing all but the 50 best results, `std`/`std_dev` for removing all but 
the best results more than 1 standard deviation from the mean, or `percentile` to remove all but the best 
5% of results. (if not supplied this defaults to `n`)
Use cases include:

* removing all edges with normalized google distance scores but the top 50 `edge_attribute=ngd, type=n` (i.e. remove edges that aren't represented well in the literature)
* removing all edges that Jaccard index leass than 1 standard deviation above the mean. `edge_attribute=jaccard_index, type=std` (i.e. all edges that have less than 20% of intermediate nodes in common)
* etc. etc.
                
You have the option (this defaults to false) to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or
else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`

You also have the option of specifying the direction to remove and location of the split by using the options 
* `direction` with options `above`,`below`
* `threshold` specified by a floating point number
* `top` which is boolean specified by `t`, `true`, `T`, `True` and `f`, `false`, `F`, `False`
e.g. to remove all the edges with jaccard_index values greater than 0.25 standard deviations below the mean you can run the following:
`filter_kg(action=remove_edges_by_stats, edge_attribute=jaccard_index, type=std, remove_connected_nodes=f, threshold=0.25, top=f, direction=above)`
"""
            allowable_parameters['brief_description'] = brief_description
            return allowable_parameters

        edge_params = self.parameters

        # try to convert the threshold to a float
        if self.response.status != 'OK':
            return self.response

        # Make sure only allowable parameters and values have been passed
        resp = self.check_params(allowable_parameters)
        # return if bad parameters have been passed
        if self.response.status != 'OK' or resp == -1:
            return self.response

        supplied_threshhold = None
        supplied_direction = None
        supplied_top = None

        if 'threshold' in edge_params:
            try:
                edge_params['threshold'] = float(edge_params['threshold'])
            except:
                tb = traceback.format_exc()
                error_type, error, _ = sys.exc_info()
                self.response.error(tb, error_code=error_type.__name__)
                self.response.error(f"parameter 'threshold' must be a float")
            if self.response.status != 'OK':
                return self.response
            supplied_threshhold = edge_params['threshold']
        if 'direction' in edge_params:
            supplied_direction = edge_params['direction']
        if 'top' in edge_params:
            if edge_params['top'] in {'true', 'True', 't', 'T'}:
                supplied_top = True
            elif edge_params['top'] in {'false', 'False', 'f', 'F'}:
                supplied_top = False

        if 'remove_connected_nodes' in edge_params:
            value = edge_params['remove_connected_nodes']
            if value in {'true', 'True', 't', 'T'}:
                edge_params['remove_connected_nodes'] = True
            elif value in {'false', 'False', 'f', 'F'}:
                edge_params['remove_connected_nodes'] = False
            else:
                self.response.error(
                    f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                    error_code="UnknownValue")
        else:
            edge_params['remove_connected_nodes'] = False

        if 'type' in edge_params:
            if edge_params['type'] in {'n'}:
                edge_params['stat'] = 'n'
                edge_params['threshold'] = 50
            elif edge_params['type'] in {'std', 'std_dev'}:
                edge_params['stat'] = 'std'
                edge_params['threshold'] = 1
            elif edge_params['type'] in {'percentile', 'p'}:
                edge_params['stat'] = 'percentile'
                edge_params['threshold'] = 95
                if supplied_threshhold is not None:
                    if supplied_threshhold > 100 or supplied_threshhold < 0:
                        self.response.error(
                            f"Supplied value {supplied_threshhold} is not permitted. In parameter threshold, when using the percentile type allowable values are real numbers between 0 and 100.",
                            error_code="UnknownValue")
        else:
            edge_params['stat'] = 'n'
            edge_params['threshold'] = 50
        if 'edge_attribute' not in edge_params:
            self.response.error(
                f"Edge attribute must be provided, allowable attributes are: {list(allowable_parameters['edge_attribute'])}",
                error_code="UnknownValue")
        else:
            if edge_params['edge_attribute'] in {
                    'ngd', 'chi_square', 'fisher_exact',
                    'normalized_google_distance'
            }:
                edge_params['direction'] = 'above'
                edge_params['top'] = False
                if edge_params['stat'] == 'percentile':
                    edge_params['threshold'] = 1 - edge_params['threshold']
            elif edge_params['edge_attribute'] in {
                    'jaccard_index', 'observed_expected_ratio',
                    'probability_treats'
            }:
                edge_params['direction'] = 'below'
                edge_params['top'] = True
            else:
                edge_params['direction'] = 'below'
                edge_params['top'] = True

        if supplied_threshhold is not None:
            edge_params['threshold'] = supplied_threshhold
        if supplied_direction is not None:
            edge_params['direction'] = supplied_direction
        if supplied_top is not None:
            edge_params['top'] = supplied_top

        if self.response.status != 'OK':
            return self.response

        # now do the call out to NGD
        from Filter_KG.remove_edges import RemoveEdges
        RE = RemoveEdges(self.response, self.message, edge_params)
        response = RE.remove_edges_by_stats()
        return response
예제 #4
0
    def __remove_edges_by_type(self, describe=False):
        """
        Removes edges from the KG.
        Allowable parameters: {'edge_type': str, 
                                'edge_property': str,
                                'direction': {'above', 'below'}}
        :return:
        """
        message = self.message
        parameters = self.parameters
        # make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
        if message and parameters and hasattr(
                message, 'query_graph') and hasattr(message.query_graph,
                                                    'edges'):
            allowable_parameters = {
                'action': {'remove_edges_by_type'},
                'edge_type':
                set([x.type for x in self.message.knowledge_graph.edges]),
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id':
                set([
                    t for x in self.message.knowledge_graph.nodes
                    if x.qnode_ids is not None for t in x.qnode_ids
                ])
            }
        else:
            allowable_parameters = {
                'action': {'remove_edges_by_type'},
                'edge_type': {'an edge type'},
                'remove_connected_nodes':
                {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                'qnode_id': {'a specific query node id to remove'}
            }

        # A little function to describe what this thing does
        if describe:
            brief_description = """
`remove_edges_by_type` removes edges from the knowledge graph (KG) based on a given edge type.
Use cases include:
             
* removing all edges that have `edge_type=contraindicated_for`. 
* if virtual edges have been introduced with `overlay()` DSL commands, this action can remove all of them.
* etc.
            
You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or
else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`
            
This can be applied to an arbitrary knowledge graph as possible edge types are computed dynamically (i.e. not just those created/recognized by the ARA Expander team).
"""
            allowable_parameters['brief_description'] = brief_description
            return allowable_parameters

        # Make sure only allowable parameters and values have been passed
        self.check_params(allowable_parameters)
        # return if bad parameters have been passed
        if self.response.status != 'OK':
            return self.response

        edge_params = self.parameters
        if 'remove_connected_nodes' in edge_params:
            value = edge_params['remove_connected_nodes']
            if value in {'true', 'True', 't', 'T'}:
                edge_params['remove_connected_nodes'] = True
            elif value in {'false', 'False', 'f', 'F'}:
                edge_params['remove_connected_nodes'] = False
            else:
                self.response.error(
                    f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                    error_code="UnknownValue")
        else:
            edge_params['remove_connected_nodes'] = False

        # now do the call out to NGD
        from Filter_KG.remove_edges import RemoveEdges
        RE = RemoveEdges(self.response, self.message, edge_params)
        response = RE.remove_edges_by_type()
        return response
예제 #5
0
    def __remove_edges_by_attribute_default(self, describe=False):
        """
        Removes edges from the KG.
        Allowable parameters: {'edge_type': str, 
                                'edge_attribute': str,
                                'direction': {'above', 'below'}}
        :return:
        """
        message = self.message
        parameters = self.parameters
        # make a list of the allowable parameters (keys), and their possible values (values). Note that the action and corresponding name will always be in the allowable parameters
        if message and parameters and hasattr(message, 'knowledge_graph') and hasattr(message.knowledge_graph, 'edges'):
            known_attributes = set()
            for edge in message.knowledge_graph.edges:
                if hasattr(edge, 'edge_attributes'):
                    if edge.edge_attributes:
                        for attribute in edge.edge_attributes:
                            known_attributes.add(attribute.name)
            # print(known_attributes)
            allowable_parameters = {'action': {'remove_edges_by_attribute_default'},
                                    'edge_attribute': known_attributes,
                                    'type': {'n', 'top_n', 'std', 'top_std'},
                                    'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                                    'qnode_id':set([t for x in self.message.knowledge_graph.nodes if x.qnode_ids is not None for t in x.qnode_ids])
                                    }
        else:
            allowable_parameters = {'action': {'remove_edges_by_attribute_default'},
                                    'edge_attribute': {'an edge attribute name'},
                                    'type': {'n', 'top_n', 'std', 'top_std'},
                                    'remove_connected_nodes': {'true', 'false', 'True', 'False', 't', 'f', 'T', 'F'},
                                    'qnode_id':{'a specific query node id to remove'}
                                    }

        # A little function to describe what this thing does
        if describe:
            brief_description = """
`remove_edges_by_attribute_default` removes edges from the knowledge graph (KG) based on a certain edge attribute using default heuristics.
Edge attributes are a list of additional attributes for an edge.
This action interacts particularly well with `overlay()` as `overlay()` frequently adds additional edge attributes.
there are two heuristic options: `n` for removing all but the top 50 results or `std` for removing all but 
the top results more than 1 standard deviation from the mean. (if not supplied this defaults to `top_n`)
Use cases include:

* removing all edges with normalized google distance scores but the top 50 `edge_attribute=ngd, type=n` (i.e. remove edges that aren't represented well in the literature)
* removing all edges that Jaccard index leass than 1 standard deviation above the mean. `edge_attribute=jaccard_index, type=std` (i.e. all edges that have less than 20% of intermediate nodes in common)
* etc. etc.
                
You have the option to either remove all connected nodes to such edges (via `remove_connected_nodes=t`), or
else, only remove a single source/target node based on a query node id (via `remove_connected_nodes=t, qnode_id=<a query node id.>`
"""
            allowable_parameters['brief_description'] = brief_description
            return allowable_parameters

        edge_params = self.parameters

        # try to convert the threshold to a float
        if self.response.status != 'OK':
            return self.response

        # Make sure only allowable parameters and values have been passed
        self.check_params(allowable_parameters)
        # return if bad parameters have been passed
        if self.response.status != 'OK':
            return self.response

        if 'remove_connected_nodes' in edge_params:
            value = edge_params['remove_connected_nodes']
            if value in {'true', 'True', 't', 'T'}:
                edge_params['remove_connected_nodes'] = True
            elif value in {'false', 'False', 'f', 'F'}:
                edge_params['remove_connected_nodes'] = False
            else:
                self.response.error(
                    f"Supplied value {value} is not permitted. In parameter remove_connected_nodes, allowable values are: {list(allowable_parameters['remove_connected_nodes'])}",
                    error_code="UnknownValue")
        else:
            edge_params['remove_connected_nodes'] = False

        if 'type' in edge_params:
            if edge_params['type'] in {'n', 'top_n'}:
                edge_params['stat'] = 'n'
                edge_params['threshold']= 50
            elif edge_params['type'] in {'std', 'top_std'}:
                edge_params['stat'] = 'std'
                edge_params['threshold'] = 1
        else:
            edge_params['stat'] = 'n'
            edge_params['threshold']= 50
        if 'edge_attribute' not in edge_params:
            self.response.error(
                f"Edge attribute must be provided, allowable attributes are: {list(allowable_parameters['edge_attribute'])}",
                error_code="UnknownValue")
        else:
            if edge_params['edge_attribute'] in {'ngd', 'chi_square', 'fisher_exact'}:
                edge_params['direction'] = 'above'
                edge_params['top'] = False
            elif edge_params['edge_attribute'] in {'jaccard_index', 'observed_expected_ratio', 'probability_treats'}:
                edge_params['direction'] = 'below'
                edge_params['top'] = True
            else:
                edge_params['direction'] = 'below'
                edge_params['top'] = True
        if self.response.status != 'OK':
            return self.response

        # now do the call out to NGD
        from Filter_KG.remove_edges import RemoveEdges
        RE = RemoveEdges(self.response, self.message, edge_params)
        response = RE.remove_edges_by_stats()
        return response