def test_has_annotation(self):
    """Check ``edge_has_annotation`` on missing, empty, ``None``, set, and unrelated annotations."""
    key = 'Subgraph'

    # Edge data without any annotations entry.
    no_annotations = {}
    self.assertFalse(edge_has_annotation(no_annotations, key))

    # Annotations entry exists but holds nothing.
    empty_annotations = {ANNOTATIONS: {}}
    self.assertFalse(edge_has_annotation(empty_annotations, key))

    # The key is present but maps to ``None``.
    none_valued = {ANNOTATIONS: {'Subgraph': None}}
    self.assertFalse(edge_has_annotation(none_valued, key))

    # The key is present with a real value.
    populated = {ANNOTATIONS: {'Subgraph': 'value'}}
    self.assertTrue(edge_has_annotation(populated, key))

    # A different key is present; the requested one is not.
    other_key_only = {ANNOTATIONS: {'Nope': 'value'}}
    self.assertFalse(edge_has_annotation(other_key_only, key))
def calculate_subgraph_edge_overlap(
    graph: BELGraph,
    annotation: str = 'Subgraph',
) -> Tuple[
    Mapping[str, EdgeSet],
    Mapping[str, Mapping[str, EdgeSet]],
    Mapping[str, Mapping[str, EdgeSet]],
    Mapping[str, Mapping[str, float]],
]:
    """Compute the pairwise edge overlap between the sub-graphs induced by an annotation.

    Edges are grouped by their value for ``annotation``; every ordered pair of
    groups (including a group with itself) is then compared by:

    1. the edges they share (intersection),
    2. the edges in either (union), and
    3. the ratio ``len(intersection) / len(union)`` (tanimoto similarity).

    :param graph: A BEL graph
    :param annotation: The annotation to group by and compare. Defaults to 'Subgraph'
    :return: A 4-tuple of
        ``{subgraph: set of (source, target) edges}``,
        ``{subgraph 1: {subgraph 2: set of intersecting edges}}``,
        ``{subgraph 1: {subgraph 2: set of unioned edges}}``,
        ``{subgraph 1: {subgraph 2: tanimoto similarity}}``
    """
    edges_by_value = defaultdict(set)
    for source, target, data in graph.edges(data=True):
        if not edge_has_annotation(data, annotation):
            continue
        edges_by_value[data[ANNOTATIONS][annotation]].add((source, target))

    intersections: Dict[str, Dict[str, Set[EdgeSet]]] = defaultdict(dict)
    unions: Dict[str, Dict[str, Set[EdgeSet]]] = defaultdict(dict)
    similarities: Dict[str, Dict[str, float]] = defaultdict(dict)

    for left, right in itt.product(edges_by_value, repeat=2):
        left_edges = edges_by_value[left]
        right_edges = edges_by_value[right]

        shared = left_edges & right_edges
        combined = left_edges | right_edges

        intersections[left][right] = shared
        unions[left][right] = combined
        similarities[left][right] = len(shared) / len(combined)

    return edges_by_value, intersections, unions, similarities
def calculate_subgraph_edge_overlap(graph, annotation='Subgraph'):
    """Calculate the pairwise edge overlap between the subgraphs induced by an annotation.

    Edges are grouped by their value for ``annotation``; every ordered pair of
    groups (including a group with itself) is compared by:

    1. Total number of edges overlap (intersection)
    2. Percentage overlap (tanimoto similarity, ``len(intersection) / len(union)``)

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: The annotation to group by and compare. Defaults to 'Subgraph'
    :type annotation: str
    :return: A 4-tuple of
        ``{subgraph: set of (source, target) edges}``,
        ``{subgraph 1: {subgraph 2: set of intersecting edges}}``,
        ``{subgraph 1: {subgraph 2: set of unioned edges}}``,
        ``{subgraph 1: {subgraph 2: tanimoto similarity}}``
    :rtype: tuple
    """
    sg2edge = defaultdict(set)

    # ``edges_iter`` was removed in NetworkX 2.0; ``edges(data=True)`` yields the
    # same (u, v, data) triples on both NetworkX 1.x and 2.x.
    for u, v, d in graph.edges(data=True):
        if not edge_has_annotation(d, annotation):
            continue
        sg2edge[d[ANNOTATIONS][annotation]].add((u, v))

    subgraph_intersection = defaultdict(dict)
    subgraph_union = defaultdict(dict)
    result = defaultdict(dict)

    for sg1, sg2 in itt.product(sg2edge, repeat=2):
        intersection = sg2edge[sg1] & sg2edge[sg2]
        union = sg2edge[sg1] | sg2edge[sg2]

        subgraph_intersection[sg1][sg2] = intersection
        subgraph_union[sg1][sg2] = union
        # Every group holds at least one edge, so the union is never empty.
        result[sg1][sg2] = len(intersection) / len(union)

    return sg2edge, subgraph_intersection, subgraph_union, result
def get_subgraph_edges(
    graph: BELGraph,
    annotation: str,
    value: str,
    source_filter: Optional[NodePredicates] = None,
    target_filter: Optional[NodePredicates] = None,
) -> Iterable[Tuple[BaseEntity, BaseEntity, str, EdgeData]]:
    """Yield the edges of one subgraph whose endpoints both pass the given node filters.

    An edge is yielded when it carries ``annotation``, its value for that
    annotation equals ``value``, the source node passes ``source_filter`` and
    the target node passes ``target_filter``.

    :param graph: A BEL graph
    :param annotation: The annotation to search
    :param value: The annotation value to search by
    :param source_filter: Optional filter for source nodes (graph, node) -> bool.
        When ``None``, ``true_node_predicate`` is used.
    :param target_filter: Optional filter for target nodes (graph, node) -> bool.
        When ``None``, ``true_node_predicate`` is used.
    :return: An iterable of (source node, target node, key, data) for all edges that match the
        annotation/value and node filters
    """
    keep_source = true_node_predicate if source_filter is None else source_filter
    keep_target = true_node_predicate if target_filter is None else target_filter

    for source, target, key, data in graph.edges(keys=True, data=True):
        if not edge_has_annotation(data, annotation):
            continue

        value_matches = data[ANNOTATIONS][annotation] == value
        # Node filters are only consulted once the annotation value matches.
        if value_matches and keep_source(graph, source) and keep_target(graph, target):
            yield source, target, key, data
def annotation_dict_filter(data):
    """Check whether the edge matches any (annotation, value) pair from ``annotations``.

    ``annotations`` is taken from the enclosing scope and is iterated as a
    mapping from annotation key to an iterable of accepted values.

    :param dict data: A PyBEL edge data dictionary
    :return: ``True`` as soon as one key/value pair matches the edge, otherwise ``False``
    :rtype: bool
    """
    for key, accepted_values in annotations.items():
        for accepted in accepted_values:
            if edge_has_annotation(data, key) and data[ANNOTATIONS][key] == accepted:
                return True
    return False
def count_annotation_values_filtered(
    graph: BELGraph,
    annotation: str,
    source_predicate: Optional[NodePredicate] = None,
    target_predicate: Optional[NodePredicate] = None,
) -> Counter:
    """Count in how many edges each annotation value appears, restricted by node predicates.

    Only edges carrying ``annotation`` are counted. When a predicate is given,
    the corresponding endpoint must pass it; a missing (falsy) predicate places
    no restriction on that endpoint.

    See :func:`pybel_tools.utils.keep_node` for a basic filter.

    :param graph: A BEL graph
    :param annotation: The annotation to count
    :param source_predicate: A predicate (graph, node) -> bool for keeping source nodes
    :param target_predicate: A predicate (graph, node) -> bool for keeping target nodes
    :return: A Counter from {annotation value: frequency}
    """
    return Counter(
        data[ANNOTATIONS][annotation]
        for u, v, data in graph.edges(data=True)
        if edge_has_annotation(data, annotation)
        and (not source_predicate or source_predicate(graph, u))
        # The target predicate must be evaluated on the target node ``v``.
        and (not target_predicate or target_predicate(graph, v))
    )
def group_nodes_by_annotation(
    graph: BELGraph,
    annotation: str = 'Subgraph',
) -> Mapping[str, Set[BaseEntity]]:
    """Collect, per annotation value, the nodes that appear in edges carrying that value.

    :param graph: A BEL graph
    :param annotation: The annotation used to group edges. Defaults to 'Subgraph'
    :return: A plain dictionary of {annotation value: set of source and target nodes}
    """
    nodes_by_value = defaultdict(set)

    for source, target, data in graph.edges(data=True):
        if edge_has_annotation(data, annotation):
            # Both endpoints of the edge belong to the group.
            nodes_by_value[data[ANNOTATIONS][annotation]].update((source, target))

    return dict(nodes_by_value)
def group_nodes_by_annotation(graph, annotation='Subgraph'):
    """Group the nodes occurring in edges by the given annotation.

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: An annotation to use to group edges
    :type annotation: str
    :return: A plain dictionary of {annotation value: set of source and target nodes}
    :rtype: dict
    """
    result = defaultdict(set)

    # ``edges_iter`` was removed in NetworkX 2.0; ``edges(data=True)`` yields the
    # same (u, v, data) triples on both NetworkX 1.x and 2.x.
    for u, v, d in graph.edges(data=True):
        if not edge_has_annotation(d, annotation):
            continue

        group = result[d[ANNOTATIONS][annotation]]
        group.add(u)
        group.add(v)

    return dict(result)
def calculate_error_by_annotation(graph: BELGraph, annotation: str) -> Mapping[str, List[str]]:
    """Collect the names of warning exception classes, grouped by annotation value.

    Each entry of ``graph.warnings`` is unpacked as a 3-tuple whose second
    element is the exception and whose third element is the context. Warnings
    with an empty context, or whose context lacks ``annotation``, are skipped.
    A string value is used as a single group; any other iterable contributes
    the error to every value it contains.

    :param graph: A BEL graph
    :param annotation: The annotation to group errors by
    :return: A dictionary of {annotation value: list of errors}
    """
    errors_by_value = defaultdict(list)

    for _, exc, ctx in graph.warnings:
        if ctx and edge_has_annotation(ctx, annotation):
            values = ctx[ANNOTATIONS][annotation]
            error_name = exc.__class__.__name__

            # Strings are iterable too, so they must be checked first.
            if isinstance(values, str):
                errors_by_value[values].append(error_name)
            elif isinstance(values, Iterable):
                for value in values:
                    errors_by_value[value].append(error_name)

    return dict(errors_by_value)
def count_citations_by_annotation(graph, annotation):
    """Group the citation counters by subgraphs induced by the annotation.

    For each annotation value, citations are first de-duplicated per
    (source, target) node pair and then tallied, so a citation's count is the
    number of distinct node pairs in that subgraph it supports.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation to use to group the graph
    :return: A dictionary of Counters {subgraph name: Counter from {citation: frequency}},
        where a citation is a (citation type, stripped citation reference) pair
    :rtype: dict
    """
    citations = defaultdict(lambda: defaultdict(set))

    # ``edges_iter`` was removed in NetworkX 2.0; ``edges(data=True)`` yields the
    # same (u, v, data) triples on both NetworkX 1.x and 2.x.
    for u, v, data in graph.edges(data=True):
        if not edge_has_annotation(data, annotation) or CITATION not in data:
            continue

        k = data[ANNOTATIONS][annotation]
        citation = data[CITATION]
        citations[k][u, v].add((citation[CITATION_TYPE], citation[CITATION_REFERENCE].strip()))

    return {
        k: Counter(itt.chain.from_iterable(v.values()))
        for k, v in citations.items()
    }
def count_annotation_values_filtered(graph, annotation, source_filter=None, target_filter=None):
    """Count in how many edges each annotation appears in a graph, but filter out source nodes and target nodes.

    See :func:`pybel_tools.utils.keep_node` for a basic filter.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation to count
    :param source_filter: A predicate (graph, node) -> bool for keeping source nodes.
        When ``None``, ``keep_node_permissive`` is used.
    :type source_filter: types.FunctionType
    :param target_filter: A predicate (graph, node) -> bool for keeping target nodes.
        When ``None``, ``keep_node_permissive`` is used.
    :type target_filter: types.FunctionType
    :return: A Counter from {annotation value: frequency}
    :rtype: Counter
    """
    source_filter = keep_node_permissive if source_filter is None else source_filter
    target_filter = keep_node_permissive if target_filter is None else target_filter

    # ``edges_iter`` was removed in NetworkX 2.0; ``edges(data=True)`` yields the
    # same (u, v, data) triples on both NetworkX 1.x and 2.x.
    return Counter(
        data[ANNOTATIONS][annotation]
        for u, v, data in graph.edges(data=True)
        if edge_has_annotation(data, annotation) and source_filter(graph, u) and target_filter(graph, v)
    )
def count_authors_by_annotation(graph, annotation='Subgraph'):
    """Tally citation authors separately for each subgraph induced by the annotation.

    Edges lacking ``annotation``, a citation, or a citation author entry are skipped.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation to use to group the graph
    :return: A dictionary of Counters {subgraph name: Counter from {author: frequency}}
    :rtype: dict
    :raises ValueError: if an edge's citation authors are still a single string
    """
    authors_by_value = defaultdict(list)

    for data in graph_edge_data_iter(graph):
        if not edge_has_annotation(data, annotation):
            continue
        if CITATION not in data:
            continue

        citation = data[CITATION]
        if CITATION_AUTHORS not in citation:
            continue

        edge_authors = citation[CITATION_AUTHORS]
        if isinstance(edge_authors, str):
            raise ValueError('Graph should be converted with pybel.mutation.parse_authors first')

        # Append one author at a time so an empty author list creates no entry.
        for author in edge_authors:
            authors_by_value[data[ANNOTATIONS][annotation]].append(author)

    return count_defaultdict(authors_by_value)
def calculate_error_by_annotation(graph, annotation):
    """Collect the names of warning exception classes, grouped by annotation value.

    Each entry of ``graph.warnings`` is unpacked as a 4-tuple whose third
    element is the exception and whose fourth element is the context. Warnings
    with an empty context, or whose context lacks ``annotation``, are skipped.
    A string value is used as a single group; a set, tuple or list contributes
    the error to every value it contains; any other value type is ignored.

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: The annotation to group errors by
    :type annotation: str
    :return: A dictionary of {annotation value: list of errors}
    :rtype: dict[str, list[str]]
    """
    results = defaultdict(list)

    for _, _, exc, context in graph.warnings:
        if not (context and edge_has_annotation(context, annotation)):
            continue

        values = context[ANNOTATIONS][annotation]

        # Normalise the annotation value(s) into the groups that receive the error.
        if isinstance(values, str):
            groups = (values,)
        elif isinstance(values, (set, tuple, list)):
            groups = values
        else:
            groups = ()

        error_name = exc.__class__.__name__
        for group in groups:
            results[group].append(error_name)

    return dict(results)