Esempio n. 1
0
 def test_has_annotation(self):
     """Check ``edge_has_annotation`` on absent, empty, null, present, and non-matching annotations."""
     key = 'Subgraph'

     # Edge data without any annotations entry.
     no_annotations = {}
     self.assertFalse(edge_has_annotation(no_annotations, key))

     # Annotations entry exists but holds nothing.
     empty_annotations = {ANNOTATIONS: {}}
     self.assertFalse(edge_has_annotation(empty_annotations, key))

     # The key is present but its value is None.
     null_annotation = {ANNOTATIONS: {'Subgraph': None}}
     self.assertFalse(edge_has_annotation(null_annotation, key))

     # The key is present with a real value.
     valued_annotation = {ANNOTATIONS: {'Subgraph': 'value'}}
     self.assertTrue(edge_has_annotation(valued_annotation, key))

     # Only a different key is annotated.
     other_annotation = {ANNOTATIONS: {'Nope': 'value'}}
     self.assertFalse(edge_has_annotation(other_annotation, key))
Esempio n. 2
0
def calculate_subgraph_edge_overlap(
    graph: BELGraph,
    annotation: str = 'Subgraph',
) -> Tuple[Mapping[str, EdgeSet], Mapping[str, Mapping[str, EdgeSet]], Mapping[
        str, Mapping[str, EdgeSet]], Mapping[str, Mapping[str, float]], ]:
    """Compute pairwise edge overlap between the sub-graphs induced by an annotation.

    Edges are grouped by their value for ``annotation``; edges for which
    ``edge_has_annotation`` is false are ignored. Every ordered pair of
    annotation values (including a value paired with itself) is then compared.

    :param graph: A BEL graph
    :param annotation: The annotation to group by and compare. Defaults to 'Subgraph'
    :return: A 4-tuple of

             1. ``{value: set of (u, v) edges}``
             2. ``{value 1: {value 2: set of edges in both}}``
             3. ``{value 1: {value 2: set of edges in either}}``
             4. ``{value 1: {value 2: tanimoto similarity}}``, i.e. the size of
                the intersection divided by the size of the union
    """
    edges_by_value = defaultdict(set)

    for source, target, data in graph.edges(data=True):
        if not edge_has_annotation(data, annotation):
            continue
        edges_by_value[data[ANNOTATIONS][annotation]].add((source, target))

    shared: Dict[str, Dict[str, EdgeSet]] = defaultdict(dict)
    combined: Dict[str, Dict[str, EdgeSet]] = defaultdict(dict)
    similarity: Dict[str, Dict[str, float]] = defaultdict(dict)

    for first, second in itt.product(edges_by_value, repeat=2):
        first_edges = edges_by_value[first]
        second_edges = edges_by_value[second]

        in_both = first_edges & second_edges
        in_either = first_edges | second_edges

        shared[first][second] = in_both
        combined[first][second] = in_either
        similarity[first][second] = len(in_both) / len(in_either)

    return edges_by_value, shared, combined, similarity
Esempio n. 3
0
def calculate_subgraph_edge_overlap(graph, annotation='Subgraph'):
    """Compute the pairwise edge overlap between the subgraphs induced by an annotation.

    Edges are grouped by their value for ``annotation``; edges for which
    ``edge_has_annotation`` is false are skipped. Every ordered pair of
    annotation values (including a value paired with itself) is compared by:

    1. Total number of edges overlap (intersection)
    2. Percentage overlap (tanimoto similarity)

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: The annotation to group by and compare. Defaults to 'Subgraph'
    :type annotation: str
    :return: A 4-tuple of ``{subgraph: set of edges}``,
             ``{subgraph 1: {subgraph 2: set of intersecting edges}}``,
             ``{subgraph 1: {subgraph 2: set of unioned edges}}``, and
             ``{subgraph 1: {subgraph 2: tanimoto similarity}}``
    """
    sg2edge = defaultdict(set)

    # ``edges(data=True)`` is available on both NetworkX 1.x and 2.x graphs,
    # whereas ``edges_iter`` was removed in NetworkX 2.0.
    for u, v, d in graph.edges(data=True):
        if not edge_has_annotation(d, annotation):
            continue
        sg2edge[d[ANNOTATIONS][annotation]].add((u, v))

    subgraph_intersection = defaultdict(dict)
    subgraph_union = defaultdict(dict)
    result = defaultdict(dict)

    for sg1, sg2 in itt.product(sg2edge, repeat=2):
        intersection = sg2edge[sg1] & sg2edge[sg2]
        union = sg2edge[sg1] | sg2edge[sg2]

        subgraph_intersection[sg1][sg2] = intersection
        subgraph_union[sg1][sg2] = union
        # Every group holds at least one edge, so the union is never empty.
        result[sg1][sg2] = len(intersection) / len(union)

    return sg2edge, subgraph_intersection, subgraph_union, result
Esempio n. 4
0
def get_subgraph_edges(
    graph: BELGraph,
    annotation: str,
    value: str,
    source_filter: Optional[NodePredicates] = None,
    target_filter: Optional[NodePredicates] = None,
) -> Iterable[Tuple[BaseEntity, BaseEntity, str, EdgeData]]:
    """Yield the edges annotated with a given value whose endpoints pass the node filters.

    :param graph: A BEL graph
    :param annotation: The annotation to search
    :param value: The annotation value an edge must carry to be yielded
    :param source_filter: Optional filter for source nodes (graph, node) -> bool.
                          Falls back to ``true_node_predicate`` when omitted.
    :param target_filter: Optional filter for target nodes (graph, node) -> bool.
                          Falls back to ``true_node_predicate`` when omitted.
    :return: An iterable of (source node, target node, key, data) for all edges that match the annotation/value and
             node filters
    """
    keep_source = true_node_predicate if source_filter is None else source_filter
    keep_target = true_node_predicate if target_filter is None else target_filter

    for source, target, key, edge_data in graph.edges(keys=True, data=True):
        if edge_has_annotation(edge_data, annotation):
            # The annotation value is compared first; the node filters only
            # run for edges that already match it.
            if edge_data[ANNOTATIONS][annotation] == value:
                if keep_source(graph, source) and keep_target(graph, target):
                    yield source, target, key, edge_data
Esempio n. 5
0
    def annotation_dict_filter(data):
        """Return whether the edge carries at least one of the wanted annotation values.

        The wanted values come from the enclosing scope's ``annotations``
        mapping of ``{annotation key: iterable of values}``.

        :param dict data: A PyBEL edge data dictionary
        :rtype: bool
        """
        for key, values in annotations.items():
            for value in values:
                # Stop at the first key/value pair the edge matches.
                if edge_has_annotation(data, key) and data[ANNOTATIONS][key] == value:
                    return True
        return False
Esempio n. 6
0
def count_annotation_values_filtered(
    graph: BELGraph,
    annotation: str,
    source_predicate: Optional[NodePredicate] = None,
    target_predicate: Optional[NodePredicate] = None,
) -> Counter:
    """Count in how many edges each annotation appears in a graph, but filter out source nodes and target nodes.

    Only edges for which ``edge_has_annotation`` is true are counted. A
    predicate that is ``None`` (or otherwise falsy) places no restriction on
    its side of the edge.

    See :func:`pybel_tools.utils.keep_node` for a basic filter.

    :param graph: A BEL graph
    :param annotation: The annotation to count
    :param source_predicate: A predicate (graph, node) -> bool for keeping source nodes
    :param target_predicate: A predicate (graph, node) -> bool for keeping target nodes
    :return: A Counter from {annotation value: frequency}
    """
    return Counter(
        data[ANNOTATIONS][annotation]
        for u, v, data in graph.edges(data=True)
        if edge_has_annotation(data, annotation)
        and (not source_predicate or source_predicate(graph, u))
        # The target predicate is applied to the target node ``v``.
        and (not target_predicate or target_predicate(graph, v))
    )
Esempio n. 7
0
def group_nodes_by_annotation(
    graph: BELGraph,
    annotation: str = 'Subgraph',
) -> Mapping[str, Set[BaseEntity]]:
    """Collect, for each value of the annotation, the nodes at either end of edges carrying it.

    Edges for which ``edge_has_annotation`` is false contribute nothing.

    :param graph: A BEL graph
    :param annotation: The annotation whose values become the keys of the result
    :return: A plain dictionary of {annotation value: set of nodes}
    """
    nodes_by_value = defaultdict(set)

    for source, target, data in graph.edges(data=True):
        if edge_has_annotation(data, annotation):
            nodes_by_value[data[ANNOTATIONS][annotation]].update((source, target))

    return dict(nodes_by_value)
Esempio n. 8
0
def group_nodes_by_annotation(graph, annotation='Subgraph'):
    """Group the nodes occurring in edges by the given annotation.

    Edges for which ``edge_has_annotation`` is false are skipped. Both the
    source and the target of every remaining edge are added to the group
    named by the edge's value for ``annotation``.

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: An annotation to use to group edges
    :type annotation: str
    :return: dict of sets of BELGraph nodes
    :rtype: dict
    """
    result = defaultdict(set)

    # ``edges(data=True)`` is available on both NetworkX 1.x and 2.x graphs,
    # whereas ``edges_iter`` was removed in NetworkX 2.0.
    for u, v, d in graph.edges(data=True):
        if not edge_has_annotation(d, annotation):
            continue

        group = result[d[ANNOTATIONS][annotation]]
        group.add(u)
        group.add(v)

    return dict(result)
Esempio n. 9
0
def calculate_error_by_annotation(graph: BELGraph, annotation: str) -> Mapping[str, List[str]]:
    """Collect the class names of the graph's warnings, keyed by annotation value.

    Warnings with an empty context, or whose context does not pass
    ``edge_has_annotation``, are ignored. A string annotation value gets the
    error name directly; any other iterable gets it once per contained value.

    :param graph: A BEL graph whose ``warnings`` are (_, exception, context) triples
    :param annotation: The annotation to group errors by
    :return: A dictionary of {annotation value: list of errors}
    """
    errors_by_value = defaultdict(list)

    for _, exc, ctx in graph.warnings:
        if ctx and edge_has_annotation(ctx, annotation):
            annotated = ctx[ANNOTATIONS][annotation]
            error_name = exc.__class__.__name__

            # A string is itself iterable, so it has to be recognised first.
            if isinstance(annotated, str):
                errors_by_value[annotated].append(error_name)
            elif isinstance(annotated, Iterable):
                for single_value in annotated:
                    errors_by_value[single_value].append(error_name)

    return dict(errors_by_value)
Esempio n. 10
0
def count_citations_by_annotation(graph, annotation):
    """Group the citation counters by subgraphs induced by the annotation.

    Edges that do not pass ``edge_has_annotation`` or that carry no citation
    are skipped. Citations are first de-duplicated per (source, target) node
    pair, so each citation is counted once for every node pair it supports
    within a subgraph.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation to use to group the graph
    :return: A dictionary of Counters {subgraph name: Counter from {citation: frequency}},
             where a citation is a (citation type, stripped citation reference) pair
    :rtype: dict
    """
    citations = defaultdict(lambda: defaultdict(set))

    # ``edges(data=True)`` is available on both NetworkX 1.x and 2.x graphs,
    # whereas ``edges_iter`` was removed in NetworkX 2.0.
    for u, v, data in graph.edges(data=True):
        if not edge_has_annotation(data, annotation) or CITATION not in data:
            continue

        k = data[ANNOTATIONS][annotation]
        citation = data[CITATION]

        citations[k][u, v].add((citation[CITATION_TYPE],
                                citation[CITATION_REFERENCE].strip()))

    return {
        k: Counter(itt.chain.from_iterable(v.values()))
        for k, v in citations.items()
    }
Esempio n. 11
0
def count_annotation_values_filtered(graph, annotation, source_filter=None, target_filter=None):
    """Count in how many edges each annotation appears in a graph, but filter out source nodes and target nodes.

    Only edges for which ``edge_has_annotation`` is true are counted. A
    filter left as ``None`` is replaced by ``keep_node_permissive``.

    See :func:`pybel_tools.utils.keep_node` for a basic filter.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation to count
    :param source_filter: A predicate (graph, node) -> bool for keeping source nodes
    :type source_filter: types.FunctionType
    :param target_filter: A predicate (graph, node) -> bool for keeping target nodes
    :type target_filter: types.FunctionType
    :return: A Counter from {annotation value: frequency}
    :rtype: Counter
    """
    source_filter = keep_node_permissive if source_filter is None else source_filter
    target_filter = keep_node_permissive if target_filter is None else target_filter

    # ``edges(data=True)`` is available on both NetworkX 1.x and 2.x graphs,
    # whereas ``edges_iter`` was removed in NetworkX 2.0.
    return Counter(
        data[ANNOTATIONS][annotation]
        for u, v, data in graph.edges(data=True)
        if edge_has_annotation(data, annotation) and source_filter(graph, u) and target_filter(graph, v)
    )
Esempio n. 12
0
def count_authors_by_annotation(graph, annotation='Subgraph'):
    """Tally authors separately for each subgraph induced by the annotation.

    An edge contributes only if it passes ``edge_has_annotation``, has a
    citation, and that citation lists authors. The collected per-subgraph
    author lists are handed to ``count_defaultdict`` to produce the result.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation to use to group the graph
    :return: A dictionary of Counters {subgraph name: Counter from {author: frequency}}
    :rtype: dict
    :raises ValueError: if an edge's authors are still a single string rather than a collection
    """
    authors_by_value = defaultdict(list)

    for data in graph_edge_data_iter(graph):
        if not edge_has_annotation(data, annotation):
            continue
        if CITATION not in data:
            continue

        citation = data[CITATION]
        if CITATION_AUTHORS not in citation:
            continue

        edge_authors = citation[CITATION_AUTHORS]
        if isinstance(edge_authors, str):
            raise ValueError(
                'Graph should be converted with pybel.mutation.parse_authors first'
            )

        # Append one author at a time so that an edge with an empty author
        # collection does not create an entry for its subgraph.
        for name in edge_authors:
            authors_by_value[data[ANNOTATIONS][annotation]].append(name)

    return count_defaultdict(authors_by_value)
Esempio n. 13
0
def calculate_error_by_annotation(graph, annotation):
    """Collect the class names of the graph's warnings, keyed by annotation value.

    Warnings with an empty context, or whose context does not pass
    ``edge_has_annotation``, are ignored. A string annotation value gets the
    error name directly; a set, tuple, or list gets it once per element.
    Values of any other type are ignored.

    :param pybel.BELGraph graph: A BEL graph
    :param annotation: The annotation to group errors by
    :type annotation: str
    :return: A dictionary of {annotation value: list of errors}
    :rtype: dict[str, list[str]]
    """
    errors_by_value = defaultdict(list)

    # Each warning is (line number, line, exception, context); only the last
    # two are needed here.
    for _, _, error, ctx in graph.warnings:
        if ctx and edge_has_annotation(ctx, annotation):
            annotated = ctx[ANNOTATIONS][annotation]
            error_name = error.__class__.__name__

            if isinstance(annotated, str):
                errors_by_value[annotated].append(error_name)
            elif isinstance(annotated, (set, tuple, list)):
                for single_value in annotated:
                    errors_by_value[single_value].append(error_name)

    return dict(errors_by_value)