예제 #1
0
def find_levels(DG, attr=None):
    """Compute the backward topological level of every vertex of a DAG.

    The backward topological level ``l(v)`` of a vertex ``v`` is the length
    of the longest path from ``v`` to a sink (a vertex with no outgoing
    edges):

    - if ``v`` has no successors, then ``l(v) = 0``;
    - otherwise ``l(v) = 1 + max(l(u) for u in successors(v))``.

    NOTE: whether sinks get level 0 or level 1 is a matter of convention.
    Level 0 is used here; the original author attributes this convention
    to the FELINE paper.

    NOTE: for compatibility with forward topological levels, backward
    levels are sometimes used with a negative sign applied (the original
    author cites the PReaCH paper).  This function returns the
    non-negated values.

    Parameters
    ----------
    DG : NetworkX DiGraph
        Directed acyclic graph.

    attr : str, optional (default=None)
        If not None, each computed level is additionally stored on the graph
        as ``DG.nodes[node][attr]``.

    Returns
    -------
    dict of ints
        Mapping from node to its level.

    Raises
    ------
    nx.NetworkXNotImplemented
        If `DG` is undirected, or is directed but not acyclic.
    """
    # Levels are only meaningful on directed acyclic graphs.
    if not DG.is_directed():
        raise nx.NetworkXNotImplemented(
            "Vertex level is not defined on undirected graphs.")
    if not nx.is_directed_acyclic_graph(DG):
        raise nx.NetworkXNotImplemented(
            "Vertex level is not defined on directed graphs with loops")

    levels = {}
    # A post-order visit handles every successor before the vertex itself,
    # so the lookups into `levels` below always succeed.
    for vertex in nx.dfs_postorder_nodes(DG):
        successor_levels = [levels[child] for child in DG.successors(vertex)]
        if successor_levels:
            levels[vertex] = max(successor_levels) + 1
        else:
            levels[vertex] = 0

    if attr is not None:
        for vertex, depth in levels.items():
            DG.nodes[vertex][attr] = depth

    return levels
예제 #2
0
    def _not_implemented_for(g):
        """Return ``g`` unchanged, or raise if its kind matches the constraints.

        ``mval`` and ``dval`` come from the enclosing scope.  A value of None
        means "no constraint"; otherwise the value is compared with
        ``g.is_multigraph()`` / ``g.is_directed()`` respectively.  The graph
        is rejected with ``nx.NetworkXNotImplemented(errmsg)`` only when
        both constraints hold.
        """
        multigraph_hit = mval is None or mval == g.is_multigraph()
        if multigraph_hit and (dval is None or dval == g.is_directed()):
            raise nx.NetworkXNotImplemented(errmsg)
        return g
예제 #3
0
def spectral_partitioning(G, class_nodes):
    """Recursively split ``G`` into groups of the requested sizes.

    This is a generator: nothing (including input validation) runs until
    the first item is requested.

    Parameters
    ----------
    G : graph
        Graph to partition.  It must be non-empty, unweighted, undirected,
        connected and free of self-loops.
    class_nodes : list or tuple of int
        Number of nodes wanted in each group; the sizes must add up to the
        number of nodes of ``G``.

    Yields
    ------
    graph
        One graph per entry of ``class_nodes``.  With a single entry, ``G``
        itself is yielded.

    Raises
    ------
    nx.NetworkXNotImplemented
        If ``G`` is empty, weighted, directed or has self-loops.
    nx.NetworkXException
        If the sizes do not add up to the node count, or ``G`` is not
        connected.
    """
    # TODO: handle the string-to-integer node label conversion in a
    # separate function.
    # TODO: decide what to do when class_nodes is None or empty; the code
    # assumes it is a sliceable sequence (list or tuple).
    G_nodes = number_of_nodes(G)
    given_nodes = sum(class_nodes)

    if is_empty(G):
        raise nx.NetworkXNotImplemented("Empty graph.")
    if given_nodes != G_nodes:
        raise nx.NetworkXException("Invalid classes")
    if is_weighted(G):
        raise nx.NetworkXNotImplemented("Weighted graph.")
    if is_directed(G):
        raise nx.NetworkXNotImplemented("Directed graph.")
    if number_of_selfloops(G):
        raise nx.NetworkXNotImplemented("Graph with self-edges.")
    if not nx.is_connected(G):
        raise nx.NetworkXException("Not connected graph.")

    classes = len(class_nodes)

    if classes == 1:
        yield G
    else:
        half = classes // 2
        big_class1_nodes = sum(class_nodes[:half])
        big_class2_nodes = sum(class_nodes[half:])
        temp_G = _basic_partitioning(G, big_class1_nodes, big_class2_nodes)
        # NOTE(review): debug output kept from the original; it overwrites
        # "debug.png" in the working directory on every recursive split.
        nx.draw(temp_G, with_labels=True)
        plt.savefig("debug.png")
        plt.clf()
        # Hand the first half of the sizes to exactly one component.  Without
        # this flag, two halves with the same node total would both match
        # the size test and both receive class_nodes[:half], so the sizes in
        # class_nodes[half:] would never be used.
        first_half_assigned = False
        for component in _working_components(temp_G):
            component_nodes = number_of_nodes(component)
            if not first_half_assigned and component_nodes == big_class1_nodes:
                first_half_assigned = True
                yield from spectral_partitioning(component, class_nodes[:half])
            else:
                yield from spectral_partitioning(component, class_nodes[half:])
def spectral_partitioning(G, groups_nodes):
    """
    Generate groups with the requested node counts, starting from G.

    This is a generator, so the validation below only runs once iteration
    starts.

    :param G: simple connected graph
    :param groups_nodes: list or tuple holding the number of vertices of
        each group
    :return: generator of graphs
    :raises nx.NetworkXException: if the sizes do not add up to the number
        of nodes of G, or one of the sizes is 0
    :raises nx.NetworkXNotImplemented: if G is weighted, directed or has
        self-loops
    """
    node_total = nx.number_of_nodes(G)
    requested_total = sum(groups_nodes)

    if requested_total != node_total or 0 in groups_nodes:
        raise nx.NetworkXException("Invalid gruops")
    if nx.is_weighted(G):
        raise nx.NetworkXNotImplemented("Weighted graph.")
    if nx.is_directed(G):
        raise nx.NetworkXNotImplemented("Directed graph.")
    if nx.number_of_selfloops(G):
        raise nx.NetworkXNotImplemented("Graph with self-edges.")

    # Base case: only one group requested, so G is that group.
    if len(groups_nodes) == 1:
        yield G
        return

    # Split the size list in two, cut G into two pieces holding the matching
    # node totals, then let the recursion subdivide each piece further.
    split_at = len(groups_nodes) // 2
    head_sizes = groups_nodes[:split_at]
    tail_sizes = groups_nodes[split_at:]
    pieces = list(_basic_partitioning(G, sum(head_sizes), sum(tail_sizes)))
    yield from spectral_partitioning(pieces[0], head_sizes)
    yield from spectral_partitioning(pieces[1], tail_sizes)
예제 #5
0
def checkMotifs(G, s):
    """Count occurrences of size-``s`` motifs in ``G`` (``s`` is 3 or 4).

    For every vertex and every ``(s - 1)``-combination of its neighbours,
    the number of edges among those neighbours selects which motif the
    combination belongs to.  Each tally is then divided by the number of
    motif vertices that have ``s - 1`` neighbours.  For ``s == 4`` two
    further motifs are appended (edge lists ``1-2, 2-3, 3-4`` and
    ``1-2, 2-3, 3-4, 1-4``), with counts derived from neighbourhood
    intersections.

    Parameters
    ----------
    G : graph-like
        Iterating yields vertices and ``G[v]`` yields the neighbours of
        ``v``.
    s : int
        Motif size; must be 3 or 4.

    Returns
    -------
    zip
        Pairs ``(motif, count)``, where ``motif`` is whatever
        ``graphEdgeDisplay`` returns for the motif's edge list.

    Raises
    ------
    networkx.NetworkXNotImplemented
        If ``s`` is neither 3 nor 4.
    """
    if s == 3:
        edgeLists = [[[1, 2], [1, 3]], [[1, 2], [1, 3], [2, 3]]]
    elif s == 4:
        edgeLists = [
            [[1, 2], [1, 3], [1, 4]],
            [[1, 2], [1, 3], [1, 4], [2, 3]],
            [[1, 2], [1, 3], [1, 4], [2, 3], [3, 4]],
            [[1, 2], [1, 3], [1, 4], [2, 3], [3, 4], [2, 4]],
        ]
    else:
        raise networkx.NetworkXNotImplemented('Size of motif must be 3 or 4')

    # NOTE(review): graphEdgeDisplay presumably builds an adjacency mapping;
    # it is only iterated and indexed by vertex here - confirm.
    listOfMotifs = [graphEdgeDisplay(edges) for edges in edgeLists]
    counter = [0] * len(edgeLists)

    # The number of edges among the chosen neighbours is the index of the
    # motif in edgeLists.
    for center in G:
        for chosen in it.combinations(G[center], s - 1):
            extra_edges = sum(
                1 for a, b in it.combinations(chosen, 2) if a in G[b])
            counter[extra_edges] += 1

    # Divide by the number of motif vertices that could have played the
    # "center" role, i.e. those with s - 1 neighbours inside the motif.
    for idx, motif in enumerate(listOfMotifs):
        centers = sum(1 for v in motif if len(motif[v]) == s - 1)
        counter[idx] = counter[idx] / centers

    if s == 4:
        # Extra motifs needed: u's and squares.
        listOfMotifs.append(graphEdgeDisplay([[1, 2], [2, 3], [3, 4]]))
        path_total = 0
        for u in G:
            for v in G[u]:
                shared = len(set(G[u]).intersection(G[v]))
                only_u = len(G[u]) - shared - 1
                only_v = len(G[v]) - shared - 1
                path_total += (shared * (shared - 1)
                               + (only_u + only_v) * shared
                               + only_u * only_v)
        counter.append(path_total / 2 - 12 * counter[3] - 6 * counter[2] -
                       2 * counter[1])

        listOfMotifs.append(graphEdgeDisplay([[1, 2], [2, 3], [3, 4], [1, 4]]))
        square_total = 0
        for u in G:
            for v in G[u]:
                around_v = set(G[v])
                for w in G[u]:
                    if v == w:
                        continue
                    square_total += len(around_v.intersection(G[w])) - 1
        counter.append((square_total - 8 * counter[2] - 24 * counter[3]) / 8)
        # Adjust the path count by four times the square count.
        counter[-2] -= counter[-1] * 4

    return zip(listOfMotifs, counter)
예제 #6
0
 def _not_implemented_for(not_implement_for_func, *args, **kwargs):
     """Call the wrapped function unless the graph matches ``graph_types``.

     The graph is taken from the first positional argument.  Every name in
     ``graph_types`` (from the enclosing scope) must be one of 'directed',
     'undirected', 'multigraph' or 'graph'.  If the graph satisfies all of
     them, ``nx.NetworkXNotImplemented`` is raised; otherwise the wrapped
     function is called with the original arguments and its result
     returned.

     Raises
     ------
     KeyError
         If ``graph_types`` contains an unknown name.
     nx.NetworkXNotImplemented
         If the graph matches every name in ``graph_types``.
     """
     graph = args[0]
     terms = {'directed': graph.is_directed(),
              'undirected': not graph.is_directed(),
              'multigraph': graph.is_multigraph(),
              'graph': not graph.is_multigraph()}
     try:
         # Stops at the first non-matching term, exactly like the chained
         # ``match = match and terms[t]`` it replaces.
         match = all(terms[t] for t in graph_types)
     except KeyError as err:
         # The two literals are joined into ONE message; a stray comma
         # previously turned them into two separate exception arguments.
         raise KeyError('use one or more of '
                        'directed, undirected, multigraph, graph') from err
     if match:
         msg = 'not implemented for %s type' % ' '.join(graph_types)
         raise nx.NetworkXNotImplemented(msg)
     return not_implement_for_func(*args, **kwargs)
예제 #7
0
 def _not_implemented_for(not_implement_for_func, *args, **kwargs):
     """Guard the wrapped function against unsupported graph kinds.

     The first positional argument is the graph.  When it satisfies every
     name listed in ``graph_types`` (from the enclosing scope),
     ``nx.NetworkXNotImplemented`` is raised; otherwise the wrapped function
     is called with the untouched arguments and its result returned.  An
     unknown name in ``graph_types`` raises ``KeyError``.
     """
     graph = args[0]
     terms = {
         "directed": graph.is_directed(),
         "undirected": not graph.is_directed(),
         "multigraph": graph.is_multigraph(),
         "graph": not graph.is_multigraph(),
     }
     try:
         # Short-circuits on the first term the graph does not satisfy.
         rejected = all(terms[kind] for kind in graph_types)
     except KeyError as e:
         raise KeyError("use one or more of "
                        "directed, undirected, multigraph, graph") from e
     if not rejected:
         return not_implement_for_func(*args, **kwargs)
     msg = f"not implemented for {' '.join(graph_types)} type"
     raise nx.NetworkXNotImplemented(msg)
예제 #8
0
def configuration_model(deg_sequence, create_using=None, seed=None):
    """Build a random pseudograph realising a given degree sequence.

    Edges are assigned at random so that the node degrees match
    `deg_sequence`.  The result is a pseudograph: it may contain parallel
    edges and self-loops.

    Parameters
    ----------
    deg_sequence :  list of nonnegative integers
        Entry ``i`` is the degree wanted for node ``i``.
    create_using : NetworkX graph constructor, optional (default MultiGraph)
        Graph type to create. A graph instance is cleared before being
        populated.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    G : MultiGraph
        A graph with the specified degree sequence.  Nodes are labeled
        from 0, each label being the index of the corresponding entry of
        `deg_sequence`.

    Raises
    ------
    NetworkXError
        If the degrees add up to an odd number.
    NetworkXNotImplemented
        If `create_using` produces a directed graph.

    See Also
    --------
    is_graphical

    Notes
    -----
    The construction follows Newman [1]_.

    Because self-loops and parallel edges are permitted, the degree
    sequence does not need to be graphical (realisable by a simple
    graph); only an odd degree sum is rejected.

    Removing the self-loops and parallel edges afterwards (see the
    examples) will usually leave a graph whose degrees no longer match the
    requested sequence exactly.

    The density of self-loops and parallel edges tends to decrease as the
    number of nodes grows, but their number typically approaches a Poisson
    distribution with a nonzero mean.  Consider a node with *k* stubs: the
    probability of joining another stub of the same node is roughly
    (*k* - *1*) / *N*, with *N* the number of nodes.  The probability of a
    self-loop therefore scales like *c* / *N* for some constant *c*, so
    about *c* self-loops are expected as *N* grows.  The same reasoning
    applies to parallel edges.

    References
    ----------
    .. [1] M.E.J. Newman, "The structure and function of complex networks",
       SIAM REVIEW 45-2, pp 167-256, 2003.

    Examples
    --------
    A degree sequence following a particular distribution can be drawn
    with one of the functions in :mod:`~networkx.utils.random_sequence`
    (or one of your own).  For instance, an undirected multigraph on one
    hundred nodes with a power-law degree sequence:

    >>> sequence = nx.random_powerlaw_tree_sequence(100, tries=5000)
    >>> G = nx.configuration_model(sequence)
    >>> len(G)
    100
    >>> actual_degrees = [d for v, d in G.degree()]
    >>> actual_degrees == sequence
    True

    The result is a multigraph and may have parallel edges.  To collapse
    them:

    >>> G = nx.Graph(G)

    To drop the self-loops as well:

    >>> G.remove_edges_from(nx.selfloop_edges(G))

    """
    # Every edge uses up two stubs, so an odd total cannot be paired up.
    if sum(deg_sequence) % 2 != 0:
        raise nx.NetworkXError(
            "Invalid degree sequence: sum of degrees must be even, not odd")

    graph = nx.empty_graph(0, create_using, default=nx.MultiGraph)
    if graph.is_directed():
        raise nx.NetworkXNotImplemented("not implemented for directed graphs")

    return _configuration_model(deg_sequence, graph, seed=seed)
예제 #9
0
def maximum_common_ordered_tree_embedding(tree1, tree2, node_affinity='auto'):
    """
    Finds the maximum common subtree-embedding between two ordered trees.

    A tree S is an embedded subtree of T if it can be obtained from T by a
    series of edge contractions.

    Note this produces a subtree embedding, which is not necessarily a
    subgraph isomorphism (although a subgraph isomorphism is also an
    embedding.)

    The maximum common embedded subtree problem can be solved in
    `O(n1 * n2 * min(d1, l1) * min(d2, l2))` time on ordered trees with n1 and
    n2 nodes, of depth d1 and d2 and with l1 and l2 leaves, respectively

    Implements algorithm described in [1]_.

    References:
        On the Maximum Common Embedded Subtree Problem for Ordered Trees
        https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf

        http://algo.inria.fr/flajolet/Publications/FlSiSt90.pdf

    Notes:
        Exact algorithms for computing the tree edit distance between unordered trees - https://pdf.sciencedirectassets.com/271538/1-s2.0-S0304397510X00299/1-s2.0-S0304397510005463/main.pdf ?

        Tree Edit Distance and Common Subtrees - https://upcommons.upc.edu/bitstream/handle/2117/97554/R02-20.pdf

        A Survey on Tree Edit Distance and Related Problems - https://grfia.dlsi.ua.es/ml/algorithms/references/editsurvey_bille.pdf

    Args:

        tree1 (nx.OrderedDiGraph): first ordered tree
        tree2 (nx.OrderedDiGraph): second ordered tree
        node_affinity (callable | str): forwarded unchanged to
            `longest_common_balanced_sequence`; defaults to 'auto'

    Returns:
        tuple: ``(embedding1, embedding2)``, the trees rebuilt from the best
            common subsequences found for `tree1` and `tree2`

    Raises:
        nx.NetworkXNotImplemented: if either input is not an
            `nx.OrderedDiGraph` that is also a forest

    Example:
        >>> from netharn.initializers._nx_extensions import *  # NOQA
        >>> from netharn.initializers._nx_extensions import _lcs, _print_forest
        >>> def random_ordered_tree(n, seed=None):
        >>>     tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
        >>>     otree = nx.OrderedDiGraph()
        >>>     otree.add_edges_from(tree.edges)
        >>>     return otree
        >>> tree1 = random_ordered_tree(10, seed=1)
        >>> tree2 = random_ordered_tree(10, seed=2)
        >>> print('tree1')
        >>> _print_forest(tree1)
        >>> print('tree2')
        >>> _print_forest(tree2)

        >>> embedding1, embedding2 = maximum_common_ordered_tree_embedding(tree1, tree2 )
        >>> print('embedding1')
        >>> _print_forest(embedding1)
        >>> print('embedding2')
        >>> _print_forest(embedding2)
    """
    # Validate BOTH inputs.  The second check used to test the type of
    # ``tree1`` again, so the type of ``tree2`` was never verified.
    for tree in (tree1, tree2):
        if not (isinstance(tree, nx.OrderedDiGraph) and nx.is_forest(tree)):
            raise nx.NetworkXNotImplemented(
                'only implemented for directed ordered trees')

    # Convert the trees to balanced sequences.  The second call receives the
    # lookup tables produced by the first, so both sequences share them.
    sequence1, open_to_close, toks = tree_to_seq(tree1,
                                                 open_to_close=None,
                                                 toks=None)
    sequence2, open_to_close, toks = tree_to_seq(tree2, open_to_close, toks)

    open_to_tok = ub.invert_dict(toks)

    # Solve the longest common balanced sequence problem
    best, value = longest_common_balanced_sequence(sequence1,
                                                   sequence2,
                                                   open_to_close,
                                                   open_to_tok=open_to_tok,
                                                   node_affinity=node_affinity)
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree
    embedding1 = seq_to_tree(subseq1, open_to_close, toks)
    embedding2 = seq_to_tree(subseq2, open_to_close, toks)
    return embedding1, embedding2
예제 #10
0
def maximum_common_ordered_subtree_isomorphism(tree1,
                                               tree2,
                                               node_affinity='auto'):
    """
    Isomorphic version of `maximum_common_ordered_tree_embedding`.

    Both trees are converted to balanced sequences, the best common
    sequence is found with `longest_common_isomorphic_sequence`, and the
    two resulting subsequences are converted back into trees.

    Args:
        tree1 (nx.OrderedDiGraph): first ordered tree
        tree2 (nx.OrderedDiGraph): second ordered tree
        node_affinity (callable | str): forwarded unchanged to
            `longest_common_isomorphic_sequence`; defaults to 'auto'

    Returns:
        tuple: ``(subtree1, subtree2)``. Two empty `nx.OrderedDiGraph`
            objects are returned when input validation raises
            `nx.NetworkXPointlessConcept`.

    Raises:
        nx.NetworkXNotImplemented: if either input is not an
            `nx.OrderedDiGraph` that is also a forest

    CommandLine:
        xdoctest -m /home/joncrall/code/netharn/netharn/initializers/_nx_extensions.py maximum_common_ordered_subtree_isomorphism:1 --profile && cat profile_output.txt

    Example:
        >>> from netharn.initializers._nx_extensions import *  # NOQA
        >>> from netharn.initializers._nx_extensions import _lcs, _print_forest
        >>> def random_ordered_tree(n, seed=None):
        >>>     tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
        >>>     otree = nx.OrderedDiGraph()
        >>>     otree.add_edges_from(tree.edges)
        >>>     return otree
        >>> tree1 = random_ordered_tree(10, seed=3)
        >>> tree2 = random_ordered_tree(10, seed=2)
        >>> tree1.add_edges_from(tree2.edges, weight=1)
        >>> tree1 = nx.minimum_spanning_arborescence(tree1)
        >>> tree2.add_edges_from(tree1.edges, weight=1)
        >>> tree2 = nx.minimum_spanning_arborescence(tree2)

        >>> tree1.remove_edge(4, 7)
        >>> tree1.remove_edge(4, 9)
        >>> tree1.add_edge(4, 10)
        >>> tree1.add_edge(10, 7)
        >>> tree1.add_edge(10, 9)
        >>> #tree1.add_edges_from([(9, 11), (11, 12), (12, 13), (13, 14)])
        >>> #tree2.add_edges_from([(9, 11), (11, 12), (12, 13), (13, 14)])
        >>> tree1.add_edges_from([(9, 11), (11, 12)])
        >>> tree2.add_edges_from([(9, 11), (11, 12)])
        >>> tree2.add_edge(100, 0)
        >>> tree1.add_edge(102, 100)
        >>> tree1.add_edge(100, 101)
        >>> tree1.add_edge(101, 0)
        >>> tree1.add_edge(5, 201)
        >>> tree1.add_edge(5, 202)
        >>> tree1.add_edge(5, 203)
        >>> tree1.add_edge(201, 2000)
        >>> tree1.add_edge(2000, 2001)
        >>> tree1.add_edge(2001, 2002)
        >>> tree1.add_edge(2002, 2003)

        >>> tree2.add_edge(5, 202)
        >>> tree2.add_edge(5, 203)
        >>> tree2.add_edge(5, 201)
        >>> tree2.add_edge(201, 2000)
        >>> tree2.add_edge(2000, 2001)
        >>> tree2.add_edge(2001, 2002)
        >>> tree2.add_edge(2002, 2003)

        >>> print('-----')
        >>> print('tree1')
        >>> _print_forest(tree1)
        >>> print('tree2')
        >>> _print_forest(tree2)

        >>> subtree1, subtree2 = maximum_common_ordered_subtree_isomorphism(tree1, tree2 )
        >>> print('-----')
        >>> print('subtree1')
        >>> _print_forest(subtree1)
        >>> print('subtree2')
        >>> _print_forest(subtree2)

        >>> embedding1, embedding2 = maximum_common_ordered_tree_embedding(tree1, tree2)
        >>> print('-----')
        >>> print('embedding1')
        >>> _print_forest(embedding1)
        >>> print('embedding2')
        >>> _print_forest(embedding2)

        >>> if 0:
        >>>     ti = timerit.Timerit(6, bestof=2, verbose=2)
        >>>     for timer in ti.reset('isomorphism'):
        >>>         with timer:
        >>>             maximum_common_ordered_subtree_isomorphism(tree1, tree2 )
        >>>     for timer in ti.reset('embedding'):
        >>>         with timer:
        >>>             maximum_common_ordered_tree_embedding(tree1, tree2 )

        >>> from networkx import isomorphism
        >>> assert isomorphism.DiGraphMatcher(tree1, subtree1).subgraph_is_isomorphic()
        >>> assert isomorphism.DiGraphMatcher(tree2, subtree2).subgraph_is_isomorphic()

        >>> list(isomorphism.DiGraphMatcher(tree1, tree2).subgraph_isomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(tree1, tree2).subgraph_monomorphisms_iter())

        >>> list(isomorphism.DiGraphMatcher(subtree1, subtree2).subgraph_isomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(tree1, subtree1).subgraph_isomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(tree2, subtree2).subgraph_isomorphisms_iter())

    Example:
        >>> from netharn.initializers._nx_extensions import *  # NOQA
        >>> from netharn.initializers._nx_extensions import _lcs, _print_forest
        >>> def random_ordered_tree(n, seed=None):
        >>>     if n > 0:
        >>>         tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
        >>>     otree = nx.OrderedDiGraph()
        >>>     if n > 0:
        >>>         otree.add_edges_from(tree.edges)
        >>>     return otree
        >>> import random
        >>> rng = random.Random(90269698983701724775426457020022)
        >>> num = 1000
        >>> def _gen_seeds(num):
        >>>     for _ in range(num):
        >>>         yield (rng.randint(0, 50), rng.randint(0, 50), rng.randint(0, 2 ** 64), rng.randint(0, 2 ** 64))
        >>> for n1, n2, s1, s2 in ub.ProgIter(_gen_seeds(num=num), total=num, verbose=3):
        >>>     tree1 = random_ordered_tree(n1, seed=s1)
        >>>     tree2 = random_ordered_tree(n2, seed=s2)
        >>>     #print('-----')
        >>>     #print('tree1')
        >>>     #_print_forest(tree1)
        >>>     #print('tree2')
        >>>     #_print_forest(tree2)
        >>>     subtree1, subtree2 = maximum_common_ordered_subtree_isomorphism(tree1, tree2, node_affinity='auto')
        >>>     #print('-----')
        >>>     #print('subtree1')
        >>>     #_print_forest(subtree1)
        >>>     #print('subtree2')
        >>>     #_print_forest(subtree2)
        >>>     from networkx import isomorphism
        >>>     assert isomorphism.DiGraphMatcher(tree1, subtree1).subgraph_is_isomorphic()
        >>>     assert isomorphism.DiGraphMatcher(tree2, subtree2).subgraph_is_isomorphic()

    """
    try:
        # Validate BOTH inputs.  The second check used to test the type of
        # ``tree1`` again, so the type of ``tree2`` was never verified.
        for tree in (tree1, tree2):
            if not (isinstance(tree, nx.OrderedDiGraph)
                    and nx.is_forest(tree)):
                raise nx.NetworkXNotImplemented(
                    'only implemented for directed ordered trees')
    except nx.NetworkXPointlessConcept:
        # NOTE(review): presumably raised by nx.is_forest for an empty
        # graph; nothing can be shared then, so return two empty trees.
        subtree1 = nx.OrderedDiGraph()
        subtree2 = nx.OrderedDiGraph()
        return subtree1, subtree2

    # Convert the trees to balanced sequences.  The second call receives the
    # lookup tables produced by the first, so both sequences share them.
    sequence1, open_to_close, toks = tree_to_seq(tree1,
                                                 open_to_close=None,
                                                 toks=None,
                                                 mode='chr')
    sequence2, open_to_close, toks = tree_to_seq(tree2,
                                                 open_to_close,
                                                 toks,
                                                 mode='chr')

    open_to_tok = ub.invert_dict(toks)

    # Solve the longest common isomorphic sequence problem
    best, value = longest_common_isomorphic_sequence(
        sequence1,
        sequence2,
        open_to_close,
        open_to_tok=open_to_tok,
        node_affinity=node_affinity)
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree
    subtree1 = seq_to_tree(subseq1, open_to_close, toks)
    subtree2 = seq_to_tree(subseq2, open_to_close, toks)
    return subtree1, subtree2
예제 #11
0
def forest_str(graph, with_labels=True, sources=None, write=None):
    """
    Render a forest (directed or undirected) as utf8 box-drawing text.

    Parameters
    ----------
    graph : nx.DiGraph | nx.Graph
        Graph to render; it must be a tree, a forest, or the empty graph.

    with_labels : bool
        When True, a node is displayed using its "label" attribute if it
        has one and the node value itself otherwise. When False the node
        value is always used. Defaults to True.

    sources : List
        Nodes to start each tree from, in display order. If unspecified,
        directed forests use their nodes with in-degree zero and undirected
        forests use a minimum-degree node of each connected component.

    write : callable
        Called once per generated line. If None, the lines are collected
        and returned joined by newlines. If given (for example `print`),
        this function returns None. Defaults to None.

    Returns
    -------
    str | None :
        utf8 representation of the tree / forest

    Raises
    ------
    nx.NetworkXNotImplemented
        If the graph is non-empty and not a forest.

    Example
    -------
    >>> import networkx as nx
    >>> graph = nx.balanced_tree(r=2, h=3, create_using=nx.DiGraph)
    >>> print(forest_str(graph))
    ╙── 0
        ├─╼ 1
        │   ├─╼ 3
        │   │   ├─╼ 7
        │   │   └─╼ 8
        │   └─╼ 4
        │       ├─╼ 9
        │       └─╼ 10
        └─╼ 2
            ├─╼ 5
            │   ├─╼ 11
            │   └─╼ 12
            └─╼ 6
                ├─╼ 13
                └─╼ 14


    >>> graph = nx.balanced_tree(r=1, h=2, create_using=nx.Graph)
    >>> print(forest_str(graph))
    ╙── 0
        └── 1
            └── 2
    """
    import networkx as nx

    lines = []
    emit = lines.append if write is None else write

    if len(graph.nodes) == 0:
        emit("╙")
    else:
        if not nx.is_forest(graph):
            raise nx.NetworkXNotImplemented(
                "input must be a forest or the empty graph")

        directed = graph.is_directed()
        neighbors = graph.succ if directed else graph.adj

        if sources is None:
            if directed:
                # Directed trees have genuine roots: nodes with no parent.
                sources = [n for n in graph.nodes if graph.in_degree[n] == 0]
            else:
                # Undirected trees have no root; pick a minimum-degree node
                # of each component.
                sources = [
                    min(cc, key=lambda n: graph.degree[n])
                    for cc in nx.connected_components(graph)
                ]

        # Each frame is (node, indentation, is-last-sibling).  The frames
        # are reversed so that popping visits the sources in listed order.
        final_idx = len(sources) - 1
        stack = [(root, "", pos == final_idx)
                 for pos, root in enumerate(sources)]
        stack.reverse()

        visited = set()
        while stack:
            node, indent, islast = stack.pop()
            if node in visited:
                continue
            visited.add(node)

            # Box and arrow characters:
            # https://en.wikipedia.org/wiki/Box-drawing_character
            # https://stackoverflow.com/questions/2701192/triangle-arrow
            if not indent:
                # Roots get their own glyphs to show that the trees of the
                # forest are not connected to each other.
                branch, carry = ("╙── ", "    ") if islast else ("╟── ", "╎   ")
            elif directed:
                branch, carry = ("└─╼ ", "    ") if islast else ("├─╼ ", "│   ")
            else:
                branch, carry = ("└── ", "    ") if islast else ("├── ", "│   ")

            label = graph.nodes[node].get("label", node) if with_labels else node
            emit(indent + branch + str(label))

            # Push children back to front so they pop in original order;
            # the first one pushed is the last sibling to be displayed.
            child_indent = indent + carry
            pending = [child for child in neighbors[node]
                       if child not in visited]
            for pos, child in enumerate(reversed(pending)):
                stack.append((child, child_indent, pos == 0))

    if write is None:
        # A string is only returned when no custom writer was supplied.
        return "\n".join(lines)
예제 #12
0
def maximum_common_ordered_subtree_embedding(tree1,
                                             tree2,
                                             node_affinity="auto",
                                             impl="auto",
                                             item_type="auto"):
    r"""
    Finds the maximum common subtree-embedding between two ordered trees.

    A tree S is an embedded subtree (also known as a minor) of T if it can be
    obtained from T by a series of edge contractions.

    Subtree embeddings (or minors) are similar to subtree isomorphisms ---
    every subtree isomorphism is also a minor. However, once an edge of T has
    been contracted the result may no longer be a subtree isomorphism, even
    though it is still a minor.

    This function computes the maximum common embedded subtrees S1 and S2
    between two trees T1 and T2. S1 and S2 are minors of T1 and T2 with maximal
    size such that S1 is isomorphic to S2.

    The computational complexity is: ``O(n1 * n2 * min(d1, l1) * min(d2, l2))``
    on ordered trees with n1 and n2 nodes, of depth d1 and d2 and with l1 and
    l2 leaves, respectively.

    This implementation follows the algorithm described in [1]_, which
    introduces the problem as follows:

    "An important generalization of tree and subtree isomorphism, known as
    minor containment, is the problem of determining whether a tree is
    isomorphic to an embedded subtree of another tree, where an embedded
    subtree of a tree is obtained by contracting some of the edges in the tree.
    A further generalization of minor containment on trees, known as maximum
    common embedded subtree, is the problem of finding or determining the size
    of a largest common embedded subtree of two trees. The latter also
    generalizes the maximum common subtree isomorphism problem, in which a
    common subtree of largest size is contained as a subtree, not only
    embedded, in the two trees."

    Parameters
    ----------
    tree1, tree2 : nx.OrderedDiGraph
        Trees to find the maximum embedding between. Both must be instances
        of ``nx.OrderedDiGraph`` and must contain at least one node.

    node_affinity : None | str | callable
        Function to determine if two nodes can be matched. The return is
        interpreted as a weight that is used to break ties. If None then any
        node can match any other node and only the topology is important.
        The signature default is "auto"; it is forwarded unchanged to the
        backend (presumably resolved there to "eq", i.e. ``operator.eq`` ---
        confirm against the backend).

    impl : str
        Determines the backend implementation. Defaults to "auto".
        See :func:`netharn.initializers._nx_ext_v2.balanced_embedding.longest_common_balanced_embedding`
        for details. Other valid options are "iter", "recurse", and
        "iter-cython".

    item_type : str
        Determines the backend data structure used to encode the tree as a
        balanced sequence. Defaults to "auto", other valid options are "chr"
        and "number". The value "label" is explicitly rejected.

    Returns
    -------
    S1, S2, value: Tuple[nx.OrderedDiGraph, nx.OrderedDiGraph, float]
        The maximum value common embedding for each tree with respect to the
        chosen ``node_affinity`` function. The topology of both graphs will
        always be the same, the only difference is that the node labels in the
        first and second embeddings will correspond to ``tree1`` and ``tree2``
        respectively. When ``node_affinity='eq'`` then embeddings should be
        identical. The last return value is the "weight" of the solution with
        respect to ``node_affinity``.

    Raises
    ------
    nx.NetworkXNotImplemented
        If ``tree1`` or ``tree2`` is not an ``nx.OrderedDiGraph``.
    nx.NetworkXPointlessConcept
        If either tree has no nodes.
    AssertionError
        If ``item_type == "label"``.

    References
    ----------
    .. [1] Lozano, Antoni, and Gabriel Valiente.
        "On the maximum common embedded subtree problem for ordered trees."
        String Algorithmics (2004): 155-170.
        https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf

    See Also
    --------
    * For example usage see ``examples/applications/filesystem_embedding.py``
    * Core backends are in :mod:`netharn.initializers._nx_ext_v2.balanced_embedding.longest_common_balanced_embedding`

    Example
    -------
    >>> from netharn.initializers._nx_ext_v2.tree_embedding import *  # NOQA
    >>> import networkx as nx
    >>> # Create two random trees
    >>> tree1 = random_ordered_tree(7, seed=3257073545741117277206611, directed=True)
    >>> tree2 = random_ordered_tree(7, seed=123568587133124688238689717, directed=True)
    >>> print(forest_str(tree1))
    ╙── 0
        ├─╼ 5
        │   └─╼ 2
        └─╼ 1
            └─╼ 6
                ├─╼ 3
                └─╼ 4
    >>> print(forest_str(tree2))
    ╙── 0
        └─╼ 2
            ├─╼ 1
            │   ├─╼ 4
            │   └─╼ 3
            │       └─╼ 5
            └─╼ 6
    >>> # Compute the maximum common embedding between the two trees
    >>> embedding1, embedding2, _ = maximum_common_ordered_subtree_embedding(tree1, tree2)
    >>> print(forest_str(embedding1))
    ╙── 0
        └─╼ 1
            └─╼ 4
    >>> assert embedding1.edges == embedding2.edges, (
    ...     'when node_affinity is "eq" both embeddings will be the same')

    >>> # Demo with a custom node affinity where any node can match unless
    >>> # they are the same and we much prefer nodes that are dissimilar
    >>> def custom_node_affinity(n1, n2):
    ...     return abs(n1 - n2) ** 2
    >>> embedding1, embedding2, _ = maximum_common_ordered_subtree_embedding(
    ...     tree1, tree2, node_affinity=custom_node_affinity)
    >>> # In this case the embeddings for each tree will be different
    >>> print(forest_str(embedding1))
    ╙── 0
        ├─╼ 5
        │   └─╼ 2
        └─╼ 1
    >>> print(forest_str(embedding2))
    ╙── 2
        ├─╼ 1
        │   └─╼ 5
        └─╼ 6
    """
    import networkx as nx

    # Note: checks that inputs are forests are handled by tree_to_seq.
    # Validate BOTH inputs; previously tree1 was checked twice and tree2 was
    # never checked.
    for tree in (tree1, tree2):
        if not isinstance(tree, nx.OrderedDiGraph):
            raise nx.NetworkXNotImplemented(
                "only implemented for directed ordered trees")

    # An embedding between empty trees is not a meaningful question.
    if tree1.number_of_nodes() == 0 or tree2.number_of_nodes() == 0:
        raise nx.NetworkXPointlessConcept

    if item_type == "label":
        # If we do allow label, I think the algorithm will work, but the
        # returned tree embeddings will only be embedding wrt to the label
        # structure.
        raise AssertionError(
            "allowing sequences to be specified by the labels breaks assumptions"
        )

    # Convert the trees to balanced sequences.
    # NOTE: each sequence will contain each token at most once, this is an
    # important assumption in subsequent steps.
    seq1, open_to_close, node_to_open = tree_to_seq(
        tree1,
        open_to_close=None,
        node_to_open=None,
        item_type=item_type,
        container_type="auto",
    )
    # The token maps returned for tree1 are handed to the second call, so the
    # final maps cover the tokens of both sequences.
    seq2, open_to_close, node_to_open = tree_to_seq(
        tree2,
        open_to_close,
        node_to_open,
        item_type=item_type,
        container_type="auto",
    )

    # NOTE: This DOES work in the case where all opening tokens within a single
    # sequence are unique. And we CAN enforce that this is the case in our
    # reduction because each node in a graph is always unique and we always
    # choose a unique token for each unique node in ``tree_to_seq``.
    open_to_node = {tok: node for node, tok in node_to_open.items()}

    # Solve the longest common balanced sequence problem
    best, value = balanced_embedding.longest_common_balanced_embedding(
        seq1,
        seq2,
        open_to_close,
        open_to_node=open_to_node,
        node_affinity=node_affinity,
        impl=impl,
    )
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree.
    # Note: we could return the contracted edges as well here, but that can
    # always be done as a postprocessing step. See tests for an example of
    # this.
    embedding1 = seq_to_tree(subseq1, open_to_close, open_to_node)
    embedding2 = seq_to_tree(subseq2, open_to_close, open_to_node)

    return embedding1, embedding2, value
예제 #13
0
def check_Motifs(H, m):
    """
    Annotate a graph with per-node and per-edge directed motif counts.

    For every motif of the requested size a ``nx.DiGraph`` pattern is built
    and matched against ``H`` with ``subgraph_pattern``. Each distinct match
    (tuple of matched keys) is turned into the induced subgraph of ``H`` on
    those nodes; every node and every edge of that induced subgraph is then
    credited with the motif's name.

    The 3-node motif labels (``M1`` ... ``M13``) follow the naming used in
    "Higher-order organization of complex networks", Benson et al., Science
    (2016), according to the original author's note. The 4-node case is under
    development and only knows the ``bifan`` motif.

    Parameters
    ----------
    H : networkx graph
        Graph to search. It is modified in place (attributes are written on
        its nodes and edges).

    m : int or anything accepted by ``int()``
        Motif size. Only 3 and 4 are supported.

    Returns
    -------
    networkx graph
        The very same object as ``H``, carrying these attributes:

        * on every node, one attribute per motif name holding how many
          matches of that motif include the node (0 if none);
        * on every edge, one attribute per motif name holding how many
          matches of that motif include the edge (0 if none);
        * on edges that belong to at least one match, ``'num_motif_edge'``:
          the number of distinct motif types the edge takes part in.

    Raises
    ------
    nx.NetworkXNotImplemented
        If the motif size is neither 3 nor 4.
    """
    size = int(m)

    if size == 3:
        motifs = {
            'M1': [(1, 2), (2, 3), (3, 1)],
            'M5': [(1, 2), (2, 3), (1, 3)],
            'M8': [(2, 1), (2, 3)],
            'M9': [(2, 1), (3, 2)],
            'M10': [(1, 2), (3, 2)],
            'M2': [(1, 2), (2, 3), (3, 2), (3, 1)],
            'M3': [(1, 2), (2, 3), (3, 2), (1, 3), (3, 1)],
            'M4': [(1, 2), (2, 1), (2, 3), (3, 2), (1, 3), (3, 1)],
            'M6': [(2, 1), (2, 3), (1, 3), (3, 1)],
            'M7': [(1, 2), (3, 2), (1, 3), (3, 1)],
            'M11': [(1, 2), (2, 1), (2, 3)],
            'M12': [(1, 2), (2, 1), (3, 2)],
            'M13': [(1, 2), (2, 1), (2, 3), (3, 2)],
        }
    elif size == 4:  # under development
        motifs = {'bifan': [(1, 2), (1, 3), (4, 2), (4, 3)]}
    else:
        raise nx.NetworkXNotImplemented('Size of motif must be 3 or 4')

    G = H

    # Motif names collected per edge / per node; a name appears once for each
    # match the edge / node takes part in.
    dict_edges = defaultdict(list)
    dict_nodes = defaultdict(list)

    for key, pattern in motifs.items():
        gmoti = nx.DiGraph()
        gmoti.add_edges_from(pattern)

        # Each match is expected to be a mapping whose keys are nodes of G
        # (assumption: ``subgraph_pattern`` is defined elsewhere -- confirm).
        matches = subgraph_pattern(G, gmoti, sign_sensitive=False)

        # NOTE(review): matches are de-duplicated as *ordered* key tuples, so
        # the same node set reported in a different key order is counted as a
        # separate match -- confirm whether that is intended.
        unique_matches = {tuple(match.keys()) for match in matches}

        for matched_nodes in unique_matches:
            subgraph = G.subgraph(matched_nodes)

            # An edge can be part of several motifs; record each occurrence.
            for u, v in subgraph.edges():
                dict_edges[(u, v)].append(str(key))

            # Likewise a node can be part of several motifs.
            for n in subgraph.nodes():
                dict_nodes[str(n)].append(str(key))

    # Mark every node with how often it occurs in each motif type.
    for n in G.nodes():
        node_counts = dict.fromkeys(motifs, 0)
        for key in dict_nodes.get(str(n), ()):
            node_counts[key] += 1

        # ``G.nodes[n]`` is the supported accessor; ``G.node`` was removed in
        # networkx 2.4.
        for motif, count in node_counts.items():
            G.nodes[n][str(motif)] = count

    # Mark every edge with the number of distinct motif types it is part of
    # and with how often it occurs in each motif type.
    for u, v in G.edges():
        edge_counts = dict.fromkeys(motifs, 0)

        if (u, v) in dict_edges:
            edge_motifs = dict_edges[(u, v)]
            G[u][v]['num_motif_edge'] = len(set(edge_motifs))
            for key in edge_motifs:
                edge_counts[key] += 1

        for motif, count in edge_counts.items():
            G[u][v][str(motif)] = count

    return G
예제 #14
0
    def _not_implemented(f, *args, **kwargs):
        """Reject the call: always raises ``nx.NetworkXNotImplemented``.

        None of the arguments (``f``, ``*args``, ``**kwargs``) are used.
        """
        message = 'Method not implemented for dynamic graphs'
        raise nx.NetworkXNotImplemented(message)
예제 #15
0
def maximum_common_ordered_subtree_isomorphism(tree1,
                                               tree2,
                                               node_affinity="auto",
                                               impl="auto",
                                               item_type="auto"):
    """
    Finds the maximum common subtree-isomorphism between two ordered trees.

    This function computes the maximum-weight common subtrees S1 and S2 between
    two trees T1 and T2. S1 and S2 are isomorphic to subgraphs of T1 and T2
    with maximal size such that S1 and S2 are also isomorphic to each other.

    This function is similar to :func:`maximum_common_ordered_subtree_embedding`
    with the main difference being that returned solution from this function
    will be proper subgraphs (i.e. all edges in the subgraphs will exist in the
    original graph), whereas in the subtree embedding problem the returned
    solutions are allowed to be minors of the input graphs (i.e. edges are
    allowed to be contracted).

    Parameters
    ----------
    tree1, tree2 : nx.OrderedDiGraph
        Trees to find the maximum subtree isomorphism between. Both must be
        instances of ``nx.OrderedDiGraph`` and must contain at least one node.

    node_affinity : None | str | callable
        Function to determine if two nodes can be matched. The return is
        interpreted as a weight that is used to break ties. If None then any
        node can match any other node and only the topology is important.
        The signature default is "auto"; it is forwarded unchanged to the
        backend (presumably resolved there to "eq", i.e. ``operator.eq`` ---
        confirm against the backend).

    impl : str
        Determines the backend implementation. Defaults to "auto".
        See :func:`balanced_sequence.longest_common_balanced_sequence`
        for details. Other valid options are "iter", "recurse", and
        "iter-cython".

    item_type : str
        Determines the backend data structure used to encode the tree as a
        balanced sequence. Defaults to "auto", other valid options are "chr"
        and "number". The value "label" is explicitly rejected.

    Returns
    -------
    S1, S2, value: Tuple[nx.OrderedDiGraph, nx.OrderedDiGraph, int]
        The maximum value common subtree isomorphism for each tree with respect
        to the chosen ``node_affinity`` function. The topology of both graphs
        will always be the same, the only difference is that the node labels in
        the first and second embeddings will correspond to ``tree1`` and
        ``tree2`` respectively. When ``node_affinity='eq'`` then embeddings
        should be identical. The last return value is the "size" of the
        solution with respect to ``node_affinity``.

    Raises
    ------
    nx.NetworkXNotImplemented
        If ``tree1`` or ``tree2`` is not an ``nx.OrderedDiGraph``.
    nx.NetworkXPointlessConcept
        If either tree has no nodes.
    AssertionError
        If ``item_type == "label"``.

    See Also
    --------
    `maximum_common_ordered_subtree_embedding`
    """
    import networkx as nx

    # Note: checks that inputs are forests are handled by tree_to_seq.
    # Validate BOTH inputs; previously tree1 was checked twice and tree2 was
    # never checked.
    for tree in (tree1, tree2):
        if not isinstance(tree, nx.OrderedDiGraph):
            raise nx.NetworkXNotImplemented(
                "only implemented for directed ordered trees")

    # A common subtree between empty trees is not a meaningful question.
    if tree1.number_of_nodes() == 0 or tree2.number_of_nodes() == 0:
        raise nx.NetworkXPointlessConcept

    if item_type == "label":
        # If we do allow label, I think the algorithm will work, but the
        # returned tree embeddings will only be embedding wrt to the label
        # structure.
        raise AssertionError(
            "allowing sequences to be specified by the labels breaks assumptions"
        )

    # Convert the trees to balanced sequences.
    # Each sequence will contain each token at most once, this is an important
    # assumption in subsequent steps.
    seq1, open_to_close, node_to_open = tree_to_seq(
        tree1,
        open_to_close=None,
        node_to_open=None,
        item_type=item_type,
        container_type="auto",
    )
    # The token maps returned for tree1 are handed to the second call, so the
    # final maps cover the tokens of both sequences.
    seq2, open_to_close, node_to_open = tree_to_seq(
        tree2,
        open_to_close,
        node_to_open,
        item_type=item_type,
        container_type="auto",
    )
    # Invert node -> opening token so tokens can be mapped back to nodes.
    open_to_node = {tok: node for node, tok in node_to_open.items()}

    # Solve the longest common balanced sequence problem
    best, value = longest_common_balanced_isomorphism(
        seq1,
        seq2,
        open_to_close,
        open_to_node=open_to_node,
        node_affinity=node_affinity,
        impl=impl,
    )
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree.
    subtree1 = seq_to_tree(subseq1, open_to_close, open_to_node)
    subtree2 = seq_to_tree(subseq2, open_to_close, open_to_node)
    return subtree1, subtree2, value