def find_levels(DG, attr=None):
    """Find levels (generation numbers) of all vertices in graph DG.

    The backward topological level of a vertex v, denoted l_v or l(v), is
    defined as its depth, that is, the length of the longest path from v to a
    leaf (sink) node:

    - if v has no immediate successors, then l_v = 0
    - otherwise, l_v = 1 + max_{u in successors(v)} l_u

    The level / generation number information can optionally be saved in the
    graph, under the node attribute whose name is given by the `attr`
    parameter.

    NOTE: whether l_v for nodes without outgoing edges (without any
    successors) is defined to be 0 or to be 1 is a matter of convention.
    Here the same convention as in the FELINE paper is used.

    NOTE: for compatibility with forward topological levels, backward
    topological levels are sometimes used with a negative sign applied, as in
    the PReaCH paper.

    Parameters
    ----------
    DG : NetworkX DiGraph
        Directed acyclic graph.

    attr : str, optional (default=None)
        If set, name of the node attribute under which to store vertex levels.

    Returns
    -------
    dict of ints
        Dictionary where keys are node indices and values are node levels.
    """
    lvl = {}

    # topological levels are defined only for Directed Acyclic Graphs
    if not DG.is_directed():
        raise nx.NetworkXNotImplemented(
            "Vertex level is not defined on undirected graphs.")
    if not nx.is_directed_acyclic_graph(DG):
        raise nx.NetworkXNotImplemented(
            "Vertex level is not defined on directed graphs with cycles.")

    # it can be any post-order ordering
    nodelist = nx.dfs_postorder_nodes(DG)

    for node in nodelist:
        if DG.out_degree(node) == 0:
            lvl[node] = 0
            continue
        lvl[node] = max(lvl[neigh] for neigh in DG.successors(node)) + 1

    if attr is not None:
        for node, level in lvl.items():
            DG.nodes[node][attr] = level

    return lvl
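# A minimal usage sketch for find_levels, assuming the function above is
# importable and that networkx is available as `nx`.  Node "c" is a sink, so
# its level is 0; "a" reaches a sink via the longest path a -> b -> c, so its
# level is 2.
import networkx as nx

DG = nx.DiGraph([("a", "b"), ("b", "c"), ("a", "c")])
levels = find_levels(DG, attr="level")
assert levels == {"a": 2, "b": 1, "c": 0}
assert DG.nodes["a"]["level"] == 2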
def _not_implemented_for(g):
    # `mval`, `dval` and `errmsg` are closure variables supplied by the
    # enclosing `not_implemented_for` decorator factory.
    if (mval is None or mval == g.is_multigraph()) and (
        dval is None or dval == g.is_directed()
    ):
        raise nx.NetworkXNotImplemented(errmsg)

    return g
def spectral_partitioning(G, class_nodes):
    # TODO: to handle converting nodes from strings to integers, create
    # another function
    # class_nodes must be a list or a tuple
    G_nodes = number_of_nodes(G)
    given_nodes = sum(class_nodes)

    if is_empty(G):
        raise nx.NetworkXNotImplemented("Empty graph.")
    # if G_nodes < 2:
    #     raise nx.NetworkXException("Too small graph.")
    if given_nodes != G_nodes:
        raise nx.NetworkXException("Invalid classes")
    if is_weighted(G):
        raise nx.NetworkXNotImplemented("Weighted graph.")
    if is_directed(G):
        raise nx.NetworkXNotImplemented("Directed graph.")
    if number_of_selfloops(G):
        raise nx.NetworkXNotImplemented("Graph with self-edges.")
    if not nx.is_connected(G):
        raise nx.NetworkXException("Not connected graph.")

    # TODO: handle the cases where classes is None or empty
    # class_nodes is assumed to be a list
    classes = len(class_nodes)

    if classes == 1:
        yield G
    else:
        half = classes // 2
        big_class1_nodes = sum(class_nodes[:half])
        big_class2_nodes = sum(class_nodes[half:])
        # working_graph = nx.convert_node_labels_to_integers(test_graph)
        temp_G = _basic_partitioning(G, big_class1_nodes, big_class2_nodes)
        nx.draw(temp_G, with_labels=True)
        plt.savefig("debug.png")
        plt.clf()
        # a component generator would be better
        for component in _working_components(temp_G):
            component_nodes = number_of_nodes(component)
            if component_nodes == big_class1_nodes:
                yield from spectral_partitioning(component, class_nodes[:half])
            else:
                yield from spectral_partitioning(component, class_nodes[half:])
def spectral_partitioning(G, groups_nodes):
    """
    Generate a number of groups, each made up of a given number of nodes, from G.

    :param G: simple connected graph
    :param groups_nodes: list or tuple containing the number of vertices of each component
    :return: generator of graphs
    """
    G_nodes = nx.number_of_nodes(G)
    given_nodes = sum(groups_nodes)
    # print('G_nodes: ', G_nodes)
    # print('given_nodes:', groups_nodes, ', with sum = ', given_nodes)

    if (given_nodes != G_nodes) or (0 in groups_nodes):
        raise nx.NetworkXException("Invalid groups")
    if nx.is_weighted(G):
        raise nx.NetworkXNotImplemented("Weighted graph.")
    if nx.is_directed(G):
        raise nx.NetworkXNotImplemented("Directed graph.")
    if nx.number_of_selfloops(G):
        raise nx.NetworkXNotImplemented("Graph with self-edges.")

    groups = len(groups_nodes)  # number of components into which G is divided

    if groups == 1:
        yield G
    else:
        half = groups // 2
        big_group_1_nodes = sum(groups_nodes[:half])
        big_group_2_nodes = sum(groups_nodes[half:])
        # print('big_group_1_nodes: ', big_group_1_nodes)
        # print('big_group_2_nodes: ', big_group_2_nodes)

        # split G in two and delegate the rest of the division recursively
        big_groups = [
            group
            for group in _basic_partitioning(G, big_group_1_nodes, big_group_2_nodes)
        ]
        yield from spectral_partitioning(big_groups[0], groups_nodes[:half])
        yield from spectral_partitioning(big_groups[1], groups_nodes[half:])
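# A minimal usage sketch for the generator above, assuming the
# `_basic_partitioning` helper it relies on is available in the same module.
# A 6-node path graph is split into two groups of 3 nodes each.
import networkx as nx

G = nx.path_graph(6)   # simple, connected, unweighted, undirected
for part in spectral_partitioning(G, [3, 3]):
    print(sorted(part.nodes()))   # each group is yielded as a (sub)graph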
def checkMotifs(G, s):
    if s == 3:
        edgeLists = [[[1, 2], [1, 3]], [[1, 2], [1, 3], [2, 3]]]
    elif s == 4:
        edgeLists = [[[1, 2], [1, 3], [1, 4]]]
        edgeLists.append([[1, 2], [1, 3], [1, 4], [2, 3]])
        edgeLists.append([[1, 2], [1, 3], [1, 4], [2, 3], [3, 4]])
        edgeLists.append([[1, 2], [1, 3], [1, 4], [2, 3], [3, 4], [2, 4]])
    else:
        raise networkx.NetworkXNotImplemented('Size of motif must be 3 or 4')

    listOfMotifs = [graphEdgeDisplay(x) for x in edgeLists]
    counter = [0 for i in range(len(edgeLists))]
    edgeCount = [s - 1, s]

    for n in G:
        for nodes in it.combinations(G[n], s - 1):
            h = 0
            s1 = set(nodes)
            ed1 = s - 1 + len(
                [x for x in it.combinations(nodes, 2) if x[0] in G[x[1]]])
            counter[ed1 - s + 1] += 1

    for i in range(len(listOfMotifs)):
        counter[i] = counter[i] / sum(
            1 for x in listOfMotifs[i] if len(listOfMotifs[i][x]) == s - 1)

    if s == 4:
        # Need extra motifs: u's (paths) and squares
        listOfMotifs.append(graphEdgeDisplay([[1, 2], [2, 3], [3, 4]]))
        count = 0
        for nod1 in G:
            for nod2 in G[nod1]:
                c3 = len(set(G[nod1]).intersection(G[nod2]))
                c1 = len(G[nod1]) - c3 - 1
                c2 = len(G[nod2]) - c3 - 1
                count += c3 * (c3 - 1) + (c1 + c2) * c3 + c1 * c2
        counter.append(count / 2 - 12 * counter[3] - 6 * counter[2] -
                       2 * counter[1])

        listOfMotifs.append(graphEdgeDisplay([[1, 2], [2, 3], [3, 4], [1, 4]]))
        G1 = G
        count = 0
        for nod1 in G1:
            for nod2 in G1[nod1]:
                s2 = set(G1[nod2])
                for nod3 in G1[nod1]:
                    if nod2 == nod3:
                        continue
                    count += len(s2.intersection(G[nod3])) - 1
        counter.append((count - 8 * counter[2] - 24 * counter[3]) / 8)
        counter[-2] -= counter[-1] * 4

    # counts
    return zip(listOfMotifs, counter)
def _not_implemented_for(not_implement_for_func, *args, **kwargs):
    graph = args[0]
    terms = {'directed': graph.is_directed(),
             'undirected': not graph.is_directed(),
             'multigraph': graph.is_multigraph(),
             'graph': not graph.is_multigraph()}
    match = True
    try:
        for t in graph_types:
            match = match and terms[t]
    except KeyError:
        raise KeyError('use one or more of ',
                       'directed, undirected, multigraph, graph')
    if match:
        msg = 'not implemented for %s type' % ' '.join(graph_types)
        raise nx.NetworkXNotImplemented(msg)
    else:
        return not_implement_for_func(*args, **kwargs)
def _not_implemented_for(not_implement_for_func, *args, **kwargs):
    graph = args[0]
    terms = {
        "directed": graph.is_directed(),
        "undirected": not graph.is_directed(),
        "multigraph": graph.is_multigraph(),
        "graph": not graph.is_multigraph(),
    }
    match = True
    try:
        for t in graph_types:
            match = match and terms[t]
    except KeyError as e:
        raise KeyError(
            "use one or more of directed, undirected, multigraph, graph"
        ) from e
    if match:
        msg = f"not implemented for {' '.join(graph_types)} type"
        raise nx.NetworkXNotImplemented(msg)
    else:
        return not_implement_for_func(*args, **kwargs)
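# For context: the two closures above are produced by NetworkX's
# `not_implemented_for` decorator factory, which supplies `graph_types`.
# A minimal sketch of how the public decorator is used; `triangle_count` is a
# toy function introduced here only for illustration.
import networkx as nx
from networkx.utils import not_implemented_for


@not_implemented_for("directed")
def triangle_count(G):
    """Toy helper that only makes sense for undirected graphs."""
    return sum(nx.triangles(G).values()) // 3


print(triangle_count(nx.cycle_graph(3)))   # 1 (undirected graphs are allowed)

try:
    triangle_count(nx.DiGraph([(0, 1), (1, 2), (2, 0)]))
except nx.NetworkXNotImplemented as err:
    print(err)   # e.g. "not implemented for directed type"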
def configuration_model(deg_sequence, create_using=None, seed=None):
    """Returns a random graph with the given degree sequence.

    The configuration model generates a random pseudograph (graph with
    parallel edges and self loops) by randomly assigning edges to match the
    given degree sequence.

    Parameters
    ----------
    deg_sequence : list of nonnegative integers
        Each list entry corresponds to the degree of a node.
    create_using : NetworkX graph constructor, optional (default MultiGraph)
        Graph type to create. If graph instance, then cleared before populated.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    G : MultiGraph
        A graph with the specified degree sequence.
        Nodes are labeled starting at 0 with an index
        corresponding to the position in deg_sequence.

    Raises
    ------
    NetworkXError
        If the degree sequence does not have an even sum.

    See Also
    --------
    is_graphical

    Notes
    -----
    As described by Newman [1]_.

    A non-graphical degree sequence (not realizable by some simple
    graph) is allowed since this function returns graphs with self
    loops and parallel edges.  An exception is raised if the degree
    sequence does not have an even sum.

    This configuration model construction process can lead to
    duplicate edges and loops.  You can remove the self-loops and
    parallel edges (see below) which will likely result in a graph
    that doesn't have the exact degree sequence specified.

    The density of self-loops and parallel edges tends to decrease as
    the number of nodes increases. However, typically the number of
    self-loops will approach a Poisson distribution with a nonzero mean,
    and similarly for the number of parallel edges.

    Consider a node with *k* stubs. The probability of being joined to
    another stub of the same node is basically (*k* - *1*) / *N*, where
    *k* is the degree and *N* is the number of nodes. So the probability
    of a self-loop scales like *c* / *N* for some constant *c*. As *N*
    grows, this means we expect *c* self-loops. Similarly for parallel
    edges.

    References
    ----------
    .. [1] M.E.J. Newman, "The structure and function of complex networks",
       SIAM REVIEW 45-2, pp 167-256, 2003.

    Examples
    --------
    You can create a degree sequence following a particular distribution
    by using one of the distribution functions in
    :mod:`~networkx.utils.random_sequence` (or one of your own). For
    example, to create an undirected multigraph on one hundred nodes with
    degree sequence chosen from the power law distribution:

    >>> sequence = nx.random_powerlaw_tree_sequence(100, tries=5000)
    >>> G = nx.configuration_model(sequence)
    >>> len(G)
    100
    >>> actual_degrees = [d for v, d in G.degree()]
    >>> actual_degrees == sequence
    True

    The returned graph is a multigraph, which may have parallel
    edges. To remove any parallel edges from the returned graph:

    >>> G = nx.Graph(G)

    Similarly, to remove self-loops:

    >>> G.remove_edges_from(nx.selfloop_edges(G))
    """
    if sum(deg_sequence) % 2 != 0:
        msg = "Invalid degree sequence: sum of degrees must be even, not odd"
        raise nx.NetworkXError(msg)

    G = nx.empty_graph(0, create_using, default=nx.MultiGraph)
    if G.is_directed():
        raise nx.NetworkXNotImplemented("not implemented for directed graphs")

    G = _configuration_model(deg_sequence, G, seed=seed)

    return G
def maximum_common_ordered_tree_embedding(tree1, tree2, node_affinity='auto'):
    """
    Finds the maximum common subtree-embedding between two ordered trees.

    A tree S is an embedded subtree of T if it can be obtained from T by a
    series of edge contractions.

    Note this produces a subtree embedding, which is not necessarily a
    subgraph isomorphism (although a subgraph isomorphism is also an
    embedding.)

    The maximum common embedded subtree problem can be solved in
    `O(n1 * n2 * min(d1, l1) * min(d2, l2))` time on ordered trees with n1
    and n2 nodes, of depth d1 and d2 and with l1 and l2 leaves, respectively.

    Implements the algorithm described in [1]_.

    References:
        On the Maximum Common Embedded Subtree Problem for Ordered Trees
        https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf
        http://algo.inria.fr/flajolet/Publications/FlSiSt90.pdf

    Notes:
        Exact algorithms for computing the tree edit distance between unordered trees
        - https://pdf.sciencedirectassets.com/271538/1-s2.0-S0304397510X00299/1-s2.0-S0304397510005463/main.pdf

        Tree Edit Distance and Common Subtrees
        - https://upcommons.upc.edu/bitstream/handle/2117/97554/R02-20.pdf

        A Survey on Tree Edit Distance and Related Problems
        - https://grfia.dlsi.ua.es/ml/algorithms/references/editsurvey_bille.pdf

    Args:
        tree1 (nx.OrderedDiGraph): first ordered tree
        tree2 (nx.OrderedDiGraph): second ordered tree
        node_affinity (callable): function

    Example:
        >>> from netharn.initializers._nx_extensions import *  # NOQA
        >>> from netharn.initializers._nx_extensions import _lcs, _print_forest
        >>> def random_ordered_tree(n, seed=None):
        >>>     tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
        >>>     otree = nx.OrderedDiGraph()
        >>>     otree.add_edges_from(tree.edges)
        >>>     return otree
        >>> tree1 = random_ordered_tree(10, seed=1)
        >>> tree2 = random_ordered_tree(10, seed=2)
        >>> print('tree1')
        >>> _print_forest(tree1)
        >>> print('tree2')
        >>> _print_forest(tree2)
        >>> embedding1, embedding2 = maximum_common_ordered_tree_embedding(tree1, tree2)
        >>> print('embedding1')
        >>> _print_forest(embedding1)
        >>> print('embedding2')
        >>> _print_forest(embedding2)
    """
    if not (isinstance(tree1, nx.OrderedDiGraph) and nx.is_forest(tree1)):
        raise nx.NetworkXNotImplemented(
            'only implemented for directed ordered trees')
    if not (isinstance(tree2, nx.OrderedDiGraph) and nx.is_forest(tree2)):
        raise nx.NetworkXNotImplemented(
            'only implemented for directed ordered trees')

    # Convert the trees to balanced sequences
    sequence1, open_to_close, toks = tree_to_seq(tree1, open_to_close=None, toks=None)
    sequence2, open_to_close, toks = tree_to_seq(tree2, open_to_close, toks)
    seq1 = sequence1
    seq2 = sequence2

    open_to_tok = ub.invert_dict(toks)  # ub is the ubelt library

    # Solve the longest common balanced sequence problem
    best, value = longest_common_balanced_sequence(
        seq1, seq2, open_to_close, open_to_tok=open_to_tok,
        node_affinity=node_affinity)
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree
    embedding1 = seq_to_tree(subseq1, open_to_close, toks)
    embedding2 = seq_to_tree(subseq2, open_to_close, toks)
    return embedding1, embedding2
def maximum_common_ordered_subtree_isomorphism(tree1, tree2, node_affinity='auto'):
    """
    Isomorphic version of `maximum_common_ordered_tree_embedding`.

    CommandLine:
        xdoctest -m /home/joncrall/code/netharn/netharn/initializers/_nx_extensions.py maximum_common_ordered_subtree_isomorphism:1 --profile && cat profile_output.txt

    Example:
        >>> from netharn.initializers._nx_extensions import *  # NOQA
        >>> from netharn.initializers._nx_extensions import _lcs, _print_forest
        >>> def random_ordered_tree(n, seed=None):
        >>>     tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
        >>>     otree = nx.OrderedDiGraph()
        >>>     otree.add_edges_from(tree.edges)
        >>>     return otree
        >>> tree1 = random_ordered_tree(10, seed=3)
        >>> tree2 = random_ordered_tree(10, seed=2)
        >>> tree1.add_edges_from(tree2.edges, weight=1)
        >>> tree1 = nx.minimum_spanning_arborescence(tree1)
        >>> tree2.add_edges_from(tree1.edges, weight=1)
        >>> tree2 = nx.minimum_spanning_arborescence(tree2)
        >>> tree1.remove_edge(4, 7)
        >>> tree1.remove_edge(4, 9)
        >>> tree1.add_edge(4, 10)
        >>> tree1.add_edge(10, 7)
        >>> tree1.add_edge(10, 9)
        >>> #tree1.add_edges_from([(9, 11), (11, 12), (12, 13), (13, 14)])
        >>> #tree2.add_edges_from([(9, 11), (11, 12), (12, 13), (13, 14)])
        >>> tree1.add_edges_from([(9, 11), (11, 12)])
        >>> tree2.add_edges_from([(9, 11), (11, 12)])
        >>> tree2.add_edge(100, 0)
        >>> tree1.add_edge(102, 100)
        >>> tree1.add_edge(100, 101)
        >>> tree1.add_edge(101, 0)
        >>> tree1.add_edge(5, 201)
        >>> tree1.add_edge(5, 202)
        >>> tree1.add_edge(5, 203)
        >>> tree1.add_edge(201, 2000)
        >>> tree1.add_edge(2000, 2001)
        >>> tree1.add_edge(2001, 2002)
        >>> tree1.add_edge(2002, 2003)
        >>> tree2.add_edge(5, 202)
        >>> tree2.add_edge(5, 203)
        >>> tree2.add_edge(5, 201)
        >>> tree2.add_edge(201, 2000)
        >>> tree2.add_edge(2000, 2001)
        >>> tree2.add_edge(2001, 2002)
        >>> tree2.add_edge(2002, 2003)
        >>> print('-----')
        >>> print('tree1')
        >>> _print_forest(tree1)
        >>> print('tree2')
        >>> _print_forest(tree2)
        >>> subtree1, subtree2 = maximum_common_ordered_subtree_isomorphism(tree1, tree2)
        >>> print('-----')
        >>> print('subtree1')
        >>> _print_forest(subtree1)
        >>> print('subtree2')
        >>> _print_forest(subtree2)
        >>> embedding1, embedding2 = maximum_common_ordered_tree_embedding(tree1, tree2)
        >>> print('-----')
        >>> print('embedding1')
        >>> _print_forest(embedding1)
        >>> print('embedding2')
        >>> _print_forest(embedding2)
        >>> if 0:
        >>>     ti = timerit.Timerit(6, bestof=2, verbose=2)
        >>>     for timer in ti.reset('isomorphism'):
        >>>         with timer:
        >>>             maximum_common_ordered_subtree_isomorphism(tree1, tree2)
        >>>     for timer in ti.reset('embedding'):
        >>>         with timer:
        >>>             maximum_common_ordered_tree_embedding(tree1, tree2)
        >>> from networkx import isomorphism
        >>> assert isomorphism.DiGraphMatcher(tree1, subtree1).subgraph_is_isomorphic()
        >>> assert isomorphism.DiGraphMatcher(tree2, subtree2).subgraph_is_isomorphic()
        >>> list(isomorphism.DiGraphMatcher(tree1, tree2).subgraph_isomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(tree1, tree2).subgraph_monomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(subtree1, subtree2).subgraph_isomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(tree1, subtree1).subgraph_isomorphisms_iter())
        >>> list(isomorphism.DiGraphMatcher(tree2, subtree2).subgraph_isomorphisms_iter())

    Example:
        >>> from netharn.initializers._nx_extensions import *  # NOQA
        >>> from netharn.initializers._nx_extensions import _lcs, _print_forest
        >>> def random_ordered_tree(n, seed=None):
        >>>     if n > 0:
        >>>         tree = nx.dfs_tree(nx.random_tree(n, seed=seed))
        >>>     otree = nx.OrderedDiGraph()
        >>>     if n > 0:
        >>>         otree.add_edges_from(tree.edges)
        >>>     return otree
        >>> import random
        >>> rng = random.Random(90269698983701724775426457020022)
        >>> num = 1000
        >>> def _gen_seeds(num):
        >>>     for _ in range(num):
        >>>         yield (rng.randint(0, 50), rng.randint(0, 50), rng.randint(0, 2 ** 64), rng.randint(0, 2 ** 64))
        >>> for n1, n2, s1, s2 in ub.ProgIter(_gen_seeds(num=num), total=num, verbose=3):
        >>>     tree1 = random_ordered_tree(n1, seed=s1)
        >>>     tree2 = random_ordered_tree(n2, seed=s2)
        >>>     #print('-----')
        >>>     #print('tree1')
        >>>     #_print_forest(tree1)
        >>>     #print('tree2')
        >>>     #_print_forest(tree2)
        >>>     subtree1, subtree2 = maximum_common_ordered_subtree_isomorphism(tree1, tree2, node_affinity='auto')
        >>>     #print('-----')
        >>>     #print('subtree1')
        >>>     #_print_forest(subtree1)
        >>>     #print('subtree2')
        >>>     #_print_forest(subtree2)
        >>>     from networkx import isomorphism
        >>>     assert isomorphism.DiGraphMatcher(tree1, subtree1).subgraph_is_isomorphic()
        >>>     assert isomorphism.DiGraphMatcher(tree2, subtree2).subgraph_is_isomorphic()
    """
    try:
        if not (isinstance(tree1, nx.OrderedDiGraph) and nx.is_forest(tree1)):
            raise nx.NetworkXNotImplemented(
                'only implemented for directed ordered trees')
        if not (isinstance(tree2, nx.OrderedDiGraph) and nx.is_forest(tree2)):
            raise nx.NetworkXNotImplemented(
                'only implemented for directed ordered trees')
    except nx.NetworkXPointlessConcept:
        subtree1 = nx.OrderedDiGraph()
        subtree2 = nx.OrderedDiGraph()
        return subtree1, subtree2

    # Convert the trees to balanced sequences
    sequence1, open_to_close, toks = tree_to_seq(
        tree1, open_to_close=None, toks=None, mode='chr')
    sequence2, open_to_close, toks = tree_to_seq(
        tree2, open_to_close, toks, mode='chr')
    seq1 = sequence1
    seq2 = sequence2

    open_to_tok = ub.invert_dict(toks)

    # Solve the longest common balanced sequence problem
    best, value = longest_common_isomorphic_sequence(
        seq1, seq2, open_to_close, open_to_tok=open_to_tok,
        node_affinity=node_affinity)
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree
    subtree1 = seq_to_tree(subseq1, open_to_close, toks)
    subtree2 = seq_to_tree(subseq2, open_to_close, toks)
    return subtree1, subtree2
def forest_str(graph, with_labels=True, sources=None, write=None):
    """
    Creates a nice utf8 representation of a forest (directed or undirected)

    Parameters
    ----------
    graph : nx.DiGraph | nx.Graph
        Graph to represent (must be a tree, forest, or the empty graph)

    with_labels : bool
        If True will use the "label" attribute of a node to display if it
        exists otherwise it will use the node value itself. Defaults to True.

    sources : List
        Mainly relevant for undirected forests, specifies which nodes to list
        first. If unspecified the root nodes of each tree will be used for
        directed forests; for undirected forests this defaults to the nodes
        with the smallest degree.

    write : callable
        Function to use to write to, if None new lines are appended to
        a list and returned. If set to the `print` function, lines will
        be written to stdout as they are generated. If specified,
        this function will return None. Defaults to None.

    Returns
    -------
    str | None :
        utf8 representation of the tree / forest

    Example
    -------
    >>> import networkx as nx
    >>> graph = nx.balanced_tree(r=2, h=3, create_using=nx.DiGraph)
    >>> print(forest_str(graph))
    ╙── 0
        ├─╼ 1
        │   ├─╼ 3
        │   │   ├─╼ 7
        │   │   └─╼ 8
        │   └─╼ 4
        │       ├─╼ 9
        │       └─╼ 10
        └─╼ 2
            ├─╼ 5
            │   ├─╼ 11
            │   └─╼ 12
            └─╼ 6
                ├─╼ 13
                └─╼ 14

    >>> graph = nx.balanced_tree(r=1, h=2, create_using=nx.Graph)
    >>> print(forest_str(graph))
    ╙── 0
        └── 1
            └── 2
    """
    import networkx as nx

    printbuf = []
    if write is None:
        _write = printbuf.append
    else:
        _write = write

    if len(graph.nodes) == 0:
        _write("╙")
    else:
        if not nx.is_forest(graph):
            raise nx.NetworkXNotImplemented(
                "input must be a forest or the empty graph")

        is_directed = graph.is_directed()
        succ = graph.succ if is_directed else graph.adj

        if sources is None:
            if is_directed:
                # use real source nodes for directed trees
                sources = [n for n in graph.nodes if graph.in_degree[n] == 0]
            else:
                # use arbitrary sources for undirected trees
                sources = [
                    min(cc, key=lambda n: graph.degree[n])
                    for cc in nx.connected_components(graph)
                ]

        # Populate the stack with each source node, empty indentation, and mark
        # the final node. Reverse the stack so sources are popped in the
        # correct order.
        last_idx = len(sources) - 1
        stack = [(node, "", (idx == last_idx))
                 for idx, node in enumerate(sources)][::-1]

        seen = set()
        while stack:
            node, indent, islast = stack.pop()
            if node in seen:
                continue
            seen.add(node)

            # Notes on available box and arrow characters
            # https://en.wikipedia.org/wiki/Box-drawing_character
            # https://stackoverflow.com/questions/2701192/triangle-arrow
            if not indent:
                # Top level items (i.e. trees in the forest) get different
                # glyphs to indicate they are not actually connected
                if islast:
                    this_prefix = indent + "╙── "
                    next_prefix = indent + "    "
                else:
                    this_prefix = indent + "╟── "
                    next_prefix = indent + "╎   "
            else:
                # For individual forests distinguish between directed and
                # undirected cases
                if is_directed:
                    if islast:
                        this_prefix = indent + "└─╼ "
                        next_prefix = indent + "    "
                    else:
                        this_prefix = indent + "├─╼ "
                        next_prefix = indent + "│   "
                else:
                    if islast:
                        this_prefix = indent + "└── "
                        next_prefix = indent + "    "
                    else:
                        this_prefix = indent + "├── "
                        next_prefix = indent + "│   "

            if with_labels:
                label = graph.nodes[node].get("label", node)
            else:
                label = node

            _write(this_prefix + str(label))

            # Push children on the stack in reverse order so they are popped in
            # the original order.
            children = [child for child in succ[node] if child not in seen]
            for idx, child in enumerate(children[::-1], start=1):
                islast_next = idx <= 1
                try_frame = (child, next_prefix, islast_next)
                stack.append(try_frame)

    if write is None:
        # Only return a string if the custom write function was not specified
        return "\n".join(printbuf)
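# A small usage sketch of the `write` parameter described above: passing
# `print` streams each line to stdout as it is generated, and the function
# then returns None instead of a joined string.
import networkx as nx

graph = nx.balanced_tree(r=2, h=1, create_using=nx.DiGraph)
result = forest_str(graph, write=print)   # prints the three-node tree
assert result is None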
def maximum_common_ordered_subtree_embedding(
        tree1, tree2, node_affinity="auto", impl="auto", item_type="auto"):
    r"""
    Finds the maximum common subtree-embedding between two ordered trees.

    A tree S is an embedded subtree (also known as a minor) of T if it can be
    obtained from T by a series of edge contractions.

    Subtree embeddings (or minors) are similar to tree isomorphisms --- if T
    is a subtree isomorphism then T is a minor. However, if you contract an
    edge in T, then it may no longer be an isomorphism, but it is still a
    minor.

    This function computes the maximum common embedded subtrees S1 and S2
    between two trees T1 and T2. S1 and S2 are minors of T1 and T2 with
    maximal size such that S1 is isomorphic to S2.

    The computational complexity is: ``O(n1 * n2 * min(d1, l1) * min(d2, l2))``
    on ordered trees with n1 and n2 nodes, of depth d1 and d2 and with l1 and
    l2 leaves, respectively.

    This implementation follows the algorithm described in [1]_, which
    introduces the problem as follows:

    "An important generalization of tree and subtree isomorphism, known as
    minor containment, is the problem of determining whether a tree is
    isomorphic to an embedded subtree of another tree, where an embedded
    subtree of a tree is obtained by contracting some of the edges in the
    tree. A further generalization of minor containment on trees, known as
    maximum common embedded subtree, is the problem of finding or determining
    the size of a largest common embedded subtree of two trees. The latter
    also generalizes the maximum common subtree isomorphism problem, in which
    a common subtree of largest size is contained as a subtree, not only
    embedded, in the two trees."

    Parameters
    ----------
    tree1, tree2 : nx.OrderedDiGraph
        Trees to find the maximum embedding between

    node_affinity : None | str | callable
        Function to determine whether two nodes can be matched. The return
        value is interpreted as a weight that is used to break ties. If None
        then any node can match any other node and only the topology is
        important. The default is "eq", which is the same as ``operator.eq``.

    impl : str
        Determines the backend implementation. Defaults to "auto". See
        :func:`netharn.initializers._nx_ext_v2.balanced_embedding.longest_common_balanced_embedding`
        for details. Other valid options are "iter", "recurse", and
        "iter-cython".

    item_type : str
        Determines the backend data structure used to encode the tree as a
        balanced sequence. Defaults to "auto", other valid options are "chr"
        and "number".

    Returns
    -------
    S1, S2, value: Tuple[nx.OrderedDiGraph, nx.OrderedDiGraph, float]
        The maximum value common embedding for each tree with respect to the
        chosen ``node_affinity`` function. The topology of both graphs will
        always be the same, the only difference is that the node labels in
        the first and second embeddings will correspond to ``tree1`` and
        ``tree2`` respectively. When ``node_affinity='eq'`` then embeddings
        should be identical. The last return value is the "weight" of the
        solution with respect to ``node_affinity``.

    References
    ----------
    .. [1] Lozano, Antoni, and Gabriel Valiente.
        "On the maximum common embedded subtree problem for ordered trees."
        String Algorithmics (2004): 155-170.
        https://pdfs.semanticscholar.org/0b6e/061af02353f7d9b887f9a378be70be64d165.pdf

    See Also
    --------
    * For example usage see ``examples/applications/filesystem_embedding.py``
    * Core backends are in
      :mod:`netharn.initializers._nx_ext_v2.balanced_embedding.longest_common_balanced_embedding`

    Example
    -------
    >>> from netharn.initializers._nx_ext_v2.tree_embedding import *  # NOQA
    >>> import networkx as nx
    >>> # Create two random trees
    >>> tree1 = random_ordered_tree(7, seed=3257073545741117277206611, directed=True)
    >>> tree2 = random_ordered_tree(7, seed=123568587133124688238689717, directed=True)
    >>> print(forest_str(tree1))
    ╙── 0
        ├─╼ 5
        │   └─╼ 2
        └─╼ 1
            └─╼ 6
                ├─╼ 3
                └─╼ 4
    >>> print(forest_str(tree2))
    ╙── 0
        └─╼ 2
            ├─╼ 1
            │   ├─╼ 4
            │   └─╼ 3
            │       └─╼ 5
            └─╼ 6
    >>> # Compute the maximum common embedding between the two trees
    >>> embedding1, embedding2, _ = maximum_common_ordered_subtree_embedding(tree1, tree2)
    >>> print(forest_str(embedding1))
    ╙── 0
        └─╼ 1
            └─╼ 4
    >>> assert embedding1.edges == embedding2.edges, (
    ...     'when node_affinity is "eq" both embeddings will be the same')
    >>> # Demo with a custom node affinity where any node can match unless
    >>> # they are the same, and we strongly prefer nodes that are dissimilar
    >>> def custom_node_affinity(n1, n2):
    ...     return abs(n1 - n2) ** 2
    >>> embedding1, embedding2, _ = maximum_common_ordered_subtree_embedding(
    ...     tree1, tree2, node_affinity=custom_node_affinity)
    >>> # In this case the embeddings for each tree will be different
    >>> print(forest_str(embedding1))
    ╙── 0
        ├─╼ 5
        │   └─╼ 2
        └─╼ 1
    >>> print(forest_str(embedding2))
    ╙── 2
        ├─╼ 1
        │   └─╼ 5
        └─╼ 6
    """
    import networkx as nx

    # Note: checks that inputs are forests are handled by tree_to_seq
    if not isinstance(tree1, nx.OrderedDiGraph):
        raise nx.NetworkXNotImplemented(
            "only implemented for directed ordered trees")
    if not isinstance(tree2, nx.OrderedDiGraph):
        raise nx.NetworkXNotImplemented(
            "only implemented for directed ordered trees")

    if tree1.number_of_nodes() == 0 or tree2.number_of_nodes() == 0:
        raise nx.NetworkXPointlessConcept

    if item_type == "label":
        # If we do allow label, I think the algorithm will work, but the
        # returned tree embeddings will only be embeddings wrt the label
        # structure.
        raise AssertionError(
            "allowing sequences to be specified by the labels breaks assumptions"
        )

    # Convert the trees to balanced sequences.
    # NOTE: each sequence will contain each token at most once, this is an
    # important assumption in subsequent steps.
    seq1, open_to_close, node_to_open = tree_to_seq(
        tree1,
        open_to_close=None,
        node_to_open=None,
        item_type=item_type,
        container_type="auto",
    )
    seq2, open_to_close, node_to_open = tree_to_seq(
        tree2, open_to_close, node_to_open, item_type=item_type,
        container_type="auto")

    # NOTE: This DOES work in the case where all opening tokens within a single
    # sequence are unique. And we CAN enforce that this is the case in our
    # reduction because each node in a graph is always unique and we always
    # choose a unique token for each unique node in ``tree_to_seq``.
    open_to_node = {tok: node for node, tok in node_to_open.items()}

    # Solve the longest common balanced sequence problem
    best, value = balanced_embedding.longest_common_balanced_embedding(
        seq1,
        seq2,
        open_to_close,
        open_to_node=open_to_node,
        node_affinity=node_affinity,
        impl=impl,
    )
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree.
    # Note: we could return the contracted edges as well here, but that can
    # always be done as a postprocessing step. See tests for an example of
    # this.
    embedding1 = seq_to_tree(subseq1, open_to_close, open_to_node)
    embedding2 = seq_to_tree(subseq2, open_to_close, open_to_node)

    return embedding1, embedding2, value
def check_Motifs(H, m):
    """
    Basic simple motif counter for networkx.

    Takes two arguments: a graph ``H`` and the motif size ``m``.
    Supported motif sizes are 3 and 4.

    This function is actually rather simple. It extracts all subgraphs of the
    given size from the original graph and looks for isomorphisms with the
    motif patterns stored in a dictionary. The graph is returned with
    per-node and per-edge motif counts stored as attributes.

    m : motif size. Currently this handles 3-node motifs; "bifan" and other
    4-node motifs are under development.
    """
    # This function will take each possible subgraph of the graph of size 3,
    # then compare them to the motif dict using .subgraph() and is_isomorphic.
    # The line below simply creates a dictionary with 0 for all values, and
    # the motif names as keys.
    ## paper source: "Higher-order organization of complex networks" (2016)
    ## Benson et al., Science
    ## I chose only the unidirectional ones: M1, M5, M8, M9, M10
    s = int(m)
    if s == 3:
        # motifs = {'M1': nx.DiGraph([(1,2),(2,3),(3,1)]), 'M5': nx.DiGraph([(1,2),(2,3),(1,3)]), 'M8': nx.DiGraph([(2, 1),(2,3)]), 'M9': nx.DiGraph([(2, 1),(3, 2)]), 'M10': nx.DiGraph([(1,2),(3,2)])}
        motifs = {
            'M1': [(1, 2), (2, 3), (3, 1)],
            'M5': [(1, 2), (2, 3), (1, 3)],
            'M8': [(2, 1), (2, 3)],
            'M9': [(2, 1), (3, 2)],
            'M10': [(1, 2), (3, 2)],
            'M2': [(1, 2), (2, 3), (3, 2), (3, 1)],
            'M3': [(1, 2), (2, 3), (3, 2), (1, 3), (3, 1)],
            'M4': [(1, 2), (2, 1), (2, 3), (3, 2), (1, 3), (3, 1)],
            'M6': [(2, 1), (2, 3), (1, 3), (3, 1)],
            'M7': [(1, 2), (3, 2), (1, 3), (3, 1)],
            'M11': [(1, 2), (2, 1), (2, 3)],
            'M12': [(1, 2), (2, 1), (3, 2)],
            'M13': [(1, 2), (2, 1), (2, 3), (3, 2)],
        }
    elif s == 4:
        ## under development
        motifs = {'bifan': [(1, 2), (1, 3), (4, 2), (4, 3)]}
        # edgeLists = [[[1,2],[1,3],[1,4]]]
        # edgeLists.append([[1,2],[1,3],[1,4],[2,3]])
        # edgeLists.append([[1,2],[1,3],[1,4],[2,3],[3,4]])
        # edgeLists.append([[1,2],[1,3],[1,4],[2,3],[3,4],[2,4]])
    else:
        raise nx.NetworkXNotImplemented('Size of motif must be 3 or 4')

    # outf = open(f2, 'w')
    # print >> outf, 'commitid|motiflabel|count'
    G = H
    mcount = dict(zip(motifs.keys(), list(map(int, np.zeros(len(motifs))))))

    ## match the pattern and count the motifs
    dict_edges = defaultdict(list)
    dict_nodes = defaultdict(list)
    for key in motifs:
        pattern = motifs[key]
        gmoti = nx.DiGraph()
        gmoti.add_edges_from(pattern)
        motif_pattern_obs = subgraph_pattern(G, gmoti, sign_sensitive=False)
        s = []
        for subgraph in motif_pattern_obs:
            tup = tuple(subgraph.keys())
            s.append(tup)
        uniqs = list(set(s))
        if len(uniqs) > 0:
            # materialize the list so it can be len()'d and iterated
            # (``map`` returns a one-shot iterator on Python 3)
            maplist = [list(tup) for tup in uniqs]
            ### label the edges as per the motif labels
            mcount[str(key)] = len(maplist)
            for triplets in maplist:
                subgraph = G.subgraph(triplets)
                edgeLists = [e for e in subgraph.edges() if G.has_edge(*e)]
                ## an edge is part of multiple motifs
                ## let's count the number of motifs an edge is part of
                for u, v in edgeLists:
                    dict_edges[(u, v)].append(str(key))
                ## A node is also part of multiple motifs.
                ## We count the total number of motifs a node is part of and
                ## the frequency of occurrence of each motif the node is part of.
                nodelists = subgraph.nodes()
                for n in nodelists:
                    dict_nodes[str(n)].append(str(key))

    # for keys, values in mcount.items():
    #     print >> outf, '%s|%s|%s' % (outname, keys, values)

    ### Let's mark the edge with motif type and count. We count the number of
    ### types of motif an edge is a part of. An edge could appear in M1: M1x
    ### times and in M2: M2x times and so on.
    for u, v in G.edges():
        if (u, v) in dict_edges:
            G[u][v]['num_motif_edge'] = len(list(set(dict_edges[(u, v)])))

    ### Let's mark the node with motif type and count. We count the number of
    ### types of motif a node is a part of.
    for n in G.nodes():
        motficountnode = dict(
            zip(motifs.keys(), list(map(int, np.zeros(len(motifs))))))
        if str(n) in dict_nodes:
            subgraphnodeslist = dict_nodes[str(n)]
            for key in subgraphnodeslist:
                motficountnode[str(key)] += 1
        for motif, count in motficountnode.items():
            # ``G.nodes`` replaces the ``G.node`` accessor removed in newer networkx
            G.nodes[n][str(motif)] = int(count)

    ### Let's mark the edge with motif type and count. We count the number of
    ### types of motif an edge is a part of. An edge could appear in M1: M1x
    ### times and in M2: M2x times and so on.
    for u, v in G.edges():
        motficountedge = dict(
            zip(motifs.keys(), list(map(int, np.zeros(len(motifs))))))
        if (u, v) in dict_edges:
            subgraphedgeslist = dict_edges[(u, v)]
            for key in subgraphedgeslist:
                motficountedge[str(key)] += 1
        for motif, count in motficountedge.items():
            G[u][v][str(motif)] = int(count)

    return G
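# A self-contained sketch of the core matching step above, using networkx's
# built-in VF2 matcher instead of the external `subgraph_pattern` helper
# (which is assumed to yield one node mapping per motif occurrence).
import networkx as nx
from networkx.algorithms import isomorphism

G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4)])
motif = nx.DiGraph([(1, 2), (2, 3), (3, 1)])   # the 'M1' directed 3-cycle

matcher = isomorphism.DiGraphMatcher(G, motif)
# Each mapping is a dict from G-nodes to motif-nodes; deduplicate by node set,
# mirroring the `uniqs = list(set(s))` step in check_Motifs.
matches = {frozenset(mapping) for mapping in matcher.subgraph_isomorphisms_iter()}
print(len(matches))   # 1 distinct node set {1, 2, 3} forms the M1 motif here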
def _not_implemented(f, *args, **kwargs):
    raise nx.NetworkXNotImplemented('Method not implemented for dynamic graphs')
def maximum_common_ordered_subtree_isomorphism(
        tree1, tree2, node_affinity="auto", impl="auto", item_type="auto"):
    """
    Finds the maximum common subtree-isomorphism between two ordered trees.

    This function computes the maximum-weight common subtrees S1 and S2
    between two trees T1 and T2. S1 and S2 are isomorphic to subgraphs of T1
    and T2 with maximal size such that S1 and S2 are also isomorphic to each
    other.

    This function is similar to
    :func:`maximum_common_ordered_subtree_embedding`, with the main
    difference being that the returned solutions from this function will be
    proper subgraphs (i.e. all edges in the subgraphs will exist in the
    original graph), whereas in the subtree embedding problem the returned
    solutions are allowed to be minors of the input graphs (i.e. edges are
    allowed to be contracted).

    Parameters
    ----------
    tree1, tree2 : nx.OrderedDiGraph
        Trees to find the maximum subtree isomorphism between

    node_affinity : None | str | callable
        Function to determine whether two nodes can be matched. The return
        value is interpreted as a weight that is used to break ties. If None
        then any node can match any other node and only the topology is
        important. The default is "eq", which is the same as ``operator.eq``.

    impl : str
        Determines the backend implementation. Defaults to "auto". See
        :func:`balanced_sequence.longest_common_balanced_sequence` for
        details. Other valid options are "iter", "recurse", and
        "iter-cython".

    item_type : str
        Determines the backend data structure used to encode the tree as a
        balanced sequence. Defaults to "auto", other valid options are "chr"
        and "number".

    Returns
    -------
    S1, S2, value: Tuple[nx.OrderedDiGraph, nx.OrderedDiGraph, int]
        The maximum value common subtree isomorphism for each tree with
        respect to the chosen ``node_affinity`` function. The topology of
        both graphs will always be the same, the only difference is that the
        node labels in the first and second embeddings will correspond to
        ``tree1`` and ``tree2`` respectively. When ``node_affinity='eq'``
        then embeddings should be identical. The last return value is the
        "size" of the solution with respect to ``node_affinity``.

    See Also
    --------
    `maximum_common_ordered_subtree_embedding`
    """
    import networkx as nx

    # Note: checks that inputs are forests are handled by tree_to_seq
    if not isinstance(tree1, nx.OrderedDiGraph):
        raise nx.NetworkXNotImplemented(
            "only implemented for directed ordered trees")
    if not isinstance(tree2, nx.OrderedDiGraph):
        raise nx.NetworkXNotImplemented(
            "only implemented for directed ordered trees")

    if tree1.number_of_nodes() == 0 or tree2.number_of_nodes() == 0:
        raise nx.NetworkXPointlessConcept

    if item_type == "label":
        # If we do allow label, I think the algorithm will work, but the
        # returned tree embeddings will only be embeddings wrt the label
        # structure.
        raise AssertionError(
            "allowing sequences to be specified by the labels breaks assumptions"
        )

    # Convert the trees to balanced sequences.
    # Each sequence will contain each token at most once, this is an important
    # assumption in subsequent steps.
    seq1, open_to_close, node_to_open = tree_to_seq(
        tree1,
        open_to_close=None,
        node_to_open=None,
        item_type=item_type,
        container_type="auto",
    )
    seq2, open_to_close, node_to_open = tree_to_seq(
        tree2, open_to_close, node_to_open, item_type=item_type,
        container_type="auto")
    open_to_node = {tok: node for node, tok in node_to_open.items()}

    # Solve the longest common balanced sequence problem
    best, value = longest_common_balanced_isomorphism(
        seq1,
        seq2,
        open_to_close,
        open_to_node=open_to_node,
        node_affinity=node_affinity,
        impl=impl,
    )
    subseq1, subseq2 = best

    # Convert the subsequence back into a tree.
    subtree1 = seq_to_tree(subseq1, open_to_close, open_to_node)
    subtree2 = seq_to_tree(subseq2, open_to_close, open_to_node)

    return subtree1, subtree2, value
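# A minimal usage sketch, assuming an older networkx that still ships
# nx.OrderedDiGraph and that this module's helpers (tree_to_seq, seq_to_tree,
# longest_common_balanced_isomorphism) are importable alongside the function.
import networkx as nx

tree1 = nx.OrderedDiGraph([(0, 1), (1, 2), (1, 3), (0, 4)])
tree2 = nx.OrderedDiGraph([(0, 1), (1, 2), (0, 3), (3, 4)])

subtree1, subtree2, value = maximum_common_ordered_subtree_isomorphism(tree1, tree2)
print(value)                    # weight of the common subtree under the default affinity
print(list(subtree1.edges()))   # a proper subgraph of tree1's edges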