# Imports required by the functions below.
import random
from collections import Counter, defaultdict

import networkx as nx
from networkx import NetworkXError, is_connected, pagerank
from networkx.utils import groups, py_random_state


# py_random_state turns an int or None `seed` into a random.Random instance.
@py_random_state(2)
def asyn_lpa_communities(G, weight=None, seed=None):
    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        seed.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue
            # Get label frequencies. Depending on the order in which nodes
            # are processed, some will be in iteration t and others in t-1,
            # making the algorithm asynchronous.
            label_freq = Counter()
            for v in G[node]:
                label_freq.update(
                    {labels[v]: G.edges[node, v][weight] if weight else 1}
                )
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]
            # Continue until all nodes have a majority label
            if labels[node] not in best_labels:
                labels[node] = seed.choice(best_labels)
                cont = True
    yield from groups(labels).values()
def to_sets(self):
    """Iterates over the sets stored in this structure.

    For example::

        >>> partition = UnionFind('xyz')
        >>> sorted(map(sorted, partition.to_sets()))
        [['x'], ['y'], ['z']]
        >>> partition.union('x', 'y')
        >>> sorted(map(sorted, partition.to_sets()))
        [['x', 'y'], ['z']]

    """
    # TODO In Python 3.3+, this should be `yield from ...`.
    for block in groups(self.parents).values():
        yield block
def to_sets(self):
    """Iterates over the sets stored in this structure.

    For example::

        >>> partition = UnionFind("xyz")
        >>> sorted(map(sorted, partition.to_sets()))
        [['x'], ['y'], ['z']]
        >>> partition.union("x", "y")
        >>> sorted(map(sorted, partition.to_sets()))
        [['x', 'y'], ['z']]

    """
    # Ensure fully pruned paths
    for x in self.parents.keys():
        _ = self[x]  # Evaluated for side-effect only
    yield from groups(self.parents).values()
def to_sets(self):
    """Iterates over the sets stored in this structure.

    For example::

        >>> partition = UnionFind('xyz')
        >>> sorted(map(sorted, partition.to_sets()))
        [['x'], ['y'], ['z']]
        >>> partition.union('x', 'y')
        >>> sorted(map(sorted, partition.to_sets()))
        [['x', 'y'], ['z']]

    """
    # Ensure fully pruned paths
    for x in self.parents.keys():
        _ = self[x]  # Evaluated for side-effect only
    # TODO In Python 3.3+, this should be `yield from ...`.
    for block in groups(self.parents).values():
        yield block
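# Why the pruning pass in the later versions of `to_sets` matters: after
# chained unions, some entries in `parents` may still point at an old root,
# so grouping nodes by their immediate parent can split a set. A minimal
# sketch, assuming the UnionFind and groups helpers from networkx.utils:
from networkx.utils import UnionFind

uf = UnionFind("abcd")
uf.union("a", "b")  # one tree for {a, b}
uf.union("c", "d")  # one tree for {c, d}
uf.union("b", "d")  # the merge leaves one stale parent pointer behind
print(sorted(map(sorted, groups(uf.parents).values())))  # may show a split set
for x in list(uf.parents):
    uf[x]  # __getitem__ compresses the path from x to its root
print(sorted(map(sorted, groups(uf.parents).values())))  # [['a', 'b', 'c', 'd']]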
def voronoi_cells(G, center_nodes, weight='weight'):
    """Returns the Voronoi cells centered at `center_nodes` with respect
    to the shortest-path distance metric.

    If *C* is a set of nodes in the graph and *c* is an element of *C*,
    the *Voronoi cell* centered at a node *c* is the set of all nodes
    *v* that are closer to *c* than to any other center node in *C* with
    respect to the shortest-path distance metric. [1]_

    For directed graphs, this will compute the "outward" Voronoi cells,
    as defined in [1]_, in which distance is measured from the center
    nodes to the target node. For the "inward" Voronoi cells, use the
    :meth:`DiGraph.reverse` method to reverse the orientation of the
    edges before invoking this function on the directed graph.

    Parameters
    ----------
    G : NetworkX graph

    center_nodes : set
        A nonempty set of nodes in the graph `G` that represent the
        center of the Voronoi cells.

    weight : string or function
        The edge attribute (or an arbitrary function) representing the
        weight of an edge. This keyword argument is as described in the
        documentation for :func:`~networkx.multi_source_dijkstra_path`,
        for example.

    Returns
    -------
    dictionary
        A mapping from center node to set of all nodes in the graph
        closer to that center node than to any other center node. The
        keys of the dictionary are the elements of `center_nodes`, and
        the values of the dictionary form a partition of the nodes of
        `G`.

    Examples
    --------
    To get only the partition of the graph induced by the Voronoi cells,
    take the collection of all values in the returned dictionary::

        >>> G = nx.path_graph(6)
        >>> center_nodes = {0, 3}
        >>> cells = nx.voronoi_cells(G, center_nodes)
        >>> partition = set(map(frozenset, cells.values()))
        >>> sorted(map(sorted, partition))
        [[0, 1], [2, 3, 4, 5]]

    Raises
    ------
    ValueError
        If `center_nodes` is empty.

    References
    ----------
    .. [1] Erwig, Martin. (2000), "The graph Voronoi diagram with
           applications." *Networks*, 36: 156--163.
           <dx.doi.org/10.1002/1097-0037(200010)36:3<156::AID-NET2>3.0.CO;2-L>

    """
    # Determine the shortest paths from any one of the center nodes to
    # every node in the graph.
    #
    # This raises `ValueError` if `center_nodes` is an empty set.
    paths = nx.multi_source_dijkstra_path(G, center_nodes, weight=weight)
    # Determine the center node from which the shortest path originates.
    nearest = {v: p[0] for v, p in paths.items()}
    # Get the mapping from center node to all nodes closer to it than to
    # any other center node.
    cells = groups(nearest)
    # We collect all unreachable nodes under a special key, if there are any.
    unreachable = set(G) - set(nearest)
    if unreachable:
        cells['unreachable'] = unreachable
    return cells
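# A short usage sketch of the "inward" variant mentioned in the docstring:
# reversing the digraph makes distances run toward the centers instead of
# away from them.
G_demo = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 4)])
centers = {0, 4}
outward = nx.voronoi_cells(G_demo, centers)  # distances measured from the centers
inward = nx.voronoi_cells(G_demo.reverse(copy=False), centers)  # toward the centers
print(outward, inward)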
@py_random_state(3)
def asyn_fluidc(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by the Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_.
    The algorithm is based on the simple idea of fluids interacting in an
    environment, expanding and pushing each other. Its initialization is
    random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k
    communities is initialized in a random vertex in the graph. Then the
    algorithm iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the
    communities of its neighbours. This process is performed several
    times until convergence. At all times, each community has a total
    density of 1, which is equally distributed among the vertices it
    contains. If a vertex changes community, the vertex densities of the
    affected communities are adjusted immediately. When a complete
    iteration over all vertices is done, such that no vertex changes the
    community it belongs to, the algorithm has converged and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The maximum number of iterations allowed. By default 100.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    The `k` variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    seed.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in the graph in a random order
        vertices = list(G)
        seed.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]}
                )
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]}
                    )
                except KeyError:
                    continue
            # Check which is the community with the highest density
            new_com = -1
            if len(com_counter.keys()) > 0:
                max_freq = max(com_counter.values())
                best_communities = [
                    com
                    for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If the actual vertex community is in best_communities,
                # it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If the vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from candidates
                    new_com = seed.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]]
                        )
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]]
                    )
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
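# A quick usage sketch for the function above; it matches networkx's
# asyn_fluidc, so the packaged version can be called the same way. With an
# integer seed, the py_random_state decorator builds a random.Random from it.
from networkx.algorithms.community import asyn_fluidc as nx_asyn_fluidc

G_demo = nx.karate_club_graph()
communities_demo = list(nx_asyn_fluidc(G_demo, k=2, seed=42))
print([sorted(c) for c in communities_demo])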
@py_random_state(3)
def asyn_fluidc(G, k, max_iter=100, seed=None):  # noqa, pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """This function is adapted directly from networkx."""
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    seed.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in the graph in a random order
        vertices = list(G)
        seed.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]}
                )
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]}
                    )
                except KeyError:
                    continue
            # Check which is the community with the highest density
            new_com = -1
            if com_counter.keys():
                max_freq = max(com_counter.values())
                best_communities = [
                    com
                    for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If the actual vertex community is in best_communities,
                # it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If the vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from candidates
                    new_com = seed.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]]
                        )
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]]
                    )
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as a list of vertex sets,
    # together with the final per-community densities.
    return list(groups(communities).values()), list(density.values())
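# A hedged usage sketch for the adapted variant above, which, unlike the
# networkx original, returns both the communities and the final per-community
# densities (assuming the function is defined in the current module with the
# py_random_state decorator applied):
G_demo = nx.karate_club_graph()
communities_demo, densities_demo = asyn_fluidc(G_demo, k=2, seed=0)
print(len(communities_demo), densities_demo)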
@py_random_state(2)
def asyn_lpa_communities(G, weight=None, seed=None):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """
    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        seed.shuffle(nodes)
        for node in nodes:
            if not G[node]:
                continue
            # Get label frequencies among adjacent nodes.
            # Depending on the order they are processed in,
            # some nodes will be in iteration t and others in t-1,
            # making the algorithm asynchronous.
            if weight is None:
                # initialising a Counter from an iterator of labels is
                # faster for getting unweighted label frequencies
                label_freq = Counter(map(labels.get, G[node]))
            else:
                # updating a defaultdict is substantially faster
                # for getting weighted label frequencies
                label_freq = defaultdict(float)
                for _, v, wt in G.edges(node, data=weight, default=1):
                    label_freq[labels[v]] += wt
            # Get the labels that appear with maximum frequency.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]
            # If the node does not have one of the maximum frequency labels,
            # randomly choose one of them and update the node's label.
            # Continue the iteration as long as at least one node
            # doesn't have a maximum frequency label.
            if labels[node] not in best_labels:
                labels[node] = seed.choice(best_labels)
                cont = True
    yield from groups(labels).values()
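# A usage sketch of the weighted behaviour: heavy intra-triangle edges
# dominate the label frequencies, so two triangles joined by a light bridge
# tend to come out as separate communities (the algorithm is probabilistic,
# so results can vary between runs).
from networkx.algorithms.community import asyn_lpa_communities as nx_asyn_lpa

G_demo = nx.Graph()
G_demo.add_weighted_edges_from(
    [(0, 1, 5.0), (1, 2, 5.0), (2, 0, 5.0), (2, 3, 0.1),
     (3, 4, 5.0), (4, 5, 5.0), (5, 3, 5.0)]
)
communities_demo = list(nx_asyn_lpa(G_demo, weight="weight", seed=1))
print([sorted(c) for c in communities_demo])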
def test_groups():
    many_to_one = dict(zip('abcde', [0, 0, 1, 1, 2]))
    actual = groups(many_to_one)
    expected = {0: {'a', 'b'}, 1: {'c', 'd'}, 2: {'e'}}
    assert actual == expected
    assert {} == groups({})
def asyn_lpa_communities(G, weight=None):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """
    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        random.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue
            # Get label frequencies. Depending on the order in which nodes
            # are processed, some will be in iteration t and others in t-1,
            # making the algorithm asynchronous. Accumulate in a loop so that
            # neighbors sharing a label add their weights together (the
            # original dict comprehension kept only one entry per label).
            label_freq = Counter()
            for v in G[node]:
                label_freq.update(
                    {labels[v]: G.edges[v, node][weight] if weight else 1}
                )
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]
            new_label = random.choice(best_labels)
            labels[node] = new_label
            # Continue until all nodes have a label that is better than other
            # neighbour labels (only one label has max_freq for each node).
            cont = cont or len(best_labels) > 1
    # TODO In Python 3.3 or later, this should be `yield from ...`.
    return iter(groups(labels).values())
@py_random_state(2)
def asyn_lpa_communities(G, weight=None, seed=None, max_iter=float("inf")):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    max_iter : integer or float('inf') (default)
        Maximum number of iterations to go through before the algorithm
        exits.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """
    labels = {n: i for i, n in enumerate(G)}
    cont = True
    c = 0
    while cont and c < max_iter:
        c += 1
        cont = False
        nodes = list(G)
        seed.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue
            # Get label frequencies. Depending on the order in which nodes
            # are processed, some will be in iteration t and others in t-1,
            # making the algorithm asynchronous.
            label_freq = Counter()
            for v in G[node]:
                label_freq.update(
                    {labels[v]: G.edges[node, v][weight] if weight else 1}
                )
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]
            # Continue until all nodes have a majority label
            if labels[node] not in best_labels:
                labels[node] = seed.choice(best_labels)
                cont = True
    yield from groups(labels).values()
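# Hypothetical usage of the max_iter-bounded variant above (the max_iter
# parameter is this document's extension, not part of the networkx API):
# cap the number of sweeps and take whatever labeling was reached.
G_demo = nx.ring_of_cliques(4, 5)
communities_demo = list(asyn_lpa_communities(G_demo, seed=7, max_iter=10))
print([sorted(c) for c in communities_demo])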
@py_random_state(3)
def asyn_fluidcWeight(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by the Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_.
    The algorithm is based on the simple idea of fluids interacting in an
    environment, expanding and pushing each other. Its initialization is
    random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k
    communities is initialized in a random vertex in the graph. Then the
    algorithm iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the
    communities of its neighbours. This process is performed several
    times until convergence. At all times, each community has a total
    density of 1, which is equally distributed among the vertices it
    contains. If a vertex changes community, the vertex densities of the
    affected communities are adjusted immediately. When a complete
    iteration over all vertices is done, such that no vertex changes the
    community it belongs to, the algorithm has converged and returns.

    This is a modified version of the algorithm described in [1]_. This
    version uses the density aggregate multiplied by the edge weights to
    determine the community.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The maximum number of iterations allowed. By default 100.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    The `k` variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    seed.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        G.nodes[vertex]["density"] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in the graph in a random order
        vertices = list(G)
        seed.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            weight_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]}
                )
            except KeyError:
                pass
            # Gather neighbour vertex communities and their edge weights
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]}
                    )
                    weight_counter.update(
                        {communities[v]: G.edges[v, vertex]['weight']}
                    )
                except KeyError:
                    continue
            # Combine density and edge weight per community. (The loop
            # variables are named com/dens so that the parameter `k` is not
            # shadowed, as it was in the original.)
            combined = {}
            for com, dens in com_counter.items():
                w = weight_counter.get(com)
                if w is not None:
                    combined[com] = dens * w
            # Check which is the community with the highest combined score
            new_com = -1
            if len(com_counter.keys()) > 0:
                if combined:
                    max_combined = max(combined.values())
                    best_communities = [
                        com
                        for com, freq in com_counter.items()
                        if combined.get(com) == max_combined
                    ]
                    # If the actual vertex community is in best_communities,
                    # it is preserved
                    try:
                        if communities[vertex] in best_communities:
                            new_com = communities[vertex]
                    except KeyError:
                        pass
                    # If the vertex community changes...
                    if new_com == -1:
                        # Set flag of non-convergence
                        cont = True
                        # Randomly choose a new community from candidates
                        if best_communities:
                            new_com = seed.choice(best_communities)
                        # Update previous community status
                        try:
                            if com_to_numvertices[communities[vertex]] > 1:
                                com_to_numvertices[communities[vertex]] -= 1
                                density[communities[vertex]] = (
                                    max_density
                                    / com_to_numvertices[communities[vertex]]
                                )
                        except KeyError:
                            pass
                        # Update new community status
                        communities[vertex] = new_com
                        com_to_numvertices[communities[vertex]] += 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]]
                        )
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
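# A hedged usage sketch for asyn_fluidcWeight, this document's custom weighted
# variant: it expects a connected graph in which every edge carries a numeric
# "weight" attribute.
G_demo = nx.les_miserables_graph()  # a connected graph with weighted edges
communities_demo = list(asyn_fluidcWeight(G_demo, k=5, seed=3))
print([len(c) for c in communities_demo])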
def asyn_fluidc(G, k, max_iter=100, enable_pr=True):
    """Returns communities in `G` as detected by the Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in [1]_.
    The algorithm is based on the simple idea of fluids interacting in an
    environment, expanding and pushing each other. Its initialization is
    random, so found communities may vary on different executions.

    The algorithm proceeds as follows. First each of the initial k
    communities is initialized in a random vertex in the graph. Then the
    algorithm iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the
    communities of its neighbours. This process is performed several
    times until convergence. At all times, each community has a total
    density of 1, which is equally distributed among the vertices it
    contains. If a vertex changes community, the vertex densities of the
    affected communities are adjusted immediately. When a complete
    iteration over all vertices is done, such that no vertex changes the
    community it belongs to, the algorithm has converged and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The maximum number of iterations allowed. By default 100.

    enable_pr : bool
        Enable/disable PageRank for initializing the starting points.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    The `k` variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities can only be run on connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    if enable_pr:
        # Run PageRank, then push the top-ranked vertices to the head of
        # `vertices` so that they are used as starting points.
        # (`maybe_print` is a project-local logging helper.)
        maybe_print("PageRanks: {0}".format(pagerank(G)), 2, u'i')
        # Find the top k keys by PageRank: run it, sort by value, take the keys
        top_keys = [
            word_id
            for word_id, _ in sorted(
                pagerank(G).items(), key=lambda x: x[1], reverse=True
            )
        ]
        # Shuffle the top 2*k candidates in place. (The original shuffled a
        # slice copy, which had no effect.)
        head = top_keys[:k * 2]
        random.shuffle(head)
        top_keys[:k * 2] = head
        top_keys = top_keys[:k]
        maybe_print("Top keys: {0}".format(top_keys), 2, u'i')
        # Append the remaining vertices after the top keys and use this
        # ordering. (The original built `top_keys` but never used it.)
        top_keys.extend([v for v in vertices if v not in top_keys])
        vertices = top_keys
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in the graph in a random order
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]}
                )
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]}
                    )
                except KeyError:
                    continue
            # Check which is the community with the highest density
            new_com = -1
            if len(com_counter.keys()) > 0:
                max_freq = max(com_counter.values())
                best_communities = [
                    com
                    for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If the actual vertex community is in best_communities,
                # it is preserved
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If the vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from candidates
                    new_com = random.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density / com_to_numvertices[communities[vertex]]
                        )
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]]
                    )
        # If maximum iterations reached --> output actual results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
def asyn_lpa_communities(G, weight=None):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """
    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        random.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue
            # Get label frequencies. Depending on the order in which nodes
            # are processed, some will be in iteration t and others in t-1,
            # making the algorithm asynchronous.
            label_freq = Counter()
            for v in G[node]:
                label_freq.update(
                    {labels[v]: G.edges[v, node][weight] if weight else 1}
                )
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]
            new_label = random.choice(best_labels)
            labels[node] = new_label
            # Continue until all nodes have a label that is better than other
            # neighbour labels (only one label has max_freq for each node).
            cont = cont or len(best_labels) > 1
    # TODO In Python 3.3 or later, this should be `yield from ...`.
    return iter(groups(labels).values())