def test_modularity_communities_directed_weighted():
    G = nx.DiGraph()
    G.add_weighted_edges_from(
        [
            (1, 2, 5),
            (1, 3, 3),
            (2, 3, 6),
            (2, 6, 1),
            (1, 4, 1),
            (4, 5, 3),
            (4, 6, 7),
            (5, 6, 2),
            (5, 7, 5),
            (5, 8, 4),
            (6, 8, 3),
        ]
    )
    expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})]
    assert greedy_modularity_communities(G, weight="weight") == expected

    # A large weight of the edge (2, 6) causes 6 to change group, even if it shares
    # only one connection with the new group and 3 with the old one.
    G[2][6]["weight"] = 20
    expected = [frozenset({1, 2, 3, 6}), frozenset({4, 5, 7, 8})]
    assert greedy_modularity_communities(G, weight="weight") == expected
def test_greedy_modularity_communities_directed():
    G = nx.DiGraph(
        [
            ("a", "b"),
            ("a", "c"),
            ("b", "c"),
            ("b", "d"),  # inter-community edge
            ("d", "e"),
            ("d", "f"),
            ("d", "g"),
            ("f", "g"),
            ("d", "e"),
            ("f", "e"),
        ]
    )
    expected = [frozenset({"f", "g", "e", "d"}), frozenset({"a", "b", "c"})]
    assert greedy_modularity_communities(G) == expected

    # with loops
    G = nx.DiGraph()
    G.add_edges_from(
        [(1, 1), (1, 2), (1, 3), (2, 3), (1, 4), (4, 4), (5, 5), (4, 5), (4, 6), (5, 6)]
    )
    expected = [frozenset({1, 2, 3}), frozenset({4, 5, 6})]
    assert greedy_modularity_communities(G) == expected
def clauset_newman_moore_detection(G):
    # fit the model
    if nx.is_weighted(G):
        c = greedy_modularity_communities(G, weight='weight')
    else:
        c = greedy_modularity_communities(G)

    # format the result
    communities = {}
    for node in G.nodes():
        for index, commu in enumerate(c):
            if node in commu:
                communities[node] = index

    # get the number of isolated nodes (communities of size one)
    freq_dict = collections.Counter(communities.values())
    num_isolated_nodes = list(freq_dict.values()).count(1)

    # report the result
    print("Clauset-Newman-Moore Community Detection")
    print("----------------------------------------")
    if num_isolated_nodes == 0:
        print("Number of communities detected: {}".format(len(freq_dict.keys())))
    else:
        print("Number of communities detected: {}".format(
            len(freq_dict.keys()) - num_isolated_nodes))
        print("Number of nodes not in any community: {}".format(num_isolated_nodes))

    # return result
    out = {'algo': 'Clauset-Newman-Moore', 'communities': communities}
    return out
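# A minimal usage sketch for clauset_newman_moore_detection above, assuming the
# surrounding module already has `import collections`, `import networkx as nx` and
# `from networkx.algorithms.community import greedy_modularity_communities`.
# The karate club graph and the _demo_* name are illustrative additions, not part
# of the original code.
def _demo_clauset_newman_moore_detection():
    G = nx.karate_club_graph()  # small benchmark graph shipped with networkx
    result = clauset_newman_moore_detection(G)
    # result['communities'] maps each node to the index of its community
    print(len(set(result['communities'].values())), 'distinct community labels')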
def domains_from_pae_matrix_networkx(pae_matrix, pae_power=1, pae_cutoff=5,
                                     graph_resolution=1, weight_by_ca_ca_distance=False,
                                     distance_power=1, distance_model=None):
    '''
    Takes a predicted aligned error (PAE) matrix representing the predicted error in
    distances between each pair of residues in a model, and uses a graph-based
    community clustering algorithm to partition the model into approximately rigid
    groups.

    Arguments:

        * pae_matrix: a (n_residues x n_residues) numpy array. Diagonal elements
          should be set to some non-zero value to avoid divide-by-zero warnings
        * pae_power (optional, default=1): each edge in the graph will be weighted
          proportional to (1/pae**pae_power)
        * pae_cutoff (optional, default=5): graph edges will only be created for
          residue pairs with pae<pae_cutoff
        * graph_resolution (optional, default=1): regulates how aggressive the
          clustering algorithm is. Smaller values lead to larger clusters. Value
          should be larger than zero, and values larger than 5 are unlikely to be
          useful.
        * weight_by_ca_ca_distance (optional, default=False): adjust the edge
          weighting for each residue pair according to the distance between CA
          residues. If this is True, then `distance_model` must be provided.
        * distance_power (optional, default=1): If `weight_by_ca_ca_distance` is
          True, then edge weights will be multiplied by 1/distance**distance_power.
        * distance_model (optional, default=None): Model corresponding to the PAE
          matrix. Only needed if `weight_by_ca_ca_distance` is True.

    Returns: a series of lists, where each list contains the indices of residues
    belonging to one cluster.
    '''
    try:
        import networkx as nx
    except ImportError:
        raise Sorry(
            'ERROR: This method requires NetworkX (>=2.6.2) to be installed. '
            'Please install it using "pip install networkx" in a Python >=3.7 '
            'environment and try again.')
    import numpy
    weights = 1 / pae_matrix**pae_power

    if weight_by_ca_ca_distance:
        if distance_model is None:
            raise Sorry(
                'If weight_by_ca_ca_distance is True, distance_model must be provided!')
        weights *= weights_from_distance_matrix(distance_model, distance_power)

    g = nx.Graph()
    size = weights.shape[0]
    g.add_nodes_from(range(size))
    edges = numpy.argwhere(pae_matrix < pae_cutoff)
    sel_weights = weights[edges.T[0], edges.T[1]]
    wedges = [(i, j, w) for (i, j), w in zip(edges, sel_weights)]
    g.add_weighted_edges_from(wedges)

    from networkx.algorithms import community
    try:
        clusters = community.greedy_modularity_communities(
            g, weight='weight', resolution=graph_resolution)
    except Exception:
        # run without resolution (older NetworkX versions lack the argument)
        clusters = community.greedy_modularity_communities(g, weight='weight')
    return clusters
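# A minimal sketch of how domains_from_pae_matrix_networkx above might be called,
# assuming only numpy is available. The synthetic block-structured PAE matrix and
# the _demo_* name are purely illustrative, not real predicted-aligned-error output.
def _demo_domains_from_pae_matrix():
    import numpy
    n_residues = 20
    # small PAE inside two 10-residue blocks, large PAE between them, so the
    # clustering should recover two approximately rigid "domains"
    pae = numpy.full((n_residues, n_residues), 20.0)
    pae[:10, :10] = 2.0
    pae[10:, 10:] = 2.0
    numpy.fill_diagonal(pae, 0.2)  # non-zero diagonal avoids divide-by-zero
    clusters = domains_from_pae_matrix_networkx(pae, pae_cutoff=5, graph_resolution=1)
    for i, cluster in enumerate(clusters):
        print('domain', i, sorted(cluster))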
def test_greedy_modularity_communities_multigraph():
    G = nx.MultiGraph()
    G.add_edges_from(
        [
            (1, 2),
            (1, 2),
            (1, 3),
            (2, 3),
            (1, 4),
            (2, 4),
            (4, 5),
            (5, 6),
            (5, 7),
            (5, 7),
            (6, 7),
            (7, 8),
            (5, 8),
        ]
    )
    expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})]
    assert greedy_modularity_communities(G) == expected

    # Converting (4, 5) into a multi-edge causes node 4 to change group.
    G.add_edge(4, 5)
    expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})]
    assert greedy_modularity_communities(G) == expected
def modularity(self):
    # compute the communities once and reuse the result for printing
    communities = greedy_modularity_communities(self.graph)
    print('\nThere are {} communities\n'.format(len(communities)))
    for community in communities:
        print(set(community))
def test_modularity_communities_floating_point():
    # check for floating point error when used as key in the mapped_queue dict.
    # Test for gh-4992 and gh-5000
    G = nx.Graph()
    G.add_weighted_edges_from(
        [(0, 1, 12), (1, 4, 71), (2, 3, 15), (2, 4, 10), (3, 6, 13)]
    )
    expected = [{0, 1, 4}, {2, 3, 6}]
    assert greedy_modularity_communities(G, weight="weight") == expected
    assert (
        greedy_modularity_communities(G, weight="weight", resolution=0.99) == expected
    )
def test_modularity_communities_weighted():
    G = nx.balanced_tree(2, 3)
    for (a, b) in G.edges:
        if ((a == 1) or (a == 2)) and (b != 0):
            G[a][b]["weight"] = 10.0
        else:
            G[a][b]["weight"] = 1.0

    expected = [{0, 1, 3, 4, 7, 8, 9, 10}, {2, 5, 6, 11, 12, 13, 14}]
    assert greedy_modularity_communities(G, weight="weight") == expected
    assert greedy_modularity_communities(G, weight="weight", resolution=0.9) == expected
    assert greedy_modularity_communities(G, weight="weight", resolution=0.3) == expected
    assert greedy_modularity_communities(G, weight="weight", resolution=1.1) != expected
def test_n_communities_parameter():
    G = nx.circular_ladder_graph(4)

    # No aggregation:
    expected = [{k} for k in range(8)]
    assert greedy_modularity_communities(G, n_communities=8) == expected

    # Aggregation to half order (number of nodes)
    expected = [{k, k + 1} for k in range(0, 8, 2)]
    assert greedy_modularity_communities(G, n_communities=4) == expected

    # Default aggregation case (here, 2 communities emerge)
    expected = [frozenset(range(0, 4)), frozenset(range(4, 8))]
    assert greedy_modularity_communities(G, n_communities=1) == expected
def cluster_subgraph_by_year_cnm(Graph, H):
    """
    option='accumulate' will accumulate the nodes and edges of the graph year on year
    option='separate' will only keep the nodes year on year, edges from previous
        years will not be retained
    connected='yes' will only use the largest connected component for each year
    connected='no' will use all available nodes for each year
    retain_clus='yes' will initialize the louvain calculation such that the previous
        year's cluster is used to initialize this year's cluster
    retain_clus='no' will use a random initialization for the louvain calculation
    """
    from networkx.algorithms.community import greedy_modularity_communities  # this is to run CNM, remove if not needed

    # get node and edge year
    node_yr = nx.get_node_attributes(Graph, 'Year')
    edge_yr = nx.get_edge_attributes(Graph, 'Year')

    # dictionaries to filter nodes and edges by year
    n_year = int(max(node_yr.values()) - min(node_yr.values())) + 1
    min_year = min(node_yr.values())

    # implement clustering
    J = Graph
    print("------------Clauset-Newman-Moore------------------")
    c_cnm = [[] for i in range(n_year)]
    for i in range(n_year):
        start = time.time()
        c_cnm[i] = list(greedy_modularity_communities(H[i]))
        set_cluster(c_cnm[i], H[i], 'CNM cluster')
        set_cluster(c_cnm[i], J, 'CNM cluster' + str(i + min_year))
        stop = time.time()
        print('Year:', str(i + min_year), '--', round(stop - start, 2), 'seconds --',
              str(len(set(c_cnm[i]))), 'clusters')

    del c_cnm, node_yr, edge_yr, n_year, min_year
    gc.collect()
    return J
def Modularity_with_dot(data):
    norm = (data - data.mean()) / data.std()
    matrix = norm.dot(norm.T)
    matrix = matrix - np.diag(np.diag(matrix))
    matrix[matrix > 0] = 1
    matrix[matrix < 0] = 0
    graph = from_numpy_array(matrix.values)
    try:
        cluster = list(community.greedy_modularity_communities(graph))
    except Exception as e:
        warn(str(e))
        return None
    if len(cluster) != 2:
        return None
    group = [None for _ in range(len(matrix))]
    for i in cluster[0]:
        group[i] = 0
    for i in cluster[1]:
        group[i] = 1
    return get_defective_cluster(data, group)
def format(self, graph, clusters=True):
    if len(graph.nodes()) > 1 and clusters:
        i = 0

        def randomcolor():
            c = '#'
            for i in range(3):
                c += str(hex(np.random.choice(range(64, 224))))[2:]
            return c

        for nodelist in community.greedy_modularity_communities(graph):
            color = randomcolor()
            for node in nodelist:
                graph.nodes[node]['cluster'] = i
                graph.nodes[node]['cluster-color'] = color
            i += 1

        for a, b in graph.edges:
            graph.edges[a, b]['cluster-a'] = graph.nodes[a]['cluster']
            graph.edges[a, b]['cluster-b'] = graph.nodes[b]['cluster']
            graph.edges[a, b]['cluster-color-a'] = graph.nodes[a]['cluster-color']
            graph.edges[a, b]['cluster-color-b'] = graph.nodes[b]['cluster-color']

    return json.dumps(nx.readwrite.json_graph.cytoscape_data(graph),
                      indent=4, separators=(',', ': '))
def main():
    start_time = time.time()

    args = utils.create_argument_parser()
    graph = utils.load_graph(args.dataset, args.w)
    graph_copy = deepcopy(graph)
    preprocess(graph)

    c = greedy_modularity_communities(graph)

    finish_time = time.time()
    print('\nDone in %.4f seconds.' % (finish_time - start_time))

    communities = dict()
    for i in range(len(c)):
        communities[i] = list(c[i])
    partition = create_partition(communities)

    utils.print_comm_info_to_display(communities)
    # utils.write_comm_info_to_file(partition)
    print('modularity_value =', modularity(graph_copy, communities))
    print('NMI =', NMI(args.output, partition))

    finish_time = time.time()
    print('\nDone in %.4f seconds.' % (finish_time - start_time))
def communitiesRandomModule(graphForCommunities):
    # function for random analysis

    # variables
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0

    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(greedy_modularity_communities(graphForCommunities))

    # evaluate modularity
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1

    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)

    # end of function
    return (networkModularity, len(listCommunities), networkCoverage, networkPerformance)
def comparing_community_algortihms():
    residuals = pd.read_csv("/Users/emg/GitHub/thesis/output/2019_01/1000_residuals_output_utf8.csv")
    edges = residuals[['source', 'target', 'resid']]
    edges.columns = ['source', 'target', 'weight']
    top_edges = subset_df(edges, 'weight', q=0.95)

    G = edges_to_graph(top_edges, 'weight')
    add_partitions(G)
    membership = get_node_data(G).sort_values('community')

    from networkx.algorithms.community import greedy_modularity_communities
    c = list(greedy_modularity_communities(G, weight='weight'))

    clauset_search = {}
    for i, x in enumerate(c):
        for subreddit in x:
            clauset_search[subreddit] = i
    membership['clauset'] = membership.index.map(lambda x: clauset_search[x])

    from networkx.algorithms.community import girvan_newman
    communities_generator = girvan_newman(G)
    top_level_communities = next(communities_generator)
    next_level_communities = next(communities_generator)
    sorted(map(sorted, next_level_communities))
def communityDetection():
    """
    Runs the Clauset-Newman-Moore community detection algorithm
    """
    nn = createNearestNeighborEpsilon(allData, metric="Cosine")
    print(nn)
    g = nx.Graph()
    for cancer in range(len(cancerNames)):
        for nodeNumber in range(startingPositions[cancer], startingPositions[cancer + 1]):
            g.add_node(nodeNumber)
    for i in range(nn.shape[0]):
        for j in range(nn.shape[1]):
            if (nn[i][j]):
                g.add_edge(i, j)

    # Amount of each cancer type in each community
    communities = greedy_modularity_communities(g)
    cancerTypes = []
    print(communities)

    # The cutoff to be put in the community detection part
    percentCutoff = 0
    csvData = np.zeros((CANCER_TYPES, len(communities)))
    for j, community in enumerate(communities):
        tempCancers = {}
        for i in range(1, len(startingPositions)):
            # Num inbetween
            cancersInbetween = len([
                x for x in community
                if startingPositions[i - 1] <= x < startingPositions[i]
            ])
            tempCancers[cancerNames[i - 1]] = cancersInbetween
            csvData[i - 1, j] = int(cancersInbetween)
        totalCancers = sum(tempCancers.values())

        # Sort by prevalence
        sortedCancers = sorted(tempCancers, key=tempCancers.get, reverse=True)
        t = {}
        for cancer in sortedCancers:
            if (tempCancers[cancer] >= totalCancers * percentCutoff):
                t[cancer] = (tempCancers[cancer],
                             round(tempCancers[cancer] / totalCancers, 2))
        cancerTypes.append(t)

    w = open("communityCSV.csv", "w")
    for i in range(csvData.shape[0]):
        tempLine = cancerNames[i] + ","
        tempLine += ",".join(list(map(str, csvData[i])))
        tempLine += "\n"
        w.write(tempLine)
    w.close()
    return csvData
def assign_communities(self):
    """
    Assigns communities once per graph so they can be reused by all analysis
    functions. Communities are generated both with the networkx greedy-modularity
    algorithm and with the python-louvain package. The code mostly uses the latter,
    so python-louvain (imported as `community`) must be installed first. Users may
    switch to the networkx communities by commenting and uncommenting a few lines;
    these lines are marked by #XXX, although coloring the graph is only available
    with the python-louvain communities.
    """
    self.modularity_communitiesx = [
        list(x) for x in communityx.greedy_modularity_communities(self.G)
    ]
    self.best_parts = community.best_partition(self.G)

    com_dict = {}
    for i, com in enumerate(self.modularity_communitiesx):
        for node in com:
            com_dict[node] = i

    com_list = [[] for c in list(set(self.best_parts.values()))]
    for n, c in zip(self.best_parts.keys(), self.best_parts.values()):
        com_list[c].append(n)

    self.modularity_communities = com_list
    self.best_parts_x = com_dict
    return
def clusters_mod(oid, evs, gene_list):
    # create network
    logging.info("%s Create network" % oid)
    evs_e = evs[["in_gene", "out_gene", "branch_support"]]
    evs_n = nx.convert_matrix.from_pandas_edgelist(
        evs_e, source="in_gene", target="out_gene", edge_attr="branch_support")

    # find communities using modularity
    logging.info("%s Find communities in network" % oid)
    evs_n_communities = list(community.greedy_modularity_communities(evs_n))
    # evs_n_communities = list(community.girvan_newman(evs_n))
    # evs_n_communities = list(community.asyn_lpa_communities(evs_n))
    # evs_n_communities = list(community.k_clique_communities(evs_n, 5))
    # evs_n_communities = list(community.asyn_fluidc(evs_n, 2))
    clus_list = np.zeros(len(gene_list))
    for n, noi in enumerate(gene_list):
        for com in range(len(evs_n_communities)):
            if noi in evs_n_communities[com]:
                clus_list[n] = int(com) + 1

    # store clusters
    clu = pd.DataFrame(
        {
            "node": gene_list,
            "cluster": clus_list,
        },
        columns=["node", "cluster"])
    clu["cluster"] = clu["cluster"].astype(int).astype(str)
    logging.info("%s Num clustered genes = %i" % (oid, len(clu)))
    logging.info("%s Num clusters = %i" % (oid, len(np.unique(clus_list))))

    return clu
def detect_communities(G, save_img_path=None):
    """
    Returns the communities detected in the given graph using the greedy
    modularity maximisation approach.

    Args:
        G (nx.Graph): graph for which the communities must be determined.
        save_img_path (str): path to save a visualisation of the communities detected.

    Returns:
        communities (list): list of sets of nodes representing the communities detected.
    """
    # perform community detection using the greedy modularity approach
    _coms = community.greedy_modularity_communities(G)
    communities = [set(c) for c in _coms]

    # save image if path given
    if save_img_path:
        colors = np.linspace(0, 1, len(communities))
        com_color_map = dict()
        for idx, com in enumerate(communities):
            for node in com:
                com_color_map[node] = colors[idx]
        pos = nx.spring_layout(G)  # layout was not defined in the original snippet
        labels = nx.draw_networkx_labels(G, pos=pos)
        nx.draw(G, pos, node_color=list(com_color_map.values()))
        plt.savefig(save_img_path, format="PNG")

    return communities
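# A hedged usage sketch for detect_communities above, assuming networkx is
# imported as nx in the surrounding module; the barbell graph and the _demo_*
# name are illustrative choices, not part of the original code.
def _demo_detect_communities():
    G = nx.barbell_graph(5, 0)  # two 5-node cliques joined by a single edge
    coms = detect_communities(G, save_img_path=None)
    print(coms)  # expect the two cliques to come back as separate communities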
def plot_feature(path, sample=False):
    dataset = path.split("/")[1]
    orig_feature = np.load("{}/orig_feature.npy".format(path))
    transformed_feature = np.load("{}/transformed_feature.npy".format(path))
    # transformed_feature = transformed_feature[0]

    scaler = MinMaxScaler(feature_range=(-1, 1))
    orig_fea_tsne = tsne(orig_feature, 2)
    trans_fea_tsne = tsne(transformed_feature, 2)
    orig_fea_tsne = scaler.fit_transform(orig_fea_tsne)
    trans_fea_tsne = scaler.fit_transform(trans_fea_tsne)

    # get community
    g = nx.read_gml("{}/{}.gml".format(path, dataset), label=None)
    partition = greedy_modularity_communities(g)
    n_nodes = g.number_of_nodes()
    n_com = len(partition)
    start_id = np.min(g.nodes)
    com_dict = dict()
    for i in range(n_com):
        for node in partition[i]:
            com_dict[node - start_id] = i  # -1 because node id start from 1

    # get node color
    colors = [
        'red', 'blue', 'green', 'aqua', 'yellow', 'skyblue', 'purple', 'olive'
    ]
    color_list = []
    for node in range(n_nodes):
        color_list.append(colors[com_dict[node]])

    """
    fig = plt.figure(figsize=(9, 3))
    ax = fig.add_subplot(131)
    plot_scatter(ax, orig_fea_tsne, color_list)
    ax = fig.add_subplot(132)
    plot_scatter(ax, trans_fea_tsne, color_list)
    if sample is True:
        F = np.load("{}/F.npy".format(path))
        F_tsne = tsne(F, 2)
        F_tsne = scaler.fit_transform(F_tsne)
        ax = fig.add_subplot(133)
        plot_scatter(ax, F_tsne, color_list)
    """

    fig = plt.figure(figsize=(3, 3))
    ax = fig.add_subplot(111)
    plot_scatter(ax, trans_fea_tsne, color_list)

    fig.tight_layout()
    plt.savefig("{}/{}_feature.pdf".format(path, dataset), format='pdf', dpi=1000)
    plt.show()
def drawGraph(G, X, algo1="Graph"): g = nx.Graph(G) comm = community.greedy_modularity_communities(g) gridsize = (1, 1) fig = plt.figure(figsize=(8, 5)) axIN = plt.subplot2grid(gridsize, (0, 0)) plt.axis('off') axIN.set_xlim(min(X[:, 0]), max(X[:, 0])) axIN.set_ylim(min(X[:, 1]), max(X[:, 1])) linesIN = [] e = 0 print("Cluster:", len(comm)) mycolors = cm.rainbow(np.linspace(0, 1, len(comm))) #for i,j in zip(*G.nonzero()): # if i>j: # linesIN.append([[X[i][0], X[i][1]], [X[j][0], X[j][1]]]) # e += 1 gd = dict() for com in range(len(comm)): for node in list(comm[com]): gd[node] = com plt.scatter(X[node][0], X[node][1], s=2, color=mycolors[com]) plt.axis('off') modularity = commm.community_louvain.modularity(gd, g) print("Modularity:", algo1, "=", modularity) plt.savefig(algo1 + '_vis.pdf')
def train(self):
    G = nx.Graph()
    if self.is_weighted:
        edges, weight = (
            self.data.edge_index.t().tolist(),
            self.data.edge_attr.tolist(),
        )
        G.add_weighted_edges_from(
            [(edges[i][0], edges[i][1], weight[0][i]) for i in range(len(edges))]
        )
    else:
        G.add_edges_from(self.data.edge_index.t().tolist())

    partition = community.greedy_modularity_communities(G)
    base_label = [0] * G.number_of_nodes()
    for i, node_set in enumerate(partition):
        for node in node_set:
            base_label[node] = i
    nmi_score = normalized_mutual_info_score(self.label, base_label)
    print("NMI score of greedy modularity optimize algorithm: ", nmi_score)

    embeddings = self.model.train(G)

    # Map node2id
    features_matrix = np.zeros((self.num_nodes, self.hidden_size))
    for vid, node in enumerate(G.nodes()):
        features_matrix[node] = embeddings[vid]

    return self._evaluate(features_matrix)
def select_comm(graph, graph_type, mapping=None):
    if graph_type == 'Email':
        # read in the community info
        with open('../data/email-Eu-core-department-labels-cc.txt', 'r') as fid:
            f_label = fid.readlines()
        comm_to_nodes = {}
        for item in f_label:
            nodeID, commID = [int(i) for i in item.rstrip().split()]
            if commID not in comm_to_nodes:
                comm_to_nodes[commID] = [mapping[nodeID]]
            else:
                comm_to_nodes[commID].append(mapping[nodeID])
        comm_size = sorted([(key, len(comm_to_nodes[key])) for key in comm_to_nodes.keys()],
                           key=lambda x: x[1])
        selected_comm = comm_size[math.floor(len(comm_size) * 0.5)][0]
        comm = comm_to_nodes[selected_comm]
    elif graph_type == 'Airport':
        deg = list(dict(graph.degree()).items())
        deg = sorted(deg, key=lambda x: x[1])
        selected_node = deg[math.floor(len(deg) * 0.9)][0]
        comm = list(graph.neighbors(selected_node)) + [selected_node]
    elif graph_type == 'Brain':
        comm = list(range(len(graph) - 100, len(graph)))
    else:
        all_comms = list(greedy_modularity_communities(graph))
        all_comms = sorted(all_comms, key=lambda x: len(x))
        comm = list(all_comms[math.floor(len(all_comms) * 0.5)])

    assert (len(comm) != 0)
    return comm
def communityDetection(g):
    # greedy modularity community detection
    greedy_communities = list(comm.greedy_modularity_communities(g))
    greedy_communities_dict = {}
    for i, c in enumerate(greedy_communities):
        for node_id in c:
            greedy_communities_dict[node_id] = i
    greedy_score = community.modularity(greedy_communities_dict, g)
    print("Greedy Number of Communities: ", len(set(greedy_communities_dict.values())))
    print("Greedy Modularity: ", greedy_score)

    greedy_communities_sorted = sorted(greedy_communities, key=lambda x: len(x), reverse=True)
    for i in range(1, 6):
        print("Size of", i, "Community: ", len(greedy_communities_sorted[i]))

    louvain_communities = community.best_partition(g)
    louvain_score = community.modularity(louvain_communities, g)
    print("Louvain Number of Communities: ", len(set(louvain_communities.values())))
    print("Louvain Modularity: ", louvain_score)

    louvain_score_dict = defaultdict(list)
    for node_id, comm_id in louvain_communities.items():
        louvain_score_dict[comm_id].append(node_id)
    louvain_communities_list = louvain_score_dict.values()
    louvain_communities_list = sorted(louvain_communities_list, key=lambda x: len(x), reverse=True)
    for i in range(1, 6):
        print("Size of ", i, " Community: ", len(louvain_communities_list[i]))
def greedy_partition(graph):
    partition = greedy_modularity_communities(graph)
    res = dict()
    for i, part in enumerate(partition):
        for j in part:
            res[j] = i
    draw_graph(res, "greedy_modularity")
def detect_communities(network: SpatioTemporalNetwork, algo, **kwargs):
    if algo == 'fluid':
        comm_iter = community.asyn_fluidc(network.to_multigraph().to_undirected(), **kwargs)
        return list(comm_iter)
    if algo == 'clm':
        comm_iter = community.greedy_modularity_communities(network.to_multigraph().to_undirected(), **kwargs)
        return list(comm_iter)
def test_greedy_modularity_communities_relabeled():
    # Test for gh-4966
    G = nx.balanced_tree(2, 2)
    mapping = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h"}
    G = nx.relabel_nodes(G, mapping)
    expected = [frozenset({"e", "d", "a", "b"}), frozenset({"c", "f", "g"})]
    assert greedy_modularity_communities(G) == expected
def test_greed_modularity_communities_multidigraph_weighted():
    G = nx.MultiDiGraph()
    G.add_weighted_edges_from(
        [
            (1, 2, 5),
            (1, 2, 3),
            (3, 1, 6),
            (1, 3, 6),
            (3, 2, 4),
            (1, 4, 2),
            (1, 4, 5),
            (2, 4, 3),
            (3, 2, 8),
            (4, 2, 3),
            (4, 3, 5),
            (4, 5, 2),
            (5, 6, 3),
            (5, 6, 7),
            (6, 5, 4),
            (5, 7, 9),
            (5, 7, 9),
            (7, 6, 8),
            (7, 8, 2),
            (8, 7, 2),
            (5, 8, 6),
            (5, 8, 6),
        ]
    )
    expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})]
    assert greedy_modularity_communities(G, weight="weight") == expected
def create_g_cluster(self, word_pos):
    words = self.top_k(word_pos)[1:]
    if self.cluster_type < 4:
        pairs = self.gen_pairs(words)
        G = nx.Graph()
        G.add_weighted_edges_from(pairs)
        if self.cluster_type == 3:
            G = max(nx.connected_component_subgraphs(G), key=len)
            print('len_strip(G)', len(G))

    if self.cluster_type == 1:
        from networkx.algorithms.community import greedy_modularity_communities
        clusters = list(greedy_modularity_communities(G))
    elif self.cluster_type == 2:
        from chinese_whispers import chinese_whispers, aggregate_clusters
        chinese_whispers(G, iterations=20, weighting='log', seed=13)  # top, nolog, log
        clusters = aggregate_clusters(G).values()
    elif self.cluster_type == 3:
        from networkx.algorithms.community import asyn_fluidc
        if self.is_k_depends_g:
            clusters = list(asyn_fluidc(G, k=self.k - int((self.k - 8) * ((200 - len(G)) / 100))))
        else:
            clusters = list(asyn_fluidc(G, k=min(self.k, len(G))))
    elif self.cluster_type == 4:
        from collections import defaultdict
        from sklearn.cluster import KMeans
        X = [sg.emb(_) for _ in words[1:]]
        clusters = defaultdict(list)
        kmeans = KMeans(n_clusters=self.k, random_state=13)
        assigned_clusters = kmeans.fit_predict(X)
        for cl, w in zip(assigned_clusters, words):
            clusters[cl].append(w)
        clusters = list(clusters.values())
    elif self.cluster_type == 5:
        from collections import defaultdict
        from sklearn.cluster import DBSCAN
        X = [sg.emb(_) for _ in words[1:]]
        clusters = defaultdict(list)
        dbscan = DBSCAN(metric='l2', eps=self.min_dist_dbscan, min_samples=self.min_clust)
        assigned_clusters = dbscan.fit_predict(X)
        for cl, w in zip(assigned_clusters, words):
            clusters[cl].append(w)
        clusters = list(clusters.values())
    else:
        raise Exception('no cluster type', self.cluster_type)

    if self.debug:
        for i, cluster in enumerate(sorted(clusters, key=lambda e: len(e), reverse=True)):
            print('Cluster ID\tCluster Elements\n')
            print('{}\t{}\n'.format(i, cluster))
        print(word_pos, 'clusters', len(clusters))
    return clusters
def buildGraph(self, additional_stopwords=[], min_frequency=5):
    # call getBatches method passing any contextual stop words as an arg
    batches = self.getBatches(additional_stopwords)
    # call getEdgesNodes method taking min frequency as an arg
    self.getEdgesNodes(batches, min_frequency)
    # call the getGraph method and build the graph
    self.G = self.getGraph()
    print('Graph successfully built.')
    print('Node and Edge dataframes created.')

    """
    save a number of attributes to the instance of the class
    """
    # retain graph object adjacencies
    self.adjacencies = dict(self.G.adjacency())
    # retain graph object node betweenness centrality
    self.betweeness = nx.betweenness_centrality(self.G)
    # retain graph object clustering coefficients
    self.clustering_coeff = nx.clustering(self.G)

    """
    add these attributes as columns on the node dataframe
    """
    self.node_df['adjacency_frequency'] = self.node_df['id_code'].map(lambda x: len(self.adjacencies[x]))
    self.node_df['betweeness_centrality'] = self.node_df['id_code'].map(lambda x: self.betweeness[x])
    self.node_df['clustering_coefficient'] = self.node_df['id_code'].map(lambda x: self.clustering_coeff[x])

    # identify communities in instance of graph object and retain as attribute
    self.communities = community.greedy_modularity_communities(self.G)

    """
    assign each node to its community and add as column to node dataframe
    """
    self.communities_dict = {}
    nodes_in_community = [list(i) for i in self.communities]
    for i in nodes_in_community:
        self.communities_dict[nodes_in_community.index(i)] = i

    def community_allocation(source_val):
        for k, v in self.communities_dict.items():
            if source_val in v:
                return k

    self.node_df['community'] = self.node_df['id_code'].map(lambda x: community_allocation(x))
    print('Communities calculated.')
    return
def _check_communities(self, expected):
    communities = set(greedy_modularity_communities(self.G))
    assert_equal(communities, expected)