def clustering_LDA_type_new(mygraph, mygroups, algoname, corecalculation):
    # algoname specifies which algorithm the function should use;
    # corecalculation is whether to calculate core documents or not
    counter_matrices = []
    for k in range(1):  # clustering is greedy; the intent is to run it many times (e.g. 100) and keep the most frequent answer -- here it runs once
        if algoname == 0:
            myclusters = mygraph.community_fastgreedy(weights="weight").as_clustering(3)  # cluster based on max weight
        elif algoname == 1:
            myclusters = louvain.find_partition(mygraph, method='Modularity', weight='weight', resolution_parameter=1)  # smaller resolution, smaller number of clusters  # best
        elif algoname == 2:
            myclusters = louvain.find_partition(mygraph, method='RBConfiguration', weight='weight', resolution_parameter=1)
        elif algoname == 3:
            myclusters = louvain.find_partition(mygraph, method='RBER', weight='weight', resolution_parameter=1)  # not working very well
        elif algoname == 4:
            myclusters = louvain.find_partition(mygraph, method='CPM', weight='weight', resolution_parameter=0.6)  # not working very well
        cluster_list = list(myclusters)
        counter_matrix = np.zeros((len(cluster_list), len(mygroups)))
        cluster_subgraphs = []
        for i in range(len(cluster_list)):
            cluster_subgraphs.insert(i, mygraph.subgraph(cluster_list[i]))  # creating cluster subgraphs
            for v in cluster_list[i]:  # calculating matrices
                for j in range(len(mygroups)):
                    if mygraph.vs[v]['name'] in mygroups[j]:
                        counter_matrix[i][j] += 1
        counter_matrices.insert(k, counter_matrix)
    counter_matrix = most_common(counter_matrices)  # counter matrix (rows are clusters and columns are groups)
    cluster_cores = []
    if corecalculation == 1:
        for i in range(len(cluster_subgraphs)):  # finding the core of each cluster subgraph
            cluster_cores.insert(i, find_core(cluster_subgraphs[i], 3))  # 3 means return the three center documents; set it to whatever number you wish
            print cluster_cores[i].vs()['name']
    group_matrix = counter_matrix / counter_matrix.sum(axis=0)[None, :]  # probability of each cluster belonging to each group
    cluster_matrix = counter_matrix / counter_matrix.sum(axis=1)[:, None]  # probability of each group belonging to each cluster
    return cluster_matrix, cluster_list, cluster_cores
def getGraphPartition(graph, resolution):
    """The returned partition assigns each county to a community.
    While there are several methods available, RBConfiguration is tunable with
    a resolution parameter: bigger resolutions mean more, smaller communities;
    smaller resolutions mean fewer, larger communities."""
    return louvain.find_partition(graph, method='RBConfiguration', weight='weight',
                                  resolution_parameter=resolution)
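# Usage sketch (illustrative, not from the original source): exercises
# getGraphPartition over a few resolutions to demonstrate the docstring's claim.
# Assumes the pre-0.6 louvain-igraph API (string `method` argument) that the
# snippet above targets; Zachary's karate club with unit weights is stand-in data.
import igraph as ig
import louvain

example_graph = ig.Graph.Famous("Zachary")
example_graph.es['weight'] = 1.0
for res in (0.5, 1.0, 2.0):
    part = getGraphPartition(example_graph, res)
    print(res, len(part))  # higher resolution -> more (and smaller) communities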
def cluster_adjmat(xmat, resolution=1, cutoff=0.1):
    """
    Cluster the groups based on the adjacency matrix.
    Use the cutoff to binarize the matrix used to construct the adjacency
    graph, then cluster the graph using louvain clustering with a resolution
    value. As the adjacency matrix is binary, the default resolution value
    is set to 1.

    Input
    -----
    xmat: `numpy.array` or sparse matrix
        the reference matrix/normalized confusion matrix
    cutoff: `float` optional (default: 0.1)
        threshold used to binarize the reference matrix
    resolution: `float` optional (default: 1.0)
        resolution parameter for louvain clustering

    return
    -----
    new group names.
    """
    g = sc._utils.get_igraph_from_adjacency((xmat > cutoff).astype(int), directed=False)
    print(g)
    part = louvain.find_partition(g, louvain.RBConfigurationVertexPartition,
                                  resolution_parameter=resolution)
    groups = np.array(part.membership)
    return groups
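# Usage sketch (illustrative, not from the original source): cluster_adjmat on
# a tiny synthetic confusion matrix. Assumes scanpy is importable as `sc` and
# numpy as `np`, exactly as the function itself does.
import numpy as np

xmat = np.array([[1.0, 0.8, 0.0],
                 [0.8, 1.0, 0.05],
                 [0.0, 0.05, 1.0]])
# entries above the 0.1 cutoff become edges, so rows 0 and 1 should co-cluster
groups = cluster_adjmat(xmat, resolution=1, cutoff=0.1)
print(groups)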
def cluster2dspectrumlouvain(cp, project):
    datapath = cp.get('datadir')
    realpeaks = Two_Column_List(project + os.sep + cp.get('spectruminput'))
    # print(realpeaks)
    g = Graph.Read_Edgelist(project + os.sep + 'result' + os.sep + cp.get('clusteringoutput'),
                            directed=False)
    # print(g)
    louvainresult = louvain.find_partition(g, louvain.RBERVertexPartition,
                                           resolution_parameter=float(cp.get('rberresolution')))
    # print(louvainresult)
    f = open(project + os.sep + 'result' + os.sep + cp.get('louvainoutput'), 'w')
    for cluster in louvainresult:
        if len(cluster) > 0:
            f.write('/\n')
            for peak in cluster:
                for realpeak in realpeaks:
                    if realpeak[0] == peak:
                        f.write(str(realpeak[0]) + ',' + str(realpeak[1]) + ',' +
                                str(realpeak[2]) + '\n')
    f.close()
def partition_gievenNumPar(G, NumPar=None, edge_weight_factors=None):
    if NumPar is None:
        if 15 <= len(G.vs) / 10 <= 30:
            NumPar = len(G.vs) / 10
        else:
            NumPar = 20
    low = 0.001
    high = 0.75
    count = 0
    thres = None
    w = G.es['weight']
    if edge_weight_factors is not None:
        w = [a * b for a, b in zip(w, edge_weight_factors)]
    partitions = None
    if NumPar <= 0 or NumPar > len(G.vs):
        print("NumPar {} is a wrong number".format(str(NumPar)))
        sys.exit()
    # binary-search the CPM resolution until the partition count matches NumPar
    while True:
        thres = (low + high) / 2
        partitions = louvain.find_partition(G, partition_type=louvain.CPMVertexPartition,
                                            weights=w, resolution_parameter=thres)
        count += 1
        if np.abs(len(partitions) - NumPar) == 0 or count > 30:
            break
        elif len(partitions) > NumPar:
            high = thres
        else:
            low = thres
    return (partitions, thres)
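# Usage sketch (illustrative): ask partition_gievenNumPar for roughly 4
# communities on a toy weighted graph. Zachary's karate club with unit weights
# is stand-in data; the function expects an es['weight'] attribute.
import igraph as ig

toy_graph = ig.Graph.Famous("Zachary")
toy_graph.es['weight'] = 1.0
partitions, thres = partition_gievenNumPar(toy_graph, NumPar=4)
print(len(partitions), thres)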
def identify_clusters(vlm, conn, correct_tags=False, tag_correction_list=[],
                      method_name='ModularityVertexPartition', seed=360):
    """
    Cluster identification via the Louvain algorithm. Can be used for cluster
    discovery. If clusters are manually identified (e.g. by
    visualize_protein_markers()), clusters can be renumbered or combined using
    the tag correction list. Method names are any used in the
    louvain.find_partition method.
    """
    g = ig.Graph.Adjacency(conn.todense().tolist())
    method = getattr(louvain, method_name)
    louvain.set_rng_seed(seed)
    partition = louvain.find_partition(g, method)
    tag_list = np.zeros(conn.shape[0])
    for x in range(len(partition)):
        tag_list[partition[x]] = int(x)
    if correct_tags:
        cluster_ID = [tag_correction_list[int(X)] for X in tag_list]
    else:
        cluster_ID = [int(X) for X in tag_list]
    num_clusters = max(cluster_ID) + 1
    vlm.cluster_ID = cluster_ID
    vlm.num_clusters = int(num_clusters)
    return [cluster_ID, num_clusters]
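# Usage sketch (illustrative): identify_clusters mutates `vlm` and returns the
# labels. A bare namespace stands in for the vlm object and a small sparse
# connectivity matrix for `conn` (both synthetic, for demonstration only).
from types import SimpleNamespace
import numpy as np
from scipy.sparse import csr_matrix

conn = csr_matrix(np.array([[0, 1, 0, 0],
                            [1, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 0]]))
vlm = SimpleNamespace()
cluster_ID, num_clusters = identify_clusters(vlm, conn)
print(cluster_ID, num_clusters)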
def run_louvain(self, scalenumber):
    sources, targets = self.scales[scalenumber].tmatrix.nonzero()
    edgelist = list(zip(sources.tolist(), targets.tolist()))
    G = ig.Graph(edgelist)
    G.es['weight'] = self.scales[scalenumber].tmatrix.data
    return louvain.find_partition(G, louvain.ModularityVertexPartition,
                                  weights=G.es['weight']).membership
def main(argv):
    inputFile = ''
    outputFile = ''
    imax = 0
    jmax = 0
    theGraph = Graph()
    inputFile = sys.argv[1]
    outputFile = sys.argv[2]
    print 'argv[1] is:', sys.argv[1]
    print 'argv[2] is:', sys.argv[2]
    with open(inputFile, 'rb') as csvfile:
        csvReader = csv.reader(csvfile, delimiter=',', quotechar='|')
        # First line is the number of distinct nodes.
        headerRows = csvReader.next()
        nNodes = int(headerRows[0])
        theGraph.add_vertices(nNodes)
        print 'nNodes: ', nNodes
        currentNodeIndex = 0
        # We build a map between the matrix we want to build and the node
        # identifiers as we read in the rows.
        thisI = 0
        thisJ = 0
        thisEdge = 0
        nodeMap = dict()
        # We also want a list that maps the indices to the node names.
        indexList = list()
        # At this point, each row is an edge in the graph.
        for row in csvReader:
            if row[0] in nodeMap:
                thisI = nodeMap[row[0]]
            else:
                nodeMap[row[0]] = currentNodeIndex
                thisI = currentNodeIndex  # fix: index the newly added node, not a stale one
                indexList.append(row[0])
                currentNodeIndex += 1
            if row[1] in nodeMap:
                thisJ = nodeMap[row[1]]
            else:
                nodeMap[row[1]] = currentNodeIndex
                thisJ = currentNodeIndex  # fix: index the newly added node, not a stale one
                indexList.append(row[1])
                currentNodeIndex += 1
            # Add this edge.
            theGraph.add_edges([(thisI, thisJ)])
            theGraph.es[thisEdge]["weight"] = float(row[2])
            thisEdge += 1
    part = louvain.find_partition(theGraph, method='Modularity', weight='weight')
    with open(outputFile, 'wb') as csvoutfile:
        csvWriter = csv.writer(csvoutfile, delimiter=',', quotechar='|')
        for i in range(0, nNodes):
            csvWriter.writerow([indexList[i], part.membership[i]])
def louvain_method(user_interaction_graph):
    '''
    https://github.com/vtraag/louvain-igraph
    Fast unfolding of communities in large networks, Vincent D Blondel,
    Jean-Loup Guillaume, Renaud Lambiotte, Etienne Lefebvre,
    Journal of Statistical Mechanics: Theory and Experiment 2008(10),
    P10008 (12pp)
    :param user_interaction_graph: igraph Graph
    '''
    louvain.set_rng_seed(43)
    node_names = user_interaction_graph.vs
    return [[node_names[node]['name'] for node in community]
            for community in louvain.find_partition(user_interaction_graph,
                                                    louvain.ModularityVertexPartition)]
def create_partition(card_data_df, G, resolution_parameter=1, init=None):
    """Take a card_data_df and the graph that represents it and create
    clusters based on lv.RBERVertexPartition.

    Parameters:
    -----------
    card_data_df: pandas DataFrame containing as columns card name and the
        decks that each card belongs to as a set.
    G: igraph Graph representation of card_data_df.
    resolution_parameter: float to pass to the RBERVertexPartition,
        represented by γ in the quality function
        Q = Σ_ij (A_ij − γp) δ(σ_i, σ_j).
    init: None or string; whether to specify an initial cluster membership
        for each card - if string, path.

    Returns:
    --------
    partition: the partition created by lv.find_partition.
    clusters: the number of clusters detected.

    See also:
    ---------
    create_card_df: function that creates card_data_df.
    create_graph: function that creates G.
    https://louvain-igraph.readthedocs.io/en/latest/reference.html#rbervertexpartition:
        information on the algorithm used.
    """
    if init:
        initial_membership = card_data_df["init"].tolist()
    else:
        initial_membership = None
    partition = lv.find_partition(
        G,
        lv.RBERVertexPartition,
        weights="weight",
        resolution_parameter=resolution_parameter,
        node_sizes=card_data_df["Count"].tolist(),
        initial_membership=initial_membership,
    )
    clusters = 0
    card_data_df["Cluster"] = [set() for _ in card_data_df.index]
    card_data_df["Hub Score"] = G.hub_score("weight")
    card_data_df["Authority Score"] = G.authority_score("weight")
    for cluster in partition:
        for card in cluster:
            card_data_df.at[card, "Cluster"].add(clusters)
        clusters += 1
    return partition, clusters
def parition_igraph():
    gexf_path = os.path.join(VIS_DATA_DIR, 'song-signed.gexf')
    origin_gexf_g = nx.read_gexf(gexf_path)
    # pajek_path = os.path.join(VIS_DATA_DIR, 'song-signed.net')
    # nx.write_pajek(origin_gexf_g, pajek_path)
    graphml_path = os.path.join(VIS_DATA_DIR, 'song-signed.graphml')
    nx.write_graphml(origin_gexf_g, graphml_path)
    G = ig.Graph.Read_GraphML(graphml_path)
    partition = louvain.find_partition(G, louvain.ModularityVertexPartition)
    print(partition)
def get_louvain(mknn, min_cluster_size=10, resolution_parameter=1.0, seed=0):
    g = ig.Graph(n=mknn.shape[0], edges=list(zip(mknn.row, mknn.col)), directed=False)
    # Louvain clustering over the mKNN graph
    louvain.set_rng_seed(seed)
    part = louvain.find_partition(g, louvain.RBConfigurationVertexPartition,
                                  resolution_parameter=resolution_parameter)
    return CellLabels(clean_labels(part.membership, min_cluster_size=min_cluster_size))
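# Usage sketch (illustrative): get_louvain expects a scipy.sparse COO matrix of
# mutual-KNN edges (it reads .row and .col). The 6-node toy matrix below is
# synthetic; CellLabels and clean_labels come from the surrounding codebase.
import numpy as np
from scipy.sparse import coo_matrix

rows = np.array([0, 1, 2, 3, 4, 5])
cols = np.array([1, 2, 0, 4, 5, 3])
mknn = coo_matrix((np.ones(6), (rows, cols)), shape=(6, 6))
labels = get_louvain(mknn, min_cluster_size=1, resolution_parameter=1.0, seed=0)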
def optimal_modularity_community_detection(self, visual=True, name='optimal_modularity'):
    """
    Community detection using the Louvain algorithm and maximization of modularity.
    Inputs:
        - visual: (default True) visualize the computed communities
        - name: name of the exported .png file
    """
    louvain.set_rng_seed(123456)
    partition = louvain.find_partition(self.G, louvain.ModularityVertexPartition,
                                       weights=self.G.es['weight'])
    self.G.vs['community_optimal_modularity'] = partition.membership
    print("The estimated number of communities is", len(set(partition.membership)))
    print('\n')
    print("Communities")
    for n in range(0, len(partition)):
        print('Community number', n, '- size:', len(partition[n]))
    # Create a dictionary with keys the channels (names of our nodes) and
    # values the community they belong to.
    comm_detect = dict(zip(self.G.vs['label'], self.G.vs['community_optimal_modularity']))
    print()
    print('The communities are:')
    print()
    comms = {}
    for item in comm_detect.items():
        if item[1] not in comms.keys():
            comms[item[1]] = []
        comms[item[1]].append(item[0])
    comms = OrderedDict(sorted(comms.items(), key=lambda t: t[0]))
    print(comms.items())
    if visual:
        visual_style = {}
        visual_style["vertex_size"] = 25
        # visual_style["vertex_color"] = "white"
        visual_style["vertex_label"] = self.G.vs["label"]
        # visual_style["edge_width"] = [math.exp(weight) * 0.5 for weight in self.G.es["weight"]]
        visual_style["edge_width"] = 0.2
        visual_style["layout"] = self.G.vs["coords"]
        pal = igraph.drawing.colors.ClusterColoringPalette(
            len(set(self.G.vs['community_optimal_modularity'])))
        visual_style["vertex_color"] = pal.get_many(self.G.vs['community_optimal_modularity'])
        self.G.es['arrow_size'] = [0.1 for edge in self.G.es]
        graph = igraph.plot(self.G, bbox=(0, 0, 600, 600), **visual_style)
        graph.save(name + '.png')
        return (comms, graph)
    return (comms)
def find_partition(self, weight=True, mode='hypergeometry'):
    g = ig.Graph(list(self.graph.edges))
    # use the hypergeometric test as edge weights
    weights = []
    for u, v in self.graph.edges:
        if (u, v) in self.matrix:
            weights.append(self.matrix[(u, v)])
        elif (v, u) in self.matrix:
            weights.append(self.matrix[(v, u)])
        else:
            # fix: use a local name so the boolean `weight` parameter is not shadowed
            w = self.eu_test_single(u, v) if mode != 'hypergeometry' \
                else self.co_test_single(u, v)
            weights.append(w)
    if weight:
        g.es['weight'] = weights
        self.parts = louvain.find_partition(g, method='Modularity', weight='weight')
    else:
        self.parts = louvain.find_partition(g, method='Modularity')
def singlelayer_louvain(G, gamma, return_partition=False):
    if 'weight' not in G.es.attributes():  # fix: test edge attribute names, not edges
        G.es['weight'] = [1.0] * G.ecount()

    partition = louvain.find_partition(G, louvain.RBConfigurationVertexPartition,
                                       weights='weight', resolution_parameter=gamma)

    if return_partition:
        return partition
    else:
        return tuple(partition.membership)
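# Usage sketch (illustrative): sweep gamma and watch the community count grow.
# Zachary's karate club stands in for any igraph Graph; the function itself
# fills in unit weights when the graph has none.
import igraph as ig

karate = ig.Graph.Famous("Zachary")
for gamma in (0.5, 1.0, 2.0):
    membership = singlelayer_louvain(karate, gamma)
    print(gamma, len(set(membership)))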
def sample_partitions():
    """Sample some partitions from the Karate club"""
    G = ig.Graph.Famous("Zachary")
    parts = []
    for gamma in SAMPLE_GAMMAS:
        sampled_partitions = [louvain.find_partition(G, louvain.RBConfigurationVertexPartition,
                                                     resolution_parameter=gamma)
                              for _ in range(10 ** 4)]
        if gamma == 1.0:
            # artificially make this partition low-quality
            parts.append(min(sampled_partitions, key=lambda p: p.quality()))
        else:
            parts.append(max(sampled_partitions, key=lambda p: p.quality()))
    return parts
def modularity_analysis(feature_graph, opts):
    # feature_ungraph = pd.read_excel(opts['output'] + 'feature_graph_for_{}.xls'.format(opts['tradeday']),
    #                                 sheet_name='sheet1')
    # build the network
    length = len(feature_graph)
    edges = []
    edge_weights = []
    for i in range(length):
        # tuple(node 1, node 2, weight)
        edges.append(tuple([
            '{:0>6}'.format(int(feature_graph.loc[i][0])),
            '{:0>6}'.format(int(feature_graph.loc[i][1])),
            feature_graph.loc[i][2]
        ]))
        edge_weights.append(feature_graph.loc[i][2])
    graph = IGraph.TupleList(edges=edges, directed=False, weights=True)
    modularity_graph = louvain.find_partition(graph, louvain.ModularityVertexPartition,
                                              weights=edge_weights)
    # vertex_count = graph.vcount()
    graph.vs['label'] = graph.vs['name']
    mode_num = len(modularity_graph)
    modularity_index = np.zeros(len(graph.vs['name']), )
    for i in range(mode_num):
        modularity_index[modularity_graph[i]] = i
    modularity_index = list(map(int, modularity_index))
    graph.vs['modularity'] = modularity_index
    mode_list = []
    for i in range(mode_num):
        vertexes = graph.vs.select(modularity=i)
        one_mode_list = [vertexes[j]['name'] for j in range(len(vertexes))]
        mode_list.append(one_mode_list)
    with open(opts['output'] + 'modularity_for_{}.txt'.format(opts['tradeday']), 'w') as f:
        for i in range(mode_num):
            f.write(','.join(mode_list[i]) + '\n')
    # plot (note: color_dict only covers three communities)
    color_dict = {0: 'red', 1: 'green', 2: 'blue'}
    graph.vs['color'] = [color_dict[index] for index in graph.vs['modularity']]
    ig.plot(graph)
    return graph, modularity_graph
def get_communities(G, mode=1):
    if mode == 2:
        print('Infomap')
        vc = G.community_infomap(edge_weights='weight')
    elif mode == 3:
        print('Louvain Modularity')  # fix: label said 'Surprise' but the code maximizes modularity
        vc = louvain.find_partition(G, louvain.ModularityVertexPartition)
    elif mode == 4:
        print('Multilevel')
        vc = G.community_multilevel(weights='weight')
    else:
        print('Newman leading eigenvector')
        vc = G.community_leading_eigenvector(weights='weight')
    return vc
def get_school_communities():
    multi_school_community_graph = louvain.find_partition(d_social_network_graph,
                                                          louvain.CPMVertexPartition,
                                                          resolution_parameter=0.0005)
    for idx, community in enumerate(multi_school_community_graph):
        for node in community:
            v = d_social_network_graph.vs[node]
            v["groupId"] = idx
    response_builder = ResponseBuilder()
    nodes = response_builder.return_node_list(d_social_network_graph)
    edges = response_builder.return_edge_list(d_social_network_graph)
    response = dict()
    response["nodes"] = nodes
    response['edges'] = edges
    return jsonify(response)
def louvain_clus(graph):
    partition = louvain.find_partition(graph, louvain.ModularityVertexPartition)
    print(partition.summary())
    subgraphs = partition.subgraphs()
    subgraph_labels_df = pd.DataFrame(columns=['label', 'cluster'])
    index = 0
    for i in range(len(subgraphs)):
        subgraph_labels = subgraphs[i].vs['label']
        for label in subgraph_labels:
            subgraph_labels_df.loc[index] = [label, i]
            index = index + 1
    print('Done')
    return partition, subgraph_labels_df
def run_alg(G, alg, gamma=1.0):
    '''
    Run a community detection algorithm with a resolution parameter.
    Right now only the RB partition type is used for Louvain (and Leiden).
    :param G: an igraph graph
    :param gamma: resolution parameter
    :return:
    '''
    if alg == 'louvain':
        partition_type = louvain.RBConfigurationVertexPartition
        partition = louvain.find_partition(G, partition_type, resolution_parameter=gamma)
    elif alg == 'leiden':
        partition_type = leidenalg.RBConfigurationVertexPartition
        # fix: call find_partition rather than instantiating the partition class
        partition = leidenalg.find_partition(G, partition_type, resolution_parameter=gamma)
    # partition = sorted(partition, key=len, reverse=True)
    return partition
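# Usage sketch (illustrative): run_alg with both backends on the same graph.
# Assumes both the louvain and leidenalg packages are installed; the karate
# club graph is stand-in data.
import igraph as ig

G_demo = ig.Graph.Famous("Zachary")
for alg in ('louvain', 'leiden'):
    part = run_alg(G_demo, alg, gamma=1.0)
    print(alg, len(part))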
def cluster_knn_louvain(data, neighbors=10):
    # fix: use the `neighbors` argument instead of a hard-coded 10
    A = kneighbors_graph(data, neighbors, mode='connectivity', include_self=True)
    sources, targets = A.nonzero()
    weights = A[sources, targets]
    if isinstance(weights, np.matrix):
        weights = weights.A1
    g = ig.Graph(directed=False)
    g.add_vertices(A.shape[0])  # this adds adjacency.shape[0] vertices
    g.add_edges(list(zip(sources, targets)))
    g.es['weight'] = weights
    weights = np.array(g.es["weight"]).astype(np.float64)
    partition_type = louvain.RBConfigurationVertexPartition
    partition_kwargs = {}
    partition_kwargs["weights"] = weights
    part = louvain.find_partition(g, partition_type, **partition_kwargs)
    groups = np.array(part.membership)
    return groups
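# Usage sketch (illustrative): cluster_knn_louvain on synthetic blobs.
# scikit-learn's make_blobs only fabricates a well-separated dataset here;
# any (n_samples, n_features) array works.
import numpy as np
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=200, centers=3, random_state=0)
groups = cluster_knn_louvain(X, neighbors=10)
print(np.unique(groups))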
def cluster(D, metric='euclidean', n_neighbors=20, method='louvain', resolution=1):
    import igraph as ig
    if method == 'louvain':
        try:
            import louvain as partition_alg
        except ImportError:
            print('package "louvain" is missing')
    else:
        try:
            import leidenalg as partition_alg
        except ImportError:
            print('package "leidenalg" is missing')
    adj = dist_to_nn(compute_distances(D, metric), K=n_neighbors)
    g = ig.Graph.Adjacency(adj.tolist())
    partition = partition_alg.find_partition(g, partition_type=partition_alg.CPMVertexPartition,
                                             resolution_parameter=resolution)
    return np.array(partition.membership)
def community_detection(G, methods=['louvain', 'infomap'], infomap_trials=100):
    """Compute communities of an igraph network and generate cluster graphs.

    Parameters:
    G (igraph graph): retweet network or hashtag network
    methods (list of str): preferred method of community detection
    infomap_trials (int, default=100): amount of trials for infomap method

    Returns:
    G (igraph graph) with node attribute '{method}_com'
    C (igraph graph): one cluster graph per method
    """
    G.vs['weight'] = 1
    if 'louvain' in methods:
        Louvain = louvain.find_partition(G, louvain.ModularityVertexPartition)
        cg_louv = Louvain.cluster_graph(
            combine_vertices=dict(weight="sum", followers="sum", friends="sum"),
            combine_edges=dict(weight=sum))
    if 'infomap' in methods:
        Infomap = G.community_infomap(trials=infomap_trials)
        cg_info = Infomap.cluster_graph(
            combine_vertices=dict(weight="sum", followers="sum", friends="sum"),
            combine_edges=dict(weight=sum))
    del G.vs['weight']
    del G.es['weight']
    # fix: the original test `'louvain' and 'infomap' in methods` only checked the second clause
    if 'louvain' in methods and 'infomap' in methods:
        for v in G.vs:
            v["louvain_com"] = Louvain.membership[v.index]
            v["infomap_com"] = Infomap.membership[v.index]
        return G, cg_louv, cg_info
    if 'louvain' in methods and 'infomap' not in methods:
        for v in G.vs:
            v["louvain_com"] = Louvain.membership[v.index]
        return G, cg_louv
    if 'infomap' in methods and 'louvain' not in methods:
        for v in G.vs:
            v["infomap_com"] = Infomap.membership[v.index]
        return G, cg_info
def louvain_clusters(latent, k=10, rands=0, mutual=False):
    nn_matrix = kneighbors_graph(latent, k)
    rows, cols = nn_matrix.nonzero()
    if mutual == True:
        # keep only mutual nearest-neighbor edges (pairs that appear in both directions)
        edges = [(row, col) if row < col else (col, row) for row, col in zip(rows, cols)]
        edges = np.asarray(edges)
        unique_edges, edges_count = np.unique(edges, return_counts=True, axis=0)
        edges = unique_edges[edges_count == 2]
    else:
        edges = [(row, col) for row, col in zip(rows, cols)]
    g = ig.Graph()
    g.add_vertices(latent.shape[0])
    g.add_edges(edges)
    louvain.set_rng_seed(rands)
    res = louvain.find_partition(g, louvain.ModularityVertexPartition)
    clusters = np.asarray(res.membership)
    return clusters
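# Usage sketch (illustrative): louvain_clusters on a random toy latent space,
# once with the plain KNN graph and once keeping only mutual neighbors.
# Assumes kneighbors_graph, ig, np, and louvain are importable, as the
# function itself requires.
import numpy as np

latent = np.random.RandomState(0).randn(100, 5)
for mutual in (False, True):
    clusters = louvain_clusters(latent, k=10, rands=0, mutual=mutual)
    print(mutual, len(np.unique(clusters)))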
def run_louvain(fileName):
    # Construct igraph
    g = ig.Graph.Read_Ncol(fileName, names=True, weights=True, directed=False)
    # Find clusters using louvain; pass in weights in the same order as the edges.
    partition = louvain.find_partition(g, louvain.ModularityVertexPartition,
                                       weights=g.es["weight"])
    # print(g.vs.indices)
    # print(vars(partition))
    # Store vertex info
    vertices = g.vs
    # Get clusters
    membershipList = partition.membership
    # Get names for vertices in the same order as membership
    verticeNames = []
    for i in range(0, len(vertices)):
        verticeNames.append(vertices[i]["name"])
    # print(membershipList)
    # print(verticeNames)
    return membershipList, verticeNames
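# Usage sketch (illustrative): run_louvain reads an NCOL edge list from disk,
# so write a tiny weighted one to a temporary file first (synthetic data).
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.ncol', delete=False) as tmp:
    tmp.write("a b 1.0\nb c 2.0\nc a 1.5\nd e 1.0\n")
members, names = run_louvain(tmp.name)
print(list(zip(names, members)))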
def run_louvain(graphnum):
    G = Gs[graphnum]
    parts = []
    start = time()
    for gamma_louvain in np.linspace(0, 10, 1000):
        part = louvain.find_partition(G, louvain.RBConfigurationVertexPartition,
                                      resolution_parameter=gamma_louvain).membership
        if num_communities(part) > 100:
            break
        else:
            parts.append(part)
    print(f"Running on Graph {graphnum}, n={G.vcount()}, m={G.ecount()}: "
          f"In {time() - start:.2f} s, found {len(parts)} partitions at "
          f"{(time() - start) / len(parts):.2f} seconds per partition")
    return graphnum, {sorted_tuple(tuple(p)) for p in parts}
def buildGraph(self):
    print("Dataset" + str(self.dataset) + " ====================================")
    client = pymongo.MongoClient(host='localhost', port=27017)
    db = client[self.dbName]
    documents = db[self.dataset]
    cursor = documents.find({}, {'authors': 1, 'title': 1})
    existingTitles = []
    vertices = []
    edges = []
    weights = []
    for c in cursor:
        if c['title'] in existingTitles:
            continue
        else:
            existingTitles.append(c['title'])
        for author in c["authors"]:
            if author not in vertices:
                vertices.append(author)
        for pair in combinations(c["authors"], 2):
            ind = edges.index(pair) if pair in edges else -1
            if ind == -1:
                edges.append(pair)
                weights.append(1)
            else:
                weights[ind] += 1
    self.g.add_vertices(vertices)
    self.g.add_edges(edges)
    self.g.es['weight'] = weights
    self.partition = louvain.find_partition(self.g, louvain.ModularityVertexPartition)
def compute_louvain(G):
    """Compute Louvain communities of an igraph network and generate a cluster graph.

    Parameters:
    G (igraph graph): retweet network or hashtag network

    Returns:
    G (igraph graph) with node attribute 'louvain_com'
    clustergraph (igraph graph): graph where every node is a community
    """
    import louvain
    G.vs['weight'] = 1
    partition = louvain.find_partition(G, louvain.ModularityVertexPartition)
    clustergraph = partition.cluster_graph(
        combine_vertices=dict(weight="sum", followers="sum", friends="sum"),
        combine_edges=dict(weight=sum))
    del G.vs['weight']
    del G.es['weight']
    for v in G.vs:
        v["louvain_com"] = partition.membership[v.index]
    return G, clustergraph
def singlelayer_louvain(G, gamma, return_partition=False):
    r"""Run the Louvain modularity maximization algorithm at a single :math:`\gamma` value.

    :param G: graph of interest
    :type G: igraph.Graph
    :param gamma: gamma (resolution parameter) to run Louvain at
    :type gamma: float
    :param return_partition: if True, return a louvain partition. Otherwise, return a community membership tuple
    :type return_partition: bool
    :return: partition from louvain
    :rtype: tuple[int] or louvain.RBConfigurationVertexPartition
    """
    if 'weight' not in G.es.attributes():  # fix: test edge attribute names, not edges
        G.es['weight'] = [1.0] * G.ecount()

    partition = louvain.find_partition(G, louvain.RBConfigurationVertexPartition,
                                       weights='weight', resolution_parameter=gamma)

    if return_partition:
        return partition
    else:
        return tuple(partition.membership)
    dict_reverse[id] = [row[0]]
    users.append(id)
    G.add_vertex(id)
    id = id + 1

print len(dict_users)
for index, row in df.iterrows():
    print index
    rowList = str(row['friends']).split(' ')
    if rowList:
        for v in rowList:
            if v != 'nan' and int(v) in dict_users:
                G.add_edge(dict_users[row[0]], dict_users[int(v)])

# compute the best partition
partition = louvain.find_partition(G, method='Modularity')
p_dict = {}
index = 0
for i in partition.membership:
    p_dict[index] = i
    index = index + 1
forced_partitions = limit_communities(p_dict, 50)
user_partitions = pd.DataFrame({'user': users,
                                'user_community': map(lambda u: forced_partitions[u], users)})
user_partitions['user'] = user_partitions['user'].replace(dict_reverse)
def main():
    args = get_args()
    logger.info('Start')

    graphs = dict()
    dates = list()
    for network in args.networks:
        logger.debug('Loading file {}...'.format(network))
        with open(network, 'r') as infile:
            basefilename = os.path.basename(network)
            graph_date = basefilename.split('.')[-2]
            dates.append(graph_date)

            reader = csv.reader(infile, delimiter='\t')
            # skip header
            next(reader)
            edgelist = [edge for edge in reader]

            # collect the set of vertex names and then sort them into a list
            vertices = set()
            for edge in edgelist:
                # iterates on the list and adds each element
                vertices.update(edge)
            vertices = sorted(vertices)

            # new graph
            G = ig.Graph()
            # add vertices to the graph
            G.add_vertices(vertices)
            # add edges to the graph
            G.add_edges(edgelist)
            graphs[graph_date] = G
        logger.debug('done!')
    logger.info('Loaded all graphs')

    logger.info('Preparing to drop empty graphs')
    graphs_copy = copy.deepcopy(graphs)
    for graph_date, G in graphs_copy.items():
        if G.vcount() == 0:
            logger.debug('Dropping empty graph {}'.format(graph_date))
            del graphs[graph_date]
    del graphs_copy
    logger.info('Dropped empty graphs')

    global_vset = set()
    for graph_date, G in graphs.items():
        vertices = [v.attributes()['name'] for v in G.vs]
        global_vset.update(vertices)

    logger.info('Building global index of vertices')
    global_vlist = sorted(global_vset)
    del global_vset
    global_vtoid = dict((vname, vid) for vid, vname in enumerate(global_vlist))
    global_idtov = dict((vid, vname) for vid, vname in enumerate(global_vlist))
    with open(os.path.join('data', 'vertex.json'), 'w+') as vertexfile:
        json.dump(global_idtov, vertexfile)
    logger.info('Global index of vertices built')

    logger.info('Calculating partitions for all snapshots')
    partitions = dict()
    for graph_date, G in graphs.items():
        logger.debug('Calculating partitions for graph {}...'.format(graph_date))
        part = louvain.find_partition(G, louvain.ModularityVertexPartition)
        partitions[graph_date] = part
    logger.info('Calculated partitions for all snapshots')

    all_clusters = list()
    csv_header = ('date', 'n_partitions')
    with open(os.path.join('data', 'partitions.csv'), 'w+') as outfile:
        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(csv_header)
        for graph_date in dates:
            logger.debug('Writing clusters for snapshot {}'.format(graph_date))
            parts = partitions.get(graph_date, None)
            if parts is not None:
                writer.writerow((graph_date, len(parts)))
                en_clusters = [cl for cl in enumerate(parts.subgraphs())]
                all_clusters.append(Cluster(arrow.get(graph_date), en_clusters))

                clevoname = 'graph.{0}.clusters.csv'.format(graph_date)
                clevoutfile_path = os.path.join('data', 'partitions-evolution', clevoname)
                with open(clevoutfile_path, 'w+') as clevoutfile:
                    for idx, cluster in en_clusters:
                        nodes = set([v.attributes()['name'] for v in cluster.vs])
                        nodes_ids = sorted([global_vtoid[n] for n in nodes])
                        clevoutfile.write('{}\n'.format(' '.join(str(nid)
                                                                 for nid in nodes_ids)))

                        clname = ('graph.{0}.cluster.{1:02}.csv'.format(graph_date, idx))
                        cloutfile_path = os.path.join('data', 'partitions', clname)
                        with open(cloutfile_path, 'w+') as cloutfile:
                            for node in nodes:
                                cloutfile.write('{}\n'.format(node))
            else:
                writer.writerow((graph_date, 0))
    logger.info('Written all clusters')

    # Iterate over all pairs of consecutive items from a given list
    # https://stackoverflow.com/q/21303224/2377454
    cluster_pairs = [pair for pair in zip(all_clusters, all_clusters[1:])]

    logger.info('Compared clusters at t and t+1')
    compare_clusters = dict()
    similarity_clusters = dict()
    for snap_t1, snap_t2 in cluster_pairs:
        t1 = str(snap_t1.date.format('YYYY-MM-DD'))
        t2 = str(snap_t2.date.format('YYYY-MM-DD'))

        assert snap_t1.date.replace(months=+1) == snap_t2.date

        # snap_t1.clusters and snap_t2.clusters are the clusters at
        # time t and t+1
        snap_t1_clusters_nodes = list()
        for idx1, cl1 in snap_t1.clusters:
            snap_t1_clusters_nodes.append(
                (idx1, [v.attributes()['name'] for v in cl1.vs]))
        del idx1, cl1

        snap_t2_clusters_nodes = list()
        for idx2, cl2 in snap_t2.clusters:
            snap_t2_clusters_nodes.append(
                (idx2, [v.attributes()['name'] for v in cl2.vs]))
        del idx2, cl2

        cluster_product = itertools.product(snap_t1_clusters_nodes,
                                            snap_t2_clusters_nodes)

        # numpy.zeros(shape, dtype=float, order='C')
        n = len(snap_t1_clusters_nodes)
        m = len(snap_t2_clusters_nodes)
        clmatrix = np.zeros((n, m), dtype=float)
        for cl1, cl2 in cluster_product:
            ridx = cl1[0]
            cidx = cl2[0]
            logger.debug('Comparing clusters at {} and {}: ({},{})'
                         .format(t1, t2, ridx, cidx))
            nodes_cl1 = set(cl1[1])
            nodes_cl2 = set(cl2[1])
            sim = jaccard_distance(nodes_cl1, nodes_cl2)
            clmatrix[ridx][cidx] = sim
        logger.debug('Compared clusters at {} and {}'.format(t1, t2))

        res = scipy.optimize.linear_sum_assignment(clmatrix)
        cluster_t1_indices = res[0].tolist()
        cluster_t2_indices = res[1].tolist()
        c1_to_c2 = dict(zip(cluster_t1_indices, cluster_t2_indices))
        compare_clusters['{}_{}'.format(t1, t2)] = c1_to_c2

        sim_c1c2 = dict()
        for c1, c2 in c1_to_c2.items():
            sim_c1c2[c1] = clmatrix[c1][c2]
        similarity_clusters['{}_{}'.format(t1, t2)] = sim_c1c2
    logger.info('Compared all clusters')

    clevo_filename = 'clusters_evolution.json'
    clevo_path = os.path.join('data', clevo_filename)
    with open(clevo_path, 'w') as clevo_out:
        json.dump(compare_clusters, clevo_out)

    evolved_clusters = defaultdict(dict)
    evolved_clusters_stable = defaultdict(dict)
    cl_date_prev = None
    cluster_no = 0
    cluster_no_stable = 0
    cluster_sizes = dict()
    for date, clusters in all_clusters:
        cl_date = date.format('YYYY-MM-DD')
        cluster_sizes[cl_date] = defaultdict(int)
        logger.info('Processing clusters for {}...'.format(cl_date))

        cl_dict = None
        if cl_date_prev is not None:
            key = '{}_{}'.format(cl_date_prev, cl_date)
            cl_dict = compare_clusters[key]
            inv_cl_dict = {v: k for k, v in cl_dict.items()}
            for cl in clusters:
                clid = cl[0]
                if clid in cl_dict.values():
                    evolved_clusters[cl_date][clid] = \
                        evolved_clusters[cl_date_prev][inv_cl_dict[clid]]
                    if similarity_clusters[key][inv_cl_dict[clid]] < 0.34:
                        evolved_clusters_stable[cl_date][clid] = \
                            evolved_clusters_stable[cl_date_prev][inv_cl_dict[clid]]
                    else:
                        evolved_clusters_stable[cl_date][clid] = cluster_no_stable
                        cluster_no_stable += 1
                else:
                    evolved_clusters[cl_date][clid] = cluster_no
                    evolved_clusters_stable[cl_date][clid] = cluster_no_stable
                    cluster_no += 1
                    cluster_no_stable += 1
                cluster_sizes[cl_date][evolved_clusters[cl_date][clid]] = \
                    cl[1].vcount()
        else:
            for cl in clusters:
                clid = cl[0]
                evolved_clusters[cl_date][clid] = cluster_no
                evolved_clusters_stable[cl_date][clid] = cluster_no_stable
                cluster_no += 1
                cluster_no_stable += 1
                cluster_sizes[cl_date][evolved_clusters[cl_date][clid]] = \
                    cl[1].vcount()
        cl_date_prev = cl_date

    for i in range(cluster_no):
        clsize_path = os.path.join('data', 'cluster-sizes',
                                   'cluster_sizes.{:03}.csv'.format(i))
        with open(clsize_path, 'w+') as clsizefile:
            clsizewriter = csv.writer(clsizefile, delimiter='\t')
            for graph_date in dates:
                if graph_date in cluster_sizes:
                    cl_size = cluster_sizes[graph_date][i]
                else:
                    cl_size = 0
                clsizewriter.writerow((graph_date, cl_size))

    evcl_path = os.path.join('data', 'evolved_clusters.json')
    with open(evcl_path, 'w+') as evcl_file:
        json.dump(evolved_clusters, evcl_file)

    evclstable_path = os.path.join('data', 'evolved_clusters_stable.json')
    with open(evclstable_path, 'w+') as evclstable_file:
        json.dump(evolved_clusters_stable, evclstable_file)

    cl_date_prev = None
    logger.info('Processing vertexes in clusters')
    vertex_clusters = defaultdict(dict)
    for date, clusters in all_clusters:
        cl_date = date.format('YYYY-MM-DD')
        logger.info('Processing clusters for {}...'.format(cl_date))
        for clid, cl in clusters:
            logger.debug('Processing cluster id {} for {}...'.format(clid, cl_date))
            nodes = [v.attributes()['name'] for v in cl.vs]
            for node in nodes:
                vertex_clusters[node][cl_date] = evolved_clusters[cl_date][clid]

    for node in global_vlist:
        node_outfilename = get_valid_filename('node_evolution_{}.csv'.format(node))
        node_outfilepath = os.path.join('data', 'nodes-evolution', node_outfilename)
        with open(node_outfilepath, 'w+') as node_outfile:
            writer = csv.writer(node_outfile, delimiter='\t')
            writer.writerow(('date', 'cluster_id'))
        for graph_date in dates:
            clid = vertex_clusters[node].get(graph_date, -1)
            with open(node_outfilepath, 'a+') as node_outfile:
                writer = csv.writer(node_outfile, delimiter='\t')
                writer.writerow((graph_date, clid))
    logger.info('All done!')
def louvain(graph):
    # lv.set_rng_seed(0)
    lv.set_rng_seed(random.randint(1, 100000))
    raw_partitions = lv.find_partition(graph, lv.ModularityVertexPartition)
    return raw_partitions
def louvain(self, load=True, save=False):
    """Computes cluster memberships returned by the Louvain method
    (implemented in C++ via the louvain-igraph package)."""
    self._louvain_memberships = pd.DataFrame(
        louvain.find_partition(self, method="Modularity").membership,
        columns=["louvainMembership"]
    )
# estimate partition 3
estimate_group3 = nx.karate_club_graph()
estimate_group3.remove_nodes_from(partitions[partitions.estimate != 3].node - 1)

# estimate partition 4
estimate_group4 = nx.karate_club_graph()
estimate_group4.remove_nodes_from(partitions[partitions.estimate != 4].node - 1)

# calculate densities
g1_dens = nx.density(ground_first_group)
g2_dens = nx.density(ground_second_group)
e1_dens = nx.density(estimate_group1)
e2_dens = nx.density(estimate_group2)
e3_dens = nx.density(estimate_group3)
e4_dens = nx.density(estimate_group4)

# igraph approach -------------------------------------------------------------

# read and format the karate data
karate = ig.Graph.Read_GraphML("../data/karate.GraphML")

# find some partitions with different methods
partM = louvain.find_partition(karate, method="Modularity")
partRBConfig = louvain.find_partition(karate, method="RBConfiguration",
                                      resolution_parameter=0.25)
partRBER = louvain.find_partition(karate, method="RBER")
partDens = louvain.find_partition(karate, method="CPM", resolution_parameter=0.25)
partSignif = louvain.find_partition(karate, method="Significance")
partSurp = louvain.find_partition(karate, method="Surprise")

# view partitions by printing
# print partM
import igraph as ig
import louvain

G = ig.Graph.Erdos_Renyi(100, 0.1)
louvain.find_partition(G, "Modularity")
louvain.find_partition(G, "RBConfiguration")
louvain.find_partition(G, "Surprise")
louvain.find_partition(G, "Significance")

G.es['weight'] = 1.0
louvain.find_partition(G, "Modularity", weight='weight')
louvain.find_partition(G, "RBConfiguration", weight='weight')
louvain.find_partition(G, "Surprise", weight='weight')
graph = pGraph("http://localhost:7474/db/data/")
print graph
query = """
MATCH (n)-[r]->(m)
RETURN id(n) as from, id(m) as to, r.prob as prob
"""
data = graph.cypher.execute(query)
print data
ig = Graph.TupleList(data, weights=True)
print ig.is_simple()
part = louvain.find_partition(ig, method='Modularity', weight='weight')
layout = ig.layout_fruchterman_reingold(weights='weight')
visual_style = {}
visual_style["layout"] = layout
visual_style["vertex_label"] = ig.vs["name"]
visual_style["bbox"] = (3200, 3200)
visual_style["margin"] = 10
visual_style["vertex_size"] = 60
visual_style["edge_width"] = [2 + 2 * int(weight) for weight in ig.es["weight"]]
plot(part, **visual_style)
def process_file(tm, year):
    ig_dpath = dpaths[tm, year, 'influenceGraph']
    ig_prefix = prefixs[tm, year, 'influenceGraph']
    gp_dpath = dpaths[tm, year, 'groupPartition']
    gp_prefix = prefixs[tm, year, 'groupPartition']
    #
    check_dir_create(gp_dpath)
    #
    gp_summary_fpath = '%s/%ssummary.csv' % (gp_dpath, gp_prefix)
    gp_original_fpath = '%s/%soriginal.pkl' % (gp_dpath, gp_prefix)
    gp_drivers_fpath = '%s/%sdrivers.pkl' % (gp_dpath, gp_prefix)
    #
    with open(gp_summary_fpath, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile, lineterminator='\n')
        writer.writerow(['groupName', 'numDrivers', 'numRelations', 'graphComplexity',
                         'tieStrength', 'contribution', 'benCon'])
    #
    logger.info('Start handling SP_group_dpath')
    orignal_graph = {}
    for fn in get_all_files(ig_dpath, '%s*' % ig_prefix):
        regression_graph = load_pickle_file('%s/%s' % (ig_dpath, fn))
        for i, ((did0, did1), w) in enumerate(regression_graph.iteritems()):
            orignal_graph[did0, did1] = w
    save_pickle_file(gp_original_fpath, orignal_graph)
    #
    igid, did_igid = 0, {}
    igG = ig.Graph(directed=True)
    for i, ((did0, did1), w) in enumerate(orignal_graph.iteritems()):
        if not did_igid.has_key(did0):
            igG.add_vertex(did0)
            did_igid[did0] = igid
            igid += 1
        if not did_igid.has_key(did1):
            igG.add_vertex(did1)
            did_igid[did1] = igid
            igid += 1
        igG.add_edge(did_igid[did0], did_igid[did1], weight=abs(w))
    #
    logger.info('Partitioning')
    part = louvain.find_partition(igG, method='Modularity', weight='weight')
    logger.info('Each group pickling and summary')
    gn_drivers = {}
    for i, sg in enumerate(part.subgraphs()):
        gn = 'G(%d)' % i
        group_fpath = '%s/%s%s.pkl' % (gp_dpath, gp_prefix, gn)
        sg.write_pickle(group_fpath)
        #
        drivers = [v['name'] for v in sg.vs]
        weights = [e['weight'] for e in sg.es]
        graphComplexity = len(weights) / float(len(drivers))
        tie_strength = sum(weights) / float(len(drivers))
        contribution = sum(weights) / float(len(weights))
        benCon = tie_strength / float(len(drivers))
        with open(gp_summary_fpath, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow([gn, len(drivers), len(weights), graphComplexity,
                             tie_strength, contribution, benCon])
        gl_img_fpath = '%s/%simg-%s.pdf' % (gp_dpath, gp_prefix, gn)
        layout = sg.layout("kk")
        if len(drivers) < 100:
            ig.plot(sg, gl_img_fpath, layout=layout, vertex_label=drivers)
        else:
            ig.plot(sg, gl_img_fpath, layout=layout)
        gn_drivers[gn] = drivers
        gc_fpath = '%s/%scoef-%s.csv' % (gp_dpath, gp_prefix, gn)
        with open(gc_fpath, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow(['groupName', 'did0', 'did1', 'coef'])
            for e in sg.es:
                did0, did1 = [sg.vs[nIndex]['name'] for nIndex in e.tuple]
                coef = e['weight']
                writer.writerow([gn, did0, did1, coef])
    save_pickle_file(gp_drivers_fpath, gn_drivers)
def louvain(self):
    vertexCluster = louvain.find_partition(self.g, method='Modularity', weight='weight',
                                           initial_membership=range(self.g.vcount()))
    return self.igraphWrapper.getCommunities(vertexCluster)