def __set_necessary_info(self):
    v_degree = list()
    v_index = list()
    v_partation = list()
    memberships = self.__partitions._membership
    for index in range(len(memberships)):
        if memberships[index] == self.__community_index_0:
            v_index.append(index)
            v_degree.append(self.__graph.degree(index))
            v_partation.append(0)
        if memberships[index] == self.__community_index_1:
            v_index.append(index)
            v_degree.append(self.__graph.degree(index))
            v_partation.append(1)
    self.__degree_list = v_degree
    self.__vertex_list = v_index
    self.__vertex_part = v_partation

    # The merged community keeps the index self.__community_index_1
    partation_expected = VertexClustering(
        graph=self.__partitions._graph,
        membership=list(self.__partitions._membership))
    for i in range(len(partation_expected._membership)):
        if partation_expected._membership[i] == self.__community_index_0:
            partation_expected._membership[i] = self.__community_index_1
    for i in range(len(partation_expected._membership)):
        if partation_expected._membership[i] == partation_expected._len - 1:
            partation_expected._membership[i] = self.__community_index_0
    partation_expected._len -= 1
    # print(partation_expected._membership)
    self.__partitions_expected = partation_expected
def __init__(self, graph, value=None, cut=None, partition=None, partition2=None):
    """Initializes the cut.

    This should not be called directly, everything is taken care of by
    the functions that return cuts.
    """
    # Input validation
    if partition is None or cut is None:
        raise ValueError("partition and cut must be given")

    # Set up a membership vector, initialize the parent class
    membership = [1] * graph.vcount()
    for vid in partition:
        membership[vid] = 0
    VertexClustering.__init__(self, graph, membership)

    if value is None:
        # Value of the cut not given, count the number of edges
        value = len(cut)

    self._value = float(value)
    self._partition = sorted(partition)
    self._cut = cut
def _desc(self, i):
    parts = VertexClustering(self.graph, membership=self.initial_membership)
    learners = self.learning.learners
    vcount = self.graph.vcount()
    actions = [learner.action for learner in learners]
    cls_actions = Counter(actions)
    dis_actions = [round(count / vcount * 100, 2) for count in cls_actions.values()]
    dis_actions.sort(reverse=True)
    clu_actions = VertexClustering(self.graph, membership=actions)
    l_conformity = count_conformity(parts, actions, self.available_action)
    g_conformity = count_security_index(self.graph, clu_actions)
    l_diversity = count_diversity(parts, actions, len(cls_actions))
    g_diversity = count_diversity(parts, actions, self.available_action)
    avg_payoff = sum(self.learning.payoff) / len(self.learning.payoff)
    modularity = clu_actions.modularity

    result = dict()
    result['index'] = data_format(i, width=6)
    result['exist_action'] = data_format(len(cls_actions), width=4)
    result['ldiversity'] = data_format(l_diversity)
    result['gdiversity'] = data_format(g_diversity)
    result['lconformity'] = data_format(l_conformity)
    result['gconformity'] = data_format(g_conformity)
    result['avg_payoff'] = data_format(avg_payoff)
    result['modularity'] = data_format(modularity)
    result['action_dis'] = dis_actions
    logger.info(json.dumps(result))
def desc(graph: Graph):
    parts = VertexClustering(graph, [int(i) for i in graph.vs['part']])
    print(graph.vcount(), graph.ecount())
    print(f"Part Num: {len(parts)}")
    print(f"Part Size: {[len(part) for part in parts]}")
    print(f"Modularity: {parts.modularity}")
    in_edges = 0
    for subgraph in parts.subgraphs():
        in_edges += subgraph.ecount()
    print(f"fraction: {in_edges / graph.ecount()}")
    print("Degree Distribution: ")
    print(graph.degree_distribution())
def mergePartitions(clustering, g):
    modularity = clustering.modularity
    # clustering.membership returns a fresh copy on every access, so keep one
    # reference around to detect whether any merge actually happened.
    originalMembership = clustering.membership
    newMembership = originalMembership
    clusterIndices = list(set(originalMembership))
    for i in range(0, len(clusterIndices) - 1):
        for j in range(i + 1, len(clusterIndices)):
            # Check if the ith and jth clusters are neighbors, i.e. they have vertices that share an edge
            cxi = clusterIndices[i]
            cxj = clusterIndices[j]
            if isNeighboringCluster(clustering[cxi], clustering[cxj], g):
                # Merge the clusters and see if the modularity value increases.
                # Replace cluster index j with i; materialize the map() so the list can be reused.
                m = list(map(lambda x: cxi if x == cxj else x, newMembership))
                newClustering = igraph.clustering.VertexClustering(g, membership=m)
                if newClustering.modularity <= modularity:
                    continue
                modularity = newClustering.modularity
                newMembership = m

    if newMembership is originalMembership:
        # The modularity didn't increase, so return the original clustering.
        return clustering

    # Renumber cluster ids so that they are continuous from 0.
    clusterIdMap = {}
    currentId = -1
    renumberedMembership = []
    for cid in newMembership:
        if cid not in clusterIdMap:
            currentId += 1
            clusterIdMap[cid] = currentId
        renumberedMembership.append(clusterIdMap[cid])
    return VertexClustering(g, membership=renumberedMembership)
def id_communities(g, layout_list, eps=0.42, min_samples=10):
    layout_distance = spatial.distance.squareform(spatial.distance.pdist(layout_list))
    layout_similarity = 1 - (layout_distance / np.max(layout_distance))
    # eps and min_samples are DBSCAN constructor arguments, not fit() arguments
    community_idx_list = DBSCAN(eps=eps, min_samples=min_samples).fit(layout_similarity).labels_
    if -1 in community_idx_list:
        # Shift labels so DBSCAN's noise label (-1) becomes a valid community id
        community_idx_list = list(np.array(community_idx_list) + 1)
    community_idx_list = [int(x) for x in community_idx_list]
    vertex_clustering = VertexClustering(g, community_idx_list)
    return g, community_idx_list, vertex_clustering
def update_membership(self, membership):
    """
    Update the membership manually.

    :param membership: list[int]
    :return:
    """
    self.membership = membership
    self.parts = VertexClustering(self.graph, self.membership)
    self._set_sorted_part_degree()
def fast_resistance(graph):
    pyr = PyResistance.from_igraph_Graph(graph)
    parts, _ = pyr.apply_method()
    membership = [0] * graph.vcount()
    for num, part in enumerate(parts):
        for node in part:
            membership[node] = num
    return VertexClustering(graph, membership=membership)
def __set_necessary_info(self):
    v_degree = list()
    v_index = list()
    v_partation = list()
    memberships = self.__partitions._membership
    for index in range(len(memberships)):
        if memberships[index] == self.__community_index_0:
            v_index.append(index)
            v_degree.append(self.__graph.degree(index))
            v_partation.append(0)
        if memberships[index] == self.__community_index_1:
            v_index.append(index)
            v_degree.append(self.__graph.degree(index))
            v_partation.append(1)
    self.__degree_list = v_degree
    self.__vertex_list = v_index
    self.__vertex_part = v_partation

    partation_expected = VertexClustering(
        graph=self.__partitions._graph,
        membership=list(self.__partitions._membership))
    for i in range(len(partation_expected._membership)):
        if partation_expected._membership[i] == self.__community_index_0:
            partation_expected._membership[i] = self.__community_index_1
    for i in range(len(partation_expected._membership)):
        if partation_expected._membership[i] == partation_expected._len - 1:
            partation_expected._membership[i] = self.__community_index_0
    partation_expected._len -= 1
    self.__partitions_expected = partation_expected

    # for i in range(0, 11):
    #     print(self.__partitions.subgraph(i).vcount())
    # for i in range(0, 10):
    #     print(self.__partitions_expected.subgraph(i).vcount())

    for index, part in enumerate(self.__partitions_expected):
        subgraph: Graph = self.__partitions_expected.subgraph(index)
        self.__partitions_expected_degree.append(2 * subgraph.ecount())
        self.__partitions_expected_volume.append(
            sum(self.__graph.degree(part)))
def __init__(self, graph, membership=None):
    self.graph = graph
    self.membership = membership if membership else [
        int(i) for i in graph.vs['part']
    ]
    self.parts = VertexClustering(self.graph, self.membership)
    self.neighbors = None
    self.sorted_parts_degree = None
    self._preprocess()
def id_communities(g, coordinates):
    layout_distance = spatial.distance.squareform(
        spatial.distance.pdist(coordinates))
    max_distance = np.max(layout_distance)
    print('max_distance: %s' % max_distance)
    layout_distance = layout_distance / max_distance
    mean_distance = np.mean(layout_distance)
    print('mean_distance: %s' % mean_distance)
    eps = mean_distance / .85
    # DBSCAN expects an integer neighbour count
    min_samples = int(4.5 / mean_distance)
    print('eps: %s' % eps)
    print('min_samples: %s' % min_samples)
    layout_similarity = 1 - layout_distance
    # eps and min_samples are DBSCAN constructor arguments, not fit() arguments
    community_idx_list = DBSCAN(eps=eps, min_samples=min_samples).fit(layout_similarity).labels_
    if -1 in community_idx_list:
        print('-1 in community_idx_list')
        # Shift labels so DBSCAN's noise label (-1) becomes a valid community id
        community_idx_list = list(np.array(community_idx_list) + 1)
    community_idx_list = [int(x) for x in community_idx_list]
    print(set(community_idx_list))
    return VertexClustering(g, community_idx_list)
def subgraphMining(g, pattern, ig, clustering, frequentPatterns):
    clustering = mergePartitions(clustering, ig)
    gMeasure = len(clustering)
    if gMeasure < 2:  # TODO: Remove hardcoded threshold
        return
    frequentPatterns.add(pattern)

    # Get child patterns
    for cp in getChildPatterns(g, pattern):
        if cp in PATTERN_INSTANCES:
            continue
        childInstanceParentMap = getPatternInstances(g, cp, pattern)
        allInstances = set(childInstanceParentMap.keys())
        PATTERN_INSTANCES[cp] = allInstances

        # Create the instance graph of the child pattern
        childIg = createInstanceGraphForPattern(cp)

        # Assign child instances to clusters based on the parent clusters
        clusterId = 0
        membership = []
        parentClusterIds = {}
        for cluster in clustering:  # This is the clustering of the parent instance graph.
            for vertexIndex in cluster:
                parentInstance = ig.vs[vertexIndex]["instance"]
                parentClusterIds[parentInstance] = clusterId
            clusterId += 1
        for childVertex in childIg.vs:
            childInstance = childVertex["instance"]
            membership.append(parentClusterIds[childInstanceParentMap[childInstance]])
        childIgClustering = VertexClustering(childIg, membership=membership)
        subgraphMining(g, cp, childIg, childIgClustering, frequentPatterns)
    return
def copy_partition(partition) -> VertexClustering:
    return VertexClustering(graph=partition._graph,
                            membership=list(partition._membership))
def run_clustering_on_graph(topic_id='', method='fast_greedy', experiment=''):
    if not topic_id:
        raise ValueError("topic_id is required")

    g, query_sentence, sims, query_node, query_index = load_topic_matrix(topic_id)

    #method='betweenness'  #taking too much time, we will skip it
    #method='walktrap'
    #method='leading_eigenvector'

    #g=filter_graph(g)

    print("EXPERIMENT: " + str(experiment))

    if 'do7_sum_nodes' in experiment:
        #After building the graph from the cos sim matrix, rank the sentences according to total score.
        clusters = []
        cluster_weights = []
        return g, clusters, cluster_weights, query_sentence, query_index

    elif 'do6' in experiment:
        print("Calculate random walk scores...")
        g_random_walk_scores = calc_random_walk_with_restart(g, query_index)

        print("Get edge distributions")
        query1_cosim_values = []
        query2_cosim_values = []
        cosim_values = []
        ws_values = []
        for i, e in enumerate(g.es):  #FOR EACH EDGE
            #edge weight#  weight = e['weight']

            # Get nodes of edge
            vertex1 = g.vs[e.tuple[0]]  #node1 idx
            vertex2 = g.vs[e.tuple[1]]  #node2 idx

            # Get node walk scores
            walk_score1, vertex1, rank1 = g_random_walk_scores[vertex1['s_idx']]
            walk_score2, vertex2, rank2 = g_random_walk_scores[vertex2['s_idx']]

            #Get random walk score average
            rws_avg = (walk_score1 + walk_score2) / 2

            #Get edge cos sim score (current weight)
            edge_cosim = sims[vertex1['s_idx']][vertex2['s_idx']]
            query_cosim1 = sims[query_index][vertex1['s_idx']]  #For do6_2
            query_cosim2 = sims[query_index][vertex2['s_idx']]  #For do6_2

            #Store values for normalization
            query1_cosim_values += [query_cosim1]  #For do6_2
            query2_cosim_values += [query_cosim2]  #For do6_2
            cosim_values += [edge_cosim]
            ws_values += [rws_avg]

        #Calculate percent distributions
        if True:  #Do rank based
            query1_cosim = calc_rank_percent_distribution(query1_cosim_values)  #For do6_2
            query2_cosim = calc_rank_percent_distribution(query2_cosim_values)  #For do6_2
            cosim_dist = calc_rank_percent_distribution(cosim_values)
            ws_dist = calc_rank_percent_distribution(ws_values)
        else:
            query1_cosim = normalize_max_min(query1_cosim_values)  #For do6_2
            query2_cosim = normalize_max_min(query2_cosim_values)  #For do6_2
            cosim_dist = normalize_max_min(cosim_values)
            ws_dist = normalize_max_min(ws_values)

        #Use percent distributions to calculate new weight
        for i, e in enumerate(g.es):  #FOR EACH EDGE
            if experiment == 'do6_two_scores_1':
                #(do6_1): edge_weight = Max[(cos sim), ((node1_rws) + (node2_rws)) / 2]
                weight = cosim_dist[i]
            elif experiment == 'do6_two_scores_2':
                #(do6_2): edge_weight = ((cos sim) + (node1_qcs + node2_qcs) / 2) / 2
                weight = (cosim_dist[i] + (query1_cosim[i] + query2_cosim[i]) / 2) / 2
            else:  #Standard do6_two_scores
                weight = (cosim_dist[i] + ws_dist[i]) / 2
            # print("WEIGHT from: "+str(g.es[i]['weight'])+" to: "+str(weight)+" via: "+str(cosim_dist[i])+" and "+str(ws_dist[i]))
            g.es[i]['weight'] = weight
    else:
        pass  #Standard run, no experiments

    communities = []
    clusters = []
    print("Running clustering [" + method + "] on graph...")
    Perf.start()

    markov_subgraphs = []
    uG = ''  #Undirected version of graph that corresponds to true cluster
    if 'markov' in method:
        print("---> doing markov clustering")
        matrix = to_sparse(g, weight_attr='weight')
        result = mc.run_mcl(matrix)  # run MCL with default parameters
        clusters = mc.get_clusters(result)
        #D# print("GOT CLUSTERS: "+str(clusters))

        #Initialize subgraph attribute
        for idx, v in enumerate(g.vs):
            g.vs[idx]['subgraph'] = ''

        for cluster in clusters:
            cluster_id = cluster[0]
            indexes = cluster[1:]
            if not indexes:
                continue
            print("markov cluster " + str(cluster_id) + " has: " + str(indexes))
            #Store subgraph index back into igraph
            for idx in indexes:
                g.vs[idx]['subgraph'] = cluster_id

        #Add subgraph cluster indexes back into graph
        print("AT 0: " + str(g.vs[0]))
        #cl = VertexClustering(g, attribute='subgroup')  #'g.vs["subgroup"])
        clusters = VertexClustering.FromAttribute(g, 'subgraph')
        #print("GOT CLUSTER: "+str(cl))

    if 'betweenness' in method:
        #cluster_count=15
        print("**betweenness requires edge trimming. Doing that now...")
        g = filter_graph(g, the_type='remove_low_weights')
        print("Calculating edge betweenness...")
        communities = g.community_edge_betweenness(clusters=cluster_count, weights='weight')  #directed=
        print("Fixing/checking dendrogram -- must be fully connected.")
        communities = fix_dendrogram(g, communities)

    #########################################################
    if 'fast_greedy' in method:
        #** only works with undirected graphs
        uG = g.as_undirected(combine_edges='mean')  #Retain edge attributes: max, first.
        communities = uG.community_fastgreedy(weights='weight')

        #When an algorithm in igraph produces a VertexDendrogram, it may optionally produce a "hint" as well
        #that tells us where to cut the dendrogram (i.e. after how many merges) to obtain a VertexClustering
        #that is in some sense optimal. For instance, the VertexDendrogram produced by community_fastgreedy()
        #proposes that the dendrogram should be cut at the point where the modularity is maximized.
        #Running as_clustering() on a VertexDendrogram simply uses the hint produced by the clustering algorithm
        #to flatten the dendrogram into a clustering, but you may override this by specifying the desired number
        #of clusters as an argument to as_clustering().
        #As for the "distance" between two communities: it's complicated, because most community detection
        #methods don't give you that information. They simply produce a sequence of merges from individual
        #vertices up to a mega-community encapsulating everyone, and there is no "distance" information encoded
        #in the dendrogram; in other words, the branches of the dendrogram have no "length". The best you can do
        #is probably to go back to your graph and check the edge density between the communities; this could be
        #a good indication of closeness. For example:
        #https://stackoverflow.com/questions/17413836/igraph-in-python-relation-between-a-vertexdendrogram-object-and-vertexclusterin

    #########################################################
    if 'walktrap' in method:
        #** only works with undirected graphs
        uG = g.as_undirected(combine_edges='mean')  #Retain edge attributes: max, first.
        communities = uG.community_walktrap(weights='weight')

    #########################################################
    if 'leading_eigenvector' in method:
        #http://igraph.org/python/doc/igraph.Graph-class.html#community_leading_eigenvector
        uG = g.as_undirected(combine_edges='mean')  #Retain edge attributes: max, first.
        clusters = uG.community_leading_eigenvector(clusters=None, weights='weight')  #if clusters=None then try as many as possible

    time_clustering = Perf.end()

    #Choose optimum number of communities
    if not clusters:
        num_communities = communities.optimal_count
        #Cut dendrogram at level n. Returns a VertexClustering object.
        #"When an algorithm in igraph produces a VertexDendrogram, it may optionally produce a "hint" as well
        #that tells us where to cut the dendrogram"
        clusters = communities.as_clustering(n=num_communities)

    # Calc weight of each cluster
    #########################################################
    #> weight = average cosine similarity between query and each node in cluster
    #> reuse sim calcs where possible
    #> note: subgraph index not same as g
    cluster_weights = []
    if experiment == 'do3_avg_cosims':
        #The weight of the cluster will be the average of two values: the average cosine similarity (the
        #existing one in the code now) AND the average value of cosine similarity between all pairs (without
        #the query).
        for i, subgraph in enumerate(clusters.subgraphs()):
            #FOR EACH cluster
            #[1] QUERY TO EACH SENTENCE
            edge_sums = 0
            for idx, v in enumerate(subgraph.vs):
                #Sim between [QUERY_SENTENCE] and [Sentence at s_idx]
                edge_sim = sims[query_index][v['s_idx']]  #Use stored sentence index to look up old cosine sim value
                edge_sums += edge_sim

            #[2] SENTENCE to SENTENCE
            all_sum = 0
            sum_count = 0
            for idx1, v1 in enumerate(subgraph.vs):
                if idx1 == query_index: continue  #skip query indexes
                for idx2, v2 in enumerate(subgraph.vs):
                    if idx2 == query_index: continue  #skip query indexes
                    if idx1 == idx2: continue  #skip same sentences
                    all_sum += sims[v1['s_idx']][v2['s_idx']]
                    sum_count += 1

            ## Calc cluster weights
            avg_weight = edge_sums / subgraph.vcount()
            if sum_count:
                avg_inter_weights = (all_sum / 2) / sum_count  #divide by 2 because pairs are double counted
            else:
                avg_inter_weights = 0
            print("[experiment info]: Cluster #" + str(i) + " inter weights average: " + str(avg_inter_weights))

            cluster_weights += [(avg_weight + avg_inter_weights) / 2]

    elif experiment == 'do4_median_weight':
        #The weight of a cluster will be computed as follows:
        #  Compute the average vector of all sentences in the cluster (each sentence is a vector;
        #  the average vector serves as the median vector).
        #  Then compute the cos sim between the query vector and the median vector.
        #  This score is the weight of the cluster.
        pass
    else:
        for i, subgraph in enumerate(clusters.subgraphs()):
            edge_sums = 0
            for idx, v in enumerate(subgraph.vs):
                if False:
                    print("GOT: " + str(v.attribute_names()))
                    print("Node: " + str(v['label']))
                edge_sim = sims[query_index][v['s_idx']]  #Use stored sentence index to look up old cosine sim value
                edge_sums += edge_sim
            avg_weight = edge_sums / subgraph.vcount()
            cluster_weights += [avg_weight]
            #D print("Cluster #"+str(i)+" has node count: "+str(subgraph.vcount())+" avg weight: "+str(avg_weight))

    print("For topic: " + str(topic_id) + " Done clustering took: " + str(time_clustering) + " seconds")

    if False:  #Dview
        output_clusters(g, communities, clusters, cluster_weights=cluster_weights)  #Dview
        g.write_pickle(fname="save_clustered_graph.dat")

    print("Visualize clusters...")
    view_graph_clusters(g, clusters)

    return g, clusters, cluster_weights, query_sentence, query_index, uG
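The comments in the function above describe how a VertexDendrogram carries a "hint" about where to cut it, and how as_clustering() can either use that hint or be overridden with an explicit cluster count. A minimal, self-contained sketch of that behaviour (the Zachary karate club graph is just a stand-in example graph, not part of the code above):

from igraph import Graph

g = Graph.Famous("Zachary")            # stand-in undirected graph
dendrogram = g.community_fastgreedy()  # returns a VertexDendrogram

# No argument: cut at the hint (the modularity-maximizing merge count)
clusters_hint = dendrogram.as_clustering()

# Override the hint by requesting an explicit number of clusters
clusters_four = dendrogram.as_clustering(n=4)

print(len(clusters_hint), clusters_hint.modularity)
print(len(clusters_four), clusters_four.modularity)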
def _update(self):
    membership = [i for i in self.gutil.membership]
    self.strategy.update_parts(VertexClustering(self.graph, membership=membership))
def _update(self):
    membership = [learner.action for learner in self.learning.learners]
    self.strategy.update_parts(
        VertexClustering(self.graph, membership=membership))