def simpleTests():
    """Run a fixed sequence of sanity assertions against ``UnionFind``.

    The checks cover set creation, group counting, the order value kept in
    ``data[x][1]`` after unions, and the parent stored in ``data[x][0]``
    before and after a ``find`` call.
    """
    forest = UnionFind()
    find = forest.find
    union = forest.union
    count = forest.countGroups

    members = range(10)
    for member in members:
        forest.makeSet(member)
    # Every freshly created set must be its own representative.
    for member in members:
        assert find(member) == member, "Parent not initialized correctly."
    assert count() == 10, "Counted wrong number of groups."

    # Merge two singletons.
    union(0, 1)
    assert find(1) == 0, "Parent not updated correctly."
    assert forest.data[0][1] == 1, "Order not updated for equal trees correctly."
    assert count() == 9, "Counted wrong number of groups."

    # Merge a singleton into the existing two-element tree.
    union(1, 2)
    assert find(2) == 0, "Parent not updated correctly."
    assert forest.data[0][1] == 1, "Order not updated for unequal trees correctly."
    assert count() == 8, "Counted wrong number of groups."

    # Build a second tree rooted at 3, then merge both trees.
    union(3, 4)
    union(4, 5)
    union(0, 3)
    assert forest.data[0][1] == 2, "Order not updated for unequal trees."
    assert forest.data[5][0] == 3, "Parent should not be updated until find operation."
    assert find(5) == 0, "Find operation returned wrong parent."
    assert forest.data[5][0] == 0, "Parent should have been updated."
    assert count() == 5, "Counted wrong number of groups."
def testFind(self):
    """Before any union, ``find`` maps 1 and 6 to themselves and 7 not to 5."""
    uf = UnionFind([1, 2, 3, 4, 5, 6, 7])
    for element in (1, 6):
        self.assertEqual(uf.find(element), element)
    self.assertNotEqual(uf.find(7), 5)
class Kruskal:
    """Kruskal-style spanning tree and clustering over a weighted edge list.

    ``data`` is a sequence of text lines. The first token of ``data[0]`` is
    the node count; every following line holds ``u v cost`` with 1-based
    node ids, which are stored 0-based.
    """

    def __init__(self, data):
        """Parse ``data``, create one set per node and sort edges by cost."""
        nodes = int(data[0].split()[0])
        self.ufSet = UnionFind()
        for n in range(nodes):
            self.ufSet.makeSet(n)

        self.edges = []
        for line in data[1:]:
            # A real list (not a lazy ``map`` object) so that indexing also
            # works on Python 3.
            row = [int(token) for token in line.split()]
            self.edges.append((row[0] - 1, row[1] - 1, row[2]))
        self.edges.sort(key=itemgetter(2))

    def mstKruskal(self):
        """Build a spanning forest greedily and return its total cost.

        The chosen edges are stored in ``self.mst``.
        """
        mst = []
        total = 0
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue  # both endpoints already connected
            self.ufSet.union(edge[0], edge[1])
            mst.append(edge)
            total += edge[2]
        self.mst = mst
        return total

    def clusterKruskal(self, k):
        """Merge cheapest edges until ``k`` groups remain; return the spacing.

        The spacing is the cost of the cheapest edge that still joins two
        different groups once exactly ``k`` groups are left. Returns ``None``
        when no such edge exists.
        """
        print("Running Clustering, k={0}".format(k))
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue
            # Check the group count *before* merging, so that a request for
            # as many clusters as there are groups does not merge anything.
            if self.ufSet.countGroups() == k:
                print("Smallest unallocated edge: {0}".format(edge))
                return edge[2]
            self.ufSet.union(edge[0], edge[1])
        return None
def testUnionNotIntegers(self):
    """Union and find must also work when the elements are strings."""
    uf = UnionFind(["bye", "a", "80", "cat"])
    uf.union("a", "80")
    uf.union("80", "cat")

    merged = set(["80", "a", "cat"])
    # "bye" was never merged, so its representative lies outside the group.
    self.assertFalse(uf.find("bye") in merged)
    for member in ("a", "cat", "80"):
        self.assertTrue(uf.find(member) in merged)
def compute_max_clusters_dist(edges, k, num_vertices=500):
    """Return the cost of the first cluster-separating edge after merging.

    Args:
        edges: iterable of indexable records; index 0 is the value the
            records are sorted by and that is returned, indices 1 and 2 are
            the two vertex labels.
        k: target number of groups.
        num_vertices: number of vertices, labelled ``1..num_vertices``.
            Defaults to 500, the value that used to be hard-coded.

    Raises:
        IndexError: if the edge list is exhausted before a separating edge
            is found.
    """
    edges = sorted(edges)
    clusters = UnionFind()
    for vertex in range(1, num_vertices + 1):
        clusters.make_set(vertex)

    ithEdge = 0
    # NOTE(review): number_of_groups / len(clusters) semantics come from the
    # UnionFind class, which is not visible here - confirm.
    while clusters.number_of_groups != k or len(clusters) != num_vertices:
        clusters.union(edges[ithEdge][1], edges[ithEdge][2])
        ithEdge += 1

    # Skip edges whose endpoints already share a cluster.
    while clusters.find(edges[ithEdge][1]) == clusters.find(edges[ithEdge][2]):
        ithEdge += 1
    return edges[ithEdge][0]
def find_segments(v, edges, k):
    """Merge components along edges in increasing weight order.

    Args:
        v: vertex count; passed to ``UnionFind`` and used to size the
            per-component threshold array.
        edges: list of objects exposing ``a``, ``b`` (endpoints) and ``w``
            (weight). The list is sorted in place by ``w``.
        k: scale constant; every threshold starts at ``k`` and becomes
            ``w + k / size`` for a component after it grows.

    Returns:
        The ``UnionFind`` instance describing the resulting components.
    """
    edges.sort(key=lambda edge: edge.w)
    segments = UnionFind(v)
    thresh = k * np.ones(v)

    for edge in edges:
        a = segments.find(edge.a)
        b = segments.find(edge.b)
        if a != b:
            if edge.w <= thresh[a] and edge.w <= thresh[b]:
                segments.union(a, b)
                a = segments.find(a)
                # Force true division: with an integer k and an integer
                # component size, Python 2 would floor the quotient.
                thresh[a] = edge.w + k / float(segments.csize[a])
    return segments
def f_equivalence_classes(self):
    """Return a partition of the states into finite-difference
    equivalence classes, using the experimental O(n^2) algorithm."""
    product = symmetric_difference(self, self)
    diagonal = [(state, state) for state in self.states]
    equivalent_pairs = product.right_finite_states(diagonal)

    partition = UnionFind()
    for state in self.states:
        partition.make_set(state)

    # Merge the classes of every pair reported as equivalent.
    for left, right in equivalent_pairs:
        left_root = partition.find(left)
        right_root = partition.find(right)
        if left_root != right_root:
            partition.union(left_root, right_root)

    return partition.as_lists()
def equationsPossible(equations):
    """Return whether the given equality/inequality equations are consistent.

    Each equation is a string whose first and last characters are lowercase
    variable names and whose second character is ``"="`` (equality) or
    ``"!"`` (inequality). Returns False as soon as an inequality relates two
    variables that the equalities placed in the same set, True otherwise.
    """
    components = UnionFind()
    base = 97  # ord('a'): variables are mapped to 0-based indices

    # Pass 1: merge the two sides of every equality between distinct names.
    for equation in equations:
        lhs = ord(equation[0]) - base
        rhs = ord(equation[-1]) - base
        if equation[0] != equation[-1] and equation[1] == "=":
            components.union(lhs, rhs)

    # Pass 2: an inequality inside a single merged set is a contradiction.
    for equation in equations:
        lhs = ord(equation[0]) - base
        rhs = ord(equation[-1]) - base
        if components.find(lhs) == components.find(rhs) and equation[1] == "!":
            return False
    return True
def kruskalAlgorithm(G):
    """Return a new graph holding the edges picked greedily by weight.

    Edges of ``G`` are visited in increasing ``'weight'`` order and kept
    when their endpoints are in different sets. Node labels are shifted by
    -1 before being handed to ``UnionFind``. The result contains every node
    of ``G`` and the kept edges without their attributes.
    """
    chosen = []
    forest = UnionFind(G.number_of_nodes())
    by_weight = sorted(G.edges(data=True), key=lambda edge: edge[2]['weight'])

    for edge in by_weight:
        src = edge[0] - 1
        dst = edge[1] - 1
        if forest.find(src) != forest.find(dst):
            chosen.append(edge)
            forest.union(src, dst)

    tree = nx.Graph()
    for node in G.nodes():
        tree.add_node(node)
    for edge in chosen:
        tree.add_edge(edge[0], edge[1])
    return tree
def kruskal(self):
    """Return the edges chosen by Kruskal's algorithm.

    Every adjacency ``(v, u)`` reported by ``self.Adj`` is weighted with
    ``self.Weight(u, v)`` and processed in increasing order. An edge is kept
    when its endpoints are in different sets, and is recorded as the sum
    (concatenation) of its two vertex labels.
    """
    UnionSet = UnionFind()
    UnionSet.makeset(self.V())

    # The weight lookup table that used to be built here was never read.
    edges = sorted(
        (self.Weight(u, v), v, u) for v in self.V() for u in self.Adj(v)
    )

    Trees = []
    for _, first, second in edges:
        if UnionSet.find(first) != UnionSet.find(second):
            Trees.append(first + second)
            UnionSet.union(first, second)
    return Trees
def kruskal(edges, num_vertex):
    """Return the total cost of the edges accepted in increasing cost order.

    ``edges`` holds records indexed as ``(u, v, cost)``.
    NOTE(review): ``find(u, v)`` is called with two arguments and its result
    is used as "already connected" - confirm against the UnionFind class.
    """
    forest = UnionFind(num_vertex)
    total_cost = 0  # sum of the costs of the accepted edges
    for edge in sorted(edges, key=lambda item: item[2]):
        if forest.find(edge[0], edge[1]):
            continue
        forest.union(edge[0], edge[1])
        total_cost += edge[2]
    return total_cost
def unite_all_segment_on_same_line(lines_mat, lines_info, width, height):
    """Union every pair of segments that are collinear and adjacent.

    Args:
        lines_mat: per-segment rows; the first four entries of a row are
            passed to ``are_lines_adjacent``.
        lines_info: 2-D array with one row per segment; columns 0-1, 2, 3
            and 4 are handed to ``UnionFind``.
        width, height: forwarded unchanged to ``are_lines_adjacent``.

    Returns:
        The ``UnionFind`` instance after all merges.
    """
    n_lines = len(lines_info)
    uf = UnionFind(n_lines, lines_info[:, 0:2], lines_info[:, 2],
                   lines_info[:, 3], lines_info[:, 4], lines_mat[:])

    for i in range(n_lines):
        for j in range(n_lines):
            if i == j or uf.find(i) == uf.find(j):
                continue
            # Slope/intercept are read through the union-find on every pair
            # because they are queried per element after earlier unions.
            l1_slope = uf.get_slope(i)
            l1_b = uf.get_b(i)
            l2_slope = uf.get_slope(j)
            l2_b = uf.get_b(j)
            if is_on_same_line(l1_slope, l2_slope, l1_b, l2_b) and \
                    are_lines_adjacent(lines_mat[i][0:4], lines_mat[j][0:4],
                                       width, height):
                uf.union(i, j)
    return uf
def main():
    """Demo: unify pairs of 0..9, printing each element's root twice.

    After the second round of unions, also prints every index whose ``id``
    entry equals 4.
    """
    uf = UnionFind(10)
    nums = list(range(10))

    for left, right in ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)):
        uf.unify(left, right)
    for num in nums:
        print(num, uf.find(num))

    for left, right in ((1, 3), (5, 7), (6, 9)):
        uf.unify(left, right)
    for num in nums:
        print(num, uf.find(num))

    for index in range(uf.size):
        if uf.id[index] == 4:
            print(index)
def max_spacing_k_clustering(G, V, k):
    '''
    Apply a variant of Kruskal's MST algorithm to max-spacing k-clustering
    problems. Return the maximum spacing of k-clustering.

    G is a list which represents a graph. Each value of G, G[i], is a tuple
    (u, v, edge_cost) which represents two vertices of an edge and the cost
    of that edge.
    V is a list of vertices.
    k is the number of clusters.

    Raises IndexError if the edges run out before the clustering is complete
    or before a cluster-separating edge is found.
    '''
    from heapq import heapify

    # Use a Union-Find data structure to represent clusters.
    unionfind = UnionFind()

    def joins_two_clusters(u, v):
        # No cycle if either vertex has not been added to any cluster yet or
        # the two belong to different clusters. Each root is looked up once.
        root_u = unionfind.find(u)
        if root_u is None:
            return True
        root_v = unionfind.find(v)
        return root_v is None or root_u != root_v

    # heapify builds the heap in linear time instead of n separate pushes.
    heap = [(cost, u, v) for u, v, cost in G]
    heapify(heap)

    # An MST has n-1 edges; stopping k-1 merges early leaves k clusters.
    merges_left = len(V) - k
    while merges_left > 0:
        cost, u, v = heappop(heap)  # cheapest remaining edge
        if joins_two_clusters(u, v):
            unionfind.union(u, v)
            merges_left -= 1

    # Skip edges whose endpoints already ended up in the same cluster.
    while True:
        cost, u, v = heappop(heap)
        if joins_two_clusters(u, v):
            return cost
def max_spacing_k_clustering(G, V, k):
    '''
    Apply a variant of Kruskal's MST algorithm to max-spacing k-clustering
    problems. Return the maximum spacing of k-clustering.

    G is a list which represents a graph. Each value of G, G[i], is a tuple
    (u, v, edge_cost) which represents two vertices of an edge and the cost
    of that edge.
    V is a list of vertices.
    k is the number of clusters.

    Raises IndexError if the edges run out before the clustering is complete
    or before a cluster-separating edge is found.
    '''
    from heapq import heapify

    # Use a Union-Find data structure to represent clusters.
    unionfind = UnionFind()

    def joins_two_clusters(u, v):
        # No cycle if either vertex has not been added to any cluster yet or
        # the two belong to different clusters. Each root is looked up once.
        root_u = unionfind.find(u)
        if root_u is None:
            return True
        root_v = unionfind.find(v)
        return root_v is None or root_u != root_v

    # heapify builds the heap in linear time instead of n separate pushes.
    heap = [(cost, u, v) for u, v, cost in G]
    heapify(heap)

    # An MST has n-1 edges; stopping k-1 merges early leaves k clusters.
    merges_left = len(V) - k
    while merges_left > 0:
        cost, u, v = heappop(heap)  # cheapest remaining edge
        if joins_two_clusters(u, v):
            unionfind.union(u, v)
            merges_left -= 1

    # Skip edges whose endpoints already ended up in the same cluster.
    while True:
        cost, u, v = heappop(heap)
        if joins_two_clusters(u, v):
            return cost
def testUnion(self):
    """Representatives must stay inside their group as groups are merged."""
    uf = UnionFind([1, 2, 3, 4, 5, 6, 7])
    self.assertEqual(uf.find(6), 6)

    # {6, 7}
    uf.union(6, 7)
    pair = set([6, 7])
    for member in (6, 7):
        self.assertTrue(uf.find(member) in pair)
    self.assertFalse(uf.find(3) in pair)

    # {5, 3} is a separate group
    uf.union(5, 3)
    self.assertFalse(uf.find(3) in pair)
    self.assertTrue(uf.find(5) in set([5, 3]))

    # joining 3 and 6 merges both groups
    uf.union(3, 6)
    merged = set([3, 5, 6, 7])
    for member in (5, 7, 3, 6):
        self.assertTrue(uf.find(member) in merged)
# Merge every node with the candidates produced by findcandidates() that
# actually occur in `nodes`, counting down the number of clusters.
nodeUnion = UnionFind(numClusters)
nodeCt = 0
# Built once up front: rebuilding set(nodes) for every node made the whole
# scan quadratic in the number of nodes.
existingNodes = set(nodes)
for node in nodes:
    # Candidate values for this node (original note: neighbours with
    # Hamming distance <= 2).
    posscands = findcandidates(node)
    # Keep only the candidates that actually exist.
    actualcands = list(set(posscands) & existingNodes)
    # Nodes are stored in the union-find under their ids.
    nodeid = nodeDict[str(node)]
    for cand in actualcands:
        # Compare the roots of node and candidate; merge when they differ.
        rootnode = nodeUnion.find(nodeid)
        candid = nodeDict[str(cand)]
        rootcand = nodeUnion.find(candid)
        if rootnode != rootcand:
            nodeUnion.union(nodeid, candid)
            numClusters -= 1
print(numClusters)

# Disabled brute-force alternative (all pairwise distances, quadratic time),
# kept for reference:
#
# for i in range(numNodes):
#     for j in range(i, numNodes):
#         hamdist = bin(nodes[i]^nodes[j]).count("1")  # Hamming distance
#         curedge = (hamdist, i, j)
#         hq.heappush(edges, curedge)
def build(G):
    """Build the tree index bottom-up with a union-find structure.

    Nodes of ``G`` are grouped by core number and processed from the highest
    core value down to 1; nodes with core number 0 become the root's node
    list.

    Returns:
        A tuple ``(root, vertexTNodelist, coreDict)``: the root ``TNode``,
        the list mapping a graph node id to its ``TNode``, and the core
        number dictionary from ``nx.core_number``.

    NOTE(review): relies on ``dict.iteritems`` / ``dict.has_key``, which only
    exist on Python 2.
    NOTE(review): ``unodeArr`` and ``vertexTNodelist`` are indexed by node id
    and sized by the node count, so ids are presumably 0..N-1 - confirm.
    """
    N = nx.number_of_nodes(G)  # number of nodes in the graph
    # Step 1: compute the k-core decomposition and group nodes by coreness.
    '步骤1:计算k-core,按coreness分组'
    coreDict = nx.core_number(G)
    # Group the nodes by core number.
    Vk = defaultdict(list)  # the dict values are lists
    for key, value in coreDict.iteritems():
        Vk[value].append(key)
    # Vk is visited by coreness (its key) in descending order further below.
    # sortedVk=sorted(Vk.items(),key=lambda d:d[0],reverse=True)
    # Step 2: initialise the union-find and the supporting data structures.
    '步骤2:初始化并查集和一些需要的数据结构'
    unodeArr = []  # the union-find nodes, indexed by graph node id
    uf = UnionFind()  # object providing the union-find operations
    restNodeList = []  # tree nodes without a parent; they end up as children of the root
    vertexTNodelist = [None] * N  # maps a graph node id to its TNode
    core0List = []  # nodes with coreness 0, stored on the root of the tree
    for i in range(N):
        unode = UNode(i)
        uf.makeSet(unode)
        unodeArr.append(unode)
    # Step 3: build the tree bottom-up, level by level.
    '步骤3:自底向上建立树'
    for key in sorted(Vk.keys(), reverse=True):
        curcore = key
        vkList = Vk[key]
        if curcore > 0:
            # (id -> UNode). A dict is used here, while unodeArr is a list,
            # because the ids seen here are not necessarily contiguous.
            idUFMap = {}
            # Step 3.1: find connected components among the nodes of this
            # core value, using the temporary union-find mapping idUFMap.
            '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap'
            for id in vkList:
                if not idUFMap.has_key(id):  # add to Vk
                    unode = UNode(id)
                    uf.makeSet(unode)
                    idUFMap[id] = unode
                for ngid in G.neighbors(id):
                    if coreDict[ngid] >= coreDict[id]:  # only neighbours with at least this core value
                        if coreDict[ngid] > coreDict[id]:
                            # A neighbour with a larger core value was already
                            # processed, so use its union-find root instead.
                            ngid = uf.find(unodeArr[ngid]).value
                        if not idUFMap.has_key(ngid):  # add to V'
                            unode = UNode(ngid)
                            uf.makeSet(unode)
                            idUFMap[ngid] = unode
                        uf.union(idUFMap[id], idUFMap[ngid])
            # Step 3.2: use the temporary union-find result to group graph
            # nodes and to find the children of each new tree node.
            '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子'
            ufGNodeMap = defaultdict(list)  # (UNode -> [vertex]) graph nodes sharing one root
            ufTNodeMap = defaultdict(list)  # (UNode -> [TNode]) child tree nodes per root
            for reId, reUNode in idUFMap.iteritems():
                newParent = uf.find(reUNode)  # root of the node in the temporary union-find
                if coreDict[reId] == curcore:  # nodes of the current core value form one group
                    ufGNodeMap[newParent].append(reId)
                if coreDict[reId] > curcore:  # bottom-up order: this id was handled on an earlier level
                    oldParent = unodeArr[reId]  # entry of reId in the outer union-find
                    tnode = vertexTNodelist[oldParent.represent]
                    ufTNodeMap[newParent].append(tnode)
            # Step 3.3: create the new TNodes and link them to their children.
            '步骤3.3:产生新的TNode节点并建立树节点之间的联系'
            for parent, nodeList in ufGNodeMap.iteritems():
                childList = ufTNodeMap[parent]
                tnode = TNode(curcore)  # create a new tree node
                tnode.nodeList = nodeList
                if childList:  # attach children only when there are any
                    tnode.childList = childList  # original author's question: is a deep copy needed here?
                restNodeList.append(tnode)  # this node has no parent for now
                # Update the (id -> TNode) mapping.
                for nodeId in nodeList:
                    vertexTNodelist[nodeId] = tnode
                # The children now have a parent; drop them from the orphan list.
                for subTNode in tnode.childList:
                    restNodeList.remove(subTNode)
            # Step 3.4: update the outer union-find.
            '步骤3.4: 更新外面的并查集'
            for id in vkList:
                x = unodeArr[id]  # UNode of the current node
                for ngid in G.neighbors(id):
                    if coreDict[ngid] >= curcore:  # only edges towards equal or larger core values
                        y = unodeArr[ngid]
                        uf.union(x, y)
                # Update the represent node of the root.
                xRoot = uf.find(x)
                xRepresent = uf.find(x).represent
                if coreDict[xRepresent] > coreDict[id]:
                    xRoot.represent = id
        else:  # nodes with core number 0 go to the root
            core0List = vkList
    # Step 4: create the root node.
    '步骤4:建立root节点'
    root = TNode(0)
    root.nodeList = core0List
    root.childList = copy.deepcopy(restNodeList)
    # Step 5: hand the tree to attachKw (original note: build the inverted
    # index of the nodeList attributes on the tree nodes).
    '步骤5:在树节点上获得nodeList的属性的倒排'
    attachKw(root, G)
    return root, vertexTNodelist, coreDict
hq.heappush(edges, curedge) #finally append to nodes and increment nodeCounter nodes.append(curval) if nodeCounter % 1000 == 0: print(nodeCounter) nodeCounter += 1 f.close() ''' #find distances bw each and every node, node^2 time for i in range(numNodes): for j in range(i, numNodes): hamdist = bin(nodes[i]^nodes[j]).count("1") #calculating hamming distance bw two nodes curedge = (hamdist, i, j) hq.heappush(edges,curedge) ''' numClusters = numNodes verts = UnionFind(numNodes) while True: curedge = hq.heappop(edges) if curedge[0] >= 3: break root1 = verts.find(curedge[1]) root2 = verts.find(curedge[2]) if root1 != root2: verts.union(root1, root2) numClusters += -1 print(numClusters)
# Read the edge list: the first line is the node count, every other line is
# "src dest cost" with 1-based node ids.
edges = []
with open('clustering1.txt', 'r') as f:
    numNodes = int(f.readline())
    for line in f:
        # split() without an argument tolerates repeated blanks and the
        # trailing newline; blank lines are skipped.
        nodescost = [int(x) for x in line.split()]
        if not nodescost:
            continue
        # -1 converts from 1-based to 0-based indices.
        newedge = {
            'src': nodescost[0] - 1,
            'dest': nodescost[1] - 1,
            'cost': nodescost[2]
        }
        edges.append(newedge)
# The with-block already closed the file; no explicit close() is needed.

edges = sorted(edges, key=lambda k: k['cost'])
numClusters = numNodes
k = 4
verts = UnionFind(numNodes)

# Walk the sorted edges by index instead of pop(0), which shifts the whole
# list on every call. The consumed prefix is dropped once at the end so that
# `edges` is left exactly as repeated pop(0) calls would leave it.
consumed = 0
while numClusters >= k:
    curedge = edges[consumed]
    consumed += 1
    root1 = verts.find(curedge['src'])
    root2 = verts.find(curedge['dest'])
    if root1 != root2:
        verts.union(root1, root2)
        numClusters += -1
del edges[:consumed]

# The last consumed edge is the one that reduced the count below k, so its
# cost is the spacing of the k-clustering.
print(curedge['cost'])
class GraphGenerator:
    """Random connected weighted graph generator.

    Adjacency is stored per node as a flat list
    ``[neighbour, weight, neighbour, weight, ...]``.
    """

    def __init__(self, nodes, density, graphFilePath, outputFilePath,
                 leftProb = 1 / 3.0, rightProb = 1 / 3.0):
        """Store the parameters and create an empty graph on ``nodes`` nodes.

        ``leftProb`` and ``rightProb`` are the probabilities of merging the
        two front or the two back entries of the work list in
        ``generateTree``; the remaining probability merges front with back.
        """
        self.__nodes = nodes
        self.__density = density
        self.__graphFilePath = graphFilePath
        self.__outputFilePath = outputFilePath
        self.__leftProb = leftProb
        self.__crossProb = 1 - leftProb - rightProb
        self.__rightProb = rightProb
        self.__UF = UnionFind(nodes)
        self.__graph = [[] for i in range(nodes)]

    def generateTree(self):
        """Connect all nodes by repeatedly merging two work-list entries.

        Each merge adds one edge with a random weight in 1..100. An endpoint
        that already has neighbours may be replaced by one of its neighbours
        before the edge is recorded.
        """
        graph = self.__graph
        trees = list(range(self.__nodes))
        while len(trees) > 1:
            node1 = node2 = -1
            # Occasionally reshuffle the work list.
            if random.random() < 0.005:
                random.shuffle(trees)
            choice = random.random()
            if choice <= self.__leftProb:
                # Merge the two front entries; the result goes to the front.
                node1 = trees.pop(0)
                node2 = trees.pop(0)
                self.__UF.union(node1, node2)
                trees.insert(0, self.__UF.find(node1))
            elif choice <= self.__leftProb + self.__crossProb:
                # Merge front with back; the result goes to either end.
                node1 = trees.pop(0)
                node2 = trees.pop()
                self.__UF.union(node1, node2)
                if random.random() <= 0.5:
                    trees.insert(0, self.__UF.find(node1))
                else:
                    trees.append(self.__UF.find(node1))
            else:
                # Merge the two back entries; the result goes to the back.
                node1 = trees.pop()
                node2 = trees.pop()
                self.__UF.union(node1, node2)
                trees.append(self.__UF.find(node1))

            weight = random.randint(1, 100)
            # Possibly move each endpoint to one of its existing neighbours
            # (neighbours sit at the even positions of the flat list).
            if len(graph[node1]) > 0 and random.random() >= 1.0 / len(graph[node1]):
                rand = random.randint(1, len(graph[node1]) >> 1) - 1
                node1 = graph[node1][rand << 1]
            if len(graph[node2]) > 0 and random.random() >= 1.0 / len(graph[node2]):
                rand = random.randint(1, len(graph[node2]) >> 1) - 1
                node2 = graph[node2][rand << 1]
            graph[node1].extend([node2, weight])
            graph[node2].extend([node1, weight])

    def writeGraph(self, mode = True):
        """Write the graph to the graph file (``mode`` true) or output file.

        Format: one node id per line, a ``#`` line, then one ``i j weight``
        line per edge with ``j > i``.
        """
        path = self.__graphFilePath if mode else self.__outputFilePath
        # The context manager closes the file even if a write fails.
        with open(path, 'w') as outputFile:
            for i in range(self.__nodes):
                outputFile.write(str(i) + '\n')
            outputFile.write('#\n')
            for i in range(self.__nodes):
                row = self.__graph[i]
                for j in range(0, len(row), 2):
                    if row[j] <= i:
                        continue  # each edge is written once, from its smaller endpoint
                    outputFile.write(str(i) + ' ')
                    outputFile.write(str(row[j]) + ' ')
                    outputFile.write(str(row[j + 1]) + '\n')

    def generateGraph(self):
        """Add random edges (weights 101..200) until nodes reach ``density``.

        For every node with fewer than ``density`` neighbours, edges are
        added to higher-numbered nodes it is not yet adjacent to, until the
        node has ``density`` neighbours or candidates run out.
        """
        density = self.__density
        graph = self.__graph
        mark = [-1] * self.__nodes
        for i in range(self.__nodes):
            if len(graph[i]) >= 2 * density:
                continue
            count = len(graph[i])
            # Mark the neighbours that exist before any edge is added.
            for j in range(0, count, 2):
                mark[graph[i][j]] = 1
            for j in range(i + 1, self.__nodes):
                if mark[j] == -1:
                    weight = random.randint(101, 200)
                    graph[i].extend([j, weight])
                    graph[j].extend([i, weight])
                    if len(graph[i]) >= 2 * density:
                        break
            # Clear only the marks set above; new neighbours were never marked.
            for j in range(0, count, 2):
                mark[graph[i][j]] = -1
class ClusterHamming:
    """Cluster bit-string nodes that lie within a given Hamming distance.

    ``data`` is a list of text lines. The first line (removed from the list)
    holds the node count and the number of bits; the following lines are the
    bit strings, one node per line.
    """

    def __init__(self, data):
        """Parse the header, create one set per node and index the strings."""
        (nodes, self.bits) = map(int, data.pop(0).split())
        self.uf = UnionFind()
        for n in range(nodes):
            self.uf.makeSet(n)

        # bit string -> ids of all nodes carrying exactly that string
        self.hammingData = defaultdict(list)
        for n in range(nodes):
            s = data[n].replace(' ', '')
            self.hammingData[s].append(n)

    def flip(self, s, flipbits):
        """
        Given an input string (s) and tuple of indices (flipbits), returns
        a new string with bits at specified indices flipped. The length of
        (flipbits) determines the resulting hamming distance.
        """
        # A '1' becomes '0'; any other character at a flipped index becomes '1'.
        return ''.join(
            ('0' if c == '1' else '1') if i in flipbits else c
            for i, c in enumerate(s)
        )

    def getHammingPermutations(self, s, n):
        """
        Generate permutations of s whose distance is less than or equal to n
        (s itself comes first).
        """
        result = [s]
        for d in range(1, n + 1):
            for flipbits in combinations(range(self.bits), d):
                result.append(self.flip(s, flipbits))
        return result

    def printSummary(self):
        """Print every cluster followed by the bit strings it contains."""
        resultMap = defaultdict(list)
        for key, nodeIds in self.hammingData.items():
            # Look up the cluster through one node id; the value itself is a
            # list of ids and is not an element of the union-find.
            cluster = self.uf.find(nodeIds[0])
            resultMap[cluster].append(key)

        for cluster, keys in resultMap.items():
            print("\n\nCluster {0}:".format(cluster))
            for key in keys:
                print("\t{0}".format(key))

    def run(self, minDist):
        """Merge all nodes closer than ``minDist``; return the group count."""
        data = copy(self.hammingData)
        while data:
            (nodeKey, refNodes) = data.popitem()
            # Nodes with identical strings always share a cluster.
            for duplicate in refNodes[1:]:
                self.uf.union(refNodes[0], duplicate)

            nearestNodes = self.getHammingPermutations(nodeKey, minDist - 1)
            for testNodeKey in nearestNodes:
                # Only strings that are still unprocessed need merging here.
                if testNodeKey not in data:
                    continue
                for n in self.hammingData[testNodeKey]:
                    if self.uf.find(n) == self.uf.find(refNodes[0]):
                        continue
                    self.uf.union(refNodes[0], n)
        return self.uf.countGroups()
def clustering_big():
    """Count clusters when nodes within Hamming distance 2 are merged.

    Reads ``clustering_big.txt``: the second token of the first line is the
    number of bits, every other line is one node given as space-separated
    bits. Returns the number of nodes that have no close neighbour plus the
    number of groups formed by the remaining ones.
    """
    with open('clustering_big.txt') as f:
        header = f.readline()
        bits = int(header.split()[1])
        # Sorting once is O(n log n); inserting every value into an already
        # sorted list shifts elements on each insertion.
        rV = sorted(int(line.replace(' ', ''), 2) for line in f)
    n = len(rV)

    # Only vertices that have a partner at distance < 3 are collected.
    V = set()
    G = []

    # Brute force over all pairs is O(n^2). Flipping one or two bits instead
    # yields only bits + bits*(bits-1)/2 candidates per value.
    ops = [1 << i for i in range(bits)]

    def flip(b, i):
        'flip the ith bit of b'
        return b ^ ops[i]

    def link(indices, other):
        # Record an edge from every duplicate index to `other`.
        for i in indices:
            G.append((i, other))
            V.add(i)
            V.add(other)

    u = 0
    while u < n:
        x = rV[u]
        # Equal values are adjacent in the sorted list: [u, end) are copies of x.
        end = bisect_right(rV, x)
        dups = range(u, end)
        for i in dups:
            for j in range(i + 1, end):
                G.append((i, j))
                V.add(i)
                V.add(j)
        for j in range(bits):
            # One flipped bit.
            y = flip(x, j)
            # presumably bi_index_range yields the indices of rV holding y -
            # verify against its definition
            for w in bi_index_range(rV, y):
                link(dups, w)
            # Two flipped bits.
            for k in range(j + 1, bits):
                z = flip(y, k)
                for w in bi_index_range(rV, z):
                    link(dups, w)
        u += len(dups)  # all duplicates were handled together

    # Count the clusters formed by the close vertices.
    unionfind = UnionFind()
    for a, b in G:
        # No cycle if either vertex is not in any cluster yet or the two
        # belong to different clusters.
        root_a = unionfind.find(a)
        if root_a is not None:
            root_b = unionfind.find(b)
            if root_b is not None and root_a == root_b:
                continue
        unionfind.union(a, b)
    return n - len(V) + unionfind.getNumGroups()
def clustering_big():
    """Count clusters when nodes within Hamming distance 2 are merged.

    Reads ``clustering_big.txt``: the second token of the first line is the
    number of bits, every other line is one node given as space-separated
    bits. Returns the number of nodes that have no close neighbour plus the
    number of groups formed by the remaining ones.
    """
    with open('clustering_big.txt') as f:
        header = f.readline()
        bits = int(header.split()[1])
        # Sorting once is O(n log n); inserting every value into an already
        # sorted list shifts elements on each insertion.
        rV = sorted(int(line.replace(' ', ''), 2) for line in f)
    n = len(rV)

    # Only vertices that have a partner at distance < 3 are collected.
    V = set()
    G = []

    # Brute force over all pairs is O(n^2). Flipping one or two bits instead
    # yields only bits + bits*(bits-1)/2 candidates per value.
    ops = [1 << i for i in range(bits)]

    def flip(b, i):
        'flip the ith bit of b'
        return b ^ ops[i]

    def link(indices, other):
        # Record an edge from every duplicate index to `other`.
        for i in indices:
            G.append((i, other))
            V.add(i)
            V.add(other)

    u = 0
    while u < n:
        x = rV[u]
        # Equal values are adjacent in the sorted list: [u, end) are copies of x.
        end = bisect_right(rV, x)
        dups = range(u, end)
        for i in dups:
            for j in range(i + 1, end):
                G.append((i, j))
                V.add(i)
                V.add(j)
        for j in range(bits):
            # One flipped bit.
            y = flip(x, j)
            # presumably bi_index_range yields the indices of rV holding y -
            # verify against its definition
            for w in bi_index_range(rV, y):
                link(dups, w)
            # Two flipped bits.
            for k in range(j + 1, bits):
                z = flip(y, k)
                for w in bi_index_range(rV, z):
                    link(dups, w)
        u += len(dups)  # all duplicates were handled together

    # Count the clusters formed by the close vertices.
    unionfind = UnionFind()
    for a, b in G:
        # No cycle if either vertex is not in any cluster yet or the two
        # belong to different clusters.
        root_a = unionfind.find(a)
        if root_a is not None:
            root_b = unionfind.find(b)
            if root_b is not None and root_a == root_b:
                continue
        unionfind.union(a, b)
    return n - len(V) + unionfind.getNumGroups()
def build(self): '采用并查集自底向上建立TreeIndex' N = nx.number_of_nodes(ShellStructIndex.G) #图的节点个数 '步骤1:计算k-core,按coreness分组' ##k-core分解 ShellStructIndex.coreDict = nx.core_number(ShellStructIndex.G) #将节点按照core number进行分组 Vk = defaultdict(list) #字典的value是列表 for key, value in ShellStructIndex.coreDict.iteritems( ): ###(2017.3.5:发现不在图里面的节点,怀疑是nx.core_number函数# ) Vk[value].append(key) #将Vk按照coreness(key)进行排序,降序 # sortedVk=sorted(Vk.items(),key=lambda d:d[0],reverse=True) '步骤2:初始化并查集和一些需要的数据结构' restNodeList = [] #储存没有父母的节点,最后直接连接到core为0的根节点下方作为孩子 '为了处理节点不连续的问题,找iD最大的节点,将maxID替换所有的N' maxID = 0 for nodeID in ShellStructIndex.G.nodes(): if nodeID > maxID: maxID = nodeID ShellStructIndex.vertexTNodelist = [None] * (maxID + 1 ) #图节点到TNode的映射的列表 # print str(N+1) core0List = [] #coreness=0的节点,作为这棵树的根 #############初始化并查集############# unodeArr = [] #存储的是并查集的节点(id->UNode) uf = UnionFind() #包含所有并查集方法的类 for i in range(maxID + 1): #加1是因为可能从1才开始编号 unode = UNode(i) uf.makeSet(unode) unodeArr.append(unode) '步骤3:自底向上建立树' #level by level, tnodeCounter = 0 ##计算TNode个数的计数器 for key in sorted(Vk.keys(), reverse=True): #Vk按照core值从大到小排序 curcore = key vkList = Vk[key] if curcore > 0: idUFMap = { } #(id->UNode)这里用字典但是unodeArr用列表是因为这里的id不一定是连续的(临时的一个并查集映射) '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap' for id in vkList: if not idUFMap.has_key(id): #加入Vk unode = UNode(id) uf.makeSet(unode) idUFMap[id] = unode for ngid in ShellStructIndex.G.neighbors(id): if ShellStructIndex.coreDict[ ngid] >= ShellStructIndex.coreDict[ id]: #先处理core大的 if ShellStructIndex.coreDict[ ngid] > ShellStructIndex.coreDict[id]: ngid = uf.find( unodeArr[ngid] ).value #如果邻居的core比较大,说明已经处理过,用父母代替 if not idUFMap.has_key(ngid): #加入V' unode = UNode(ngid) uf.makeSet(unode) idUFMap[ngid] = unode uf.union(idUFMap[id], idUFMap[ngid]) #合并id和他的邻居(或者邻居的父母) '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子' ufGNodeMap = defaultdict( list) #(UNode->[vertex])unode到同一个组的unode的图节点的字典 ufTNodeMap = defaultdict(list) #(UNode->[TNode])unode到TNode的映射 for reId, reUNode 
in idUFMap.iteritems(): newParent = uf.find(reUNode) #在新的并查集里面,节点的父母 if ShellStructIndex.coreDict[ reId] == curcore: #同一个core值的节点分成一组 ufGNodeMap[newParent].append(reId) if ShellStructIndex.coreDict[ reId] > curcore: #由于是自底向上的,当前这个reid应该已经处理过了 oldParent = unodeArr[reId] #这个是外面的并查集记录的reId的父母 tnode = ShellStructIndex.vertexTNodelist[ oldParent.represent] ufTNodeMap[newParent].append(tnode) '步骤3.3:产生新的TNode节点并建立树节点之间的联系' for parent, nodeList in ufGNodeMap.iteritems(): childList = ufTNodeMap[parent] tnodeCounter = tnodeCounter + 1 # # print 'tnodeCounter:',tnodeCounter tnode = TNode( curcore, tnodeCounter) #新建一个树节点(给定coreness和树节点编号)(re:2017.2.26) tnode.nodeList = nodeList if childList: #如果孩子不为空,给树节点添加孩子节点 tnode.childList = childList #这里用不用深拷贝? #####给孩子节点添加父母,方便后面的retrieve(re:2017.2.26)######## for chid in childList: chid.parent = tnode restNodeList.append(tnode) #假设这个节点目前没有父母咯 #更新(id->TNode) for nodeId in nodeList: # print nodeId ShellStructIndex.vertexTNodelist[nodeId] = tnode #更新没有父母的树节点列表 for subTNode in tnode.childList: restNodeList.remove(subTNode) '步骤3.4: 更新外面包含所有节点的并查集' for id in vkList: x = unodeArr[id] #当前节点的UNode for ngid in ShellStructIndex.G.neighbors(id): if ShellStructIndex.coreDict[ ngid] >= curcore: #遍历边的优先级,core大的先检查,保证自底向上的 y = unodeArr[ngid] uf.union(x, y) #更新represent节点 xRoot = uf.find(x) xRepresent = uf.find(x).represent if ShellStructIndex.coreDict[ xRepresent] > ShellStructIndex.coreDict[id]: xRoot.represent = id else: #core为0的节点作为根 core0List = vkList '步骤4:建立root节点' tnodeCounter = tnodeCounter + 1 #(re:2017.2.26) # print 'tnodeCounter:', tnodeCounter ShellStructIndex.root = TNode(core=0, data=tnodeCounter) ShellStructIndex.root.nodeList = core0List ShellStructIndex.root.childList = restNodeList #这里需要深拷贝(copy.deepcopy(restNodeList))吗? 
####(re:2017.2.26) for chid in ShellStructIndex.root.childList: chid.parent = ShellStructIndex.root #####把节点到树的映射也更新一下#### for v in core0List: ShellStructIndex.vertexTNodelist[v] = ShellStructIndex.root '步骤5:在树节点上获得nodeList的属性的倒排'