Exemple #1
0
def simpleTests():
    """Smoke-test UnionFind: set creation, union bookkeeping and find.

    Exercises makeSet/find/union/countGroups and inspects ``uf.data``
    entries, where index 0 of an entry is the stored parent and index 1 is
    the value the assertion messages call the "order".
    """
    forest = UnionFind()
    members = range(10)

    def expectGroups(expected):
        # Shared check for the group counter after each stage.
        assert forest.countGroups() == expected, "Counted wrong number of groups."

    for member in members:
        forest.makeSet(member)
    for member in members:
        assert forest.find(member) == member, "Parent not initialized correctly."
    expectGroups(10)

    # Merging two singletons: 0 becomes the representative.
    forest.union(0, 1)
    assert forest.find(1) == 0, "Parent not updated correctly."
    assert forest.data[0][1] == 1, "Order not updated for equal trees correctly."
    expectGroups(9)

    # Merging a singleton into the existing pair must not raise the order.
    forest.union(1, 2)
    assert forest.find(2) == 0, "Parent not updated correctly."
    assert forest.data[0][1] == 1, "Order not updated for unequal trees correctly."
    expectGroups(8)

    # Build a second tree {3, 4, 5} and merge it with the first one.
    forest.union(3, 4)
    forest.union(4, 5)
    forest.union(0, 3)
    assert forest.data[0][1] == 2, "Order not updated for unequal trees."
    # The stored parent of 5 is only rewritten once find(5) has been called.
    assert forest.data[5][0] == 3, "Parent should not be updated until find operation."
    assert forest.find(5) == 0, "Find operation returned wrong parent."
    assert forest.data[5][0] == 0, "Parent should have been updated."
    expectGroups(5)
    def testFind(self):
        """find() on a freshly built UnionFind returns the queried element itself."""
        forest = UnionFind(list(range(1, 8)))

        for element in (1, 6):
            self.assertEqual(forest.find(element), element)
        self.assertNotEqual(forest.find(7), 5)
class Kruskal:
    """Kruskal-style edge processing over a weighted graph given as text lines.

    Attributes:
        ufSet: UnionFind holding one set per node (nodes are 0-based here).
        edges: list of (node_a, node_b, cost) tuples sorted by ascending cost.
        mst:   list of accepted edges; only set after mstKruskal() has run.

    Both mstKruskal() and clusterKruskal() merge sets inside ``ufSet``, so a
    fresh instance is needed for each run.
    """

    def __init__(self, data):
        """Parse the graph description.

        data: sequence of strings. The first whitespace-separated token of
              data[0] is the node count. Every later line holds
              "<node> <node> <cost>" as integers, with 1-based node numbers.
        """
        nodes = int(data[0].split()[0])
        self.ufSet = UnionFind()
        for n in range(nodes):
            self.ufSet.makeSet(n)

        self.edges = []
        for line in data[1:]:
            # Build a real list: a Python 3 ``map`` object cannot be indexed.
            row = [int(field) for field in line.split()]
            # Shift the 1-based node numbers of the input to 0-based ids.
            self.edges.append((row[0] - 1, row[1] - 1, row[2]))

        self.edges.sort(key=itemgetter(2))

    def mstKruskal(self):
        """Run Kruskal's algorithm over all edges.

        Stores the accepted edges in ``self.mst`` and returns the sum of
        their costs.
        """
        mst = []
        total = 0
        for edge in self.edges:
            # An edge inside a single set would close a cycle: skip it.
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue

            self.ufSet.union(edge[0], edge[1])
            mst.append(edge)
            total += edge[2]

        self.mst = mst
        return total

    def clusterKruskal(self, k):
        """Merge cheapest edges until ``k`` groups remain.

        Returns the cost of the cheapest edge that still joins two different
        groups once ``k`` groups have been reached. Returns None when the
        edge list is exhausted first.
        """
        print("Running Clustering, k={0}".format(k))
        done = False
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue

            if done:
                # First cross-group edge after reaching k groups.
                print("Smallest unallocated edge: {0}".format(edge))
                return edge[2]

            self.ufSet.union(edge[0], edge[1])
            if self.ufSet.countGroups() == k:
                done = True
    def testUnionNotIntegers(self):
        """union()/find() also work when the elements are strings."""
        forest = UnionFind(["bye", "a", "80", "cat"])

        forest.union("a", "80")
        forest.union("80", "cat")

        # "bye" was never merged, so its representative must lie outside the group.
        self.assertFalse(forest.find("bye") in set(["80", "a", "cat"]))
        for member in ("a", "cat", "80"):
            self.assertTrue(forest.find(member) in set(["80", "a", "cat"]))
def compute_max_clusters_dist(edges, k, num_nodes=500):
    """Return the first field of the first edge that still separates two clusters.

    edges:     iterable of indexable triples. They are sorted in ascending
               tuple order, so field 0 acts as the cost; fields 1 and 2 are
               the two endpoints.
    k:         number of groups at which merging stops.
    num_nodes: number of nodes, labelled 1..num_nodes. Defaults to 500, the
               value that used to be hard-coded.

    Raises IndexError if the edges run out before the stop condition holds or
    before an edge between two different clusters is found.
    """
    edges = sorted(edges)
    clusters = UnionFind()
    for node in range(1, num_nodes + 1):
        clusters.make_set(node)
    ithEdge = 0

    # Merge along the cheapest edges until k groups remain.
    # NOTE(review): union() is called without a prior same-set check, so this
    # presumably relies on UnionFind.union ignoring already-joined endpoints.
    while clusters.number_of_groups != k or len(clusters) != num_nodes:
        clusters.union(edges[ithEdge][1], edges[ithEdge][2])
        ithEdge += 1

    # Skip edges whose endpoints ended up in the same cluster.
    while clusters.find(edges[ithEdge][1]) == clusters.find(edges[ithEdge][2]):
        ithEdge += 1
    return edges[ithEdge][0]
def find_segments(v, edges, k):
    """Greedily merge components along edges in ascending weight order.

    v:     number of vertices; sizes both the UnionFind and the threshold array.
    edges: list of objects exposing ``a``, ``b`` (endpoints) and ``w`` (weight).
           The list is sorted in place by ``w``.
    k:     constant used for the initial threshold and for the ``k / size``
           term added after every merge.

    Returns the UnionFind describing the resulting components.
    """
    edges.sort(key=lambda item: item.w)
    forest = UnionFind(v)
    limit = k * np.ones(v)
    for item in edges:
        root_a = forest.find(item.a)
        root_b = forest.find(item.b)
        if root_a == root_b:
            continue
        # Merge only when the edge is within the threshold of both components.
        if not (item.w <= limit[root_a] and item.w <= limit[root_b]):
            continue
        forest.union(root_a, root_b)
        merged = forest.find(root_a)
        # New threshold: the joining weight plus k over the merged size.
        limit[merged] = item.w + k / forest.csize[merged]

    return forest
Exemple #7
0
 def f_equivalence_classes(self):
     """Partition the states into finite-difference equivalence classes.

     Uses the experimental O(n^2) algorithm: the pairs reported by
     ``right_finite_states`` on the symmetric difference of this object with
     itself are merged in a union-find, which is returned as a list of lists.
     """
     difference = symmetric_difference(self, self)
     diagonal = [(state, state) for state in self.states]
     equivalent_pairs = difference.right_finite_states(diagonal)

     classes = UnionFind()
     for state in self.states:
         classes.make_set(state)
     for first, second in equivalent_pairs:
         root1 = classes.find(first)
         root2 = classes.find(second)
         if root1 != root2:
             classes.union(root1, root2)
     return classes.as_lists()
Exemple #8
0
 def f_equivalence_classes(self):
     """Return the states grouped into finite-difference equivalence classes.

     Experimental O(n^2) algorithm: start from the diagonal pairs (x, x),
     take the pairs that ``right_finite_states`` reports on the symmetric
     difference of this object with itself, and merge them with a union-find.
     """
     sym_diff = symmetric_difference(self, self)
     start_pairs = [(s, s) for s in self.states]
     merged_pairs = sym_diff.right_finite_states(start_pairs)

     partition = UnionFind()
     for s in self.states:
         partition.make_set(s)
     for left, right in merged_pairs:
         left_root = partition.find(left)
         right_root = partition.find(right)
         if left_root != right_root:
             partition.union(left_root, right_root)
     return partition.as_lists()
def equationsPossible(equations):
    """Return True when no "!=" equation contradicts the "==" equations.

    equations: iterable of strings whose first and last characters are the
               two variables and whose second character is "=" (equality) or
               "!" (inequality). Variables are mapped to integers by
               subtracting 97 from their character code.
    """
    groups = UnionFind()

    # Pass 1: merge the two sides of every equality between distinct variables.
    for equation in equations:
        lhs = ord(equation[0]) - 97
        rhs = ord(equation[-1]) - 97
        if equation[0] != equation[-1] and equation[1] == "=":
            groups.union(lhs, rhs)

    # Pass 2: an inequality whose two sides share a group is a contradiction.
    for equation in equations:
        lhs = ord(equation[0]) - 97
        rhs = ord(equation[-1]) - 97
        if groups.find(lhs) == groups.find(rhs) and equation[1] == "!":
            return False
    return True
    def kruskalAlgorithm(G):
        """Return a new graph holding the edges Kruskal's algorithm accepts on G.

        G: networkx graph whose edges carry a 'weight' attribute. Node labels
           are shifted by -1 before being handed to the union-find, so they
           are presumably the integers 1..n (confirm against the caller).

        The returned graph has all nodes of G and the accepted edges, added
        without their weight data.
        """
        forest = UnionFind(G.number_of_nodes())
        by_weight = sorted(G.edges(data=True),
                           key=lambda item: item[2]['weight'])
        accepted = []
        for item in by_weight:
            # Accept the edge only when its endpoints are not yet connected.
            if forest.find(item[0] - 1) != forest.find(item[1] - 1):
                accepted.append(item)
                forest.union(item[0] - 1, item[1] - 1)

        T = nx.Graph()
        T.add_nodes_from(G.nodes())
        T.add_edges_from((item[0], item[1]) for item in accepted)
        return T
Exemple #11
0
	def kruskal(self):
		"""Return the edges Kruskal's algorithm accepts, each as ``u + v``.

		Edges are gathered from every adjacency list, so an undirected edge
		appears once per direction; the second occurrence is rejected by the
		same-set check. The edges are processed in ascending
		(weight, endpoint, endpoint) order.
		"""
		UnionSet = UnionFind()
		UnionSet.makeset(self.V())

		edges = [(self.Weight(u, v), v, u) for v in self.V() for u in self.Adj(v)]
		edges.sort()

		Trees = []
		for _weight, u, v in edges:
			# Accept the edge only when it links two different sets.
			if UnionSet.find(u) != UnionSet.find(v):
				Trees.append(u + v)
				UnionSet.union(u, v)
		return Trees
Exemple #12
0
def kruskal(edges, num_vertex):
    """Return the total cost of the edges accepted by Kruskal's algorithm.

    edges:      iterable of indexable items (endpoint, endpoint, cost).
    num_vertex: value passed to the UnionFind constructor.

    ``uft.find`` is called with both endpoints and its result is used as a
    truth value -- presumably "already in the same set"; verify against the
    UnionFind implementation.
    """
    forest = UnionFind(num_vertex)
    total = 0    # sum of the costs in the minimum spanning tree

    for edge in sorted(edges, key=lambda item: item[2]):
        if forest.find(edge[0], edge[1]):
            continue
        forest.union(edge[0], edge[1])
        total += edge[2]
    return total
Exemple #13
0
def unite_all_segment_on_same_line(lines_mat, lines_info, width, height):
    """Group segments that lie on the same line and are adjacent.

    lines_mat:  per-segment rows; the first four entries of a row are passed
                to ``are_lines_adjacent``.
    lines_info: 2-D array with one row per segment; columns 0:2, 2, 3 and 4
                are handed to the UnionFind constructor.
    width, height: forwarded unchanged to ``are_lines_adjacent``.

    Returns the UnionFind after all pairwise merges.
    """
    n_lines = len(lines_info)
    uf = UnionFind(n_lines, lines_info[:, 0:2], lines_info[:, 2],
                   lines_info[:, 3], lines_info[:, 4], lines_mat[:])

    for i in range(n_lines):
        for j in range(n_lines):
            # Nothing to do for a segment with itself or for merged groups.
            if i == j or uf.find(i) == uf.find(j):
                continue
            # Slope and intercept are read through the union-find on every
            # pair rather than cached from lines_info.
            l1_slope = uf.get_slope(i)
            l1_b = uf.get_b(i)
            l2_slope = uf.get_slope(j)
            l2_b = uf.get_b(j)
            if is_on_same_line(l1_slope, l2_slope, l1_b, l2_b) and \
                    are_lines_adjacent(lines_mat[i][0:4], lines_mat[j][0:4], width, height):
                uf.union(i, j)
    return uf
Exemple #14
0
def main():
    """Demo of UnionFind on ten elements; prints each element with find()."""
    uf = UnionFind(10)
    nums = list(range(10))

    # First round: pair up neighbours (0,1), (2,3), ...
    for first, second in ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9)):
        uf.unify(first, second)
    for num in nums:
        print(num, uf.find(num))

    # Second round: join some of the pairs.
    for first, second in ((1, 3), (5, 7), (6, 9)):
        uf.unify(first, second)
    for num in nums:
        print(num, uf.find(num))

    # List every element whose entry in uf.id is 4.
    for num in range(uf.size):
        if uf.id[num] == 4:
            print(num)
def max_spacing_k_clustering(G, V, k):
    '''
    Compute the maximum spacing of a k-clustering with a Kruskal-style merge.

    G is a list of edges; each entry is a tuple (u, v, edge_cost).
    V is a list of vertices; only its length is used.
    k is the number of clusters.

    Returns the cost of the cheapest remaining edge that joins two different
    clusters after len(V) - k merges. heappop raises IndexError if the edges
    run out first.
    '''
    clusters = UnionFind()

    def joins_separate_clusters(a, b):
        # No cycle if either vertex is not in any cluster yet (find() gives
        # None) or the two vertices belong to different clusters.
        return (clusters.find(a) is None or clusters.find(b) is None
                or clusters.find(a) != clusters.find(b))

    pending = []  # min-heap of (cost, u, v)
    for u, v, cost in G:
        heappush(pending, (cost, u, v))

    # An MST has n-1 edges; stopping k-1 merges early leaves k clusters.
    merges_needed = len(V) - k
    merged = 0
    while merged < merges_needed:
        cost, u, v = heappop(pending)
        if joins_separate_clusters(u, v):
            clusters.union(u, v)
            merged += 1

    # Skip edges whose endpoints have already ended up in the same cluster.
    while True:
        cost, u, v = heappop(pending)
        if joins_separate_clusters(u, v):
            return cost
def max_spacing_k_clustering(G, V, k):
    '''
    Return the maximum spacing of a k-clustering (variant of Kruskal's MST).

    G: list of (u, v, edge_cost) tuples describing the edges.
    V: list of vertices; only len(V) is read.
    k: number of clusters to stop at.

    The result is the cost of the first edge, in ascending cost order, that
    still links two different clusters once len(V) - k merges are done.
    '''
    unionfind = UnionFind()

    def is_cross_edge(first, second):
        # Either endpoint unknown to the union-find (find() is None), or the
        # endpoints sit in different clusters.
        return (unionfind.find(first) is None
                or unionfind.find(second) is None
                or unionfind.find(first) != unionfind.find(second))

    queue = []  # heap ordered by cost
    for u, v, cost in G:
        heappush(queue, (cost, u, v))

    # n - 1 merges give a spanning tree; n - k merges leave k clusters.
    remaining = len(V) - k
    done = 0
    while done < remaining:
        cost, u, v = heappop(queue)
        if is_cross_edge(u, v):
            unionfind.union(u, v)
            done += 1

    # The next cross-cluster edge defines the spacing.
    while True:
        cost, u, v = heappop(queue)
        if is_cross_edge(u, v):
            return cost
    def testUnion(self):
        """union() merges groups step by step; find() reports a member of the group."""
        forest = UnionFind([1, 2, 3, 4, 5, 6, 7])

        self.assertEqual(forest.find(6), 6)

        forest.union(6, 7)
        for member in (6, 7):
            self.assertTrue(forest.find(member) in set([6, 7]))
        self.assertFalse(forest.find(3) in set([6, 7]))

        forest.union(5, 3)
        self.assertFalse(forest.find(3) in set([6, 7]))
        self.assertTrue(forest.find(5) in set([5, 3]))

        # Joining the two pairs yields one group of four.
        forest.union(3, 6)
        for member in (5, 7, 3, 6):
            self.assertTrue(forest.find(member) in set([3, 5, 6, 7]))
nodeUnion = UnionFind(numClusters)


nodeCt = 0
# Build the membership set once; rebuilding it per node made the loop
# quadratic in the number of nodes.
existingNodes = set(nodes)
for node in nodes:
    # Candidate values for this node (Hamming distance <= 2); merging them
    # one by one links neighbours of neighbours transitively.
    posscands = findcandidates(node)
    # Keep only the candidates that actually exist.
    actualcands = list(set(posscands) & existingNodes)
    # Nodes are stored in the union-find under their ids.
    nodeid = nodeDict[str(node)]
    for cand in actualcands:
        # Merge when the node and the candidate have different roots.
        rootnode = nodeUnion.find(nodeid)
        candid = nodeDict[str(cand)]
        rootcand = nodeUnion.find(candid)
        if rootnode != rootcand:
            nodeUnion.union(nodeid, candid)
            numClusters -= 1

print(numClusters)
'''
#find distances bw each and every node, node^2 time
for i in range(numNodes):
    for j in range(i, numNodes):
        hamdist = bin(nodes[i]^nodes[j]).count("1") #calculating hamming distance bw two nodes
        curedge = (hamdist, i, j)
        hq.heappush(edges,curedge)
'''
Exemple #19
0
def build(G):
    """Build the TreeIndex bottom-up with a union-find structure.

    G is a networkx graph; nodes are used as list indices below, so they are
    presumably the integers 0..N-1 (TODO confirm).

    Returns a tuple (root, vertexTNodelist, coreDict): the root TNode, the
    per-vertex list of TNodes, and the core numbers from nx.core_number.

    Uses the Python 2 dict API (iteritems / has_key).
    """
    N = nx.number_of_nodes(G)  # number of nodes in the graph
    # Step 1: compute the k-core decomposition and group nodes by coreness.
    '步骤1:计算k-core,按coreness分组'
    coreDict = nx.core_number(G)
    # Group the nodes by their core number.
    Vk = defaultdict(list)  # the dict values are lists
    for key, value in coreDict.iteritems():
        Vk[value].append(key)
    # Sort Vk by coreness (the key), descending.
    # sortedVk=sorted(Vk.items(),key=lambda d:d[0],reverse=True)
    # Step 2: initialise the union-find and the auxiliary data structures.
    '步骤2:初始化并查集和一些需要的数据结构'
    unodeArr = []  # the union-find nodes, indexed by graph node
    uf = UnionFind()  # class that holds all the union-find operations
    restNodeList = []  # tree nodes without a parent; attached under the core-0 root at the end
    vertexTNodelist = [None] * N  # list mapping a graph node to its TNode
    core0List = []  # nodes with coreness 0; they form the root of the tree
    for i in range(N):
        unode = UNode(i)
        uf.makeSet(unode)
        unodeArr.append(unode)
    # Step 3: build the tree bottom-up.
    '步骤3:自底向上建立树'
    # Level by level, from the highest core number down.
    for key in sorted(Vk.keys(), reverse=True):
        curcore = key
        vkList = Vk[key]
        if curcore > 0:
            idUFMap = {}  # (id->UNode); a dict here (unodeArr is a list) because these ids need not be contiguous
            # Step 3.1: find connected components among the nodes of this
            # core value, using the temporary union-find nodes in idUFMap.
            '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap'
            for id in vkList:
                if not idUFMap.has_key(id):  # add to Vk
                    unode = UNode(id)
                    uf.makeSet(unode)
                    idUFMap[id] = unode
                for ngid in G.neighbors(id):
                    if coreDict[ngid] >= coreDict[id]:  # handle the larger cores first
                        if coreDict[ngid] > coreDict[id]:
                            ngid = uf.find(unodeArr[ngid]
                                           ).value  # a neighbour with a larger core was already processed; use its parent instead
                        if not idUFMap.has_key(ngid):  # add to V'
                            unode = UNode(ngid)
                            uf.makeSet(unode)
                            idUFMap[ngid] = unode
                        uf.union(idUFMap[id], idUFMap[ngid])
            # Step 3.2: use the temporary union-find result to group the
            # graph nodes and to find the children of each tree node.
            '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子'
            ufGNodeMap = defaultdict(
                list)  # (UNode->[vertex]): group representative -> graph nodes of that group
            ufTNodeMap = defaultdict(list)  # (UNode->[TNode]): group representative -> child TNodes
            for reId, reUNode in idUFMap.iteritems():
                newParent = uf.find(reUNode)  # the node's parent in the new (temporary) union-find
                if coreDict[reId] == curcore:  # nodes with the current core value form one group
                    ufGNodeMap[newParent].append(reId)
                if coreDict[reId] > curcore:  # bottom-up order: this reId should already have been processed
                    # Original note: "the parent of reId recorded by the outer union-find".
                    # NOTE(review): this reads unodeArr[reId] directly without uf.find(); presumably
                    # reId is already a representative because of the substitution in step 3.1 -- confirm.
                    oldParent = unodeArr[reId]
                    tnode = vertexTNodelist[oldParent.represent]
                    ufTNodeMap[newParent].append(tnode)
            # Step 3.3: create the new TNodes and link the tree nodes.
            '步骤3.3:产生新的TNode节点并建立树节点之间的联系'
            for parent, nodeList in ufGNodeMap.iteritems():
                childList = ufTNodeMap[parent]
                tnode = TNode(curcore)  # create a new tree node
                tnode.nodeList = nodeList
                if childList:  # attach the children when there are any
                    tnode.childList = childList  # original author's question: is a deep copy needed here?
                restNodeList.append(tnode)  # assume for now that this node has no parent
                # Update the (id->TNode) mapping.
                for nodeId in nodeList:
                    vertexTNodelist[nodeId] = tnode
                # Update the list of parentless tree nodes.
                # NOTE(review): when childList is empty this iterates whatever TNode sets as its
                # default childList -- confirm that default is an empty sequence.
                for subTNode in tnode.childList:
                    restNodeList.remove(subTNode)
            # Step 3.4: update the outer union-find.
            '步骤3.4: 更新外面的并查集'
            for id in vkList:
                x = unodeArr[id]  # UNode of the current node
                for ngid in G.neighbors(id):
                    if coreDict[ngid] >= curcore:  # edge priority: larger cores are checked first to keep the bottom-up order
                        y = unodeArr[ngid]
                        uf.union(x, y)
                # Update the represent node: keep the member with the smaller core number.
                xRoot = uf.find(x)
                xRepresent = uf.find(x).represent
                if coreDict[xRepresent] > coreDict[id]:
                    xRoot.represent = id
        else:  # nodes with core 0 become the root
            core0List = vkList

    # Step 4: build the root node.
    '步骤4:建立root节点'
    root = TNode(0)
    root.nodeList = core0List
    root.childList = copy.deepcopy(restNodeList)
    # Step 5: build the inverted index of the nodeList attributes on the tree nodes.
    '步骤5:在树节点上获得nodeList的属性的倒排'
    attachKw(root, G)
    return root, vertexTNodelist, coreDict
Exemple #20
0
            hq.heappush(edges, curedge)
        #finally append to nodes and increment nodeCounter
        nodes.append(curval)
        if nodeCounter % 1000 == 0:
            print(nodeCounter)
        nodeCounter += 1
    f.close()
'''
#find distances bw each and every node, node^2 time
for i in range(numNodes):
    for j in range(i, numNodes):
        hamdist = bin(nodes[i]^nodes[j]).count("1") #calculating hamming distance bw two nodes
        curedge = (hamdist, i, j)
        hq.heappush(edges,curedge)
'''

numClusters = numNodes
verts = UnionFind(numNodes)

# Merge along edges in ascending cost order until the cheapest remaining edge
# has cost >= 3. Stopping on an empty heap avoids the IndexError that
# heappop raises when every edge is cheaper than 3.
while edges:
    curedge = hq.heappop(edges)
    if curedge[0] >= 3:
        break
    root1 = verts.find(curedge[1])
    root2 = verts.find(curedge[2])
    if root1 != root2:
        verts.union(root1, root2)
        numClusters -= 1

print(numClusters)
edges = []

# File layout: the first line is the node count; every other line is
# "<src> <dest> <cost>" with 1-based node numbers.
with open('clustering1.txt', 'r') as f:
    numNodes = int(f.readline())
    for line in f:
        nodescost = [int(x) for x in line.split()]
        if not nodescost:
            continue  # tolerate blank lines
        # -1 to go from 1-based to 0-based node ids
        newedge = {
            'src': nodescost[0] - 1,
            'dest': nodescost[1] - 1,
            'cost': nodescost[2]
        }
        edges.append(newedge)
# No explicit close(): the with-statement already closes the file.

edges = sorted(edges, key=lambda edge: edge['cost'])

numClusters = numNodes
k = 4
verts = UnionFind(numNodes)

# Keep merging until one merge past k clusters: the edge that takes the
# count from k to k - 1 is the cheapest edge between two of the k clusters,
# i.e. the spacing that is printed below.
# A cursor replaces edges.pop(0), which shifts the whole list on every call.
nextEdge = 0
while numClusters >= k:
    curedge = edges[nextEdge]
    nextEdge += 1
    root1 = verts.find(curedge['src'])
    root2 = verts.find(curedge['dest'])
    if root1 != root2:
        verts.union(root1, root2)
        numClusters -= 1

print(curedge['cost'])
class GraphGenerator:
    """Generate a random weighted graph: a random spanning tree plus filler edges.

    The adjacency of node i is a flat list [nbr0, w0, nbr1, w1, ...], so a
    node with d neighbours has an adjacency list of length 2 * d.
    """

    def __init__(self, nodes, density,
                 graphFilePath, outputFilePath,
                 leftProb = 1 / 3.0, rightProb = 1 / 3.0):
        """Store the parameters and create an empty graph.

        nodes:          number of nodes, labelled 0..nodes-1.
        density:        neighbour count generateGraph() tries to reach per node.
        graphFilePath:  file written by writeGraph(mode=True).
        outputFilePath: file written by writeGraph(mode=False).
        leftProb, rightProb: probabilities of the "left" and "right" merge
                        moves in generateTree(); the "cross" move gets the
                        remaining probability.
        """
        self.__nodes = nodes
        self.__density = density
        self.__graphFilePath = graphFilePath
        self.__outputFilePath = outputFilePath

        self.__leftProb = leftProb
        self.__crossProb = 1 - leftProb - rightProb
        self.__rightProb = rightProb

        self.__UF = UnionFind(nodes)
        self.__graph = [[] for i in range(nodes)]

    def __pickAttachPoint(self, node):
        """Return ``node`` or, at random, one of its current neighbours."""
        adjacency = self.__graph[node]
        if len(adjacency) > 0 and random.random() >= 1.0 / len(adjacency):
            rand = random.randint(1, len(adjacency) >> 1) - 1
            # Neighbours sit at the even positions of the flat list.
            return adjacency[rand << 1]
        return node

    def generateTree(self):
        """Connect all nodes with random tree edges of weight 1..100.

        ``trees`` holds one representative per current component. Every round
        merges two of them, taken from the front, from both ends, or from
        the back of the list, and records one edge between the components.
        """
        trees = list(range(self.__nodes))

        while len(trees) > 1:
            # Occasionally reshuffle so the merge order is not purely positional.
            if random.random() < 0.005:
                random.shuffle(trees)

            choice = random.random()
            if choice <= self.__leftProb:
                # Left move: merge the two front entries, keep the root in front.
                node1 = trees.pop(0)
                node2 = trees.pop(0)
                self.__UF.union(node1, node2)
                trees.insert(0, self.__UF.find(node1))
            elif choice <= self.__leftProb + self.__crossProb:
                # Cross move: merge front and back, put the root at a random end.
                node1 = trees.pop(0)
                node2 = trees.pop()
                self.__UF.union(node1, node2)
                if random.random() <= 0.5:
                    trees.insert(0, self.__UF.find(node1))
                else:
                    trees.append(self.__UF.find(node1))
            else:
                # Right move: merge the two back entries, keep the root at the back.
                node1 = trees.pop()
                node2 = trees.pop()
                self.__UF.union(node1, node2)
                trees.append(self.__UF.find(node1))

            weight = random.randint(1, 100)

            # The edge may start from a neighbour instead. A neighbour lies in
            # the same component, so the edge still joins the two components.
            node1 = self.__pickAttachPoint(node1)
            node2 = self.__pickAttachPoint(node2)

            self.__graph[node1].extend([node2, weight])
            self.__graph[node2].extend([node1, weight])

    def writeGraph(self, mode = True):
        """Write the graph as text.

        mode: True writes to graphFilePath, False to outputFilePath.

        Format: one line per node id, a line containing "#", then one line
        "<i> <j> <weight>" per edge with i < j.
        """
        path = self.__graphFilePath if mode else self.__outputFilePath

        # The with-statement closes the file even when a write fails.
        with open(path, 'w') as outputFile:
            for i in range(self.__nodes):
                outputFile.write(str(i) + '\n')
            outputFile.write('#\n')

            for i in range(self.__nodes):
                adjacency = self.__graph[i]
                for j in range(0, len(adjacency), 2):
                    # Each edge is stored at both endpoints; emit it once.
                    if adjacency[j] <= i:
                        continue
                    outputFile.write('{0} {1} {2}\n'.format(
                        i, adjacency[j], adjacency[j + 1]))

    def generateGraph(self):
        """Add filler edges of weight 101..200 until nodes reach ``density`` neighbours.

        For each node i below the target, edges to higher-numbered nodes that
        are not yet neighbours are added until i has ``density`` neighbours
        or no candidates remain.
        """
        density = self.__density

        # mark[x] == 1 flags x as a neighbour of the node being processed.
        mark = [-1] * self.__nodes
        for i in range(self.__nodes):
            if len(self.__graph[i]) >= 2 * density:
                continue

            # Entries present before this round; only these are marked/unmarked.
            count = len(self.__graph[i])
            for j in range(0, count, 2):
                mark[self.__graph[i][j]] = 1

            for j in range(i + 1, self.__nodes):
                if mark[j] == -1:
                    weight = random.randint(101, 200)
                    self.__graph[i].extend([j, weight])
                    self.__graph[j].extend([i, weight])

                if len(self.__graph[i]) >= 2 * density:
                    break

            for j in range(0, count, 2):
                mark[self.__graph[i][j]] = -1
Exemple #23
0
class ClusterHamming:
    """Cluster nodes given as bit strings by Hamming distance with a union-find.

    Attributes:
        bits:        number of bits per node, read from the header line.
        uf:          UnionFind with one set per node index.
        hammingData: maps a bit string (spaces removed) to the list of node
                     indices that carry exactly that string.
    """

    def __init__(self, data):
        """Parse the input lines.

        data: list of strings. The first entry holds "<nodes> <bits>" and is
              removed from the list (the caller's list is modified); the
              following ``nodes`` entries are the bit strings.
        """
        (nodes, self.bits) = map(int, data.pop(0).split())
        self.uf = UnionFind()
        for n in range(nodes):
            self.uf.makeSet(n)

        self.hammingData = defaultdict(list)
        for n in range(nodes):
            s = data[n].replace(' ', '')
            self.hammingData[s].append(n)

    def flip(self, s, flipbits):
        """
        Given an input string (s) and tuple of indices (flipbits), returns a new
        string with bits at specified indices flipped.

        The length of (flipbits) determines the resulting hamming distance.
        Any character other than '1' at a flipped position becomes '1'.
        """
        return ''.join(
            ('0' if c == '1' else '1') if i in flipbits else c
            for i, c in enumerate(s))

    def getHammingPermutations(self, s, n):
        """
        Generate the variants of s whose Hamming distance to s is less than
        or equal to n. The list starts with s itself, followed by the
        variants in order of increasing distance.
        """
        result = [s]
        for d in range(1, n + 1):
            for flipbits in combinations(range(self.bits), d):
                result.append(self.flip(s, flipbits))
        return result

    def printSummary(self):
        """Print every cluster followed by the bit strings that belong to it."""
        resultMap = defaultdict(list)
        for key, nodes in self.hammingData.items():
            # ``nodes`` is a list of node indices; find() needs one node. Any
            # member works once run() has merged the nodes that share a key.
            cluster = self.uf.find(nodes[0])
            resultMap[cluster].append(key)

        for cluster, keys in resultMap.items():
            print("\n\nCluster {0}:".format(cluster))
            for key in keys:
                print("\t{0}".format(key))

    def run(self, minDist):
        """Merge all nodes closer than ``minDist`` and return the group count.

        Two nodes are merged when the Hamming distance between their bit
        strings is at most ``minDist - 1``.
        """
        # Work on a shallow copy so hammingData keeps all keys for lookups.
        data = copy(self.hammingData)
        while data:
            (nodeKey, refNodes) = data.popitem()
            # Nodes with the same bit string are at distance 0.
            for other in refNodes[1:]:
                self.uf.union(refNodes[0], other)

            nearestNodes = self.getHammingPermutations(nodeKey, minDist - 1)
            for testNodeKey in nearestNodes:
                # Only keys still pending exist and have not been paired yet.
                if testNodeKey not in data:
                    continue

                for n in self.hammingData[testNodeKey]:
                    if self.uf.find(n) == self.uf.find(refNodes[0]):
                        continue
                    self.uf.union(refNodes[0], n)

        return self.uf.countGroups()
def clustering_big():
    """Count the clusters when nodes at Hamming distance <= 2 are merged.

    Reads 'clustering_big.txt': the second token of the first line is the
    number of bits; every other line is one node as space-separated bits.

    Returns the number of clusters: nodes with no neighbour within distance
    2 count as one cluster each, plus the groups the union-find reports for
    the connected nodes.
    """
    rV = []
    with open('clustering_big.txt') as f:
        header = f.readline()
        bits = int(header.split()[1])
        for line in f:
            rV.append(int(line.replace(' ', ''), 2))
    # One sort instead of an insort per line (each insort shifts the list).
    rV.sort()

    n = len(rV)

    # V collects the indices of nodes that have a neighbour within distance
    # 2; G collects the (index, index) pairs to merge.
    V = set()
    G = []

    # Comparing all pairs is O(n^2). Instead, for each value generate every
    # value obtained by flipping one or two bits and look it up in rV.
    ops = [1 << i for i in range(bits)]

    def flip(b, i):
        'flip the ith bit of b'
        return b ^ ops[i]

    def link(dups, index):
        # Connect every duplicate of the current value to rV[index].
        for i in dups:
            G.append((i, index))
            V.add(i)
        V.add(index)

    u = 0
    while u < n:
        x = rV[u]
        # rV[u:dup_end] are all equal to x (distance 0): link them pairwise.
        dup_end = bisect_right(rV, x)
        dups = range(u, dup_end)
        for i in dups:
            for j in range(i + 1, dup_end):
                G.append((i, j))
                V.add(i)
                V.add(j)

        for j in range(bits):
            # One flipped bit.
            y = flip(x, j)
            # bi_index_range presumably yields the indices of rV holding y.
            for index in bi_index_range(rV, y):
                link(dups, index)
            # Two flipped bits.
            for k in range(j + 1, bits):
                z = flip(y, k)
                for index in bi_index_range(rV, z):
                    link(dups, index)

        # Jump past the duplicates; they were handled together above.
        u = dup_end

    # Merge the collected pairs and count the resulting groups.
    unionfind = UnionFind()
    for u, v in G:
        # No cycle if either vertex is not in any cluster yet or the two
        # vertices belong to different clusters.
        if unionfind.find(u) is None or unionfind.find(v) is None or unionfind.find(u) != unionfind.find(v):
            unionfind.union(u, v)

    return n - len(V) + unionfind.getNumGroups()
def clustering_big(filename='clustering_big.txt'):
    """Count the clusters formed by linking labels at Hamming distance <= 2.

    The input file starts with a header line whose second whitespace-separated
    field is the number of bits per label.  Every following line is one label
    written as bits, optionally separated by spaces; blank lines are ignored.

    Instead of comparing every pair of labels (O(n^2)), each distinct label is
    XOR-ed with every 1-bit and 2-bit mask and the resulting value is looked
    up in the sorted label list.

    Args:
        filename: path of the input file.  Defaults to 'clustering_big.txt',
            the path that used to be hard-coded.

    Returns:
        The number of labels that have no other label within distance 2, plus
        the number of groups the union-find reports for the remaining labels.

    Raises:
        IndexError: if the header line has fewer than two fields.
        ValueError: if the bit count or a label line cannot be parsed.

    NOTE(review): relies on two names defined elsewhere in this file.
    ``bi_index_range(rV, value)`` is assumed to yield the indices of ``rV``
    holding ``value`` (nothing when absent), and ``UnionFind.find`` is assumed
    to return None for an element it has not seen -- confirm against their
    definitions.
    """
    rV = []
    with open(filename) as f:
        header = f.readline()
        bits = int(header.split()[1])
        for line in f:
            digits = line.replace(' ', '').strip()
            if not digits:
                # Tolerate blank / trailing empty lines instead of crashing
                # inside int().
                continue
            rV.append(int(digits, 2))
    # One sort replaces a per-line sorted insert (each insert shifts the list).
    rV.sort()

    n = len(rV)

    # Every mask that toggles one bit or two bits.  They do not depend on the
    # label, so build them once.  The order mirrors the traversal
    # "bit j, then every pair (j, k) with k > j".
    flip_masks = []
    for j in range(bits):
        low = 1 << j
        flip_masks.append(low)
        for k in range(j + 1, bits):
            flip_masks.append(low | (1 << k))

    # linked: indices of labels that have at least one neighbour within
    #         distance 2 (duplicates count as distance 0).
    # edges:  index pairs to be merged by the union-find below.
    linked = set()
    edges = []

    u = 0
    while u < n:
        x = rV[u]
        # rV is sorted, so all copies of x occupy the index range [u, end).
        end = bisect_right(rV, x)
        dups = range(u, end)

        # Copies of the same label are at distance 0: link them pairwise.
        for i in dups:
            for j in range(i + 1, end):
                edges.append((i, j))
                linked.add(i)
                linked.add(j)

        # XOR with a mask toggles exactly the bits set in that mask.
        for mask in flip_masks:
            for match in bi_index_range(rV, x ^ mask):
                # Every copy of x shares the same neighbours, so the lookup
                # is done once for the whole run of duplicates.
                for i in dups:
                    edges.append((i, match))
                    linked.add(i)
                linked.add(match)

        # Skip the whole run of duplicates; it has been handled as a group.
        u = end

    # Merge the linked labels and count the resulting groups.
    unionfind = UnionFind()
    for a, b in edges:
        root_a = unionfind.find(a)
        # Merge unless both endpoints are already known and share a root.
        # When root_a is not None, a None result for b also compares unequal,
        # so this covers the "b not seen yet" case without a second lookup.
        if root_a is None or unionfind.find(b) != root_a:
            unionfind.union(a, b)

    # Labels that never got an edge each form a cluster of their own.
    return n - len(linked) + unionfind.getNumGroups()
Exemple #26
0
    def build(self):
        '采用并查集自底向上建立TreeIndex'
        N = nx.number_of_nodes(ShellStructIndex.G)  #图的节点个数
        '步骤1:计算k-core,按coreness分组'
        ##k-core分解
        ShellStructIndex.coreDict = nx.core_number(ShellStructIndex.G)
        #将节点按照core number进行分组
        Vk = defaultdict(list)  #字典的value是列表
        for key, value in ShellStructIndex.coreDict.iteritems(
        ):  ###(2017.3.5:发现不在图里面的节点,怀疑是nx.core_number函数# )
            Vk[value].append(key)
        #将Vk按照coreness(key)进行排序,降序
        # sortedVk=sorted(Vk.items(),key=lambda d:d[0],reverse=True)
        '步骤2:初始化并查集和一些需要的数据结构'
        restNodeList = []  #储存没有父母的节点,最后直接连接到core为0的根节点下方作为孩子
        '为了处理节点不连续的问题,找iD最大的节点,将maxID替换所有的N'
        maxID = 0
        for nodeID in ShellStructIndex.G.nodes():
            if nodeID > maxID:
                maxID = nodeID
        ShellStructIndex.vertexTNodelist = [None] * (maxID + 1
                                                     )  #图节点到TNode的映射的列表
        # print str(N+1)
        core0List = []  #coreness=0的节点,作为这棵树的根
        #############初始化并查集#############
        unodeArr = []  #存储的是并查集的节点(id->UNode)
        uf = UnionFind()  #包含所有并查集方法的类
        for i in range(maxID + 1):  #加1是因为可能从1才开始编号
            unode = UNode(i)
            uf.makeSet(unode)
            unodeArr.append(unode)
        '步骤3:自底向上建立树'
        #level by level,
        tnodeCounter = 0  ##计算TNode个数的计数器
        for key in sorted(Vk.keys(), reverse=True):  #Vk按照core值从大到小排序
            curcore = key
            vkList = Vk[key]
            if curcore > 0:
                idUFMap = {
                }  #(id->UNode)这里用字典但是unodeArr用列表是因为这里的id不一定是连续的(临时的一个并查集映射)
                '步骤3.1: 先在同一个core值节点中找连通分量,利用一个临时并查集idUFMap'
                for id in vkList:
                    if not idUFMap.has_key(id):  #加入Vk
                        unode = UNode(id)
                        uf.makeSet(unode)
                        idUFMap[id] = unode
                    for ngid in ShellStructIndex.G.neighbors(id):
                        if ShellStructIndex.coreDict[
                                ngid] >= ShellStructIndex.coreDict[
                                    id]:  #先处理core大的
                            if ShellStructIndex.coreDict[
                                    ngid] > ShellStructIndex.coreDict[id]:
                                ngid = uf.find(
                                    unodeArr[ngid]
                                ).value  #如果邻居的core比较大,说明已经处理过,用父母代替
                            if not idUFMap.has_key(ngid):  #加入V'
                                unode = UNode(ngid)
                                uf.makeSet(unode)
                                idUFMap[ngid] = unode
                            uf.union(idUFMap[id],
                                     idUFMap[ngid])  #合并id和他的邻居(或者邻居的父母)
                '步骤3.2:按照上面临时并查集的结果,给图节点分组,找树节点孩子'
                ufGNodeMap = defaultdict(
                    list)  #(UNode->[vertex])unode到同一个组的unode的图节点的字典
                ufTNodeMap = defaultdict(list)  #(UNode->[TNode])unode到TNode的映射
                for reId, reUNode in idUFMap.iteritems():
                    newParent = uf.find(reUNode)  #在新的并查集里面,节点的父母
                    if ShellStructIndex.coreDict[
                            reId] == curcore:  #同一个core值的节点分成一组
                        ufGNodeMap[newParent].append(reId)
                    if ShellStructIndex.coreDict[
                            reId] > curcore:  #由于是自底向上的,当前这个reid应该已经处理过了
                        oldParent = unodeArr[reId]  #这个是外面的并查集记录的reId的父母
                        tnode = ShellStructIndex.vertexTNodelist[
                            oldParent.represent]
                        ufTNodeMap[newParent].append(tnode)
                '步骤3.3:产生新的TNode节点并建立树节点之间的联系'
                for parent, nodeList in ufGNodeMap.iteritems():
                    childList = ufTNodeMap[parent]
                    tnodeCounter = tnodeCounter + 1  #
                    # print 'tnodeCounter:',tnodeCounter
                    tnode = TNode(
                        curcore,
                        tnodeCounter)  #新建一个树节点(给定coreness和树节点编号)(re:2017.2.26)
                    tnode.nodeList = nodeList
                    if childList:  #如果孩子不为空,给树节点添加孩子节点
                        tnode.childList = childList  #这里用不用深拷贝?
                        #####给孩子节点添加父母,方便后面的retrieve(re:2017.2.26)########
                        for chid in childList:
                            chid.parent = tnode
                    restNodeList.append(tnode)  #假设这个节点目前没有父母咯
                    #更新(id->TNode)
                    for nodeId in nodeList:
                        # print nodeId
                        ShellStructIndex.vertexTNodelist[nodeId] = tnode
                    #更新没有父母的树节点列表
                    for subTNode in tnode.childList:
                        restNodeList.remove(subTNode)
                '步骤3.4: 更新外面包含所有节点的并查集'
                for id in vkList:
                    x = unodeArr[id]  #当前节点的UNode
                    for ngid in ShellStructIndex.G.neighbors(id):
                        if ShellStructIndex.coreDict[
                                ngid] >= curcore:  #遍历边的优先级,core大的先检查,保证自底向上的
                            y = unodeArr[ngid]
                            uf.union(x, y)
                    #更新represent节点
                    xRoot = uf.find(x)
                    xRepresent = uf.find(x).represent
                    if ShellStructIndex.coreDict[
                            xRepresent] > ShellStructIndex.coreDict[id]:
                        xRoot.represent = id
            else:  #core为0的节点作为根
                core0List = vkList

        '步骤4:建立root节点'
        tnodeCounter = tnodeCounter + 1  #(re:2017.2.26)
        # print 'tnodeCounter:', tnodeCounter
        ShellStructIndex.root = TNode(core=0, data=tnodeCounter)
        ShellStructIndex.root.nodeList = core0List
        ShellStructIndex.root.childList = restNodeList  #这里需要深拷贝(copy.deepcopy(restNodeList))吗?
        ####(re:2017.2.26)
        for chid in ShellStructIndex.root.childList:
            chid.parent = ShellStructIndex.root
        #####把节点到树的映射也更新一下####
        for v in core0List:
            ShellStructIndex.vertexTNodelist[v] = ShellStructIndex.root
        '步骤5:在树节点上获得nodeList的属性的倒排'