def max_spacing_k_clustering_big(graph_set):
    """Count the clusters left after merging nearby nodes of ``graph_set``.

    Every element of ``graph_set`` starts in its own disjoint set. Nodes are
    then unioned with every candidate that is also present in ``graph_set``:
    first using ``hamming_distance_1_candidates`` for all nodes, then using
    ``hamming_distance_2_candidates`` for all nodes.

    NOTE(review): presumably the two helpers return the set of values at
    Hamming distance 1 and 2 from the given node -- confirm against their
    definitions.

    Args:
        graph_set: Set of node keys; it is intersected with the candidate
            sets, so it must support the ``&`` operator with them.

    Returns:
        The ``connected_components`` value reported by the disjoint set after
        both merge passes.
    """
    forest = DisjointSet.DisjointSet()

    # Seed the structure with one singleton set per node.
    for vertex in graph_set:
        forest.makeSet(DisjointSet.Node(vertex))

    # Run the distance-1 pass to completion before starting the distance-2
    # pass, so unions happen in the same order as two separate loops would.
    candidate_generators = (
        hamming_distance_1_candidates,
        hamming_distance_2_candidates,
    )
    for candidates_of in candidate_generators:
        for vertex in graph_set:
            # Only candidates that actually exist in the input are merged.
            for neighbour in candidates_of(vertex) & graph_set:
                forest.union(forest.nodes[neighbour], forest.nodes[vertex])

    return forest.connected_components
# Example no. 2
# 0
def max_spacing_k_clustering(graph, k):
    """Return the distance of the edge that leaves ``k - 1`` clusters.

    Each edge is read as ``edge[0]`` (the distance) and ``edge[1]`` /
    ``edge[2]`` (the two endpoint keys). Every endpoint is registered in a
    disjoint set, then edges are unioned in the order ``graph`` yields them
    until the structure reports ``k - 1`` connected components.

    NOTE(review): for the result to be the maximum spacing of a k-clustering,
    ``graph`` presumably has to be sorted by ascending distance -- confirm
    that the caller sorts it.
    NOTE(review): an endpoint shared by several edges is passed to
    ``makeSet`` once per occurrence -- confirm ``makeSet`` tolerates repeats.

    Args:
        graph: Collection of edges; it is iterated twice, so it must be
            re-iterable (not a one-shot generator).
        k: Target number of clusters.

    Returns:
        The distance of the last edge processed: the one whose union brought
        the component count to ``k - 1``, or simply the final edge if that
        count was never reached. ``None`` when ``graph`` contains no edges.
    """
    disjoint_set = DisjointSet.DisjointSet()

    # Create sets
    for edge in graph:
        disjoint_set.makeSet(DisjointSet.Node(edge[1]))
        disjoint_set.makeSet(DisjointSet.Node(edge[2]))

    # Bound up front so an empty graph returns None instead of raising
    # UnboundLocalError at the return statement.
    distance = None

    # Union until the edge that would collapse k clusters into k - 1
    for edge in graph:
        distance = edge[0]
        n1 = disjoint_set.nodes[edge[1]]
        n2 = disjoint_set.nodes[edge[2]]
        disjoint_set.union(n1, n2)
        if disjoint_set.connected_components == k - 1:
            break
    return distance