Beispiel #1
0
def two():
    """
    Calculate the largest value k such that there is a k-clustering
    with spacing >= 3.

    Vertex labels are read from 'clustering_big.txt'; each raw label is
    passed through the `strip` converter before use.  Every pair of
    distinct labels that `nearby` reports as close (cost <= 2) is merged
    in a UnionFind structure, and the number of remaining groups is the
    answer.

    Returns:
        The value of ``uf.numleaders()`` once all close pairs are merged.
    """

    # Read in the file, converting to str of binary number
    vert_df = pd.read_csv('clustering_big.txt',
                          header=0,
                          names=['vid'],
                          converters={'vid': strip})

    # Duplicate labels are at distance 0, so they collapse to one vertex.
    vertices = vert_df['vid'].unique()
    uf = UnionFind(vertices)
    to_visit = set(vertices)

    while to_visit:
        # Every iteration pulls off a new vertex label and merges it with
        # all not-yet-popped vertices connected at cost <= 2.
        this_v = to_visit.pop()

        # NOTE(review): assumes nearby() returns the members of the given
        # candidate set that are within cost 2 of this_v -- confirm.
        for v in nearby(this_v, to_visit):
            if uf[this_v] != uf[v]:
                uf.union(this_v, v)
                # Deliberately do NOT remove v from to_visit: v must still
                # be popped later so that its own neighbours are examined.
                # Dropping it here would leave chains such as a-b-c (with
                # a and c far apart) only partially merged, depending on
                # pop order.

    return uf.numleaders()
Beispiel #2
0
def one():
    """
    Compute the maximum spacing of a K-clustering of the graph stored in
    'clustering1.txt'.

    Edges (columns v1, v2, cost; space separated, first line skipped as a
    header) are processed in ascending cost order, Kruskal style: edges
    joining two different groups are merged until only K groups remain.
    The cost of the next edge that would join two different groups is the
    spacing of that clustering.

    `K` is read from module scope.

    Returns:
        The cost of the cheapest edge whose endpoints lie in different
        groups once the UnionFind structure holds K (or fewer) groups.

    Raises:
        ValueError: if the edge list is exhausted before such an edge is
            found (for example when K is 1 or the graph has more than K
            connected components).
    """
    edge_df = pd.read_csv('clustering1.txt',
                          sep=" ",
                          header=0,
                          names=['v1', 'v2', 'cost'])

    # Sort in order of least cost.  DataFrame.sort() no longer exists in
    # current pandas; sort_values() is the supported spelling.
    edge_df = edge_df.sort_values('cost')

    # The unique set of vertices appearing at either end of any edge.
    vertices = pd.concat([edge_df['v1'],
                          edge_df['v2']]).unique()

    # Initialize the UnionFind structure with the unmerged vertices.
    uf = UnionFind(vertices)

    for _, rec in edge_df.iterrows():
        v1 = rec['v1']
        v2 = rec['v2']

        # Edges inside an existing group never change the clustering.
        if uf[v1] == uf[v2]:
            continue

        # Already down to K groups: this is the first edge that would
        # form the (K-1)th cluster, so its cost is the spacing.
        if uf.numleaders() <= K:
            return rec['cost']

        uf.union(v1, v2)

    raise ValueError(
        "ran out of edges before finding an edge separating two clusters")