def two():
    """
    Calculate the largest k such that a k-clustering has spacing >= 3.

    Vertex labels are read from 'clustering_big.txt' (one label per row; the
    first line of the file is consumed as a header row).  Every pair of
    vertices that `nearby` reports as connected at cost <= 2 is merged in a
    UnionFind structure, so the groups that remain are separated by a cost
    of at least 3.

    Returns:
        The value of `uf.numleaders()` once all merges are done, i.e. the
        number of groups left in the UnionFind structure.
    """
    # `strip` is defined elsewhere in this file; presumably it turns each
    # row into a compact string of bits -- confirm against its definition.
    vert_df = pd.read_csv('clustering_big.txt', header=0, names=['vid'],
                          converters={'vid': strip})
    # Duplicate labels collapse to a single vertex.
    vertices = vert_df['vid'].unique()
    uf = UnionFind(vertices)

    to_visit = set(vertices)
    while to_visit:
        # Each iteration pulls off one vertex and merges it with every
        # not-yet-popped vertex connected to it at cost <= 2.
        this_v = to_visit.pop()
        for v in nearby(this_v, to_visit):
            if uf[this_v] != uf[v]:
                uf.union(this_v, v)
            # NOTE: v is deliberately left in to_visit.  Removing it after a
            # merge would prevent v from ever being popped and would hide it
            # from the neighbour searches of later vertices, so links such
            # as v--w (where w is not itself near this_v) would be missed
            # and the cluster count would depend on set iteration order.
    return uf.numleaders()
def one():
    """
    Compute the spacing of the K-clustering built by Kruskal-style merging.

    Edges are read from 'clustering1.txt' (space separated rows of
    ``v1 v2 cost``; the first line of the file is consumed as a header row)
    and processed in order of increasing cost.  Edges joining two different
    groups are merged until only K groups remain (K is a module-level
    constant defined elsewhere in this file).  The cost of the next edge
    that would join two different groups is the result.

    Returns:
        The cost of the cheapest edge that still crosses between two of the
        K groups.

    Raises:
        ValueError: if the edge list runs out before such an edge is found
            (for example, the graph cannot be reduced to K groups, or no
            crossing edge is left once it has been).
    """
    edge_df = pd.read_csv('clustering1.txt', sep=" ", header=0,
                          names=['v1', 'v2', 'cost'])
    # Cheapest edges first.  DataFrame.sort() no longer exists in current
    # pandas releases; sort_values() is its replacement.
    edge_df = edge_df.sort_values('cost')

    # Start with every distinct endpoint in its own group.
    vertices = pd.concat([edge_df['v1'], edge_df['v2']]).unique()
    uf = UnionFind(vertices)

    for _, rec in edge_df.iterrows():
        v1 = rec['v1']
        v2 = rec['v2']
        if uf[v1] == uf[v2]:
            # Both endpoints already share a group: the edge neither merges
            # anything nor separates two clusters.
            continue
        if uf.numleaders() > K:
            # Still more than K groups: keep merging.
            uf.union(v1, v2)
        else:
            # First crossing edge after reaching K groups; merging it would
            # create the (K-1)-clustering, so its cost is the spacing.
            return rec['cost']

    raise ValueError(
        "ran out of edges before finding one that separates two clusters")