class SuperBase:
    """Map every word to a single representative of its lemma cluster.

    All lemmas known to the lematizer are placed in a union-find structure,
    and the lemmas that share a word are merged into one set.  Looking a
    word up returns the representative of the set that contains one of its
    lemmas.
    """

    # Class-level defaults; both are replaced per instance in __init__.
    superbase = None
    lematizer = None

    def __init__(self, lemat_dict_file):
        """Build the lematizer from ``lemat_dict_file`` and cluster its lemmas.

        Args:
            lemat_dict_file: passed unchanged to ``Lematizer``.
        """
        self.lematizer = Lematizer(lemat_dict_file)
        self.superbase = UnionFind()
        for lemma in self.lematizer.all_lemats():
            self.superbase.make_set(lemma)
        for _, lemmas in self.lematizer.items():
            representative = None
            for lemma in lemmas:
                # Compare against None explicitly: a falsy representative
                # (e.g. an empty string) must still be merged.
                if representative is not None:
                    self.superbase.union(representative, lemma)
                representative = self.superbase.find(lemma)

    def __getitem__(self, word):
        """Return the cluster representative for ``word``.

        The word itself is returned when the lematizer does not know it,
        when it has no lemmas at all, or when the lookup of its lemma in
        the union-find raises ``KeyError``.
        """
        try:
            # Any single lemma will do, since all lemmas of a word were
            # merged into the same set; take the first one iterated.
            for lemma in self.lematizer[word]:
                return self.superbase.find(lemma)
        except KeyError:
            pass
        return word

    def items(self):
        """Yield ``(word, representative)`` for every word in the lematizer."""
        return ((w, self[w]) for (w, _) in self.lematizer.items())
def clustering(edge_list, count_nodes, clusters):
    """Merge nodes edge by edge until only ``clusters`` components remain.

    Nodes are labelled ``1 .. count_nodes``.  Edges are consumed in the
    order given (presumably ascending cost -- confirm against the caller).

    Args:
        edge_list: sequence of edges; ``edge[1][0]`` and ``edge[1][1]`` are
            the endpoints and ``edge[0]`` is the value that gets returned.
        count_nodes: number of nodes, which is also the initial number of
            components.
        clusters: number of components to stop at.

    Returns:
        ``(edge[0], union_find)`` for the first edge that joins two
        different components once at most ``clusters`` components remain.

    Raises:
        IndexError: if the edges run out before such an edge is found.
    """
    u = UnionFind([label + 1 for label in range(count_nodes)])
    for edge in edge_list:
        endpoints = edge[1]
        first, second = endpoints[0], endpoints[1]
        if u.find(first) == u.find(second):
            continue  # edge lies inside one component
        if count_nodes <= clusters:
            return edge[0], u
        u.union(first, second)
        count_nodes -= 1
    # Same exception type the original index walk produced, with a message
    # that says what actually went wrong.
    raise IndexError(
        "edge_list exhausted before reaching %d clusters" % clusters
    )
def test_union(self):
    """union(foo, bar) must leave bar as the representative of both nodes."""
    forest = UnionFind()
    first = Node("foo")
    forest.add(first)
    second = Node("bar")
    forest.add(second)

    # Before merging, every node is its own representative.
    for node in (first, second):
        self.assertEqual(node, forest.find(node))

    forest.union(first, second)

    # After merging, both nodes resolve to the second argument of union().
    for node in (first, second):
        self.assertEqual(second, forest.find(node))
def cluster(graph, k):
    """Merge the nodes of ``graph`` until ``k`` clusters remain.

    Args:
        graph: object exposing ``edges`` and a ``nodes`` mapping.
        k: number of clusters to stop at.

    Returns:
        ``(mindist, followers)`` where ``mindist`` is whatever
        ``get_mindist`` reports for the remaining edges and ``followers``
        is the union-find's ``followers`` attribute.
    """
    # NOTE(review): the return value of heapify() is used as the heap, and
    # heappop() yields (cost, edge) pairs.  The standard heapq.heapify
    # returns None, so this is presumably a project helper -- confirm.
    edges = heapify(graph.edges)
    u = UnionFind()
    for node in graph.nodes.values():
        u.add(node)
    while u.clusters > k:
        _cost, edge = heappop(edges)
        # Edges that would close a cycle are skipped.
        if not cycle(u, edge):
            u.union(u.find(edge.v0), u.find(edge.v1))
    mindist = get_mindist(u, edges)
    return mindist, u.followers
def clustering(self, groups, minheap):
    """Merge the keys of ``self.graph`` until ``groups`` groups remain.

    Args:
        groups: number of groups to stop at.
        minheap: heap of edges (one direction only) exposing ``pop()`` and
            ``peek()``.  Items expose ``get_data()`` (the edge, with its
            endpoints at index 0 and 1) and ``get_key()``.

    Returns:
        ``(unionfind, key)`` where ``key`` is ``get_key()`` of the first
        remaining heap item whose endpoints lie in different groups.
    """
    unionfind = UnionFind(list(self.graph.keys()))
    # Keep merging until the desired number of groups is reached.
    while unionfind.size() > groups:
        edge = minheap.pop().get_data()
        if unionfind.find(edge[0]) == unionfind.find(edge[1]):
            continue  # both endpoints are already in the same group
        # The third argument is passed through unchanged; its meaning is
        # defined by UnionFind.union.
        unionfind.union(edge[0], edge[1], True)
    # Drop heap items that fall inside a single group.  The top item is
    # inspected BEFORE it is popped, so an edge that already separates two
    # groups is never thrown away, and no edge from the merge phase is
    # needed (which also covers the case where no merge happened at all).
    while True:
        edge = minheap.peek().get_data()
        if unionfind.find(edge[0]) != unionfind.find(edge[1]):
            break
        minheap.pop()
    # The heap top now joins two different groups.
    return unionfind, minheap.peek().get_key()
def kclustering(graph, k):
    """Compute the maximum spacing of a k-clustering.

    Args:
        graph: iterable of ``(u, v, cost)`` edges.  It is iterated twice.
        k: number of clusters to stop at.

    Returns:
        The cost of the cheapest remaining edge whose endpoints are in
        different clusters.

    Raises:
        IndexError: if the edges run out before the answer is found.
    """
    nodes = set()
    for u, v, _cost in graph:
        nodes.add(u)
        nodes.add(v)
    group = UnionFind(nodes)

    # Sort by cost once and walk the list with a cursor; popping from the
    # front of a list is linear per pop.
    edges = sorted(graph, key=lambda edge: edge[2])
    cursor = 0
    while len(group.subtree.keys()) > k:
        u, v, _cost = edges[cursor]
        cursor += 1
        group.union(u, v)

    # Skip edges whose two endpoints are already in the same cluster.
    while True:
        u, v, min_cost = edges[cursor]
        cursor += 1
        if group.find(u) != group.find(v):
            return min_cost
def recolor_by_connected_components(self):
    """Give every connected component of equal colors its own color.

    Groups ``g`` and (individual, time) pairs ``(i, t)`` are the elements.
    A group is joined with a member at the same time step when their
    colors match, and an individual is joined with itself at the previous
    time step when its color did not change.  Components are numbered from
    1 in order of first appearance, and ``self.group_color`` and
    ``self.ind_color`` are overwritten in place.  Group, individual and
    time identifiers are used as 1-based indices.
    """
    from unionfind import UnionFind

    gtm = self.gtm
    forest = UnionFind()
    for t in gtm.times:
        groups_at_t = gtm.time[t]
        # Look every element up once before any union; presumably this
        # registers it with the union-find -- confirm.
        for g in groups_at_t:
            forest.find(g)
        for i in gtm.inds:
            forest.find((i, t))
        for g in groups_at_t:
            for i in gtm.group[g]:
                if self.group_color[g - 1] == self.ind_color[i - 1][t - 1]:
                    forest.union(g, (i, t))
                    forest.find(g)
        if t > 1:
            for i in gtm.inds:
                history = self.ind_color[i - 1]
                if history[t - 1] == history[t - 2]:
                    forest.union((i, t - 1), (i, t))
                    forest.find((i, t - 1))

    palette = {}

    def register(root):
        # First appearance of a component decides its color number.
        if root not in palette:
            palette[root] = len(palette) + 1

    for t in gtm.times:
        for g in gtm.time[t]:
            register(forest.find(g))
        for i in gtm.inds:
            register(forest.find((i, t)))

    for g in gtm.groups:
        self.group_color[g - 1] = palette[forest.find(g)]
    for i in gtm.inds:
        for t in gtm.times:
            self.ind_color[i - 1][t - 1] = palette[forest.find((i, t))]
def recolor_by_connected_components(self):
    """Renumber colors so that each connected component has one color.

    Elements are groups ``g`` and ``(individual, time)`` pairs.  Two
    elements are connected when a group and one of its members share a
    color at the same time, or when an individual keeps its color between
    consecutive time steps.  New colors start at 1 and follow the order in
    which components are first met.  ``self.group_color`` and
    ``self.ind_color`` are updated in place (1-based ids index them).
    """
    from unionfind import UnionFind

    components = UnionFind()
    for step in self.gtm.times:
        # Look up all elements of this time step before merging anything;
        # presumably this registers them with the union-find -- confirm.
        for grp in self.gtm.time[step]:
            components.find(grp)
        for ind in self.gtm.inds:
            components.find((ind, step))

        # Same color at the same time: group and member belong together.
        for grp in self.gtm.time[step]:
            for ind in self.gtm.group[grp]:
                same_color = (
                    self.group_color[grp - 1]
                    == self.ind_color[ind - 1][step - 1]
                )
                if same_color:
                    components.union(grp, (ind, step))
                    components.find(grp)

        if step <= 1:
            continue
        # Unchanged color between consecutive steps links the two pairs.
        for ind in self.gtm.inds:
            colors = self.ind_color[ind - 1]
            if colors[step - 1] == colors[step - 2]:
                components.union((ind, step - 1), (ind, step))
                components.find((ind, step - 1))

    numbering = {}
    for step in self.gtm.times:
        for grp in self.gtm.time[step]:
            numbering.setdefault(components.find(grp), len(numbering) + 1)
        for ind in self.gtm.inds:
            numbering.setdefault(
                components.find((ind, step)), len(numbering) + 1
            )

    for grp in self.gtm.groups:
        self.group_color[grp - 1] = numbering[components.find(grp)]
    for ind in self.gtm.inds:
        for step in self.gtm.times:
            self.ind_color[ind - 1][step - 1] = numbering[
                components.find((ind, step))
            ]
def hammond_distances(file_path):
    """Count the clusters formed by the bit codes listed in a file.

    The first line holds two space-separated integers: the number of code
    lines that follow and the number of bits per code.  Each code line has
    its spaces and newline stripped, is added to a union-find, and is then
    handed to ``update_singles`` and ``update_doubles`` (presumably these
    merge it with codes differing in one and two bits -- confirm).

    Args:
        file_path: path of the input file.

    Returns:
        The number of distinct representatives among all stored codes.
    """
    # The context manager closes the file even if parsing or one of the
    # helpers raises.
    with open(file_path) as file_stream:
        line_one = file_stream.readline().split(' ')
        count_edges, count_bits = int(line_one[0]), int(line_one[1])
        uf = UnionFind([])
        for _ in range(count_edges):
            code = file_stream.readline().replace(' ', '').replace('\n', '')
            uf.add(code)
            update_singles(uf, code, count_bits)
            update_doubles(uf, code, count_bits)
    return len({uf.find(k) for k in uf._node_titles.keys()})
def kruskal(self):
    """Build a spanning tree by taking edges from a priority queue.

    Every edge of ``self.edges`` is queued.  Edges are then taken in queue
    order and kept when the two-argument ``find`` reports that their
    endpoints are not yet joined.  The loop stops when the queue is empty
    or ``self.size`` edges have been kept.

    Returns:
        ``(edges_kept, total_weight)``.
    """
    pending = PriorityQueue()
    chosen = []
    total_weight = 0
    components = UnionFind(len(self.vertexes))
    for candidate in self.edges:
        pending.put(candidate)

    while not pending.empty() and len(chosen) < self.size:
        candidate = pending.get()
        src = self.vertexes[candidate.origin].index
        dst = self.vertexes[candidate.dest].index
        if components.find(src, dst):
            continue  # endpoints are already joined
        components.union(src, dst)
        chosen.append(candidate)
        total_weight += candidate.weight
    return chosen, total_weight
def kruskal_ts(graph, edges):
    """Kruskal's algorithm, with the edges ordered by the built-in sort.

    Args:
        graph: iterable of nodes; only its length is used.
        edges: iterable of ``(u, v, weight)`` records.

    Returns:
        List of ``(weight, u, v)`` tuples for the edges that were kept.
    """
    by_weight = sorted(edges, key=lambda record: record[2])
    num_nodes = sum(1 for _ in graph)
    forest = UnionFind(num_nodes)
    picked = []
    wanted = num_nodes - 1
    for record in by_weight:
        u, v, weight = record[0], record[1], record[2]
        if not forest.find(u, v):
            picked.append((weight, u, v))
            forest.union(u, v)
        # A spanning tree has one edge fewer than it has nodes.
        if len(picked) == wanted:
            break
    return picked
for j in xrange(y): grid.append([int(i) for i in f.readline().split()]) ### print neighbours(grid, 1, 1) for j in xrange(y): for i in xrange(x): height = grid[j][i] n = sorted(neighbours(grid, i, j)) # Check if I'm a sink if not n or not min(k[0] for k in n) < height: pass else: # Else flow to the lowest neighbour _, __, coord = n[0] uf.union((i, j), coord) ### for line in grid: ### print line letters = list("abcdefghijklmnopqrstuvwxyz") print "Case #%d:" % (casenum + 1) key = {} for j in xrange(y): for i in xrange(x): node = uf.find((i, j)).id if node not in key: key[node] = letters.pop(0) print key[node], print
if line.startswith("# y"): break if line=="" or line.startswith("#"): continue if line.find(',')>=0: line = line.split(',') elif line.find(' ')>=0: line = line.split(' ') else: raise Exception("Invalid line: "+line) if len(line)>=2: u,v = int(line[0]), int(line[1]) else: raise Exception("ERROR line: %s"%line) uf.union(u,v) # make lists of groups (no dummies) in each component vertices = range(1, group_count+1) component = {} for v in vertices: l = uf.find(v) if l not in component: component[l] = list() component[l].append(v) for l in component: component[l].sort() component = sorted(component.values()) # build color-conflict graph adj_list = {} for t in xrange(len(gtm.times)): groups = gtm.time[t+1] for i in xrange(len(groups)): g = uf.find(groups[i]) for j in xrange(i+1, len(groups)): h = uf.find(groups[j])
def forward(self, x, batch: OptTensor=None):
    """Embed the inputs, score kNN edges, cluster, and predict properties.

    Args:
        x: input node features; the first dimension indexes nodes.
        batch: per-node batch assignment.  When ``None`` every node is
            assigned to batch 0.

    Returns:
        Tuple ``(out, edge_scores, edge_index, cluster_map, cluster_props,
        cluster_batch)``.
    """
    if batch is None:
        batch = torch.zeros(x.size(0), dtype=torch.int64, device=x.device)

    # Embedding1: intermediate latent-space features.
    x_emb = self.inputnet(x)

    # kNN graph over the intermediate features, rebuilt before every
    # convolution; each convolution is added residually.
    for ec in self.edgeconvs:
        neighbours = knn_graph(x_emb, self.k, batch, loop=False, flow=ec.flow)
        x_emb = x_emb + ec(x_emb, neighbours)

    # [1] Embedding2: final latent-space embedding.
    out = self.output(x_emb)

    # kNN graph over the Embedding2 features.
    # NOTE(review): `ec` is the last module left over from the loop above,
    # so this line requires self.edgeconvs to be non-empty.
    edge_index = knn_graph(out, self.k, batch, loop=False, flow=ec.flow)

    # Edge classifier built on Embedding1; inputnet_cat is residual to
    # inputnet.
    x_cat = self.inputnet_cat(x) + x_emb

    # [2] Edge categories: convolutions over Embedding1.
    for ec in self.edgecatconvs:
        stacked = torch.cat([x_cat, x_emb, x], dim=1)
        x_cat = x_cat + ec(stacked, edge_index)

    endpoint_feats = torch.cat(
        [x_cat[edge_index[0]], x_cat[edge_index[1]]], dim=1
    )
    edge_scores = self.edge_classifier(endpoint_feats).squeeze()

    # Use the predicted graph to generate disjoint subgraphs: an edge is
    # kept when its highest-scoring category is not category 0.
    objects = UnionFind(x.size(0))
    keep = torch.argmax(edge_scores, dim=1) > 0
    good_edges = edge_index[:, keep]
    good_edges_cpu = good_edges.cpu().numpy()
    for pair in good_edges_cpu.T:
        objects.union(pair[0], pair[1])

    roots = np.array(
        [objects.find(node) for node in range(x.shape[0])], dtype=np.int64
    )
    cluster_map = torch.from_numpy(roots).to(x.device)
    cluster_roots, inverse = torch.unique(cluster_map, return_inverse=True)
    # Remap roots to [0, ..., nclusters-1].
    dense_ids = torch.arange(
        cluster_roots.size(0), dtype=torch.int64, device=x.device
    )
    cluster_map = dense_ids[inverse]

    # [3] Per-cluster properties from Embedding1; inputnet_prop is
    # residual to inputnet.
    x_prop = self.inputnet_prop(x) + x_emb
    # Accumulate over the selected disjoint subgraphs only.
    for ec in self.propertyconvs:
        stacked = torch.cat([x_prop, x_emb, x], dim=1)
        x_prop = x_prop + ec(stacked, good_edges)

    props_pooled, cluster_batch = max_pool_x(cluster_map, x_prop, batch)
    cluster_props = self.property_predictor(props_pooled)

    return out, edge_scores, edge_index, cluster_map, cluster_props, cluster_batch
def maze(w, h, size=2):
    """Build a ``w`` by ``h`` Labyrinth by joining coarse cells at random.

    The labyrinth is covered by a coarse grid of ``nw * nh`` cells, one
    every ``size`` positions.  All links between neighbouring coarse cells
    are shuffled and taken one at a time; a link is carved into the
    labyrinth only when its two cells are not yet in the same union-find
    set.  The loop stops when a single set remains.

    Args:
        w: labyrinth width.
        h: labyrinth height.
        size: spacing between coarse cells.

    Returns:
        The Labyrinth, with ``start`` at (0, 0) and ``goal`` at
        (lab.w - 2, lab.h - 2).
    """
    def conv_size(n):
        # Number of coarse cells needed to cover n positions.
        return (n - 1) // size + 1
    nw, nh = conv_size(w), conv_size(h)
    # Extra positions carved on each side of a corridor; 0 for size=2.
    ns = size // 2 - 1
    uf = UnionFind(nw * nh)
    lab = Labyrinth(w, h)
    # Start with every position set to 0.
    for x in range(w):
        for y in range(h):
            lab[x, y] = 0
    # Collect every link between neighbouring coarse cells.
    # NOTE(review): f + 1 / f + nw assume flatten() numbers cells row by
    # row with nw cells per row -- confirm against flatten/unflatten.
    edges = []
    for i in range(nh - 1):
        for j in range(nw - 1):
            f = flatten(i, j, nw, nh)
            edges.append((f, f + 1))  # right
            edges.append((f, f + nw))  # down
    # Last column: only downward links.
    for i in range(nh - 1):
        f = flatten(i, nw - 1, nw, nh)
        edges.append((f, f + nw))  # down
    # Last row: only rightward links.
    for j in range(nw - 1):
        f = flatten(nh - 1, j, nw, nh)
        edges.append((f, f + 1))  # right
    shuffle(edges)
    # Take links in random order until all cells are in one set.
    while len(uf) > 1:
        u, v = edges.pop()
        y1, x1 = unflatten(u, nw, nh)
        y2, x2 = unflatten(v, nw, nh)
        if uf.find(u) != uf.find(v):
            uf.union(u, v)
            if x2 - x1 == 1:
                # Rightward link: carve size + 1 positions along x.
                for i in range(size + 1):
                    # Widen towards smaller y, stopping at the border.
                    for j in range(1, ns + 1):
                        ny = size * y1 - j
                        if ny >= 0:
                            lab[size * x1 + i, ny] = True
                        else:
                            break
                    lab[size * x1 + i, size * y1] = True
                    # Widen towards larger y, stopping at the border.
                    for j in range(1, ns + 1):
                        ny = size * y1 + j
                        if ny < h:
                            lab[size * x1 + i, ny] = True
                        else:
                            break
            else:
                # Downward link: carve size + 1 positions along y.
                for i in range(size + 1):
                    # Widen towards smaller x, stopping at the border.
                    for j in range(1, ns + 1):
                        nx = size * x1 - j
                        if nx >= 0:
                            lab[nx, size * y1 + i] = True
                        else:
                            break
                    lab[size * x1, size * y1 + i] = True
                    # Widen towards larger x, stopping at the border.
                    for j in range(1, ns + 1):
                        nx = size * x1 + j
                        if nx < w:
                            lab[nx, size * y1 + i] = True
                        else:
                            break
    # Mark and record the start and goal positions.
    lab[0, 0] = 1
    lab.start = 0, 0
    lab[lab.w - 2, lab.h - 2] = 1
    lab.goal = lab.w - 2, lab.h - 2
    return lab
def test_find(self):
    """A node that was just added is its own representative."""
    forest = UnionFind()
    node = Node("foo")
    forest.add(node)
    representative = forest.find(node)
    self.assertEqual(node, representative)