class SuperBase:
    """Map words to a single representative of their lemma group.

    Every lemma reported by the lemmatizer becomes an element of a
    union-find structure; lemmas that are listed together for the same
    word are merged into one group.  Looking a word up yields the
    union-find representative of the group of one of its lemmas.
    """

    # Union-find over all lemmas; set per instance in __init__.
    superbase = None
    # Word -> lemmas lookup object; set per instance in __init__.
    lematizer = None

    def __init__(self, lemat_dict_file):
        """Build the lemma groups from the dictionary at *lemat_dict_file*.

        The argument is passed unchanged to ``Lematizer``.
        """
        self.lematizer = Lematizer(lemat_dict_file)
        self.superbase = UnionFind()

        for lemma in self.lematizer.all_lemats():
            self.superbase.make_set(lemma)

        # Chain-merge the lemmas of each word: every lemma after the first
        # is unioned with the representative of the previous one.
        for _, lemmas in self.lematizer.items():
            root = None
            for lemma in lemmas:
                # Compare against None explicitly: a falsy representative
                # (e.g. an empty string) must not suppress the union.
                if root is not None:
                    self.superbase.union(root, lemma)
                root = self.superbase.find(lemma)

    def __getitem__(self, word):
        """Return the group representative for *word*.

        Falls back to returning *word* itself when the lookup raises
        ``KeyError`` or when the word has no lemmas at all.
        """
        try:
            # Only one lemma is needed, so return on the first one the
            # iteration yields (the collection may be an unordered set).
            for lemma in self.lematizer[word]:
                return self.superbase.find(lemma)
        except KeyError:
            return word
        # Empty lemma collection: nothing to resolve, keep the word as is.
        return word

    def items(self):
        """Return a generator of ``(word, representative)`` pairs.

        Words are taken from ``self.lematizer.items()`` and resolved
        through ``self[word]``.
        """
        return ((word, self[word]) for (word, _) in self.lematizer.items())
def kruskal_mst(self):
    """Compute a minimum spanning tree with Kruskal's algorithm.

    Reads ``self.vs`` (vertices), ``self.es`` (edges as ``(u, v)`` pairs)
    and ``self.weights`` (indexed by edge tuple).

    Returns:
        A tuple ``(mst, weight)`` where ``mst`` is the set of chosen
        ``(u, v)`` edges and ``weight`` is the sum of their weights.

    Raises:
        AssertionError: if ``self.is_connected()`` is false.
    """
    assert self.is_connected(), "Can only find MST of a connected graph"

    uf = UnionFind()
    for vertex in self.vs:
        uf.make_set(vertex)

    # Keep a single orientation of each edge: (u, v) is dropped when
    # (v, u) has already been kept.
    # NOTE(review): presumably self.es stores both directions of every
    # undirected edge -- confirm against the graph class.
    half = set()
    for u, v in sorted(self.es):
        if (v, u) not in half:
            half.add((u, v))

    mst = set()
    total = 0
    # A spanning tree has exactly |V| - 1 edges.  Counting accepted edges
    # is the correct stopping rule; counting touched vertices is not,
    # because every vertex can be covered while components are still
    # separate.
    needed = len(self.vs) - 1
    for u, v in sorted(half, key=lambda e: self.weights[e]):
        if len(mst) >= needed:
            break
        # Accept the edge only if it joins two different components.
        if uf.find_set(u) != uf.find_set(v):
            uf.union(u, v)
            mst.add((u, v))
            total += self.weights[(u, v)]

    # Always return a result, including when the last edge examined
    # completes the tree or the graph has no edges.
    return mst, total