class SuperBase:
    """Map words to a canonical representative of their lemma group.

    All lemmas listed together for one entry of the lemmatizer are merged
    into a single union-find set, so lemmas connected through shared
    entries end up with the same representative.
    """

    # Class-level placeholders; both are replaced per instance in __init__.
    superbase = None
    lematizer = None

    def __init__(self, lemat_dict_file):
        """Load the lemmatizer and build the union-find over its lemmas.

        Args:
            lemat_dict_file: Passed unchanged to ``Lematizer``.
        """
        self.lematizer = Lematizer(lemat_dict_file)
        self.superbase = UnionFind()

        # Every lemma starts in its own singleton set.
        for lemma in self.lematizer.all_lemats():
            self.superbase.make_set(lemma)

        # Chain-merge the lemmas of each entry into one set.
        for _, lemmas in self.lematizer.items():
            representative = None
            for lemma in lemmas:
                # Compare with None explicitly: a falsy representative
                # (e.g. "" or 0) must still be merged with the next lemma.
                if representative is not None:
                    self.superbase.union(representative, lemma)
                representative = self.superbase.find(lemma)

    def __getitem__(self, word):
        """Return the representative for ``word``.

        Falls back to ``word`` itself when the lemmatizer does not know the
        word (``KeyError``) or lists no lemmas for it.
        """
        try:
            # Any single lemma will do: all lemmas of one word were merged
            # into the same set in __init__.
            lemmas = iter(self.lematizer[word])
            return self.superbase.find(next(lemmas))
        except (KeyError, StopIteration):
            # StopIteration: the word is known but has an empty lemma
            # collection; treat it like an unknown word.
            return word

    def items(self):
        """Return a generator of ``(word, representative)`` pairs.

        Words are taken from ``self.lematizer.items()``.
        """
        return ((w, self[w]) for (w, _) in self.lematizer.items())
Example #2
0
    def kruskal_mst(self):
        """Compute a minimum spanning tree with Kruskal's algorithm.

        Returns:
            A ``(mst, w)`` tuple where ``mst`` is the set of chosen
            ``(u, v)`` edges and ``w`` is the sum of their weights.

        Raises:
            AssertionError: If ``self.is_connected()`` is false.
        """
        assert self.is_connected(), "Can only find MST of a connected graph"
        uf = UnionFind()
        for v in self.vs:
            uf.make_set(v)

        # Keep a single orientation per edge: (u, v) is dropped when its
        # reverse (v, u) was already collected.
        half = set()
        for u, v in sorted(self.es):
            if (v, u) not in half:
                half.add((u, v))

        mst = set()
        w = 0
        # A spanning tree has exactly |V| - 1 edges. Stopping as soon as
        # every vertex has been touched is not enough: the chosen edges may
        # still form several disconnected components at that point.
        edges_needed = len(self.vs) - 1
        for u, v in sorted(half, key=lambda e: self.weights[e]):
            if len(mst) >= edges_needed:
                break

            # Only accept edges that join two different components.
            if uf.find_set(u) != uf.find_set(v):
                uf.union(u, v)
                mst.add((u, v))
                w += self.weights[(u, v)]

        # Always return the result, including when the final edge examined
        # completed the tree or when the graph has no edges at all.
        return mst, w