Example #1
0
    def __init__(self, filetrie="anchors.marisa", filestop="stop.txt"):
        """Load the anchor trie and the stop-word set from disk.

        Args:
            filetrie: Path of the file read into ``self.trie`` (a
                ``marisa_trie.Trie``).
            filestop: Path of a text file with one stop word per line.
                Trailing whitespace is stripped from each line; lines that
                are empty after stripping, or whose first character is
                ``#``, are skipped.

        Errors raised by ``open()`` or by the trie reader propagate to the
        caller unchanged.
        """
        self.trie = marisa_trie.Trie()
        self.disambig = Disambiguator()

        # Iterate the file lazily instead of materialising readlines().
        with open(filestop, 'r') as stopfile:
            stripped = (raw.rstrip() for raw in stopfile)
            self.stopwords = set(
                word for word in stripped
                if word and not word.startswith('#'))

        # The serialized trie is binary data: open it in binary mode so no
        # newline translation or text decoding is applied to its bytes.
        with open(filetrie, 'rb') as inputfile:
            self.trie.read(inputfile)

        # Parenthesised single-argument form prints the same text under
        # Python 2 and is also valid syntax under Python 3.
        print("Loaded %d anchors" % len(self.trie))