Esempio n. 1
0
    def __init__(self, runnable, path, cache_file, only_alpha=False):
        """Build the Hunspell wrapper, load any cached stems and start it.

        Args:
            runnable: First argument handed to ``Hunspell(...)``
                (presumably the hunspell executable -- confirm against
                the ``Hunspell`` class).
            path: Second argument handed to ``Hunspell(...)``
                (presumably the dictionary location -- confirm).
            cache_file: Path of the stem cache file, or ``None`` to skip
                loading a cache.
            only_alpha: When exactly ``True``, a regex is compiled that
                matches runs of letters only (word characters that are
                neither digits nor underscores).
        """
        self.hunspell = Hunspell(runnable, path)
        self._cache_file = cache_file
        self.cached_words = {}
        # Identity test: ``None`` is a singleton and ``!=`` can be
        # overridden by the operand's class.
        if cache_file is not None:
            self.read_cache()
        self.only_alpha = only_alpha
        if self.only_alpha is True:
            # Raw string so ``\W`` and ``\d`` reach the regex engine
            # without relying on Python passing unknown escapes through.
            self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
        self.hunspell.start()
Esempio n. 2
0
class Hunspell_chache_aimed(object):
    """Hunspell stemmer wrapper that remembers stems it has already seen.

    Stems are kept in ``cached_words`` (word -> stem) and can be persisted
    to / restored from a UTF-8 text file with one ``word stem`` pair per
    line.
    """

    def __init__(self, runnable, path, cache_file, only_alpha=False):
        """Build the Hunspell wrapper, load any cached stems and start it.

        Args:
            runnable: First argument handed to ``Hunspell(...)``
                (presumably the hunspell executable -- confirm against
                the ``Hunspell`` class).
            path: Second argument handed to ``Hunspell(...)``
                (presumably the dictionary location -- confirm).
            cache_file: Path of the stem cache file, or ``None`` to run
                without a persistent cache.
            only_alpha: When exactly ``True``, ``cached_stem`` returns
                words that are not made of letters only unchanged.
        """
        self.hunspell = Hunspell(runnable, path)
        self._cache_file = cache_file
        self.cached_words = {}
        if cache_file is not None:
            self.read_cache()
        self.only_alpha = only_alpha
        if self.only_alpha is True:
            # Word characters minus digits and underscore, i.e. letters.
            # Raw string so the backslash escapes reach the regex engine.
            self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
        self.hunspell.start()

    def read_cache(self):
        """Load ``word stem`` pairs from the cache file into memory.

        Does nothing when no cache file is configured or the file does
        not exist. A line holding a single field maps that field to
        itself; a line with two space-separated fields maps the first to
        the second; any other line is ignored.
        """
        if self._cache_file is None or not os.path.exists(self._cache_file):
            return
        # Binary mode + explicit decode behaves the same on Python 2 and 3;
        # the ``with`` block guarantees the handle is closed.
        with open(self._cache_file, "rb") as cache:
            for raw_line in cache:
                line = raw_line.strip().decode('utf-8')
                fields = line.split(' ')
                if len(fields) == 1:
                    self.cached_words[line] = line
                elif len(fields) == 2:
                    orig, stemmed = fields
                    self.cached_words[orig] = stemmed

    def write_cache(self):
        """Write every cached ``word stem`` pair to the cache file.

        Does nothing when no cache file is configured. The file is
        overwritten and encoded as UTF-8.
        """
        if self._cache_file is None:
            return
        # Binary mode because the lines are encoded to bytes by hand.
        with open(self._cache_file, "wb") as cache:
            for tok, stem in self.cached_words.items():
                cache.write(u'{0} {1}\n'.format(tok, stem).encode('utf-8'))

    def cached_stem(self, word):
        """Return the stem of ``word``, asking Hunspell only on a cache miss.

        When ``only_alpha`` is ``True`` and ``word`` is not entirely made
        of letters, ``word`` is returned unchanged and nothing is cached.
        """
        if self.only_alpha is True:
            # ``match`` anchors at the start only, so compare the matched
            # text with the whole word to require a full match.
            found = self.alpha_matcher.match(word)
            if found is None or found.group() != word:
                return word
        if word in self.cached_words:
            return self.cached_words[word]
        stem = self.hunspell.stem_word(word)
        self.cached_words[word] = stem
        return stem
Esempio n. 3
0
class Hunspell_chache_aimed(object):
    """Caching front-end for a Hunspell stemmer.

    Keeps a ``word -> stem`` dictionary in ``cached_words`` and can save
    it to, or load it from, a UTF-8 text file holding one ``word stem``
    pair per line.
    """

    def __init__(self, runnable, path, cache_file, only_alpha=False):
        """Create the stemmer, preload the cache and start Hunspell.

        Args:
            runnable: Forwarded as the first argument of ``Hunspell(...)``
                (presumably the hunspell binary -- confirm against the
                ``Hunspell`` class).
            path: Forwarded as the second argument of ``Hunspell(...)``
                (presumably the dictionary path -- confirm).
            cache_file: Location of the stem cache file; ``None`` means
                no persistent cache is used.
            only_alpha: If exactly ``True``, only words consisting purely
                of letters are stemmed by ``cached_stem``.
        """
        self.hunspell = Hunspell(runnable, path)
        self._cache_file = cache_file
        self.cached_words = {}
        if cache_file is not None:
            self.read_cache()
        self.only_alpha = only_alpha
        if self.only_alpha is True:
            # Letters only: word characters excluding digits and "_".
            # Written as a raw string so the escapes are regex escapes.
            self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
        self.hunspell.start()

    def read_cache(self):
        """Populate ``cached_words`` from the cache file.

        Returns silently when no cache file is configured or it does not
        exist. One-field lines map the field to itself, two-field lines
        map the first field to the second, and lines with more fields
        are skipped.
        """
        if self._cache_file is None:
            return
        if not os.path.exists(self._cache_file):
            return
        # Read bytes and decode explicitly so the code runs unchanged on
        # Python 2 and 3; ``with`` closes the handle even on errors.
        with open(self._cache_file, "rb") as handle:
            for raw in handle:
                entry = raw.strip().decode('utf-8')
                pieces = entry.split(' ')
                if len(pieces) == 1:
                    self.cached_words[entry] = entry
                elif len(pieces) == 2:
                    orig, stemmed = pieces
                    self.cached_words[orig] = stemmed

    def write_cache(self):
        """Dump ``cached_words`` to the cache file as UTF-8 text.

        Returns silently when no cache file is configured. Any existing
        file content is replaced.
        """
        if self._cache_file is None:
            return
        # Lines are encoded manually, hence the binary file mode.
        with open(self._cache_file, "wb") as handle:
            for tok, stem in self.cached_words.items():
                handle.write(
                    u'{0} {1}\n'.format(tok, stem).encode('utf-8'))

    def cached_stem(self, word):
        """Return the stem for ``word``, consulting the cache first.

        With ``only_alpha`` set to ``True``, a word containing anything
        other than letters is returned as-is and is not cached. On a
        cache miss the stem is obtained from ``hunspell.stem_word`` and
        stored for later calls.
        """
        if self.only_alpha is True:
            # Match once; the comparison with the full word rejects
            # partial (prefix-only) matches.
            hit = self.alpha_matcher.match(word)
            if hit is None or hit.group() != word:
                return word
        if word in self.cached_words:
            return self.cached_words[word]
        stem = self.hunspell.stem_word(word)
        self.cached_words[word] = stem
        return stem
Esempio n. 4
0
 def __init__(self, runnable, path, cache_file, only_alpha=False):
     """Set up the Hunspell wrapper, preload cached stems and start it.

     Args:
         runnable: Forwarded as the first argument of ``Hunspell(...)``
             (presumably the hunspell binary -- confirm against the
             ``Hunspell`` class).
         path: Forwarded as the second argument of ``Hunspell(...)``
             (presumably the dictionary path -- confirm).
         cache_file: Location of the stem cache file; when ``None`` no
             cache is read.
         only_alpha: If exactly ``True``, a letters-only regex (word
             characters excluding digits and underscore) is compiled
             and stored as ``alpha_matcher``.
     """
     self.hunspell = Hunspell(runnable, path)
     self._cache_file = cache_file
     self.cached_words = {}
     # ``is not None`` is the reliable test for the ``None`` singleton.
     if cache_file is not None:
         self.read_cache()
     self.only_alpha = only_alpha
     if self.only_alpha is True:
         # Raw string keeps ``\W`` / ``\d`` as regex escapes instead of
         # unrecognised Python string escapes.
         self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
     self.hunspell.start()