def __init__(self, runnable, path, cache_file, only_alpha=False):
    """Create the hunspell wrapper, load the stem cache and start hunspell.

    Args:
        runnable: First argument forwarded to ``Hunspell`` (presumably the
            hunspell executable -- confirm against ``Hunspell``).
        path: Second argument forwarded to ``Hunspell`` (presumably the
            dictionary path -- confirm against ``Hunspell``).
        cache_file: Path of the stem cache file, or ``None`` to start with
            an empty cache without reading anything.
        only_alpha: When exactly ``True``, a matcher for runs of Unicode
            letters (no digits, no underscore) is compiled and stored as
            ``alpha_matcher``.
    """
    self.hunspell = Hunspell(runnable, path)
    self._cache_file = cache_file
    self.cached_words = {}
    # Identity test: ``None`` is a singleton, and ``!=`` could be hijacked
    # by a custom ``__ne__`` on a path-like object.
    if cache_file is not None:
        self.read_cache()
    self.only_alpha = only_alpha
    if self.only_alpha is True:
        # Raw string so ``\W`` / ``\d`` reach the regex engine untouched.
        # ``[^\W\d_]`` = word characters minus digits and underscore.
        self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
    self.hunspell.start()
class Hunspell_chache_aimed(object):
    """Stem words through a ``Hunspell`` instance and remember the results.

    Results are kept in the ``cached_words`` dict (word -> stem), which can
    be loaded from and saved to a UTF-8 text file.

    Cache file format, one entry per line:
        ``word``       -- the stem is the word itself
        ``word stem``  -- separated by exactly one space
    """

    def __init__(self, runnable, path, cache_file, only_alpha=False):
        """Create the hunspell wrapper, load the cache and start hunspell.

        Args:
            runnable: First argument forwarded to ``Hunspell`` (presumably
                the hunspell executable -- confirm against ``Hunspell``).
            path: Second argument forwarded to ``Hunspell`` (presumably the
                dictionary path -- confirm against ``Hunspell``).
            cache_file: Path of the stem cache file, or ``None`` to start
                with an empty cache without reading anything.
            only_alpha: When exactly ``True``, ``cached_stem`` returns any
                word that is not made purely of Unicode letters unchanged.
        """
        self.hunspell = Hunspell(runnable, path)
        self._cache_file = cache_file
        self.cached_words = {}
        if cache_file is not None:
            self.read_cache()
        self.only_alpha = only_alpha
        if self.only_alpha is True:
            # ``[^\W\d_]`` = word characters minus digits and underscore.
            self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
        self.hunspell.start()

    def read_cache(self):
        """Merge the entries of the cache file into ``cached_words``.

        Does nothing when the file does not exist. Lines that split into
        more than two space-separated fields are ignored.
        """
        if not os.path.exists(self._cache_file):
            return
        # Binary mode + explicit decode keeps the on-disk format UTF-8
        # regardless of platform defaults; ``with`` closes the handle.
        with open(self._cache_file, "rb") as cache:
            for raw_line in cache:
                line = raw_line.strip().decode('utf-8')
                fields = line.split(' ')
                if len(fields) == 1:
                    self.cached_words[line] = line
                elif len(fields) == 2:
                    orig, stemmed = fields
                    self.cached_words[orig] = stemmed

    def write_cache(self):
        """Overwrite the cache file with every ``word stem`` pair.

        Each entry is written as one UTF-8 encoded line. ``open`` fails if
        the instance was created with ``cache_file=None``.
        """
        # The payload is already encoded, so the file is opened in binary
        # mode.
        with open(self._cache_file, "wb") as f:
            for tok, stem in self.cached_words.items():
                f.write(u'{0} {1}\n'.format(tok, stem).encode('utf-8'))

    def cached_stem(self, word):
        """Return the stem of ``word``, asking hunspell only on a cache miss.

        Args:
            word: The token to stem.

        Returns:
            ``word`` itself when ``only_alpha`` is ``True`` and the word is
            not made entirely of letters; otherwise the cached stem, or the
            result of ``hunspell.stem_word(word)``, which is then cached.
        """
        if self.only_alpha is True:
            # Match once; the word qualifies only if the letter run that
            # starts at position 0 covers the whole word.
            matched = self.alpha_matcher.match(word)
            if matched is None or matched.group() != word:
                return word
        if word in self.cached_words:
            return self.cached_words[word]
        stem = self.hunspell.stem_word(word)
        self.cached_words[word] = stem
        return stem
class Hunspell_chache_aimed(object):
    """Stem words through a ``Hunspell`` instance and remember the results.

    Results are kept in the ``cached_words`` dict (word -> stem), which can
    be loaded from and saved to a UTF-8 text file.

    Cache file format, one entry per line:
        ``word``       -- the stem is the word itself
        ``word stem``  -- separated by exactly one space
    """

    def __init__(self, runnable, path, cache_file, only_alpha=False):
        """Create the hunspell wrapper, load the cache and start hunspell.

        Args:
            runnable: First argument forwarded to ``Hunspell`` (presumably
                the hunspell executable -- confirm against ``Hunspell``).
            path: Second argument forwarded to ``Hunspell`` (presumably the
                dictionary path -- confirm against ``Hunspell``).
            cache_file: Path of the stem cache file, or ``None`` to start
                with an empty cache without reading anything.
            only_alpha: When exactly ``True``, ``cached_stem`` returns any
                word that is not made purely of Unicode letters unchanged.
        """
        self.hunspell = Hunspell(runnable, path)
        self._cache_file = cache_file
        self.cached_words = {}
        if cache_file is not None:
            self.read_cache()
        self.only_alpha = only_alpha
        if self.only_alpha is True:
            # ``[^\W\d_]`` = word characters minus digits and underscore.
            self.alpha_matcher = re.compile(r"[^\W\d_]+", re.UNICODE)
        self.hunspell.start()

    def read_cache(self):
        """Merge the entries of the cache file into ``cached_words``.

        Does nothing when the file does not exist. Lines that split into
        more than two space-separated fields are ignored.
        """
        if not os.path.exists(self._cache_file):
            return
        # Binary mode + explicit decode keeps the on-disk format UTF-8
        # regardless of platform defaults; ``with`` closes the handle.
        with open(self._cache_file, "rb") as cache:
            for raw_line in cache:
                line = raw_line.strip().decode('utf-8')
                fields = line.split(' ')
                if len(fields) == 1:
                    self.cached_words[line] = line
                elif len(fields) == 2:
                    orig, stemmed = fields
                    self.cached_words[orig] = stemmed

    def write_cache(self):
        """Overwrite the cache file with every ``word stem`` pair.

        Each entry is written as one UTF-8 encoded line. ``open`` fails if
        the instance was created with ``cache_file=None``.
        """
        # The payload is already encoded, so the file is opened in binary
        # mode.
        with open(self._cache_file, "wb") as f:
            for tok, stem in self.cached_words.items():
                f.write(u'{0} {1}\n'.format(tok, stem).encode('utf-8'))

    def cached_stem(self, word):
        """Return the stem of ``word``, asking hunspell only on a cache miss.

        Args:
            word: The token to stem.

        Returns:
            ``word`` itself when ``only_alpha`` is ``True`` and the word is
            not made entirely of letters; otherwise the cached stem, or the
            result of ``hunspell.stem_word(word)``, which is then cached.
        """
        if self.only_alpha is True:
            # Match once; the word qualifies only if the letter run that
            # starts at position 0 covers the whole word.
            matched = self.alpha_matcher.match(word)
            if matched is None or matched.group() != word:
                return word
        if word in self.cached_words:
            return self.cached_words[word]
        stem = self.hunspell.stem_word(word)
        self.cached_words[word] = stem
        return stem