def checkWords(self, words):
    """Collect spelling suggestions for a sequence of words.

    Every word is run through ``self.check``. A word is reported only when
    its accent-stripped form differs from what ``self.check`` returned.

    Args:
        words: Indexable collection of words. If it is empty, or its first
            element is falsy, nothing is checked.

    Returns:
        Dict mapping each reported original word to its suggestion; an
        empty dict when there is nothing to check or nothing to report.
    """
    if not words or not words[0]:
        return {}

    # Run every check first, then filter, so the call order stays the same.
    checked = {}
    for word in words:
        checked[word] = self.check(word)

    return {
        word: proposal
        for word, proposal in checked.items()
        if strip_accents(word) != proposal
    }
def check(self, word):
    """Return the closest known spelling for ``word``.

    The word is accent-stripped first. If the stripped word is empty or is
    already present in ``self.allWords`` it is returned as is. Otherwise the
    entries of ``self.allWords`` are narrowed to candidates that

    * differ in length from the word by at most ``max(1, len(word) // 5)``
      characters, and
    * share the word's first or last character,

    and the best ``difflib.get_close_matches`` hit (cutoff 0.7) among them
    is returned.

    Args:
        word: The word to look up.

    Returns:
        The best matching entry of ``self.allWords``, or the accent-stripped
        word when no candidate is close enough.
    """
    word = strip_accents(word)
    if not word or word in self.allWords:
        return word

    length = len(word)
    # Tolerate one character of length difference per five characters of
    # the query, but always at least one.
    max_diffs = max(1, length // 5)
    first, last = word[0], word[-1]

    # startswith/endswith are simply False for an empty entry, whereas
    # indexing entry[0] would raise IndexError if the vocabulary happens to
    # contain "" and the query is short enough to let it pass the length test.
    candidates = [
        known
        for known in self.allWords
        if abs(len(known) - length) <= max_diffs
        and (known.startswith(first) or known.endswith(last))
    ]

    matches = get_close_matches(word, candidates, cutoff=0.7)
    return matches[0] if matches else word
def _getStemDict(self, keywords):
    """Pick one representative word for every keyword stem.

    Walks the module-level ``savedStems`` mapping (word -> stem), strips
    accents from each stem, and groups the words whose stripped stem matches
    the first element of some entry in ``keywords``. For each such stem the
    word with the highest ``wordCounter`` value is chosen; on ties the word
    encountered first wins.

    Args:
        keywords: Iterable of indexable entries; only element ``[0]`` of
            each entry is used.

    Returns:
        Dict mapping accent-stripped stem -> chosen word.
    """
    wanted = {entry[0] for entry in keywords}

    grouped = {}
    for surface, raw_stem in savedStems.items():
        plain_stem = strip_accents(raw_stem)
        if plain_stem not in wanted:
            continue
        grouped.setdefault(plain_stem, []).append(surface)

    return {
        plain_stem: max(forms, key=lambda form: wordCounter[form])
        for plain_stem, forms in grouped.items()
    }
def normalizeQuery(self, query):
    """Normalize a query: normalize the text, stem it, then strip accents.

    Args:
        query: The raw query passed to ``normalize_text``.

    Returns:
        The result of ``strip_accents(createStem(normalize_text(query)))``.
    """
    normalized = normalize_text(query)
    stemmed = createStem(normalized)
    return strip_accents(stemmed)
def stripAccents(words):
    """Return the set of accent-stripped forms of ``words``.

    Args:
        words: Iterable of words; each is passed to ``strip_accents``.

    Returns:
        A set of the stripped words (duplicates collapse).
    """
    stripped = set()
    for word in words:
        stripped.add(strip_accents(word))
    return stripped
def getstem(word, lang):
    """Return the accent-stripped stem of ``word``.

    The word is normalized with ``normalize_text``, stemmed with
    ``createStem`` for ``lang``, and finally passed through
    ``strip_accents``.

    Args:
        word: The word to stem.
        lang: Language argument forwarded to ``createStem``.

    Returns:
        The accent-stripped stem.
    """
    return strip_accents(createStem(normalize_text(word), lang))
def test_strip_accents(self):
    # strip_accents should map each accented letter, in both lower and
    # upper case, to its unaccented ASCII counterpart.
    accented = 'ěščřžýáíéúůĚŠČŘŽÝÁÍÉÚŮ'
    expected = 'escrzyaieuuESCRZYAIEUU'
    self.assertEqual(expected, strip_accents(accented))