Example #1
0
	def checkWords(self, words):
		"""Return spelling suggestions for the given words.

		Each word is passed to ``self.check``; a word is kept in the result only
		when the suggestion differs from the accent-stripped form of the word.

		:param words: indexable collection of words
		:return: dict mapping word -> suggestion; empty when ``words`` is falsy
			or its first entry is falsy
		"""
		# Nothing to do for missing input or when the first entry is empty.
		if not words or not words[0]:
			return {}

		# First pass: ask for a suggestion for every word.
		checked = {}
		for candidate in words:
			checked[candidate] = self.check(candidate)

		# Second pass: keep only the words whose suggestion actually changes them.
		changed = {}
		for original, suggestion in checked.items():
			if strip_accents(original) != suggestion:
				changed[original] = suggestion
		return changed
Example #2
0
	def check(self, word):
		"""Return the closest known spelling for ``word``.

		The word is first run through ``strip_accents``. If the result is empty
		or already present in ``self.allWords`` it is returned as is. Otherwise
		the known words are narrowed down to those of similar length that share
		the first or the last character with the word, and the best
		``difflib.get_close_matches`` hit (cutoff 0.7) among them is returned.

		:param word: word to check
		:return: the best matching entry of ``self.allWords``, or the
			accent-stripped word when there is no sufficiently close match
		"""
		word = strip_accents(word)

		if not word or word in self.allWords:
			return word

		lword = len(word)
		# Tolerate one character of length difference per five characters, at least one.
		maxDiffs = max(1, lword // 5)
		first, last = word[0], word[-1]
		# Slices instead of indexes: an empty entry in allWords is then simply
		# skipped rather than raising IndexError.
		candidates = [
			x for x in self.allWords
			if abs(len(x) - lword) <= maxDiffs
			and (x[:1] == first or x[-1:] == last)
		]
		# Only the best match is used, so request a single result.
		matches = get_close_matches(word, candidates, n=1, cutoff=0.7)
		return matches[0] if matches else word
Example #3
0
	def _getStemDict(self, keywords):
		"""Pick one representative word for every keyword stem.

		The first element of each entry in ``keywords`` is taken as a stem.
		Words from ``savedStems`` are grouped by their accent-stripped stem, and
		for each stem found among the keywords the word with the highest
		``wordCounter`` value is chosen (the first one wins on ties).

		:param keywords: iterable of indexable entries whose element 0 is a stem
		:return: dict mapping stem -> chosen word
		"""
		wanted = {entry[0] for entry in keywords}

		# Group the saved words under their accent-free stem.
		grouped = {}
		for word, rawStem in savedStems.items():
			plainStem = strip_accents(rawStem)
			if plainStem in wanted:
				grouped.setdefault(plainStem, []).append(word)

		# Reduce every group to its most frequent word.
		return {
			plainStem: max(group, key=lambda w: wordCounter[w])
			for plainStem, group in grouped.items()
		}
Example #4
0
	def normalizeQuery(self, query):
		"""Normalize ``query``, stem it, and strip accents from the result.

		:param query: raw query text
		:return: output of ``strip_accents(createStem(normalize_text(query)))``
		"""
		normalized = normalize_text(query)
		stemmed = createStem(normalized)
		return strip_accents(stemmed)
Example #5
0
	def test_strip_accents(self):
		"""strip_accents must turn accented lower- and upper-case letters into plain ones."""
		accented = 'ěščřžýáíéúůĚŠČŘŽÝÁÍÉÚŮ'
		expected = 'escrzyaieuuESCRZYAIEUU'
		self.assertEqual(expected, strip_accents(accented))
Example #6
0
def stripAccents(words):
	"""Return the set of accent-stripped forms of all ``words``.

	:param words: iterable of words
	:return: set with ``strip_accents`` applied to every word
	"""
	stripped = set()
	for word in words:
		stripped.add(strip_accents(word))
	return stripped
Example #7
0
def getstem(word, lang):
	"""Return the accent-free stem of ``word`` for the given language.

	The word is normalized with ``normalize_text``, stemmed with
	``createStem(word, lang)``, and finally passed through ``strip_accents``.

	:param word: word to stem
	:param lang: language argument forwarded to ``createStem``
	:return: the accent-stripped stem
	"""
	return strip_accents(createStem(normalize_text(word), lang))