def correct(self, word: str, **kwargs):
    """
    Most probable spelling correction for word.

    The word is returned unchanged when it is listed in ENGLISH_WORDS,
    MALAY_WORDS or stopword_tatabahasa, or when its `self._corpus` value
    exceeds 5000. Otherwise one suffix (from `hujung`) and one prefix
    (from `permulaan`) are stripped, the remaining stem is corrected, and
    the mapped affixes are attached again where applicable.

    Parameters
    ----------
    word : str
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    string: corrected string
    """
    if word in ENGLISH_WORDS:
        return word
    if self._corpus.get(word, 0) > 5000:
        return word
    if word in MALAY_WORDS:
        return word
    if word in stopword_tatabahasa:
        return word

    # Keep the untouched input: candidates are generated for it as well as
    # for the stripped stem.
    cp_word = word

    # The match is made on the table KEY, so the key's length is what must
    # be sliced off; the mapped VALUE is what gets attached again later.
    # Entries with an empty key or an empty mapped value are ignored.
    hujung_result = ''
    suffix_matches = [
        (k, v) for k, v in hujung.items() if k and v and word.endswith(k)
    ]
    if suffix_matches:
        matched, hujung_result = max(
            suffix_matches, key=lambda pair: len(pair[0])
        )
        word = word[:-len(matched)]

    permulaan_result = ''
    prefix_matches = [
        (k, v) for k, v in permulaan.items() if k and v and word.startswith(k)
    ]
    if prefix_matches:
        matched, permulaan_result = max(
            prefix_matches, key=lambda pair: len(pair[0])
        )
        word = word[len(matched):]

    if not word:
        # Stripping consumed the whole word: rebuild it from the affixes.
        if hujung_result and not word.endswith(hujung_result):
            word = word + hujung_result
        if permulaan_result and not word.startswith(permulaan_result):
            word = permulaan_result + word
        return word

    # `combined` stays True while the result is a corrected stem that still
    # needs its affixes; it is cleared when the whole-word candidate wins.
    combined = True
    if word in rules_normalizer:
        word = rules_normalizer[word]
    elif self._corpus.get(word, 0) <= 1000:
        # Stem is not trusted: compare the best candidate for the stem
        # with the best candidate for the original, unstripped word.
        word1 = max(self.edit_candidates(word), key=self.P)
        word2 = max(self.edit_candidates(cp_word), key=self.P)
        if self.WORDS[word1] > self.WORDS[word2]:
            word = word1
        else:
            word = word2
            combined = False

    if combined:
        if hujung_result and not word.endswith(hujung_result):
            word = word + hujung_result
        if permulaan_result and not word.startswith(permulaan_result):
            word = permulaan_result + word
    return word
def stem_word(self, word):
    """
    Strip one known suffix, then one known prefix, from word.

    Suffixes are looked up in `hujung` and prefixes in `permulaan`; when
    several keys match, the longest one is removed. The prefix is matched
    against the word after the suffix has been removed.

    Parameters
    ----------
    word : str

    Returns
    -------
    string: word with the matched suffix and prefix removed
    """
    # The match is made on the table KEY, so the key's length is what must
    # be sliced off. Entries with an empty key or an empty mapped value
    # are ignored.
    suffixes = [k for k, v in hujung.items() if k and v and word.endswith(k)]
    if suffixes:
        word = word[:-len(max(suffixes, key=len))]

    prefixes = [
        k for k, v in permulaan.items() if k and v and word.startswith(k)
    ]
    if prefixes:
        word = word[len(max(prefixes, key=len)):]
    return word
def naive(word: str):
    """
    Stem a string using startswith and endswith.

    One suffix listed in `hujung` is removed first, then one prefix listed
    in `permulaan`; when several keys match, the longest one is removed.

    Parameters
    ----------
    word : str

    Returns
    -------
    string: stemmed string
    """
    # The match is made on the table KEY, so the key's length is what must
    # be sliced off. Entries with an empty key or an empty mapped value
    # are ignored.
    suffixes = [k for k, v in hujung.items() if k and v and word.endswith(k)]
    if suffixes:
        word = word[:-len(max(suffixes, key=len))]

    prefixes = [
        k for k, v in permulaan.items() if k and v and word.startswith(k)
    ]
    if prefixes:
        word = word[len(max(prefixes, key=len)):]
    return word