예제 #1
0
    def correct(self, word: str, **kwargs):
        """
        Most probable spelling correction for word.

        The word is returned untouched when it is already known: present in
        ``ENGLISH_WORDS``, ``MALAY_WORDS`` or ``stopword_tatabahasa``, or
        counted more than 5000 times in ``self._corpus``.

        Otherwise one suffix (from ``hujung``) and then one prefix (from
        ``permulaan``) are stripped, the remaining stem is corrected, and the
        affixes are put back unless the correction of the full, unstripped
        word turned out to be the better candidate.

        Parameters
        ----------
        word : str
        **kwargs
            Accepted for interface compatibility; not read by this method.

        Returns
        -------
        string: corrected string
        """
        if (
            word in ENGLISH_WORDS
            or self._corpus.get(word, 0) > 5000
            or word in MALAY_WORDS
            or word in stopword_tatabahasa
        ):
            return word

        original = word

        # Longest-match on the affix *keys*. The amount stripped must be the
        # length of the key that actually matched the word; the mapped value
        # (what gets re-attached later) may have a different length, so
        # measuring it would cut too many or too few characters.
        # NOTE(review): presumably the tables map an informal affix spelling
        # to its canonical form - confirm against their definition.
        suffix_key = max(
            (k for k in hujung if word.endswith(k)), key=len, default=''
        )
        suffix = hujung.get(suffix_key, '')
        if suffix_key:
            # Guard needed: word[:-0] would wipe the whole word.
            word = word[:-len(suffix_key)]

        prefix_key = max(
            (k for k in permulaan if word.startswith(k)), key=len, default=''
        )
        prefix = permulaan.get(prefix_key, '')
        if prefix_key:
            word = word[len(prefix_key):]

        # Stays True while the result is a stem that still needs its affixes.
        combined = True
        if word:
            if word in rules_normalizer:
                word = rules_normalizer[word]
            elif self._corpus.get(word, 0) <= 1000:
                # Stem is not frequent enough to trust: compare the best
                # candidate for the stem with the best one for the full word.
                stem_candidates = self.edit_candidates(word)
                full_candidates = self.edit_candidates(original)
                best_stem = max(stem_candidates, key=self.P)
                best_full = max(full_candidates, key=self.P)

                if self.WORDS[best_stem] > self.WORDS[best_full]:
                    word = best_stem
                else:
                    # The full-word candidate already carries its affixes.
                    word = best_full
                    combined = False

        # Also reached with an empty stem (the word was made only of affixes),
        # in which case the affixes alone form the result.
        if combined:
            if suffix and not word.endswith(suffix):
                word = word + suffix
            if prefix and not word.startswith(prefix):
                word = prefix + word

        return word
예제 #2
0
 def stem_word(self, word):
     """
     Strip one known suffix, then one known prefix, from ``word``.

     The longest key of ``hujung`` that ``word`` ends with is removed first;
     the longest key of ``permulaan`` that the remainder starts with is
     removed next. A word matching no affix is returned unchanged.
     """
     # Strip by the length of the key that matched, not of the value it maps
     # to: the two may differ in length, and only the key is known to be
     # present in the word.
     suffix = max(
         (k for k in hujung if word.endswith(k)), key=len, default=''
     )
     if suffix:
         # Guard needed: word[:-0] would wipe the whole word.
         word = word[:-len(suffix)]
     prefix = max(
         (k for k in permulaan if word.startswith(k)), key=len, default=''
     )
     if prefix:
         word = word[len(prefix):]
     return word
예제 #3
0
파일: stem.py 프로젝트: lkngin/Malaya
def naive(word: str):
    """
    Stem a string using startswith and endswith.

    The longest key of ``hujung`` that ``word`` ends with is stripped first,
    then the longest key of ``permulaan`` that the remainder starts with.
    A word matching no affix is returned unchanged.

    Parameters
    ----------
    word : str

    Returns
    -------
    string: stemmed string
    """
    # Strip by the length of the key that matched, not of the value it maps
    # to: the two may differ in length, and only the key is known to be
    # present in the word.
    suffix = max((k for k in hujung if word.endswith(k)), key=len, default='')
    if suffix:
        # Guard needed: word[:-0] would wipe the whole word.
        word = word[:-len(suffix)]
    prefix = max(
        (k for k in permulaan if word.startswith(k)), key=len, default=''
    )
    if prefix:
        word = word[len(prefix):]
    return word