def __init_flash_text_corpus(self):
        """
        Fill the slang, emoticon and meaning-text keyword processors.

        Slang words and meaning texts are read from ``Repository``; emoticons
        are read from ``constant.EMOTICON_LIST``. Every entry is registered
        through the ``add_keyword`` method of the matching processor
        (presumably flashtext ``KeywordProcessor`` objects -- confirm where
        the attributes are created).
        """
        # Slang word corpus: each row is registered as add_keyword(row[0], row[1]).
        for row in Repository.get_slang_word().values:
            self.keyword_processor_slang_word.add_keyword(row[0], row[1])

        # Emoticon corpus: each entry unpacks into (label, variants); every
        # variant is registered with the label as the second argument.
        for label, variants in constant.EMOTICON_LIST:
            for variant in variants:
                self.keyword_processor_emoticon.add_keyword(variant, label)

        # Meaning word corpus: same row layout as the slang word corpus.
        for row in Repository.get_meaning_text().values:
            self.keyword_processor_meaning_text.add_keyword(row[0], row[1])
# Example #2
# 0
    def normalize_slang_word(text):
        """
        Normalize the slang/'alay' words in a text.

        The text is split on single spaces. Every token that exactly matches
        the first element of a row from ``Repository.get_slang_word().values``
        is replaced by the second element of that row; all other tokens are
        left untouched.

        :param text: (str) text to be normalized.
        :return: (str) normalized text, tokens re-joined with single spaces.
        """
        # split(' ') (not split()) so that runs of spaces survive the
        # split/join round trip unchanged.
        tokens = text.split(' ')

        # When a slang word appears in several rows, the last row wins.
        slang_word_dict = {
            item[0]: item[1] for item in Repository.get_slang_word().values
        }

        # dict.get falls back to the token itself when it is not a slang word.
        return ' '.join(slang_word_dict.get(token, token) for token in tokens)