def __init_flash_text_corpus(self):
    """
    Fill the slang, emoticon and meaning-text keyword processors.

    Each processor receives its entries through ``add_keyword``; the slang
    and meaning-text entries come from ``Repository``, the emoticon entries
    from ``constant.EMOTICON_LIST``.
    """
    # Slang corpus: column 0 of every row is registered with column 1 as
    # its second ``add_keyword`` argument.
    # NOTE(review): presumably column 0 is the slang form and column 1 its
    # replacement -- confirm against Repository.get_slang_word().
    slang_table = Repository.get_slang_word()
    for row in slang_table.values:
        source, target = row[0], row[1]
        self.keyword_processor_slang_word.add_keyword(source, target)

    # Emoticon corpus: every variant in a group is registered against the
    # group's label.
    # NOTE(review): iteration unpacks each element into (label, variants),
    # so EMOTICON_LIST is assumed to be a sequence of pairs -- confirm.
    for label, variants in constant.EMOTICON_LIST:
        for variant in variants:
            self.keyword_processor_emoticon.add_keyword(variant, label)

    # Meaning-text corpus: same row layout handling as the slang corpus.
    meaning_table = Repository.get_meaning_text()
    for row in meaning_table.values:
        source, target = row[0], row[1]
        self.keyword_processor_meaning_text.add_keyword(source, target)
def normalize_slang_word(text, *, slang_words=None):
    """
    Normalize the slang/'alay' words in a text.

    The text is split on single spaces and every token that exactly matches
    a slang entry (case-sensitive) is replaced by its mapped value; all
    other tokens are kept unchanged.

    :param text: (str) text to be normalized.
    :param slang_words: (dict, optional) mapping of slang word to its
        replacement. When omitted, the mapping is built from
        ``Repository.get_slang_word()`` on every call; pass a prebuilt
        mapping to avoid repeating that lookup for each text.
    :return: (str) text that has been normalized.
    """
    # Split on a literal space (not arbitrary whitespace) so that joining
    # with ' ' afterwards reproduces the original spacing exactly.
    tokens = text.split(' ')

    if slang_words is None:
        # Each row of ``.values`` is read as (slang, replacement).
        # NOTE(review): presumably a two-column table -- confirm against
        # Repository.get_slang_word().
        slang_words = {
            row[0]: row[1] for row in Repository.get_slang_word().values
        }

    # Unknown tokens fall back to themselves.
    return ' '.join(slang_words.get(token, token) for token in tokens)