Example 1
 def _break_and_wrap(text_to_tokenize_match):
     # Regex callback: receives a match object covering a run of Chinese text.
     text_to_tokenize = text_to_tokenize_match.group(0)
     wrapped = ""
     for token in Languages.tokenize(Languages.chinese.value, text_to_tokenize):
         # Look up (or create via the translator) the WordZH entry for this token.
         word_zh = WordZH.get_or_create_with_translator(word=token)[0]
         # Wrap each token with its pinyin so the template can render both.
         wrapped += ('<span class="chinese-word"><span>' + word_zh.pinyin
                     + '</span><span>' + token + '</span></span>')
     return wrapped
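
Because the helper receives a regex match object, it is presumably intended as a re.sub callback. A minimal usage sketch, assuming a generic CJK-ideograph pattern and a hypothetical wrap_chinese helper (the project's actual pattern and call site are not shown here):

 import re

 # Assumption: runs of CJK ideographs; the project's real pattern may differ.
 CHINESE_RUN = re.compile(r'[\u4e00-\u9fff]+')

 def wrap_chinese(html_text):
     # Each matched run is replaced by the pinyin-annotated markup
     # produced by _break_and_wrap.
     return CHINESE_RUN.sub(_break_and_wrap, html_text)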
Example 2
 def auto_tokenize(self):
     """
     Tokenize the business text into words, create word objects
     if necessary, and link the business text to them.
     Applies only to Chinese text.
     """
     word_model = to_word_model(self.language)
     if word_model == WordZH:
         tokens = Languages.tokenize(self.language, self.text)
         # Drop any existing links before rebuilding them in token order.
         self.words_zh.clear()
         for ordinal, token in enumerate(tokens):
             word_object = word_model.get_or_create_with_translator(word=token)[0]
             # objects.create() already saves the row, so no extra save() is needed.
             BusinessTextWordZH.objects.create(text=self, word=word_object, ordinal=ordinal)
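
A minimal usage sketch, assuming BusinessText is the model that defines auto_tokenize and that WordZH exposes word and pinyin fields (as the first example suggests):

 # Hypothetical instance; field names beyond those used above are assumed.
 text = BusinessText.objects.create(language=Languages.chinese.value, text='你好世界')
 text.auto_tokenize()
 # The through-model rows preserve token order via their ordinal column.
 for link in BusinessTextWordZH.objects.filter(text=text).order_by('ordinal'):
     print(link.ordinal, link.word.word, link.word.pinyin)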