Ejemplo n.º 1
0
 def cut(self, sentence):
     """Segment ``sentence`` and return the pieces joined into one string.

     The input is first converted with ``utils.toUnicode`` and split into
     chunks by ``self._split``. Each chunk is then handled according to the
     code point of its first character:

     * Outside ``_MYANMAR_CODES_START`` .. ``_MYANMAR_CODES_END``: the chunk
       is emitted unchanged, followed by ``self.separator`` (and preceded by
       ``self.separator`` unless it is the very first chunk).
     * Inside that range: the chunk is mapped to its category string with
       ``code2Category`` and handed to ``_syllableSegmentation``; element
       ``[1]`` of that result is emitted.

     :param sentence: text to segment.
     :return: the concatenation of all emitted fragments.
     """
     sentence = utils.toUnicode(sentence)
     # The range bounds never change during the loop, so read them once.
     first_code = MyanmarTokenizer._MYANMAR_CODES_START
     last_code = MyanmarTokenizer._MYANMAR_CODES_END
     # Collect fragments and join once instead of repeated ``+=`` on a
     # string, which may re-copy the accumulated text on every iteration.
     pieces = []
     for i, s in enumerate(self._split(sentence)):
         if first_code <= ord(s[0]) <= last_code:
             categorys = self.code2Category(s)
             pieces.append(self._syllableSegmentation(categorys, s)[1])
             continue
         # Chunk does not start with a character in the Myanmar range:
         # pass it through verbatim, delimited by separators.
         if i != 0:
             pieces.append(self.separator)
         pieces.append(s)
         pieces.append(self.separator)
     return ''.join(pieces)
Ejemplo n.º 2
0
 def cut(self, sentence):
     """Return ``sentence`` segmented, as a single string.

     ``sentence`` is normalised through ``utils.toUnicode`` and broken into
     chunks with ``self._split``. A chunk whose first character has a code
     point between ``_MYANMAR_CODES_START`` and ``_MYANMAR_CODES_END``
     (inclusive) is converted to a category string via ``code2Category`` and
     passed to ``_syllableSegmentation``, whose result element ``[1]`` is
     appended to the output. Any other chunk is copied through as-is with
     ``self.separator`` after it, and also before it when it is not the
     first chunk.

     :param sentence: text to segment.
     :return: all output fragments concatenated in order.
     """
     chunks = self._split(utils.toUnicode(sentence))
     # Accumulate fragments in a list and join once at the end; building
     # the string with ``+=`` in the loop can degrade to quadratic time.
     out = []
     for index, chunk in enumerate(chunks):
         code = ord(chunk[0])
         in_myanmar_range = (
             MyanmarTokenizer._MYANMAR_CODES_START
             <= code
             <= MyanmarTokenizer._MYANMAR_CODES_END
         )
         if in_myanmar_range:
             categories = self.code2Category(chunk)
             out.append(self._syllableSegmentation(categories, chunk)[1])
         else:
             # No leading separator for the very first chunk.
             if index != 0:
                 out.append(self.separator)
             out.append(chunk)
             out.append(self.separator)
     return ''.join(out)
Ejemplo n.º 3
0
 def code2Category(self, sentence):
     """Translate each character of ``sentence`` into its category symbol.

     The input is converted with ``utils.toUnicode``. A character found in
     ``MyanmarTokenizer._MYANMAR_CODES`` is replaced by its entry in
     ``self.codeCategory``; every other character becomes ``'?'``.

     :param sentence: text to translate.
     :return: the per-character symbols concatenated into one string.
     """
     text = utils.toUnicode(sentence)
     symbols = []
     for ch in text:
         if ch in MyanmarTokenizer._MYANMAR_CODES:
             symbols.append(self.codeCategory[ch])
         else:
             # Placeholder for anything outside the known code set.
             symbols.append('?')
     return ''.join(symbols)
Ejemplo n.º 4
0
 def code2Category(self, sentence):
     """Build the category string for ``sentence``.

     After conversion through ``utils.toUnicode``, each character is looked
     up in ``self.codeCategory`` when it belongs to
     ``MyanmarTokenizer._MYANMAR_CODES``; characters outside that set are
     represented by ``'?'``.

     :param sentence: text to translate.
     :return: one string made of the per-character results, in order.
     """
     def category_of(char):
         # Characters outside the known code set get the '?' placeholder.
         if char not in MyanmarTokenizer._MYANMAR_CODES:
             return '?'
         return self.codeCategory[char]

     return ''.join(map(category_of, utils.toUnicode(sentence)))