Python isChineseChar Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: isChineseChar

Examples at hotexamples.com: 6

Python isChineseChar - 6 examples found. These are the top-rated real-world Python examples of utils.isChineseChar, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: yammseg.py Project: EricChanBD/yaseg

 def getMatchChineseWords(self):
     """Collect the dictionary words that begin at the current position.

     The scan grows a candidate string one character at a time, starting at
     ``self.pos``, for at most ``maxWordLength`` characters and only while
     ``isChineseChar`` accepts the next character. Every prefix for which
     ``getDictWord`` returns a truthy value is collected.

     ``self.pos`` is restored before returning, and the result is stored in
     ``self.cache`` keyed by that position.

     Returns:
         The list of matched words. When nothing matched, the list holds a
         single placeholder ``Word`` whose ``text`` is 'X' and whose
         ``length`` is -1.
     """
     # Reuse a previous result if this position has already been scanned.
     for slot in range(self.cacheSize):
         entry = self.cache[slot]
         if entry[0] == self.pos:
             return entry[1]

     startPos = self.pos
     matches = []
     consumed = 0
     while self.pos < self.textLength and consumed < maxWordLength:
         if not isChineseChar(self.getNextChar()):
             break
         self.pos += 1
         consumed += 1

         # Look up the prefix scanned so far; keep it when the lookup hits.
         candidate = getDictWord(self.text[startPos:self.pos])
         if candidate:
             matches.append(candidate)

     # The scan only looks ahead, so put the position back where it started.
     self.pos = startPos

     # If no word was found, store a placeholder 'X' and mark its length as -1.
     if not matches:
         placeholder = Word()
         placeholder.length = -1
         placeholder.text = 'X'
         matches.append(placeholder)

     # Remember the result in the next cache slot, wrapping around at the end.
     self.cache[self.cacheIndex] = (self.pos, matches)
     self.cacheIndex += 1
     if self.cacheIndex >= self.cacheSize:
         self.cacheIndex = 0
     return matches

Example #2

Show file

File: yammseg.py Project: EricChanBD/yaseg

 def getASCIIWords(self):
     """Return the next run of ASCII characters at or after ``self.pos``.

     Characters that are neither ASCII (per ``isASCIIChar``) nor Chinese
     (per ``isChineseChar``) are skipped first; the original comments
     describe these as Chinese/English punctuation and spaces. The method
     then advances over consecutive ASCII characters.

     ``self.pos`` is left just past the last ASCII character consumed.

     Returns:
         The slice of ``self.text`` covering the ASCII run. It is empty
         when the skip stops at a Chinese character or at the end of text.
     """
     # Skip leading characters that are neither ASCII nor Chinese.
     while self.pos < self.textLength:
         current = self.getNextChar()
         if not isASCIIChar(current) and not isChineseChar(current):
             self.pos += 1
             continue
         break

     # Start position of the English word.
     start = self.pos

     # Advance to the end position of the English word.
     while self.pos < self.textLength and isASCIIChar(self.getNextChar()):
         self.pos += 1

     # NOTE: a second skip over trailing punctuation/spaces used to follow
     # here but is disabled, so separators after the word are not consumed.

     # Return the English word.
     return self.text[start:self.pos]

Example #3

Show file

File: yammseg.py Project: xuyitie/yaseg

    def getMatchChineseWords(self):
        """Return every dictionary word that starts at ``self.pos``.

        Prefixes of the text beginning at the current position are tried in
        increasing length, up to ``maxWordLength`` characters, stopping early
        at the first character rejected by ``isChineseChar``. Each prefix is
        passed to ``getDictWord`` and kept when the result is truthy.

        The position is rewound to where it started before returning, and
        the result is written into the cache under that position.

        Returns:
            A list of matched words, or a one-element list containing a
            placeholder ``Word`` (``text`` 'X', ``length`` -1) when there
            was no match.
        """
        # Check the cache first.
        for slot in range(self.cacheSize):
            cached = self.cache[slot]
            if cached[0] == self.pos:
                return cached[1]

        begin = self.pos
        found = []
        steps = 0
        while self.pos < self.textLength and steps < maxWordLength:
            if not isChineseChar(self.getNextChar()):
                break
            self.pos += 1
            steps += 1

            hit = getDictWord(self.text[begin:self.pos])
            if hit:
                found.append(hit)

        # Rewind: the scan above must not consume any input.
        self.pos = begin

        # If no word was found, store a placeholder 'X' and mark its length as -1.
        if not found:
            dummy = Word()
            dummy.length = -1
            dummy.text = 'X'
            found.append(dummy)

        # Store the result in the current cache slot, then advance the slot
        # index, wrapping to 0 once it reaches cacheSize.
        self.cache[self.cacheIndex] = (self.pos, found)
        self.cacheIndex += 1
        if self.cacheIndex >= self.cacheSize:
            self.cacheIndex = 0
        return found

Example #4

Show file

File: yammseg.py Project: xuyitie/yaseg

    def getASCIIWords(self):
        """Extract the next ASCII word from the text.

        First moves ``self.pos`` past any characters that both
        ``isASCIIChar`` and ``isChineseChar`` reject (punctuation and
        spaces, according to the original comments). It then moves past
        consecutive characters accepted by ``isASCIIChar``.

        Returns:
            The text between the end of the skipped prefix and the final
            position. The result is an empty string when no ASCII character
            follows the skipped prefix.
        """
        # Skip Chinese/English punctuation and spaces before the word.
        while self.pos < self.textLength:
            nextChar = self.getNextChar()
            if not isASCIIChar(nextChar) and not isChineseChar(nextChar):
                self.pos += 1
                continue
            break

        # Start position of the English word.
        wordStart = self.pos

        # Find the end position of the English word.
        while self.pos < self.textLength and isASCIIChar(self.getNextChar()):
            self.pos += 1

        # NOTE: skipping of punctuation/spaces after the word is disabled
        # here, so trailing separators stay unconsumed.

        # Return the English word.
        return self.text[wordStart:self.pos]

Example #5

Show file

File: yammseg.py Project: EricChanBD/yaseg

 def getNextToken(self):
     """Return the next token, or None when there is nothing to return.

     If the next character is Chinese (per ``isChineseChar``) the token
     comes from ``getChineseWords``; otherwise it is the result of
     ``getASCIIWords`` with a '/' appended.

     Returns:
         The token when its length is greater than zero, otherwise None.
         None is also returned once ``self.pos`` has reached
         ``self.textLength``.
     """
     # Nothing left to read.
     if self.pos >= self.textLength:
         return None

     if isChineseChar(self.getNextChar()):
         token = self.getChineseWords()
     else:
         # The '/' suffix makes this branch's token non-empty even when
         # getASCIIWords returns an empty string.
         token = self.getASCIIWords() + '/'

     return token if len(token) > 0 else None

Example #6

Show file

File: yammseg.py Project: xuyitie/yaseg

 def getNextToken(self):
     """Produce the next token from the text, or None if none is available.

     Dispatches on the upcoming character: Chinese characters (per
     ``isChineseChar``) are handled by ``getChineseWords``, and everything
     else by ``getASCIIWords``, whose result gets a trailing '/'.

     Returns:
         The token if it is non-empty, else None. None is also returned
         when ``self.pos`` is not below ``self.textLength``.
     """
     # End of text: there is no further token.
     if self.pos >= self.textLength:
         return None

     nextIsChinese = isChineseChar(self.getNextChar())
     # The ASCII branch always appends '/', so its token is never empty.
     token = self.getChineseWords() if nextIsChinese else self.getASCIIWords() + '/'

     if len(token) > 0:
         return token
     return None