def reset(self):
    """Put the prober back into its initial state.

    Resets the base prober, then the coding state machine and the
    distribution analyzer when they are set, and finally clears the
    two-slot buffer holding the last character seen.
    """
    CharSetProber.reset(self)
    # Reset the optional helpers in a fixed order; unset (falsy) ones are
    # skipped.
    for attr_name in ("_mCodingSM", "_mDistributionAnalyzer"):
        helper = getattr(self, attr_name)
        if helper:
            helper.reset()
    self._mLastChar = [_bytechar(0) for _ in range(2)]
def get_order(self, aStr):
    """Return the order of a two-byte GB2312 character, or -1.

    Only characters with a first byte in 0xB0-0xFE and a second byte in
    0xA1-0xFE are of interest.  Upper bounds are not checked here; the
    state machine has already validated the sequence.
    """
    if aStr[0] < _bytechar(0xB0) or aStr[1] < _bytechar(0xA1):
        return -1
    # Each first byte owns a row of 94 consecutive orders.
    row = _byteord(aStr[0]) - 0xB0
    column = _byteord(aStr[1]) - 0xA1
    return 94 * row + column
def get_order(self, aStr):
    """Compute the order for a GB2312 two-byte character.

    Characters of interest have a first byte of at least 0xB0 and a
    second byte of at least 0xA1 (ranges 0xB0-0xFE and 0xA1-0xFE); anything
    else yields -1.  No further validation is needed because the state
    machine has done it already.
    """
    in_range = aStr[0] >= _bytechar(0xB0) and aStr[1] >= _bytechar(0xA1)
    if not in_range:
        return -1
    # 94 orders per first byte, counted from 0xB0; second byte counted
    # from 0xA1.
    lead_offset = _byteord(aStr[0]) - 0xB0
    return 94 * lead_offset + _byteord(aStr[1]) - 0xA1
def get_order(self, aStr):
    """Return the order of a two-byte Big5 character, or -1.

    Characters of interest have a first byte in 0xA4-0xFE and a second
    byte in 0x40-0x7E or 0xA1-0xFE.  The state machine has already
    validated the sequence, so only the lower bounds are tested here.
    """
    if aStr[0] < _bytechar(0xA4):
        return -1
    if aStr[1] >= _bytechar(0xA1):
        # Upper trail range: counted from 0xA1, placed after the 63
        # orders of the lower trail range.
        trail_adjust = 63 - 0xA1
    else:
        # Lower trail range: counted from 0x40.
        trail_adjust = -0x40
    # Each first byte owns a row of 157 consecutive orders.
    return 157 * (_byteord(aStr[0]) - 0xA4) + _byteord(aStr[1]) + trail_adjust
def get_order(self, aStr):
    """Compute the order for a Big5 two-byte character.

    First byte range of interest: 0xA4-0xFE.  Second byte ranges:
    0x40-0x7E and 0xA1-0xFE.  Returns -1 when the first byte is below
    0xA4.  No validation is needed; the state machine has done that.
    """
    lead = aStr[0]
    if lead < _bytechar(0xA4):
        return -1

    trail = aStr[1]
    in_upper_range = trail >= _bytechar(0xA1)
    # 157 orders per first byte, counted from 0xA4.
    row_start = 157 * (_byteord(lead) - 0xA4)
    if in_upper_range:
        # The upper trail range follows the 63 orders of the lower one.
        return row_start + _byteord(trail) - 0xA1 + 63
    return row_start + _byteord(trail) - 0x40
def get_order(self, aStr):
    """Return the order of a two-byte Shift_JIS character, or -1.

    Only first bytes in 0x81-0x9F and 0xE0-0xEF are of interest; any
    other first byte yields -1.  No validation is needed here because the
    state machine has already accepted the sequence.

    Each first byte owns a row of 188 consecutive orders.  The second
    byte is counted from 0x40, and because the second-byte ranges leave a
    hole at 0x7F, second bytes above 0x7F are shifted down by one so the
    orders within a row stay contiguous.

    Args:
        aStr: sequence whose first two items are the bytes of the
            character.

    Returns:
        The non-negative order, or -1 if the first byte is out of range.
    """
    lead = aStr[0]
    if _bytechar(0x81) <= lead <= _bytechar(0x9F):
        order = 188 * (_byteord(lead) - 0x81)
    elif _bytechar(0xE0) <= lead <= _bytechar(0xEF):
        # The 0xE0 block continues after the 31 rows of 0x81-0x9F.
        order = 188 * (_byteord(lead) - 0xE0 + 31)
    else:
        return -1

    order += _byteord(aStr[1]) - 0x40
    if aStr[1] > _bytechar(0x7F):
        # Close the gap left by the unused 0x7F.  Previously this set the
        # order to -1, which discarded every character whose second byte
        # was above 0x7F.
        order -= 1
    return order
def get_order(self, aStr):
    """Compute the order for a Shift_JIS two-byte character.

    First bytes of interest are 0x81-0x9F and 0xE0-0xEF; for any other
    first byte the result is -1.  The state machine has already validated
    the sequence, so no further checks are made.

    Every first byte maps to a row of 188 orders.  The second byte is
    counted from 0x40; 0x7F is not used as a second byte, so values above
    it are moved down by one to keep each row gap-free.

    Args:
        aStr: sequence whose first two items are the bytes of the
            character.

    Returns:
        The non-negative order, or -1 if the first byte is out of range.
    """
    first, second = aStr[0], aStr[1]
    if _bytechar(0x81) <= first <= _bytechar(0x9F):
        row = _byteord(first) - 0x81
    elif _bytechar(0xE0) <= first <= _bytechar(0xEF):
        # Rows for 0xE0.. come after the 31 rows for 0x81-0x9F.
        row = _byteord(first) - 0xE0 + 31
    else:
        return -1

    order = 188 * row + _byteord(second) - 0x40
    if second > _bytechar(0x7F):
        # Skip over the unused 0x7F slot.  The old code assigned -1 here,
        # throwing away all characters with a second byte above 0x7F.
        order -= 1
    return order
def get_order(self, aStr):
    """Return the order of a two-byte EUC-TW character, or -1.

    Characters of interest have a first byte in 0xC4-0xFE and a second
    byte in 0xA1-0xFE.  Only the lower bound of the first byte is tested;
    the state machine has already validated the sequence.
    """
    lead = aStr[0]
    if lead < _bytechar(0xC4):
        return -1
    # 94 orders per first byte, counted from 0xC4; second byte counted
    # from 0xA1.
    return 94 * (_byteord(lead) - 0xC4) + _byteord(aStr[1]) - 0xA1
def get_order(self, aStr):
    """Return ``(order, charLen)`` for the character at the start of aStr.

    ``charLen`` is the byte length of the current character: 2 when the
    first byte is in 0x81-0x9F or 0xE0-0xFC, otherwise 1.  ``order`` is
    the hiragana index (second byte minus 0x9F) when the character is a
    hiragana, i.e. first byte 0x82 and second byte in 0x9F-0xF1; for
    every other character it is -1.

    Args:
        aStr: the bytes starting at the current character; may be empty.

    Returns:
        A ``(order, charLen)`` tuple.  Empty input, or a
        UnicodeDecodeError raised while comparing the first byte, yields
        ``(-1, 1)``.
    """
    if not aStr:
        return -1, 1

    # Find out the current character's byte length.
    try:
        if (_bytechar(0x81) <= aStr[0] <= _bytechar(0x9F)) or \
           (_bytechar(0xE0) <= aStr[0] <= _bytechar(0xFC)):
            charLen = 2
        else:
            charLen = 1
    except UnicodeDecodeError:
        return -1, 1

    # Return its order if it is hiragana.
    if len(aStr) > 1:
        # The hiragana first byte is 0x82 (octal 202).  The previous code
        # compared against decimal 202 (0xCA), which is classified above
        # as a single-byte character, so hiragana was never recognized.
        if (aStr[0] == _bytechar(0x82)) and \
           (_bytechar(0x9F) <= aStr[1] <= _bytechar(0xF1)):
            return _byteord(aStr[1]) - 0x9F, charLen

    return -1, charLen
def get_order(self, aStr):
    """Return ``(order, charLen)`` for the character at the start of aStr.

    ``charLen`` is 3 when the first byte is 0x8F, 2 when it is 0x8E or in
    0xA1-0xFE, and 1 otherwise.  ``order`` is the second byte minus 0xA1
    when the character is a hiragana (first byte 0xA4, second byte in
    0xA1-0xF3) and -1 for anything else.  Empty input, or a
    UnicodeDecodeError raised while inspecting the first byte, yields
    ``(-1, 1)``.
    """
    if not aStr:
        return -1, 1

    # Work out how many bytes the current character occupies.
    try:
        lead = aStr[0]
        if lead == _bytechar(0x8F):
            width = 3
        elif lead == _bytechar(0x8E) or \
                _bytechar(0xA1) <= lead <= _bytechar(0xFE):
            width = 2
        else:
            width = 1
    except UnicodeDecodeError:
        return -1, 1

    # Only hiragana get a real order.
    is_hiragana = (
        len(aStr) > 1
        and lead == _bytechar(0xA4)
        and _bytechar(0xA1) <= aStr[1] <= _bytechar(0xF3)
    )
    if is_hiragana:
        return _byteord(aStr[1]) - 0xA1, width
    return -1, width
def __init__(self):
    """Initialize the base prober and the prober's own state.

    The distribution analyzer and the coding state machine start out as
    None, and the last-character buffer holds two zero bytes.
    """
    CharSetProber.__init__(self)
    # Both helpers start unset.
    self._mDistributionAnalyzer = self._mCodingSM = None
    self._mLastChar = [_bytechar(0) for _ in range(2)]