Ejemplo n.º 1
0
 def reset(self):
     """Put the prober back into its initial state.

     Resets the base ``CharSetProber`` state, then the coding state
     machine and the distribution analyzer when they are set, and
     finally clears the two-element last-character buffer.
     """
     CharSetProber.reset(self)
     # Helpers are reset in the same order as before; unset (falsy) ones
     # are skipped.
     for helper_name in ("_mCodingSM", "_mDistributionAnalyzer"):
         helper = getattr(self, helper_name)
         if helper:
             helper.reset()
     self._mLastChar = [_bytechar(0), _bytechar(0)]
Ejemplo n.º 2
0
 def reset(self):
     """Reset this prober and whichever helper objects it currently owns."""
     CharSetProber.reset(self)

     coding_sm = self._mCodingSM
     if coding_sm:
         coding_sm.reset()

     analyzer = self._mDistributionAnalyzer
     if analyzer:
         analyzer.reset()

     # Start again with an empty two-slot last-character buffer.
     self._mLastChar = [_bytechar(0), _bytechar(0)]
Ejemplo n.º 3
0
 def get_order(self, aStr):
     """Map a two-byte GB2312 character to its frequency-table order.

     Returns ``94 * (first - 0xB0) + (second - 0xA1)`` when the first
     byte is at least 0xB0 and the second byte is at least 0xA1;
     otherwise returns -1.
     """
     # Ranges of interest for GB2312:
     #   first  byte: 0xb0 -- 0xfe
     #   second byte: 0xa1 -- 0xfe
     # Only the lower bounds are tested; the state machine has already
     # validated the bytes.
     if not (aStr[0] >= _bytechar(0xB0) and aStr[1] >= _bytechar(0xA1)):
         return -1
     row = _byteord(aStr[0]) - 0xB0
     column = _byteord(aStr[1]) - 0xA1
     return 94 * row + column
Ejemplo n.º 4
0
 def get_order(self, aStr):
     """Return the GB2312 frequency order of the character in aStr, or -1.

     The order is the position in a grid with 94 columns per first
     byte, counted from first byte 0xB0 and second byte 0xA1.
     """
     # GB2312 bytes we care about:
     #   first  byte range: 0xb0 -- 0xfe
     #   second byte range: 0xa1 -- 0xfe
     # The state machine has validated the input, so checking the lower
     # bounds is enough.
     in_range = (aStr[0] >= _bytechar(0xB0)) and (aStr[1] >= _bytechar(0xA1))
     if in_range:
         first_offset = _byteord(aStr[0]) - 0xB0
         return 94 * first_offset + _byteord(aStr[1]) - 0xA1
     return -1
Ejemplo n.º 5
0
 def get_order(self, aStr):
     """Map a two-byte Big5 character to its frequency-table order.

     Each first byte (counted from 0xA4) owns 157 slots: second bytes
     below 0xA1 are counted from 0x40, and second bytes from 0xA1 up are
     placed after those, starting at slot 63.  Returns -1 when the first
     byte is below 0xA4.
     """
     # Big5 ranges of interest:
     #   first  byte: 0xa4 -- 0xfe
     #   second byte: 0x40 -- 0x7e , 0xa1 -- 0xfe
     # No validation needed here; the state machine has done that.
     if not aStr[0] >= _bytechar(0xA4):
         return -1
     upper_trail = aStr[1] >= _bytechar(0xA1)
     row_base = 157 * (_byteord(aStr[0]) - 0xA4)
     trail = _byteord(aStr[1])
     if upper_trail:
         return row_base + (trail - 0xA1 + 63)
     return row_base + (trail - 0x40)
Ejemplo n.º 6
0
 def get_order(self, aStr):
     """Return the Big5 frequency order for the character in aStr, or -1.

     The result is ``157 * (first - 0xA4)`` plus the second byte's slot:
     ``second - 0x40`` for second bytes below 0xA1, and
     ``second - 0xA1 + 63`` otherwise.
     """
     # Interesting Big5 bytes:
     #   first  byte range: 0xa4 -- 0xfe
     #   second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
     # Input was validated by the state machine already.
     if aStr[0] >= _bytechar(0xA4):
         if aStr[1] >= _bytechar(0xA1):
             # Upper range sits after the 63 slots of 0x40..0x7e.
             adjust = 63 - 0xA1
         else:
             adjust = -0x40
         return 157 * (_byteord(aStr[0]) - 0xA4) + _byteord(aStr[1]) + adjust
     return -1
Ejemplo n.º 7
0
 def get_order(self, aStr):
     """Map a two-byte Shift_JIS character to its frequency-table order.

     Lead bytes 0x81-0x9F fill rows 0-30 and lead bytes 0xE0-0xEF fill
     rows 31 onward; every row holds 188 trail-byte slots.  Any other
     lead byte yields -1.

     Trail bytes above 0x7F are shifted down by one so that the unused
     0x7F position leaves no hole in the row.  (Previously such
     characters were given order -1 and therefore dropped from the
     distribution analysis, unlike the reference C++ implementation,
     which decrements the order.)
     """
     # for sjis encoding, we are interested
     #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
     #     (only lead bytes up to 0xef are mapped below)
     #   second byte range: 0x40 -- 0x7e,  0x81 -- 0xfe
     # no validation needed here. State machine has done that
     if (_bytechar(0x81) <= aStr[0] <= _bytechar(0x9F)):
         order = 188 * (_byteord(aStr[0]) - 0x81)
     elif (_bytechar(0xE0) <= aStr[0] <= _bytechar(0xEF)):
         order = 188 * (_byteord(aStr[0]) - 0xE0 + 31)
     else:
         return -1
     order = order + _byteord(aStr[1]) - 0x40
     if aStr[1] > _bytechar(0x7F):
         # Skip over the 0x7F gap instead of discarding the character.
         order -= 1
     return order
Ejemplo n.º 8
0
 def get_order(self, aStr):
     """Return the Shift_JIS frequency order of the character in aStr.

     The order is ``188 * row + slot``.  ``row`` counts lead bytes from
     0x81 (for 0x81-0x9F) or continues at 31 for lead bytes 0xE0-0xEF;
     any other lead byte returns -1.  ``slot`` is the trail byte counted
     from 0x40, reduced by one for trail bytes above 0x7F so the unused
     0x7F position does not leave a gap.

     The earlier code assigned -1 to every character whose trail byte
     was above 0x7F, which removed those characters from the analysis;
     the reference C++ implementation decrements the order instead.
     """
     # for sjis encoding, we are interested
     #   first  byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
     #     (only lead bytes up to 0xef are mapped below)
     #   second byte range: 0x40 -- 0x7e,  0x81 -- 0xfe
     # no validation needed here. State machine has done that
     if (_bytechar(0x81) <= aStr[0] <= _bytechar(0x9F)):
         order = 188 * (_byteord(aStr[0]) - 0x81)
     elif (_bytechar(0xE0) <= aStr[0] <= _bytechar(0xEF)):
         order = 188 * (_byteord(aStr[0]) - 0xE0 + 31)
     else:
         return -1
     order = order + _byteord(aStr[1]) - 0x40
     if aStr[1] > _bytechar(0x7F):
         # Close the one-position gap at 0x7F.
         order -= 1
     return order
Ejemplo n.º 9
0
 def get_order(self, aStr):
     """Map a two-byte EUC-TW character to its frequency-table order.

     Returns ``94 * (first - 0xC4) + (second - 0xA1)`` when the first
     byte is at least 0xC4, and -1 otherwise.
     """
     # EUC-TW ranges of interest:
     #   first  byte: 0xc4 -- 0xfe
     #   second byte: 0xa1 -- 0xfe
     # Bytes were validated by the state machine, so only the first
     # byte's lower bound is checked.
     if not aStr[0] >= _bytechar(0xC4):
         return -1
     row = _byteord(aStr[0]) - 0xC4
     column = _byteord(aStr[1]) - 0xA1
     return 94 * row + column
Ejemplo n.º 10
0
 def get_order(self, aStr):
     """Return the EUC-TW frequency order for the character in aStr, or -1.

     Characters are laid out 94 per first byte, counted from first byte
     0xC4 and second byte 0xA1.
     """
     # Interesting EUC-TW bytes:
     #   first  byte range: 0xc4 -- 0xfe
     #   second byte range: 0xa1 -- 0xfe
     # The state machine has already validated the input.
     if aStr[0] >= _bytechar(0xC4):
         first_offset = _byteord(aStr[0]) - 0xC4
         return 94 * first_offset + _byteord(aStr[1]) - 0xA1
     return -1
Ejemplo n.º 11
0
    def get_order(self, aStr):
        """Return ``(order, charLen)`` for the character at the start of aStr.

        ``charLen`` is 2 when the first byte is a Shift_JIS lead byte
        (0x81-0x9F or 0xE0-0xFC) and 1 otherwise.  ``order`` is the
        hiragana index (second byte minus 0x9F) when the character is a
        hiragana, i.e. lead byte 0x82 with second byte 0x9F-0xF1;
        otherwise it is -1.  Empty input yields ``(-1, 1)``.

        The hiragana lead byte used to be written as decimal 202 (0xCA);
        the intended value is octal 202, which is 0x82.
        """
        if not aStr:
            return -1, 1
        # find out current char's byte length
        try:
            if (_bytechar(0x81) <= aStr[0] <= _bytechar(0x9F)) or \
               (_bytechar(0xE0) <= aStr[0] <= _bytechar(0xFC)):
                charLen = 2
            else:
                charLen = 1
        except UnicodeDecodeError:
            return -1, 1

        # return its order if it is hiragana (0x82 0x9F .. 0x82 0xF1)
        if len(aStr) > 1:
            if (aStr[0] == _bytechar(0x82)) and \
               (_bytechar(0x9F) <= aStr[1] <= _bytechar(0xF1)):
                return _byteord(aStr[1]) - 0x9F, charLen

        return -1, charLen
Ejemplo n.º 12
0
    def get_order(self, aStr):
        """Return ``(order, byte_length)`` for the character starting aStr.

        The byte length is 2 when the first byte is 0x8E or lies in
        0xA1-0xFE, 3 when it is 0x8F, and 1 otherwise.  The order is the
        second byte minus 0xA1 when the first byte is 0xA4 and the second
        byte lies in 0xA1-0xF3 (the hiragana case); otherwise it is -1.
        Empty input, or a ``UnicodeDecodeError`` while classifying the
        first byte, yields ``(-1, 1)``.
        """
        if not aStr:
            return -1, 1

        # Work out how many bytes the current character occupies.
        try:
            lead = aStr[0]
            two_byte = (lead == _bytechar(0x8E)) or \
                (_bytechar(0xA1) <= lead <= _bytechar(0xFE))
            if two_byte:
                width = 2
            else:
                width = 3 if lead == _bytechar(0x8F) else 1
        except UnicodeDecodeError:
            return -1, 1

        # Only hiragana get an order; everything else reports -1.
        is_hiragana = (
            len(aStr) > 1
            and lead == _bytechar(0xA4)
            and _bytechar(0xA1) <= aStr[1] <= _bytechar(0xF3)
        )
        if not is_hiragana:
            return -1, width
        return _byteord(aStr[1]) - 0xA1, width
Ejemplo n.º 13
0
 def __init__(self):
     """Initialise the base prober and clear this prober's own state.

     The distribution analyzer and coding state machine start out as
     ``None``; the last-character buffer starts as two ``_bytechar(0)``
     entries.
     """
     CharSetProber.__init__(self)
     # No helper objects are attached at construction time.
     self._mDistributionAnalyzer = self._mCodingSM = None
     self._mLastChar = [_bytechar(0), _bytechar(0)]
Ejemplo n.º 14
0
 def __init__(self):
     """Set up base-class state, then blank out the helper slots."""
     CharSetProber.__init__(self)
     # Both helpers begin unset.
     self._mDistributionAnalyzer, self._mCodingSM = None, None
     # Two-slot buffer holding the most recent bytes, initially zeroed.
     self._mLastChar = [_bytechar(0), _bytechar(0)]