def reset(self):
    """Put this object back into its initial, nothing-seen-yet state.

    Clears the reported result, the progress flags, the input state and
    the remembered last character, then resets the escape prober (when
    one is set) followed by every prober in ``_mCharSetProbers``.
    """
    # Nothing detected yet: no encoding and zero confidence.
    self.result = {'encoding': None, 'confidence': 0.0}
    self.done = False
    self._mStart = True
    self._mGotData = False
    self._mInputState = ePureAscii
    self._mLastChar = _b('')

    # The escape prober is optional; only reset it when it is truthy.
    esc_prober = self._mEscCharSetProber
    if esc_prober:
        esc_prober.reset()

    for charset_prober in self._mCharSetProbers:
        charset_prober.reset()
def filter_without_english_letters(self, aBuf):
    """Return ``aBuf`` with each run of ASCII letters collapsed to a space.

    Every maximal run of ``A-Z`` / ``a-z`` characters is replaced by a
    single space; all other content is left as is.
    """
    # _b() is a helper defined outside this block; presumably it adapts
    # the literal to the buffer's string type -- confirm at its definition.
    letters_run = _b(r'([A-Za-z])+')
    single_space = _b(' ')
    return re.sub(letters_run, single_space, aBuf)
def filter_high_bit_only(self, aBuf):
    """Return ``aBuf`` with each run of 7-bit characters collapsed to a space.

    Every maximal run of characters in the range 0x00-0x7F is replaced by
    a single space, so only characters above 0x7F survive unchanged.
    """
    # _b() is a helper defined outside this block; presumably it adapts
    # the literal to the buffer's string type -- confirm at its definition.
    low_bit_run = _b(r'([\x00-\x7F])+')
    single_space = _b(' ')
    return re.sub(low_bit_run, single_space, aBuf)
def __init__(self):
    """Compile the detection patterns, start with no probers, then reset.

    Sets up two compiled regular expressions (one matching any character
    in 0x80-0xFF, one matching ESC or the two-character sequence ``~{``),
    initialises the prober slots as empty and finally calls ``reset()``
    to establish the rest of the initial state.
    """
    high_bit_pattern = re.compile(_b(r'[\x80-\xFF]'))
    escape_pattern = re.compile(_b(r'(\033|~{)'))

    self._highBitDetector = high_bit_pattern
    self._escDetector = escape_pattern

    # No probers exist yet; reset() tolerates both of these empty values.
    self._mEscCharSetProber = None
    self._mCharSetProbers = []

    self.reset()