Example #1
0
	def reset(self):
		"""Return the prober to its starting state.

		Resets the base prober first, then the coding state machine and the
		distribution analyzer when they are set, and finally re-initialises
		the two-element last-character buffer.
		"""
		CharSetProber.reset(self)
		state_machine = self._mCodingSM
		if state_machine:
			state_machine.reset()
		analyzer = self._mDistributionAnalyzer
		if analyzer:
			analyzer.reset()
		self._mLastChar = ['\x00'] * 2
Example #2
0
 def reset(self):
     """Reset the base prober and clear every per-run counter."""
     CharSetProber.reset(self)
     # char order of last character
     self._mLastOrder = 255
     self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
     # _mFreqChar counts characters that fall in our sampling range
     self._mTotalSeqs = self._mTotalChar = self._mFreqChar = 0
Example #3
0
 def reset(self):
     """Return the prober to its starting state.

     Resets the base prober first, then the coding state machine and the
     distribution analyzer when they are set, and finally re-initialises
     the two-element last-character buffer.
     """
     CharSetProber.reset(self)
     state_machine = self._mCodingSM
     if state_machine:
         state_machine.reset()
     analyzer = self._mDistributionAnalyzer
     if analyzer:
         analyzer.reset()
     self._mLastChar = ['\x00'] * 2
 def reset(self):
     """Reset the base prober and clear every per-run counter."""
     CharSetProber.reset(self)
     # char order of last character
     self._mLastOrder = 255
     self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
     # _mFreqChar counts characters that fall in our sampling range
     self._mTotalSeqs = self._mTotalChar = self._mFreqChar = 0
 def reset(self):
     """Return the prober to its starting state.

     Resets the base prober first, then the coding state machine and the
     distribution analyzer when they are set, and finally rebuilds the
     two-element last-character buffer from ``_bytechar(0)``.
     """
     CharSetProber.reset(self)
     state_machine = self._mCodingSM
     if state_machine:
         state_machine.reset()
     analyzer = self._mDistributionAnalyzer
     if analyzer:
         analyzer.reset()
     self._mLastChar = [_bytechar(0) for _ in range(2)]
 def reset(self):
     """Return the prober to its starting state.

     Resets the base prober first, then the coding state machine and the
     distribution analyzer when they are set, and finally rebuilds the
     two-element last-character buffer from ``_bytechar(0)``.
     """
     CharSetProber.reset(self)
     state_machine = self._mCodingSM
     if state_machine:
         state_machine.reset()
     analyzer = self._mDistributionAnalyzer
     if analyzer:
         analyzer.reset()
     self._mLastChar = [_bytechar(0) for _ in range(2)]
Example #7
0
 def reset(self):
     """Reset the base prober and re-activate every coding state machine.

     Each truthy entry of ``self._mCodingSM`` is marked active and reset;
     falsy entries are skipped.  ``_mActiveSM`` is then set to the length of
     the whole list and the detected charset is cleared.
     """
     CharSetProber.reset(self)
     for codingSM in self._mCodingSM:
         if not codingSM:
             continue
         # Use the built-in True: ``constants.True`` does not even parse on
         # Python 3, where True is a reserved word.
         codingSM.active = True
         codingSM.reset()
     self._mActiveSM = len(self._mCodingSM)
     self._mDetectedCharset = None
Example #8
0
 def reset(self):
     """Reset the base prober and re-activate every coding state machine.

     Truthy entries of ``self._mCodingSM`` are flagged active and reset;
     ``_mActiveSM`` becomes the length of the whole list and the detected
     charset is cleared.
     """
     CharSetProber.reset(self)
     for state_machine in self._mCodingSM:
         if state_machine:
             state_machine.active = True
             state_machine.reset()
     self._mActiveSM = len(self._mCodingSM)
     self._mDetectedCharset = None
 def reset(self):
     """Reset the base prober and every child prober in the group.

     Each truthy entry of ``self._mProbers`` is reset, marked active and
     counted in ``_mActiveNum``; the best-guess prober is then cleared.
     """
     CharSetProber.reset(self)
     self._mActiveNum = 0
     for prober in self._mProbers:
         if prober:
             prober.reset()
             # Use the built-in True: ``constants.True`` does not even parse
             # on Python 3, where True is a reserved word.
             prober.active = True
             self._mActiveNum += 1
     self._mBestGuessProber = None
Example #10
0
 def __init__(self):
     """Build one coding state machine per escape-sequence model, then reset."""
     CharSetProber.__init__(self)
     models = (
         HZSMModel,
         ISO2022CNSMModel,
         ISO2022JPSMModel,
         ISO2022KRSMModel,
     )
     self._mCodingSM = [CodingStateMachine(model) for model in models]
     self.reset()
Example #11
0
 def __init__(self):
     """Build one coding state machine per escape-sequence model, then reset."""
     CharSetProber.__init__(self)
     models = (
         HZSMModel,
         ISO2022CNSMModel,
         ISO2022JPSMModel,
         ISO2022KRSMModel,
     )
     self._mCodingSM = [CodingStateMachine(model) for model in models]
     self.reset()
 def reset(self):
     """Reset the base prober and every child prober in the group.

     Each truthy entry of ``self._mProbers`` is reset, marked active and
     counted in ``_mActiveNum``; the best-guess prober is then cleared.
     """
     CharSetProber.reset(self)
     self._mActiveNum = 0
     for prober in self._mProbers:
         if prober:
             prober.reset()
             # Use the built-in True: ``constants.True`` does not even parse
             # on Python 3, where True is a reserved word.
             prober.active = True
             self._mActiveNum += 1
     self._mBestGuessProber = None
Example #13
0
    def reset(self):
        """Clear the character-class state, then reset the base prober.

        The frequency counters start at zero except for slot 2, which is
        seeded with 10 (see the comment below).
        """
        self._mLastCharClass = OTH
        counters = [0] * FREQ_CAT_NUM
        self._mFreqCounter = counters

        # Encode the prior that MacRoman is a somewhat rare encoding by
        # starting in a slightly improbable state that has to be overcome.
        counters[2] = 10

        CharSetProber.reset(self)
Example #14
0
	def __init__(self):
		"""Initialise the base prober with no analyzer or state machine yet."""
		CharSetProber.__init__(self)
		self._mDistributionAnalyzer = self._mCodingSM = None
		self._mLastChar = ['\x00'] * 2
 def __init__(self):
     """Initialise the base prober with no analyzer or state machine yet."""
     CharSetProber.__init__(self)
     self._mDistributionAnalyzer = self._mCodingSM = None
     self._mLastChar = [_bytechar(0) for _ in range(2)]
 def __init__(self):
     """Initialise the base prober with an empty list of child probers."""
     CharSetProber.__init__(self)
     self._mProbers = []
     self._mActiveNum = 0
     self._mBestGuessProber = None
Example #17
0
 def __init__(self):
     """Create the UTF-8 coding state machine and reset the prober."""
     CharSetProber.__init__(self)
     # Assigned before reset() is called; presumably reset() uses it -- confirm.
     self._mCodingSM = CodingStateMachine(UTF8SMModel)
     self.reset()
Example #18
0
 def __init__(self):
     """Initialise the base prober with no analyzer or state machine yet."""
     CharSetProber.__init__(self)
     self._mDistributionAnalyzer = self._mCodingSM = None
     self._mLastChar = ['\x00'] * 2
Example #19
0
 def reset(self):
     """Clear the character-class state, then reset the base prober."""
     self._mLastCharClass = OTH
     zeroed_counters = [0] * FREQ_CAT_NUM
     self._mFreqCounter = zeroed_counters
     CharSetProber.reset(self)
import constants, sys
from charsetprober import CharSetProber

# Tuning constants for the single-byte prober.
# NOTE(review): the exact meaning of the thresholds is not visible in this
# excerpt -- confirm against the code that consumes them.
SAMPLE_SIZE = 64
SB_ENOUGH_REL_THRESHOLD = 1024
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
SYMBOL_CAT_ORDER = 250
# Number of sequence categories; reset() sizes its counter list with it.
NUMBER_OF_SEQ_CAT = 4
# Index of the last sequence category.
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0
 
class SingleByteCharSetProber(CharSetProber):
    def __init__(self, model, reversed=False, nameProber=None):
        """Store the model and options, then reset the prober.

        model      -- model object kept in ``_mModel``
        reversed   -- true if every pair must be reversed in the model lookup
        nameProber -- optional auxiliary prober for the name decision

        The default for ``reversed`` is the built-in False; the former
        ``constants.False`` spelling does not parse on Python 3.
        """
        CharSetProber.__init__(self)
        self._mModel = model
        self._mReversed = reversed
        self._mNameProber = nameProber
        self.reset()

    def reset(self):
        """Reset the base prober and clear every per-run counter."""
        CharSetProber.reset(self)
        # char order of last character
        self._mLastOrder = 255
        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        # _mFreqChar counts characters that fall in our sampling range
        self._mTotalSeqs = self._mTotalChar = self._mFreqChar = 0

    def get_charset_name(self):
        if self._mNameProber:
Example #21
0
 def reset(self):
     """Reset the base prober and the coding state machine, and zero the
     multi-byte character count."""
     CharSetProber.reset(self)
     # Unlike some sibling probers, the state machine is reset unconditionally.
     self._mCodingSM.reset()
     self._mNumOfMBChar = 0
Example #22
0
 def __init__(self):
     """Initialise the base prober and put this prober in its reset state."""
     CharSetProber.__init__(self)
     self.reset()
Example #23
0
 def __init__(self):
     """Initialise with no logical or visual prober attached, then reset."""
     CharSetProber.__init__(self)
     self._mLogicalProber = self._mVisualProber = None
     self.reset()
 def __init__(self):
     """Initialise the base prober with an empty list of child probers."""
     CharSetProber.__init__(self)
     self._mProbers = []
     self._mActiveNum = 0
     self._mBestGuessProber = None
Example #25
0
	def __init__(self):
		"""Initialise with no logical or visual prober attached, then reset."""
		CharSetProber.__init__(self)
		self._mLogicalProber = self._mVisualProber = None
		self.reset()
Example #26
0
from charsetprober import CharSetProber

# Tuning constants for the single-byte prober.
# NOTE(review): the exact meaning of the thresholds is not visible in this
# excerpt -- confirm against the code that consumes them.
SAMPLE_SIZE = 64
SB_ENOUGH_REL_THRESHOLD = 1024
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
SYMBOL_CAT_ORDER = 250
# Number of sequence categories; reset() sizes its counter list with it.
NUMBER_OF_SEQ_CAT = 4
# Index of the last sequence category.
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1

#NEGATIVE_CAT = 0


class SingleByteCharSetProber(CharSetProber):
    def __init__(self, model, reversed=False, nameProber=None):
        """Store the model and options, then reset the prober.

        model      -- model object kept in ``_mModel``
        reversed   -- true if every pair must be reversed in the model lookup
        nameProber -- optional auxiliary prober for the name decision

        The default for ``reversed`` is the built-in False; the former
        ``constants.False`` spelling does not parse on Python 3.
        """
        CharSetProber.__init__(self)
        self._mModel = model
        self._mReversed = reversed
        self._mNameProber = nameProber
        self.reset()

    def reset(self):
        """Reset the base prober and clear every per-run counter."""
        CharSetProber.reset(self)
        # char order of last character
        self._mLastOrder = 255
        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        # _mFreqChar counts characters that fall in our sampling range
        self._mTotalSeqs = self._mTotalChar = self._mFreqChar = 0

    def get_charset_name(self):
        if self._mNameProber:
Example #27
0
 def __init__(self):
     """Initialise the base prober and put this prober in its reset state."""
     CharSetProber.__init__(self)
     self.reset()
Example #28
0
 def reset(self):
     """Reset the base prober and the coding state machine, and zero the
     multi-byte character count."""
     CharSetProber.reset(self)
     # Unlike some sibling probers, the state machine is reset unconditionally.
     self._mCodingSM.reset()
     self._mNumOfMBChar = 0
Example #29
0
 def __init__(self):
     """Create the UTF-8 coding state machine and reset the prober."""
     CharSetProber.__init__(self)
     # Assigned before reset() is called; presumably reset() uses it -- confirm.
     self._mCodingSM = CodingStateMachine(UTF8SMModel)
     self.reset()
Example #30
0
 def reset(self):
     """Clear the character-class state, then reset the base prober."""
     self._mLastCharClass = OTH
     zeroed_counters = [0] * FREQ_CAT_NUM
     self._mFreqCounter = zeroed_counters
     CharSetProber.reset(self)
 def __init__(self):
     """Initialise the base prober with no analyzer or state machine yet."""
     CharSetProber.__init__(self)
     self._mDistributionAnalyzer = self._mCodingSM = None
     self._mLastChar = [_bytechar(0) for _ in range(2)]
 def __init__(self, model, reversed=False, nameProber=None):
     """Store the model and options, then reset the prober.

     model      -- model object kept in ``_mModel``
     reversed   -- true if every pair must be reversed in the model lookup
     nameProber -- optional auxiliary prober for the name decision
     """
     CharSetProber.__init__(self)
     self._mNameProber = nameProber
     self._mReversed = reversed
     self._mModel = model
     self.reset()
Example #33
0
 def __init__(self, model, reversed=False, nameProber=None):
     """Store the model and options, then reset the prober.

     model      -- model object kept in ``_mModel``
     reversed   -- true if every pair must be reversed in the model lookup
     nameProber -- optional auxiliary prober for the name decision
     """
     CharSetProber.__init__(self)
     self._mNameProber = nameProber
     self._mReversed = reversed
     self._mModel = model
     self.reset()