def readData(self, dictFile):
    """
    Populate this object from an open binary JDIC stream.

    The stream is consumed strictly in order: a 4-byte magic (``JDC0``),
    a 32-byte charset buffer, three unsigned ints in native byte order
    giving the section sizes, and finally the three sections themselves.

    :param dictFile: binary file-like object positioned at the magic bytes.
    :raises RuntimeError: if the stream does not start with ``JDC0``.
    """
    if dictFile.read(4) != b'JDC0':
        raise RuntimeError('File is not JDIC file')

    # Fixed-width charset field: run through extractString, then lower-cased.
    rawCharset = struct.unpack('32s', dictFile.read(32))[0]
    self.charset = extractString(rawCharset).lower()

    # Byte lengths of the three sections that follow, in stream order.
    sizesFormat = 'III'
    sizesBytes = dictFile.read(struct.calcsize(sizesFormat))
    dartsLength, offsetsLength, entriesLength = \
        struct.unpack(sizesFormat, sizesBytes)

    self.lookupDict = DoubleArray(dictFile.read(dartsLength))
    self.entryOffsetBlob = dictFile.read(offsetsLength)
    self.entryBlob = dictFile.read(entriesLength)
def loadFromBinary(self, dictFile):
    """
    Load the dictionary from an open binary stream.

    The stream starts with ten little-endian unsigned 32-bit header fields,
    followed by a 32-byte charset buffer and then the double-array, token
    and feature sections, whose byte lengths are taken from the header.

    :param dictFile: binary file-like object positioned at the header.
    :raises RuntimeError: if the header's version field is not 102.
    """
    headerFormat = str('<IIIIIIIIII')
    fields = unpack(headerFormat, dictFile.read(calcsize(headerFormat)))

    # Header field order: magic, version, dictType, lexSize, leftSize,
    # rightSize, dataSize, tokenPartSize, featurePartSize, dummy.
    # Only the version and the three section sizes are used here.
    version = fields[1]
    dataSize, tokenPartSize, featurePartSize = fields[6:9]

    if version != 102:
        raise RuntimeError(
            'Incompatible dictionary version: {0}'.format(version))

    rawCharset = unpack(str('32s'), dictFile.read(32))[0]
    self.charset = extractString(rawCharset).lower()

    self.doubleArray = DoubleArray(dictFile.read(dataSize))
    self.tokenBlob = dictFile.read(tokenPartSize)
    self.featureBlob = dictFile.read(featurePartSize)
def loadFromBinary(self, inFile, encoding):
    """
    Load the category names and the character-info table from a stream.

    Layout as consumed here (integers are little-endian unsigned 32-bit):

    * one integer: the number of categories,
    * that many 32-byte category-name buffers,
    * 0xffff packed character-info words.

    Category names are appended to ``self.__categories`` and one
    ``CharInfo`` per packed word is appended to ``self.__map``.

    :param inFile: binary file-like object positioned at the category count.
    :param encoding: forwarded to ``utils.extractString`` with each name
        buffer.
    :raises struct.error: if the stream ends before a complete record or
        the complete character-info table could be read.
    """
    uintSize = 4
    categoryBuffer = 32
    charInfoCount = 0xffff

    categoryNum, = unpack('<I', inFile.read(uintSize))

    # The name format never changes, so build it once outside the loop.
    categoryFormat = str(categoryBuffer) + 's'
    for _ in range(categoryNum):
        categoryStr, = unpack(categoryFormat, inFile.read(categoryBuffer))
        self.__categories.append(
            utils.extractString(categoryStr, encoding))

    # Fetch and decode the whole fixed-size table with one read and one
    # unpack instead of 65,535 four-byte round trips. unpack() requires the
    # buffer to match the format size exactly, so a short stream still
    # raises struct.error - now before any entry is appended to the map.
    packedTable = unpack('<' + str(charInfoCount) + 'I',
                         inFile.read(uintSize * charInfoCount))

    for packedCharInfo in packedTable:
        # Bit fields, lowest bits first: 18, 8, 4, 1 and 1 bits wide.
        self.__map.append(CharInfo(
            packedCharInfo & 0x3FFFF,
            (packedCharInfo >> 18) & 0xFF,
            (packedCharInfo >> 26) & 0xF,
            (packedCharInfo >> 30) & 0x1,
            (packedCharInfo >> 31) & 0x1))
def testExtractString(self):
    # A buffer padded with trailing NUL bytes must come back as just the
    # text that precedes the padding.
    paddedBuffer = b'sample\x00\x00\x00'
    self.assertEqual('sample', utils.extractString(paddedBuffer))