def write_ext(glos, filename, sort=True, dictZip=True):
    """Write the glossary through the binary ``_stardictbuilder`` module.

    When ``sort`` is true a copy of ``glos`` is made and its ``data`` list
    is sorted; otherwise ``glos`` itself is used.  If ``_stardictbuilder``
    cannot be imported, an error is reported and the result of the
    glossary's own ``writeStardict(filename, sort=False)`` is returned.

    When ``dictZip`` is true, ``runDictzip`` is called afterwards with
    ``filename`` stripped of a trailing '.ifo'.
    """
    g = glos
    if sort:
        g = glos.copy()
        g.data.sort()
    try:
        import _stardictbuilder
    except ImportError:
        printAsError(
            'Binary module "_stardictbuilder" can not be imported! '
            'Using internal StarDict builder'
        )
        return g.writeStardict(filename, sort=False)

    builder = _stardictbuilder
    db = builder.new_StarDictBuilder(filename)
    builder.StarDictBuilder_swigregister(db)
    for item in g.data:
        builder.StarDictBuilder_addHeadword(db, item[0], item[1], '')

    # (setter suffix, glossary info key), applied in this exact order.
    infoSetters = (
        ('setTitle', 'name'),
        ('setAuthor', 'author'),
        ('setLicense', 'license'),
        ('setOrigLang', 'origLang'),
        ('setDestLang', 'destLang'),
        ('setDescription', 'description'),
        ('setComments', 'comments'),
        ('setEmail', 'email'),
        ('setWebsite', 'website'),
        ('setVersion', 'version'),
    )
    for setterName, infoKey in infoSetters:
        setter = getattr(builder, 'StarDictBuilder_' + setterName)
        setter(db, g.getInfo(infoKey))

    # Both timestamps are passed as empty strings.
    builder.StarDictBuilder_setcreationTime(db, '')
    builder.StarDictBuilder_setLastUpdate(db, '')
    builder.StarDictBuilder_finish(db)

    if dictZip:
        if filename.endswith('.ifo'):
            filename = filename[:-4]
        runDictzip(filename)
def readIdxFile(self):
    """Load the index file into ``self.indexData``.

    Reads ``<fileBasePath>.idx.gz`` through gzip when that file exists,
    otherwise ``<fileBasePath>.idx``.  Every entry is a NUL-terminated word
    followed by two 4-byte fields decoded with ``binStrToInt`` (offset,
    then size).  Each entry is stored as ``[word, offset, size, [], []]``.

    Parsing stops at the first malformed entry after reporting it with
    ``printAsError``; entries read before that point are kept.
    """
    gzPath = self.fileBasePath + '.idx.gz'
    if isfile(gzPath):
        import gzip
        with gzip.open(gzPath) as idxFile:
            raw = idxFile.read()
    else:
        with open(self.fileBasePath + '.idx', 'rb') as idxFile:
            raw = idxFile.read()

    self.indexData = []
    total = len(raw)
    pos = 0
    while pos < total:
        wordEnd = raw.find('\x00', pos)
        if wordEnd < 0:
            printAsError("Index file is corrupted.")
            break
        # The offset and size fields directly follow the terminator.
        offsetStart = wordEnd + 1
        sizeStart = offsetStart + 4
        entryEnd = sizeStart + 4
        if entryEnd > total:
            printAsError("Index file is corrupted")
            break
        word = raw[pos:wordEnd]
        offset = binStrToInt(raw[offsetStart:sizeStart])
        size = binStrToInt(raw[sizeStart:entryEnd])
        self.indexData.append([word, offset, size, [], []])
        pos = entryEnd
def copyResources(self, fromPath, toPath, overwrite):
    '''Copy the resource directory fromPath to toPath.

    Nothing happens when fromPath is empty, is the same location as
    toPath, is not a directory, or contains no entries.  With overwrite
    set, an existing toPath is removed first.  Without it, a non-empty
    toPath is reported through printAsError and left untouched, while an
    empty one is removed so that the tree copy can recreate it.
    '''
    if not fromPath:
        return
    source = os.path.abspath(fromPath)
    target = os.path.abspath(toPath)
    if source == target or not isdir(source) or not os.listdir(source):
        return
    if overwrite and os.path.exists(target):
        shutil.rmtree(target)
    if os.path.exists(target):
        if os.listdir(target):
            printAsError(
                '''Output resource directory is not empty: "{0}". Resources will not be copied! Clean the output directory before running the converter or pass option: --write-options=res-overwrite=True.'''
                .format(target)
            )
            return
        # copytree needs to create the destination itself.
        os.rmdir(target)
    shutil.copytree(source, target)
def readDictFile(self, sametypesequence):
    """Read the definition block of every entry in ``self.indexData``.

    The data comes from ``<fileBasePath>.dict.dz`` (opened through gzip)
    when that file exists, otherwise from ``<fileBasePath>.dict``.

    For each record ``rec`` the block at offset ``rec[1]`` with length
    ``rec[2]`` is read and parsed with ``parseDefiBlockCompact`` when
    ``sametypesequence`` is truthy, else with ``parseDefiBlockGeneral``.
    The parsed fields are passed through
    ``convertDefinitionsToPyglossaryFormat`` and stored in ``rec[3]``.

    A record that cannot be read, fails to parse, or converts to an empty
    result is marked invalid by setting ``rec[0]`` to None.

    The data file is closed even when a parser or converter raises.
    """
    if isfile(self.fileBasePath + '.dict.dz'):
        import gzip
        dictFd = gzip.open(self.fileBasePath + '.dict.dz')
    else:
        dictFd = open(self.fileBasePath + '.dict', 'rb')
    try:
        for rec in self.indexData:
            dictFd.seek(rec[1])
            # Verify the position actually reached; the seek is not
            # trusted to land on the requested offset.
            if dictFd.tell() != rec[1]:
                printAsError(
                    "Unable to read definition for word \"{0}\"".format(
                        rec[0]))
                rec[0] = None
                continue
            data = dictFd.read(rec[2])
            if len(data) != rec[2]:
                printAsError(
                    "Unable to read definition for word \"{0}\"".format(
                        rec[0]))
                rec[0] = None
                continue
            if sametypesequence:
                res = self.parseDefiBlockCompact(
                    data, sametypesequence, rec[0])
            else:
                res = self.parseDefiBlockGeneral(data, rec[0])
            if res is None:
                rec[0] = None
                continue
            res = self.convertDefinitionsToPyglossaryFormat(res)
            if len(res) == 0:
                rec[0] = None
                continue
            rec[3] = res
    finally:
        # Release the handle on every exit path, including exceptions.
        dictFd.close()
def write_ext(glos, filename, sort=True, dictZip=True):
    """Write the glossary through the binary ``_stardictbuilder`` module.

    When ``sort`` is true a copy of ``glos`` is made and its ``data`` list
    is sorted; otherwise ``glos`` itself is used.  If ``_stardictbuilder``
    cannot be imported, an error is reported and the result of the
    glossary's own ``writeStardict(filename, sort=False)`` is returned.

    When ``dictZip`` is true, ``runDictzip`` is called afterwards with
    ``filename`` stripped of a trailing '.ifo'.
    """
    g = glos
    if sort:
        g = glos.copy()
        g.data.sort()
    try:
        import _stardictbuilder
    except ImportError:
        printAsError(
            'Binary module "_stardictbuilder" can not be imported! '
            'Using internal StarDict builder'
        )
        return g.writeStardict(filename, sort=False)

    builder = _stardictbuilder
    db = builder.new_StarDictBuilder(filename)
    builder.StarDictBuilder_swigregister(db)
    for item in g.data:
        builder.StarDictBuilder_addHeadword(db, item[0], item[1], '')

    # (setter suffix, glossary info key), applied in this exact order.
    infoSetters = (
        ('setTitle', 'name'),
        ('setAuthor', 'author'),
        ('setLicense', 'license'),
        ('setOrigLang', 'origLang'),
        ('setDestLang', 'destLang'),
        ('setDescription', 'description'),
        ('setComments', 'comments'),
        ('setEmail', 'email'),
        ('setWebsite', 'website'),
        ('setVersion', 'version'),
    )
    for setterName, infoKey in infoSetters:
        setter = getattr(builder, 'StarDictBuilder_' + setterName)
        setter(db, g.getInfo(infoKey))

    # Both timestamps are passed as empty strings.
    builder.StarDictBuilder_setcreationTime(db, '')
    builder.StarDictBuilder_setLastUpdate(db, '')
    builder.StarDictBuilder_finish(db)

    if dictZip:
        if filename.endswith('.ifo'):
            filename = filename[:-4]
        runDictzip(filename)
def readIdxFile(self):
    """Load the index file into ``self.indexData``.

    Reads ``<fileBasePath>.idx.gz`` through gzip when that file exists,
    otherwise ``<fileBasePath>.idx``.  Every entry is a NUL-terminated word
    followed by two 4-byte fields decoded with ``binStrToInt`` (offset,
    then size).  Each entry is stored as ``[word, offset, size, [], []]``.

    Parsing stops at the first malformed entry after reporting it with
    ``printAsError``; entries read before that point are kept.
    """
    gzPath = self.fileBasePath + '.idx.gz'
    if isfile(gzPath):
        import gzip
        with gzip.open(gzPath) as idxFile:
            raw = idxFile.read()
    else:
        with open(self.fileBasePath + '.idx', 'rb') as idxFile:
            raw = idxFile.read()

    self.indexData = []
    total = len(raw)
    pos = 0
    while pos < total:
        wordEnd = raw.find('\x00', pos)
        if wordEnd < 0:
            printAsError("Index file is corrupted.")
            break
        # The offset and size fields directly follow the terminator.
        offsetStart = wordEnd + 1
        sizeStart = offsetStart + 4
        entryEnd = sizeStart + 4
        if entryEnd > total:
            printAsError("Index file is corrupted")
            break
        word = raw[pos:wordEnd]
        offset = binStrToInt(raw[offsetStart:sizeStart])
        size = binStrToInt(raw[sizeStart:entryEnd])
        self.indexData.append([word, offset, size, [], []])
        pos = entryEnd
def copyResources(self, fromPath, toPath, overwrite):
    '''Copy the resource directory fromPath to toPath.

    Nothing happens when fromPath is empty, is the same location as
    toPath, is not a directory, or contains no entries.  With overwrite
    set, an existing toPath is removed first.  Without it, a non-empty
    toPath is reported through printAsError and left untouched, while an
    empty one is removed so that the tree copy can recreate it.
    '''
    if not fromPath:
        return
    source = os.path.abspath(fromPath)
    target = os.path.abspath(toPath)
    if source == target or not isdir(source) or not os.listdir(source):
        return
    if overwrite and os.path.exists(target):
        shutil.rmtree(target)
    if os.path.exists(target):
        if os.listdir(target):
            printAsError(
                '''Output resource directory is not empty: "{0}". Resources will not be copied! Clean the output directory before running the converter or pass option: --write-options=res-overwrite=True.'''
                .format(target)
            )
            return
        # copytree needs to create the destination itself.
        os.rmdir(target)
    shutil.copytree(source, target)
def verifySameTypeSequence(s):
    """Check a sametypesequence option value.

    An empty or missing value is accepted.  Otherwise every character must
    satisfy ``isAsciiAlpha``; the first one that does not causes an error
    to be reported through ``printAsError`` and False to be returned.
    """
    if not s or all(isAsciiAlpha(typeChar) for typeChar in s):
        return True
    printAsError("Invalid sametypesequence option")
    return False
def verifySameTypeSequence(s):
    """Check a sametypesequence option value.

    An empty or missing value is accepted.  Otherwise every character must
    satisfy ``isAsciiAlpha``; the first one that does not causes an error
    to be reported through ``printAsError`` and False to be returned.
    """
    if not s or all(isAsciiAlpha(typeChar) for typeChar in s):
        return True
    printAsError("Invalid sametypesequence option")
    return False
def parseDefiBlockGeneral(self, data, word):
    """Split a definition block in which every field carries its own type.

    Used when no sametypesequence option is given.  Each field starts with
    a one-character type.  A lower-case type is followed by data that runs
    up to a NUL terminator; an upper-case type is followed by a 4-byte
    size (decoded with binStrToInt) and then that many bytes of data.

    Returns a list of (fieldData, typeChar) tuples, or None after
    reporting the problem through printAsError when the block is
    malformed.  ``word`` is only used in the error message.
    """
    corruptedMsg = 'Data file is corrupted. Word "{0}"'.format(word)
    total = len(data)
    fields = []
    pos = 0
    while pos < total:
        typeChar = data[pos]
        if not isAsciiAlpha(typeChar):
            printAsError(corruptedMsg)
            return None
        pos += 1
        if not isAsciiLower(typeChar):
            # Size-prefixed field.
            assert isAsciiUpper(typeChar)
            dataStart = pos + 4
            if dataStart > total:
                printAsError(corruptedMsg)
                return None
            size = binStrToInt(data[pos:dataStart])
            dataEnd = dataStart + size
            if dataEnd > total:
                printAsError(corruptedMsg)
                return None
            fields.append((data[dataStart:dataEnd], typeChar))
            pos = dataEnd
            continue
        # NUL-terminated field.
        terminator = data.find('\x00', pos)
        if terminator < 0:
            printAsError(corruptedMsg)
            return None
        fields.append((data[pos:terminator], typeChar))
        pos = terminator + 1
    return fields
def parseDefiBlockGeneral(self, data, word):
    """Split a definition block in which every field carries its own type.

    Used when no sametypesequence option is given.  Each field starts with
    a one-character type.  A lower-case type is followed by data that runs
    up to a NUL terminator; an upper-case type is followed by a 4-byte
    size (decoded with binStrToInt) and then that many bytes of data.

    Returns a list of (fieldData, typeChar) tuples, or None after
    reporting the problem through printAsError when the block is
    malformed.  ``word`` is only used in the error message.
    """
    corruptedMsg = 'Data file is corrupted. Word "{0}"'.format(word)
    total = len(data)
    fields = []
    pos = 0
    while pos < total:
        typeChar = data[pos]
        if not isAsciiAlpha(typeChar):
            printAsError(corruptedMsg)
            return None
        pos += 1
        if not isAsciiLower(typeChar):
            # Size-prefixed field.
            assert isAsciiUpper(typeChar)
            dataStart = pos + 4
            if dataStart > total:
                printAsError(corruptedMsg)
                return None
            size = binStrToInt(data[pos:dataStart])
            dataEnd = dataStart + size
            if dataEnd > total:
                printAsError(corruptedMsg)
                return None
            fields.append((data[dataStart:dataEnd], typeChar))
            pos = dataEnd
            continue
        # NUL-terminated field.
        terminator = data.find('\x00', pos)
        if terminator < 0:
            printAsError(corruptedMsg)
            return None
        fields.append((data[pos:terminator], typeChar))
        pos = terminator + 1
    return fields
def readSynFile(self):
    """Attach synonyms from ``<fileBasePath>.syn`` to ``self.indexData``.

    Does nothing when the file does not exist.  Every entry is a
    NUL-terminated word followed by a 4-byte field decoded with
    ``binStrToInt``, which is used as a position in ``self.indexData``.
    The word is appended to slot 4 of the referenced record.

    A malformed entry stops parsing; an entry that references a position
    past the end of ``self.indexData`` is reported and skipped.
    """
    synPath = self.fileBasePath + '.syn'
    if not isfile(synPath):
        return
    with open(synPath, 'rb') as synFile:
        raw = synFile.read()

    total = len(raw)
    pos = 0
    while pos < total:
        wordStart = pos
        wordEnd = raw.find('\x00', wordStart)
        if wordEnd < 0:
            printAsError("Synonym file is corrupted.")
            break
        # The 4-byte index field directly follows the terminator.
        indexStart = wordEnd + 1
        pos = indexStart + 4
        if pos > total:
            printAsError("Synonym file is corrupted.")
            break
        word = raw[wordStart:wordEnd]
        index = binStrToInt(raw[indexStart:pos])
        if index < len(self.indexData):
            self.indexData[index][4].append(word)
        else:
            printAsError(
                'Corrupted synonym file. '
                'Word "{0}" references invalid item.'.format(word)
            )
def readDictFile(self, sametypesequence):
    """Read the definition block of every entry in ``self.indexData``.

    The data comes from ``<fileBasePath>.dict.dz`` (opened through gzip)
    when that file exists, otherwise from ``<fileBasePath>.dict``.

    For each record ``rec`` the block at offset ``rec[1]`` with length
    ``rec[2]`` is read and parsed with ``parseDefiBlockCompact`` when
    ``sametypesequence`` is truthy, else with ``parseDefiBlockGeneral``.
    The parsed fields are passed through
    ``convertDefinitionsToPyglossaryFormat`` and stored in ``rec[3]``.

    A record that cannot be read, fails to parse, or converts to an empty
    result is marked invalid by setting ``rec[0]`` to None.

    The data file is closed even when a parser or converter raises.
    """
    if isfile(self.fileBasePath + '.dict.dz'):
        import gzip
        dictFd = gzip.open(self.fileBasePath + '.dict.dz')
    else:
        dictFd = open(self.fileBasePath + '.dict', 'rb')
    try:
        for rec in self.indexData:
            dictFd.seek(rec[1])
            # Verify the position actually reached; the seek is not
            # trusted to land on the requested offset.
            if dictFd.tell() != rec[1]:
                printAsError(
                    "Unable to read definition for word \"{0}\"".format(
                        rec[0]))
                rec[0] = None
                continue
            data = dictFd.read(rec[2])
            if len(data) != rec[2]:
                printAsError(
                    "Unable to read definition for word \"{0}\"".format(
                        rec[0]))
                rec[0] = None
                continue
            if sametypesequence:
                res = self.parseDefiBlockCompact(
                    data, sametypesequence, rec[0])
            else:
                res = self.parseDefiBlockGeneral(data, rec[0])
            if res is None:
                rec[0] = None
                continue
            res = self.convertDefinitionsToPyglossaryFormat(res)
            if len(res) == 0:
                rec[0] = None
                continue
            rec[3] = res
    finally:
        # Release the handle on every exit path, including exceptions.
        dictFd.close()
def readSynFile(self):
    """Attach synonyms from ``<fileBasePath>.syn`` to ``self.indexData``.

    Does nothing when the file does not exist.  Every entry is a
    NUL-terminated word followed by a 4-byte field decoded with
    ``binStrToInt``, which is used as a position in ``self.indexData``.
    The word is appended to slot 4 of the referenced record.

    A malformed entry stops parsing; an entry that references a position
    past the end of ``self.indexData`` is reported and skipped.
    """
    synPath = self.fileBasePath + '.syn'
    if not isfile(synPath):
        return
    with open(synPath, 'rb') as synFile:
        raw = synFile.read()

    total = len(raw)
    pos = 0
    while pos < total:
        wordStart = pos
        wordEnd = raw.find('\x00', wordStart)
        if wordEnd < 0:
            printAsError("Synonym file is corrupted.")
            break
        # The 4-byte index field directly follows the terminator.
        indexStart = wordEnd + 1
        pos = indexStart + 4
        if pos > total:
            printAsError("Synonym file is corrupted.")
            break
        word = raw[wordStart:wordEnd]
        index = binStrToInt(raw[indexStart:pos])
        if index < len(self.indexData):
            self.indexData[index][4].append(word)
        else:
            printAsError(
                'Corrupted synonym file. '
                'Word "{0}" references invalid item.'.format(word)
            )
def parseDefiBlockCompact(self, data, sametypesequence, word):
    """Split a definition block whose field types come from sametypesequence.

    ``sametypesequence`` must be a non-empty str (asserted); it supplies
    one type character per field, in order.  For every field except the
    last, a lower-case type means data up to a NUL terminator and an
    upper-case type means a 4-byte size (decoded with binStrToInt)
    followed by that many bytes.  The last field has neither terminator
    nor size and takes the rest of ``data``; it must be non-empty, and for
    a lower-case type it must not contain a NUL.

    Returns a list of (fieldData, typeChar) tuples, or None after
    reporting the problem through printAsError when the block is
    malformed.  ``word`` is only used in the error message.
    """
    assert type(sametypesequence) == str
    assert len(sametypesequence) > 0
    corruptedMsg = 'Data file is corrupted. Word "{0}"'.format(word)
    total = len(data)
    fields = []
    pos = 0
    for typeChar in sametypesequence[:-1]:
        if pos >= total:
            printAsError(corruptedMsg)
            return None
        if isAsciiLower(typeChar):
            terminator = data.find('\x00', pos)
            if terminator < 0:
                printAsError(corruptedMsg)
                return None
            fields.append((data[pos:terminator], typeChar))
            pos = terminator + 1
        else:
            assert isAsciiUpper(typeChar)
            dataStart = pos + 4
            if dataStart > total:
                printAsError(corruptedMsg)
                return None
            size = binStrToInt(data[pos:dataStart])
            dataEnd = dataStart + size
            if dataEnd > total:
                printAsError(corruptedMsg)
                return None
            fields.append((data[dataStart:dataEnd], typeChar))
            pos = dataEnd

    # Nothing left for the final field.
    if pos >= total:
        printAsError(corruptedMsg)
        return None
    lastType = sametypesequence[-1]
    if isAsciiLower(lastType):
        # The final string field is stored without its terminator.
        if data.find('\x00', pos) >= 0:
            printAsError(corruptedMsg)
            return None
    else:
        assert isAsciiUpper(lastType)
    fields.append((data[pos:], lastType))
    return fields
def parseDefiBlockCompact(self, data, sametypesequence, word):
    """Split a definition block whose field types come from sametypesequence.

    ``sametypesequence`` must be a non-empty str (asserted); it supplies
    one type character per field, in order.  For every field except the
    last, a lower-case type means data up to a NUL terminator and an
    upper-case type means a 4-byte size (decoded with binStrToInt)
    followed by that many bytes.  The last field has neither terminator
    nor size and takes the rest of ``data``; it must be non-empty, and for
    a lower-case type it must not contain a NUL.

    Returns a list of (fieldData, typeChar) tuples, or None after
    reporting the problem through printAsError when the block is
    malformed.  ``word`` is only used in the error message.
    """
    assert type(sametypesequence) == str
    assert len(sametypesequence) > 0
    corruptedMsg = 'Data file is corrupted. Word "{0}"'.format(word)
    total = len(data)
    fields = []
    pos = 0
    for typeChar in sametypesequence[:-1]:
        if pos >= total:
            printAsError(corruptedMsg)
            return None
        if isAsciiLower(typeChar):
            terminator = data.find('\x00', pos)
            if terminator < 0:
                printAsError(corruptedMsg)
                return None
            fields.append((data[pos:terminator], typeChar))
            pos = terminator + 1
        else:
            assert isAsciiUpper(typeChar)
            dataStart = pos + 4
            if dataStart > total:
                printAsError(corruptedMsg)
                return None
            size = binStrToInt(data[pos:dataStart])
            dataEnd = dataStart + size
            if dataEnd > total:
                printAsError(corruptedMsg)
                return None
            fields.append((data[dataStart:dataEnd], typeChar))
            pos = dataEnd

    # Nothing left for the final field.
    if pos >= total:
        printAsError(corruptedMsg)
        return None
    lastType = sametypesequence[-1]
    if isAsciiLower(lastType):
        # The final string field is stored without its terminator.
        if data.find('\x00', pos) >= 0:
            printAsError(corruptedMsg)
            return None
    else:
        assert isAsciiUpper(lastType)
    fields.append((data[pos:], lastType))
    return fields