Ejemplo n.º 1
0
def write_ext(glos, filename, sort=True, dictZip=True):
    """Write a glossary through the binary ``_stardictbuilder`` module.

    When ``_stardictbuilder`` cannot be imported, an error is reported with
    ``printAsError`` and the work is delegated to ``writeStardict`` of the
    glossary being written; the result of that call is returned.

    glos     -- glossary object; ``copy()``, ``data``, ``getInfo()`` and
                ``writeStardict()`` are used.
    filename -- handed to the builder unchanged.
    sort     -- when true, ``glos.copy()`` is written with its ``data``
                sorted in place, instead of ``glos`` itself.
    dictZip  -- when true and ``filename`` ends with ``.ifo``, ``runDictzip``
                is called with that suffix stripped.
    """
    target = glos
    if sort:
        target = glos.copy()
        target.data.sort()

    try:
        import _stardictbuilder as sdb
    except ImportError:
        printAsError(
            'Binary module "_stardictbuilder" can not be imported! '
            'Using internal StarDict builder'
        )
        return target.writeStardict(filename, sort=False)

    builder = sdb.new_StarDictBuilder(filename)
    sdb.StarDictBuilder_swigregister(builder)
    for entry in target.data:
        sdb.StarDictBuilder_addHeadword(builder, entry[0], entry[1], '')

    # (builder setter suffix, glossary info key) -- applied in this order.
    infoSetters = (
        ('setTitle', 'name'),
        ('setAuthor', 'author'),
        ('setLicense', 'license'),
        ('setOrigLang', 'origLang'),
        ('setDestLang', 'destLang'),
        ('setDescription', 'description'),
        ('setComments', 'comments'),
        ('setEmail', 'email'),
        ('setWebsite', 'website'),
        ('setVersion', 'version'),
    )
    for suffix, infoKey in infoSetters:
        setter = getattr(sdb, 'StarDictBuilder_' + suffix)
        setter(builder, target.getInfo(infoKey))

    # Both timestamps are always written as empty strings.
    sdb.StarDictBuilder_setcreationTime(builder, '')
    sdb.StarDictBuilder_setLastUpdate(builder, '')
    sdb.StarDictBuilder_finish(builder)

    if dictZip and filename[-4:] == '.ifo':
        runDictzip(filename[:-4])
Ejemplo n.º 2
0
 def readIdxFile(self):
     """Load the index file into ``self.indexData``.

     ``<fileBasePath>.idx.gz`` is read through gzip when it exists,
     otherwise ``<fileBasePath>.idx`` is read directly.  The content is a
     sequence of records: a word terminated by a NUL character, followed by
     two 4-byte fields decoded with ``binStrToInt`` (offset, then size).
     Every record is stored as ``[word, offset, size, [], []]``.

     A malformed record is reported through ``printAsError`` and stops the
     parsing; records collected up to that point are kept.
     """
     gzPath = self.fileBasePath + '.idx.gz'
     if isfile(gzPath):
         import gzip
         with gzip.open(gzPath) as idxFile:
             raw = idxFile.read()
     else:
         with open(self.fileBasePath + '.idx', 'rb') as idxFile:
             raw = idxFile.read()

     self.indexData = []
     total = len(raw)
     pos = 0
     while pos < total:
         nul = raw.find('\x00', pos)
         if nul < 0:
             printAsError("Index file is corrupted.")
             break
         # The two numeric fields occupy the 8 bytes right after the NUL.
         numStart = nul + 1
         numEnd = numStart + 8
         if numEnd > total:
             printAsError("Index file is corrupted")
             break
         word = raw[pos:nul]
         offset = binStrToInt(raw[numStart:numStart + 4])
         size = binStrToInt(raw[numStart + 4:numEnd])
         self.indexData.append([word, offset, size, [], []])
         pos = numEnd
Ejemplo n.º 3
0
 def copyResources(self, fromPath, toPath, overwrite):
     '''Copy the resource directory ``fromPath`` to ``toPath``.

     Nothing is done when ``fromPath`` is empty/None, is not a directory,
     has no entries, or resolves to the same absolute path as ``toPath``.

     overwrite -- when true, an existing ``toPath`` tree is removed first.
         Otherwise an existing non-empty ``toPath`` is reported through
         ``printAsError`` and nothing is copied; an existing empty one is
         removed so that ``shutil.copytree`` can create it.
     '''
     if not fromPath:
         return
     srcDir = os.path.abspath(fromPath)
     dstDir = os.path.abspath(toPath)
     if srcDir == dstDir or not isdir(srcDir) or not os.listdir(srcDir):
         return

     if overwrite and os.path.exists(dstDir):
         shutil.rmtree(dstDir)
     if os.path.exists(dstDir):
         if os.listdir(dstDir):
             printAsError(
             '''Output resource directory is not empty: "{0}". Resources will not be copied!
 Clean the output directory before running the converter or pass option: --write-options=res-overwrite=True.'''\
 .format(dstDir)
             )
             return
         # copytree refuses to write into an existing directory.
         os.rmdir(dstDir)
     shutil.copytree(srcDir, dstDir)
Ejemplo n.º 4
0
 def readDictFile(self, sametypesequence):
     """Read and parse the definition block of every record in ``self.indexData``.

     ``<fileBasePath>.dict.dz`` is opened through gzip when it exists,
     otherwise ``<fileBasePath>.dict`` is opened directly.  For each record
     the block of ``rec[2]`` bytes at offset ``rec[1]`` is read, parsed and
     converted with ``convertDefinitionsToPyglossaryFormat``; the result is
     stored in ``rec[3]``.  A record whose block cannot be read or parsed,
     or whose conversion yields nothing, gets ``rec[0]`` set to ``None``.

     sametypesequence -- when non-empty, blocks are parsed with
         ``parseDefiBlockCompact`` using this value; otherwise with
         ``parseDefiBlockGeneral``.
     """
     if isfile(self.fileBasePath+'.dict.dz'):
         import gzip
         dictFd = gzip.open(self.fileBasePath+'.dict.dz')
     else:
         dictFd = open(self.fileBasePath+'.dict', 'rb')

     # The context manager closes the file on every exit path, including an
     # exception raised by one of the parsers or by the converter.
     with dictFd:
         for rec in self.indexData:
             word, offset, size = rec[0], rec[1], rec[2]
             dictFd.seek(offset)
             if dictFd.tell() != offset:
                 printAsError("Unable to read definition for word \"{0}\"".format(word))
                 rec[0] = None
                 continue
             data = dictFd.read(size)
             if len(data) != size:
                 # Short read: the block runs past the end of the file.
                 printAsError("Unable to read definition for word \"{0}\"".format(word))
                 rec[0] = None
                 continue
             if sametypesequence:
                 res = self.parseDefiBlockCompact(data, sametypesequence, word)
             else:
                 res = self.parseDefiBlockGeneral(data, word)
             if res is None:
                 rec[0] = None
                 continue
             res = self.convertDefinitionsToPyglossaryFormat(res)
             if not res:
                 rec[0] = None
                 continue
             rec[3] = res
Ejemplo n.º 5
0
def write_ext(glos, filename, sort=True, dictZip=True):
    """Build dictionary files with the ``_stardictbuilder`` binary module.

    Falls back to ``writeStardict`` of the glossary being written (and
    returns its result) after reporting an error when the binary module
    cannot be imported.

    glos     -- glossary object providing ``copy()``, ``data``, ``getInfo()``
                and ``writeStardict()``.
    filename -- passed unchanged to the builder constructor.
    sort     -- if true, a copy obtained from ``glos.copy()`` is sorted and
                written instead of ``glos``.
    dictZip  -- if true and ``filename`` ends with ``.ifo``, ``runDictzip``
                is invoked on ``filename`` without that suffix.
    """
    g = glos.copy() if sort else glos
    if sort:
        g.data.sort()

    try:
        import _stardictbuilder
    except ImportError:
        message = ('Binary module "_stardictbuilder" can not be imported! '
                   'Using internal StarDict builder')
        printAsError(message)
        return g.writeStardict(filename, sort=False)

    db = _stardictbuilder.new_StarDictBuilder(filename)
    _stardictbuilder.StarDictBuilder_swigregister(db)
    for headword in g.data:
        _stardictbuilder.StarDictBuilder_addHeadword(
            db, headword[0], headword[1], '')

    # Builder setter name -> glossary info key, in the order they are applied.
    for setterName, infoKey in [
        ('StarDictBuilder_setTitle', 'name'),
        ('StarDictBuilder_setAuthor', 'author'),
        ('StarDictBuilder_setLicense', 'license'),
        ('StarDictBuilder_setOrigLang', 'origLang'),
        ('StarDictBuilder_setDestLang', 'destLang'),
        ('StarDictBuilder_setDescription', 'description'),
        ('StarDictBuilder_setComments', 'comments'),
        ('StarDictBuilder_setEmail', 'email'),
        ('StarDictBuilder_setWebsite', 'website'),
        ('StarDictBuilder_setVersion', 'version'),
    ]:
        getattr(_stardictbuilder, setterName)(db, g.getInfo(infoKey))

    # Creation and last-update times are always left empty.
    _stardictbuilder.StarDictBuilder_setcreationTime(db, '')
    _stardictbuilder.StarDictBuilder_setLastUpdate(db, '')
    _stardictbuilder.StarDictBuilder_finish(db)

    if not dictZip:
        return
    if filename[-4:] == '.ifo':
        runDictzip(filename[:-4])
Ejemplo n.º 6
0
 def readIdxFile(self):
     """Parse the index file and store its records in ``self.indexData``.

     The gzip-compressed ``<fileBasePath>.idx.gz`` is preferred when it
     exists; otherwise ``<fileBasePath>.idx`` is read.  Each record is a
     NUL-terminated word followed by 8 bytes: a 4-byte offset and a 4-byte
     size, both decoded with ``binStrToInt``.  Records are appended as
     ``[word, offset, size, [], []]``.

     Parsing stops with a ``printAsError`` report at the first record that
     lacks its terminator or its 8 trailing bytes.
     """
     basePath = self.fileBasePath
     if isfile(basePath + '.idx.gz'):
         import gzip
         with gzip.open(basePath + '.idx.gz') as stream:
             content = stream.read()
     else:
         with open(basePath + '.idx', 'rb') as stream:
             content = stream.read()

     self.indexData = []
     cursor = 0
     while cursor < len(content):
         wordEnd = content.find('\x00', cursor)
         if wordEnd < 0:
             printAsError("Index file is corrupted.")
             break
         word = content[cursor:wordEnd]
         cursor = wordEnd + 1
         if cursor + 8 > len(content):
             printAsError("Index file is corrupted")
             break
         offsetField = content[cursor:cursor + 4]
         sizeField = content[cursor + 4:cursor + 8]
         cursor += 8
         offset = binStrToInt(offsetField)
         size = binStrToInt(sizeField)
         self.indexData.append([word, offset, size, [], []])
Ejemplo n.º 7
0
 def copyResources(self, fromPath, toPath, overwrite):
     '''Copy the directory tree at ``fromPath`` to ``toPath``.

     Returns without copying when ``fromPath`` is empty/None, when both
     paths resolve to the same absolute path, or when ``fromPath`` is not a
     directory or has no entries.

     overwrite -- if true, an already existing ``toPath`` is deleted with
         ``shutil.rmtree`` before copying.  If false, a non-empty ``toPath``
         is reported with ``printAsError`` and left alone, while an empty
         one is removed before ``shutil.copytree`` runs.
     '''
     if not fromPath:
         return
     source = os.path.abspath(fromPath)
     target = os.path.abspath(toPath)
     if source == target:
         return
     sourceIsUsable = isdir(source) and len(os.listdir(source)) != 0
     if not sourceIsUsable:
         return
     if overwrite and os.path.exists(target):
         shutil.rmtree(target)
     if os.path.exists(target):
         if os.listdir(target):
             printAsError(
 '''Output resource directory is not empty: "{0}". Resources will not be copied!
 Clean the output directory before running the converter or pass option: --write-options=res-overwrite=True.'''\
 .format(target)
             )
             return
         # The empty directory must go: copytree creates the target itself.
         os.rmdir(target)
     shutil.copytree(source, target)
Ejemplo n.º 8
0
def verifySameTypeSequence(s):
    """Check that every character of ``s`` satisfies ``isAsciiAlpha``.

    An empty or ``None`` value is accepted.  On the first offending
    character an error is reported with ``printAsError``.

    Returns True when ``s`` is acceptable, False otherwise.
    """
    if not s:
        return True
    if all(isAsciiAlpha(typeChar) for typeChar in s):
        return True
    printAsError("Invalid sametypesequence option")
    return False
Ejemplo n.º 9
0
def verifySameTypeSequence(s):
    """Validate a ``sametypesequence`` option value.

    A falsy value (empty string, ``None``) is valid.  Otherwise each
    character must pass ``isAsciiAlpha``; if one does not, the problem is
    reported through ``printAsError``.

    Returns True for a valid value and False for an invalid one.
    """
    if not s:
        return True
    hasInvalidChar = any(not isAsciiAlpha(ch) for ch in s)
    if hasInvalidChar:
        printAsError("Invalid sametypesequence option")
    return not hasInvalidChar
Ejemplo n.º 10
0
 def parseDefiBlockGeneral(self, data, word):
     """Split a definition block whose fields each carry their own type.

     Used when no sametypesequence is given.  Each field starts with a type
     character that must pass ``isAsciiAlpha``:
       * lower case -- the payload runs up to the next NUL character;
       * upper case -- a 4-byte size decoded with ``binStrToInt`` precedes
         a payload of that many bytes.

     data -- raw content of the block.
     word -- headword, used only in the error message.

     Returns a list of ``(payload, type_char)`` tuples, or ``None`` after
     reporting a corrupted block through ``printAsError``.
     """
     corruptedMsg = "Data file is corrupted. Word \"{0}\"".format(word)
     total = len(data)
     fields = []
     pos = 0
     while pos < total:
         typeChar = data[pos]
         if not isAsciiAlpha(typeChar):
             printAsError(corruptedMsg)
             return None
         pos += 1

         if isAsciiLower(typeChar):
             end = data.find('\x00', pos)
             if end < 0:
                 printAsError(corruptedMsg)
                 return None
             fields.append((data[pos:end], typeChar))
             pos = end + 1
             continue

         assert isAsciiUpper(typeChar)
         payloadStart = pos + 4
         if payloadStart > total:
             printAsError(corruptedMsg)
             return None
         size = binStrToInt(data[pos:payloadStart])
         payloadEnd = payloadStart + size
         if payloadEnd > total:
             printAsError(corruptedMsg)
             return None
         fields.append((data[payloadStart:payloadEnd], typeChar))
         pos = payloadEnd
     return fields
Ejemplo n.º 11
0
 def parseDefiBlockGeneral(self, data, word):
     """Parse a definition block in which every field is prefixed by its type.

     This is the parser for the case where the sametypesequence option is
     not specified.  A lower-case type character introduces a
     NUL-terminated payload; an upper-case one introduces a 4-byte size
     (decoded with ``binStrToInt``) followed by that many payload bytes.

     data -- the raw block.
     word -- headword the block belongs to; only used for error reporting.

     Returns the list of ``(payload, type_char)`` pairs in block order.  If
     the block is malformed, the error is reported with ``printAsError``
     and ``None`` is returned.
     """
     errorText = "Data file is corrupted. Word \"{0}\"".format(word)
     parsed = []
     cursor = 0
     while cursor < len(data):
         kind = data[cursor]
         cursor += 1
         if not isAsciiAlpha(kind):
             printAsError(errorText)
             return None
         if isAsciiLower(kind):
             start = cursor
             cursor = data.find('\x00', start)
             if cursor < 0:
                 printAsError(errorText)
                 return None
             payload = data[start:cursor]
             # Step over the terminating NUL.
             cursor += 1
         else:
             assert isAsciiUpper(kind)
             if cursor + 4 > len(data):
                 printAsError(errorText)
                 return None
             size = binStrToInt(data[cursor:cursor + 4])
             cursor += 4
             if cursor + size > len(data):
                 printAsError(errorText)
                 return None
             payload = data[cursor:cursor + size]
             cursor += size
         parsed.append((payload, kind))
     return parsed
Ejemplo n.º 12
0
 def readSynFile(self):
     """Attach the synonyms listed in ``<fileBasePath>.syn`` to ``self.indexData``.

     Does nothing when the file does not exist.  The file is a sequence of
     records: a NUL-terminated word followed by a 4-byte field decoded with
     ``binStrToInt`` that is used as an index into ``self.indexData``.  The
     word is appended to slot 4 of the referenced record.

     A truncated file is reported and ends the parsing; a record whose
     index is not below ``len(self.indexData)`` is reported and skipped.
     """
     synPath = self.fileBasePath + '.syn'
     if not isfile(synPath):
         return
     with open(synPath, 'rb') as synFile:
         raw = synFile.read()

     total = len(raw)
     pos = 0
     while pos < total:
         nul = raw.find('\x00', pos)
         if nul < 0:
             printAsError("Synonym file is corrupted.")
             break
         refStart = nul + 1
         refEnd = refStart + 4
         if refEnd > total:
             printAsError("Synonym file is corrupted.")
             break
         synonym = raw[pos:nul]
         index = binStrToInt(raw[refStart:refEnd])
         pos = refEnd
         if index < len(self.indexData):
             self.indexData[index][4].append(synonym)
         else:
             printAsError(
                 "Corrupted synonym file. Word \"{0}\" references invalid item."
                 .format(synonym))
Ejemplo n.º 13
0
    def readDictFile(self, sametypesequence):
        """Fill ``rec[3]`` of every index record with its parsed definitions.

        The data come from ``<fileBasePath>.dict.dz`` (opened through gzip)
        when that file exists, otherwise from ``<fileBasePath>.dict``.  For
        each record in ``self.indexData``, ``rec[2]`` bytes are read at
        offset ``rec[1]``, parsed, and passed through
        ``convertDefinitionsToPyglossaryFormat``.  When reading or parsing
        fails, or the converted result is empty, ``rec[0]`` is set to
        ``None`` and ``rec[3]`` is left as it was.

        sametypesequence -- when non-empty, selects
            ``parseDefiBlockCompact`` (and is passed to it); otherwise
            ``parseDefiBlockGeneral`` is used.
        """
        if isfile(self.fileBasePath + '.dict.dz'):
            import gzip
            dictFd = gzip.open(self.fileBasePath + '.dict.dz')
        else:
            dictFd = open(self.fileBasePath + '.dict', 'rb')

        # ``with`` closes the file on every exit path, so the handle is not
        # leaked if a parser or the converter raises part-way through.
        with dictFd:
            for rec in self.indexData:
                word, offset, size = rec[0], rec[1], rec[2]
                dictFd.seek(offset)
                data = None
                if dictFd.tell() == offset:
                    data = dictFd.read(size)
                if data is None or len(data) != size:
                    # Seek did not land on the offset, or the read was short.
                    printAsError(
                        "Unable to read definition for word \"{0}\"".format(
                            word))
                    rec[0] = None
                    continue
                if sametypesequence:
                    res = self.parseDefiBlockCompact(
                        data, sametypesequence, word)
                else:
                    res = self.parseDefiBlockGeneral(data, word)
                if res is None:
                    rec[0] = None
                    continue
                res = self.convertDefinitionsToPyglossaryFormat(res)
                if not res:
                    rec[0] = None
                    continue
                rec[3] = res
Ejemplo n.º 14
0
 def readSynFile(self):
     """Read the optional synonym file and record synonyms in ``self.indexData``.

     Returns immediately when ``<fileBasePath>.syn`` is missing.  Each
     record of the file is a NUL-terminated synonym followed by 4 bytes,
     decoded with ``binStrToInt``, that give the position of the target
     record in ``self.indexData``; the synonym is appended to
     ``self.indexData[position][4]``.

     Errors are reported with ``printAsError``: a truncated record stops
     the parsing, a position not below ``len(self.indexData)`` only skips
     that record.
     """
     if not isfile(self.fileBasePath+'.syn'):
         return
     with open(self.fileBasePath+'.syn', 'rb') as stream:
         content = stream.read()
     cursor = 0
     while cursor < len(content):
         wordEnd = content.find('\x00', cursor)
         if wordEnd < 0:
             printAsError("Synonym file is corrupted.")
             break
         synonym = content[cursor:wordEnd]
         cursor = wordEnd + 1
         if cursor + 4 > len(content):
             printAsError("Synonym file is corrupted.")
             break
         target = binStrToInt(content[cursor:cursor+4])
         cursor += 4
         if target >= len(self.indexData):
             printAsError("Corrupted synonym file. Word \"{0}\" references invalid item.".format(synonym))
             continue
         record = self.indexData[target]
         record[4].append(synonym)
Ejemplo n.º 15
0
    def parseDefiBlockCompact(self, data, sametypesequence, word):
        """Split a definition block whose field types come from ``sametypesequence``.

        Every type character except the last describes a delimited field:
        lower case means the payload ends at the next NUL character, upper
        case means a 4-byte size (decoded with ``binStrToInt``) precedes
        the payload.  The field of the last type character has neither
        terminator nor size: it takes the rest of ``data``, which must be
        non-empty and, for a lower-case type, must not contain a NUL.

        data             -- raw content of the block.
        sametypesequence -- non-empty ``str`` of type characters (asserted).
        word             -- headword, used only in the error message.

        Returns a list of ``(payload, type_char)`` tuples, or ``None``
        after reporting a corrupted block through ``printAsError``.
        """
        assert type(sametypesequence) == str
        assert len(sametypesequence) > 0
        corruptedMsg = "Data file is corrupted. Word \"{0}\"".format(word)
        total = len(data)
        lastType = sametypesequence[-1]
        fields = []
        pos = 0
        for typeChar in sametypesequence[:-1]:
            if pos >= total:
                printAsError(corruptedMsg)
                return None
            if isAsciiLower(typeChar):
                end = data.find('\x00', pos)
                if end < 0:
                    printAsError(corruptedMsg)
                    return None
                fields.append((data[pos:end], typeChar))
                pos = end + 1
                continue
            assert isAsciiUpper(typeChar)
            payloadStart = pos + 4
            if payloadStart > total:
                printAsError(corruptedMsg)
                return None
            size = binStrToInt(data[pos:payloadStart])
            payloadEnd = payloadStart + size
            if payloadEnd > total:
                printAsError(corruptedMsg)
                return None
            fields.append((data[payloadStart:payloadEnd], typeChar))
            pos = payloadEnd

        # Something must be left over for the final field.
        if pos >= total:
            printAsError(corruptedMsg)
            return None
        if isAsciiLower(lastType):
            if data.find('\x00', pos) >= 0:
                printAsError(corruptedMsg)
                return None
        else:
            assert isAsciiUpper(lastType)
        fields.append((data[pos:], lastType))

        return fields
Ejemplo n.º 16
0
 def parseDefiBlockCompact(self, data, sametypesequence, word):
     """Parse a definition block laid out according to ``sametypesequence``.

     The block holds one field per character of ``sametypesequence``, in
     order.  For all but the last character: a lower-case type marks a
     NUL-terminated payload, an upper-case type marks a payload preceded
     by a 4-byte size decoded with ``binStrToInt``.  The last field is
     whatever remains of ``data``; it must not be empty, and for a
     lower-case type it must not contain a NUL character.

     data             -- the raw block.
     sametypesequence -- type characters; asserted to be a non-empty str.
     word             -- headword of the block, used for error reporting.

     Returns the ``(payload, type_char)`` pairs as a list.  A malformed
     block is reported with ``printAsError`` and yields ``None``.
     """
     assert type(sametypesequence) == str
     assert len(sametypesequence) > 0
     errorText = "Data file is corrupted. Word \"{0}\"".format(word)
     parsed = []
     cursor = 0
     for kind in sametypesequence[:-1]:
         if cursor >= len(data):
             printAsError(errorText)
             return None
         if isAsciiLower(kind):
             start = cursor
             cursor = data.find('\x00', start)
             if cursor < 0:
                 printAsError(errorText)
                 return None
             payload = data[start:cursor]
             # Step over the terminating NUL.
             cursor += 1
         else:
             assert isAsciiUpper(kind)
             if cursor + 4 > len(data):
                 printAsError(errorText)
                 return None
             size = binStrToInt(data[cursor:cursor+4])
             cursor += 4
             if cursor + size > len(data):
                 printAsError(errorText)
                 return None
             payload = data[cursor:cursor+size]
             cursor += size
         parsed.append((payload, kind))

     if cursor >= len(data):
         printAsError(errorText)
         return None
     finalKind = sametypesequence[-1]
     if not isAsciiLower(finalKind):
         assert isAsciiUpper(finalKind)
     elif data.find('\x00', cursor) >= 0:
         # The last text field is unterminated, so a NUL means corruption.
         printAsError(errorText)
         return None
     parsed.append((data[cursor:], finalKind))

     return parsed