def get_filetype(self, filepath, dom):
    """Decide whether a parsed XML file came from Toolbox or FieldWorks.

    Note to developer: Try to make it so that this function
    can never silently fail, even if for example a JPEG file is attempted.

    :param filepath: path of the file; used only in error messages
    :param dom: parsed document (has a documentElement attribute), or None
    :returns: "toolbox" or "fieldworks"
    :raises exceptions.FileAccessError: if dom is None or the structure of
        the document is not recognized
    """
    logger.debug(util.funcName('begin'))
    if dom is None:
        raise exceptions.FileAccessError("Error with file: %s", filepath)
    docElem = dom.documentElement

    # Inspect the first child that is an element.  Picking a child by fixed
    # position only works when the root starts with a whitespace text node,
    # so it misfires on XML written without indentation or with a leading
    # comment.
    docElemChild = None
    if docElem is not None:
        for node in docElem.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                docElemChild = node
                break

    filetype = ""
    if docElemChild is not None:
        if (docElem.nodeName == "database" and
                re.match(r"[a-zA-Z0-9]+Group", docElemChild.nodeName)):
            filetype = "toolbox"
        elif (docElem.nodeName == "document" and
              docElemChild.nodeName == "interlinear-text"):
            filetype = "fieldworks"
    if not filetype:
        # Never return an empty type; the caller must be told explicitly.
        raise exceptions.FileAccessError(
            "File does not seem to be from Toolbox or FieldWorks: %s",
            filepath)
    logger.debug("File type is %s", filetype)
    return filetype
def loadLibrary(self):
    """Load the ECDriver library and look up the functions used from it.

    Does nothing if self.loaded is already set.  On success the function
    pointers are stored on self (funcIsEcInstalled, funcSelectConverter,
    funcInitConverter, funcConvertString, funcDescription, funcCleanup on
    Linux, and funcAddConverter) and self.loaded is set to True.

    :raises exceptions.FileAccessError: if the library cannot be loaded or
        a required function is missing from it
    """
    if self.loaded:
        return
    if platform.system() == "Windows":
        libfile = "ECDriver"
        wide = 'W'  # use functions named with 'W' for wide characters
        logger.debug("Loading %s", libfile)
        try:
            try:
                # Python 3.8 and newer changed how DLL dependencies are
                # located; winmode=0 keeps the environment-based search.
                # See docs.python.org/3/whatsnew/3.8.html#bpo-36085-whatsnew
                libecdriver = ctypes.CDLL(libfile, winmode=0)
            except TypeError:
                # Python 3.7 or older does not accept winmode.
                libecdriver = ctypes.cdll.LoadLibrary(libfile)
        except OSError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc
    else:
        wide = ''
        ## Look for libecdriver.so in order of location precedence.
        liblocs = [(prefix, dirname, libname)
                   for dirname in ("encConverters", "fieldworks")
                   for prefix in ("/usr/local", "/usr")
                   for libname in ("libecdriver.so", "libecdriver_64.so")]
        libfile = ""
        for prefix, dirname, libname in liblocs:
            filepath = os.path.join(prefix, "lib", dirname, libname)
            if os.path.exists(filepath):
                libfile = filepath
                break
        if not libfile:
            # Perhaps it is in current dir, LD_LIBRARY_PATH or ldconfig.
            libfile = "libecdriver.so"
        logger.debug("Loading %s", libfile)
        try:
            libecdriver = ctypes.cdll.LoadLibrary(libfile)
        except OSError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc

    logger.debug("Getting functions from library")
    try:
        self.funcIsEcInstalled = libecdriver.IsEcInstalled
        self.funcSelectConverter = getattr(
            libecdriver, 'EncConverterSelectConverter' + wide)
        self.funcInitConverter = getattr(
            libecdriver, 'EncConverterInitializeConverter' + wide)
        self.funcConvertString = getattr(
            libecdriver, 'EncConverterConvertString' + wide)
        self.funcDescription = getattr(
            libecdriver, 'EncConverterConverterDescription' + wide)
        if platform.system() == "Linux":
            self.funcCleanup = libecdriver.Cleanup
    except AttributeError as exc:
        raise exceptions.FileAccessError("Library error: %s.", exc) from exc
    logger.debug("Library successfully loaded.")

    # AddConverter is optional.  Always leave the attribute defined so that
    # callers can test it for truth instead of hitting AttributeError.
    try:
        self.funcAddConverter = getattr(
            libecdriver, 'EncConverterAddConverter' + wide)
    except AttributeError:
        logger.warning("Could not load AddConverter function.")
        self.funcAddConverter = None
    self.loaded = True
def read(self):
    """Read in the data, converting the file to .odt if the first try fails.

    Returns list with elements of type FontItem (per the original notes),
    or an empty list if the temporary directory could not be created.
    Tries to overcome several zipfile reading exceptions that may occur.

    :raises exceptions.FileAccessError: for read errors that a conversion
        is not expected to fix, or if the file is not found
    """
    logger.debug(util.funcName('begin'))
    try:
        self.make_temp_dir()
    except exceptions.FileAccessError as exc:
        # Report to the user and give up without raising.
        self.msgbox.displayExc(exc)
        return list()

    data = None
    self.progressRange_partNum = 0
    retry_after_conversion = False
    try:
        data = self.readFile()
    except zipfile.BadZipFile as exc:
        logger.warning(exc)
        retry_after_conversion = True
    except exceptions.FileAccessError as exc:
        # Only a plain read error is worth retrying after conversion.
        if not exc.msg.startswith("Error reading file"):
            raise exc
        logger.warning(exc)
        retry_after_conversion = True
    except FileNotFoundError as exc:
        raise exceptions.FileAccessError(str(exc))

    if retry_after_conversion:
        self.convert_to_odt()
        data = self.readFile()
    #self.cleanup()
    logger.debug(util.funcName('end'))
    return data
def pickConverter(self):
    """Let the user pick a converter.

    Calls the library's select function and, unless the status is -1,
    stores the chosen name, direction and normalization form in a new
    ConverterSettings object assigned to self.config.

    :raises exceptions.FileAccessError: if EncConverters does not report
        itself as installed (verifyStatusOk may also raise on a bad status)
    """
    logger.debug(util.funcName('begin'))
    self.loadLibrary()
    if not self.loaded:
        return
    if not self.funcIsEcInstalled():
        raise exceptions.FileAccessError(
            "EncConverters does not seem to be installed properly.")

    # Output parameters filled in by the library call.
    bufConverterName = createBuffer(1024)
    c_forward = ctypes.c_bool(False)
    c_normForm = ctypes.c_ushort(0)
    logger.debug("Calling funcSelectConverter.")
    status = self.funcSelectConverter(
        bufConverterName, ctypes.byref(c_forward), ctypes.byref(c_normForm))
    if status == -1:
        logger.debug(
            "EncConverters returned %d.  User probably pressed Cancel.",
            status)
        return
    verifyStatusOk(status)

    rawName = bufConverterName.value
    logger.debug("Converter name was %r", rawName)
    self.config = ConverterSettings(self.config.userVars)
    # Off Windows the buffer value is decoded from UTF-8 before storing.
    self.config.convName = (
        rawName if platform.system() == "Windows"
        else rawName.decode("utf-8"))
    self.config.forward = c_forward.value
    self.config.normForm = c_normForm.value
    logger.debug(util.funcName('end'))
def loadFile(self, filepath):
    """Parse an XML file and return its DOM.

    :param filepath: path of the XML file to parse
    :returns: document object produced by xml.dom.minidom.parse()
    :raises exceptions.FileAccessError: if the file does not exist, cannot
        be opened or read, or is not well-formed XML
    """
    logger.debug(util.funcName('begin', args=filepath))
    if not os.path.exists(filepath):
        raise exceptions.FileAccessError("Cannot find file %s", filepath)
    try:
        dom = xml.dom.minidom.parse(filepath)
    except (xml.parsers.expat.ExpatError, OSError) as exc:
        # OSError covers paths that exist but cannot be read (a directory,
        # missing permissions), so callers only ever see FileAccessError.
        raise exceptions.FileAccessError(
            "Error reading file %s\n\n%s",
            filepath, str(exc).capitalize()) from exc
    if dom is None:
        raise exceptions.FileAccessError("Error reading file %s", filepath)
    logger.debug(util.funcName('end'))
    return dom
def _fillInData(self, wordList, word_i1, word_i2):
    """Write a run of words into the sheet as one block of rows.

    :param wordList: indexable collection of word objects
    :param word_i1: index of the first word to write
    :param word_i2: index of the last word to write (inclusive)
    :raises exceptions.FileAccessError: if setting the cell data fails
    """
    columns = self.colOrder
    rows = []
    for index in range(word_i1, word_i2 + 1):
        entry = wordList[index]
        columns.resetRowData()
        columns.setRowVal('colWord', entry.text)
        columns.setRowVal('colOccur', entry.occurrences)
        columns.setRowVal('colOk', entry.isCorrect_str())
        columns.setRowVal('colChange', entry.correction)
        columns.setRowVal('colSrc', entry.sources_str())
        rows.append(columns.getRowTuple())

    # Data starts at the second row, so list index 0 lands on row 2.
    firstRow, lastRow = word_i1 + 2, word_i2 + 2
    lastColumn = chr(ord('A') + len(self.colOrder.COLUMNS) - 1)
    rangeName = "A%d:%s%d" % (firstRow, lastColumn, lastRow)
    logger.debug("Adding %d rows to range %s", len(rows), rangeName)
    #logger.debug(repr(rows))
    targetRange = self.sheet.getCellRangeByName(rangeName)
    try:
        targetRange.setDataArray(tuple(rows))
    except RuntimeException as exc:
        raise exceptions.FileAccessError(
            "There was a problem while writing the list.\n\n%s", exc)
    self.progressRanges.update(word_i1)
def convert_to_odt(self):
    """Opens a file such as .doc, saves as .odt and then closes it.

    The converted copy is named after the original with "_converted.odt"
    appended to its base name and is placed in self.tempBaseDir.  On return,
    self.fileconfig.filepath points to the converted copy.  If that copy
    already exists it is reused without converting again.

    :raises exceptions.FileAccessError: if saving the converted file fails
    """
    logger.debug(util.funcName('begin'))
    self.incrementProgressPart()
    sourceName = os.path.basename(self.fileconfig.filepath)
    stem = os.path.splitext(sourceName)[0]
    newpath = os.path.join(self.tempBaseDir, stem + "_converted.odt")
    if os.path.exists(newpath):
        logger.warning("File already exists: %s", newpath)
        self.fileconfig.filepath = newpath
        logger.debug(util.funcName('return'))
        return

    doc_loader = doc_reader.DocReader(self.fileconfig, self.unoObjs, 0)
    doc_loader.loadDoc(self.fileconfig.filepath)
    loaded_doc = doc_loader.doc
    # No FilterName property is passed; only Overwrite is set.
    uno_args = (util.createProp("Overwrite", False),)
    logger.debug("Saving as %s", newpath)
    fileUrl = uno.systemPathToFileUrl(os.path.realpath(newpath))
    try:
        loaded_doc.document.storeAsURL(fileUrl, uno_args)
    except ErrorCodeIOException:
        raise exceptions.FileAccessError("Error saving %s", newpath)
    try:
        loaded_doc.document.close(True)
    except CloseVetoException:
        # Failing to close is logged but does not abort the conversion.
        logger.warning("Could not close %s", newpath)
    self.fileconfig.filepath = newpath
    self.incrementProgressPart()
    logger.debug(util.funcName('end'))
def addConverter(self, mappingName, converterSpec, conversionType,
                 leftEncoding, rightEncoding, processType):
    """Add a converter to the repository.  Used for automated testing.

    :param mappingName: friendly name key that the converter is to be
                        accessed with
    :param converterSpec: technical spec of the converter
                          (e.g. TECkit & CC = filespec to map)
    :param conversionType: ConvType parameter indicating the type of
                           conversion (e.g. "Legacy_to_from_Unicode")
    :param leftEncoding: optional technical name of the left-hand side
                         encoding (e.g. SIL-ANNAPURNA-05)
    :param rightEncoding: optional technical name of the right-hand side
                          encoding (e.g. UNICODE)
    :param processType: ProcessTypeFlags flag to indicate the
                        implementation/transduction type
                        (e.g. UnicodeEncodingConversion) from which you can
                        do later filtering (e.g. ByEncodingID)
    :raises exceptions.FileAccessError: if EncConverters is not installed
        or the AddConverter function is unavailable
    :raises exceptions.LogicError: if no converter name was given
    """
    callArgs = (mappingName, converterSpec, conversionType,
                leftEncoding, rightEncoding, processType)
    logger.debug(util.funcName('begin', args=callArgs))
    self.loadLibrary()
    if not self.funcIsEcInstalled():
        raise exceptions.FileAccessError(
            "EncConverters does not seem to be installed properly.")
    if not self.funcAddConverter:
        raise exceptions.FileAccessError(
            "Could not get AddConverter function.  "
            "Automatically adding a converter requires SEC4.0 or higher.")

    # Convert each Python argument to the form passed to the library.
    c_convName = getStringParam(mappingName)
    if not c_convName:
        raise exceptions.LogicError("No converter was specified.")
    c_convSpec = getStringParam(converterSpec)
    c_convType = ctypes.c_ushort(conversionType)
    c_leftEnc = getStringParam(leftEncoding)
    c_rightEnc = getStringParam(rightEncoding)
    c_processType = ctypes.c_ushort(processType)

    logger.debug("Calling funcAddConverter.")
    status = self.funcAddConverter(
        c_convName, c_convSpec, c_convType,
        c_leftEnc, c_rightEnc, c_processType)
    verifyStatusOk(status)
    logger.debug(util.funcName('end'))
def _read(self):
    """Parse self.filepath as XML and read it as a spelling status file.

    Stores the parsed document in self.dom, then calls
    read_spellingStatus_file() if self.fileconfig.filetype is
    'spellingStatus'.

    :raises exceptions.FileAccessError: if the file is missing, cannot be
        read or parsed, or has an unexpected file type
    """
    logger.debug("Parsing file %s", self.filepath)
    if not os.path.exists(self.filepath):
        raise exceptions.FileAccessError(
            "Cannot find file %s", self.filepath)
    try:
        self.dom = xml.dom.minidom.parse(self.filepath)
    except (xml.parsers.expat.ExpatError, OSError) as exc:
        # OSError covers paths that exist but cannot be read, so that the
        # caller sees FileAccessError rather than a raw I/O exception.
        raise exceptions.FileAccessError(
            "Error reading file %s\n\n%s",
            self.filepath, str(exc).capitalize()) from exc
    logger.debug("Parse finished.")
    self.progressBar.updatePercent(60)
    if self.fileconfig.filetype == 'spellingStatus':
        self.read_spellingStatus_file()
    else:
        raise exceptions.FileAccessError(
            "Unexpected file type %s", self.fileconfig.filetype)
def verifyStatusOk(status):
    """Raise exceptions.FileAccessError unless status is ErrStatus.NoError.

    The message contains the numeric status, followed by its description in
    parentheses when ErrStatus.DESCRIPTIONS has an entry for it.
    """
    if status != ErrStatus.NoError:
        detail = (
            " (%s)" % ErrStatus.DESCRIPTIONS[status]
            if status in ErrStatus.DESCRIPTIONS else "")
        raise exceptions.FileAccessError(
            "Error: EncConverters returned %d%s.", status, detail)
def _read(self):
    """Load the document at self.filepath, read it, and show its window.

    :raises exceptions.FileAccessError: if loading the document fails
    """
    load_errors = (exceptions.FileAccessError, exceptions.DocAccessError)
    try:
        self.loadDoc(self.filepath)
    except load_errors:
        # Both failure kinds are reported with the same message.
        raise exceptions.FileAccessError(
            "Error reading file %s", self.filepath)
    self.progressBar.updatePercent(60)
    self.read_document()
    logger.debug("Setting visible.")
    docWindow = self.doc.window
    docWindow.setVisible(True)
def _read(self):
    """Read in the data.  Modifies self.data

    Calls read_sfm_file() and appends one WordInList to self.data for each
    (marker, value) pair in self.rawData.

    :raises exceptions.FileAccessError: if self.filepath does not exist
    """
    source_path = self.filepath
    logger.debug("Parsing file %s", source_path)
    if not os.path.exists(source_path):
        raise exceptions.FileAccessError(
            "Cannot find file %s", self.filepath)
    self.progressBar.updatePercent(30)
    self.read_sfm_file()
    # The marker itself is not stored; only the text after it is kept.
    for _marker, text in self.rawData:
        entry = wordlist_structs.WordInList()
        entry.text = text
        entry.source = self.filepath
        self.data.append(entry)
def _read(self):
    """Parse self.filepath as XML and hand it to the matching reader.

    The file type comes from self.get_filetype().  The parsed document is
    stored in self.dom, then the reader for 'paxml', 'lift' or 'xml' is
    called.

    :raises exceptions.FileAccessError: if the file is missing, or cannot
        be read or parsed
    :raises exceptions.LogicError: if the file type is not one of the
        three handled here
    """
    filetype = self.get_filetype()
    self.progressBar.updatePercent(30)
    logger.debug("Parsing file %s", self.filepath)
    if not os.path.exists(self.filepath):
        raise exceptions.FileAccessError(
            "Cannot find file %s", self.filepath)
    try:
        self.dom = xml.dom.minidom.parse(self.filepath)
    except (xml.parsers.expat.ExpatError, OSError) as exc:
        # OSError covers paths that exist but cannot be read, so that the
        # caller sees FileAccessError rather than a raw I/O exception.
        raise exceptions.FileAccessError(
            "Error reading file %s\n\n%s",
            self.filepath, str(exc).capitalize()) from exc
    logger.debug("Parse finished.")
    self.progressBar.updatePercent(60)
    if filetype == 'paxml':
        self.read_paxml_file()
    elif filetype == 'lift':
        self.read_lift_file()
    elif filetype == 'xml':
        self.read_toolbox_file()
    else:
        raise exceptions.LogicError("Unexpected file type %s", filetype)
def _read(self):
    """Parse every file in self.config.fileList and read its examples.

    For each file the DOM is parsed into self.dom, the file type is
    determined with get_filetype(), and the matching reader (ToolboxXML or
    FieldworksXML) is run.  Progress is reported in three parts per file.

    :raises exceptions.FileAccessError: if a file is missing or cannot be
        read or parsed
    :raises exceptions.DataNotFoundError: if reading a file did not change
        the length of self.data
    """
    progressRange = ProgressRange(
        ops=len(self.config.fileList), pbar=self.progressBar)
    progressRange.partSize = 3
    self.suggestions = []
    self.duplicate_refnums = set()
    # list_index is the 1-based position of the current file in the list.
    for list_index, fileItem in enumerate(self.config.fileList, 1):
        path = fileItem.filepath
        logger.debug("Parsing file %s", path)
        self.prefix = fileItem.prefix
        self.use_segnum = fileItem.use_segnum
        self.dom = None
        if not os.path.exists(path):
            raise exceptions.FileAccessError("Cannot find file %s", path)
        try:
            self.dom = xml.dom.minidom.parse(path)
        except (xml.parsers.expat.ExpatError, IOError) as exc:
            raise exceptions.FileAccessError(
                "Error reading file %s\n\n%s",
                path, str(exc).capitalize())
        logger.debug("Parse finished.")
        progressRange.updatePart(1)

        filetype = self.get_filetype(path, self.dom)
        progressRange.updatePart(2)

        countBefore = len(self.data)
        if filetype == "toolbox":
            ToolboxXML(self).read()
        elif filetype == "fieldworks":
            FieldworksXML(self).read()
        logger.debug("Read %d examples.", len(self.data))
        if len(self.data) == countBefore:
            raise exceptions.DataNotFoundError(
                "Did not find any data in file %s", path)
        progressRange.update(list_index)
def make_temp_dir(self):
    """Make temporary directory to extract .odt file contents.

    Sets self.tempBaseDir to the 'OOLT Converted Files' folder under
    self.outdir, creating it if needed.  Then creates the first unused
    numbered subfolder ("001" to "999") and stores its path in self.tempDir.

    :raises exceptions.FileAccessError: if a folder cannot be created or
        all numbered subfolders are already taken
    """
    self.tempBaseDir = os.path.join(self.outdir, 'OOLT Converted Files')
    try:
        # exist_ok avoids a race between checking for the folder and
        # creating it.
        os.makedirs(self.tempBaseDir, exist_ok=True)
    except OSError as exc:
        raise exceptions.FileAccessError(
            "Could not create temporary folder %s",
            self.tempBaseDir) from exc

    MAX_FOLDERS = 1000
    # Track the choice in a local variable so that a value left in
    # self.tempDir by an earlier call cannot be mistaken for a fresh one.
    chosenDir = None
    for folderNum in range(1, MAX_FOLDERS):
        tempDirCandidate = os.path.join(
            self.tempBaseDir, "%03d" % folderNum)
        if not os.path.exists(tempDirCandidate):
            chosenDir = tempDirCandidate
            break
    if chosenDir is None:
        raise exceptions.FileAccessError(
            "Too many temporary folders in %s.", self.tempBaseDir)
    self.tempDir = chosenDir
    try:
        os.mkdir(self.tempDir)
    except OSError as exc:
        raise exceptions.FileAccessError(
            "Could not create temporary folder %s", self.tempDir) from exc
def loadDoc(self, filepath):
    """Sets self.calcUnoObjs to a loaded Calc doc.
    File will open minimized if not already open.

    :param filepath: system path of the spreadsheet file
    :raises exceptions.FileAccessError: if filepath does not exist
    :raises exceptions.DocAccessError: if getting the document objects
        raises AttributeError
    """
    logger.debug("Opening file %s", filepath)
    if not os.path.exists(filepath):
        raise exceptions.FileAccessError("Cannot find file %s", filepath)
    absolutePath = os.path.realpath(filepath)
    fileUrl = uno.systemPathToFileUrl(absolutePath)
    loadProps = (util.createProp("Minimized", True),)
    desktop = self.unoObjs.desktop
    newDoc = desktop.loadComponentFromURL(fileUrl, "_default", 0, loadProps)
    try:
        self.calcUnoObjs = self.unoObjs.getDocObjs(
            newDoc, doctype=util.UnoObjs.DOCTYPE_CALC)
    except AttributeError:
        raise exceptions.DocAccessError()
    # otherwise it will be hidden
    self.calcUnoObjs.window.setVisible(True)
    logger.debug("Opened file.")
def get_filetype(self):
    """Determines file type based on extension.
    Does not read file contents.

    :returns: 'paxml', 'lift' or 'xml'
    :raises exceptions.FileAccessError: if the file name of self.filepath
        ends with none of the known extensions
    """
    logger.debug(util.funcName('begin'))
    filename = os.path.basename(self.filepath)
    # Checked in order; ".xml" comes last as the most general pattern.
    extensionTypes = (
        (r"\.paxml$", 'paxml'),
        (r"\.lift$", 'lift'),
        (r"\.xml$", 'xml'),
    )
    for pattern, candidate in extensionTypes:
        if re.search(pattern, filename):
            filetype = candidate
            break
    else:
        raise exceptions.FileAccessError(
            "Unknown file type for %s", filename)
    logger.debug("File type %s.", filetype)
    return filetype
def loadDoc(self, filepath):
    """Load a document minimized and store its objects in self.doc.

    :param filepath: system path of the document to open
    :raises exceptions.FileAccessError: if filepath does not exist
    :raises exceptions.DocAccessError: if getting the document objects
        raises AttributeError
    """
    logger.debug(util.funcName('begin', args=filepath))
    if not os.path.exists(filepath):
        raise exceptions.FileAccessError("Cannot find file %s", filepath)
    absolutePath = os.path.realpath(filepath)
    fileUrl = uno.systemPathToFileUrl(absolutePath)
    # Setting a filter makes some files work but then .odt fails.
    # Instead just have the user open the file first.
    #util.createProp("FilterName", "Text"),
    loadProps = (util.createProp("Minimized", True),)
    # Loading the document hidden was reported to frequently crash
    # before OOo 2.0.  It seems to work fine now though.
    desktop = self.unoObjs.desktop
    newDoc = desktop.loadComponentFromURL(fileUrl, "_default", 0, loadProps)
    try:
        self.doc = self.unoObjs.getDocObjs(newDoc)
    except AttributeError:
        raise exceptions.DocAccessError()
    logger.debug(util.funcName('end'))
def _read(self):
    """Harvest data by grabbing word strings from one or more columns.

    Appends a WordInList to self.data for every non-empty string found in
    the requested columns, then makes the spreadsheet window visible.

    :raises exceptions.FileAccessError: if loading the document fails
    """
    load_errors = (exceptions.FileAccessError, exceptions.DocAccessError)
    try:
        self.loadDoc(self.filepath)
    except load_errors:
        raise exceptions.FileAccessError(
            "Error reading file %s", self.filepath)
    reader = SpreadsheetReader(self.calcUnoObjs)
    self.progressBar.updatePercent(60)
    for whatToGrab in self.fileconfig.thingsToGrab:
        # Only column grabs are handled by this reader.
        if whatToGrab.grabType != wordlist_structs.WhatToGrab.COLUMN:
            continue
        stringList = reader.getColumnStringList(
            whatToGrab.whichOne, self.fileconfig.skipFirstRow)
        for text in stringList:
            if text == "":
                continue
            ## Add word
            entry = wordlist_structs.WordInList()
            entry.text = text
            entry.source = self.filepath
            self.data.append(entry)
    logger.debug("Setting visible.")
    self.calcUnoObjs.window.setVisible(True)
def setConverter(self, newConfig=None):
    """Initialize a converter to the specified values.

    :param newConfig: type ConverterSettings; when omitted or falsy, the
                      current self.config is used instead
    :raises exceptions.FileAccessError: if EncConverters does not report
        itself as installed (verifyStatusOk may also raise on a bad status)
    :raises exceptions.LogicError: if no converter name is available
    """
    logger.debug(util.funcName('begin'))
    # Falling back to self.config is useful for multiple converter objects
    # with different settings.
    settings = newConfig if newConfig else self.config
    self.loadLibrary()
    if not self.funcIsEcInstalled():
        raise exceptions.FileAccessError(
            "EncConverters does not seem to be installed properly.")
    c_convName = getStringParam(settings.convName)
    if c_convName is None:
        raise exceptions.LogicError("No converter was specified.")
    c_forward = ctypes.c_bool(settings.forward)
    c_normForm = ctypes.c_ushort(settings.normForm)
    logger.debug("calling funcInitConverter with %r", settings)
    status = self.funcInitConverter(c_convName, c_forward, c_normForm)
    verifyStatusOk(status)
    # Only remember the settings once the library accepted them.
    self.config = settings
    logger.debug(util.funcName('end'))
def read_sfm_file(self):
    """Grabs a flat list of marker data, not organized by records of
    several markers.

    This should work whether self.fileconfig contains one field with
    several markers, or several fields with one marker each, or some
    combination of the two.

    Modifies self.rawData by appending a (marker, data) tuple for each line
    that starts with one of the requested markers followed by a space.

    :raises exceptions.FileAccessError: if the file is not valid UTF-8
    """
    logger.debug("reading SFM file")
    sfMarkerList = []
    for whatToGrab in self.fileconfig.thingsToGrab:
        if whatToGrab.grabType == wordlist_structs.WhatToGrab.SFM:
            sfMarkerList.extend(whatToGrab.whichOne.split())
    # Build the "marker plus space" prefixes once rather than per line.
    prefixes = [(marker, marker + " ") for marker in sfMarkerList]
    try:
        # utf-8-sig decodes plain UTF-8 and also drops a leading byte order
        # mark, which would otherwise keep the first line from matching.
        # The with-statement closes the file on every exit path.
        with io.open(self.filepath, mode='r',
                     encoding='utf-8-sig') as infile:
            for lineNum, line in enumerate(infile, start=1):
                logger.debug("Line #%d.", lineNum)
                for marker, markerWithSpace in prefixes:
                    if line.startswith(markerWithSpace):
                        logger.debug("^%s", markerWithSpace)
                        data = line[len(markerWithSpace):].strip()
                        self.rawData.append((marker, data))
    except UnicodeDecodeError as exc:
        raise exceptions.FileAccessError(
            "Error reading file %s\n\n%s",
            self.filepath, str(exc)) from exc
    logger.debug("Found %d words.", len(self.rawData))
def loadLibrary(self):
    """Load the ECDriver library and look up the functions used from it.

    Does nothing if self.loaded is already set.  On success the function
    pointers are stored on self (funcIsEcInstalled, funcSelectConverter,
    funcInitConverter, funcConvertString, funcDescription, funcCleanup on
    Linux, and funcAddConverter) and self.loaded is set to True.

    :raises exceptions.FileAccessError: if the library cannot be loaded or
        a required function is missing from it
    """
    if self.loaded:
        return
    if platform.system() == "Windows":
        libfile = "ECDriver"
        wide = 'W'  # use functions named with 'W' for wide characters
        logger.debug("Loading %s", libfile)
        try:
            try:
                # Python 3.8 and newer
                # It is recommended to use os.add_dll_directory("..."),
                # but that would not search with environment variables. See
                # docs.python.org/3/whatsnew/3.8.html#bpo-36085-whatsnew
                libecdriver = ctypes.CDLL(libfile, winmode=0)
            except TypeError:
                # Python 3.7 or older
                libecdriver = ctypes.cdll.LoadLibrary(libfile)
        except OSError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc
    else:
        wide = ''
        ## Look for libecdriver.so in order of location precedence.
        candidates = (
            os.path.join(prefix, "lib", dirname, libname)
            for dirname in ("encConverters", "fieldworks")
            for prefix in ("/usr/local", "/usr")
            for libname in ("libecdriver.so", "libecdriver_64.so"))
        libfile = next(
            (path for path in candidates if os.path.exists(path)), "")
        if not libfile:
            # Perhaps it is in current dir, LD_LIBRARY_PATH or ldconfig.
            libfile = "libecdriver.so"
        logger.debug("Loading %s", libfile)
        try:
            libecdriver = ctypes.cdll.LoadLibrary(libfile)
        except OSError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc

    logger.debug("Getting functions from library")
    try:
        self.funcIsEcInstalled = libecdriver.IsEcInstalled
        self.funcSelectConverter = getattr(
            libecdriver, 'EncConverterSelectConverter' + wide)
        self.funcInitConverter = getattr(
            libecdriver, 'EncConverterInitializeConverter' + wide)
        self.funcConvertString = getattr(
            libecdriver, 'EncConverterConvertString' + wide)
        self.funcDescription = getattr(
            libecdriver, 'EncConverterConverterDescription' + wide)
        if platform.system() == "Linux":
            self.funcCleanup = libecdriver.Cleanup
    except AttributeError as exc:
        raise exceptions.FileAccessError("Library error: %s.", exc) from exc
    logger.debug("Library successfully loaded.")

    # AddConverter is optional.  Always leave the attribute defined so that
    # callers can test it for truth instead of hitting AttributeError.
    try:
        self.funcAddConverter = getattr(
            libecdriver, 'EncConverterAddConverter' + wide)
    except AttributeError:
        logger.warning("Could not load AddConverter function.")
        self.funcAddConverter = None
    self.loaded = True