def readXml(self, fileRef): """Read a generic (non-TreeLine) XML file""" try: f = self.getReadFileObj(fileRef) filePath = unicode(f.name, sys.getfilesystemencoding()) handler = treexmlparse.GenericXmlHandler() input = xml.sax.InputSource() input.setByteStream(f) input.setEncoding('utf-8') reader = xml.sax.make_parser() reader.setContentHandler(handler) reader.setFeature(xml.sax.handler.feature_external_ges, 0) reader.parse(input) except UnicodeError: print 'Error - bad Unicode in file', \ filePath.encode(globalref.localTextEncoding) f.close() raise ReadFileError(_('Problem with Unicode characters in file')) except xml.sax.SAXException: f.close() raise ReadFileError(_('Could not open XML file')) f.close() if not handler.rootItem: raise ReadFileError(_('Could not open XML file')) self.root = handler.rootItem self.fileName = filePath self.treeFormats = TreeFormats(handler.formats) for format in self.treeFormats.values(): format.fixImportedFormat( treexmlparse.GenericXmlHandler.textFieldName)
def readOdf(self, fileRef):
    """Import an Open Document Format (ODF) file.

    The default format gets an extra text field and new output lines,
    then 'content.xml' is read out of the zip archive and parsed with
    an OdfSaxHandler that is given a new root item and that format.
    Raises ReadFileError if the archive cannot be read, on a
    UnicodeError, or on a SAX exception.
    """
    self.treeFormats = TreeFormats(None, True)
    rootItem = TreeItem(None, TreeFormats.rootFormatDefault,
                        TreeDoc.rootTitleDefault)
    defaultFormat = self.treeFormats[TreeFormats.formatDefault]
    textFieldAttrs = {u'html': 'n', u'lines': '6'}
    defaultFormat.addNewField(TreeFormats.textFieldName, textFieldAttrs)
    boldLine = u'<b>{*%s*}</b>' % TreeFormats.fieldDefault
    textLine = u'{*%s*}' % TreeFormats.textFieldName
    defaultFormat.changeOutputLines([boldLine, textLine])
    try:
        f = self.getReadFileObj(fileRef)
        filePath = unicode(f.name, sys.getfilesystemencoding())
        archive = zipfile.ZipFile(f, 'r')
        contentXml = archive.read('content.xml')
        odfHandler = treexmlparse.OdfSaxHandler(rootItem, defaultFormat)
        xml.sax.parseString(contentXml, odfHandler)
    except (zipfile.BadZipfile, KeyError):
        # KeyError: the archive has no 'content.xml' member
        f.close()
        raise ReadFileError(_('Could not unzip ODF file'))
    except UnicodeError:
        f.close()
        raise ReadFileError(_('Problem with Unicode characters in file'))
    except xml.sax.SAXException:
        f.close()
        raise ReadFileError(_('Could not open corrupt ODF file'))
    f.close()
    self.root = rootItem
    self.fileName = filePath
def readPara(self, fileRef, errors='strict'): """Import plain text, blank line delimitted""" try: f = self.getEncodedFileObj(fileRef, globalref.localTextEncoding, errors) filePath = unicode(f.name, sys.getfilesystemencoding()) fullText = f.read().replace('\r', '') except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readPara(fileRef, 'replace') else: f.close() return textList = fullText.split('\n\n') f.close() self.treeFormats = TreeFormats({}, True) # set defaults ROOT & DEFAULT newRoot = TreeItem(None, TreeFormats.rootFormatDefault) defaultFormat = self.treeFormats[TreeFormats.formatDefault] defaultFormat.fieldList = [] defaultFormat.lineList = [] defaultFormat.iconName = 'doc' defaultFormat.addTableFields([TreeFormats.textFieldName]) defaultFormat.fieldList[0].numLines = globalref.options.\ intData('MaxEditLines', 1, optiondefaults.maxNumLines) newRoot.setTitle(TreeDoc.rootTitleDefault) for line in textList: line = line.strip() if line: newItem = TreeItem(newRoot, TreeFormats.formatDefault) newRoot.childList.append(newItem) newItem.data[TreeFormats.textFieldName] = line self.root = newRoot self.fileName = filePath
def readLines(self, fileRef, errors='strict'): """Import plain text, node per line""" try: f = self.getEncodedFileObj(fileRef, globalref.localTextEncoding, errors) filePath = unicode(f.name, sys.getfilesystemencoding()) textList = f.readlines() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readLines(fileRef, 'replace') else: f.close() return f.close() self.treeFormats = TreeFormats({}, True) # set defaults ROOT & DEFAULT newRoot = TreeItem(None, TreeFormats.rootFormatDefault) defaultFormat = self.treeFormats[TreeFormats.formatDefault] defaultFormat.fieldList = [] defaultFormat.lineList = [] defaultFormat.addTableFields([TreeFormats.textFieldName]) newRoot.setTitle(TreeDoc.rootTitleDefault) for line in textList: line = line.strip() if line: newItem = TreeItem(newRoot, TreeFormats.formatDefault) newRoot.childList.append(newItem) newItem.data[TreeFormats.textFieldName] = line self.root = newRoot self.fileName = filePath
def readTabbed(self, fileRef, errors='strict'): """Import tabbed data into a flat tree - raise exception on failure""" try: f = self.getEncodedFileObj(fileRef, globalref.localTextEncoding, errors) filePath = unicode(f.name, sys.getfilesystemencoding()) textList = f.readlines() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readTabbed(fileRef, 'replace') else: f.close() return f.close() bufList = [(text.count('\t', 0, len(text) - len(text.lstrip())), text.strip()) for text in textList if text.strip()] if bufList: buf = bufList.pop(0) if buf[0] == 0: # set default formats ROOT & DEFAULT self.treeFormats = TreeFormats({}, True) newRoot = TreeItem(None, TreeFormats.rootFormatDefault) newRoot.setTitle(buf[1]) if newRoot.loadTabbedChildren(bufList): self.root = newRoot self.fileName = filePath return raise ReadFileError(_('Error in tabbed list'))
def readTreepad(self, fileRef, errors='strict'): """Read Treepad text-node file""" try: f = self.getEncodedFileObj(fileRef, globalref.localTextEncoding, errors) filePath = unicode(f.name, sys.getfilesystemencoding()) textList = f.read().split('<end node> 5P9i0s8y19Z') f.close() except UnicodeError: # error common - broken unicode on windows print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readTreepad(fileRef, 'replace') else: f.close() return self.treeFormats = TreeFormats() format = nodeformat.NodeFormat(TreeFormats.formatDefault) titleFieldName = _('Title', 'title field name') format.addNewField(titleFieldName) format.addLine(u'{*%s*}' % titleFieldName) numLines = globalref.options.intData('MaxEditLines', 1, optiondefaults.maxNumLines) format.addNewField(TreeFormats.textFieldName, {'lines': repr(numLines)}) format.addLine(u'{*%s*}' % TreeFormats.textFieldName) self.treeFormats[format.name] = format itemList = [] for text in textList: text = text.strip() if text: try: text = text.split('<node>', 1)[1].lstrip() lines = text.split('\n') title = lines[0] level = int(lines[1]) lines = lines[2:] except (ValueError, IndexError): print 'Error - bad file format in %s' % \ filePath.encode(globalref.localTextEncoding) raise ReadFileError(_('Bad file format in %s') % filePath) item = TreeItem(None, format.name) item.data[titleFieldName] = title item.data[TreeFormats.textFieldName] = '\n'.join(lines) item.level = level itemList.append(item) self.root = itemList[0] parentList = [] for item in itemList: if item.level != 0: parentList = parentList[:item.level] item.parent = parentList[-1] parentList[-1].childList.append(item) parentList.append(item) self.root = itemList[0] self.fileName = filePath
def __init__(self, filePath=None, setNewDefaults=False, importType=None):
    """Create a document, optionally loading it from filePath.

    filePath may also be a file ref; without it a new tree with a
    default-titled root is created.  importType, if given, is the name
    of the method of this object that is called with filePath instead
    of readFile.  setNewDefaults takes the compression and encryption
    settings from the user's options for new files.
    """
    globalref.docRef = self
    self.root = None
    self.treeFormats = TreeFormats()
    self.fileInfoItem = TreeItem(None, nodeformat.FileInfoFormat.name)
    self.fileInfoFormat = None
    TreeDoc.copyFormat = nodeformat.NodeFormat('_DUMMY__ROOT_', {},
                                               TreeFormats.fieldDefault)
    self.undoStore = undo.UndoRedoStore()
    self.redoStore = undo.UndoRedoStore()
    self.sortFields = ['']
    self.fileName = self.spellChkLang = ''
    self.xlstLink = self.xslCssLink = ''
    self.spaceBetween = self.lineBreaks = self.formHtml = True
    self.childFieldSep = TreeDoc.childFieldSepDflt
    self.tlVersion = __version__
    self.fileInfoFormat = nodeformat.FileInfoFormat()
    if not filePath:
        # nothing to open - start a new document
        self.treeFormats = TreeFormats({}, True)
        self.root = TreeItem(None, TreeFormats.rootFormatDefault)
        self.root.setTitle(TreeDoc.rootTitleDefault)
    elif importType:
        importMethod = getattr(self, importType)
        importMethod(filePath)
    else:
        self.readFile(filePath)
    self.modified = False
    if setNewDefaults or not hasattr(self, 'compressFile'):
        options = globalref.options
        self.compressFile = options.boolData('CompressNewFiles')
        self.encryptFile = options.boolData('EncryptNewFiles')
    elif not hasattr(self, 'encryptFile'):
        self.encryptFile = False
    self.selection = treeselection.TreeSelection([self.root])
    self.fileInfoFormat.translateFields()
    self.fileInfoFormat.updateFileInfo()
def readFile(self, fileRef): """Open and read file - raise exception on failure, fileRef is either file path or file object""" filePath = hasattr(fileRef, 'read') and \ unicode(fileRef.name, sys.getfilesystemencoding()) or \ fileRef try: f = self.getReadFileObj(fileRef) f = self.decryptFile(f) handler = treexmlparse.TreeSaxHandler(self) input = xml.sax.InputSource() input.setByteStream(f) input.setEncoding('utf-8') reader = xml.sax.make_parser() reader.setContentHandler(handler) reader.setFeature(xml.sax.handler.feature_external_ges, 0) reader.parse(input) except IOError: print 'Error - could not read file', \ filePath.encode(globalref.localTextEncoding) raise except UnicodeError: print 'Error - bad Unicode in file', \ filePath.encode(globalref.localTextEncoding) f.close() raise except xml.sax.SAXException: f.close() raise ReadFileError(_('Could not open as treeline file')) f.close() self.root = handler.rootItem self.fileName = filePath self.treeFormats = TreeFormats(handler.formats) self.fileInfoFormat.replaceListFormat() self.treeFormats.updateAutoChoices() self.treeFormats.updateUniqueID() self.treeFormats.updateDerivedTypes() if not self.tlVersion: # file from before 0.12.80, fix number format for format in self.treeFormats.values(): for field in format.fieldList: if field.typeName == 'Number': field.format = field.format.replace(',', '\,')
def createBookmarkFormat(self):
    """Build and return the TreeFormats used for bookmark imports.

    Three node formats are created and stored under their own names:
    one named TreeDoc.folderName, one named TreeDoc.bookmarkName (which
    also gets a URL-type link field) and one named
    TreeDoc.separatorName.
    """
    defaultLine = u'{*%s*}' % TreeFormats.fieldDefault
    bookmarkFormats = TreeFormats()

    folder = nodeformat.NodeFormat(TreeDoc.folderName)
    folder.addNewField(TreeFormats.fieldDefault)
    folder.addLine(defaultLine)
    folder.addLine(defaultLine)
    folder.iconName = 'folder_3'
    bookmarkFormats[folder.name] = folder

    bookmark = nodeformat.NodeFormat(TreeDoc.bookmarkName)
    bookmark.addNewField(TreeFormats.fieldDefault)
    bookmark.addLine(defaultLine)
    bookmark.addLine(defaultLine)
    bookmark.addNewField(TreeFormats.linkFieldName, {'type': 'URL'})
    bookmark.addLine(u'{*%s*}' % TreeFormats.linkFieldName)
    bookmark.iconName = 'bookmark'
    bookmarkFormats[bookmark.name] = bookmark

    separator = nodeformat.NodeFormat(TreeDoc.separatorName)
    separator.addNewField(TreeFormats.fieldDefault)
    separator.addLine(u'------------------')
    separator.addLine(u'<hr>')
    bookmarkFormats[separator.name] = separator

    return bookmarkFormats
def readTable(self, fileRef, errors='strict'): """Import table data into a flat tree - raise exception on failure""" try: f = self.getEncodedFileObj(fileRef, globalref.localTextEncoding, errors) filePath = unicode(f.name, sys.getfilesystemencoding()) textList = f.readlines() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readTable(fileRef, 'replace') else: f.close() return f.close() self.treeFormats = TreeFormats({}, True) # set defaults ROOT & DEFAULT newRoot = TreeItem(None, TreeFormats.rootFormatDefault) defaultFormat = self.treeFormats[TreeFormats.formatDefault] defaultFormat.fieldList = [] defaultFormat.lineList = [] defaultFormat.addTableFields(textList.pop(0).strip().split('\t')) newRoot.setTitle(TreeDoc.rootTitleDefault) for line in textList: newItem = TreeItem(newRoot, TreeFormats.formatDefault) newRoot.childList.append(newItem) lineList = line.strip().split('\t') try: for num in range(len(lineList)): newItem.data[self.treeFormats[TreeFormats.formatDefault]. fieldList[num].name] = lineList[num].strip() except IndexError: print 'Too few headings to read data as a table' raise ReadFileError( _('Too few headings to read data as table')) self.root = newRoot self.fileName = filePath