def convert(self, doc, encoding, mimetype,
            logError=False, raiseException=False):
    """Convert a WinWord document to raw text.

    The document is stored through ``self.saveFile`` and the external
    program named by ``self.depends_on`` is run on the resulting file,
    with its stderr redirected into a second temporary file.

    Args:
        doc: document payload, handed unchanged to ``self.saveFile``.
        encoding: not used by this converter.
        mimetype: not used by this converter.
        logError: when true, log whatever the converter wrote to stderr.
        raiseException: when true, raise if the converter wrote to stderr.

    Returns:
        A ``(text, 'utf-8')`` tuple where ``text`` is the value returned
        by ``self.execute`` for the conversion command.

    Raises:
        ConversionError: stderr output was captured and
            ``raiseException`` is true.
    """
    tmp_name = self.saveFile(doc)
    # presumably a temporary file whose str() is its path -- TmpFile is
    # defined elsewhere; verify.
    err = TmpFile('')

    # The command line is the same on every platform, so no branching on
    # sys.platform is needed.
    command = '%s -c utf-8 --nographics -x "%s" "%s" 2> "%s"' % (
        self.depends_on, wvConf_file, tmp_name, str(err))
    result = (self.execute(command), 'utf-8')

    try:
        # Close the stderr capture file deterministically instead of
        # leaving the handle open until garbage collection.
        with open(str(err), 'r+') as err_file:
            errors = err_file.read()
    except OSError:
        errors = ""

    if errors:
        if logError:
            # NOTE(review): if LOG is a stdlib logging.Logger, warn() is a
            # deprecated alias of warning() -- confirm before switching.
            LOG.warn('Converter %s experienced an error %s' % (
                self.content_description, errors))
        if raiseException:
            raise ConversionError(errors)

    return result
def convert(self, doc, encoding, mimetype,
            logError=False, raiseException=False):
    """Convert a WinWord document to raw text.

    Saves the document with ``self.saveFile``, runs the external program
    named by ``self.depends_on`` on it and captures the program's stderr
    in a separate temporary file.

    Args:
        doc: document payload, handed unchanged to ``self.saveFile``.
        encoding: not used by this converter.
        mimetype: not used by this converter.
        logError: when true, log whatever the converter wrote to stderr.
        raiseException: when true, raise if the converter wrote to stderr.

    Returns:
        A ``(text, 'utf-8')`` tuple where ``text`` is the value returned
        by ``self.execute`` for the conversion command.

    Raises:
        ConversionError: stderr output was captured and
            ``raiseException`` is true.
    """
    tmp_name = self.saveFile(doc)
    # presumably a temporary file whose str() is its path -- TmpFile is
    # defined elsewhere; verify.
    err = TmpFile('')

    # Both platform branches used the very same command line, so it is
    # built once here.
    command = '%s -c utf-8 --nographics -x "%s" "%s" 2> "%s"' % (
        self.depends_on, wvConf_file, tmp_name, str(err))
    result = (self.execute(command), 'utf-8')

    try:
        # The context manager guarantees the capture file is closed even
        # when read() fails.
        with open(str(err), 'r+') as err_file:
            errors = err_file.read()
    except OSError:
        errors = ""

    if errors:
        if logError:
            # NOTE(review): if LOG is a stdlib logging.Logger, warn() is a
            # deprecated alias of warning() -- confirm before switching.
            LOG.warn('Converter %s experienced an error %s' % (
                self.content_description, errors))
        if raiseException:
            raise ConversionError(errors)

    return result
def convert(self, doc, encoding, mimetype,
            logError=False, raiseException=False):
    """Convert a PowerPoint document to raw text.

    Saves the document with ``self.saveFile``, runs ``ppthtml`` on it with
    stderr redirected into a temporary file, and renders the resulting
    HTML to plain text with ``html2text``.

    Args:
        doc: document payload, handed unchanged to ``self.saveFile``.
        encoding: not used by this converter.
        mimetype: not used by this converter.
        logError: when true, log whatever ``ppthtml`` wrote to stderr.
        raiseException: when true, raise if ``ppthtml`` wrote to stderr.

    Returns:
        A ``(text, 'iso-8859-15')`` tuple where ``text`` is the
        ``html2text`` rendering of the converter output.

    Raises:
        ConversionError: stderr output was captured and
            ``raiseException`` is true.
    """
    tmp_name = self.saveFile(doc)
    # presumably a temporary file whose str() is its path -- TmpFile is
    # defined elsewhere; verify.
    err = TmpFile('')

    # The ppthtml invocation is identical on all platforms, so there is
    # no need to branch on sys.platform.
    html = self.execute('ppthtml "%s" 2> "%s"' % (tmp_name, str(err)))

    try:
        # Close the stderr capture file deterministically instead of
        # leaving the handle open until garbage collection.
        with open(str(err), 'r+') as err_file:
            errors = err_file.read()
    except OSError:
        errors = ""

    if errors:
        if logError:
            # NOTE(review): if LOG is a stdlib logging.Logger, warn() is a
            # deprecated alias of warning() -- confirm before switching.
            LOG.warn('Converter %s experienced an error %s' % (
                self.content_description, errors))
        if raiseException:
            raise ConversionError(errors)

    text = html2text(html,
                     ignore_tags=('img',),
                     indent_width=4,
                     page_width=80)
    return text, 'iso-8859-15'
def convert(self, doc, encoding, mimetype,
            logError=False, raiseException=False):
    """Convert an Excel document to raw text.

    Saves the document with ``self.saveFile`` and runs ``xls2csv`` on it,
    with stderr redirected into a temporary file. On ``win32`` the
    options are passed with ``/`` switches, elsewhere with ``-`` switches.

    Args:
        doc: document payload, handed unchanged to ``self.saveFile``.
        encoding: not used by this converter.
        mimetype: not used by this converter.
        logError: when true, log whatever ``xls2csv`` wrote to stderr.
        raiseException: when true, raise if ``xls2csv`` wrote to stderr.

    Returns:
        A ``(text, 'iso-8859-15')`` tuple where ``text`` is the value
        returned by ``self.execute`` for the conversion command.

    Raises:
        ConversionError: stderr output was captured and
            ``raiseException`` is true.
    """
    tmp_name = self.saveFile(doc)
    # presumably a temporary file whose str() is its path -- TmpFile is
    # defined elsewhere; verify.
    err = TmpFile('')

    # The stderr target is quoted (as the input file already is) so that
    # a temporary path containing spaces does not break the redirect.
    if sys.platform == 'win32':
        template = 'xls2csv /d 8859-1 /q 0 "%s" 2> "%s"'
    else:
        template = 'xls2csv -d 8859-1 -q 0 "%s" 2> "%s"'
    # NOTE(review): xls2csv is asked for 8859-1 while the result is
    # labelled iso-8859-15 -- confirm this is intended.
    result = (self.execute(template % (tmp_name, str(err))), 'iso-8859-15')

    try:
        # Close the stderr capture file deterministically instead of
        # leaving the handle open until garbage collection.
        with open(str(err), 'r+') as err_file:
            errors = err_file.read()
    except OSError:
        errors = ""

    if errors:
        if logError:
            # NOTE(review): if LOG is a stdlib logging.Logger, warn() is a
            # deprecated alias of warning() -- confirm before switching.
            LOG.warn('Converter %s experienced an error %s' % (
                self.content_description, errors))
        if raiseException:
            raise ConversionError(errors)

    return result
def convert(self, doc, encoding, mimetype,
            logError=False, raiseException=False):
    """Convert an Excel document to raw text.

    The document is stored through ``self.saveFile`` and ``xls2csv`` is
    run on the resulting file, with stderr redirected into a temporary
    file. ``win32`` uses ``/`` option switches; other platforms use ``-``.

    Args:
        doc: document payload, handed unchanged to ``self.saveFile``.
        encoding: not used by this converter.
        mimetype: not used by this converter.
        logError: when true, log whatever ``xls2csv`` wrote to stderr.
        raiseException: when true, raise if ``xls2csv`` wrote to stderr.

    Returns:
        A ``(text, 'iso-8859-15')`` tuple where ``text`` is the value
        returned by ``self.execute`` for the conversion command.

    Raises:
        ConversionError: stderr output was captured and
            ``raiseException`` is true.
    """
    tmp_name = self.saveFile(doc)
    # presumably a temporary file whose str() is its path -- TmpFile is
    # defined elsewhere; verify.
    err = TmpFile('')

    # Only the option prefix differs between platforms. The stderr target
    # is quoted like the input file so paths with spaces keep working.
    if sys.platform == 'win32':
        template = 'xls2csv /d 8859-1 /q 0 "%s" 2> "%s"'
    else:
        template = 'xls2csv -d 8859-1 -q 0 "%s" 2> "%s"'
    # NOTE(review): xls2csv is asked for 8859-1 while the result is
    # labelled iso-8859-15 -- confirm this is intended.
    result = (self.execute(template % (tmp_name, str(err))), 'iso-8859-15')

    try:
        # The context manager guarantees the capture file is closed even
        # when read() fails.
        with open(str(err), 'r+') as err_file:
            errors = err_file.read()
    except OSError:
        errors = ""

    if errors:
        if logError:
            # NOTE(review): if LOG is a stdlib logging.Logger, warn() is a
            # deprecated alias of warning() -- confirm before switching.
            LOG.warn('Converter %s experienced an error %s' % (
                self.content_description, errors))
        if raiseException:
            raise ConversionError(errors)

    return result
def _c(self, text): if isinstance(text, unicode): return text try: return unicode(text, self.encoding) except UnicodeDecodeError: LOG.warn('Content from %s could not be converted to unicode using the site encoding %s' % (self.context.absolute_url(1), self.encoding)) raise
def _c(self, text): if isinstance(text, str): return text try: return str(text, self.encoding) except UnicodeDecodeError: LOG.warn( 'Content from %s could not be converted to unicode using the site encoding %s' % (self.context.absolute_url(1), self.encoding)) raise