Esempio n. 1
0
    def convert(self,
                doc,
                encoding,
                mimetype,
                logError=False,
                raiseException=False):
        """Convert WinWord document to raw text.

        The document is stored in a temporary file via ``self.saveFile`` and
        handed to the external program named by ``self.depends_on``.  The
        program's stderr is redirected into a second temporary file, which
        is read back once the command has finished.

        ``encoding`` and ``mimetype`` are accepted but not used by this
        method.

        Returns a ``(output, 'utf-8')`` tuple, where ``output`` is the value
        returned by ``self.execute``.

        Raises ``ConversionError`` when the converter wrote anything to
        stderr and ``raiseException`` is true.  When ``logError`` is true
        the stderr text is logged as a warning first.
        """

        tmp_name = self.saveFile(doc)
        err = TmpFile('')
        # The command line is the same on every platform, so no
        # sys.platform switch is needed here.
        command = '%s -c utf-8 --nographics -x "%s" "%s" 2> "%s"' % (
            self.depends_on, wvConf_file, tmp_name, str(err))
        result = (self.execute(command), 'utf-8')

        try:
            # Context manager closes the handle deterministically; the file
            # is only read, so plain read mode is sufficient.
            with open(str(err), 'r') as err_file:
                errors = err_file.read()
        except (IOError, OSError):
            # IOError is listed explicitly because it is not an OSError
            # subclass on Python 2.  A missing/unreadable stderr file is
            # treated as "no errors reported".
            errors = ""

        if errors:
            if logError:
                LOG.warn('Converter %s experienced an error %s' %
                         (self.content_description, errors))

            if raiseException:
                raise ConversionError(errors)

        return result
Esempio n. 2
0
 def convert(self, doc, encoding, mimetype,
             logError=False, raiseException=False):
     """Convert WinWord document to raw text.

     ``doc`` is written to a temporary file with ``self.saveFile`` and
     converted by running the external command ``self.depends_on``.
     Whatever the command prints on stderr is captured in a separate
     temporary file and inspected afterwards.

     ``encoding`` and ``mimetype`` are not used by this method.

     Returns the tuple ``(output, 'utf-8')`` with ``output`` being the
     return value of ``self.execute``.

     If stderr output was captured, it is logged as a warning when
     ``logError`` is true and raised as ``ConversionError`` when
     ``raiseException`` is true.
     """

     tmp_name = self.saveFile(doc)
     err = TmpFile('')
     # Identical invocation on all platforms -- no platform check required.
     command = '%s -c utf-8 --nographics -x "%s" "%s" 2> "%s"' % (
         self.depends_on, wvConf_file, tmp_name, str(err))
     result = (self.execute(command), 'utf-8')

     try:
         # Use a context manager so the file handle is always released.
         with open(str(err), 'r') as err_file:
             errors = err_file.read()
     except (IOError, OSError):
         # On Python 2 open() raises IOError, which OSError alone would
         # not catch; an unreadable stderr file means "nothing reported".
         errors = ""

     if errors:
         if logError:
             LOG.warn('Converter %s experienced an error %s' % (
                 self.content_description, errors)
             )

         if raiseException:
             raise ConversionError(errors)

     return result
Esempio n. 3
0
    def convert(self, doc, encoding, mimetype,
                logError=False, raiseException=False):
        """Convert PowerPoint document to raw text.

        ``doc`` is saved to a temporary file via ``self.saveFile`` and fed
        to the external ``ppthtml`` command; the command's stderr is
        redirected to a second temporary file.  The value returned by
        ``self.execute`` is then passed through ``html2text``.

        ``encoding`` and ``mimetype`` are not used by this method.

        Returns a ``(text, 'iso-8859-15')`` tuple, where ``text`` is the
        result of ``html2text``.

        If ``ppthtml`` wrote to stderr, the text is logged as a warning
        when ``logError`` is true and raised as ``ConversionError`` when
        ``raiseException`` is true.
        """

        tmp_name = self.saveFile(doc)
        err = TmpFile('')
        # Same command on every platform, so no sys.platform switch.
        html = self.execute('ppthtml "%s" 2> "%s"' % (tmp_name, str(err)))

        try:
            # Context manager guarantees the stderr file handle is closed.
            with open(str(err), 'r') as err_file:
                errors = err_file.read()
        except (IOError, OSError):
            # IOError is not an OSError subclass on Python 2; treat an
            # unreadable stderr file as "no errors reported".
            errors = ""

        if errors:
            if logError:
                LOG.warn('Converter %s experienced an error %s' % (
                    self.content_description, errors)
                )

            if raiseException:
                raise ConversionError(errors)

        return html2text(html,
                         ignore_tags=('img',),
                         indent_width=4,
                         page_width=80), 'iso-8859-15'
Esempio n. 4
0
    def convert(self, doc, encoding, mimetype,
                logError=False, raiseException=False):
        """Convert Excel document to raw text.

        ``doc`` is saved to a temporary file via ``self.saveFile`` and
        converted with the external ``xls2csv`` command.  The command's
        stderr is redirected to a second temporary file that is read back
        afterwards.

        ``encoding`` and ``mimetype`` are not used by this method.

        Returns a ``(output, 'iso-8859-15')`` tuple, where ``output`` is
        the value returned by ``self.execute``.

        If ``xls2csv`` wrote to stderr, the text is logged as a warning
        when ``logError`` is true and raised as ``ConversionError`` when
        ``raiseException`` is true.
        """

        tmp_name = self.saveFile(doc)
        err = TmpFile('')

        # Only the option prefix differs between platforms.
        if sys.platform == 'win32':
            options = '/d 8859-1 /q 0'
        else:
            options = '-d 8859-1 -q 0'

        # The stderr target is quoted like the input path so that temp
        # paths containing spaces do not break the redirect.
        # NOTE(review): xls2csv is asked for 8859-1 but the result is
        # labelled iso-8859-15 -- confirm this is intentional.
        command = 'xls2csv %s "%s" 2> "%s"' % (options, tmp_name, str(err))
        result = (self.execute(command), 'iso-8859-15')

        try:
            # Context manager guarantees the stderr file handle is closed.
            with open(str(err), 'r') as err_file:
                errors = err_file.read()
        except (IOError, OSError):
            # IOError is not an OSError subclass on Python 2; treat an
            # unreadable stderr file as "no errors reported".
            errors = ""

        if errors:
            if logError:
                LOG.warn('Converter %s experienced an error %s' % (
                    self.content_description, errors)
                )
            if raiseException:
                raise ConversionError(errors)

        return result
Esempio n. 5
0
    def convert(self,
                doc,
                encoding,
                mimetype,
                logError=False,
                raiseException=False):
        """Convert Excel document to raw text.

        The document is written to a temporary file with ``self.saveFile``
        and processed by the external ``xls2csv`` program, whose stderr is
        redirected into a separate temporary file.

        ``encoding`` and ``mimetype`` are accepted but not used by this
        method.

        Returns the tuple ``(output, 'iso-8859-15')`` with ``output`` being
        the return value of ``self.execute``.

        Raises ``ConversionError`` when stderr output was captured and
        ``raiseException`` is true; with ``logError`` true the captured
        text is logged as a warning first.
        """

        tmp_name = self.saveFile(doc)
        err = TmpFile('')

        # xls2csv takes slash-style options on Windows and dash-style
        # options elsewhere; the rest of the command line is shared.
        if sys.platform == 'win32':
            options = '/d 8859-1 /q 0'
        else:
            options = '-d 8859-1 -q 0'

        # Quote the stderr target as well, so a temp path with spaces
        # cannot split the redirect.
        # NOTE(review): output is requested as 8859-1 but tagged as
        # iso-8859-15 -- verify against callers.
        command = 'xls2csv %s "%s" 2> "%s"' % (options, tmp_name, str(err))
        result = (self.execute(command), 'iso-8859-15')

        try:
            # Close the handle deterministically instead of leaking it.
            with open(str(err), 'r') as err_file:
                errors = err_file.read()
        except (IOError, OSError):
            # On Python 2 open() raises IOError, which OSError alone
            # would miss; an unreadable file counts as "no errors".
            errors = ""

        if errors:
            if logError:
                LOG.warn('Converter %s experienced an error %s' %
                         (self.content_description, errors))
            if raiseException:
                raise ConversionError(errors)

        return result
 def _c(self, text):
     """Return ``text`` as a ``unicode`` object (Python 2).

     ``unicode`` input is handed back untouched.  Anything else is decoded
     with ``unicode(text, self.encoding)``.  If decoding fails with
     ``UnicodeDecodeError`` a warning naming the context URL and the
     encoding is logged and the exception is re-raised.
     """
     if not isinstance(text, unicode):
         try:
             text = unicode(text, self.encoding)
         except UnicodeDecodeError:
             # Log where the undecodable content came from, then let the
             # caller deal with the original exception.
             details = (self.context.absolute_url(1), self.encoding)
             LOG.warn('Content from %s could not be converted to unicode using the site encoding %s' % details)
             raise
     return text
Esempio n. 7
0
 def _c(self, text):
     if isinstance(text, str):
         return text
     try:
         return str(text, self.encoding)
     except UnicodeDecodeError:
         LOG.warn(
             'Content from %s could not be converted to unicode using the site encoding %s'
             % (self.context.absolute_url(1), self.encoding))
         raise