Beispiel #1
0
 def __init__(self, cr, cmd=False, dir_tmp=False):
     """Set up the converter and register it as the 'openoffice' service.

     :param cr: database cursor used to read the ``oo_config`` table
     :param cmd: office command handed to ``DocumentConverter``
     :param dir_tmp: temporary directory handed to ``DocumentConverter``

     If either ``cmd`` or ``dir_tmp`` is falsy, BOTH are replaced by the
     ``soffice`` / ``dir_tmp`` columns of the first ``oo_config`` row.
     """
     both_given = cmd and dir_tmp
     if not both_given:
         cr.execute("SELECT soffice, dir_tmp FROM oo_config")
         config_row = cr.fetchone()
         cmd, dir_tmp = config_row
     # Example value on macOS:
     #   cmd = '/Applications/LibreOffice.app/Contents/MacOS/soffice'
     DocumentConverter.__init__(
         self,
         cmd,
         prefix='aeroo-',
         dir_tmp=dir_tmp,
         suffix='.odt',
     )
     netsvc.Service.__init__(self, 'openoffice')
Beispiel #2
0
 def _init_conn(self):
     """Open the office connection and store it in ``self.oservice``.

     A ``DocumentConversionException`` raised by ``DocumentConverter`` is not
     propagated: ``self.oservice`` is set to ``None`` and a warning is
     written to the 'main' logger instead.
     """
     main_log = logging.getLogger('main')
     try:
         connection = DocumentConverter(self.oo_host, self.oo_port)
     except DocumentConversionException:
         self.oservice = None
         main_log.warning("Failed to initiate OpenOffice/LibreOffice connection.")
     else:
         self.oservice = connection
Beispiel #3
0
def convert_to_pdf(user, presentationObject):
  """Move an uploaded presentation into the user's storage and convert it to PDF.

  The storage directory is ``settings.STORAGE_ROOT/<user_hash[0:4]>/<user_hash[4:8]>``
  (``user_hash`` comes from ``user.profile``) and is created when missing.
  The uploaded file is ``settings.CURRENT_DIR/<presentation_slides>``; when
  it does not exist the function returns without doing anything else.
  Otherwise the upload is moved into the storage directory, unless an
  identical copy is already stored there, and converted to
  ``<basename>.pdf`` in the same directory.

  On a conversion error a message is printed and the process exits with
  status 1.  The function returns ``None``.
  """
  inputFile = "%s/%s" % (settings.CURRENT_DIR, presentationObject.presentation_slides)
  profile = user.profile

  # Location to store the user's files, spread over two directory levels
  # taken from the user hash
  levelOne = profile.user_hash[0:4]
  levelTwo = profile.user_hash[4:8]
  path = "%s/%s/%s" % (settings.STORAGE_ROOT, levelOne, levelTwo)

  # Check if directory exists, if not...create it
  if not os.path.exists(path):
    os.makedirs(path)

  # Nothing to do when the uploaded file is missing
  if not os.path.exists(inputFile):
    return

  print("Uploaded file exists")

  baseName = os.path.basename(inputFile)
  pdfFile = "%s/%s.pdf" % (path, baseName)   # name of the PDF file
  newFile = "%s/%s" % (path, baseName)       # uploaded file inside the storage

  alreadyStored = os.path.exists(newFile)
  if alreadyStored and filecmp.cmp(inputFile, newFile):
    # Files are the same, no need to move
    print("upload file and newFile the same, moving on")
  else:
    if not alreadyStored:
      print("File does not exist in storage location")
    print("Moving uploaded file %s into location %s" % (inputFile, path))
    # Move onto the full target name: shutil.move() into a *directory*
    # raises when a file of the same name already exists there, which made
    # replacing a differing stored copy impossible.
    shutil.move(inputFile, newFile)

  # Convert presentation to PDF
  try:
    converter = DocumentConverter()
    converter.convert(newFile, pdfFile)
  except DocumentConversionException as exception:
    print("ERROR! " + str(exception))
    exit(1)
  except ErrorCodeIOException as exception:
    # The original printed an undefined name here, which raised NameError
    # and hid the real error.
    print(str(exception))
    print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
    exit(1)
Beispiel #4
0
 def __init__(self, cr, cmd=False, dir_tmp=False):
     """Initialise the document converter and the 'openoffice' service.

     :param cr: database cursor; queried only when configuration is missing
     :param cmd: office command passed on to ``DocumentConverter``
     :param dir_tmp: temporary directory passed on to ``DocumentConverter``

     Unless both ``cmd`` and ``dir_tmp`` are truthy, both values are read
     from the first row of the ``oo_config`` table.
     """
     if not (cmd and dir_tmp):
         cr.execute("SELECT soffice, dir_tmp FROM oo_config")
         cmd, dir_tmp = cr.fetchone()
     # Example value on macOS:
     #   cmd = '/Applications/LibreOffice.app/Contents/MacOS/soffice'
     converter_options = {
         'prefix': 'aeroo-',
         'dir_tmp': dir_tmp,
         'suffix': '.odt',
     }
     DocumentConverter.__init__(self, cmd, **converter_options)
     netsvc.Service.__init__(self, 'openoffice')
Beispiel #5
0
def main(fn_template, fn_data):
    """Mail-merge the template for every CSV row and convert the result to PDF.

    :param fn_template: template file name handed to ``mail_merge``
    :param fn_data: CSV file with at least ``first_name`` and ``last_name``
        columns; one output document is produced per row

    Each row dict gets two extra keys before it is passed to ``mail_merge``:
    ``filename`` (``<first>_<last>.odt``) and ``filename_pdf``
    (``<first>_<last>.pdf``).
    """
    # Read all rows up front; the file is closed again before converting
    # (the original opened it twice and never closed either handle).
    with open(fn_data) as fp_data:
        users = list(csv.DictReader(fp_data))

    converter = DocumentConverter()

    for user in users:
        stem = "%s_%s" % (user["first_name"], user["last_name"])
        user["filename"] = stem + ".odt"
        user["filename_pdf"] = stem + ".pdf"
        mail_merge(fn_template, user)

        converter.convert(user["filename"], user["filename_pdf"])
Beispiel #6
0
 def init_conn(self):
     """Connect to the office instance at ``oo_host``:``oo_port``.

     On success the connection is stored in ``self.oservice``.  When
     ``DocumentConverter`` raises ``DocumentConversionException`` the
     attribute is set to ``None`` and a warning goes to the 'main' logger.
     """
     log = logging.getLogger('main')
     try:
         converter = DocumentConverter(self.oo_host, self.oo_port)
     except DocumentConversionException:
         self.oservice = None
         log.warning("Failed to initiate OpenOffice/LibreOffice connection.")
     else:
         self.oservice = converter
    def run(self):
        """Poll for conversion tasks forever and turn each into a signed PDF.

        For every task returned by ``model.get_task_to_process()``:

        1. non-PDF sources are converted to ``/tmp/converterdir/test2.pdf``;
           PDF sources are used as they are;
        2. ``pdftk`` attaches the original source file and writes
           ``/tmp/converterdir/test3.pdf``;
        3. ``signApp.jar`` signs that file into a timestamp-named PDF;
        4. the task is updated with status 10 and the path of the result.

        Any exception marks the task with status -1 and is logged through
        ``web.debug``.  The loop never terminates on its own.
        """
        # NOTE(review): Python 2 syntax ("except X, name") is used below.
        while True:
            doc = model.get_task_to_process()
            if doc is None:
                # nothing queued: wait a second before polling again
                time.sleep(1)
            else:
                try:
                    # log the selected document
                    web.debug(doc)
                    # ensure that the temporary directory exists
                    if not os.path.exists('/tmp/converterdir'):
                        os.makedirs('/tmp/converterdir')

                    # check whether the document needs to be converted:
                    # anything whose name does not end in ".pdf" is converted
                    # NOTE(review): the interim file names are fixed, so two
                    # workers running at once would overwrite each other's
                    # files - confirm only one instance runs.
                    interim_path = '/tmp/converterdir/test2.pdf'
                    if re.search(r"\.pdf$", doc.srcname,
                                 re.IGNORECASE | re.MULTILINE) is None:
                        converter = DocumentConverter()
                        converter.convert(doc.srcname, interim_path)
                        # wait for the converter to close
                        time.sleep(1)
                    else:
                        # the source already is a PDF: use it directly
                        # (an earlier pdftk "cat" step was disabled here)
                        interim_path = doc.srcname

                    web.debug(interim_path)
                    # call pdftk to attach the original file to the PDF
                    # NOTE(review): doc.srcname and interim_path are pasted
                    # into a shell command line (shell=True); confirm they can
                    # never contain quotes or shell metacharacters.
                    subprocess.call("/opt/zimbra/bin/pdftk " + interim_path +
                                    " attach_files '" + doc.srcname +
                                    "' output /tmp/converterdir/test3.pdf",
                                    shell=True,
                                    stdout=self.flog)

                    # sign the document with the private key from key.properties
                    # NOTE(review): the output name has one-second resolution,
                    # so two tasks finished within the same second would
                    # share a file name.
                    d = datetime.today()
                    unic_file_name = d.strftime('%m_%d_%H_%M_%S')
                    subprocess.call(
                        "java -jar /opt/zimbra/bin/signapp/signApp.jar /tmp/converterdir/test3.pdf /tmp/converterdir/"
                        + unic_file_name +
                        ".pdf /opt/zimbra/bin/signapp/key.properties",
                        shell=True,
                        stdout=self.flog)

                    # mark the document as ready (status 10) with its result path
                    # NOTE(review): the exit codes of both subprocess calls
                    # are ignored, so a failed pdftk/signing run is still
                    # reported as ready.
                    model.update_task(
                        doc.idtask, 10,
                        '/tmp/converterdir/' + unic_file_name + '.pdf')

                # every failure marks the task with status -1 and logs the cause
                except DocumentConversionException, exception:
                    model.update_task(doc.idtask, -1)
                    web.debug(datetime.today().strftime('%y-%m-%d %H:%M:%S') +
                              "ERROR! " + str(exception))
                except ErrorCodeIOException, exception:
                    model.update_task(doc.idtask, -1)
                    web.debug(datetime.today().strftime('%y-%m-%d %H:%M:%S') +
                              "ERROR! ErrorCodeIOException %d" %
                              exception.ErrCode)
                except Exception, exception:
                    model.update_task(doc.idtask, -1)
                    web.debug(datetime.today().strftime('%y-%m-%d %H:%M:%S') +
                              " unexpected error! - " + str(exception))
    if not isdir(input_folder):
        print 'no such input folder: %s' % input_folder
        exit(1)

    if not isdir(output_folder):
        print 'no such output folder: %s' % output_folder
        exit(1)

    for entry in listdir(input_folder):
        fpath = input_folder + '/' + entry
        if isfile(fpath):
            fname = splitext(entry)[0]
            #print 'found file %s with name %s' % (entry, fname)
            new_fpath = output_folder + '/' + fname
            if not isdir(new_fpath):
                mkdir(new_fpath)
            try:
                converter = DocumentConverter()
                converter.convert(fpath, new_fpath + ('/%s.html' % fname))
                converter.convert(fpath, new_fpath + ('/%s.pdf' % fname))

                convert_file(new_fpath, fname + '.html')
            except DocumentConversionException, exception:
                print "ERROR!" + str(exception)
                exit(1)
            except Exception, exception:
                print "ERROR! ErrorCodeIOException %s" % exception
                exit(1)
        #else:
        #    print 'found non-file %s' % entry
Beispiel #9
0
 def __init__(self, cr, host, port):
     """Initialise the converter from ``oo_config`` and register the service.

     :param cr: database cursor used to read the ``oo_config`` table
     :param host: accepted for the caller's benefit, but replaced by the
         ``host`` column of the first ``oo_config`` row
     :param port: likewise replaced by the ``port`` column
     """
     cr.execute("SELECT host, port, ooo_restart_cmd FROM oo_config")
     config_row = cr.fetchone()
     # the stored configuration always wins over the arguments
     host, port, restart_cmd = config_row
     DocumentConverter.__init__(self, host, port, restart_cmd)
     netsvc.Service.__init__(self, 'openoffice')
Beispiel #10
0
class OfficeService():
    """Document conversion service backed by an OpenOffice/LibreOffice connection.

    Documents are exchanged base64-encoded.  They are either passed inline or
    uploaded chunk by chunk into the spool directory and referenced later by
    the identifier returned from ``upload``.  Spool file names are the md5
    digest of the identifier; uploads still in progress carry a leading ``_``.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to connect immediately.

        :param oo_host: host passed to ``DocumentConverter``
        :param oo_port: port passed to ``DocumentConverter``
        :param spool_dir: directory holding the spooled uploads
        :param auth_type: callable ``(username, password)`` returning a truthy
            value when access is granted
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + '/%s'
        self.auth = auth_type
        self.init_conn()

    def init_conn(self):
        """Connect to the office instance; ``self.oservice`` is None on failure."""
        logger = logging.getLogger('main')
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning("Failed to initiate OpenOffice/LibreOffice connection.")

    def conn_healthy(self):
        """Return True once a connection exists, reconnecting if necessary.

        Up to three connection attempts are made, three seconds apart.

        :raises NoOfficeConnection: if all three attempts fail
        """
        if self.oservice is not None:
            return True
        logger = logging.getLogger('main')
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            self.init_conn()
            if self.oservice is not None:
                return True
            # wait between attempts only; sleeping after the last one would
            # merely delay the error
            if attempt < max_attempts:
                sleep(3)
        message = 'Failed to initiate connection to OpenOffice/LibreOffice three times in a row.'
        logger.warning(message)
        raise NoOfficeConnection(message)

    def convert(self, data=False, identifier=False, in_mime=False, out_mime=False, username=None, password=None):
        """Convert one document and return the result base64-encoded.

        :param data: base64-encoded document; takes precedence over *identifier*
        :param identifier: identifier of a finished upload to convert
        :param in_mime: accepted but not used by this implementation
        :param out_mime: key into ``filters`` for the output format
            (``'odt'`` when falsy)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoidentException: if neither *data* nor *identifier* is given
        :raises NoOfficeConnection: if no office connection can be made
        """
        logger = logging.getLogger('main')
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        logger.debug('Openning identifier: %s' % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._readFile(identifier)
        else:
            raise NoidentException('Wrong or no identifier.')
        logger.debug("  read file %s" % str(time() - start_time))
        self.conn_healthy()
        self.oservice.putDocument(data)
        logger.debug("  upload document to office %s" % str(time() - start_time))
        try:
            conv_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
            logger.debug("  download converted document %s" % str(time() - start_time))
        finally:
            # always release the document, even when the export failed
            self.oservice.closeDocument()
            logger.debug("  close document %s" % str(time() - start_time))
        return base64.b64encode(conv_data).decode('utf8')

    def _md5(self, data):
        """Return the hexadecimal md5 digest of the string *data*."""
        return md5(data.encode()).hexdigest()

    def _readFile(self, ident):
        """Return the decoded content of the finished spool file for *ident*."""
        with open(self.spool_path % self._md5(str(ident)), "r") as tmpfile:
            return base64.b64decode(tmpfile.read())

    def upload(self, data=False, is_last=False, identifier=False, username=None, password=None):
        """Append one chunk of base64 text to a spooled upload.

        :param data: chunk to append to the spool file
        :param is_last: when true the upload is finalised, i.e. the ``_``
            prefix is dropped from the spool file name
        :param identifier: identifier of an upload in progress; a new random
            one is generated when falsy
        :returns: ``{'identifier': identifier}``, or ``None`` when an
            unexpected error was caught (its traceback is printed to stdout)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoidentException: if *identifier* has no upload in progress
        :raises NodataException: if *data* is ``False``
        """
        logger = logging.getLogger('main')
        logger.debug('Upload identifier: %s' % identifier)
        try:
            start_time = time()

            if not self.auth(username, password):
                raise AccessException('Access denied.')
            # NOTE: spool names are md5 digests so that a crafted identifier
            # cannot be used for a path injection attack
            if identifier and not path.isfile(self.spool_path % ('_' + self._md5(str(identifier)))):
                raise NoidentException('Wrong or no identifier.')
            elif data is False:
                raise NodataException('No data to be converted.')

            fname = ''
            # generate a random identifier until an unused one is found
            while not identifier:
                new_ident = randint(1, MAXINT)
                fname = self._md5(str(new_ident))
                logger.debug('  assigning new identifier %s' % new_ident)
                # the candidate is taken if a partial or a finished spool
                # file already exists for it
                taken = path.isfile(self.spool_path % ('_' + fname)) \
                    or path.isfile(self.spool_path % fname)
                if not taken:
                    identifier = new_ident
            fname = fname or self._md5(str(identifier))
            partial_name = self.spool_path % ('_' + fname)
            with open(partial_name, "a") as tmpfile:
                tmpfile.write(data)
            logger.debug("  chunk finished %s" % str(time() - start_time))
            if is_last:
                rename(partial_name, self.spool_path % fname)
                logger.debug("  file finished")
            return {'identifier': identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort, as before: report the problem and return None.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def join(self, idents, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result base64-encoded.

        :param idents: identifiers of finished uploads; the first one is the
            base document the others are joined onto.  The list is not
            modified.
        :param out_mime: key into ``filters`` for the output format
            (``'odt'`` when falsy)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoOfficeConnection: if no office connection can be made

        Errors raised while joining are propagated after the document has
        been closed.  The original swallowed them and then failed with an
        unrelated UnboundLocalError.
        """
        logger = logging.getLogger('main')
        logger.debug('Join %s identifiers: %s' % (str(len(idents)),str(idents)))
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        first_ident, other_idents = idents[0], idents[1:]
        data = self._readFile(first_ident)
        # outside the try block: without a connection there is nothing to close
        self.conn_healthy()
        try:
            self.oservice.putDocument(data)
            data_list = [self._readFile(ident) for ident in other_idents]
            self.oservice.joinDocuments(data_list)
            result_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
        finally:
            self.oservice.closeDocument()
        logger.debug("  join finished %s" % str(time() - start_time))
        return base64.b64encode(result_data).decode('utf8')
Beispiel #11
0
class OfficeService():
    """Document conversion service backed by an OpenOffice/LibreOffice connection.

    Documents are exchanged base64-encoded.  They are either passed inline or
    uploaded chunk by chunk into the spool directory and referenced later by
    the identifier returned from ``upload``.  Spool file names are the md5
    digest of the identifier; uploads still in progress carry a leading ``_``.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to connect immediately.

        :param oo_host: host passed to ``DocumentConverter``
        :param oo_port: port passed to ``DocumentConverter``
        :param spool_dir: directory holding the spooled uploads
        :param auth_type: callable ``(username, password)`` returning a truthy
            value when access is granted
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + '/%s'
        self.auth = auth_type
        self.init_conn()

    def init_conn(self):
        """Connect to the office instance; ``self.oservice`` is None on failure."""
        logger = logging.getLogger('main')
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning(
                "Failed to initiate OpenOffice/LibreOffice connection.")

    def conn_healthy(self):
        """Return True once a connection exists, reconnecting if necessary.

        Up to three connection attempts are made, three seconds apart.

        :raises NoOfficeConnection: if all three attempts fail
        """
        if self.oservice is not None:
            return True
        logger = logging.getLogger('main')
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            self.init_conn()
            if self.oservice is not None:
                return True
            # wait between attempts only; sleeping after the last one would
            # merely delay the error
            if attempt < max_attempts:
                sleep(3)
        message = 'Failed to initiate connection to OpenOffice/LibreOffice three times in a row.'
        logger.warning(message)
        raise NoOfficeConnection(message)

    def convert(self,
                data=False,
                identifier=False,
                in_mime=False,
                out_mime=False,
                username=None,
                password=None):
        """Convert one document and return the result base64-encoded.

        :param data: base64-encoded document; takes precedence over *identifier*
        :param identifier: identifier of a finished upload to convert
        :param in_mime: accepted but not used by this implementation
        :param out_mime: key into ``filters`` for the output format
            (``'odt'`` when falsy)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoidentException: if neither *data* nor *identifier* is given
        :raises NoOfficeConnection: if no office connection can be made
        """
        logger = logging.getLogger('main')
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        logger.debug('Openning identifier: %s' % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._readFile(identifier)
        else:
            raise NoidentException('Wrong or no identifier.')
        logger.debug("  read file %s" % str(time() - start_time))
        self.conn_healthy()
        self.oservice.putDocument(data)
        logger.debug("  upload document to office %s" %
                     str(time() - start_time))
        try:
            conv_data = self.oservice.saveByStream(filters[out_mime or 'odt'])
            logger.debug("  download converted document %s" %
                         str(time() - start_time))
        finally:
            # always release the document, even when the export failed
            self.oservice.closeDocument()
            logger.debug("  close document %s" % str(time() - start_time))
        return base64.b64encode(conv_data).decode('utf8')

    def _md5(self, data):
        """Return the hexadecimal md5 digest of the string *data*."""
        return md5(data.encode()).hexdigest()

    def _readFile(self, ident):
        """Return the decoded content of the finished spool file for *ident*."""
        with open(self.spool_path % self._md5(str(ident)), "r") as tmpfile:
            return base64.b64decode(tmpfile.read())

    def upload(self,
               data=False,
               is_last=False,
               identifier=False,
               username=None,
               password=None):
        """Append one chunk of base64 text to a spooled upload.

        :param data: chunk to append to the spool file
        :param is_last: when true the upload is finalised, i.e. the ``_``
            prefix is dropped from the spool file name
        :param identifier: identifier of an upload in progress; a new random
            one is generated when falsy
        :returns: ``{'identifier': identifier}``, or ``None`` when an
            unexpected error was caught (its traceback is printed to stdout)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoidentException: if *identifier* has no upload in progress
        :raises NodataException: if *data* is ``False``
        """
        logger = logging.getLogger('main')
        logger.debug('Upload identifier: %s' % identifier)
        try:
            start_time = time()

            if not self.auth(username, password):
                raise AccessException('Access denied.')
            # NOTE: spool names are md5 digests so that a crafted identifier
            # cannot be used for a path injection attack
            if identifier and not path.isfile(
                    self.spool_path % ('_' + self._md5(str(identifier)))):
                raise NoidentException('Wrong or no identifier.')
            elif data is False:
                raise NodataException('No data to be converted.')

            fname = ''
            # generate a random identifier until an unused one is found
            while not identifier:
                new_ident = randint(1, MAXINT)
                fname = self._md5(str(new_ident))
                logger.debug('  assigning new identifier %s' % new_ident)
                # the candidate is taken if a partial or a finished spool
                # file already exists for it
                taken = path.isfile(self.spool_path % ('_' + fname)) \
                    or path.isfile(self.spool_path % fname)
                if not taken:
                    identifier = new_ident
            fname = fname or self._md5(str(identifier))
            partial_name = self.spool_path % ('_' + fname)
            with open(partial_name, "a") as tmpfile:
                tmpfile.write(data)
            logger.debug("  chunk finished %s" % str(time() - start_time))
            if is_last:
                rename(partial_name, self.spool_path % fname)
                logger.debug("  file finished")
            return {'identifier': identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort, as before: report the problem and return None.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def join(self, idents, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result base64-encoded.

        :param idents: identifiers of finished uploads; the first one is the
            base document the others are joined onto.  The list is not
            modified.
        :param out_mime: key into ``filters`` for the output format
            (``'odt'`` when falsy)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoOfficeConnection: if no office connection can be made

        Errors raised while joining are propagated after the document has
        been closed.  The original swallowed them and then failed with an
        unrelated UnboundLocalError.
        """
        logger = logging.getLogger('main')
        logger.debug('Join %s identifiers: %s' %
                     (str(len(idents)), str(idents)))
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        first_ident, other_idents = idents[0], idents[1:]
        data = self._readFile(first_ident)
        # outside the try block: without a connection there is nothing to close
        self.conn_healthy()
        try:
            self.oservice.putDocument(data)
            data_list = [self._readFile(ident) for ident in other_idents]
            self.oservice.joinDocuments(data_list)
            result_data = self.oservice.saveByStream(filters[out_mime
                                                             or 'odt'])
        finally:
            self.oservice.closeDocument()
        logger.debug("  join finished %s" % str(time() - start_time))
        return base64.b64encode(result_data).decode('utf8')
Beispiel #12
0
	baseoutname = os.path.basename(baseoutname)
	docextension=docextension[1:] # Remove leading period
	if not docextension.lower() in allowedextensions:
		print "Unknown extension '%s'" % docextension.lower()
		print "Only %s are allowed" % ", ".join(allowedextensions)
		sys.exit(1)

	# Clean up any old files
	for f in ('%s/converted.wiki' % tempdir, '%s/converted.xml' % tempdir):
		if os.path.exists(f): os.unlink(f)


	# Convert the actual document. This will always create temporary files,
	# there is no way to get the data in a buffer.
	with IOWrapper("Converting document format..."):
		converter = DocumentConverter()
		converter.convert(inputfile, '%s/converted.wiki' % tempdir)
		converter.convert(inputfile, '%s/converted.xml' % tempdir)

	# Read the converted wiki format, and append our hardcoded additions
	wf = codecs.open('%s/converted.wiki' % tempdir, "r", "utf-8")
	wikilines = wf.readlines()
	wf.close()
	wikilines.append("\n[[Category:word2mediawiki]]\n")

	# Grab images out of the XML data
	parser = XMLParser(target=ImageGrabber())

	# XXX: rewrite the namespace so as not to confuse certain XML parsers
	# (changes either in newer open/libreoffice or newer XML parsers require this)
	parser.feed(open('%s/converted.xml' % tempdir).read().replace("\"http://www.w3.org/XML/1998/namespace\"","\"http://www.w3.org/XML/1998/namespace_bugfix\""))
Beispiel #13
0
 def __init__(self, cr, host, port):
     """Build the converter from the stored configuration.

     The ``host`` and ``port`` arguments are overwritten: connection host,
     port and restart command are all taken from the first row of the
     ``oo_config`` table read through ``cr``.  The instance is then
     registered as the 'openoffice' service.
     """
     cr.execute("SELECT host, port, ooo_restart_cmd FROM oo_config")
     stored = cr.fetchone()
     host, port, ooo_restart_cmd = stored
     converter_args = (host, port, ooo_restart_cmd)
     DocumentConverter.__init__(self, *converter_args)
     netsvc.Service.__init__(self, 'openoffice')
Beispiel #14
0
 def __init__(self, cr, host, port, allow_raise_errors=True):
     """Build the converter from the stored configuration.

     :param cr: database cursor used to read the ``oo_config`` table
     :param host: overwritten by the ``host`` column of the first row
     :param port: overwritten by the ``port`` column of the first row
     :param allow_raise_errors: forwarded unchanged to ``DocumentConverter``
     """
     cr.execute("SELECT host, port, ooo_restart_cmd FROM oo_config")
     stored = cr.fetchone()
     host, port, restart_cmd = stored
     DocumentConverter.__init__(
         self,
         host,
         port,
         restart_cmd,
         allow_raise_errors,
     )
class OfficeService(object):
    """Document conversion/joining service on top of an office connection.

    Documents are exchanged base64-encoded.  They are either passed inline or
    uploaded chunk by chunk into the spool directory and referenced later by
    the identifier returned from ``upload``.  Spool files hold base64 text and
    are named after the md5 digest of the identifier; uploads still in
    progress carry a leading ``_``.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the settings and try to connect immediately.

        :param oo_host: host passed to ``DocumentConverter``
        :param oo_port: port passed to ``DocumentConverter``
        :param spool_dir: directory holding the spooled uploads
        :param auth_type: callable ``(username, password)`` returning a truthy
            value when access is granted
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        self.spool_path = spool_dir + '/%s'
        self.auth = auth_type
        self._init_conn()

    def _init_conn(self):
        """Connect to the office instance; ``self.oservice`` is None on failure."""
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning(
                "Failed to initiate OpenOffice/LibreOffice connection.")

    def _conn_healthy(self):
        """Return True once a connection exists, reconnecting if necessary.

        Up to three connection attempts are made, three seconds apart.

        :raises NoOfficeConnection: if all three attempts fail
        """
        if getattr(self, 'oservice', None) is not None:
            return True
        # also covers an instance on which the attribute was never set
        self.oservice = None
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            self._init_conn()
            if self.oservice is not None:
                return True
            # wait between attempts only; sleeping after the last one would
            # merely delay the error
            if attempt < max_attempts:
                sleep(3)
        message = 'Failed to initiate connection to OpenOffice/LibreOffice three times in a row.'
        logger.warning(message)
        raise NoOfficeConnection(message)

    def _chktime(self, start_time):
        """Return the seconds elapsed since *start_time* as ``'<n> s'``."""
        return '%s s' % str(round(time() - start_time, 6))

    def get_file(self, ident, username=None, password=None):
        """Return the spooled file for *ident*, base64-encoded.

        :raises AccessException: if ``auth`` rejects the credentials
        """
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        file_data = self._readFile(ident)
        return base64.b64encode(file_data).decode('utf8')

    def convert(self,
                data=False,
                identifier=False,
                in_mime=False,
                out_mime=False,
                username=None,
                password=None):
        """Convert one document and return the result base64-encoded.

        :param data: base64-encoded document; takes precedence over *identifier*
        :param identifier: identifier of a finished upload to convert
        :param in_mime: key into ``filters`` for the input filter
        :param out_mime: key into ``filters`` for the output filter
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoidentException: if neither *data* nor *identifier* is given
        :raises NoOfficeConnection: if no office connection can be made
        """
        if not self.auth(username, password):
            raise AccessException('Access denied.')
        start_time = time()
        logger.debug('Openning identifier: %s' % identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._readFile(identifier)
        else:
            raise NoidentException('Wrong or no identifier.')
        logger.debug("  read file %s" % self._chktime(start_time))
        self._conn_healthy()
        logger.debug("  connection test ok %s" % self._chktime(start_time))
        infilter = filters.get(in_mime, False)
        outfilter = filters.get(out_mime, False)
        self.oservice.putDocument(data, filter_name=infilter, read_only=False)
        logger.debug("  upload document to office %s" %
                     self._chktime(start_time))
        try:
            conv_data = self.oservice.saveByStream(filter_name=outfilter)
            logger.debug("  download converted document %s" %
                         self._chktime(start_time))
        except Exception as e:
            logger.debug("  conversion failed %s Exception: %s" %
                         (self._chktime(start_time), str(e)))
            self.oservice.closeDocument()
            logger.debug("  emergency close document %s" %
                         self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug("  close document %s" % self._chktime(start_time))
        return base64.b64encode(conv_data).decode('utf8')

    def _md5(self, data):
        """Return the hexadecimal md5 digest of the string *data*."""
        return md5(data.encode()).hexdigest()

    def upload(self,
               data=False,
               is_last=False,
               identifier=False,
               username=None,
               password=None):
        """Append one chunk of base64 text to a spooled upload.

        :param data: chunk to append to the spool file
        :param is_last: when true the upload is finalised, i.e. the ``_``
            prefix is dropped from the spool file name
        :param identifier: identifier of an upload in progress; a new random
            one is generated when falsy
        :returns: ``{'identifier': identifier}``, or ``None`` when an
            unexpected error was caught (its traceback is printed to stdout)
        :raises AccessException: if ``auth`` rejects the credentials
        :raises NoidentException: if *identifier* has no upload in progress
        :raises NodataException: if *data* is ``False``
        """
        logger = logging.getLogger('main')
        logger.debug('Upload identifier: %s' % identifier)
        try:
            start_time = time()

            if not self.auth(username, password):
                raise AccessException('Access denied.')
            # NOTE: spool names are md5 digests so that a crafted identifier
            # cannot be used for a path injection attack
            if identifier and not path.isfile(
                    self.spool_path % ('_' + self._md5(str(identifier)))):
                raise NoidentException('Wrong or no identifier.')
            elif data is False:
                raise NodataException('No data to be converted.')

            fname, identifier = self._get_filename_and_identifier(identifier)
            partial_name = self.spool_path % ('_' + fname)
            with open(partial_name, "a") as tmpfile:
                tmpfile.write(data)
            logger.debug("  chunk finished %s" % self._chktime(start_time))
            if is_last:
                rename(partial_name, self.spool_path % fname)
                logger.debug("  file finished")
            return {'identifier': identifier}
        except (AccessException, NoidentException, NodataException):
            raise
        except Exception:
            # Best effort, as before: report the problem and return None.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            import sys
            import traceback
            traceback.print_exc(limit=2, file=sys.stdout)

    def _get_filename_and_identifier(self, force_identifier=None):
        """Return ``(spool file name, identifier)``.

        With a truthy *force_identifier* that identifier is used as is.
        Otherwise random identifiers are drawn until one is found for which
        neither a partial nor a finished spool file exists.
        """
        fname = ''
        identifier = force_identifier
        while not identifier:
            new_ident = randint(1, MAXINT)
            fname = self._md5(str(new_ident))
            logger.debug('  assigning new identifier %s' % new_ident)
            taken = path.isfile(self.spool_path % ('_' + fname)) \
                or path.isfile(self.spool_path % fname)
            if not taken:
                identifier = new_ident
        fname = fname or self._md5(str(identifier))
        return fname, identifier

    def _readFile(self, ident):
        """Return the base64-decoded content of the spool file for *ident*."""
        spool_file_name = self._md5(str(ident))
        logger.debug("> read id %s for spool name %s", ident, spool_file_name)
        with open(self.spool_path % spool_file_name, "r") as tmpfile:
            data = tmpfile.read()
        return base64.b64decode(data)

    def _readFileUTF8(self, ident):
        """Return the raw spool file content for *ident* decoded as UTF-8 text.

        No base64 decoding is done here, so the result is the base64 text
        exactly as it is stored.
        """
        spool_file_name = self._md5(str(ident))
        logger.debug("> read in utf-8 id %s for spool name %s", ident,
                     spool_file_name)
        # context manager: the original never closed this file handle
        with open(self.spool_path % spool_file_name, 'rb') as tmpfile:
            return tmpfile.read().decode('utf8')

    def _readFiles(self, idents):
        """Lazily yield the decoded content of each spool file in *idents*."""
        logger = logging.getLogger('main')
        for ident in idents:
            start_time = time()
            data = self._readFile(ident)
            logger.debug("    read next file: %s +%s" %
                         (ident, self._chktime(start_time)))
            yield data

    def join(self,
             idents,
             in_mime=False,
             out_mime=False,
             username=None,
             password=None):
        """Join the uploaded documents *idents* into a single document.

        PDF-to-PDF joins are merged directly; every other combination goes
        through the office connection.

        :returns: the joined document as base64 text
        :raises AccessException: if ``auth`` rejects the credentials
        """
        logger.debug('Join %s identifiers: %s' %
                     (str(len(idents)), str(idents)))
        if not self.auth(username, password):
            raise AccessException('Access denied.')

        if in_mime == out_mime == 'pdf':
            return self._join_pdf_to_pdf(idents, in_mime, out_mime)

        return self._join_default(idents, in_mime, out_mime)

    def _join_default(self, idents, in_mime=False, out_mime=False):
        """Join documents through the office connection.

        :param idents: identifiers (as returned by ``upload``) of the files
            to join; the first one is the base document.  The list is not
            modified.
        :param in_mime: key into ``filters`` for the input filter;
            ``'writer8'`` is used when it has no entry
        :param out_mime: key into ``filters`` for the output filter
        :return: the joined document as base64 text
        :raises NoOfficeConnection: if no office connection can be made

        Any error raised while joining is re-raised after the document has
        been closed.
        """
        start_time = time()
        first_ident, other_idents = idents[0], idents[1:]
        data = self._readFile(first_ident)
        logger.debug("  read first file %s" % self._chktime(start_time))
        self._conn_healthy()
        logger.debug("  connection test ok %s" % self._chktime(start_time))
        try:
            infilter = filters.get(in_mime, False) or 'writer8'
            outfilter = filters.get(out_mime, False)
            self.oservice.putDocument(data,
                                      filter_name=infilter,
                                      read_only=True)
            logger.debug("  upload first document to office %s" %
                         self._chktime(start_time))
            self.oservice.appendDocuments(self._readFiles(other_idents),
                                          filter_name=infilter)
            result_data = self.oservice.saveByStream(outfilter)
        except Exception as e:
            logger.debug("  conversion failed %s Exception: %s" %
                         (self._chktime(start_time), str(e)))
            self.oservice.closeDocument()
            logger.debug("  emergency close document %s" %
                         self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug("  close document %s" % self._chktime(start_time))
        logger.debug("  join finished %s" % self._chktime(start_time))
        return base64.b64encode(result_data).decode('utf8')

    def _join_pdf_to_pdf(self, idents, in_mime, out_mime):
        """Merge spooled PDF files without going through the office connection.

        The identifiers are merged in batches of 100 into new spool files.
        If more than one batch was needed, the batch results are merged
        again recursively.

        :return: the content of the final spool file, i.e. base64 text
        """
        logger.debug('Merge %s pdf identifiers: %s' %
                     (str(len(idents)), str(idents)))
        try:
            new_idents = []
            while idents:
                to_process = idents[:100]
                idents = idents[100:]
                start_time = time()
                out_file_name, new_ident = self._merge_idents(to_process)
                new_idents.append(new_ident)
                logger.debug(">write merged file %s in %s", out_file_name,
                             self._chktime(start_time))

            if len(new_idents) > 1:
                return self._join_pdf_to_pdf(new_idents,
                                             in_mime=in_mime,
                                             out_mime=out_mime)
            return self._readFileUTF8(new_idents[0])
        except Exception as e:
            logger.info(e)
            logger.exception(e)
            raise

    def _merge_idents(self, idents):
        """Merge the PDFs *idents* into a new spool file.

        :return: ``(spool file name, identifier)`` of the merged file
        """
        merger = PdfFileMerger()
        try:
            for ident in idents:
                file_data = self._readFile(ident)
                merger.append(io.BytesIO(file_data))
            out_io = io.BytesIO()
            merger.write(out_io)
        finally:
            # release the merger even when one of the inputs is unreadable
            merger.close()
        merged_pdf = out_io.getvalue()
        out_io.close()
        out_file_name, new_ident = self._get_filename_and_identifier()
        # spool files hold base64 text, like the files written by upload()
        with open(self.spool_path % out_file_name, "wb") as outFile:
            outFile.write(base64.encodebytes(merged_pdf))
        return out_file_name, new_ident
Beispiel #16
0
	baseoutname = os.path.basename(baseoutname)
	docextension=docextension[1:] # Remove leading period
	if not docextension.lower() in allowedextensions:
		print "Unknown extension '%s'" % docextension.lower()
		print "Only %s are allowed" % ", ".join(allowedextensions)
		sys.exit(1)

	# Clean up any old files
	for f in ('%s/converted.wiki' % tempdir, '%s/converted.xml' % tempdir):
		if os.path.exists(f): os.unlink(f)


	# Convert the actual document. This will always create temporary files,
	# there is no way to get the data in a buffer.
	with IOWrapper("Converting document format..."):
		converter = DocumentConverter()
		converter.convert(inputfile, '%s/converted.wiki' % tempdir)
		converter.convert(inputfile, '%s/converted.xml' % tempdir)

	# Read the converted wiki format, and append our hardcoded additions
	wf = codecs.open('%s/converted.wiki' % tempdir, "r", "utf-8")
	wikilines = wf.readlines()
	wf.close()
	wikilines.append("\n[[Category:word2mediawiki]]\n")

	# Grab images out of the XML data
	parser = XMLParser(target=ImageGrabber())
	parser.feed(open('%s/converted.xml' % tempdir).read())
	parser.close()

	# Prepare for uploading to mediawiki
Beispiel #17
0
class OfficeService:
    """Conversion service built on an OpenOffice/LibreOffice ``DocumentConverter``.

    Documents are exchanged as base64 text. Uploaded data is spooled to
    ``spool_dir`` under the md5 hex digest of its identifier; a file that is
    still being uploaded carries a leading ``_`` in its name.
    """

    def __init__(self, oo_host, oo_port, spool_dir, auth_type):
        """Store the configuration and try to open the office connection.

        Args:
            oo_host: host passed to ``DocumentConverter``.
            oo_port: port passed to ``DocumentConverter``.
            spool_dir: directory in which uploaded files are spooled.
            auth_type: callable invoked as ``auth_type(username, password)``;
                a falsy result denies access.
        """
        self.oo_host = oo_host
        self.oo_port = oo_port
        # Template whose "%s" is replaced by the spool file name.
        self.spool_path = spool_dir + "/%s"
        self.auth = auth_type
        self._init_conn()

    def _init_conn(self):
        """Create the converter; on failure leave ``self.oservice`` as None."""
        logger = logging.getLogger("main")
        try:
            self.oservice = DocumentConverter(self.oo_host, self.oo_port)
        except DocumentConversionException:
            self.oservice = None
            logger.warning("Failed to initiate OpenOffice/LibreOffice connection.")

    def _conn_healthy(self):
        """Ensure a converter is available, reconnecting up to three times.

        Returns:
            True when ``self.oservice`` is set.

        Raises:
            NoOfficeConnection: if all three connection attempts fail.
        """
        if getattr(self, "oservice", None) is not None:
            return True
        self.oservice = None
        logger = logging.getLogger("main")
        max_attempts = 3
        retry_delay = 3
        for attempt in range(1, max_attempts + 1):
            self._init_conn()
            if self.oservice is not None:
                return True
            # Only wait when another attempt follows; sleeping before raising
            # would just delay the error.
            if attempt < max_attempts:
                sleep(retry_delay)
        message = "Failed to initiate connection to OpenOffice/LibreOffice three times in a row."
        logger.warning(message)
        raise NoOfficeConnection(message)

    def _chktime(self, start_time):
        """Return the seconds elapsed since *start_time*, formatted for logs."""
        return "%s s" % str(round(time() - start_time, 6))

    def _spool_file(self, name):
        """Return the full spool path for the file called *name*."""
        return self.spool_path % name

    def convert(self, data=False, identifier=False, in_mime=False, out_mime=False, username=None, password=None):
        """Convert one document and return the result as a base64 string.

        Args:
            data: base64-encoded document, or False to use *identifier*.
            identifier: identifier of a previously uploaded file; used only
                when *data* is False.
            in_mime: key looked up in ``filters`` for the input filter.
            out_mime: key looked up in ``filters`` for the output filter.
            username: passed to the auth callable.
            password: passed to the auth callable.

        Raises:
            AccessException: if authentication fails.
            NoidentException: if neither *data* nor *identifier* is given.
            NoOfficeConnection: if no office connection can be established.
        """
        logger = logging.getLogger("main")
        if not self.auth(username, password):
            raise AccessException("Access denied.")
        start_time = time()
        logger.debug("Openning identifier: %s", identifier)
        if data is not False:
            data = base64.b64decode(data)
        elif identifier is not False:
            data = self._readFile(identifier)
        else:
            raise NoidentException("Wrong or no identifier.")
        logger.debug("  read file %s", self._chktime(start_time))
        self._conn_healthy()
        logger.debug("  connection test ok %s", self._chktime(start_time))
        infilter = filters.get(in_mime, False)
        outfilter = filters.get(out_mime, False)
        self.oservice.putDocument(data, filter_name=infilter, read_only=True)
        logger.debug("  upload document to office %s", self._chktime(start_time))
        try:
            conv_data = self.oservice.saveByStream(filter_name=outfilter)
            logger.debug("  download converted document %s", self._chktime(start_time))
        except Exception as e:
            logger.debug("  conversion failed %s Exception: %s", self._chktime(start_time), e)
            # Close the document even on failure so the converter is reusable.
            self.oservice.closeDocument()
            logger.debug("  emergency close document %s", self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug("  close document %s", self._chktime(start_time))
        return base64.b64encode(conv_data).decode("utf8")

    def _md5(self, data):
        """Return the md5 hex digest of the string *data*."""
        return md5(data.encode()).hexdigest()

    def upload(self, data=False, is_last=False, identifier=False, username=None, password=None):
        """Append one chunk of an upload to its spool file.

        The first chunk is sent without *identifier*; a random unused one is
        assigned and returned. Later chunks pass that identifier back. When
        *is_last* is true the partial file (``_`` prefix) is renamed to its
        final name.

        Args:
            data: text chunk to append; False means no data was supplied.
            is_last: whether this chunk completes the upload.
            identifier: identifier of an upload already in progress.
            username: passed to the auth callable.
            password: passed to the auth callable.

        Returns:
            ``{"identifier": identifier}`` on success, or None when an
            unexpected error occurred (the error is logged).

        Raises:
            AccessException: if authentication fails.
            NoidentException: if *identifier* has no partial upload file.
            NodataException: if *data* is False.
        """
        logger = logging.getLogger("main")
        logger.debug("Upload identifier: %s", identifier)
        try:
            start_time = time()

            if not self.auth(username, password):
                raise AccessException("Access denied.")
            # NOTE:md5 conversion on file operations to prevent path injection attack
            if identifier and not path.isfile(self._spool_file("_" + self._md5(str(identifier)))):
                raise NoidentException("Wrong or no identifier.")
            if data is False:
                raise NodataException("No data to be converted.")

            fname = ""
            # Generate a random identifier until one is found whose partial
            # and final spool files are both absent.
            while not identifier:
                new_ident = randint(1, MAXINT)
                fname = self._md5(str(new_ident))
                logger.debug("  assigning new identifier %s", new_ident)
                in_use = path.isfile(self._spool_file("_" + fname)) or path.isfile(self._spool_file(fname))
                if not in_use:
                    identifier = new_ident
            fname = fname or self._md5(str(identifier))
            partial_path = self._spool_file("_" + fname)
            with open(partial_path, "a") as tmpfile:
                tmpfile.write(data)
            logger.debug("  chunk finished %s", self._chktime(start_time))
            if is_last:
                rename(partial_path, self._spool_file(fname))
                logger.debug("  file finished")
            return {"identifier": identifier}
        except (AccessException, NoidentException, NodataException):
            # Expected service errors go to the caller unchanged.
            raise
        except Exception:
            # Unexpected failures are reported through the logger and the
            # call returns None, as before; KeyboardInterrupt and SystemExit
            # are no longer intercepted.
            logger.exception("Upload failed for identifier: %s", identifier)
            return None

    def _readFile(self, ident):
        """Return the decoded bytes of the finished upload named by *ident*."""
        with open(self._spool_file(self._md5(str(ident))), "r") as tmpfile:
            data = tmpfile.read()
        return base64.b64decode(data)

    def _readFiles(self, idents):
        """Yield the decoded content of each identifier in *idents*, lazily."""
        logger = logging.getLogger("main")
        for ident in idents:
            start_time = time()
            data = self._readFile(ident)
            logger.debug("    read next file: %s +%s", ident, self._chktime(start_time))
            yield data

    def join(self, idents, in_mime=False, out_mime=False, username=None, password=None):
        """Join several uploaded documents and return the result as base64.

        The first identifier is loaded as the base document, the remaining
        ones are appended to it, and the result is saved with the output
        filter. The caller's *idents* list is not modified.

        Args:
            idents: identifiers of previously uploaded files, in join order.
            in_mime: key looked up in ``filters``; falls back to "writer8".
            out_mime: key looked up in ``filters`` for the output filter.
            username: passed to the auth callable.
            password: passed to the auth callable.

        Raises:
            AccessException: if authentication fails.
            NoidentException: if *idents* is empty.
            NoOfficeConnection: if no office connection can be established.
        """
        logger = logging.getLogger("main")
        logger.debug("Join %s identifiers: %s", len(idents), idents)
        if not self.auth(username, password):
            raise AccessException("Access denied.")
        start_time = time()
        # Work on a copy so the caller's list is left untouched.
        pending = list(idents)
        if not pending:
            raise NoidentException("Wrong or no identifier.")
        data = self._readFile(pending.pop(0))
        logger.debug("  read first file %s", self._chktime(start_time))
        self._conn_healthy()
        logger.debug("  connection test ok %s", self._chktime(start_time))
        try:
            infilter = filters.get(in_mime, False) or "writer8"
            outfilter = filters.get(out_mime, False)
            self.oservice.putDocument(data, filter_name=infilter, read_only=True)
            logger.debug("  upload first document to office %s", self._chktime(start_time))
            self.oservice.appendDocuments(self._readFiles(pending), filter_name=infilter)
            result_data = self.oservice.saveByStream(outfilter)
        except Exception as e:
            logger.debug("  conversion failed %s Exception: %s", self._chktime(start_time), e)
            # Close the document even on failure so the converter is reusable.
            self.oservice.closeDocument()
            logger.debug("  emergency close document %s", self._chktime(start_time))
            raise
        else:
            self.oservice.closeDocument()
            logger.debug("  close document %s", self._chktime(start_time))
        logger.debug("  join finished %s", self._chktime(start_time))
        return base64.b64encode(result_data).decode("utf8")
    if not isdir(input_folder):
        print "no such input folder: %s" % input_folder
        exit(1)

    if not isdir(output_folder):
        print "no such output folder: %s" % output_folder
        exit(1)

    for entry in listdir(input_folder):
        fpath = input_folder + "/" + entry
        if isfile(fpath):
            fname = splitext(entry)[0]
            # print 'found file %s with name %s' % (entry, fname)
            new_fpath = output_folder + "/" + fname
            if not isdir(new_fpath):
                mkdir(new_fpath)
            try:
                converter = DocumentConverter()
                converter.convert(fpath, new_fpath + ("/%s.html" % fname))
                converter.convert(fpath, new_fpath + ("/%s.pdf" % fname))

                convert_file(new_fpath, fname + ".html")
            except DocumentConversionException, exception:
                print "ERROR!" + str(exception)
                exit(1)
            except Exception, exception:
                print "ERROR! ErrorCodeIOException %s" % exception
                exit(1)
        # else:
        #    print 'found non-file %s' % entry
Beispiel #19
0
 def _new_ooproxy(self, cr, uid, host=None, port=None, context=None):
     """Build a ``DocumentConverter`` for the given or configured endpoint.

     A *host* or *port* left as None is read from ``openerp.tools.config``
     under the keys "ooproxy" and "ooport" respectively. *cr*, *uid* and
     *context* are accepted but not used here.
     """
     proxy_host = host if host is not None else openerp.tools.config.get("ooproxy")
     proxy_port = port if port is not None else openerp.tools.config.get("ooport")
     return DocumentConverter(proxy_host, proxy_port)