Esempio n. 1
0
 def convert(self, destination_format):
   """Convert the input file to ``destination_format`` with ffmpeg.

   Keyword arguments:
   destination_format -- extension of the wanted output, e.g. "webm"

   Returns the content of the converted file as given by
   ``self.input.getContent()``. ``self.input`` is trashed whether or not
   the conversion succeeds.
   """
   # XXX This implementation could use ffmpeg -i pipe:0, but it
   # XXX seems super unreliable currently and it generates corrupted files in
   # the end
   logger.debug("FfmpegConvert: %s > %s" % (self.input.source_format, destination_format))
   output_url = mktemp(suffix=".%s" % destination_format,
                       dir=self.input.directory_name)
   command = ["ffmpeg", "-i", self.input.getUrl()]
   # XXX ffmpeg has a bug that needs these options to work with webm format
   if destination_format == "webm":
     command.extend(["-ab", "32k"])
   command.extend(["-y", output_url])
   try:
     stdout, stderr = Popen(command,
                            stdout=PIPE,
                            stderr=PIPE,
                            close_fds=True,
                            env=self.environment).communicate()
     self.input.reload(output_url)
     # Read the result once and reuse it for the check and the return value.
     content = self.input.getContent()
     if len(content) == 0:
       # Log the last non-blank stderr line. An empty or single-line stderr
       # must not raise IndexError and hide the empty-output problem.
       error_line_list = [line for line in stderr.split("\n") if line.strip()]
       if error_line_list:
         logger.error(error_line_list[-1])
       else:
         logger.error("ffmpeg produced an empty output and no error message")
     return content
   finally:
     self.input.trash()
Esempio n. 2
0
 def run(self):
   """Watch the OpenOffice instance for one interval.

   Sets ``status_flag``, sleeps ``self.interval`` and then stops OpenOffice
   if ``isLocked()`` is still true.
   """
   port = self.openoffice.getAddress()[-1]
   pid = self.openoffice.pid()
   identity = (port, pid)
   logger.debug("Monitoring OpenOffice: Port %s, Pid: %s" % identity)
   self.status_flag = True
   sleep(self.interval)
   if not self.openoffice.isLocked():
     return
   logger.debug("Stop OpenOffice - Port %s - Pid %s" % identity)
   self.openoffice.stop()
Esempio n. 3
0
 def getImageItemList(self):
     """Extract the images of the file with ``pdftohtml``.

     Returns False when the stderr of ``pdftohtml`` contains "Erro".
     Otherwise ``removeEqualImages`` is run on ``self.grain_directory`` and
     the result of ``getImages`` for the files found there is returned.
     """
     logger.debug("PDFImageGrainExtract")
     source_url = self.file.getUrl()
     command = ["pdftohtml", source_url, "%s/" % self.grain_directory]
     extractor = Popen(command, stdout=PIPE, stderr=PIPE, close_fds=True,
                       env=self.environment)
     stdout, stderr = extractor.communicate()
     # XXX - the PDF can be protected
     if "Erro" in stderr:
         return False
     removeEqualImages(self.grain_directory)
     return getImages(glob("%s/*.*" % self.grain_directory))
Esempio n. 4
0
 def stop(self):
   """Terminate the managed process and drop the ``process`` attribute.

   Nothing happens unless a ``process`` attribute exists and ``status()``
   is true. The process is asked to terminate, ``waitStopDaemon`` is called
   with ``self.timeout``, and the process is killed if its pid still exists
   or ``status()`` is still true.
   """
   if not hasattr(self, 'process') or not self.status():
     return
   pid = self.process.pid
   logger.debug("Stop Pid - %s" % pid)
   try:
     self.process.terminate()
     waitStopDaemon(self, self.timeout)
   finally:
     still_running = pid_exists(pid) or self.status()
     if still_running:
       Process(pid).kill()
   del self.process
Esempio n. 5
0
 def stop(self):
     """Terminate the managed process and drop the ``process`` attribute.

     Does nothing unless a ``process`` attribute exists and ``status()`` is
     true. After ``terminate()`` and ``waitStopDaemon`` (given
     ``self.timeout``), the process is killed when its pid still exists or
     ``status()`` is still true.
     """
     if not (hasattr(self, 'process') and self.status()):
         return
     target_pid = self.process.pid
     logger.debug("Stop Pid - %s" % target_pid)
     try:
         self.process.terminate()
         waitStopDaemon(self, self.timeout)
     finally:
         needs_kill = pid_exists(target_pid) or self.status()
         if needs_kill:
             Process(target_pid).kill()
     del self.process
Esempio n. 6
0
 def _releaseOpenOfficePort(self):
   """Terminate soffice processes that are listening on ``self.port``.

   Every process reported by psutil whose executable equals this instance's
   soffice binary path is inspected. Processes that raise AccessDenied are
   skipped, and a TypeError is only logged.
   """
   for process in psutil.process_iter():
     try:
       # NOTE(review): ``exe`` and ``get_connections`` are used as in the
       # original; this presumably targets an old psutil API - confirm.
       if process.exe != join(self.office_binary_path, self._bin_soffice):
         continue
       for connection in process.get_connections():
         if connection.status == "LISTEN" and \
             connection.local_address[1] == self.port:
           process.terminate()
           # One terminate per process is enough, even when it holds
           # several listening sockets on the port.
           break
     except AccessDenied:
       # Not allowed to inspect this process: best effort, move on.
       pass
     except TypeError as e:
       # exception to prevent one psutil issue with zombie processes
       logger.debug(e)
Esempio n. 7
0
 def get_memory_usage(self):
   """Return the rss of the monitored process divided by 1024 * 1024.

   The process object is (re)created when it is missing or when its pid no
   longer matches ``self.openoffice.pid()``. 0 is returned on TypeError
   (logged as OpenOffice being stopped) and on ``psutil.NoSuchProcess``.
   """
   try:
     if hasattr(self, 'process'):
       outdated = self.process.pid != int(self.openoffice.pid())
     else:
       outdated = True
     if outdated:
       self.create_process()
     rss = self.process.get_memory_info().rss
     return rss / (1024 * 1024)
   except TypeError:
     logger.debug("OpenOffice is stopped")
     return 0
   except psutil.NoSuchProcess:
     # Raised when a process with the given PID does not exist or no longer
     # exists (zombie).
     return 0
Esempio n. 8
0
 def _releaseOpenOfficePort(self):
   """Terminate soffice processes that are listening on ``self.port``.

   Every process reported by psutil whose executable equals this instance's
   soffice binary path is inspected. Processes that raise
   ``psutil.error.AccessDenied`` are skipped, and a TypeError is only logged.
   """
   for process in psutil.process_iter():
     try:
       # NOTE(review): ``exe`` and ``get_connections`` are used as in the
       # original; this presumably targets an old psutil API - confirm.
       if process.exe != join(self.office_binary_path, self._bin_soffice):
         continue
       for connection in process.get_connections():
         if connection.status == "LISTEN" and \
             connection.local_address[1] == self.port:
           process.terminate()
           # One terminate per process is enough, even when it holds
           # several listening sockets on the port.
           break
     except psutil.error.AccessDenied:
       # Not allowed to inspect this process: best effort, move on.
       pass
     except TypeError as e:
       # exception to prevent one psutil issue with zombie processes
       logger.debug(e)
Esempio n. 9
0
 def convert(self, destination_format=None, **kw):
   """Convert the image to ``destination_format`` with ImageMagick.

   Keyword arguments:
   destination_format -- extension given to the output file
   kw -- accepted but not used by this method

   Returns the content of the converted file. ``self.file`` is trashed even
   when running ``convert`` or reloading its output fails.
   """
   logger.debug("ImageMagickConvert: %s > %s" % (self.file.source_format, destination_format))
   output_url = mktemp(suffix='.%s' % destination_format,
                       dir=self.base_folder_url)
   command = ["convert", self.file.getUrl(), output_url]
   # The subprocess and the reload run under the finally as well, so that a
   # failure in either of them does not skip the cleanup.
   try:
     stdout, stderr = Popen(command,
                            stdout=PIPE,
                            stderr=PIPE,
                            close_fds=True,
                            env=self.environment).communicate()
     self.file.reload(output_url)
     return self.file.getContent()
   finally:
     self.file.trash()
Esempio n. 10
0
 def setMetadata(self, metadata):
   """Apply ``metadata`` to the document and return the resulting content.

   Keyword arguments:
   metadata -- dictionary with the metadata; it is JSON serialized and
   passed through ``encodestring`` before being handed to the converter.
   """
   serialized = json.dumps(metadata)
   logger.debug("setMetadata")
   options = {'metadata': encodestring(serialized)}
   openoffice.acquire()
   try:
     stdout, stderr = self._callUnoConverter('setmetadata', **options)
   finally:
     openoffice.release()
   content = self.document.getContent()
   self.document.trash()
   return content
Esempio n. 11
0
 def convert(self, destination_format=None, **kw):
   """Convert the PDF document with ``pdftotext``.

   Keyword arguments:
   destination_format -- extension given to the output file
   kw -- accepted but not used by this method

   Returns the content of the generated file. ``self.document`` is trashed
   even when running ``pdftotext`` or reloading its output fails.
   """
   logger.debug("PDFConvert: %s > %s" % (self.document.source_format, destination_format))
   output_url = mktemp(suffix=".%s" % destination_format,
                       dir=self.document.directory_name)
   command = ["pdftotext", self.document.getUrl(), output_url]
   # The subprocess and the reload run under the finally as well, so that a
   # failure in either of them does not skip the cleanup.
   try:
     stdout, stderr = Popen(command,
                            stdout=PIPE,
                            stderr=PIPE,
                            close_fds=True,
                            env=self.environment).communicate()
     self.document.reload(output_url)
     return self.document.getContent()
   finally:
     self.document.trash()
Esempio n. 12
0
 def convert(self, destination_format=None, **kw):
   """Convert the image to ``destination_format`` with ImageMagick.

   Keyword arguments:
   destination_format -- extension given to the output file
   kw -- accepted but not used by this method

   Returns the content of the converted file. ``self.file`` is trashed even
   when running ``convert`` or reloading its output fails.
   """
   logger.debug("ImageMagickConvert: %s > %s" % (self.file.source_format, destination_format))
   output_url = mktemp(suffix='.%s' % destination_format,
                       dir=self.base_folder_url)
   command = ["convert", self.file.getUrl(), output_url]
   # The subprocess and the reload run under the finally as well, so that a
   # failure in either of them does not skip the cleanup.
   try:
     stdout, stderr = Popen(command,
                            stdout=PIPE,
                            stderr=PIPE,
                            close_fds=True,
                            env=self.environment).communicate()
     self.file.reload(output_url)
     return self.file.getContent()
   finally:
     self.file.trash()
Esempio n. 13
0
  def _callUnoConverter(self, *feature_list, **kw):
    """Run the converter command, retrying once after a failure.

    OpenOffice is started first when ``openoffice.status()`` is false. If
    the command prints nothing on stdout and its stderr mentions a name
    ending in ``Exception`` or ``Error``, the original document is restored,
    OpenOffice is restarted and the command is run again with
    ``document_url`` set to the restored document.

    Returns the ``(stdout, stderr)`` pair of the last run.
    Raises Exception with the stderr text when the retry writes anything to
    stderr.
    """
    if not openoffice.status():
      openoffice.start()
    command_list = self._getCommand(*feature_list, **kw)
    stdout, stderr = self._subprocess(command_list)
    # Raw string: "\w" is an invalid escape sequence in a plain literal.
    if not stdout and re.search(r"\w*Exception|\w*Error", stderr):
      logger.debug(stderr)
      self.document.restoreOriginal()
      openoffice.restart()
      kw['document_url'] = self.document.getUrl()
      command_list = self._getCommand(*feature_list, **kw)
      stdout, stderr = self._subprocess(command_list)
      if stderr != "":
        raise Exception(stderr)

    return stdout, stderr
Esempio n. 14
0
 def convert(self, destination_format=None, **kw):
     """Convert the PDF document with ``pdftotext``.

     Keyword arguments:
     destination_format -- extension given to the output file
     kw -- accepted but not used by this method

     Returns the content of the generated file. ``self.document`` is
     trashed even when running ``pdftotext`` or reloading its output fails.
     """
     logger.debug("PDFConvert: %s > %s" %
                  (self.document.source_format, destination_format))
     output_url = mktemp(suffix=".%s" % destination_format,
                         dir=self.document.directory_name)
     command = ["pdftotext", self.document.getUrl(), output_url]
     # The subprocess and the reload run under the finally as well, so that
     # a failure in either of them does not skip the cleanup.
     try:
         stdout, stderr = Popen(command,
                                stdout=PIPE,
                                stderr=PIPE,
                                close_fds=True,
                                env=self.environment).communicate()
         self.document.reload(output_url)
         return self.document.getContent()
     finally:
         self.document.trash()
Esempio n. 15
0
  def _serializeMimemapper(self,
                           source_extension=None,
                           destination_extension=None):
    """Return a JSON dump of the mimemapper parts needed for a call.

    Without ``destination_extension`` only ``mimetype_by_filter_type`` is
    dumped. Otherwise the dump also holds ``doc_type_list_by_extension`` and
    a ``filter_list`` with one (destination extension, service type, filter
    name) entry per service type found for ``source_extension``; the whole
    ``document_service_list`` is used when the extension is not registered.
    """
    if destination_extension is None:
      return json.dumps({'mimetype_by_filter_type': mimemapper._mimetype_by_filter_type})

    service_type_list = mimemapper._doc_type_list_by_extension.get(
      source_extension, mimemapper.document_service_list)
    filter_list = [
      (destination_extension,
       service_type,
       mimemapper.getFilterName(destination_extension, service_type))
      for service_type in service_type_list
    ]
    logger.debug("Filter List: %r" % filter_list)
    payload = dict(doc_type_list_by_extension=mimemapper._doc_type_list_by_extension,
                   filter_list=filter_list,
                   mimetype_by_filter_type=mimemapper._mimetype_by_filter_type)
    return json.dumps(payload)
Esempio n. 16
0
 def convert(self, destination_format=None, **kw):
   """Convert the document to another format through OpenOffice.

   Keyword arguments:
   destination_format -- extension of the wanted document as a string
   kw -- extra options forwarded to the converter call

   Returns the content of the converted document, read with ``self.zip``.
   """
   logger.debug("OooConvert: %s > %s" % (self.source_format, destination_format))
   kw['source_format'] = self.source_format
   if destination_format:
     kw['destination_format'] = destination_format
   serialized_mapper = self._serializeMimemapper(self.source_format,
                                                 destination_format)
   kw['mimemapper'] = serialized_mapper
   kw['refresh'] = json.dumps(self.refresh)
   openoffice.acquire()
   try:
     stdout, stderr = self._callUnoConverter('convert', **kw)
   finally:
     openoffice.release()
   # The converter prints the url of the converted document on stdout.
   self.document.reload(stdout.replace('\n', ''))
   converted = self.document.getContent(self.zip)
   self.document.trash()
   return converted
Esempio n. 17
0
 def convert(self, destination_format=None, **kw):
   """Convert the input file with wkhtmltopdf.

   Keyword arguments:
   destination_format -- extension used to create the output temp file
   kw -- conversion options handed to ``makeWkhtmltopdfCommandList``

   Returns the content of the generated file. ``self.file`` is trashed even
   when running the command or reloading its output fails.
   """
   logger.debug("wkhtmltopdf convert: %s > %s" % (self.file.source_format, destination_format))
   output_path = self.makeTempFile(destination_format)
   command = self.makeWkhtmltopdfCommandList(
     self.convertPathToUrl(self.file.getUrl()),
     output_path,
     conversion_kw=kw,
   )
   # The subprocess and the reload run under the finally as well, so that a
   # failure in either of them does not skip the cleanup.
   try:
     stdout, stderr = Popen(
       command,
       stdout=PIPE,
       stderr=PIPE,
       close_fds=True,
       env=self.environment,
       cwd=self.file.directory_name,
     ).communicate()
     self.file.reload(output_path)
     return self.file.getContent()
   finally:
     self.file.trash()
Esempio n. 18
0
 def convert(self, destination_format=None, **kw):
     """Convert the input file with wkhtmltopdf.

     Keyword arguments:
     destination_format -- extension used to create the output temp file
     kw -- conversion options handed to ``makeWkhtmltopdfCommandList``

     Returns the content of the generated file. ``self.file`` is trashed
     even when running the command or reloading its output fails.
     """
     logger.debug("wkhtmltopdf convert: %s > %s" %
                  (self.file.source_format, destination_format))
     output_path = self.makeTempFile(destination_format)
     command = self.makeWkhtmltopdfCommandList(
         self.convertPathToUrl(self.file.getUrl()),
         output_path,
         conversion_kw=kw,
     )
     # The subprocess and the reload run under the finally as well, so that
     # a failure in either of them does not skip the cleanup.
     try:
         stdout, stderr = Popen(
             command,
             stdout=PIPE,
             stderr=PIPE,
             close_fds=True,
             env=self.environment,
             cwd=self.file.directory_name,
         ).communicate()
         self.file.reload(output_path)
         return self.file.getContent()
     finally:
         self.file.trash()
Esempio n. 19
0
 def getMetadata(self, base_document=False):
   """Return a dictionary with all the metadata of the document.

   Keyword arguments:
   base_document -- boolean; when true the 'convert' feature is requested
   together with 'getmetadata'.

   When the decoded result has a 'document_url' entry, that document is
   loaded, its content is stored under 'Data' and the entry is removed.
   """
   logger.debug("getMetadata")
   kw = {'mimemapper': self._serializeMimemapper()}
   feature_list = ['getmetadata']
   if base_document:
     feature_list.append('convert')
   openoffice.acquire()
   try:
     stdout, stderr = self._callUnoConverter(*feature_list, **kw)
   finally:
     openoffice.release()
   metadata = json.loads(decodestring(stdout))
   if 'document_url' in metadata:
     self.document.reload(metadata.pop('document_url'))
     metadata['Data'] = self.document.getContent()
   self.document.trash()
   return metadata
Esempio n. 20
0
 def convert(self, destination_format=None, **kw):
   """Convert a PostScript or PDF document.

   A 'ps' source is processed with ``ps2pdf``; any other source is
   processed with ``pdftotext``.

   Keyword arguments:
   destination_format -- extension given to the output file
   kw -- accepted but not used by this method

   Returns the content of the generated file. ``self.document`` is trashed
   even when running the command or reloading its output fails.
   """
   logger.debug("PDFConvert: %s > %s" % (self.document.source_format,
                                        destination_format))
   # NOTE(review): only the name of the temporary file is kept; the file
   # object is dropped immediately - presumably just to get a unique path.
   output_url = NamedTemporaryFile(suffix=".%s" % destination_format,
                       dir=self.document.directory_name).name
   if self.document.source_format == 'ps':
     command = ["ps2pdf",
               "-dASCII85EncodePages=false",
               "-dLanguageLevel=1",
               self.document.getUrl(),
               output_url]
   else:
     command = ["pdftotext", self.document.getUrl(), output_url]
   # The subprocess and the reload run under the finally as well, so that a
   # failure in either of them does not skip the cleanup.
   try:
     stdout, stderr = Popen(command,
                            stdout=PIPE,
                            stderr=PIPE,
                            close_fds=True,
                            env=self.environment).communicate()
     self.document.reload(output_url)
     return self.document.getContent()
   finally:
     self.document.trash()
Esempio n. 21
0
 def run(self):
     """Restart OpenOffice when its request counter exceeds the limit.

     Called by the start function. While ``self.status_flag`` is true, the
     counter ``self.openoffice.request`` is compared with
     ``self.request_limit`` every ``self.interval`` seconds.
     """
     logger.debug("Start MonitorRequest")
     while self.status_flag:
         if self.openoffice.request > self.request_limit:
             self.openoffice.acquire()
             try:
                 logger.debug("Openoffice: %s, %s will be restarted" %
                              self.openoffice.getAddress())
                 self.openoffice.restart()
             finally:
                 # Always pair the acquire with a release, otherwise a
                 # failing restart would leave the instance acquired.
                 self.openoffice.release()
         sleep(self.interval)
     logger.debug("Stop MonitorRequest ")
Esempio n. 22
0
 def run(self):
   """Restart OpenOffice when its request counter exceeds the limit.

   Called by the start function. While ``self.status_flag`` is true, the
   counter ``self.openoffice.request`` is compared with
   ``self.request_limit`` every ``self.interval`` seconds.
   """
   logger.debug("Start MonitorRequest")
   while self.status_flag:
     if self.openoffice.request > self.request_limit:
       self.openoffice.acquire()
       try:
         logger.debug("Openoffice: %s, %s will be restarted" %
                      self.openoffice.getAddress())
         self.openoffice.restart()
       finally:
         # Always pair the acquire with a release, otherwise a failing
         # restart would leave the instance acquired.
         self.openoffice.release()
     sleep(self.interval)
   logger.debug("Stop MonitorRequest ")
Esempio n. 23
0
 def run(self):
   """Stop OpenOffice whenever its memory usage goes over the limit.

   Called by the start function. Sets ``status_flag`` and, while it stays
   true, compares ``get_memory_usage()`` with ``self.limit`` every
   ``self.interval`` seconds.
   """
   self.status_flag = True
   logger.debug("Start MonitorMemory")
   while self.status_flag:
     over_limit = self.get_memory_usage() > self.limit
     if over_limit:
       logger.debug("Stopping OpenOffice")
       self.openoffice.stop()
     sleep(self.interval)
   logger.debug("Stop MonitorMemory")
Esempio n. 24
0
 def _testOpenOffice(self, host, port):
   """Run the ``openoffice_tester.py`` helper against ``host`` and ``port``.

   Returns False when the helper's stdout converts to a true value while
   its stderr is not empty, and True in every other case.
   """
   logger.debug("Test OpenOffice %s - Pid %s" % (self.getAddress()[-1],
                                                 self.pid()))
   bundled_python = join(self.office_binary_path, "python")
   interpreter = bundled_python if exists(bundled_python) else "python"
   tester_script = pkg_resources.resource_filename(
     "cloudooo",
     join('handler', 'ooo', "helper", "openoffice_tester.py"))
   args = [interpreter,
           tester_script,
           "--hostname=%s" % host,
           "--port=%s" % port,
           "--uno_path=%s" % self.uno_path]
   logger.debug("Testing Openoffice Instance %s" % port)
   tester = Popen(args, stdout=PIPE, stderr=PIPE, close_fds=True)
   stdout, stderr = tester.communicate()
   stdout_bool = convertStringToBool(stdout.replace("\n", ""))
   # NOTE(review): failure is only reported when stdout is true AND stderr
   # is not empty; a false stdout is reported as working - confirm intent.
   if not (stdout_bool and stderr != ""):
     logger.debug("Instance %s works" % port)
     return True
   logger.debug("%s\n%s" % (stderr, stdout))
   return False
Esempio n. 25
0
 def _testOpenOffice(self, host, port):
   """Run the ``openoffice_tester.py`` helper against ``host`` and ``port``.

   Returns False when the helper's stdout converts to a true value while
   its stderr is not empty, and True in every other case.
   """
   logger.debug("Test OpenOffice %s - Pid %s" % (self.getAddress()[-1],
                                                 self.pid()))
   office_python = join(self.office_binary_path, "python")
   executable = office_python if exists(office_python) else "python"
   script_path = pkg_resources.resource_filename(
     "cloudooo",
     join('handler', 'ooo', "helper", "openoffice_tester.py"))
   args = [executable,
           script_path,
           "--hostname=%s" % host,
           "--port=%s" % port,
           "--uno_path=%s" % self.uno_path]
   logger.debug("Testing Openoffice Instance %s" % port)
   stdout, stderr = Popen(args,
                          stdout=PIPE,
                          stderr=PIPE,
                          close_fds=True).communicate()
   # NOTE(review): failure is only reported when stdout is true AND stderr
   # is not empty; a false stdout is reported as working - confirm intent.
   reported_failure = convertStringToBool(stdout.replace("\n", "")) and \
       stderr != ""
   if reported_failure:
     logger.debug("%s\n%s" % (stderr, stdout))
     return False
   logger.debug("Instance %s works" % port)
   return True
Esempio n. 26
0
 def run(self):
     """Start the monitoring process.

     While ``self.status_flag`` is true, checks every ``self.interval``
     seconds whether OpenOffice is running and has not been touched for
     ``self.sleeping_time`` seconds; if so it is stopped.
     """
     logger.debug("Start MonitorSpleepingTime")
     while self.status_flag:
         current_time = time()
         if self.openoffice.status() and \
                 (self._touched_at + self.sleeping_time) <= current_time:
             logger.debug("Stopping OpenOffice after sleeping time of %is" %
                          self.sleeping_time)
             self.openoffice.acquire()
             try:
                 self.openoffice.stop()
             finally:
                 # Always pair the acquire with a release, otherwise a
                 # failing stop would leave the instance acquired.
                 self.openoffice.release()
         sleep(self.interval)
     logger.debug("Stop MonitorSpleepingTime")
Esempio n. 27
0
 def run(self):
   """Start the monitoring process.

   While ``self.status_flag`` is true, checks every ``self.interval``
   seconds whether OpenOffice is running and has not been touched for
   ``self.sleeping_time`` seconds; if so it is stopped.
   """
   logger.debug("Start MonitorSpleepingTime")
   while self.status_flag:
     current_time = time()
     if self.openoffice.status() and \
         (self._touched_at + self.sleeping_time) <= current_time:
       logger.debug("Stopping OpenOffice after sleeping time of %is" %
                    self.sleeping_time)
       self.openoffice.acquire()
       try:
         self.openoffice.stop()
       finally:
         # Always pair the acquire with a release, otherwise a failing
         # stop would leave the instance acquired.
         self.openoffice.release()
     sleep(self.interval)
   logger.debug("Stop MonitorSpleepingTime")
Esempio n. 28
0
 def release(self):
   """Log the address of the instance, then release its lock."""
   address = self.getAddress()
   logger.debug("OpenOffice %s, %s unlocked" % address)
   self._lock.release()
Esempio n. 29
0
 def start(self, init=True):
     """Log the name, port and pid of the application being started.

     Keyword arguments:
     init -- not used by the code visible here
     """
     name = self.name
     port = self.getAddress()[-1]
     pid = self.pid()
     logger.debug("Process Started %s, Port %s. Pid %s" % (name, port, pid))
Esempio n. 30
0
 def start(self, init=True):
   """Log the name, port and pid of the application being started.

   Keyword arguments:
   init -- not used by the code visible here
   """
   details = (self.name, self.getAddress()[-1], self.pid())
   logger.debug("Process Started %s, Port %s. Pid %s" % details)
Esempio n. 31
0
 def release(self):
   """Log the address of the instance, then release its lock."""
   message = "OpenOffice %s, %s unlocked" % self.getAddress()
   logger.debug(message)
   self._lock.release()
Esempio n. 32
0
    def convert(self, destination_format=None, **kw):
        """Convert the input file to ``destination_format`` with x2t.

        An XML configuration file is written in the working directory of
        the file and handed to the ``x2t`` binary. A source whose format is
        in ``yformat_tuple`` and whose data starts with the zip signature is
        unzipped first. For a destination in ``yformat_tuple``, x2t writes
        ``body.txt`` in an output directory that is then zipped into the
        final file.

        Keyword arguments:
        destination_format -- extension of the wanted output
        kw -- accepted but not used by this method

        Returns the content of the final file.
        Raises RuntimeError when x2t exits with a non-zero code.
        """
        source_format = self.file.source_format
        logger.debug("x2t convert: %s > %s" %
                     (source_format, destination_format))

        # init vars and xml configuration file
        in_format = format_code_map[source_format]
        out_format = format_code_map[destination_format]
        root_dir = self.file.directory_name
        input_dir = os.path.join(root_dir, "input")
        output_dir = os.path.join(root_dir, "output")
        final_file_name = os.path.join(root_dir,
                                       "document.%s" % destination_format)
        input_file_name = self.file.getUrl()
        output_file_name = final_file_name
        config_file_name = os.path.join(root_dir, "config.xml")

        if source_format in yformat_tuple:
            if self._data.startswith("PK\x03\x04"):
                os.mkdir(input_dir)
                unzip(self.file.getUrl(), input_dir)
                # Take the single file at the top of the unzipped tree; the
                # unpacking fails if there is not exactly one.
                for _, _, files in os.walk(input_dir):
                    input_file_name, = files
                    break
                input_file_name = os.path.join(input_dir, input_file_name)
        if destination_format in yformat_tuple:
            os.mkdir(output_dir)
            output_file_name = os.path.join(output_dir, "body.txt")

        config = {
            # 'm_sKey': 'from',
            'm_sFileFrom': input_file_name,
            # Element text must be a string for ElementTree to serialize it.
            'm_nFormatFrom': str(in_format),
            'm_sFileTo': output_file_name,
            'm_nFormatTo': str(out_format),
            # 'm_bPaid': 'true',
            # 'm_bEmbeddedFonts': 'false',
            # 'm_bFromChanges': 'false',
            # 'm_sFontDir': '/usr/share/fonts',
            # 'm_sThemeDir': '/var/www/onlyoffice/documentserver/FileConverterService/presentationthemes',
        }
        root = ElementTree.Element('root')
        for key, value in config.items():
            ElementTree.SubElement(root, key).text = value
        # The with block closes the file even when serialization fails.
        with open(config_file_name, "w") as config_file:
            ElementTree.ElementTree(root).write(config_file,
                                                encoding='utf-8',
                                                xml_declaration=True,
                                                default_namespace=None,
                                                method="xml")

        # run conversion binary
        p = Popen(
            ["x2t", config_file_name],
            stdout=PIPE,
            stderr=PIPE,
            close_fds=True,
            env=self.environment,
        )
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            # Include the configuration in the error to ease debugging.
            with open(config_file_name) as config_file:
                config_xml = config_file.read()
            raise RuntimeError(
                "x2t: exit code %d != 0\n+ %s\n> stdout: %s\n> stderr: %s@ x2t xml:\n%s"
                % (p.returncode, " ".join(["x2t", config_file_name]),
                   stdout, stderr,
                   "  " + config_xml.replace("\n", "\n  ")))

        if destination_format in yformat_tuple:
            zipTree(
                final_file_name,
                (output_file_name, ""),
                (os.path.join(os.path.dirname(output_file_name), "media"), ""),
            )

        self.file.reload(final_file_name)
        try:
            return self.file.getContent()
        finally:
            self.file.trash()
Esempio n. 33
0
  def convert(self, destination_format=None, **kw):
    """Convert the input file to ``destination_format`` with x2t.

    An XML configuration file is written in the working directory of the
    file and handed to the ``x2t`` binary. A source whose format is in
    ``yformat_tuple`` and whose data starts with the zip signature is
    unzipped first; its ``metadata.json``, when present, is applied to the
    converted document through ``OOoHandler.setMetadata``. For a destination
    in ``yformat_tuple``, the metadata read from the source through
    ``OOoHandler.getMetadata`` is added to the output zip as
    ``metadata.json``.

    Keyword arguments:
    destination_format -- extension of the wanted output
    kw -- accepted but not used by this method

    Returns the converted content. None is returned when neither the source
    nor the destination format is in ``yformat_tuple``.
    Raises RuntimeError when the unzipped input has neither ``body.txt``
    nor ``Editor.bin``, or when x2t exits with a non-zero code.
    """
    source_format = self.file.source_format
    logger.debug("x2t convert: %s > %s" % (source_format, destination_format))

    # init vars and xml configuration file
    in_format = format_code_map[source_format]
    # Only fall back to format_code_map when there is no output specific
    # code, so that formats known only to format_code_map_output work.
    if destination_format in format_code_map_output:
      out_format = format_code_map_output[destination_format]
    else:
      out_format = format_code_map[destination_format]
    root_dir = self.file.directory_name
    input_dir = os.path.join(root_dir, "input")
    input_file_name = self.file.getUrl()
    output_file_name = os.path.join(root_dir, "document.%s" % destination_format)
    config_file_name = os.path.join(root_dir, "config.xml")
    metadata = None
    output_data = None

    if source_format in yformat_tuple:
      if self._data.startswith("PK\x03\x04"):
        os.mkdir(input_dir)
        unzip(self.file.getUrl(), input_dir)
        input_file_name = os.path.join(input_dir, "body.txt")
        if not os.path.isfile(input_file_name):
          input_file_name = os.path.join(input_dir, "Editor.bin")
          if not os.path.isfile(input_file_name):
            raise RuntimeError("input format incorrect: Editor.bin absent in zip archive")
        metadata_file_name = os.path.join(input_dir, "metadata.json")
        if os.path.isfile(metadata_file_name):
          with open(metadata_file_name) as metadata_file:
            metadata = json.loads(metadata_file.read())

    with open(config_file_name, "w") as config_file:
      config = {
        # 'm_sKey': 'from',
        'm_sFileFrom': input_file_name,
        'm_nFormatFrom': str(in_format),
        'm_sFileTo': output_file_name,
        'm_nFormatTo': str(out_format),
        # 'm_bPaid': 'true',
        # 'm_bEmbeddedFonts': 'false',
        # 'm_bFromChanges': 'false',
        # 'm_sFontDir': '/usr/share/fonts',
        # 'm_sThemeDir': '/var/www/onlyoffice/documentserver/FileConverterService/presentationthemes',
      }
      root = ElementTree.Element('root')
      for key, value in config.items():
        ElementTree.SubElement(root, key).text = value
      ElementTree.ElementTree(root).write(config_file, encoding='utf-8', xml_declaration=True,
                                          default_namespace=None, method="xml")

    # run conversion binary
    p = Popen(
      ["x2t", config_file_name],
      stdout=PIPE,
      stderr=PIPE,
      close_fds=True,
      env=self.environment,
    )
    stdout, stderr = p.communicate()
    if p.returncode != 0:
      # Include the configuration in the error to ease debugging.
      with open(config_file_name) as config_file:
        config_xml = config_file.read()
      raise RuntimeError("x2t: exit code %d != 0\n+ %s\n> stdout: %s\n> stderr: %s@ x2t xml:\n%s"
                         % (p.returncode, " ".join(["x2t", config_file_name]), stdout, stderr,
                            "  " + config_xml.replace("\n", "\n  ")))

    self.file.reload(output_file_name)
    try:
      if source_format in yformat_tuple:
        if metadata:
          output_data = OOoHandler(self.base_folder_url, self.file.getContent(), source_format, **self._init_kw)\
            .setMetadata(metadata)
        else:
          output_data = self.file.getContent()
      elif destination_format in yformat_tuple:
        if not metadata:
          if source_format not in yformat_tuple:
            metadata = OOoHandler(self.base_folder_url, self._data, source_format, **self._init_kw).getMetadata()
          if not metadata:
            metadata = {}
          # Drop the entries that are not stored with the converted file.
          metadata.pop('MIMEType', None)
          metadata.pop('Generator', None)
          metadata.pop('AppVersion', None)
          metadata.pop('ImplementationName', None)
        with ZipFile(output_file_name, mode="a") as zipfile:
          zipfile.writestr("metadata.json", json.dumps(metadata))
        output_data = self.file.getContent()
    finally:
      self.file.trash()
    return output_data
Esempio n. 34
0
 def touch(self):
   """Restart the countdown by recording the current time."""
   logger.debug("Touch MonitorSpleepingTime")
   touched_at = time()
   self._touched_at = touched_at
Esempio n. 35
0
  def convert(self, destination_format=None, **kw):
    """Convert the input file to ``destination_format`` with x2t.

    An XML configuration file is written in the working directory of the
    file and handed to the ``x2t`` binary. A source whose format is in
    ``yformat_tuple`` is unzipped first. For a destination in
    ``yformat_tuple``, x2t writes ``body.txt`` in an output directory that
    is then zipped into the final file.

    Keyword arguments:
    destination_format -- extension of the wanted output
    kw -- accepted but not used by this method

    Returns the content of the final file.
    Raises RuntimeError when x2t exits with a non-zero code.
    """
    source_format = self.file.source_format
    logger.debug("x2t convert: %s > %s" % (source_format, destination_format))

    # init vars and xml configuration file
    in_format = format_code_map[source_format]
    out_format = format_code_map[destination_format]
    root_dir = self.file.directory_name
    input_dir = os.path.join(root_dir, "input")
    output_dir = os.path.join(root_dir, "output")
    final_file_name = os.path.join(root_dir, "document.%s" % destination_format)
    input_file_name = self.file.getUrl()
    output_file_name = final_file_name
    config_file_name = os.path.join(root_dir, "config.xml")

    if source_format in yformat_tuple:
      os.mkdir(input_dir)
      unzip(self.file.getUrl(), input_dir)
      # Take the single file at the top of the unzipped tree; the unpacking
      # fails if there is not exactly one.
      for _, _, files in os.walk(input_dir):
        input_file_name, = files
        break
      input_file_name = os.path.join(input_dir, input_file_name)
    if destination_format in yformat_tuple:
      os.mkdir(output_dir)
      output_file_name = os.path.join(output_dir, "body.txt")

    config = {
      # 'm_sKey': 'from',
      'm_sFileFrom': input_file_name,
      # Element text must be a string for ElementTree to serialize it.
      'm_nFormatFrom': str(in_format),
      'm_sFileTo': output_file_name,
      'm_nFormatTo': str(out_format),
      # 'm_bPaid': 'true',
      # 'm_bEmbeddedFonts': 'false',
      # 'm_bFromChanges': 'false',
      # 'm_sFontDir': '/usr/share/fonts',
      # 'm_sThemeDir': '/var/www/onlyoffice/documentserver/FileConverterService/presentationthemes',
    }
    root = ElementTree.Element('root')
    for key, value in config.items():
      ElementTree.SubElement(root, key).text = value
    # The with block closes the file even when serialization fails.
    with open(config_file_name, "w") as config_file:
      ElementTree.ElementTree(root).write(config_file, encoding='utf-8', xml_declaration=True,
                                          default_namespace=None, method="xml")

    # run conversion binary
    p = Popen(
      ["x2t", config_file_name],
      stdout=PIPE,
      stderr=PIPE,
      close_fds=True,
      env=self.environment,
    )
    stdout, stderr = p.communicate()
    if p.returncode != 0:
      # Include the configuration in the error to ease debugging.
      with open(config_file_name) as config_file:
        config_xml = config_file.read()
      raise RuntimeError("x2t: exit code %d != 0\n+ %s\n> stdout: %s\n> stderr: %s@ x2t xml:\n%s"
                         % (p.returncode, " ".join(["x2t", config_file_name]), stdout, stderr,
                            "  " + config_xml.replace("\n", "\n  ")))

    if destination_format in yformat_tuple:
      zipTree(
        final_file_name,
        (output_file_name, ""),
        (os.path.join(os.path.dirname(output_file_name), "media"), ""),
      )

    self.file.reload(final_file_name)
    try:
      return self.file.getContent()
    finally:
      self.file.trash()
Esempio n. 36
0
    def getTablesMatrix(self):
        """Return the tables found in the PDF as matrices.

        The file is converted to XML with ``pdftohtml -xml`` and the
        ``text`` elements of the result are grouped by their ``top`` and
        ``left`` attributes.

        Returns a dictionary mapping a table name to its matrix (a list of
        lines), or False when the stderr of ``pdftohtml`` contains "Erro".
        """
        logger.debug("PDFTableGrainExtract")
        # NOTE(review): only the name of the temporary file is kept; the
        # file object is dropped immediately - presumably just to get a
        # unique path.
        output_url = NamedTemporaryFile(suffix=".xml", dir=self.file.directory_name).name
        command = ["pdftohtml", "-xml", self.file.getUrl(), output_url]
        stdout, stderr = Popen(command, stdout=PIPE, stderr=PIPE, close_fds=True, env=self.environment).communicate()
        # XXX - the PDF can be protected
        if "Erro" in stderr:
            return False
        else:
            # NOTE(review): the file opened here is never explicitly closed.
            output = etree.fromstring(open(output_url).read())
            row_list = output.xpath("//text")
            # name: title of the table being built; previous / next: text
            # found before / after the table that may hold its title.
            name, previous, next = "", "", ""
            tables = {}
            element = []
            line = []
            matrix = []
            # i: position of x in row_list; j: number of tables found;
            # l: texts appended to the current line; m: value of l saved
            # when a text sharing the current column is met.
            i, j, l, m = 0, 0, 0, 0
            old_x_left = 600
            # NOTE(review): row_list is modified (remove) in the inner loop
            # while this loop iterates over it - statement order matters.
            for x in row_list:
                base_line = x.attrib["top"]
                base_column = x.attrib["left"]
                i += 1
                # Scan the texts following x (a copy of the tail of the
                # list, so removing from row_list is safe for this loop).
                for y in row_list[i:]:
                    if base_line == y.attrib["top"]:
                        # Same "top" as x: y belongs to the line of x.
                        l += 1
                        line.append(get_text(y))
                        base_column = y.attrib["left"]
                        row_list.remove(y)
                    elif base_column == y.attrib["left"]:
                        # Same "left" as the last text of the line.
                        m = l
                        if len(element) > 0:
                            element.append(get_text(y))
                        # In case name of the table is after table
                        if len(line) == 0:
                            next = get_text(x)
                            if next != None and len(next.split(":")) == 2:
                                name = next
                                next = ""
                        elif len(line) > 0:
                            element.append(line.pop())
                            element.append(get_text(y))
                    else:
                        # Neither same line nor same column: close the
                        # pending element and stop scanning for this x.
                        if len(element) > 0:
                            line.insert(m - 1, element)
                        l = 0
                        element = []
                        base_column = 0
                        break

                if len(line) > 0:
                    # In case name of the table is before table
                    previous = get_text(x.getprevious())
                    if previous != None and len(previous.split(":")) == 2:
                        name = previous
                        previous = ""
                    line.insert(0, get_text(x))
                    if len(line) > 1:
                        matrix.append(line)
                line = []
                # NOTE(review): "left" is compared as read from the
                # attribute (presumably a string) with old_x_left, which
                # starts as the integer 600 - confirm the intended ordering.
                if x.attrib["left"] < old_x_left and len(matrix) > 0:
                    if len(matrix) > 0:
                        j += 1
                        if name == "":
                            name = "Tabela %d" % j
                        name += " - pag %s" % x.getparent().attrib["number"]
                        tables[name] = matrix
                    name = ""
                    matrix = []
                old_x_left = x.attrib["left"]
            return tables
Esempio n. 37
0
 def touch(self):
     """Restart the countdown by recording the current time."""
     logger.debug("Touch MonitorSpleepingTime")
     touched_at = time()
     self._touched_at = touched_at