Example #1
0
class Handler(object):
  """OOo Handler gives access to one document through OpenOffice.

  One instance of this class is created for each input document. The data is
  wrapped in a FileSystemDocument and every operation (convert, getMetadata,
  setMetadata) is delegated to the ``helper/unoconverter.py`` script, which is
  run as a child process.
  """
  implements(IHandler)

  def __init__(self, base_folder_url, data, source_format, **kw):
    """Create the file system document and store the handler settings.

    Keyword arguments:
    zip -- handed to document.getContent() by convert() (default False)
    uno_path -- falls back to environ["uno_path"] when empty
    office_binary_path -- falls back to environ["office_binary_path"] when
                          empty
    timeout -- value handed to MonitorTimeout (default 600)
    refresh -- JSON-encoded and sent to the converter by convert()
               (default False)
    """
    self.document = FileSystemDocument(base_folder_url, data, source_format)
    self.zip = kw.get('zip', False)
    self.uno_path = kw.get("uno_path") or environ.get("uno_path")
    self.office_binary_path = (kw.get("office_binary_path") or
                               environ.get("office_binary_path"))
    self.timeout = kw.get("timeout", 600)
    self.refresh = kw.get('refresh', False)
    self.source_format = source_format

  def _getCommand(self, *args, **kw):
    """Build the argument list that runs helper/unoconverter.py.

    args -- feature names, each one becomes a bare ``--<name>`` flag
    kw -- options, each one becomes ``--<key>=<value>``; ``hostname`` and
          ``port`` are always set from openoffice.getAddress()
    """
    hostname, port = openoffice.getAddress()
    kw['hostname'] = hostname
    kw['port'] = port
    # Use the interpreter found in the office binary folder when there is one,
    # otherwise fall back to plain "python".
    python = path.join(self.office_binary_path, "python")
    interpreter = python if path.exists(python) else "python"
    command_list = [interpreter,
                    pkg_resources.resource_filename(__name__,
                                 path.join("helper", "unoconverter.py")),
                    "--uno_path=%s" % self.uno_path,
                    "--office_binary_path=%s" % self.office_binary_path,
                    '--document_url=%s' % self.document.getUrl()]
    # Every flag is inserted at the same index, so several features end up in
    # reverse order in the command line.
    for arg in args:
      command_list.insert(3, "--%s" % arg)
    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for k, v in kw.items():
      command_list.append("--%s=%s" % (k, v))

    return command_list

  def _startTimeout(self):
    """Create the timeout monitor and start it."""
    self.monitor = MonitorTimeout(openoffice, self.timeout)
    self.monitor.start()

  def _stopTimeout(self):
    """Terminate the monitor created by _startTimeout."""
    self.monitor.terminate()

  def _subprocess(self, command_list):
    """Run command_list as a child process and return (stdout, stderr).

    The timeout monitor is running for the whole lifetime of the child and is
    always stopped afterwards. Any exception raised while spawning the child
    or reading its output is propagated unchanged.
    """
    if monitor_sleeping_time is not None:
      monitor_sleeping_time.touch()
    # Started outside the try block: if the monitor cannot be started there is
    # nothing to stop in the finally clause.
    self._startTimeout()
    # Stays None when Popen itself fails, so the cleanup below does not hide
    # the real error behind a NameError.
    process = None
    try:
      process = Popen(command_list, stdout=PIPE, stderr=PIPE, close_fds=True,
                      env=openoffice.environment_dict.copy())
      stdout, stderr = process.communicate()
    finally:
      self._stopTimeout()
      if process is not None and pid_exists(process.pid):
        process.terminate()
    return stdout, stderr

  def _callUnoConverter(self, *feature_list, **kw):
    """Run the converter script, retrying once after restarting OpenOffice.

    OpenOffice is started first when openoffice.status() is false. When the
    first run prints nothing on stdout and its stderr mentions an Exception or
    an Error, the original document is restored, OpenOffice is restarted and
    the command is run a second time.

    Returns the (stdout, stderr) pair of the last run.
    Raises Exception with the stderr content when the second run wrote
    anything to stderr.
    """
    if not openoffice.status():
      openoffice.start()
    command_list = self._getCommand(*feature_list, **kw)
    stdout, stderr = self._subprocess(command_list)
    if not stdout and re.search(r"\w*Exception|\w*Error", stderr):
      logger.debug(stderr)
      self.document.restoreOriginal()
      openoffice.restart()
      kw['document_url'] = self.document.getUrl()
      command = self._getCommand(*feature_list, **kw)
      stdout, stderr = self._subprocess(command)
      # NOTE(review): this raises on any stderr output, even when the retry
      # produced a result on stdout -- confirm this is intended.
      if stderr != "":
        raise Exception(stderr)

    return stdout, stderr

  def _serializeMimemapper(self,
                           source_extension=None,
                           destination_extension=None):
    """Serialize the parts of mimemapper needed by the converter as JSON.

    Without destination_extension only ``mimetype_by_filter_type`` is
    serialized. Otherwise the result also holds ``doc_type_list_by_extension``
    and a ``filter_list`` of (destination_extension, service_type,
    filter_name) entries, one per service type registered for
    source_extension (all document services when the extension is unknown).
    """
    if destination_extension is None:
      return json.dumps(dict(mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))

    service_type_list = mimemapper._doc_type_list_by_extension.get(
      source_extension, mimemapper.document_service_list)
    filter_list = [
      (destination_extension,
       service_type,
       mimemapper.getFilterName(destination_extension, service_type))
      for service_type in service_type_list]
    logger.debug("Filter List: %r" % filter_list)
    return json.dumps(dict(doc_type_list_by_extension=mimemapper._doc_type_list_by_extension,
                            filter_list=filter_list,
                            mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))

  def convert(self, destination_format=None, **kw):
    """Convert the document to another format supported by OpenOffice.

    Keyword Arguments:
    destination_format -- extension of the wanted document as String
    Extra keyword arguments are forwarded to the converter script.

    Returns the content of the converted document; the file system document
    is trashed before returning.
    """
    logger.debug("OooConvert: %s > %s" % (self.source_format, destination_format))
    kw['source_format'] = self.source_format
    if destination_format:
      kw['destination_format'] = destination_format
    kw['mimemapper'] = self._serializeMimemapper(self.source_format,
                                                 destination_format)
    kw['refresh'] = json.dumps(self.refresh)
    openoffice.acquire()
    try:
      stdout, stderr = self._callUnoConverter('convert', **kw)
    finally:
      openoffice.release()
    # The converter prints the url of the converted document on stdout.
    url = stdout.replace('\n', '')
    self.document.reload(url)
    content = self.document.getContent(self.zip)
    self.document.trash()
    return content

  def getMetadata(self, base_document=False):
    """Return a dictionary with all metadata of the document.

    Keyword Arguments:
    base_document -- Boolean variable. If true, the 'convert' feature is
    requested as well; when the converter answers with a 'document_url', that
    document is loaded and returned under the 'Data' key.
    """
    logger.debug("getMetadata")
    kw = dict(mimemapper=self._serializeMimemapper())
    if base_document:
      feature_list = ['getmetadata', 'convert']
    else:
      feature_list = ['getmetadata']
    openoffice.acquire()
    try:
      stdout, stderr = self._callUnoConverter(*feature_list, **kw)
    finally:
      openoffice.release()
    metadata = json.loads(decodestring(stdout))
    if 'document_url' in metadata:
      self.document.reload(metadata['document_url'])
      metadata['Data'] = self.document.getContent()
      del metadata['document_url']
    self.document.trash()
    return metadata

  def setMetadata(self, metadata):
    """Return the content of the document after setting new metadata.

    Keyword arguments:
    metadata -- expected a dictionary with metadata; it is sent to the
    converter as encoded JSON.
    """
    metadata_json = json.dumps(metadata)
    logger.debug("setMetadata")
    kw = dict(metadata=encodestring(metadata_json))
    openoffice.acquire()
    try:
      self._callUnoConverter('setmetadata', **kw)
    finally:
      openoffice.release()
    doc_loaded = self.document.getContent()
    self.document.trash()
    return doc_loaded
Example #2
0
 def _startTimeout(self):
   """Create the timeout monitor for OpenOffice, keep it on the handler and
   start it."""
   timeout_monitor = MonitorTimeout(openoffice, self.timeout)
   self.monitor = timeout_monitor
   timeout_monitor.start()
Example #3
0
class Handler(object):
  """OOo Handler gives access to one document through OpenOffice.

  One instance of this class is created for each input document. The data is
  wrapped in a FileSystemDocument and every operation (convert, getMetadata,
  setMetadata) is delegated to the ``helper/unoconverter.py`` script, which is
  run as a child process.
  """
  implements(IHandler)

  def __init__(self, base_folder_url, data, source_format, **kw):
    """Store the handler settings and create the file system document.

    Keyword arguments:
    zip -- handed to document.getContent() by convert() (default False)
    uno_path -- falls back to environ["uno_path"] when empty
    office_binary_path -- falls back to environ["office_binary_path"] when
                          empty
    timeout -- value handed to MonitorTimeout (default 600)
    refresh -- JSON-encoded and sent to the converter by convert()
               (default False)
    """
    self.zip = kw.get('zip', False)
    self.uno_path = kw.get("uno_path") or environ.get("uno_path")
    self.office_binary_path = (kw.get("office_binary_path") or
                               environ.get("office_binary_path"))
    self.timeout = kw.get("timeout", 600)
    self.refresh = kw.get('refresh', False)
    self.source_format = source_format
    self._createDocument(base_folder_url, data, source_format)

  def _createDocument(self, base_folder_url, data, source_format):
    """Create self.document, re-encoding csv data to utf-8 when needed."""
    if source_format == 'csv':
      # Cloudooo expect utf-8 encoded csv, but also tolerate latin9 for
      # backward compatibility.
      # The heuristic is "if it's not utf-8", let's assume it's iso-8859-15.
      try:
        unicode(data, 'utf-8')
      except UnicodeDecodeError:
        data = unicode(data, 'iso-8859-15').encode('utf-8')
        logger.warn("csv data is not utf-8, assuming iso-8859-15")
    self.document = FileSystemDocument(
         base_folder_url,
         data,
         source_format)

  def _getCommand(self, *args, **kw):
    """Build the argument list that runs helper/unoconverter.py.

    args -- feature names, each one becomes a bare ``--<name>`` flag
    kw -- options, each one becomes ``--<key>=<value>``; ``hostname`` and
          ``port`` are always set from openoffice.getAddress()
    """
    hostname, port = openoffice.getAddress()
    kw['hostname'] = hostname
    kw['port'] = port
    # Use the interpreter found in the office binary folder when there is one,
    # otherwise fall back to plain "python".
    python = path.join(self.office_binary_path, "python")
    interpreter = python if path.exists(python) else "python"
    command_list = [interpreter,
                    pkg_resources.resource_filename(__name__,
                                 path.join("helper", "unoconverter.py")),
                    "--uno_path=%s" % self.uno_path,
                    "--office_binary_path=%s" % self.office_binary_path,
                    '--document_url=%s' % self.document.getUrl()]
    # Every flag is inserted at the same index, so several features end up in
    # reverse order in the command line.
    for arg in args:
      command_list.insert(3, "--%s" % arg)
    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for k, v in kw.items():
      command_list.append("--%s=%s" % (k, v))

    return command_list

  def _startTimeout(self):
    """Create the timeout monitor and start it."""
    self.monitor = MonitorTimeout(openoffice, self.timeout)
    self.monitor.start()

  def _stopTimeout(self):
    """Terminate the monitor created by _startTimeout."""
    self.monitor.terminate()

  def _subprocess(self, command_list):
    """Run command_list as a child process and return (stdout, stderr).

    The timeout monitor is running for the whole lifetime of the child and is
    always stopped afterwards. Any exception raised while spawning the child
    or reading its output is propagated unchanged.
    """
    if monitor_sleeping_time is not None:
      monitor_sleeping_time.touch()
    # Started outside the try block: if the monitor cannot be started there is
    # nothing to stop in the finally clause.
    self._startTimeout()
    # Stays None when Popen itself fails, so the cleanup below does not hide
    # the real error behind a NameError.
    process = None
    try:
      process = Popen(command_list, stdout=PIPE, stderr=PIPE, close_fds=True,
                      env=openoffice.environment_dict.copy())
      stdout, stderr = process.communicate()
    finally:
      self._stopTimeout()
      if process is not None and pid_exists(process.pid):
        process.terminate()
    return stdout, stderr

  def _callUnoConverter(self, *feature_list, **kw):
    """Run the converter script, retrying once after restarting OpenOffice.

    OpenOffice is started first when openoffice.status() is false. When a run
    prints nothing on stdout but something on stderr it is considered failed:
    after the first failure the original document is restored, OpenOffice is
    restarted and the command is run a second time.

    Returns the (stdout, stderr) pair of the last run.
    Raises Exception carrying the stderr of both runs when the second run
    fails as well.
    """
    if not openoffice.status():
      openoffice.start()
    command_list = self._getCommand(*feature_list, **kw)
    stdout, stderr = self._subprocess(command_list)
    if not stdout and stderr:
      first_error = stderr
      logger.error(stderr)
      self.document.restoreOriginal()
      openoffice.restart()
      kw['document_url'] = self.document.getUrl()
      command = self._getCommand(*feature_list, **kw)
      stdout, stderr = self._subprocess(command)
      if not stdout and stderr:
        second_error = "\nerror of the second run: " + stderr
        logger.error(second_error)
        raise Exception(first_error + second_error)

    return stdout, stderr

  def _serializeMimemapper(self,
                           source_extension=None,
                           destination_extension=None):
    """Serialize the parts of mimemapper needed by the converter as JSON.

    Without destination_extension only ``mimetype_by_filter_type`` is
    serialized. Otherwise the result also holds ``doc_type_list_by_extension``
    and a ``filter_list`` of (destination_extension, service_type,
    filter_name) entries, one per service type registered for
    source_extension (all document services when the extension is unknown).
    """
    if destination_extension is None:
      return json.dumps(dict(mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))

    service_type_list = mimemapper._doc_type_list_by_extension.get(
      source_extension, mimemapper.document_service_list)
    filter_list = [
      (destination_extension,
       service_type,
       mimemapper.getFilterName(destination_extension, service_type))
      for service_type in service_type_list]
    logger.debug("Filter List: %r" % filter_list)
    return json.dumps(dict(doc_type_list_by_extension=mimemapper._doc_type_list_by_extension,
                            filter_list=filter_list,
                            mimetype_by_filter_type=mimemapper._mimetype_by_filter_type))

  def convert(self, destination_format=None, **kw):
    """Convert the document to another format supported by OpenOffice.

    Keyword Arguments:
    destination_format -- extension of the wanted document as String
    Extra keyword arguments are forwarded to the converter script.

    Returns the content of the converted document; the file system document
    is trashed before returning.
    """
    logger.debug("OooConvert: %s > %s" % (self.source_format, destination_format))
    kw['source_format'] = self.source_format
    if destination_format:
      kw['destination_format'] = destination_format
    kw['mimemapper'] = self._serializeMimemapper(self.source_format,
                                                 destination_format)
    kw['refresh'] = json.dumps(self.refresh)
    openoffice.acquire()
    try:
      stdout, stderr = self._callUnoConverter('convert', **kw)
    finally:
      openoffice.release()
    # The converter prints the url of the converted document on stdout.
    url = stdout.replace('\n', '')
    self.document.reload(url)
    content = self.document.getContent(self.zip)
    self.document.trash()
    return content

  def getMetadata(self, base_document=False):
    """Return a dictionary with all metadata of the document.

    Keyword Arguments:
    base_document -- Boolean variable. If true, the 'convert' feature is
    requested as well; when the converter answers with a 'document_url', that
    document is loaded and returned under the 'Data' key.
    """
    logger.debug("getMetadata")
    kw = dict(mimemapper=self._serializeMimemapper())
    if base_document:
      feature_list = ['getmetadata', 'convert']
    else:
      feature_list = ['getmetadata']
    openoffice.acquire()
    try:
      stdout, stderr = self._callUnoConverter(*feature_list, **kw)
    finally:
      openoffice.release()
    metadata = json.loads(decodestring(stdout))
    if 'document_url' in metadata:
      self.document.reload(metadata['document_url'])
      metadata['Data'] = self.document.getContent()
      del metadata['document_url']
    self.document.trash()
    return metadata

  def setMetadata(self, metadata):
    """Return the content of the document after setting new metadata.

    Keyword arguments:
    metadata -- expected a dictionary with metadata; it is sent to the
    converter as encoded JSON.
    """
    metadata_json = json.dumps(metadata)
    logger.debug("setMetadata")
    kw = dict(metadata=encodestring(metadata_json))
    openoffice.acquire()
    try:
      self._callUnoConverter('setmetadata', **kw)
    finally:
      openoffice.release()
    doc_loaded = self.document.getContent()
    self.document.trash()
    return doc_loaded

  @staticmethod
  def getAllowedConversionFormatList(source_mimetype):
    """Return the list of (content_type, title) pairs the given source can be
    converted to, according to mimemapper.

    source_mimetype -- a mimetype; a value without "/" is used directly as
    an extension.

    [('application/vnd.oasis.opendocument.text', 'ODF Text Document'),
     ('application/pdf', 'PDF - Portable Document Format'),
     ...
    ]
    """
    # XXX please never guess extension from mimetype
    output_set = set()
    if "/" in source_mimetype:
      parsed_mimetype_type = parseContentType(source_mimetype).gettype()
      # here `guess_all_extensions` never handles mimetype parameters
      #   (even for `text/plain;charset=UTF-8` which is standard)
      extension_list = mimetypes.guess_all_extensions(parsed_mimetype_type)  # XXX never guess
    else:
      extension_list = [source_mimetype]

    # The outer variable has its own name so that the inner loop no longer
    # rebinds it while iterating.
    for source_extension in extension_list:
      allowed_extension_list = mimemapper.getAllowedExtensionList(
        extension=source_extension.replace(".", ""))
      for ext, title in allowed_extension_list:
        if ext in ("fodt", ".fodt"):  # BBB
          output_set.add(("application/vnd.oasis.opendocument.text-flat-xml", title))
          continue
        if ext:
          mimetype, _ = mimetypes.guess_type("a." + ext)  # XXX never guess
          if mimetype:
            output_set.add((mimetype, title))
    return list(output_set)