Example #1
0
    def updateBaseMetadata(self, **kw):
        """
        Update the metadata stored in the converted OOo document.

        The current base data and the keyword arguments given by the user
        are sent to the conversion server through run_setmetadata. When
        the server answers with code 200, the returned data replaces the
        base data and updateFileMetadata() is called.

        Raises NotConvertedError if the document has no base data, and
        ConversionError if the server answers with any other code.
        """
        if not self.hasBaseData():
            # XXX please pass a meaningful description of error as argument
            raise NotConvertedError()

        proxy = OOoServerProxy(self)
        set_metadata = proxy.run_setmetadata
        document_id = self.getId()
        encoded_base_data = enc(str(self.getBaseData()))
        status, payload, reason = set_metadata(document_id,
                                               encoded_base_data,
                                               kw)

        if status != 200:
            # Explicitly raise the exception!
            raise ConversionError(
                "OOoDocument: error getting document metadata (Code %s: %s)" %
                (status, reason))

        # The server accepted the metadata: keep the document it sent back.
        self._setBaseData(dec(payload['data']))
        # record in workflow history # XXX must put appropriate comments.
        self.updateFileMetadata()
Example #2
0
    def _getConversionFromProxyServer(self, format):
        """
        Communicates with server to convert a file

        format: target format to produce. The special value 'text-content'
        is handled locally without calling the server: the base data is
        opened as a zip archive, 'content.xml' is read from it and the
        self.rx_strip / self.rx_compr substitutions are applied.

        Returns a (mime_type, data) tuple. For 'text-content', mime_type is
        'text/plain' and data is the resulting string. Otherwise both come
        from the server response: response_dict['mime'] and a Pdata built
        from the decoded response_dict['data'].

        Raises NotConvertedError if the document has no base data.
        """
        if not self.hasBaseData():
            # XXX please pass a meaningful description of error as argument
            raise NotConvertedError()
        if format == 'text-content':
            # Extract text from the ODF file
            cs = cStringIO.StringIO()
            try:
                cs.write(str(self.getBaseData()))
                z = zipfile.ZipFile(cs)
                try:
                    s = z.read('content.xml')
                finally:
                    # Release the archive even if content.xml is missing
                    z.close()
            finally:
                # Release the buffer even if it is not a valid zip archive
                cs.close()
            s = self.rx_strip.sub(" ", s)  # strip xml
            s = self.rx_compr.sub(" ", s)  # compress multiple spaces
            return 'text/plain', s
        server_proxy = OOoServerProxy(self)
        orig_format = self.getBaseContentType()
        generate_result = server_proxy.run_generate(
            self.getId(), enc(str(self.getBaseData())), None, format,
            orig_format)
        if isinstance(generate_result, dict):
            # This is for backward compatibility with older oood version
            # returning only response_dict. It is checked explicitly because
            # unpacking a dict iterates over its keys: a dict holding exactly
            # three keys would unpack without error and bind response_dict
            # to a key instead of the dict itself.
            response_dict = generate_result
        else:
            try:
                response_code, response_dict, response_message = \
                    generate_result
            except ValueError:
                # Not a three-item result: use it as response_dict directly,
                # as was done before.
                response_dict = generate_result

        # XXX: handle possible OOOd server failure
        return response_dict['mime'], Pdata(dec(response_dict['data']))
Example #3
0
    def _convert(self, format, frame=0, **kw):
        """Convert the document to the given format.

        If a conversion is already stored for this format, it is returned
        directly, otherwise the conversion is stored for the next time.

        format: requested target format. An empty value returns the raw
          data, 'base-data' returns the base data. 'pdf', 'html', the
          formats of VALID_IMAGE_FORMAT_LIST and the text formats
          ('txt', 'text', 'text-content') are mapped to an entry of
          getTargetFormatList() when one matches.
        frame: Only used for image conversion
        kw: passed to hasConversion / setConversion / getConversion and to
          the convert() call of the temporary image.

        Returns a (mime, data) tuple.

        Raises NotConvertedError if the document has no base data, and
        ConversionError if the resolved target format is refused by
        isTargetFormatAllowed().

        XXX Cascading conversions must be delegated to conversion server,
        not by OOoDocument._convert (ie: convert to pdf, then convert to
        image, then resize)
        *OR* as an optimisation we can read cached intermediate conversions
        instead of compute them each times.
          1- odt->pdf->png
          2- odt->cached(pdf)->jpg
        """
        #XXX if document is empty, stop to try to convert.
        #XXX but I don't know what is a appropriate mime-type.(Yusei)
        if not self.hasData():
            return 'text/plain', ''
        # if no conversion asked (format empty)
        # return raw data
        if not format:
            return self.getContentType(), self.getData()
        # Check if we have already a base conversion
        if not self.hasBaseData():
            # XXX please pass a meaningful description of error as argument
            raise NotConvertedError()
        # Make sure we can support html and pdf by default
        is_html = 0
        requires_pdf_first = 0
        original_format = format
        allowed_format_list = self.getTargetFormatList()
        if format == 'base-data':
            return self.getBaseContentType(), str(self.getBaseData())
        # In the branches below, when no allowed format matches, `format` is
        # left untouched instead of indexing an empty list: the request then
        # reaches the isTargetFormatAllowed() check further down rather than
        # failing with a bare IndexError.
        if format == 'pdf':
            format_list = [x for x in allowed_format_list if x.endswith('pdf')]
            if format_list:
                format = format_list[0]
        elif format in VALID_IMAGE_FORMAT_LIST:
            format_list = [
                x for x in allowed_format_list if x.endswith(format)
            ]
            if format_list:
                format = format_list[0]
            else:
                # We must first make a PDF which will be used to produce an
                # image out of it
                requires_pdf_first = 1
                format_list = [
                    x for x in allowed_format_list if x.endswith('pdf')
                ]
                if format_list:
                    format = format_list[0]
        elif format == 'html':
            format_list = [
                x for x in allowed_format_list
                if x.startswith('html') or x.endswith('html')
            ]
            if format_list:
                format = format_list[0]
            is_html = 1
        elif format in ('txt', 'text', 'text-content'):
            # if possible, we try to get utf8 text. ('enc.txt' will encode to utf8)
            if 'enc.txt' in allowed_format_list:
                format = 'enc.txt'
            elif format not in allowed_format_list:
                #Text conversion is not supported by oood, do it in other way
                if not self.hasConversion(format=original_format):
                    #Do real conversion for text
                    mime, data = self._getConversionFromProxyServer(
                        format='text-content')
                    self.setConversion(data, mime, format=original_format)
                    return mime, data
                return self.getConversion(format=original_format)
        # Raise an error if the format is not supported
        if not self.isTargetFormatAllowed(format):
            raise ConversionError(
                "OOoDocument: target format %s is not supported" % format)
        # The cache is keyed on the format the caller asked for, not on the
        # server-side format it was mapped to.
        has_format = self.hasConversion(format=original_format, **kw)
        if not has_format:
            # Do real conversion
            mime, data = self._getConversionFromProxyServer(format)
            if is_html:
                # Extra processing required since
                # we receive a zip file
                cs = cStringIO.StringIO()
                cs.write(str(data))
                z = zipfile.ZipFile(
                    cs)  # A disk file would be more RAM efficient
                # Use the first entry whose name ends with 'html'; for a
                # Presentation only entries whose name contains 'impr' are
                # considered.
                for f in z.infolist():
                    fn = f.filename
                    if fn.endswith('html'):
                        if self.getPortalType() == 'Presentation'\
                              and 'impr' not in fn:
                            continue
                        data = z.read(fn)
                        break
                mime = 'text/html'
                self._populateConversionCacheWithHTML(
                    zip_file=z)  # Maybe some parts should be asynchronous for
                # better usability
                z.close()
                cs.close()
            if original_format not in VALID_IMAGE_FORMAT_LIST \
              and not requires_pdf_first:
                self.setConversion(data, mime, format=original_format, **kw)
            else:
                # create temporary image and use it to resize accordingly
                temp_image = self.portal_contributions.newContent(
                    portal_type='Image',
                    file=cStringIO.StringIO(),
                    filename=self.getId(),
                    temp_object=1)
                temp_image._setData(data)
                # we care for first page only but as well for image quality
                mime, data = temp_image.convert(original_format,
                                                frame=frame,
                                                **kw)
                # store conversion
                self.setConversion(data, mime, format=original_format, **kw)

        return self.getConversion(format=original_format, **kw)