def convert_by_stream(self, data, format=LIBREOFFICE_OPEN_DOCUMENT):
    """Convert an in-memory document by streaming it through the UNO desktop.

    ``data`` must support ``seek`` and ``read``; it is rewound and read in
    full. The bytes are loaded from a ``private:stream`` URL and stored
    back to a ``private:stream`` URL using the filter named by ``format``.

    Returns the ``data`` attribute of the output stream wrapper.

    Raises ``Exception`` when no document could be loaded, and
    ``docvert_exception.converter_unable_to_generate_open_document`` when
    an OpenDocument export is not detected as OpenDocument.
    """
    data.seek(0)
    source = self._service_manager.createInstanceWithContext(
        "com.sun.star.io.SequenceInputStream", self._local_context)
    source.initialize((uno.ByteSequence(data.read()),))

    document = self._desktop.loadComponentFromURL(
        'private:stream',
        "_blank",
        0,
        self._to_properties(InputStream=source, ReadOnly=True))
    if not document:
        raise Exception("Error making document")

    # Loaded components without a refresh() method are used as they are.
    try:
        document.refresh()
    except AttributeError:
        pass

    sink = output_stream_wrapper()
    try:
        document.storeToURL(
            'private:stream',
            self._to_properties(OutputStream=sink, FilterName=format))
    finally:
        # The document is closed whether or not the export succeeded.
        document.close(True)

    # Only OpenDocument exports are verified before being returned.
    if format != LIBREOFFICE_OPEN_DOCUMENT:
        return sink.data
    detected = document_type.detect_document_type(sink.data)
    if detected != document_type.types.oasis_open_document:
        raise docvert_exception.converter_unable_to_generate_open_document()
    return sink.data
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice):
    """Run every supplied document through a pipeline and return the storage.

    Each ``(filename, data)`` entry of ``files`` is converted with
    ``generate_open_document`` unless it is already detected as
    OpenDocument. Its useful parts are then extracted into the storage and
    handed to ``process_pipeline``.

    ``urls`` only takes part in the "nothing supplied" check; this function
    does not download anything. A call that passes ``urls`` but no
    ``files`` therefore processes no documents and returns empty storage.

    Raises ``docvert_exception.needs_files_or_urls`` when both ``files``
    and ``urls`` are None, and ``docvert_exception.unrecognised_pipeline``
    when ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    # The guard above lets a urls-only call through, so `files` may still
    # be None here; treat it as "no files" instead of failing with
    # AttributeError on None.iteritems().
    if files is None:
        files = {}
    for filename, data in files.iteritems():
        doc_type = document_type.detect_document_type(data)
        if doc_type != document_type.types.oasis_open_document:
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(data, storage, filename)
        process_pipeline(document_xml, pipeline_id, pipeline_type, auto_pipeline_id, storage, filename)
    return storage
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice, suppress_errors=False):
    """Collect uploaded files and downloaded URLs into ``files`` for conversion.

    Only the opening part of this function is visible here: argument
    validation, storage creation, friendly-name registration and the URL
    download loop. ``suppress_errors`` is not read in the visible part.

    Raises ``docvert_exception.needs_files_or_urls`` when both ``files``
    and ``urls`` are None, and ``docvert_exception.unrecognised_pipeline``
    when ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    def _title(name, files, data):
        """Return a key for ``name`` that does not clash with entries in ``files``.

        The base name has backslash, slash and colon replaced by ``-``; an
        empty result becomes "document.odt". When the key already exists
        and the stored entry holds the same ``data``, that key is reused.
        """
        filename = os.path.basename(name).replace('\\','-').replace('/','-').replace(':','-')
        if len(filename) == 0:
            filename = "document.odt"
        if files.has_key(filename):
            # Same name and same content: reuse the existing entry.
            if data and hasattr(files[filename], 'read') and files[filename].getvalue() == data:
                return filename
            # Otherwise insert a counter (first value tried is 2) before the
            # first "." of the name, or append it when there is no ".".
            unique = 1
            potential_filename = filename
            while files.has_key(potential_filename):
                unique += 1
                if filename.count("."):
                    potential_filename = filename.replace(".", "%i." % unique, 1)
                else:
                    potential_filename = filename + str(unique)
            filename = potential_filename
        return filename
    # NOTE(review): the guard above only rejects files and urls both being
    # None; the two loops below still fail if exactly one of them is None.
    for filename, data in files.iteritems():
        storage.set_friendly_name(filename, filename)
    for url in urls:
        try:
            data = urllib2.urlopen(url, None, http_timeout).read()
            doc_type = document_type.detect_document_type(data)
            # Downloaded HTML is converted before being queued.
            if doc_type == document_type.types.html:
                data = html_to_opendocument(data, url)
            filename = _title(url, files, data)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = StringIO.StringIO(data)
        except IOError, e:
            # A failed download is recorded as an Exception instance stored
            # under the file's key rather than being raised.
            filename = _title(url, files, None)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = Exception("Download error from %s: %s" % (url, e))
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Convert each supplied file and feed it to the requested pipeline.

    For every ``(filename, data)`` pair in ``files``:

    1. the document type is detected, and anything that is not already
       OpenDocument is converted with ``generate_open_document``;
    2. ``opendocument.extract_useful_open_document_files`` puts its parts
       into the storage;
    3. ``process_pipeline`` is run on the extracted XML.

    ``urls`` is accepted but only consulted by the "nothing supplied"
    check; no download happens here, so a urls-only call yields storage
    with no processed documents.

    Returns the storage object created for ``storage_type_name``.

    Raises ``docvert_exception.needs_files_or_urls`` when neither ``files``
    nor ``urls`` is given, and ``docvert_exception.unrecognised_pipeline``
    when ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    # A caller may pass urls without files; iterating None would raise
    # AttributeError, so fall back to an empty mapping.
    pending = files if files is not None else {}
    for filename, data in pending.iteritems():
        doc_type = document_type.detect_document_type(data)
        if doc_type != document_type.types.oasis_open_document:
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(
            data, storage, filename)
        process_pipeline(document_xml, pipeline_id, pipeline_type,
                         auto_pipeline_id, storage, filename)
    return storage
def convert_by_stream(self, data, format=LIBREOFFICE_OPEN_DOCUMENT):
    """Stream ``data`` into the UNO desktop and export it with ``format``.

    ``data`` is rewound with ``seek(0)`` and read completely. The loaded
    document is written to an ``output_stream_wrapper`` and then closed.
    When ``format`` is ``LIBREOFFICE_OPEN_DOCUMENT`` the exported bytes are
    checked with ``document_type.detect_document_type``.

    Returns the ``data`` attribute of the output stream wrapper.

    Raises ``Exception`` if loading yields no document, and
    ``docvert_exception.converter_unable_to_generate_open_document`` if the
    OpenDocument check fails.
    """
    data.seek(0)
    stream_in = self._service_manager.createInstanceWithContext(
        "com.sun.star.io.SequenceInputStream",
        self._local_context
    )
    raw_bytes = uno.ByteSequence(data.read())
    stream_in.initialize((raw_bytes,))

    load_properties = self._to_properties(InputStream=stream_in, ReadOnly=True)
    document = self._desktop.loadComponentFromURL('private:stream', "_blank", 0, load_properties)
    if not document:
        raise Exception("Error making document")

    try:
        document.refresh()
    except AttributeError:
        # No refresh() available on this component; continue without it.
        pass

    stream_out = output_stream_wrapper()
    try:
        store_properties = self._to_properties(OutputStream=stream_out, FilterName=format)
        document.storeToURL('private:stream', store_properties)
    finally:
        document.close(True)

    if format == LIBREOFFICE_OPEN_DOCUMENT:
        exported_type = document_type.detect_document_type(stream_out.data)
        expected_type = document_type.types.oasis_open_document
        if exported_type != expected_type:
            raise docvert_exception.converter_unable_to_generate_open_document()
    return stream_out.data
try: data = urllib2.urlopen(url, None, http_timeout).read() doc_type = document_type.detect_document_type(data) if doc_type == document_type.types.html: data = html_to_opendocument(data, url) filename = _title(url, files, data) storage.set_friendly_name(filename, "%s (%s)" % (filename, url)) files[filename] = StringIO.StringIO(data) except IOError, e: filename = _title(url, files, None) storage.set_friendly_name(filename, "%s (%s)" % (filename, url)) files[filename] = Exception("Download error from %s: %s" % (url, e)) for filename, data in files.iteritems(): if storage.default_document is None: storage.default_document = filename doc_type = document_type.detect_document_type(data) if doc_type == document_type.types.exception: storage.add("%s/index.txt" % filename, str(data)) elif doc_type != document_type.types.oasis_open_document: try: data = generate_open_document(data, converter) doc_type = document_type.types.oasis_open_document except Exception, e: if not suppress_errors: raise e storage.add("%s/index.txt" % filename, str(e)) if doc_type == document_type.types.oasis_open_document: if pipeline_id == "open document": #reserved term, for when people want the Open Document file back directly. Don't bother loading pipeline. storage.add("%s/index.odt" % filename, data) thumbnail = opendocument.extract_thumbnail(data) if thumbnail:
document = self._desktop.loadComponentFromURL('private:stream', "_blank", 0, self._to_properties(InputStream=input_stream,ReadOnly=True)) if not document: raise Exception, "Error making document" try: document.refresh() except AttributeError: pass output_stream = output_stream_wrapper() try: document.storeToURL('private:stream', self._to_properties(OutputStream=output_stream, FilterName=format)) except Exception, e: #ignore any error, verify the output before complaining pass finally: document.close(True) if format == LIBREOFFICE_OPEN_DOCUMENT or format == LIBREOFFICE_PDF: doc_type = document_type.detect_document_type(output_stream.data) output_stream.data.seek(0) if format == LIBREOFFICE_OPEN_DOCUMENT and doc_type != document_type.types.oasis_open_document: raise docvert_exception.converter_unable_to_generate_open_document("Unable to generate OpenDocument, was detected as %s. First 2 bytes = %s" % (doc_type, output_stream.data.read(2))) elif format == LIBREOFFICE_PDF and doc_type != document_type.types.pdf: raise docvert_exception.converter_unable_to_generate_pdf("Unable to generate PDF, was detected as %s. First 4 bytes = %s" % (doc_type, output_stream.data.read(4))) return output_stream.data def _to_properties(self, **args): props = [] for key in args: prop = PropertyValue() prop.Name = key prop.Value = args[key] props.append(prop) return tuple(props)
try: document.refresh() except AttributeError: pass output_stream = output_stream_wrapper() try: document.storeToURL( 'private:stream', self._to_properties(OutputStream=output_stream, FilterName=format)) except Exception, e: #ignore any error, verify the output before complaining pass finally: document.close(True) if format == LIBREOFFICE_OPEN_DOCUMENT or format == LIBREOFFICE_PDF: doc_type = document_type.detect_document_type(output_stream.data) output_stream.data.seek(0) if format == LIBREOFFICE_OPEN_DOCUMENT and doc_type != document_type.types.oasis_open_document: raise docvert_exception.converter_unable_to_generate_open_document( "Unable to generate OpenDocument, was detected as %s. First 2 bytes = %s" % (doc_type, output_stream.data.read(2))) elif format == LIBREOFFICE_PDF and doc_type != document_type.types.pdf: raise docvert_exception.converter_unable_to_generate_pdf( "Unable to generate PDF, was detected as %s. First 4 bytes = %s" % (doc_type, output_stream.data.read(4))) return output_stream.data def _to_properties(self, **args): props = [] for key in args: prop = PropertyValue()