def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Convert every entry of ``files`` and run it through a pipeline.

    For each ``(filename, data)`` pair yielded by ``files.iteritems()``:
    data that is not detected as OpenDocument is first passed through
    generate_open_document(); the result is handed to
    opendocument.extract_useful_open_document_files() and the returned XML
    is fed to process_pipeline() with ``filename`` as the storage prefix.

    Parameters:
        files -- mapping of filename to document data (needs ``iteritems()``).
        urls -- only compared against None here; see the note below.
        pipeline_id, pipeline_type, auto_pipeline_id -- forwarded unchanged
            to process_pipeline().
        storage_type_name -- passed to docvert_storage.get_storage().
        converter -- passed to generate_open_document().

    Returns the storage object obtained from docvert_storage.get_storage().

    Raises docvert_exception.needs_files_or_urls() when both ``files`` and
    ``urls`` are None, and docvert_exception.unrecognised_pipeline when
    ``pipeline_id`` is None.

    NOTE(review): ``urls`` is never read after the initial None check, so a
    call that supplies ``urls`` without ``files`` gets past the guard and
    then fails on ``files.iteritems()``.
    """
    nothing_supplied = files is None and urls is None
    if nothing_supplied:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        message = "Unknown pipeline '%s'" % pipeline_id
        raise docvert_exception.unrecognised_pipeline(message)

    results = docvert_storage.get_storage(storage_type_name)
    for name, payload in files.iteritems():
        detected = document_type.detect_document_type(payload)
        needs_conversion = detected != document_type.types.oasis_open_document
        if needs_conversion:
            # Anything that is not already OpenDocument goes through the converter first.
            payload = generate_open_document(payload, converter)
        extracted_xml = opendocument.extract_useful_open_document_files(
            payload, results, name)
        process_pipeline(
            extracted_xml,
            pipeline_id,
            pipeline_type,
            auto_pipeline_id,
            results,
            name)
    return results
def process_conversion(
        files=None, urls=None, pipeline_id=None, pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Run each document in ``files`` through the requested pipeline.

    Every ``(filename, data)`` pair from ``files.iteritems()`` is brought
    into OpenDocument form (via generate_open_document() when the detected
    type is something else), unpacked into storage with
    opendocument.extract_useful_open_document_files(), and then processed
    by process_pipeline() using ``filename`` as the storage prefix.

    Returns the storage created by
    docvert_storage.get_storage(storage_type_name).

    Raises docvert_exception.needs_files_or_urls() if ``files`` and ``urls``
    are both None; raises docvert_exception.unrecognised_pipeline if
    ``pipeline_id`` is None.

    NOTE(review): only ``files`` is iterated. ``urls`` is checked against
    None and otherwise unused, so passing ``urls`` alone fails at
    ``files.iteritems()``.
    """
    def as_open_document(raw):
        # Leave OpenDocument input untouched; convert everything else.
        kind = document_type.detect_document_type(raw)
        if kind != document_type.types.oasis_open_document:
            return generate_open_document(raw, converter)
        return raw

    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)

    store = docvert_storage.get_storage(storage_type_name)
    for entry_name, entry_data in files.iteritems():
        open_document = as_open_document(entry_data)
        xml = opendocument.extract_useful_open_document_files(
            open_document, store, entry_name)
        process_pipeline(
            xml, pipeline_id, pipeline_type, auto_pipeline_id, store,
            entry_name)
    return store
# NOTE(review): this line is a whitespace-collapsed paste that opens partway
# through a function (the leading `elif` has no visible `if`, and
# `suppress_errors` is not bound anywhere in view). It is therefore left
# byte-identical; the notes below describe only what the visible text shows,
# and the nesting is read from statement order -- confirm against the
# original layout.
#
# Visible tail of the enclosing conversion function:
#   * Data whose doc_type is not OpenDocument is converted with
#     generate_open_document() and doc_type is then set to OpenDocument.
#     If the conversion raises and suppress_errors is falsy, the exception is
#     re-raised; otherwise str(e) is stored under "<filename>/index.txt".
#   * For OpenDocument data, pipeline_id == "open document" stores the data as
#     "<filename>/index.odt" and, when opendocument.extract_thumbnail()
#     returns something truthy, "<filename>/thumbnail.png". The `else` branch
#     (presumably paired with the pipeline_id test) stores the extracted XML
#     as "<filename>/opendocument.xml", calls process_pipeline(), and then
#     removes that entry again.
#   * Finally returns storage.
#
# process_pipeline(initial_pipeline_value, pipeline_id, pipeline_type,
#                  auto_pipeline_id, storage, storage_prefix=None):
#   Fetches the definition via docvert_pipeline.get_pipeline_definition(),
#   builds a docvert_pipeline.pipeline_processor from its 'stages' and
#   'pipeline_directory' entries plus storage and storage_prefix, and returns
#   pipeline.start(initial_pipeline_value).
#
# generate_open_document(data, converter=...):
#   When converter is converter_type.python_streaming_to_libreoffice, returns
#   docvert_libreoffice.get_client().convert_by_stream(data,
#   docvert_libreoffice.LIBREOFFICE_OPEN_DOCUMENT); any other converter value
#   raises docvert_exception.unrecognised_converter.
elif doc_type != document_type.types.oasis_open_document: try: data = generate_open_document(data, converter) doc_type = document_type.types.oasis_open_document except Exception, e: if not suppress_errors: raise e storage.add("%s/index.txt" % filename, str(e)) if doc_type == document_type.types.oasis_open_document: if pipeline_id == "open document": #reserved term, for when people want the Open Document file back directly. Don't bother loading pipeline. storage.add("%s/index.odt" % filename, data) thumbnail = opendocument.extract_thumbnail(data) if thumbnail: storage.add("%s/thumbnail.png" % filename, thumbnail) else: document_xml = opendocument.extract_useful_open_document_files(data, storage, filename) storage.add("%s/opendocument.xml" % filename, document_xml) process_pipeline(document_xml, pipeline_id, pipeline_type, auto_pipeline_id, storage, filename) storage.remove("%s/opendocument.xml" % filename) return storage def process_pipeline(initial_pipeline_value, pipeline_id, pipeline_type, auto_pipeline_id, storage, storage_prefix=None): pipeline_definition = docvert_pipeline.get_pipeline_definition(pipeline_type, pipeline_id, auto_pipeline_id) pipeline = docvert_pipeline.pipeline_processor(storage, pipeline_definition['stages'], pipeline_definition['pipeline_directory'], storage_prefix) return pipeline.start(initial_pipeline_value) def generate_open_document(data, converter=converter_type.python_streaming_to_libreoffice): if converter == converter_type.python_streaming_to_libreoffice: return docvert_libreoffice.get_client().convert_by_stream(data, docvert_libreoffice.LIBREOFFICE_OPEN_DOCUMENT) raise docvert_exception.unrecognised_converter("Unknown converter '%s'" % converter)