def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Run every supplied file through the requested pipeline.

    Each entry of ``files`` is type-detected; anything that is not already an
    OpenDocument is first passed to ``generate_open_document`` with
    ``converter``.  The useful parts of the document are then extracted into
    the storage and ``process_pipeline`` is invoked for that file.

    Parameters:
        files: mapping iterated as ``(filename, data)`` pairs
            (presumably filename -> document data; verify against callers).
        urls: only checked by the "files or urls" guard; this implementation
            does not read it any further.
        pipeline_id, pipeline_type, auto_pipeline_id: forwarded unchanged to
            ``process_pipeline``.
        storage_type_name: passed to ``docvert_storage.get_storage``.
        converter: passed to ``generate_open_document``.

    Returns:
        The storage object obtained from ``docvert_storage.get_storage``.

    Raises:
        docvert_exception.needs_files_or_urls: both ``files`` and ``urls``
            are None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)

    # The guard above lets a urls-only call through, so ``files`` can still
    # be None at this point.  Treat that as "no files" instead of failing
    # with AttributeError on None.iteritems().
    if files is None:
        files = {}

    storage = docvert_storage.get_storage(storage_type_name)
    for filename, data in files.iteritems():
        doc_type = document_type.detect_document_type(data)
        # Only non-OpenDocument input needs converting before extraction.
        if doc_type != document_type.types.oasis_open_document:
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(
            data, storage, filename)
        process_pipeline(
            document_xml, pipeline_id, pipeline_type, auto_pipeline_id,
            storage, filename)
    return storage
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice,
        suppress_errors=False):
    """Register supplied files and download ``urls`` into ``files``.

    Every name already in ``files`` gets itself as its friendly name in the
    storage.  Each URL is then downloaded; HTML responses are passed through
    ``html_to_opendocument``.  The result is stored in ``files`` as a
    ``StringIO`` under a filename derived from the URL, with a friendly name
    of the form ``"<filename> (<url>)"``.  When an ``IOError`` is raised
    while handling a URL, an ``Exception`` describing the failure is stored
    in ``files`` in place of the data.

    Note that a caller-supplied ``files`` mapping is modified in place.

    Parameters:
        files: mapping of filename -> file-like object, or None.
        urls: iterable of URL strings, or None.
        pipeline_id: must not be None.
        pipeline_type, auto_pipeline_id, converter, suppress_errors: not read
            by the code visible in this block.
        storage_type_name: passed to ``docvert_storage.get_storage``.

    Raises:
        docvert_exception.needs_files_or_urls: both ``files`` and ``urls``
            are None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)

    # The guard above only rejects the case where *both* are None, so either
    # one may still be None here.  Normalise them so the loops below do not
    # fail on None.iteritems() / iterating None.
    if files is None:
        files = {}
    if urls is None:
        urls = []

    storage = docvert_storage.get_storage(storage_type_name)

    def _title(name, files, data):
        """Derive a filename for ``name`` that does not clash within ``files``.

        Returns the existing name unchanged when ``data`` is non-empty and
        equals the content already stored under that name.
        """
        filename = os.path.basename(name)
        for separator in ('\\', '/', ':'):
            filename = filename.replace(separator, '-')
        if not filename:
            filename = "document.odt"
        if filename not in files:
            return filename

        existing = files[filename]
        # Only compare contents when the stored object really offers
        # getvalue(); the previous check looked for 'read' alone and then
        # called getvalue() regardless.
        if (data and hasattr(existing, 'read')
                and hasattr(existing, 'getvalue')
                and existing.getvalue() == data):
            return filename

        # Insert a counter before the first dot ("a.odt" -> "a2.odt"), or
        # append it when the name has no dot, until the name is unused.
        unique = 1
        candidate = filename
        while candidate in files:
            unique += 1
            if "." in filename:
                candidate = filename.replace(".", "%i." % unique, 1)
            else:
                candidate = filename + str(unique)
        return candidate

    for filename, data in files.iteritems():
        storage.set_friendly_name(filename, filename)

    for url in urls:
        try:
            # http_timeout is not defined in this block (presumably a
            # module-level setting; verify).
            response = urllib2.urlopen(url, None, http_timeout)
            try:
                data = response.read()
            finally:
                # Release the connection explicitly rather than relying on
                # garbage collection.
                response.close()
            doc_type = document_type.detect_document_type(data)
            if doc_type == document_type.types.html:
                data = html_to_opendocument(data, url)
            filename = _title(url, files, data)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = StringIO.StringIO(data)
        except IOError as e:
            # Keep a placeholder entry so the failed download stays visible
            # under its own name instead of aborting the whole batch.
            filename = _title(url, files, None)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = Exception(
                "Download error from %s: %s" % (url, e))
    # NOTE(review): as visible here the function ends after the download loop
    # and implicitly returns None; confirm whether the conversion/pipeline
    # stage and a ``return storage`` are meant to follow.
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Convert the given files to OpenDocument and apply a pipeline to each.

    For every ``(filename, data)`` pair in ``files`` the document type is
    detected, non-OpenDocument data is converted via
    ``generate_open_document(data, converter)``, the document is unpacked
    into the storage with
    ``opendocument.extract_useful_open_document_files`` and finally handed to
    ``process_pipeline``.

    Parameters:
        files: mapping iterated with ``iteritems()``; None is treated as
            empty.
        urls: participates only in the "files or urls" validation; it is not
            otherwise used by this implementation.
        pipeline_id: pipeline to run; must not be None.
        pipeline_type: forwarded to ``process_pipeline``.
        auto_pipeline_id: forwarded to ``process_pipeline``.
        storage_type_name: selects the storage via
            ``docvert_storage.get_storage``.
        converter: forwarded to ``generate_open_document``.

    Returns:
        The storage created for this conversion.

    Raises:
        docvert_exception.needs_files_or_urls: neither ``files`` nor ``urls``
            was supplied.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)

    storage = docvert_storage.get_storage(storage_type_name)

    # A urls-only call passes the validation above with files=None; iterate
    # an empty mapping in that case rather than raising AttributeError.
    pending = files if files is not None else {}
    for filename, data in pending.iteritems():
        detected = document_type.detect_document_type(data)
        is_open_document = (
            detected == document_type.types.oasis_open_document)
        if not is_open_document:
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(
            data, storage, filename)
        process_pipeline(
            document_xml, pipeline_id, pipeline_type, auto_pipeline_id,
            storage, filename)
    return storage