def get_pipeline_xml(pipeline_type, pipeline_id, auto_pipeline_id):
    """Parse a pipeline's ``pipeline.xml``, expanding it if it is an auto pipeline.

    The pipeline file is looked up at
    ``<docvert_root>/pipelines/<pipeline_type>/<pipeline_id>/pipeline.xml``.
    When its root element is ``autopipeline``, the serialised children of that
    root replace the ``{{custom-stages}}`` placeholder in the auto pipeline
    template stored under ``pipelines/auto_pipelines/<auto_pipeline_id>/``,
    falling back to the ``<auto_pipeline_id>.default`` directory when the
    first template cannot be opened.

    Raises:
        docvert_exception.unrecognised_pipeline: the pipeline file is missing.
        docvert_exception.unrecognised_auto_pipeline: the pipeline is an auto
            pipeline and ``auto_pipeline_id`` is None, or neither the auto
            pipeline template nor its ``.default`` fallback exists.
    """
    path = os.path.join(
        docvert_root, "pipelines", pipeline_type, pipeline_id, "pipeline.xml")
    if not os.path.exists(path):
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline_id '%s' (checked %s)" % (pipeline_id, path))
    autopipeline_path = None
    xml = lxml.etree.parse(path)
    if xml.getroot().tag == "autopipeline":
        if auto_pipeline_id is None:
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        autopipeline_path = os.path.join(
            docvert_root, "pipelines", "auto_pipelines",
            auto_pipeline_id, "pipeline.xml")
        autopipeline_path_with_default = os.path.join(
            docvert_root, "pipelines", "auto_pipelines",
            "%s.default" % auto_pipeline_id, "pipeline.xml")
        # The previous check re-tested `path`, which is already known to
        # exist, so it could never fire. Test the auto pipeline candidates.
        if not (os.path.exists(autopipeline_path)
                or os.path.exists(autopipeline_path_with_default)):
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        custom_stages = "".join(map(lxml.etree.tostring, xml.getroot()))
        try:
            autopipeline_handle = open(autopipeline_path)
        except IOError:
            # Primary template could not be opened; use the ".default" one.
            autopipeline_handle = open(autopipeline_path_with_default)
        try:
            autopipeline = autopipeline_handle.read().replace(
                '{{custom-stages}}', custom_stages)
        finally:
            # Always release the file handle, even if reading fails.
            autopipeline_handle.close()
        xml = lxml.etree.fromstring(autopipeline)
        xml = xml.getroottree()
    # NOTE(review): no return statement is visible for this definition here;
    # callers presumably expect the parsed tree back -- confirm against the
    # complete file.
def process_conversion(files=None, urls=None, pipeline_id=None,
                       pipeline_type="pipelines", auto_pipeline_id=None,
                       storage_type_name=docvert_storage.storage_type.memory_based,
                       converter=converter_type.python_streaming_to_libreoffice):
    """Run each supplied document through a pipeline and return the storage.

    Every ``(filename, data)`` pair in ``files`` is converted to OpenDocument
    with ``generate_open_document`` unless it is already detected as one, is
    unpacked into the storage, and is then handed to ``process_pipeline``.

    Raises:
        docvert_exception.needs_files_or_urls: both ``files`` and ``urls``
            are None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.

    Returns the storage object obtained from ``docvert_storage.get_storage``.
    """
    nothing_supplied = files is None and urls is None
    if nothing_supplied:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        message = "Unknown pipeline '%s'" % pipeline_id
        raise docvert_exception.unrecognised_pipeline(message)

    storage = docvert_storage.get_storage(storage_type_name)
    # NOTE(review): `urls` is only consulted by the guard above; a call with
    # urls but no files reaches this loop with files=None -- confirm intent.
    for name, payload in files.iteritems():
        detected = document_type.detect_document_type(payload)
        if detected != document_type.types.oasis_open_document:
            # Anything that is not already OpenDocument is converted first.
            payload = generate_open_document(payload, converter)
        extracted_xml = opendocument.extract_useful_open_document_files(
            payload, storage, name)
        process_pipeline(
            extracted_xml, pipeline_id, pipeline_type,
            auto_pipeline_id, storage, name)
    return storage
def get_pipeline_xml(namespaced_pipeline_id, auto_pipeline_id):
    """Parse a pipeline's ``pipeline.xml`` and describe where it was found.

    The file is looked up at
    ``<docvert_root>/pipelines/<namespaced_pipeline_id>/pipeline.xml``.

    Returns:
        A dict with keys ``xml`` (the parsed lxml tree), ``pipeline_directory``
        (directory containing the file), ``path`` (the file itself) and
        ``autopath`` (always None, because auto pipelines are rejected below).

    Raises:
        docvert_exception.unrecognised_pipeline: the pipeline file is missing.
        docvert_exception.unrecognised_auto_pipeline: the root element is
            ``autopipeline`` and ``auto_pipeline_id`` is None.
        NotImplementedError: the root element is ``autopipeline`` and an
            ``auto_pipeline_id`` was given; auto pipelines are not supported
            by this function.
    """
    path = os.path.join(
        docvert_root, "pipelines", namespaced_pipeline_id, "pipeline.xml")
    autopath = None
    if not os.path.exists(path):
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s' (checked %s)" % (namespaced_pipeline_id, path))
    xml = lxml.etree.parse(path)
    if xml.getroot().tag == "autopipeline":
        if auto_pipeline_id is None:
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        # The statements that used to follow this raise were unreachable (and
        # re-checked `path` instead of the auto pipeline path), so they have
        # been dropped. NotImplementedError subclasses Exception, so existing
        # `except Exception` handlers keep working.
        raise NotImplementedError("Sorry, auto pipelines aren't implemented yet.")
    return dict(
        xml=xml,
        pipeline_directory=os.path.dirname(path),
        path=path,
        autopath=autopath)
def process_conversion(files=None, urls=None, pipeline_id=None,
                       pipeline_type="pipelines", auto_pipeline_id=None,
                       storage_type_name=docvert_storage.storage_type.memory_based,
                       converter=converter_type.python_streaming_to_libreoffice,
                       suppress_errors=False):
    """Register supplied files and download supplied URLs for conversion.

    Each key of ``files`` is registered with the storage under its own name.
    Each entry of ``urls`` is downloaded, converted with
    ``html_to_opendocument`` when detected as HTML, and added to ``files``
    under a unique name derived from the URL. A download that raises IOError
    is recorded in ``files`` as an ``Exception`` instance instead of aborting
    the remaining URLs.

    ``files`` is modified in place when the caller passes one.

    Raises:
        docvert_exception.needs_files_or_urls: both ``files`` and ``urls``
            are None.
        docvert_exception.unrecognised_pipeline: ``pipeline_id`` is None.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)
    # The guard above only requires ONE of files/urls, yet both are iterated
    # below; normalise the missing one so such calls no longer crash.
    if files is None:
        files = {}
    if urls is None:
        urls = []
    storage = docvert_storage.get_storage(storage_type_name)

    def _title(name, files, data):
        """Return a key for ``name`` that does not clash with ``files``."""
        filename = (os.path.basename(name)
                    .replace('\\', '-').replace('/', '-').replace(':', '-'))
        if not filename:
            filename = "document.odt"
        if filename in files:
            # Identical content already stored under this name: reuse the key.
            if (data and hasattr(files[filename], 'read')
                    and files[filename].getvalue() == data):
                return filename
            unique = 1
            potential_filename = filename
            while potential_filename in files:
                unique += 1
                if filename.count("."):
                    # Put the counter before the first dot: "a.odt" -> "a2.odt".
                    potential_filename = filename.replace(
                        ".", "%i." % unique, 1)
                else:
                    potential_filename = filename + str(unique)
            filename = potential_filename
        return filename

    for filename in files:
        storage.set_friendly_name(filename, filename)
    for url in urls:
        try:
            data = urllib2.urlopen(url, None, http_timeout).read()
            doc_type = document_type.detect_document_type(data)
            if doc_type == document_type.types.html:
                data = html_to_opendocument(data, url)
            filename = _title(url, files, data)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = StringIO.StringIO(data)
        except IOError as e:
            # Keep going: store the failure in place of the document.
            filename = _title(url, files, None)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = Exception(
                "Download error from %s: %s" % (url, e))
    # NOTE(review): the conversion stage, the use of `suppress_errors` and the
    # return value are not visible for this definition here -- confirm
    # against the complete file.
def get_pipeline_xml(pipeline_type, pipeline_id, auto_pipeline_id):
    """Parse a pipeline's ``pipeline.xml``, expanding it if it is an auto pipeline.

    The pipeline file is looked up at
    ``<docvert_root>/pipelines/<pipeline_type>/<pipeline_id>/pipeline.xml``.
    When its root element is ``autopipeline``, the serialised children of that
    root replace the ``{{custom-stages}}`` placeholder in the auto pipeline
    template stored under ``pipelines/auto_pipelines/<auto_pipeline_id>/``,
    falling back to the ``<auto_pipeline_id>.default`` directory when the
    first template cannot be opened.

    Raises:
        docvert_exception.unrecognised_pipeline: the pipeline file is missing.
        docvert_exception.unrecognised_auto_pipeline: the pipeline is an auto
            pipeline and ``auto_pipeline_id`` is None, or neither the auto
            pipeline template nor its ``.default`` fallback exists.
    """
    path = os.path.join(
        docvert_root, "pipelines", pipeline_type, pipeline_id, "pipeline.xml")
    if not os.path.exists(path):
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline_id '%s' (checked %s)" % (pipeline_id, path))
    autopipeline_path = None
    xml = lxml.etree.parse(path)
    if xml.getroot().tag == "autopipeline":
        if auto_pipeline_id is None:
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        autopipeline_path = os.path.join(
            docvert_root, "pipelines", "auto_pipelines",
            auto_pipeline_id, "pipeline.xml")
        autopipeline_path_with_default = os.path.join(
            docvert_root, "pipelines", "auto_pipelines",
            "%s.default" % auto_pipeline_id, "pipeline.xml")
        # The previous check re-tested `path`, which is already known to
        # exist, so it could never fire. Test the auto pipeline candidates.
        if not (os.path.exists(autopipeline_path)
                or os.path.exists(autopipeline_path_with_default)):
            raise docvert_exception.unrecognised_auto_pipeline(
                "Unknown auto pipeline '%s'" % auto_pipeline_id)
        custom_stages = "".join(map(lxml.etree.tostring, xml.getroot()))
        try:
            autopipeline_handle = open(autopipeline_path)
        except IOError:
            # Primary template could not be opened; use the ".default" one.
            autopipeline_handle = open(autopipeline_path_with_default)
        try:
            autopipeline = autopipeline_handle.read().replace(
                '{{custom-stages}}', custom_stages)
        finally:
            # Always release the file handle, even if reading fails.
            autopipeline_handle.close()
        xml = lxml.etree.fromstring(autopipeline)
        xml = xml.getroottree()
    # NOTE(review): no return statement is visible for this definition here;
    # callers presumably expect the parsed tree back -- confirm against the
    # complete file.
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    """Convert the given files with the chosen pipeline.

    For each ``(filename, data)`` item of ``files``: data that is not detected
    as OpenDocument is first passed through ``generate_open_document``; the
    result is unpacked into the storage and processed by ``process_pipeline``.

    Raises ``docvert_exception.needs_files_or_urls`` when ``files`` and
    ``urls`` are both None, and ``docvert_exception.unrecognised_pipeline``
    when ``pipeline_id`` is None. Returns the storage that was filled.
    """
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline(
            "Unknown pipeline '%s'" % pipeline_id)

    result_storage = docvert_storage.get_storage(storage_type_name)

    # NOTE(review): `urls` is not read after the guard; with files=None and
    # urls set, the loop below is reached with files=None -- confirm intent.
    for document_name, document_data in files.iteritems():
        kind = document_type.detect_document_type(document_data)
        if kind != document_type.types.oasis_open_document:
            document_data = generate_open_document(document_data, converter)
        useful_xml = opendocument.extract_useful_open_document_files(
            document_data, result_storage, document_name)
        process_pipeline(useful_xml, pipeline_id, pipeline_type,
                         auto_pipeline_id, result_storage, document_name)

    return result_storage