Esempio n. 1
0
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice):
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    for filename, data in files.iteritems():
        doc_type = document_type.detect_document_type(data)
        if doc_type != document_type.types.oasis_open_document:
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(data, storage, filename)
        process_pipeline(document_xml, pipeline_id, pipeline_type, auto_pipeline_id, storage, filename)
    return storage
Esempio n. 2
0
def process_conversion(files=None, urls=None, pipeline_id=None, pipeline_type="pipelines", auto_pipeline_id=None, storage_type_name=docvert_storage.storage_type.memory_based, converter=converter_type.python_streaming_to_libreoffice, suppress_errors=False):
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" % pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)

    def _title(name, files, data):
        filename = os.path.basename(name).replace('\\','-').replace('/','-').replace(':','-')
        if len(filename) == 0:
            filename = "document.odt"
        if files.has_key(filename):
            if data and hasattr(files[filename], 'read') and files[filename].getvalue() == data:
                return filename
            unique = 1
            potential_filename = filename
            while files.has_key(potential_filename):
                unique += 1
                if filename.count("."):
                    potential_filename = filename.replace(".", "%i." % unique, 1)
                else:
                    potential_filename = filename + str(unique)
            filename = potential_filename
        return filename

    for filename, data in files.iteritems():
        storage.set_friendly_name(filename, filename)

    for url in urls:
        try:
            data = urllib2.urlopen(url, None, http_timeout).read()
            doc_type = document_type.detect_document_type(data)
            if doc_type == document_type.types.html:
                data = html_to_opendocument(data, url)
            filename = _title(url, files, data)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = StringIO.StringIO(data)
        except IOError, e:
            filename = _title(url, files, None)
            storage.set_friendly_name(filename, "%s (%s)" % (filename, url))
            files[filename] = Exception("Download error from %s: %s" % (url, e))
Esempio n. 3
0
def process_conversion(
        files=None,
        urls=None,
        pipeline_id=None,
        pipeline_type="pipelines",
        auto_pipeline_id=None,
        storage_type_name=docvert_storage.storage_type.memory_based,
        converter=converter_type.python_streaming_to_libreoffice):
    if files is None and urls is None:
        raise docvert_exception.needs_files_or_urls()
    if pipeline_id is None:
        raise docvert_exception.unrecognised_pipeline("Unknown pipeline '%s'" %
                                                      pipeline_id)
    storage = docvert_storage.get_storage(storage_type_name)
    for filename, data in files.iteritems():
        doc_type = document_type.detect_document_type(data)
        if doc_type != document_type.types.oasis_open_document:
            data = generate_open_document(data, converter)
        document_xml = opendocument.extract_useful_open_document_files(
            data, storage, filename)
        process_pipeline(document_xml, pipeline_id, pipeline_type,
                         auto_pipeline_id, storage, filename)
    return storage