Esempio n. 1
0
 def __enter__(self, *args):
     """
     Add this plugin to the python path so that it's contents become
     directly importable.  Useful when bundling large python libraries into
     the plugin. Use it like this::
         with plugin:
             import something
     """
     if self.plugin_path is not None:
         from ebook_converter.utils.zipfile import ZipFile
         zf = ZipFile(self.plugin_path)
         extensions = {x.rpartition('.')[-1].lower() for x in
                       zf.namelist()}
         zip_safe = True
         for ext in ('pyd', 'so', 'dll', 'dylib'):
             if ext in extensions:
                 zip_safe = False
                 break
         if zip_safe:
             sys.path.insert(0, self.plugin_path)
             self.sys_insertion_path = self.plugin_path
         else:
             from ebook_converter.ptempfile import TemporaryDirectory
             self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
             self.sys_insertion_path = (self._sys_insertion_tdir.
                                        __enter__(*args))
             zf.extractall(self.sys_insertion_path)
             sys.path.insert(0, self.sys_insertion_path)
         zf.close()
Esempio n. 2
0
    def safe_replace(self,
                     name,
                     datastream,
                     extra_replacements={},
                     add_missing=False):
        from ebook_converter.utils.zipfile import ZipFile, ZipInfo
        replacements = {name: datastream}
        replacements.update(extra_replacements)
        names = frozenset(list(replacements.keys()))
        found = set()

        def rbytes(name):
            r = replacements[name]
            if not isinstance(r, bytes):
                r = r.read()
            return r

        with SpooledTemporaryFile(max_size=100 * 1024 * 1024) as temp:
            ztemp = ZipFile(temp, 'w')
            for offset, header in self.file_info.values():
                if header.filename in names:
                    zi = ZipInfo(header.filename)
                    zi.compress_type = header.compression_method
                    ztemp.writestr(zi, rbytes(header.filename))
                    found.add(header.filename)
                else:
                    ztemp.writestr(header.filename,
                                   self.read(header.filename, spool_size=0))
            if add_missing:
                for name in names - found:
                    ztemp.writestr(name, rbytes(name))
            ztemp.close()
            zipstream = self.stream
            temp.seek(0)
            zipstream.seek(0)
            zipstream.truncate()
            shutil.copyfileobj(temp, zipstream)
            zipstream.flush()
Esempio n. 3
0
    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators):
        from ebook_converter.web.feeds.recipes import compile_recipe
        opts.output_profile.flow_size = 0
        if file_ext == 'downloaded_recipe':
            from ebook_converter.utils.zipfile import ZipFile
            zf = ZipFile(recipe_or_file, 'r')
            zf.extractall()
            zf.close()
            with open('download.recipe', 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            recipe.needs_subscription = False
            self.recipe_object = recipe(opts, log, self.report_progress)
        else:
            if os.environ.get('CALIBRE_RECIPE_URN'):
                from ebook_converter.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
                urn = os.environ['CALIBRE_RECIPE_URN']
                log('Downloading recipe urn: ' + urn)
                rtype, recipe_id = urn.partition(':')[::2]
                if not recipe_id:
                    raise ValueError('Invalid recipe urn: ' + urn)
                if rtype == 'custom':
                    self.recipe_source = get_custom_recipe(recipe_id)
                else:
                    self.recipe_source = get_builtin_recipe_by_id(urn, log=log, download_recipe=True)
                if not self.recipe_source:
                    raise ValueError('Could not find recipe with urn: ' + urn)
                if not isinstance(self.recipe_source, bytes):
                    self.recipe_source = self.recipe_source.encode('utf-8')
                recipe = compile_recipe(self.recipe_source)
            elif os.access(recipe_or_file, os.R_OK):
                with open(recipe_or_file, 'rb') as f:
                    self.recipe_source = f.read()
                recipe = compile_recipe(self.recipe_source)
                log('Using custom recipe')
            else:
                from ebook_converter.web.feeds.recipes.collection import (
                        get_builtin_recipe_by_title, get_builtin_recipe_titles)
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = os.path.basename(title).rpartition('.')[0]
                titles = frozenset(get_builtin_recipe_titles())
                if title not in titles:
                    title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                    title = title.rpartition('.')[0]

                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=not opts.dont_download_recipe)
                builtin = False
                try:
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                    if recipe.requires_version > numeric_version:
                        log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(recipe.requires_version)))
                        builtin = True
                except:
                    log.exception('Failed to compile downloaded recipe. Falling '
                            'back to builtin one')
                    builtin = True
                if builtin:
                    log('Using bundled builtin recipe')
                    raw = get_builtin_recipe_by_title(title, log=log,
                            download_recipe=False)
                    if raw is None:
                        raise ValueError('Failed to find builtin recipe: '+title)
                    recipe = compile_recipe(raw)
                    self.recipe_source = raw
                else:
                    log('Using downloaded builtin recipe')

            if recipe is None:
                raise ValueError('%r is not a valid recipe file or builtin recipe' %
                        recipe_or_file)

            disabled = getattr(recipe, 'recipe_disabled', None)
            if disabled is not None:
                raise RecipeDisabled(disabled)
            ro = recipe(opts, log, self.report_progress)
            ro.download()
            self.recipe_object = ro

        for key, val in self.recipe_object.conversion_options.items():
            setattr(opts, key, val)

        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        for f in walk('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
Esempio n. 4
0
class DOCX(object):
    def __init__(self, path_or_stream, log=None, extract=True):
        self.docx_is_transitional = True
        stream = path_or_stream
        if not hasattr(path_or_stream, 'read'):
            stream = open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            self.extract(stream)
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()
        self.namespace = DOCXNamespace(self.docx_is_transitional)

    def init_zipfile(self, stream):
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                               ' more forgiving ZIP parser')
            from ebook_converter.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        return name in self.names

    def read(self, name):
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no '
                              '[Content_Types].xml' % self.name)
        root = etree.fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()='
                               '"Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = \
                    item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()='
                               '"Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return mimetypes.guess_type(name)[0]

    def read_package_relationships(self):
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' %
                              self.name)
        root = etree.fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name'
                               '()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
                self.docx_is_transitional = (typ != 'http://purl.oclc.org/'
                                             'ooxml/officeDocument/'
                                             'relationships/officeDocument')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
            names = tuple(
                n for n in self.names
                if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main '
                                  'document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        return etree.fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = etree.fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*'
                                   '[local-name()="Relationship" and @Type '
                                   'and @Target]'):
                target = item.get('Target')
                if (item.get('TargetMode', None) != 'External'
                        and not target.startswith('#')):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target

        return by_id, by_type

    def get_document_properties_names(self):
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names
                          if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names
                          if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name

    @property
    def metadata(self):
        mi = Metadata('Unknown')
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

        ap_name = self.relationships.get(self.namespace.names['APPPROPS'],
                                         None)
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass