Beispiel #1
0
def update(pathtozip,
           patterns,
           filepaths,
           names,
           compression=zipfile.ZIP_DEFLATED,
           verbose=True):
    '''
    Update files in the zip file at `pathtozip` matching the given
    `patterns` with the given `filepaths`. If more than
    one file matches, all of the files are replaced.

    Each archive member is tested against the patterns in order and only the
    first matching pattern is applied to it.

    :param pathtozip:   Path of the zip file to modify; it is opened in
                        append mode.
    :param patterns:    A list of compiled regular expressions
    :param filepaths:   A list of paths to the replacement files. Must have the
                        same length as `patterns`.
    :param names:       A list of archive names for each file in filepaths.
                        A name can be `None` in which case the name of the existing
                        file in the archive is used.
    :param compression: The compression to use when replacing files. Can be
                        either `zipfile.ZIP_DEFLATED` or `zipfile.ZIP_STORED`.
    :param verbose:     If true, print one line for every member that is
                        replaced.
    '''
    assert len(patterns) == len(filepaths) == len(names)
    # Build the (pattern, replacement path, new name) triples once instead of
    # re-zipping the three lists for every archive member.
    rules = list(zip(patterns, filepaths, names))
    z = zipfile.ZipFile(pathtozip, mode='a')
    try:
        for name in z.namelist():
            for pat, fname, new_name in rules:
                if not pat.search(name):
                    continue
                if verbose:
                    print('Updating %s with %s' % (name, fname))
                if new_name is None:
                    z.replace(fname, arcname=name, compress_type=compression)
                else:
                    z.delete(name)
                    z.write(fname, new_name, compress_type=compression)
                # Only the first matching pattern is applied to a member.
                break
    finally:
        # Always close the archive, even if a replacement fails part-way,
        # so the file handle is not leaked.
        z.close()
    def write(self, path):
        '''
        Pack the directory tree at `self.root` into a new zip file at `path`.

        Any existing file at `path` is removed first. A ``mimetype`` entry is
        written first and stored uncompressed; every other file found under
        `self.root` whose name is not in `EXCLUDE_FROM_ZIP` is added with
        deflate compression, using its path relative to `self.root` as the
        archive name.

        :param path: Destination path of the archive to create.
        '''
        self.flush_cache()

        if os.path.exists(path):
            os.unlink(path)

        cwd = os.getcwdu()
        epub = zipfile.ZipFile(path, 'w', compression=zipfile.ZIP_DEFLATED)
        try:
            try:
                epub.writestr('mimetype',
                              bytes(guess_type('a.epub')[0]),
                              compression=zipfile.ZIP_STORED)

                # Files are added by relative path, so work from inside root.
                os.chdir(self.root)
                zip_prefix = self.root
                if not zip_prefix.endswith(os.sep):
                    zip_prefix += os.sep
                for dirpath, _dirnames, filenames in os.walk(self.root,
                                                             topdown=True):
                    for f in filenames:
                        if f in EXCLUDE_FROM_ZIP:
                            continue
                        filepath = os.path.join(dirpath, f)
                        # Strip only the leading root prefix; a blanket
                        # str.replace would also remove later occurrences.
                        if filepath.startswith(zip_prefix):
                            filepath = filepath[len(zip_prefix):]
                        st = os.stat(filepath)
                        mtime = time.localtime(st.st_mtime)
                        if mtime.tm_year < 1980:
                            # Reset pre-1980 timestamps to the current time
                            # before adding the file to the archive.
                            os.utime(filepath, None)
                        epub.write(filepath)
            finally:
                # Close the archive even when adding a file fails.
                epub.close()
        finally:
            # Never leave the process in self.root after an error.
            os.chdir(cwd)
Beispiel #3
0
def extract_member(filename, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I), sort_alphabetically=False):
    '''
    Return the first member of the zip file `filename` whose name matches
    `match`.

    :param filename:            Path (or file-like object) of the zip archive.
    :param match:               Compiled regular expression searched against
                                each member name. Defaults to common image
                                extensions, case-insensitively.
    :param sort_alphabetically: If true, member names are sorted with
                                `sort_key` before searching; otherwise they
                                are examined in the order `namelist()`
                                returns them.
    :return: A ``(name, data)`` tuple for the first matching member, or
             `None` when no member matches.
    '''
    # The context manager closes the archive on every exit path, including
    # the early return below.
    with zipfile.ZipFile(filename) as zf:
        names = list(zf.namelist())
        if sort_alphabetically:
            names.sort(key=sort_key)
        for name in names:
            if match.search(name):
                return name, zf.read(name)
    return None
    def __init__(self, path):
        '''
        Unpack the zip file at `path` into a temporary directory and index
        its contents.

        After construction the instance has:

        * ``root``      - absolute path of the extraction directory
        * ``container`` - parsed ``META-INF/container.xml``
        * ``opf_name``  - archive-relative name of the OPF file
        * ``name_map``  - relative name (``/`` separators) -> absolute path
        * ``mime_map``  - relative name -> media type, first guessed from the
                          file name and then overridden by the media types
                          declared in the OPF manifest

        :param path: Path of the archive to open.
        :raises InvalidEpub: if ``META-INF/container.xml`` is missing, names
                             no OPF rootfile, or names one that does not
                             exist.
        '''
        tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
        # Close the archive as soon as extraction is done so the handle is
        # not held for the lifetime of the object.
        with zipfile.ZipFile(path) as zf:
            zf.extractall(tmpdir)

        self.root = os.path.abspath(tmpdir)
        self.log = logging.Log(level=logging.DEBUG if DEBUG else logging.WARN)
        self.dirtied = set()
        self.cache = {}
        self.mime_map = {}

        print("Container:__init__:Got container path {0}".format(self.root))

        # The extracted 'mimetype' file is dropped from the working tree.
        if os.path.exists(os.path.join(self.root, 'mimetype')):
            os.remove(os.path.join(self.root, 'mimetype'))

        container_path = os.path.join(self.root, 'META-INF', 'container.xml')
        if not os.path.exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        with open(container_path, 'rb') as container_file:
            self.container = etree.fromstring(container_file.read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'
            .format(guess_type('a.opf')[0])),
                                         namespaces=self.namespaces)
        if not opf_files:
            raise InvalidEpub(
                'META-INF/container.xml contains no link to OPF file')
        # Only the first matching rootfile is used.
        opf_path = os.path.join(self.root,
                                *opf_files[0].get('full-path').split('/'))
        if not os.path.exists(opf_path):
            raise InvalidEpub(
                'OPF file does not exist at location pointed to by META-INF/container.xml'
            )

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = os.path.join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                self.mime_map[name] = guess_type(f)[0]
                if path == opf_path:
                    self.opf_name = name
                    # Force the OPF media type regardless of its file name.
                    self.mime_map[name] = guess_type('a.opf')[0]

        opf = self.opf
        for item in opf.xpath('//opf:manifest/opf:item[@href and @media-type]',
                              namespaces=self.namespaces):
            # Store hrefs unquoted in the OPF and let the manifest's declared
            # media type override the guess made from the file name.
            href = unquote(item.get('href'))
            item.set("href", href)
            self.mime_map[self.href_to_name(
                href,
                os.path.dirname(self.opf_name).replace(
                    os.sep, '/'))] = item.get('media-type')
        self.set(self.opf_name, opf)
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write the JPEG and PNG images of `oeb_book` into a zip archive at
        `output_path`.

        Images are first written into a temporary directory under the base
        name of their href; the image referenced by the guide's cover entry
        is written as ``00000`` plus the cover's extension. The directory is
        then added to the archive and removed.

        :param oeb_book:     Book object; its ``guide`` and ``manifest`` are
                             read.
        :param output_path:  Path of the archive to create.
        :param input_plugin: Not used by this method.
        :param opts:         Not used by this method.
        :param log:          Logger; only ``log.info`` is called.
        '''
        from calibre.utils import zipfile
        # NOTE(review): Templite and etree are not referenced in this method.
        from templite import Templite
        from lxml import etree

        image_types = ['image/jpeg', 'image/png']

        tempdir = os.path.realpath(PersistentTemporaryDirectory())

        log.info('Creating temp dir ' + tempdir)

        with CurrentDir(tempdir):
            cover_ref = None
            cover_ext = None

            # NOTE(review): presumably guide['cover'] can raise if the guide
            # has no cover entry - confirm against the guide implementation.
            if oeb_book.guide['cover'] is not None:
                cover_ref = oeb_book.guide['cover'].href
                orig_name, file_extension = os.path.splitext(cover_ref)
                cover_ext = file_extension

            for item in oeb_book.manifest:
                if item.media_type in image_types:
                    log.info('Found image ' + item.id + ' ' + item.media_type +
                             ' ' + item.href)

                    if cover_ref is not None and item.href == cover_ref:
                        file_name = os.path.join(tempdir, '00000' + cover_ext)
                    else:
                        file_name = os.path.join(tempdir,
                                                 os.path.basename(item.href))

                    with open(file_name, 'wb') as image:
                        image.write(item.data)

        log.info('Finished extracting images, repackaging them as CBZ ' +
                 output_path)

        zfile = zipfile.ZipFile(output_path, mode="w")
        try:
            zfile.add_dir(tempdir)
            log.info('Added files. Preparing to compress.')
        finally:
            # Close explicitly so the archive is finalised and the handle is
            # released before the source files are deleted, rather than
            # relying on garbage collection.
            zfile.close()

        log.info('Cleaning up temp dir...')
        shutil.rmtree(tempdir)
        log.info('All done.')
Beispiel #6
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render `oeb_book` as templated HTML pages and pack them into a zip
        archive at `output_path`.

        Steps, all carried out inside a temporary directory:

        1. Load the index, page and CSS templates, either from the paths in
           `opts` or from the bundled defaults.
        2. Write the CSS file and the rendered index page.
        3. Write every non-spine manifest item to disk and create an empty
           placeholder file for every spine item.
        4. Render each spine item through the page template, with links to
           the previous/next page, the table of contents and the CSS file.
        5. Add the ``_files`` directory and the index page to the archive,
           optionally extract the archive to `opts.extract_to`, and remove
           the temporary directory.

        :param oeb_book:     Book object; its ``metadata``, ``spine``,
                             ``manifest`` and ``toc`` are read.
        :param output_path:  Path of the zip archive to create.
        :param input_plugin: Not used by this method.
        :param opts:         Options; ``template_html_index``,
                             ``template_html``, ``template_css`` and
                             ``extract_to`` are read.
        :param log:          Logger; stored on ``self.log``.
        '''
        from lxml import etree
        from calibre.utils import zipfile
        from templite import Templite
        from urllib import unquote
        from calibre.ebooks.html.meta import EasyMeta

        # read template files
        # NOTE(review): the files opened with open(...).read() below are
        # never explicitly closed.
        if opts.template_html_index is not None:
            template_html_index_data = open(opts.template_html_index,
                                            'rb').read()
        else:
            template_html_index_data = P(
                'templates/html_export_default_index.tmpl', data=True)

        if opts.template_html is not None:
            template_html_data = open(opts.template_html, 'rb').read()
        else:
            template_html_data = P('templates/html_export_default.tmpl',
                                   data=True)

        if opts.template_css is not None:
            template_css_data = open(opts.template_css, 'rb').read()
        else:
            template_css_data = P('templates/html_export_default.css',
                                  data=True)

        # Templates are read as bytes and decoded as UTF-8 before rendering.
        template_html_index_data = template_html_index_data.decode('utf-8')
        template_html_data = template_html_data.decode('utf-8')
        template_css_data = template_css_data.decode('utf-8')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)

        # Index page is <tempdir>/<name>.html and the content directory is
        # <tempdir>/<name>_files.
        # NOTE(review): the patterns are unanchored, so every '.zip' /
        # '.html' occurrence in the path is removed, not only a trailing
        # extension.
        tempdir = os.path.realpath(PersistentTemporaryDirectory())
        output_file = os.path.join(
            tempdir, basename(re.sub(r'\.zip', '', output_path) + '.html'))
        output_dir = re.sub(r'\.html', '', output_file) + '_files'

        if not exists(output_dir):
            os.makedirs(output_dir)

        css_path = output_dir + os.sep + 'calibreHtmlOutBasicCss.css'
        with open(css_path, 'wb') as f:
            f.write(template_css_data.encode('utf-8'))

        # Render the index page; all links are relative to its directory.
        with open(output_file, 'wb') as f:
            html_toc = self.generate_html_toc(oeb_book, output_file,
                                              output_dir)
            templite = Templite(template_html_index_data)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir + os.sep + nextLink,
                               dirname(output_file))
            cssLink = relpath(abspath(css_path), dirname(output_file))
            tocUrl = relpath(output_file, dirname(output_file))
            t = templite.render(has_toc=bool(oeb_book.toc.count()),
                                toc=html_toc,
                                meta=meta,
                                nextLink=nextLink,
                                tocUrl=tocUrl,
                                cssLink=cssLink,
                                firstContentPageLink=nextLink)
            if isinstance(t, unicode_type):
                t = t.encode('utf-8')
            f.write(t)

        with CurrentDir(output_dir):
            # First pass: write non-spine items to disk; spine items only get
            # an empty placeholder file, which the second loop overwrites.
            for item in oeb_book.manifest:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                if not exists(dir):
                    os.makedirs(dir)
                if item.spine_position is not None:
                    with open(path, 'wb') as f:
                        pass
                else:
                    with open(path, 'wb') as f:
                        f.write(str(item))
                    item.unload_data_from_memory(memory=path)

            # Second pass: render every spine item through the page template.
            for item in oeb_book.spine:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                root = item.data.getroottree()

                # get & clean HTML <HEAD>-data
                # The <head> tags and any <style> blocks are stripped, and a
                # self-closed <title/> is expanded to an open/close pair.
                # NOTE(review): presumably etree.tostring(encoding='utf-8')
                # returns bytes while the patterns below are str - confirm
                # this works on the targeted Python version.
                head = root.xpath(
                    '//h:head',
                    namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                head_content = etree.tostring(head,
                                              pretty_print=True,
                                              encoding='utf-8')
                head_content = re.sub(r'\<\/?head.*\>', '', head_content)
                head_content = re.sub(
                    re.compile(r'\<style.*\/style\>', re.M | re.S), '',
                    head_content)
                head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>',
                                      head_content)

                # get & clean HTML <BODY>-data
                # The <body> tags are stripped and self-closed div/a/span
                # elements are expanded to an open/close pair.
                body = root.xpath(
                    '//h:body',
                    namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                ebook_content = etree.tostring(body,
                                               pretty_print=True,
                                               encoding='utf-8')
                ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
                ebook_content = re.sub(r'<(div|a|span)([^>]*)/>',
                                       r'<\1\2></\1>', ebook_content)

                # generate link to next page
                if item.spine_position + 1 < len(oeb_book.spine):
                    nextLink = oeb_book.spine[item.spine_position + 1].href
                    nextLink = relpath(abspath(nextLink), dir)
                else:
                    nextLink = None

                # generate link to previous page
                if item.spine_position > 0:
                    prevLink = oeb_book.spine[item.spine_position - 1].href
                    prevLink = relpath(abspath(prevLink), dir)
                else:
                    prevLink = None

                cssLink = relpath(abspath(css_path), dir)
                tocUrl = relpath(output_file, dir)
                # NOTE(review): unlike the other links, this href is not made
                # relative to the current page's directory.
                firstContentPageLink = oeb_book.spine[0].href

                # render template
                # The TOC is passed as a callable, so it is only generated if
                # something calls it.
                templite = Templite(template_html_data)
                toc = lambda: self.generate_html_toc(oeb_book, path, output_dir
                                                     )
                t = templite.render(ebookContent=ebook_content,
                                    prevLink=prevLink,
                                    nextLink=nextLink,
                                    has_toc=bool(oeb_book.toc.count()),
                                    toc=toc,
                                    tocUrl=tocUrl,
                                    head_content=head_content,
                                    meta=meta,
                                    cssLink=cssLink,
                                    firstContentPageLink=firstContentPageLink)

                # write html to file
                # NOTE(review): unlike the index page, `t` is written without
                # the unicode-to-UTF-8 encoding step.
                with open(path, 'wb') as f:
                    f.write(t)
                item.unload_data_from_memory(memory=path)

        # Pack the content directory and the index page into the archive.
        # NOTE(review): zfile is not closed if any call below raises.
        zfile = zipfile.ZipFile(output_path, "w")
        zfile.add_dir(output_dir, basename(output_dir))
        zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)

        # Optionally unpack the archive into opts.extract_to; any existing
        # directory at that location is deleted first.
        if opts.extract_to:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.makedirs(opts.extract_to)
            zfile.extractall(opts.extract_to)
            self.log('Zip file extracted to', opts.extract_to)

        zfile.close()

        # cleanup temp dir
        shutil.rmtree(tempdir)
Beispiel #7
0
def extract_cover_image(filename):
    '''
    Return ``(name, data)`` for the first acceptable member of the zip file
    `filename`.

    Member names are ordered with `sort_key` and the first one accepted by
    `name_ok` is read and returned. Returns `None` when no member is
    accepted.
    '''
    with zipfile.ZipFile(filename) as archive:
        ordered_members = sorted(archive.namelist(), key=sort_key)
        for member in ordered_members:
            if not name_ok(member):
                continue
            data = archive.read(member)
            return member, data
Beispiel #8
0
def extract(filename, dir):
    """
    Extract archive C{filename} into directory C{dir}

    The archive is closed again once extraction finishes or fails.
    """
    # Use the context manager so the file handle is released on every path.
    with zipfile.ZipFile(filename) as zf:
        zf.extractall(dir)