def update(pathtozip, patterns, filepaths, names, compression=zipfile.ZIP_DEFLATED, verbose=True):
    '''
    Update files in the zip file at `pathtozip` matching the given
    `patterns` with the given `filepaths`. If more than one file matches,
    all of the files are replaced.

    Each archive member is checked against the patterns in order and only
    the first matching pattern is applied to it.

    :param pathtozip: Path to the zip file. It is opened in append mode and
                      modified in place.
    :param patterns: A list of compiled regular expressions
    :param filepaths: A list of paths to the replacement files. Must have the
                      same length as `patterns`.
    :param names: A list of archive names for each file in filepaths.
                  A name can be `None` in which case the name of the existing
                  file in the archive is used.
    :param compression: The compression to use when replacing files. Can be
                        either `zipfile.ZIP_DEFLATED` or `zipfile.ZIP_STORED`.
    :param verbose: If true, print one line for every member that is updated.
    '''
    assert len(patterns) == len(filepaths) == len(names)
    # The context manager guarantees the archive is closed even when one of
    # the replace/delete/write calls raises.
    with zipfile.ZipFile(pathtozip, mode='a') as z:
        for name in z.namelist():
            for pat, fname, new_name in zip(patterns, filepaths, names):
                if pat.search(name):
                    if verbose:
                        print('Updating %s with %s' % (name, fname))
                    if new_name is None:
                        # Keep the existing archive name, swap the content.
                        z.replace(fname, arcname=name, compress_type=compression)
                    else:
                        # Renaming: drop the old member, add the new one.
                        z.delete(name)
                        z.write(fname, new_name, compress_type=compression)
                    # First matching pattern wins for this member.
                    break
def write(self, path):
    '''
    Write the files under `self.root` to a new zip archive at `path`.

    Any existing file at `path` is removed first. A `mimetype` entry is
    written first and stored uncompressed; every other file found under
    `self.root` whose file name is not in `EXCLUDE_FROM_ZIP` is then added
    (deflated) under its path relative to `self.root`.

    The process working directory is temporarily switched to `self.root`
    while files are added and is restored afterwards, also on failure.

    :param path: Destination path of the archive.
    '''
    self.flush_cache()
    if os.path.exists(path):
        os.unlink(path)
    epub = zipfile.ZipFile(path, 'w', compression=zipfile.ZIP_DEFLATED)
    try:
        # NOTE(review): bytes(<str>) and os.getcwdu() are Python 2 idioms;
        # both need revisiting if this module is ported to Python 3.
        epub.writestr('mimetype', bytes(guess_type('a.epub')[0]), compression=zipfile.ZIP_STORED)
        cwd = os.getcwdu()
        os.chdir(self.root)
        try:
            for dirpath, _dirnames, filenames in os.walk(self.root, topdown=True):
                for f in filenames:
                    if f in EXCLUDE_FROM_ZIP:
                        continue
                    # Strip only the leading root; a plain str.replace()
                    # would also remove the prefix if it re-occurred
                    # further down the path.
                    filepath = os.path.relpath(os.path.join(dirpath, f), self.root)
                    st = os.stat(filepath)
                    mtime = time.localtime(st.st_mtime)
                    if mtime[0] < 1980:
                        # The zip format cannot store dates before 1980, so
                        # reset the file's timestamps to the current time.
                        os.utime(filepath, None)
                    epub.write(filepath)
        finally:
            # Never leave the process stranded inside the container root.
            os.chdir(cwd)
    finally:
        epub.close()
def extract_member(filename, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I), sort_alphabetically=False):
    '''
    Return the first member of the zip file `filename` whose name matches
    `match`.

    :param filename: Path to (or file object of) the zip archive.
    :param match: A compiled regular expression that is searched for in each
                  member name. Defaults to common image file extensions.
    :param sort_alphabetically: If true, member names are ordered with
                                `sort_key` before searching; otherwise the
                                archive's own order is used.
    :return: A tuple `(name, data)` for the first matching member, or
             `None` if no member matches.
    '''
    # Context manager so the archive handle is released on every path,
    # including the early return and any read error.
    with zipfile.ZipFile(filename) as zf:
        names = zf.namelist()
        if sort_alphabetically:
            names = sorted(names, key=sort_key)
        for name in names:
            if match.search(name):
                return name, zf.read(name)
    return None
def __init__(self, path):
    '''
    Unpack the zip archive at `path` into a temporary directory and index
    its contents.

    After extraction the constructor:

    * removes the extracted top-level `mimetype` file, if present;
    * parses `META-INF/container.xml` and locates the OPF file it points to;
    * fills `self.name_map` (archive-style name -> absolute filesystem path)
      and `self.mime_map` (archive-style name -> media type), the latter
      first from file names and then from the OPF manifest;
    * rewrites every manifest `href` to its unquoted form and stores the
      modified OPF back via `self.set`.

    :param path: Path to the archive to open.
    :raises InvalidEpub: If `META-INF/container.xml` is missing, contains no
                         rootfile entry with the OPF media type, or points
                         to an OPF file that does not exist.
    '''
    tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
    # Close the archive as soon as extraction is done (or fails).
    with zipfile.ZipFile(path) as zf:
        zf.extractall(tmpdir)

    self.root = os.path.abspath(tmpdir)
    self.log = logging.Log(level=logging.DEBUG if DEBUG else logging.WARN)
    self.dirtied = set()
    self.cache = {}
    self.mime_map = {}

    print("Container:__init__:Got container path {0}".format(self.root))

    mimetype_path = os.path.join(self.root, 'mimetype')
    if os.path.exists(mimetype_path):
        os.remove(mimetype_path)

    container_path = os.path.join(self.root, 'META-INF', 'container.xml')
    if not os.path.exists(container_path):
        raise InvalidEpub('No META-INF/container.xml in epub')
    with open(container_path, 'rb') as container_file:
        self.container = etree.fromstring(container_file.read())

    opf_files = self.container.xpath((
        r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'
        .format(guess_type('a.opf')[0])), namespaces=self.namespaces)
    if not opf_files:
        raise InvalidEpub(
            'META-INF/container.xml contains no link to OPF file')
    # Only the first matching rootfile is used.
    opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
    if not os.path.exists(opf_path):
        raise InvalidEpub(
            'OPF file does not exist at location pointed to by META-INF/container.xml'
        )

    # Map of relative paths with / separators to absolute
    # paths on filesystem with os separators
    self.name_map = {}
    for dirpath, dirnames, filenames in os.walk(self.root):
        for f in filenames:
            abs_path = os.path.join(dirpath, f)
            name = os.path.relpath(abs_path, self.root).replace(os.sep, '/')
            self.name_map[name] = abs_path
            self.mime_map[name] = guess_type(f)[0]
            if abs_path == opf_path:
                # Force the OPF media type regardless of the file's
                # actual extension.
                self.opf_name = name
                self.mime_map[name] = guess_type('a.opf')[0]

    opf = self.opf
    # The OPF's directory is the base for resolving manifest hrefs; it does
    # not change inside the loop.
    opf_dir = os.path.dirname(self.opf_name).replace(os.sep, '/')
    for item in opf.xpath('//opf:manifest/opf:item[@href and @media-type]',
                          namespaces=self.namespaces):
        href = unquote(item.get('href'))
        item.set("href", href)
        # Media types declared in the manifest override the guessed ones.
        self.mime_map[self.href_to_name(href, opf_dir)] = item.get('media-type')
    self.set(self.opf_name, opf)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Write the JPEG and PNG images of `oeb_book` into a zip archive at
    `output_path`.

    Images are first written flat into a temporary directory under the base
    name of their manifest href, so two images with the same base name
    overwrite each other. The image referenced by the guide's cover entry is
    written as `00000` plus its original extension (presumably so that it
    sorts first in the archive -- confirm). The directory is then added to
    the archive and removed.

    :param oeb_book: The book to export; its `guide` and `manifest` are read.
    :param output_path: Path of the archive to create.
    :param input_plugin: Not used by this method.
    :param opts: Not used by this method.
    :param log: Logger; progress is reported through `log.info`.
    '''
    from calibre.utils import zipfile

    image_types = ['image/jpeg', 'image/png']
    tempdir = os.path.realpath(PersistentTemporaryDirectory())
    log.info('Creating temp dir ' + tempdir)

    with CurrentDir(tempdir):
        cover_ref = None
        cover_ext = None
        if oeb_book.guide['cover'] is not None:
            cover_ref = oeb_book.guide['cover'].href
            cover_ext = os.path.splitext(cover_ref)[1]

        for item in oeb_book.manifest:
            if item.media_type not in image_types:
                continue
            log.info('Found image ' + item.id + ' ' + item.media_type + ' ' + item.href)
            if cover_ref is not None and item.href == cover_ref:
                file_name = os.path.join(tempdir, '00000' + cover_ext)
            else:
                file_name = os.path.join(tempdir, os.path.basename(item.href))
            with open(file_name, 'wb') as image:
                image.write(item.data)

    # All files above are written through absolute paths, so the archive is
    # built (and the directory removed) after leaving the temp dir.
    log.info('Finished extracting images, repackaging them as CBZ ' + output_path)
    # The context manager closes the archive, so it is finalised on disk
    # before the temporary files are removed.
    with zipfile.ZipFile(output_path, mode="w") as zfile:
        zfile.add_dir(tempdir)
        log.info('Added files. Preparing to compress.')

    log.info('Cleaning up temp dir...')
    shutil.rmtree(tempdir)
    log.info('All done.')
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Render `oeb_book` as a set of HTML pages and pack them into a zip
    archive at `output_path`.

    The method builds everything in a temporary directory:

    * an index page named after `output_path` (with `.zip` removed and
      `.html` appended), rendered from the index template;
    * a `<index name>_files` directory holding the stylesheet, all manifest
      items, and one rendered HTML page per spine item with links to the
      previous/next page, the stylesheet and the table of contents.

    The directory and the index page are then added to the archive. If
    `opts.extract_to` is set, that directory is deleted if it exists,
    re-created, and the archive is extracted into it. Finally the
    temporary directory is removed.

    :param oeb_book: The book to export; its `metadata`, `manifest`,
                     `spine` and `toc` are read.
    :param output_path: Path of the zip archive to create.
    :param input_plugin: Not used by this method.
    :param opts: Conversion options. `template_html_index`, `template_html`
                 and `template_css` may each be a path overriding the
                 bundled template; `extract_to` is an optional directory to
                 extract the result into.
    :param log: Logger, stored on `self.log`.
    '''
    from lxml import etree
    from calibre.utils import zipfile
    from templite import Templite
    from urllib import unquote
    from calibre.ebooks.html.meta import EasyMeta

    def read_template(custom_path, default_resource):
        # Return the template text: the user supplied file when a path is
        # given, the bundled resource otherwise. The file is closed right
        # after reading instead of being left to the garbage collector.
        if custom_path is not None:
            with open(custom_path, 'rb') as src:
                raw = src.read()
        else:
            raw = P(default_resource, data=True)
        return raw.decode('utf-8')

    # read template files
    template_html_index_data = read_template(
        opts.template_html_index, 'templates/html_export_default_index.tmpl')
    template_html_data = read_template(
        opts.template_html, 'templates/html_export_default.tmpl')
    template_css_data = read_template(
        opts.template_css, 'templates/html_export_default.css')

    self.log = log
    self.opts = opts
    meta = EasyMeta(oeb_book.metadata)

    tempdir = os.path.realpath(PersistentTemporaryDirectory())
    output_file = os.path.join(
        tempdir, basename(re.sub(r'\.zip', '', output_path) + '.html'))
    output_dir = re.sub(r'\.html', '', output_file) + '_files'

    if not exists(output_dir):
        os.makedirs(output_dir)

    css_path = output_dir + os.sep + 'calibreHtmlOutBasicCss.css'
    with open(css_path, 'wb') as f:
        f.write(template_css_data.encode('utf-8'))

    # Index page
    with open(output_file, 'wb') as f:
        html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
        templite = Templite(template_html_index_data)
        nextLink = oeb_book.spine[0].href
        nextLink = relpath(output_dir + os.sep + nextLink, dirname(output_file))
        cssLink = relpath(abspath(css_path), dirname(output_file))
        tocUrl = relpath(output_file, dirname(output_file))
        t = templite.render(has_toc=bool(oeb_book.toc.count()),
                            toc=html_toc, meta=meta, nextLink=nextLink,
                            tocUrl=tocUrl, cssLink=cssLink,
                            firstContentPageLink=nextLink)
        if isinstance(t, unicode_type):
            t = t.encode('utf-8')
        f.write(t)

    # Relative hrefs below are resolved against output_dir via abspath().
    with CurrentDir(output_dir):
        for item in oeb_book.manifest:
            path = abspath(unquote(item.href))
            item_dir = dirname(path)
            if not exists(item_dir):
                os.makedirs(item_dir)
            if item.spine_position is not None:
                # Spine items are rendered in the next loop; only create
                # an empty placeholder file here.
                with open(path, 'wb') as f:
                    pass
            else:
                with open(path, 'wb') as f:
                    f.write(str(item))
                item.unload_data_from_memory(memory=path)

        for item in oeb_book.spine:
            path = abspath(unquote(item.href))
            item_dir = dirname(path)
            root = item.data.getroottree()

            # get & clean HTML <HEAD>-data
            head = root.xpath(
                '//h:head',
                namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
            head_content = etree.tostring(head, pretty_print=True,
                                          encoding='utf-8')
            head_content = re.sub(r'\<\/?head.*\>', '', head_content)
            head_content = re.sub(
                re.compile(r'\<style.*\/style\>', re.M | re.S), '',
                head_content)
            # Expand self-closing <title/> into an explicit open/close pair.
            head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>',
                                  head_content)

            # get & clean HTML <BODY>-data
            body = root.xpath(
                '//h:body',
                namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
            ebook_content = etree.tostring(body, pretty_print=True,
                                           encoding='utf-8')
            ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
            # Expand self-closing <div/>, <a/> and <span/> the same way.
            ebook_content = re.sub(r'<(div|a|span)([^>]*)/>',
                                   r'<\1\2></\1>', ebook_content)

            # generate link to next page
            if item.spine_position + 1 < len(oeb_book.spine):
                nextLink = oeb_book.spine[item.spine_position + 1].href
                nextLink = relpath(abspath(nextLink), item_dir)
            else:
                nextLink = None

            # generate link to previous page
            if item.spine_position > 0:
                prevLink = oeb_book.spine[item.spine_position - 1].href
                prevLink = relpath(abspath(prevLink), item_dir)
            else:
                prevLink = None

            cssLink = relpath(abspath(css_path), item_dir)
            tocUrl = relpath(output_file, item_dir)
            firstContentPageLink = oeb_book.spine[0].href

            # render template
            templite = Templite(template_html_data)
            # The TOC is passed as a callable, so it is only generated if
            # the template actually invokes it.
            toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
            t = templite.render(ebookContent=ebook_content,
                                prevLink=prevLink, nextLink=nextLink,
                                has_toc=bool(oeb_book.toc.count()), toc=toc,
                                tocUrl=tocUrl, head_content=head_content,
                                meta=meta, cssLink=cssLink,
                                firstContentPageLink=firstContentPageLink)

            # write html to file
            with open(path, 'wb') as f:
                f.write(t)
            item.unload_data_from_memory(memory=path)

    zfile = zipfile.ZipFile(output_path, "w")
    try:
        zfile.add_dir(output_dir, basename(output_dir))
        zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)

        if opts.extract_to:
            # Start from an empty target directory.
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.makedirs(opts.extract_to)
            zfile.extractall(opts.extract_to)
            self.log('Zip file extracted to', opts.extract_to)
    finally:
        # Close the archive even if adding or extracting files failed.
        zfile.close()

    # cleanup temp dir
    shutil.rmtree(tempdir)
def extract_cover_image(filename):
    '''
    Return the first acceptable member of the zip file `filename`.

    Member names are ordered with `sort_key` and tested with `name_ok` in
    that order.

    :param filename: The zip archive to search.
    :return: A tuple `(name, data)` for the first member accepted by
             `name_ok`, or `None` if no member is accepted.
    '''
    with zipfile.ZipFile(filename) as archive:
        ordered_names = list(archive.namelist())
        ordered_names.sort(key=sort_key)
        for member in ordered_names:
            if not name_ok(member):
                continue
            return member, archive.read(member)
    return None
def extract(filename, dir):
    """
    Extract archive C{filename} into directory C{dir}

    The archive is closed again once extraction has finished or failed.
    """
    with zipfile.ZipFile(filename) as zf:
        zf.extractall(dir)