def convert_to_cbz(self):
    ''' Converts a cbr-comic to a cbz-comic '''
    from calibre.utils.unrar import RARFile, extract
    with TemporaryDirectory('_cbr2cbz') as tdir:
        # extract the rar file
        ffile = self.db.format(self.book_id, "cbr", as_path=True)
        extract(ffile, tdir)
        # get the comment
        with open(ffile, 'rb') as stream:
            zr = RARFile(stream, get_comment=True)
            comment = zr.comment
        # the extracted temporary copy of the cbr is no longer needed
        delete_temp_file(ffile)
        # make the cbz file
        with TemporaryFile("comic.cbz") as tf:
            zf = ZipFile(tf, "w")
            zf.add_dir(tdir)
            zf.close()
            # write comment
            # NOTE(review): the RAR archive comment is carried over into the
            # zip container so no metadata stored there is lost
            if comment:
                writeZipComment(tf, comment)
            # add the cbz format to calibres library
            self.db.add_format(self.book_id, "cbz", tf)
            self.format = "cbz"
class OCFZipReader(OCFReader):
    # OCF (Open Container Format) reader backed by a zip archive, either an
    # already-open (Local)ZipFile or a raw stream.

    def __init__(self, stream, mode='r', root=None):
        # Accept a pre-built archive object as-is; otherwise wrap the stream.
        if isinstance(stream, (LocalZipFile, ZipFile)):
            self.archive = stream
        else:
            try:
                self.archive = ZipFile(stream, mode=mode)
            except BadZipfile:
                raise EPubException("not a ZIP .epub OCF container")
        self.root = root
        if self.root is None:
            # Derive the root from the stream's filename when available,
            # falling back to the current working directory.
            name = getattr(stream, 'name', False)
            if name:
                self.root = os.path.abspath(os.path.dirname(name))
            else:
                self.root = getcwd()
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        # Return a file-like object for the named archive member.
        if isinstance(self.archive, LocalZipFile):
            return self.archive.open(name)
        return io.BytesIO(self.archive.read(name))

    def read_bytes(self, name):
        # Raw bytes of the named archive member.
        return self.archive.read(name)
def get_metadata(stream):
    '''
    Return the metadata of the first recognised ebook found inside the
    ZIP archive *stream*, or comic metadata when the archive looks like
    a comic. Raises ValueError when no ebook is found.
    '''
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.archive import is_comic
    stream_type = None
    zf = ZipFile(stream, 'r')
    names = zf.namelist()
    if is_comic(names):
        # Is probably a comic
        return get_metadata(stream, 'cbz')
    for f in names:
        stream_type = os.path.splitext(f)[1].lower()
        if stream_type:
            stream_type = stream_type[1:]
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                               'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'):
                with TemporaryDirectory() as tdir:
                    with CurrentDir(tdir):
                        path = zf.extract(f)
                        # FIX: close the extracted file instead of leaking the
                        # handle (the old code used a bare open() that was
                        # never closed, which keeps the tempdir locked on
                        # Windows)
                        with open(path, 'rb') as ebook_stream:
                            mi = get_metadata(ebook_stream, stream_type)
                        if stream_type == 'opf' and mi.application_id is None:
                            try:
                                # zip archive opf files without an application_id were assumed not to have a cover
                                # reparse the opf and if cover exists read its data from zip archive for the metadata
                                nmi = zip_opf_metadata(path, zf)
                                nmi.timestamp = None
                                return nmi
                            except:
                                pass
                        mi.timestamp = None
                        return mi
    raise ValueError('No ebook found in ZIP archive (%s)' % os.path.basename(getattr(stream, 'name', '') or '<stream>'))
def convert(self, stream, options, file_ext, log, accelerators):
    # Convert PML/PMLZ input to OEB: expand the archive if needed, turn each
    # .pml file into HTML, collect images, then write metadata.opf + toc.ncx.
    from calibre.ebooks.metadata.toc import TOC
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.utils.zipfile import ZipFile
    self.options = options
    self.log = log
    pages, images = [], []
    toc = TOC()
    if file_ext == 'pmlz':
        log.debug('De-compressing content to temporary directory...')
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)
            pmls = glob.glob(os.path.join(tdir, '*.pml'))
            for pml in pmls:
                html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
                # HTML output goes into the conversion cwd, not the tempdir
                html_path = os.path.join(getcwd(), html_name)
                pages.append(html_name)
                log.debug('Processing PML item %s...' % pml)
                ttoc = self.process_pml(pml, html_path)
                toc += ttoc
            images = self.get_images(stream, tdir, True)
    else:
        toc = self.process_pml(stream, 'index.html')
        pages.append('index.html')
        if hasattr(stream, 'name'):
            images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))
    # We want pages to be ordered alphabetically.
    pages.sort()
    manifest_items = []
    for item in pages+images:
        manifest_items.append((item, None))
    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Reading metadata from input file...')
    mi = get_metadata(stream, 'pml')
    if 'images/cover.png' in images:
        mi.cover = 'images/cover.png'
    opf = OPFCreator(getcwd(), mi)
    log.debug('Generating manifest...')
    opf.create_manifest(manifest_items)
    opf.create_spine(pages)
    opf.set_toc(toc)
    with lopen('metadata.opf', 'wb') as opffile:
        with lopen('toc.ncx', 'wb') as tocfile:
            opf.render(opffile, tocfile, 'toc.ncx')
    return os.path.join(getcwd(), 'metadata.opf')
def safe_replace(self, name, datastream, extra_replacements={}, add_missing=False):
    # Replace the member *name* (and any *extra_replacements*) inside this
    # archive by rebuilding the whole zip in a spooled temp file and then
    # copying it back over self.stream, so a crash mid-way cannot corrupt
    # the original until the final copy.
    from calibre.utils.zipfile import ZipFile, ZipInfo
    replacements = {name:datastream}
    replacements.update(extra_replacements)
    names = frozenset(replacements.keys())
    found = set([])
    with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
        ztemp = ZipFile(temp, 'w')
        for offset, header in self.file_info.itervalues():
            if header.filename in names:
                # Preserve the original member's compression method
                zi = ZipInfo(header.filename)
                zi.compress_type = header.compression_method
                ztemp.writestr(zi, replacements[header.filename].read())
                found.add(header.filename)
            else:
                # spool_size=0 forces on-disk reads to keep memory use bounded
                ztemp.writestr(header.filename, self.read(header.filename, spool_size=0))
        if add_missing:
            # Append replacement entries that had no counterpart in the archive
            for name in names - found:
                ztemp.writestr(name, replacements[name].read())
        ztemp.close()
        # Overwrite the original stream with the rebuilt archive
        zipstream = self.stream
        temp.seek(0)
        zipstream.seek(0)
        zipstream.truncate()
        shutil.copyfileobj(temp, zipstream)
        zipstream.flush()
def initialize_container(path_to_container, opf_name='metadata.opf', extra_entries=[]):
    '''
    Create an empty EPUB document, with a default skeleton.
    '''
    # Build additional <rootfile> elements for any extra entries
    rootfiles = ''
    for path, mimetype, _ in extra_entries:
        rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
                path, mimetype)
    CONTAINER = u'''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
      {extra_entries}
   </rootfiles>
</container>
'''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    # The mimetype member must be stored uncompressed per the EPUB OCF spec
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0755)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _, data in extra_entries:
        zf.writestr(path, data)
    # Caller is responsible for adding content and closing the archive
    return zf
def run(epub, opts, log):
    # Unpack the epub into a scratch directory, apply every registered
    # fixer (enabled per-fixer via opts), then write the result back.
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            archive = ZipFile(epub)
            archive.extractall()
            archive.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                enabled = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=enabled)
            container.write(epub)
def run(self, archive):
    # Given a zip/rar archive, decide whether it is a comic, a wrapper
    # around a single ebook file (which gets extracted), or something to
    # pass through unchanged. Returns the path the pipeline should use.
    from calibre.utils.zipfile import ZipFile
    is_rar = archive.lower().endswith('.rar')
    if is_rar:
        from calibre.utils.unrar import extract_member, names
    else:
        zf = ZipFile(archive, 'r')
    if is_rar:
        with open(archive, 'rb') as rf:
            fnames = list(names(rf))
    else:
        fnames = zf.namelist()

    def fname_ok(fname):
        # Filter out junk entries (Thumbs.db, extension-less files,
        # scene-release .diz/.nfo files, macOS resource forks)
        bn = os.path.basename(fname).lower()
        if bn == 'thumbs.db':
            return False
        if '.' not in bn:
            return False
        if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
            return False
        if '__MACOSX' in fname.split('/'):
            return False
        return True

    fnames = list(filter(fname_ok, fnames))
    if is_comic(fnames):
        # Copy the archive verbatim under a comic extension
        ext = '.cbr' if is_rar else '.cbz'
        of = self.temporary_file('_archive_extract'+ext)
        with open(archive, 'rb') as f:
            of.write(f.read())
        of.close()
        return of.name
    if len(fnames) > 1 or not fnames:
        # Not a single-file wrapper; leave the archive as-is
        return archive
    fname = fnames[0]
    ext = os.path.splitext(fname)[1][1:]
    if ext.lower() not in {
            'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
            'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
        return archive
    # Extract the lone ebook member into a temporary file
    of = self.temporary_file('_archive_extract.'+ext)
    with closing(of):
        if is_rar:
            with open(archive, 'rb') as f:
                data = extract_member(f, match=None, name=fname)[1]
                of.write(data)
        else:
            of.write(zf.read(fname))
    return of.name
def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
    '''
    Unpack the epub at *pathtoepub* into *tdir* (a fresh persistent temp
    dir when None), locate its OPF via META-INF/container.xml and
    initialise the base Container. Raises InvalidEpub when the container
    structure is broken.
    '''
    if clone_data is not None:
        # Cloning path: reuse the already-unpacked state
        super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
        for x in ('pathtoepub', 'obfuscated_fonts'):
            setattr(self, x, clone_data[x])
        return
    self.pathtoepub = pathtoepub
    if tdir is None:
        tdir = PersistentTemporaryDirectory('_epub_container')
    tdir = os.path.abspath(os.path.realpath(tdir))
    self.root = tdir
    with open(self.pathtoepub, 'rb') as stream:
        try:
            zf = ZipFile(stream)
            zf.extractall(tdir)
        except:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            # FIX: extract into the container root; without the explicit
            # destination the fallback parser unpacked into the process cwd
            extractall(stream, tdir)
    # The mimetype member is regenerated on write, so drop it here
    try:
        os.remove(join(tdir, 'mimetype'))
    except EnvironmentError:
        pass
    container_path = join(self.root, 'META-INF', 'container.xml')
    if not exists(container_path):
        raise InvalidEpub('No META-INF/container.xml in epub')
    container = etree.fromstring(open(container_path, 'rb').read())
    opf_files = container.xpath((
        r'child::ocf:rootfiles/ocf:rootfile'
        '[@media-type="%s" and @full-path]'%guess_type('a.opf')
        ), namespaces={'ocf':OCF_NS}
    )
    if not opf_files:
        raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
    opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get('full-path')).split('/')))
    if not exists(opf_path):
        raise InvalidEpub('OPF file does not exist at location pointed to'
                ' by META-INF/container.xml')
    super(EpubContainer, self).__init__(tdir, opf_path, log)
    self.obfuscated_fonts = {}
    if 'META-INF/encryption.xml' in self.name_path_map:
        self.process_encryption()
    self.parsed_cache['META-INF/container.xml'] = container
def get_fb2_data(stream):
    '''
    Return (data, zip_file_name) for an FB2 stream: if *stream* is a
    zip container (fb2.zip), the bytes of the first .fb2 member and its
    name; otherwise the raw stream contents and None.
    '''
    from calibre.utils.zipfile import ZipFile, BadZipfile
    pos = stream.tell()
    try:
        zf = ZipFile(stream)
    except BadZipfile:
        # Not zipped: rewind and return the raw bytes
        stream.seek(pos)
        ans = stream.read()
        zip_file_name = None
    else:
        names = zf.namelist()
        # Prefer a member with a .fb2 extension, fall back to any member
        names = [x for x in names if x.lower().endswith('.fb2')] or names
        zip_file_name = names[0]
        # FIX: read() instead of open().read() so no member file handle
        # is left open
        ans = zf.read(zip_file_name)
    return ans, zip_file_name
def extract(self, stream):
    # Unpack the DOCX (a zip container) into a persistent temp dir and
    # build self.names, mapping zip-style member names (forward slashes)
    # to their absolute on-disk paths.
    self.tdir = PersistentTemporaryDirectory('docx_container')
    try:
        zf = ZipFile(stream)
        zf.extractall(self.tdir)
    except:
        # Fall back to the lenient pure-python unzipper for broken archives
        self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream, self.tdir)
    self.names = {}
    for f in walk(self.tdir):
        name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
        self.names[name] = f
def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
    """
    Unpack the epub at *pathtoepub* into *tdir* (a fresh persistent temp
    dir when None), locate its OPF via META-INF/container.xml and
    initialise the base Container. Raises InvalidEpub when the container
    structure is broken.
    """
    if clone_data is not None:
        # Cloning path: reuse the already-unpacked state
        super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
        for x in ("pathtoepub", "obfuscated_fonts"):
            setattr(self, x, clone_data[x])
        return
    self.pathtoepub = pathtoepub
    if tdir is None:
        tdir = PersistentTemporaryDirectory("_epub_container")
    tdir = os.path.abspath(os.path.realpath(tdir))
    self.root = tdir
    with open(self.pathtoepub, "rb") as stream:
        try:
            zf = ZipFile(stream)
            zf.extractall(tdir)
        except:
            log.exception("EPUB appears to be invalid ZIP file, trying a"
                    " more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            # FIX: extract into the container root; without the explicit
            # destination the fallback parser unpacked into the process cwd
            extractall(stream, tdir)
    # The mimetype member is regenerated on write, so drop it here
    try:
        os.remove(join(tdir, "mimetype"))
    except EnvironmentError:
        pass
    container_path = join(self.root, "META-INF", "container.xml")
    if not exists(container_path):
        raise InvalidEpub("No META-INF/container.xml in epub")
    container = etree.fromstring(open(container_path, "rb").read())
    opf_files = container.xpath(
        (r"child::ocf:rootfiles/ocf:rootfile"
         '[@media-type="%s" and @full-path]' % guess_type("a.opf")),
        namespaces={"ocf": OCF_NS},
    )
    if not opf_files:
        raise InvalidEpub("META-INF/container.xml contains no link to OPF file")
    opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get("full-path")).split("/")))
    if not exists(opf_path):
        raise InvalidEpub("OPF file does not exist at location pointed to"
                " by META-INF/container.xml")
    super(EpubContainer, self).__init__(tdir, opf_path, log)
    self.obfuscated_fonts = {}
    if "META-INF/encryption.xml" in self.name_path_map:
        self.process_encryption()
    self.parsed_cache["META-INF/container.xml"] = container
def dump_input(self, ret, output_dir):
    # Save the input stage of the conversion pipeline for debugging: either
    # copy the raw output directory (when ret is a path string) or dump the
    # parsed OEB book; recipes additionally get zipped with download state.
    out_dir = os.path.join(self.opts.debug_pipeline, 'input')
    if isinstance(ret, basestring):
        shutil.copytree(output_dir, out_dir)
    else:
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        self.dump_oeb(ret, out_dir)
    if self.input_fmt == 'recipe':
        zf = ZipFile(os.path.join(self.opts.debug_pipeline,
            'periodical.downloaded_recipe'), 'w')
        zf.add_dir(out_dir)
        # Let the input plugin append its own download state to the archive
        with self.input_plugin:
            self.input_plugin.save_download(zf)
        zf.close()
    self.log.info('Input debug saved to:', out_dir)
def update_cover(self):
    '''
    Embed the current calibre cover into the cbz file, replacing any
    previously embedded "00000000_cover.*" member.
    '''
    # get the calibre cover
    cover_path = self.db.cover(self.book_id, as_path=True)
    fmt = cover_path.rpartition('.')[-1]
    new_cover_name = "00000000_cover." + fmt
    self.make_temp_cbz_file()
    # search for a previously embeded cover
    zf = ZipFile(self.file)
    cover_info = ""
    for name in zf.namelist():
        if name.rsplit(".", 1)[0] == "00000000_cover":
            cover_info = name
            break
    # FIX: close the scan handle before rewriting/appending to the same
    # archive (the old code left it open and shadowed zf below)
    zf.close()
    # delete previous cover
    if cover_info != "":
        with open(self.file, 'r+b') as cbz_stream, open(cover_path, 'r+b') as cp:
            safe_replace(cbz_stream, cover_info, cp)
    # save the cover in the file
    else:
        zf = ZipFile(self.file, "a")
        zf.write(cover_path, new_cover_name)
        zf.close()
    delete_temp_file(cover_path)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Serialize *oeb_book* as a PMLZ: write index.pml plus its images into
    a temp dir and zip that directory into *output_path*.
    '''
    from calibre.ebooks.pml.pmlml import PMLMLizer
    from calibre.utils.zipfile import ZipFile
    with TemporaryDirectory('_pmlz_output') as tdir:
        pmlmlizer = PMLMLizer(log)
        pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
        with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
            out.write(pml.encode(opts.pml_output_encoding, 'replace'))
        img_path = os.path.join(tdir, 'index_img')
        if not os.path.exists(img_path):
            os.makedirs(img_path)
        self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, img_path, opts)
        log.debug('Compressing output...')
        pmlz = ZipFile(output_path, 'w')
        pmlz.add_dir(tdir)
        # FIX: close the archive so the zip central directory is flushed
        # before the temporary directory goes away
        pmlz.close()
def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    pml = ''
    if stream.name.endswith('.pmlz'):
        # PMLZ: unpack and concatenate every .pml member before scanning
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)
            pmls = glob.glob(os.path.join(tdir, '*.pml'))
            for p in pmls:
                with open(p, 'r+b') as p_stream:
                    pml += p_stream.read()
            if extract_cover:
                mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True)
    else:
        pml = stream.read()
        if extract_cover:
            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
    # PML metadata lives in \v ... \v comment blocks as KEY="value" pairs;
    # values are cp1252-encoded and control characters are stripped
    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        m = re.search(r'TITLE="(.*?)"', comment)
        if m:
            mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'AUTHOR="(.*?)"', comment)
        if m:
            # Drop the placeholder author once a real one is found
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
        m = re.search(r'PUBLISHER="(.*?)"', comment)
        if m:
            mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'COPYRIGHT="(.*?)"', comment)
        if m:
            mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'ISBN="(.*?)"', comment)
        if m:
            mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
    return mi
def initialize_container(path_to_container, opf_name='metadata.opf', extra_entries=[]): ''' Create an empty EPUB document, with a default skeleton. ''' rootfiles = '' for path, mimetype, _ in extra_entries: rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format( path, mimetype) CONTAINER = simple_container_xml(opf_name, rootfiles).encode('utf-8') zf = ZipFile(path_to_container, 'w') zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', '', 0755) zf.writestr('META-INF/container.xml', CONTAINER) for path, _, data in extra_entries: zf.writestr(path, data) return zf
def embed_cix_metadata(self):
    ''' Embeds the cix_metadata '''
    from io import StringIO
    # Serialize the comic metadata to a ComicInfo.xml document
    cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
    # ensure we have a temp file
    self.make_temp_cbz_file()
    # make a new cbz if a metadata file is already there, to prevent corruption
    if self.zipinfo is not None:
        # Replace the existing ComicInfo.xml member in place
        with open(self.file, 'r+b') as zf:
            safe_replace(zf, self.zipinfo, StringIO(cix_string.decode('utf-8', 'ignore')))
    else:
        # save the metadata in the file
        zf = ZipFile(self.file, "a")
        zf.writestr("ComicInfo.xml", cix_string.decode('utf-8', 'ignore'))
        zf.close()
def get_cover(opf, opf_path, stream, reader=None):
    # Return the raw cover image bytes for an epub, or fall back to
    # rendering the cover document. Returns None for encrypted covers.
    raster_cover = opf.raster_cover
    stream.seek(0)
    try:
        zf = ZipFile(stream)
    except:
        # Broken zip: fall back to the lenient local parser
        stream.seek(0)
        zf = LocalZipFile(stream)
    if raster_cover:
        # Resolve the cover href relative to the OPF's directory
        base = posixpath.dirname(opf_path)
        cpath = posixpath.normpath(posixpath.join(base, raster_cover))
        if reader is not None and \
           reader.encryption_meta.is_encrypted(cpath):
            return
        try:
            member = zf.getinfo(cpath)
        except:
            # Cover member missing: fall through to rendering below
            pass
        else:
            f = zf.open(member)
            data = f.read()
            f.close()
            zf.close()
            return data
    return render_cover(opf, opf_path, zf, reader=reader)
def __enter__(self, *args):
    '''
    Add this plugin to the python path so that it's contents become directly
    importable. Useful when bundling large python libraries into the plugin.
    Use it like this::
        with plugin:
            import something
    '''
    if self.plugin_path is not None:
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(self.plugin_path)
        extensions = set([x.rpartition('.')[-1].lower() for x in
            zf.namelist()])
        # Native extensions cannot be imported from inside a zip, so the
        # plugin must be unpacked if it contains any
        zip_safe = True
        for ext in ('pyd', 'so', 'dll', 'dylib'):
            if ext in extensions:
                zip_safe = False
                break
        if zip_safe:
            sys.path.insert(0, self.plugin_path)
            self.sys_insertion_path = self.plugin_path
        else:
            # Unpack into a temp dir and put that on sys.path instead
            from calibre.ptempfile import TemporaryDirectory
            self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
            self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
            zf.extractall(self.sys_insertion_path)
            sys.path.insert(0, self.sys_insertion_path)
        zf.close()
def run(self, archive):
    # Older variant of the archive-extract hook: treat comics specially,
    # unwrap single-ebook archives, otherwise pass the archive through.
    from calibre.utils.zipfile import ZipFile
    is_rar = archive.lower().endswith('.rar')
    if is_rar:
        from calibre.utils.unrar import extract_member, names
    else:
        zf = ZipFile(archive, 'r')
    if is_rar:
        with open(archive, 'rb') as rf:
            fnames = list(names(rf))
    else:
        fnames = zf.namelist()
    # Keep only entries that have an extension and are not Thumbs.db
    fnames = [x for x in fnames if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db']
    if is_comic(fnames):
        # Copy the archive verbatim under a comic extension
        ext = '.cbr' if is_rar else '.cbz'
        of = self.temporary_file('_archive_extract'+ext)
        with open(archive, 'rb') as f:
            of.write(f.read())
        of.close()
        return of.name
    if len(fnames) > 1 or not fnames:
        return archive
    fname = fnames[0]
    ext = os.path.splitext(fname)[1][1:]
    if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
            'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'):
        return archive
    # Extract the lone ebook member into a temporary file
    of = self.temporary_file('_archive_extract.'+ext)
    with closing(of):
        if is_rar:
            with open(archive, 'rb') as f:
                data = extract_member(f, match=None, name=fname)[1]
                of.write(data)
        else:
            of.write(zf.read(fname))
    return of.name
def get_comic_metadata_from_cbz(self):
    ''' Reads the comic metadata from the comic cbz file as comictagger metadata '''
    self.make_temp_cbz_file()
    # open the zipfile
    zf = ZipFile(self.file)
    # get cix metadata
    # (ComicInfo.xml member, matched case-insensitively; remember its exact
    # name so it can later be replaced in place)
    for name in zf.namelist():
        if name.lower() == "comicinfo.xml":
            self.cix_metadata = ComicInfoXml().metadataFromString(zf.read(name))
            self.zipinfo = name
            break
    # get the cbi metadata
    # (ComicBookInfo lives in the zip archive comment, not in a member)
    if ComicBookInfo().validateString(zf.comment):
        self.cbi_metadata = ComicBookInfo().metadataFromString(zf.comment)
    zf.close()
    # get combined metadata
    self._get_combined_metadata()
def __init__(self, stream, mode='r', root=None):
    # Wrap *stream* in a zip archive reader and establish the container
    # root (explicit argument, directory of the stream's file, or cwd).
    try:
        self.archive = ZipFile(stream, mode=mode)
    except BadZipfile:
        raise EPubException("not a ZIP .epub OCF container")
    self.root = root
    if self.root is None:
        stream_name = getattr(stream, 'name', False)
        self.root = (os.path.abspath(os.path.dirname(stream_name))
                     if stream_name else os.getcwdu())
    super(OCFZipReader, self).__init__()
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Serialize *oeb_book* as a TXTZ: the plain-text conversion plus its
    images and a metadata.opf, zipped into *output_path*.
    '''
    from calibre.ebooks.oeb.base import OEB_IMAGES
    from calibre.utils.zipfile import ZipFile
    from lxml import etree
    with TemporaryDirectory('_txtz_output') as tdir:
        # TXT
        txt_name = 'index.txt'
        if opts.txt_output_formatting.lower() == 'textile':
            txt_name = 'index.text'
        with TemporaryFile(txt_name) as tf:
            TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
            shutil.copy(tf, os.path.join(tdir, txt_name))
        # Images
        for item in oeb_book.manifest:
            if item.media_type in OEB_IMAGES:
                if hasattr(self.writer, 'images'):
                    path = os.path.join(tdir, 'images')
                    if item.href in self.writer.images:
                        href = self.writer.images[item.href]
                    else:
                        continue
                else:
                    path = os.path.join(tdir, os.path.dirname(item.href))
                    href = os.path.basename(item.href)
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(os.path.join(path, href), 'wb') as imgf:
                    imgf.write(item.data)
        # Metadata
        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
            mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
        txtz = ZipFile(output_path, 'w')
        txtz.add_dir(tdir)
        # FIX: close the archive so the zip central directory is flushed
        # before the temporary directory goes away
        txtz.close()
def build_plugin(path):
    # Zip up the plugin source directory at *path* (which must contain an
    # __init__.py) and install/update it in calibre.
    from calibre import prints
    from calibre.ptempfile import PersistentTemporaryFile
    from calibre.utils.zipfile import ZipFile, ZIP_STORED
    path = type(u'')(path)
    names = frozenset(os.listdir(path))
    if u'__init__.py' not in names:
        prints(path, ' is not a valid plugin')
        raise SystemExit(1)
    t = PersistentTemporaryFile(u'.zip')
    with ZipFile(t, u'w', ZIP_STORED) as zf:
        # Skip VCS metadata directories when adding the tree
        zf.add_dir(path, simple_filter=lambda x:x in {'.git', '.bzr', '.svn', '.hg'})
    t.close()
    plugin = add_plugin(t.name)
    os.remove(t.name)
    prints(u'Plugin updated:', plugin.name, plugin.version)
class OCFZipReader(OCFReader):
    # OCF reader over a plain zip stream; members are served as in-memory
    # StringIO buffers.

    def __init__(self, stream, mode='r', root=None):
        try:
            self.archive = ZipFile(stream, mode=mode)
        except BadZipfile:
            raise EPubException("not a ZIP .epub OCF container")
        self.root = root
        if self.root is None:
            # Derive the root from the stream's filename when available,
            # falling back to the current working directory.
            name = getattr(stream, 'name', False)
            if name:
                self.root = os.path.abspath(os.path.dirname(name))
            else:
                self.root = os.getcwdu()
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        # Return an in-memory file-like object for the named member.
        return StringIO(self.archive.read(name))
def write(self, path):
    # Flush all dirtied (parsed and modified) members to disk, then zip the
    # container root up as an epub at *path*.
    for name in self.dirtied:
        data = self.cache[name]
        raw = data
        if hasattr(data, 'xpath'):
            # An lxml tree: serialize it back to bytes
            raw = etree.tostring(data, encoding='utf-8', xml_declaration=True)
        with open(self.name_map[name], 'wb') as f:
            f.write(raw)
    self.dirtied.clear()
    zf = ZipFile(path, 'w')
    # The epub mimetype member must come first and be stored uncompressed
    zf.writestr('mimetype', bytes(guess_type('a.epub')[0]), compression=ZIP_STORED)
    zf.add_dir(self.root)
    zf.close()
def zip_rebuilder(tdir, path):
    # Rebuild the zip at *path* from the unpacked tree in *tdir*. The
    # mimetype member is written first and stored uncompressed (as epub
    # requires); junk files are skipped and member names are
    # NFC-normalised with forward slashes.
    with ZipFile(path, 'w', compression=ZIP_DEFLATED) as zf:
        # Write mimetype
        mimetype_path = os.path.join(tdir, 'mimetype')
        if os.path.exists(mimetype_path):
            zf.write(mimetype_path, 'mimetype', compress_type=ZIP_STORED)
        # Write everything else
        junk = {'.DS_Store', 'mimetype', 'iTunesMetadata.plist'}
        for dirpath, _, filenames in os.walk(tdir):
            for name in filenames:
                if name in junk:
                    continue
                abspath = os.path.join(dirpath, name)
                arcname = unicodedata.normalize(
                    'NFC', os.path.relpath(abspath, tdir).replace(os.sep, '/'))
                zf.write(abspath, arcname)
def set_metadata(stream, mi): replacements = {} # Get the OPF in the archive. with ZipFile(stream) as zf: opf_path = get_first_opf_name(zf) opf_stream = io.BytesIO(zf.read(opf_path)) opf = OPF(opf_stream) # Cover. new_cdata = None try: new_cdata = mi.cover_data[1] if not new_cdata: raise Exception('no cover') except: try: with open(mi.cover, 'rb') as f: new_cdata = f.read() except: pass if new_cdata: cpath = opf.raster_cover if not cpath: cpath = 'cover.jpg' new_cover = _write_new_cover(new_cdata, cpath) replacements[cpath] = open(new_cover.name, 'rb') mi.cover = cpath # Update the metadata. opf.smart_update(mi, replace_metadata=True) newopf = io.BytesIO(opf.render()) safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True) # Cleanup temporary files. try: if cpath is not None: replacements[cpath].close() os.remove(replacements[cpath].name) except: pass
def write(self, path_or_stream, mi, create_empty_document=False):
    # Serialize the accumulated DOCX parts (document, styles, fonts,
    # relationships, images and metadata derived from *mi*) into an OOXML
    # zip container at *path_or_stream*.
    if create_empty_document:
        self.create_empty_document(mi)
    with ZipFile(path_or_stream, 'w') as zf:
        zf.writestr('[Content_Types].xml', self.contenttypes)
        zf.writestr('_rels/.rels', self.containerrels)
        zf.writestr('docProps/core.xml', self.convert_metadata(mi))
        zf.writestr('docProps/app.xml', self.appproperties)
        zf.writestr('word/webSettings.xml', self.websettings)
        zf.writestr('word/document.xml', xml2str(self.document))
        zf.writestr('word/styles.xml', xml2str(self.styles))
        zf.writestr('word/fontTable.xml', xml2str(self.font_table))
        zf.writestr('word/_rels/document.xml.rels',
                    self.document_relationships.serialize())
        zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
        # Image data is produced lazily via getter callables
        for fname, data_getter in self.images.iteritems():
            zf.writestr(fname, data_getter())
        for fname, data in self.fonts.iteritems():
            zf.writestr(fname, data)
def embed_cbi_metadata(self):
    ''' Embeds the cbi_metadata '''
    cbi_string = ComicBookInfo().stringFromMetadata(self.comic_metadata)
    # ensure we have a temp file
    self.make_temp_cbz_file()
    # save the metadata in the comment
    # (ComicBookInfo is stored in the zip archive comment, not as a member)
    zf = ZipFile(self.file, 'a')
    zf.comment = cbi_string.encode("utf-8")
    # NOTE(review): poking the private _didModify flag forces the zipfile
    # implementation to rewrite the archive comment on close
    zf._didModify = True
    zf.close()
def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
    # Import hunspell dictionaries from a LibreOffice .oxt extension into
    # per-dictionary subdirectories of *dest_dir*. Returns the number of
    # dictionaries imported.
    from calibre.spell.dictionary import parse_lang_code
    dest_dir = dest_dir or os.path.join(config_dir, 'dictionaries')
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    num = 0
    with ZipFile(source_path) as zf:

        def read_file(key):
            try:
                return zf.open(key).read()
            except KeyError:
                # Some dictionaries apparently put the xcu in a sub-directory
                # and incorrectly make paths relative to that directory instead
                # of the root, for example:
                # http://extensions.libreoffice.org/extension-center/italian-dictionary-thesaurus-hyphenation-patterns/releases/4.1/dict-it.oxt
                while key.startswith('../'):
                    key = key[3:]
                return zf.open(key.lstrip('/')).read()

        root = etree.fromstring(zf.open('META-INF/manifest.xml').read())
        # Locate the .xcu configuration that maps dictionaries to locales
        xcu = XPath('//manifest:file-entry[@manifest:media-type="application/vnd.sun.star.configuration-data"]')(
            root)[0].get('{%s}full-path' % NS_MAP['manifest'])
        for (dic, aff), locales in iteritems(parse_xcu(zf.open(xcu).read(), origin='')):
            dic, aff = dic.lstrip('/'), aff.lstrip('/')
            d = tempfile.mkdtemp(prefix=prefix, dir=dest_dir)
            # Keep only locales with a valid country code
            locales = uniq([x for x in map(fill_country_code, locales) if parse_lang_code(x).countrycode])
            if not locales:
                continue
            metadata = [name] + list(locales)
            with open(os.path.join(d, 'locales'), 'wb') as f:
                f.write(('\n'.join(metadata)).encode('utf-8'))
            dd, ad = convert_to_utf8(read_file(dic), read_file(aff))
            with open(os.path.join(d, '%s.dic' % locales[0]), 'wb') as f:
                f.write(dd)
            with open(os.path.join(d, '%s.aff' % locales[0]), 'wb') as f:
                f.write(ad)
            num += 1
    return num
def get_metadata(stream):
    # Read DOCX metadata and pick the first embedded image whose aspect
    # ratio and pixel count make it a plausible cover.
    c = DOCX(stream, extract=False)
    mi = c.metadata
    c.close()
    stream.seek(0)
    cdata = None
    with ZipFile(stream, 'r') as zf:
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except:
                    continue
                # Cover heuristic: portrait-ish aspect ratio and at least
                # ~12k pixels. NOTE(review): under Python 2 height / width
                # is integer division unless true division is imported --
                # confirm the module enables it.
                if 0.8 <= height / width <= 1.8 and height * width >= 12000:
                    cdata = (fmt, raw)
    if cdata is not None:
        mi.cover_data = cdata
    return mi
def get_metadata(stream, extract_cover=True):
    ''' Return metadata as a L{MetaInfo} object '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            # Parse the first OPF found in the archive
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
    except:
        # Best-effort: any failure returns the placeholder metadata
        return mi
    return mi
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
    # Write the metadata in *mi* into an FB2 (or zipped FB2) stream by
    # rewriting its <description> element in place.
    stream.seek(0)
    raw, zip_file_name = get_fb2_data(stream)
    root = _get_fbroot(raw)
    ctx = Context(root)
    desc = ctx.get_or_create(root, 'description')
    ti = ctx.get_or_create(desc, 'title-info')
    pi = ctx.get_or_create(desc, 'publish-info')
    # Preserve the document's existing indentation for re-serialization
    indent = ti.text
    _set_comments(ti, mi, ctx)
    _set_series(ti, mi, ctx)
    _set_tags(ti, mi, ctx)
    _set_authors(ti, mi, ctx)
    _set_title(ti, mi, ctx)
    _set_publisher(pi, mi, ctx)
    _set_pubdate(pi, mi, ctx)
    _set_cover(ti, mi, ctx)
    for child in ti:
        child.tail = indent
    # Apparently there exists FB2 reading software that chokes on the use of
    # single quotes in xml declaration. Sigh. See
    # https://www.mobileread.com/forums/showthread.php?p=2273184#post2273184
    raw = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    raw += etree.tostring(root, method='xml', encoding='utf-8',
                          xml_declaration=False)
    stream.seek(0)
    stream.truncate()
    if zip_file_name:
        # The input was zipped: rebuild the zip with the updated member
        from calibre.utils.zipfile import ZipFile
        with ZipFile(stream, 'w') as zf:
            zf.writestr(zip_file_name, raw)
    else:
        stream.write(raw)
def __call__(self, stream, odir, log):
    # Convert an ODT stream to XHTML + pictures + metadata.opf inside
    # *odir*; returns the absolute path of the generated OPF.
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata.odt import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator

    if not os.path.exists(odir):
        os.makedirs(odir)
    with CurrentDir(odir):
        log('Extracting ODT file...')
        stream.seek(0)
        mi = get_metadata(stream, 'odt')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        self.filter_load(stream, mi, log)
        html = self.xhtml()
        # A blanket img specification like this causes problems
        # with EPUB output as the containing element often has
        # an absolute height and width set that is larger than
        # the available screen real estate
        html = html.replace('img { width: 100%; height: 100%; }', '')
        # odf2xhtml creates empty title tag
        html = html.replace('<title></title>', '<title>%s</title>'%(mi.title,))
        try:
            html = self.fix_markup(html, log)
        except:
            log.exception('Failed to filter CSS, conversion may be slow')
        with open('index.xhtml', 'wb') as f:
            f.write(as_bytes(html))
        # Pictures are pulled straight out of the ODT zip container
        zf = ZipFile(stream, 'r')
        self.extract_pictures(zf)
        opf = OPFCreator(os.path.abspath(getcwd()), mi)
        opf.create_manifest([(os.path.abspath(f2), None) for f2 in
            walk(getcwd())])
        opf.create_spine([os.path.abspath('index.xhtml')])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
def dump(path):
    # Unpack the OOXML container at *path* into a '<name>-dumped' directory
    # next to the cwd and pretty-print every XML part in place.
    dest = os.path.splitext(os.path.basename(path))[0] + '-dumped'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as archive:
        archive.extractall(dest)
    for fpath in walk(dest):
        if fpath.endswith(('.xml', '.rels')):
            with open(fpath, 'r+b') as stream:
                tree = etree.fromstring(stream.read())
                stream.seek(0)
                stream.truncate()
                stream.write(etree.tostring(
                    tree, pretty_print=True, encoding='utf-8',
                    xml_declaration=True))
    print(path, 'dumped to', dest)
def __enter__(self, *args):
    '''
    Add this plugin's zip to sys.path so its contents become importable.
    If the plugin bundles native extensions (which cannot be imported
    from a zip) it is unpacked into a temporary directory first.
    '''
    if self.plugin_path is not None:
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(self.plugin_path)
        extensions = set([x.rpartition('.')[-1].lower() for x in
            zf.namelist()])
        zip_safe = True
        for ext in ('pyd', 'so', 'dll', 'dylib'):
            if ext in extensions:
                zip_safe = False
                break  # FIX: stop scanning once a native extension is found
        if zip_safe:
            sys.path.insert(0, self.plugin_path)
            self.sys_insertion_path = self.plugin_path
        else:
            # Unpack into a temp dir and put that on sys.path instead
            from calibre.ptempfile import TemporaryDirectory
            self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
            self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
            zf.extractall(self.sys_insertion_path)
            sys.path.insert(0, self.sys_insertion_path)
        zf.close()
def compile_content_server_translations(self):
    # Compile every content-server .po file into a JSON message catalog and
    # pack them all into resources/content-server/locales.zip, using a
    # per-file hash cache to skip unchanged sources.
    self.info('Compiling content-server translations')
    from calibre.utils.rapydscript import msgfmt
    from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZipInfo, ZIP_STORED
    with ZipFile(self.j(self.RESOURCES, 'content-server', 'locales.zip'),
                 'w', ZIP_DEFLATED) as zf:
        for src in glob.glob(os.path.join(self.TRANSLATIONS, 'content-server', '*.po')):
            data, h = self.hash_and_data(src)
            current_hash = h.digest()
            saved_hash, saved_data = self.read_cache(src)
            if current_hash == saved_hash:
                # Unchanged source: reuse the cached compiled catalog
                raw = saved_data
            else:
                # self.info('\tParsing ' + os.path.basename(src))
                raw = None
                po_data = data.decode('utf-8')
                data = json.loads(msgfmt(po_data))
                # Keep only entries that actually have a translation
                translated_entries = {k: v for k, v in iteritems(data['entries'])
                                      if v and sum(map(len, v))}
                data[u'entries'] = translated_entries
                data[u'hash'] = h.hexdigest()
                cdata = b'{}'
                if translated_entries:
                    raw = json.dumps(data, ensure_ascii=False, sort_keys=True)
                    if isinstance(raw, type(u'')):
                        raw = raw.encode('utf-8')
                    cdata = raw
                self.write_cache(cdata, current_hash, src)
            if raw:
                # Member name is the locale (po basename without extension)
                zi = ZipInfo(os.path.basename(src).rpartition('.')[0])
                zi.compress_type = ZIP_STORED if is_ci else ZIP_DEFLATED
                zf.writestr(zi, raw)
def get_metadata(stream):
    ''' Read metadata from a DOCX (zip) container.

    Parses the core and app document properties, and uses the first
    plausibly cover-sized raster image found in the archive as cover data.

    :param stream: a file-like object (or path) openable by ZipFile
    :return: a Metadata object; ``cover_data`` is ``(fmt, raw)`` when a
        suitable image was found
    '''
    with ZipFile(stream, 'r') as zf:
        mi = Metadata(_('Unknown'))
        cdata = None
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if zi.filename.lower() == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif zi.filename.lower() == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except Exception:
                    # Not a valid/parseable image, skip it
                    continue
                # Guard width > 0 to avoid ZeroDivisionError on degenerate
                # images; accept roughly portrait aspect ratios only
                if width > 0 and 0.8 <= height / width <= 1.8 and height * width >= 12000:
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata
        return mi
def compile_content_server_translations(self):
    ''' Compile the content-server .po files into locales.zip, keeping only
    messages that actually have a translation. '''
    self.info('\nCompiling content-server translations')
    from calibre.utils.rapydscript import msgfmt
    from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
    with ZipFile(self.j(self.RESOURCES, 'content-server', 'locales.zip'), 'w', ZIP_DEFLATED) as zf:
        for src in glob.glob(os.path.join(self.TRANSLATIONS, 'content-server', '*.po')):
            with open(src, 'rb') as f:
                po_data = f.read().decode('utf-8')
            data = json.loads(msgfmt(po_data))
            # .items() instead of the py2-only .iteritems() so this also
            # runs under python 3; keep only non-empty translations
            translated_entries = {
                k: v
                for k, v in data['entries'].items()
                if v and sum(map(len, v))
            }
            data['entries'] = translated_entries
            if translated_entries:
                raw = json.dumps(data, ensure_ascii=False, sort_keys=True)
                if isinstance(raw, type(u'')):
                    raw = raw.encode('utf-8')
                # Member named after the language code (basename sans .po)
                zi = ZipInfo(os.path.basename(src).rpartition('.')[0])
                zi.compress_type = ZIP_DEFLATED
                zf.writestr(zi, raw)
def handle_zip_of_opf_files(self, stream):
    ''' Given a zip up of a bunch of opf files, either merge them or add
    them to library.

    :param stream: file-like object containing the zip data; may also hold
        cover images named after the casanova id
    :return: dict with 'updated' and 'added' counts
    '''
    result = {'updated': 0, 'added': 0}
    with ZipFile(stream, 'r') as zf:
        self.start_applying_updates()
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if ext == 'opf':
                try:
                    raw = zf.open(zi)
                    opf = OPF(raw)
                    mi = opf.to_book_metadata()
                    casanova_id = self.extract_id(mi)
                    if casanova_id:
                        book_mi = self.get_casanova_metadata(casanova_id['id'])
                        if book_mi:
                            # Update an existing book's metadata!
                            result['updated'] = result['updated'] + 1
                            self.apply_metadata_update(
                                casanova_id['id'], book_mi, mi)
                        else:
                            # Create a new book entry
                            result['added'] = result['added'] + 1
                            self.model.db.import_book(mi, [])
                except Exception:
                    # A malformed OPF must not abort the whole batch; skip
                    # it and continue with the remaining archive members.
                    pass
            elif ext in {'jpg', 'png', 'gif'}:
                # try and handle the cover
                casanova_id = zi.filename.partition('.')[0].lower()
                if casanova_id in self.book_map:
                    book_id = self.book_map[casanova_id]
                    raw = zf.open(zi)
                    self.db.set_cover(book_id, raw)
        self.finish_applying_updates()
    return result
def convert_cbr_to_cbz(self):
    ''' Converts a rar or cbr-comic to a cbz-comic '''
    from calibre.utils.unrar import extract, comment
    with TemporaryDirectory('_cbr2cbz') as tdir:
        # extract the rar file
        ffile = self.db.format(self.book_id, self.format, as_path=True)
        extract(ffile, tdir)
        # preserve any archive comment before discarding the source file
        comments = comment(ffile)
        delete_temp_file(ffile)
        # make the cbz file
        with TemporaryFile("comic.cbz") as tf:
            # context manager guarantees the zip is closed (and the
            # comment flushed) even if add_dir raises
            with ZipFile(tf, "w") as zf:
                zf.add_dir(tdir)
                if comments:
                    zf.comment = comments.encode("utf-8")
            # add the cbz format to calibres library
            self.db.add_format(self.book_id, "cbz", tf)
        self.format = "cbz"
def embed_cix_metadata(self):
    ''' Embeds the cix_metadata (ComicInfo.xml) into the comic archive. '''
    from io import StringIO
    cix_string = ComicInfoXml().stringFromMetadata(self.comic_metadata)
    # ensure we have a temp file
    self.make_temp_cbz_file()
    if self.zipinfo is not None:
        # use the safe_replace function from calibre to prevent corruption
        # when replacing an existing ComicInfo.xml in place
        with open(self.file, 'r+b') as zf:
            safe_replace(zf, self.zipinfo,
                         StringIO(cix_string.decode('utf-8', 'ignore')))
    else:
        # no existing ComicInfo.xml: append a fresh one, making sure the
        # archive is closed even if writestr fails
        zf = ZipFile(self.file, "a")
        try:
            zf.writestr("ComicInfo.xml", cix_string.decode('utf-8', 'ignore'))
        finally:
            zf.close()
def _initialize_overlay(self): ''' Perform any additional initialization ''' self._log_location(self.ios_reader_app) self.assets_subpath = '/Media/Books/Sync/Database/OutstandingAssets_4.sqlite' self.books_subpath = '/Documents/BKLibrary_database/iBooks_*.sqlite' # Confirm/create folder size archive if not os.path.exists(self.cache_dir): self._log_diagnostic("creating folder cache at '%s'" % self.cache_dir) os.makedirs(self.cache_dir) self.folder_archive_path = os.path.join(self.cache_dir, "folders.zip") if not os.path.exists(self.folder_archive_path): self._log_diagnostic("creating folder cache") zfw = ZipFile(self.folder_archive_path, mode='w', compression=0) zfw.writestr("%s Folder Size Archive" % self.name, '') zfw.close() else: self._log_diagnostic("existing folder cache at '%s'" % self.folder_archive_path)
def get_cover(opf, opf_path, stream, reader=None):
    ''' Extract the raw cover image from an EPUB container stream.

    Returns the raster cover bytes when the OPF declares one and it exists
    in the archive (and is not encrypted); otherwise falls back to rendering
    the cover page. Returns None for an encrypted cover.
    '''
    raster_cover = opf.raster_cover
    stream.seek(0)
    zf = ZipFile(stream)
    if raster_cover:
        # The cover href is relative to the OPF's folder inside the container
        base = posixpath.dirname(opf_path)
        cpath = posixpath.normpath(posixpath.join(base, raster_cover))
        if reader is not None and \
                reader.encryption_meta.is_encrypted(cpath):
            return
        try:
            member = zf.getinfo(cpath)
        except KeyError:
            # Declared cover is missing from the archive: fall through to
            # rendering the cover page instead
            pass
        else:
            f = zf.open(member)
            data = f.read()
            f.close()
            zf.close()
            return data
    # No usable raster cover: render_cover needs the still-open archive
    return render_cover(opf, opf_path, zf, reader=reader)
class DOCX(object):

    '''
    Wrapper around a DOCX OPC container. Parts are accessed either directly
    from the zip archive or from an extracted temporary folder; helpers are
    provided for content types, relationships and document metadata.
    '''

    def __init__(self, path_or_stream, log=None, extract=True):
        # Assume transitional OOXML until read_package_relationships proves
        # otherwise (strict uses different relationship namespaces)
        self.docx_is_transitional = True
        stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            self.extract(stream)
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()
        self.namespace = DOCXNamespace(self.docx_is_transitional)

    def init_zipfile(self, stream):
        # Keep the archive open; read() serves parts straight from it
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        # Unpack the whole container to a temp folder; read() then serves
        # parts from disk instead of the archive
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)
        # Map zip-style part names ('a/b.xml') to extracted file paths
        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        # True if the container has a part with this name
        return name in self.names

    def read(self, name):
        # Return the raw bytes of a part, from the archive or the temp dir
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        # Parse [Content_Types].xml into per-extension defaults and
        # per-part-name overrides
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        # Resolve a part's MIME type: override, then extension default,
        # then a plain filename-based guess
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        # Parse the package-level _rels/.rels part; also detect strict vs
        # transitional OOXML from the main document's relationship type
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
                self.docx_is_transitional = typ != 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument'
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        # Name of the main document part, falling back to a filename scan
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        # Parsed XML tree of the main document part
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        # Read the .rels part belonging to *name*; returns ({id: target},
        # {type: target}) with internal targets made container-absolute
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            # No relationships part: return empty maps
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
        return by_id, by_type

    def get_document_properties_names(self):
        # Yields (core properties name, app properties name); either may be
        # None when the part is absent
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name

    @property
    def metadata(self):
        # Build a Metadata object from the core/app properties parts
        mi = Metadata(_('Unknown'))
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            # Fall back to the document default style's language
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)
        # NOTE(review): ap_name from get_document_properties_names() is
        # discarded and re-looked-up here without the filename fallback —
        # looks unintentional, confirm before changing
        ap_name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)
        return mi

    def close(self):
        # Release the archive or clean up the extraction folder
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass
def init_zipfile(self, stream):
    # Open the stream as a zip archive and cache an immutable set of the
    # member names for fast existence checks.
    archive = ZipFile(stream)
    self.zipf = archive
    self.names = frozenset(archive.namelist())
report_progress=notification, abort_after_input_dump=False) plumber.merge_ui_recommendations(recommendations) plumber.run() try: os.remove(cpath) except: pass if GENERATE_DEBUG_EPUB: from calibre.ebooks.epub import initialize_container from calibre.ebooks.tweak import zip_rebuilder from calibre.utils.zipfile import ZipFile input_path = os.path.join(catalog_debug_path, 'input') epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip') initialize_container(epub_shell, opf_name='content.opf') with ZipFile(epub_shell, 'r') as zf: zf.extractall(path=input_path) os.remove(epub_shell) zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub')) if opts.verbose: log.info(" Catalog creation complete (%s)\n" % str( datetime.timedelta(seconds=int(time.time() - opts.start_time)))) # returns to gui2.actions.catalog:catalog_generated() return catalog.error
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    ''' Serialize oeb_book as an HTMLZ archive (single HTML file plus CSS,
    images and OPF metadata, zipped) at output_path. '''
    from lxml import etree
    from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
    from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
    from calibre.utils.zipfile import ZipFile
    from calibre.utils.filenames import ascii_filename

    # HTML
    if opts.htmlz_css_type == 'inline':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
        OEB2HTMLizer = OEB2HTMLInlineCSSizer
    elif opts.htmlz_css_type == 'tag':
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
        OEB2HTMLizer = OEB2HTMLNoCSSizer
    else:
        from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

    with TemporaryDirectory(u'_htmlz_output') as tdir:
        htmlizer = OEB2HTMLizer(log)
        html = htmlizer.oeb2html(oeb_book, opts)

        fname = u'index'
        if opts.htmlz_title_filename:
            from calibre.utils.filenames import shorten_components_to
            fname = shorten_components_to(100, (ascii_filename(
                unicode_type(oeb_book.metadata.title[0])),))[0]
        with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
            if isinstance(html, unicode_type):
                html = html.encode('utf-8')
            tf.write(html)

        # CSS
        if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
            with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                tf.write(htmlizer.get_css(oeb_book))

        # Images
        images = htmlizer.images
        if images:
            if not os.path.exists(os.path.join(tdir, u'images')):
                os.makedirs(os.path.join(tdir, u'images'))
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES and item.href in images:
                    if item.media_type == SVG_MIME:
                        # SVG is stored as a parsed tree, serialize it
                        data = unicode_type(
                            etree.tostring(item.data, encoding=unicode_type))
                    else:
                        data = item.data
                    fname = os.path.join(tdir, u'images', images[item.href])
                    with open(fname, 'wb') as img:
                        img.write(data)

        # Cover
        cover_path = None
        try:
            cover_data = None
            if oeb_book.metadata.cover:
                term = oeb_book.metadata.cover[0].term
                cover_data = oeb_book.guide[term].item.data
            if cover_data:
                from calibre.utils.img import save_cover_data_to
                cover_path = os.path.join(tdir, u'cover.jpg')
                with lopen(cover_path, 'w') as cf:
                    cf.write('')
                save_cover_data_to(cover_data, cover_path)
        except Exception:
            # Cover failures are non-fatal: log and continue without one
            import traceback
            traceback.print_exc()

        # Metadata
        with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
            opf = OPF(io.BytesIO(etree.tostring(
                oeb_book.metadata.to_opf1(), encoding='UTF-8')))
            mi = opf.to_book_metadata()
            if cover_path:
                mi.cover = u'cover.jpg'
            mdataf.write(metadata_to_opf(mi))

        # BUGFIX: use a context manager so the archive is closed and all
        # data is flushed to output_path (the original never closed it)
        with ZipFile(output_path, 'w') as htmlz:
            htmlz.add_dir(tdir)
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        # Plain text: just the title, if there is one
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Pick the first non-empty language declared in the OPF metadata
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Build a minimal manifest (start page + NCX) and a one-entry spine
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)

    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')

    # Fill the bundled start-page template with language, title and authors
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # Write the three parts to a temp dir and convert them to AZW3
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        # EPUB: mimetype must be first and stored uncompressed
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process."""
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        # os.getcwdu only exists under python 2
        cwd = os.getcwdu() if sys.version_info.major == 2 else os.getcwd()
        zf.extractall(cwd)
    except Exception:
        log.exception("KEPUB appears to be invalid ZIP file, trying a "
                      "more forgiving ZIP parser")
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    opf = self.find_opf()
    if opf is None:
        # Fall back to scanning the extracted tree for any OPF, skipping
        # macOS resource-fork folders and hidden files
        for f in walk("."):
            if (f.lower().endswith(".opf") and "__MACOSX" not in f
                    and not os.path.basename(f).startswith(".")):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")
    if opf is None:
        raise ValueError(
            _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)").format(path))
    # A rights.xml part indicates DRM-protected content
    encfile = os.path.abspath("rights.xml")
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))
    cwd = os.getcwdu() if sys.version_info.major == 2 else os.getcwd()
    opf = os.path.relpath(opf, cwd)
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
    self.encrypted_fonts = []
    if len(parts) > 1 and parts[0]:
        # The OPF lives in a subfolder: rewrite manifest/guide hrefs so
        # they are relative to the extraction root
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))
    # Normalize the cover according to the OPF package version
    f = (self.rationalize_cover3 if opf.package_version >= 3.0
         else self.rationalize_cover2)
    self.removed_cover = f(opf, log)
    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError(
                _("EPUB files with DTBook markup are not supported")  # noqa: F821
            )
    # Collect manifest ids that must not appear in the spine
    # (Adobe page templates and plain text parts)
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in {
                "application/vnd.adobe-page-template+xml",
                "application/vnd.adobe.page-template+xml",
                "application/adobe-page-template+xml",
                "application/adobe.page-template+xml",
                "application/text",
        }:
            not_for_spine.add(id_)
    seen = set()
    # Drop spine entries that are empty, excluded, or duplicated
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)
    if len(list(opf.iterspine())) == 0:
        raise ValueError(
            _("No valid entries in the spine of this EPUB")  # noqa: F821
        )
    # Write the normalized OPF for the rest of the pipeline to consume
    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())
    return os.path.abspath("content.opf")
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    # Resolve recipe_or_file into a compiled recipe object, run the
    # download, and return the path to the generated OPF in the cwd.
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # A zip containing download.recipe plus already-downloaded content
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        self.recipe_source = open(u'download.recipe', 'rb').read()
        recipe = compile_recipe(self.recipe_source)
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.access(recipe_or_file, os.R_OK):
            # A readable recipe file on disk
            self.recipe_source = open(recipe_or_file, 'rb').read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Otherwise treat the argument as the title of a builtin recipe
            from calibre.web.feeds.recipes.collection import \
                get_builtin_recipe_by_title
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            raw = get_builtin_recipe_by_title(title, log=log,
                    download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' % \
                        ('.'.join(recipe.requires_version)))
                builtin = True
            except:
                # Downloaded recipe failed to compile: use the bundled copy
                log.exception('Failed to compile downloaded recipe. Falling '
                              'back to builtin one')
                builtin = True
            if builtin:
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                        download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: '+title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
        if recipe is None:
            raise ValueError('%r is not a valid recipe file or builtin recipe' % recipe_or_file)
        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro
    # Recipes may override conversion options
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The download left its output in the cwd; locate the generated OPF
    for f in os.listdir(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
class DOCX(object):

    '''
    Wrapper around a DOCX OPC container (older, non-namespaced variant).
    Parts are served either directly from the zip archive or from an
    extracted temporary folder.
    '''

    def __init__(self, path_or_stream, log=None, extract=True):
        stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
            self.extract(stream)
        else:
            self.init_zipfile(stream)
        self.read_content_types()
        self.read_package_relationships()

    def init_zipfile(self, stream):
        # Keep the archive open; read() serves parts straight from it
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        # Unpack the container to a temp folder; read() then serves parts
        # from disk instead of the archive
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                    ' more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)
        # Map zip-style part names ('a/b.xml') to extracted file paths
        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        # True if the container has a part with this name
        return name in self.names

    def read(self, name):
        # Return the raw bytes of a part, from the archive or the temp dir
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        # Parse [Content_Types].xml into per-extension defaults and
        # per-part-name overrides
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
        root = fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        # Resolve a part's MIME type: override, then extension default,
        # then a plain filename-based guess
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return guess_type(name)[0]

    def read_package_relationships(self):
        # Parse the package-level _rels/.rels part into type->target and
        # target->type maps
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
        root = fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        # Name of the main document part, falling back to a filename scan
        name = self.relationships.get(DOCUMENT, None)
        if name is None:
            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        # Parsed XML tree of the main document part
        return fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        # Read the .rels part belonging to *name*; returns ({id: target},
        # {type: target}) with internal targets made container-absolute
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            # No relationships part: return empty maps
            pass
        else:
            root = fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
                target = item.get('Target')
                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target
        return by_id, by_type

    @property
    def metadata(self):
        # Build a Metadata object from the core/app properties parts,
        # falling back to well-known part names when relationships are absent
        mi = Metadata(_('Unknown'))
        name = self.relationships.get(DOCPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi)
        if mi.is_null('language'):
            # Fall back to the document default style's language
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi)
        name = self.relationships.get(APPPROPS, None)
        if name is None:
            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        if name:
            try:
                raw = self.read(name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)
        return mi

    def close(self):
        # Release the archive or clean up the extraction folder
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass