def container_diff(left, right):
    '''
    Work out which names differ between the containers left and right.

    :return: The tuple ``(cache, syntax_map, changed_names, renamed_names,
        removed_names, added_names)``. Everything except ``syntax_map`` is
        returned unchanged from changed_files(); ``syntax_map`` maps every
        reported name to the result of syntax_from_mime() for that name.
    '''
    left_names = set(left.name_path_map)
    right_names = set(right.name_path_map)

    if left.cloned or right.cloned:
        # Since containers are often clones of each other, as a performance
        # optimization, discard identical names that point to the same
        # physical file, without needing to read the file's contents.

        # Dirtied names have to be committed first
        Container.commit(left, keep_parsed=True)
        Container.commit(right, keep_parsed=True)

        samefile_names = set()
        for name in left_names & right_names:
            if samefile(left.name_path_map[name], right.name_path_map[name]):
                samefile_names.add(name)
        left_names.difference_update(samefile_names)
        right_names.difference_update(samefile_names)

    cache, changed_names, renamed_names, removed_names, added_names = changed_files(
        left_names, right_names, left.raw_data, right.raw_data)

    # Added names get their mime type from right, every other group from left
    lookup_plan = (
        (changed_names, left),
        (renamed_names, left),
        (added_names, right),
        (removed_names, left),
    )
    syntax_map = {}
    for group, container in lookup_plan:
        for name in group:
            syntax_map[name] = syntax_from_mime(name, container.mime_map[name])

    return cache, syntax_map, changed_names, renamed_names, removed_names, added_names
def container_diff(left, right):
    '''
    Compare two containers and report the names that differ between them.

    :return: ``(cache, syntax_map, changed_names, renamed_names,
        removed_names, added_names)`` where all values but ``syntax_map``
        are the results of changed_files() and ``syntax_map`` holds the
        syntax_from_mime() value for each name in those result groups.
    '''
    left_names, right_names = set(left.name_path_map), set(right.name_path_map)

    if left.cloned or right.cloned:
        # Since containers are often clones of each other, as a performance
        # optimization, discard identical names that point to the same
        # physical file, without needing to read the file's contents.
        # First commit dirtied names
        for container in (left, right):
            Container.commit(container, keep_parsed=True)

        def points_to_same_file(name):
            return samefile(left.name_path_map[name], right.name_path_map[name])

        samefile_names = set(filter(points_to_same_file, left_names & right_names))
        left_names = left_names - samefile_names
        right_names = right_names - samefile_names

    result = changed_files(left_names, right_names, left.raw_data, right.raw_data)
    cache, changed_names, renamed_names, removed_names, added_names = result

    def syntaxes(container, names):
        return {
            name: syntax_from_mime(name, container.mime_map[name])
            for name in names
        }

    syntax_map = syntaxes(left, changed_names)
    syntax_map.update(syntaxes(left, renamed_names))
    syntax_map.update(syntaxes(right, added_names))
    syntax_map.update(syntaxes(left, removed_names))
    return cache, syntax_map, changed_names, renamed_names, removed_names, added_names
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
    '''
    Build the render container for a book and write its JSON manifest.

    extract_book() is run on ``path_to_ebook`` with ``tdir`` as its target
    and the base container is initialised on the OPF it reports. The render
    metadata is collected in ``self.book_render_data`` and written to
    ``calibre-book-manifest.json`` inside ``self.root``.

    :param path_to_ebook: Passed through to extract_book().
    :param tdir: Directory handed to extract_book() and to the base
        container initialiser.
    :param log: Logger to use; ``default_log`` when falsy.
    :param book_hash: Stored unchanged under the ``book_hash`` manifest key.
    '''
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)

    # These names are left out of data['files'] and deleted from disk below.
    # items() is used rather than iteritems() because the latter only exists
    # on Python 2 mappings.
    ncx_mime = guess_type('a.ncx')
    excluded_names = {
        name for name, mt in self.mime_map.items()
        if name == self.opf_name or mt == ncx_mime or
        name.startswith('META-INF/') or name == 'mimetype'
    }

    self.book_render_data = data = {
        'version': RENDER_VERSION,
        'toc': get_toc(self).as_dict,
        'spine': [name for name, is_linear in self.spine_names],
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
    }

    # Mark the spine as dirty since we have to ensure it is normalized
    for name in data['spine']:
        self.parsed(name)
        self.dirty(name)

    self.virtualized_names = set()
    self.virtualize_resources()

    def manifest_data(name):
        return {
            'size': os.path.getsize(self.name_path_map[name]),
            'is_virtualized': name in self.virtualized_names,
            'mimetype': self.mime_map.get(name),
        }

    data['files'] = {
        name: manifest_data(name)
        for name in set(self.name_path_map) - excluded_names
    }
    self.commit()
    for name in excluded_names:
        os.remove(self.name_path_map[name])

    # With ensure_ascii=False json.dumps() can return either text or, on
    # Python 2, an already encoded byte string; only text needs encoding.
    manifest = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(manifest, bytes):
        manifest = manifest.encode('utf-8')
    with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(manifest)
def __init__(self, path_to_ebook, tdir, log=None):
    '''
    Build the render container for a book and write its JSON manifest.

    extract_book() is run on ``path_to_ebook`` with ``tdir`` as its target
    and the base container is initialised on the OPF it reports. The render
    metadata is kept in ``self.book_render_data`` and written to
    ``calibre-book-manifest.json`` inside ``self.root``.

    :param path_to_ebook: Passed through to extract_book().
    :param tdir: Directory handed to extract_book() and to the base
        container initialiser.
    :param log: Logger to use; ``default_log`` when falsy.
    '''
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)

    # These names are kept out of the 'manifest' list and deleted from disk
    # below. items() is used rather than iteritems() because the latter only
    # exists on Python 2 mappings.
    ncx_mime = guess_type('a.ncx')
    excluded_names = {
        name for name, mt in self.mime_map.items()
        if name == self.opf_name or mt == ncx_mime or
        name.startswith('META-INF/')
    }

    self.book_render_data = data = {
        'version': self.RENDER_VERSION,
        'toc': get_toc(self).as_dict,
        'spine': [name for name, is_linear in self.spine_names],
        'link_uid': uuid4(),
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        'manifest': list(set(self.name_path_map) - excluded_names),
    }

    # Mark the spine as dirty since we have to ensure it is normalized
    for name in data['spine']:
        self.parsed(name)
        self.dirty(name)

    self.virtualize_resources()
    self.commit()
    for name in excluded_names:
        os.remove(self.name_path_map[name])

    # With ensure_ascii=False json.dumps() can return either text or, on
    # Python 2, an already encoded byte string; only text needs encoding.
    manifest = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(manifest, bytes):
        manifest = manifest.encode('utf-8')
    with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(manifest)
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
    '''
    Build the render container for a book and write its JSON manifest.

    extract_book() is run on ``path_to_ebook`` with ``tdir`` as its target
    and the base container is initialised on the OPF it reports. A cover
    page is created, CSS is transformed, resources are virtualized and
    per-file data is gathered into ``self.book_render_data``, which is then
    written to ``calibre-book-manifest.json`` inside ``self.root``.

    :param path_to_ebook: Passed through to extract_book().
    :param tdir: Directory handed to extract_book() and to the base
        container initialiser.
    :param log: Logger to use; ``default_log`` when falsy.
    :param book_hash: Stored unchanged under the ``book_hash`` manifest key.
    '''
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)

    # These names are left out of data['files'] and deleted from disk below.
    # items() is used rather than iteritems() because the latter only exists
    # on Python 2 mappings.
    ncx_mime = guess_type('a.ncx')
    excluded_names = {
        name for name, mt in self.mime_map.items()
        if name == self.opf_name or mt == ncx_mime or
        name.startswith('META-INF/') or name == 'mimetype'
    }

    raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())
    self.book_render_data = data = {
        'version': RENDER_VERSION,
        'toc': get_toc(self).as_dict,
        'spine': [name for name, is_linear in self.spine_names],
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
    }

    # Mark the spine as dirty since we have to ensure it is normalized
    for name in data['spine']:
        self.parsed(name)
        self.dirty(name)

    self.transform_css()
    self.virtualized_names = set()
    self.virtualize_resources()

    def manifest_data(name):
        # Describes one file; for HTML files it also accumulates the
        # book-wide length totals and the has_maths flag as a side effect.
        mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(self.name_path_map[name]),
            'is_virtualized': name in self.virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            root = self.parsed(name)
            ans['length'] = l = get_length(root)
            self.book_render_data['total_length'] += l
            if name in data['spine']:
                self.book_render_data['spine_length'] += l
            ans['has_maths'] = hm = check_for_maths(root)
            if hm:
                self.book_render_data['has_maths'] = True
        return ans

    data['files'] = {
        name: manifest_data(name)
        for name in set(self.name_path_map) - excluded_names
    }
    self.commit()
    for name in excluded_names:
        os.remove(self.name_path_map[name])

    # With ensure_ascii=False json.dumps() can return either text or, on
    # Python 2, an already encoded byte string; only text needs encoding.
    manifest = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(manifest, bytes):
        manifest = manifest.encode('utf-8')
    with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(manifest)
def serialize_item(self, name):
    '''
    Return the serialized bytes for ``name``.

    Names whose mime type is in OEB_STYLES are serialized by the base class,
    stripped of leading whitespace and given a UTF-8 ``@charset`` rule when
    they do not already start with one. Names whose mime type is in OEB_DOCS
    are returned as compact UTF-8 encoded JSON of html_as_dict(). Everything
    else is serialized by the base class unchanged.
    '''
    mime = (self.mime_map[name] or '').lower()

    if mime in OEB_STYLES:
        css = ContainerBase.serialize_item(self, name).lstrip()
        if css.startswith(b'@charset'):
            return css
        return b'@charset "UTF-8";\n' + css

    if mime in OEB_DOCS:
        tree = self.parsed(name)
        as_json = json.dumps(
            html_as_dict(tree), ensure_ascii=False, separators=(',', ':'))
        return as_json.encode('utf-8')

    return ContainerBase.serialize_item(self, name)
def import_book_as_epub(srcpath, destpath, log=default_log):
    '''
    Run the input plugin for ``srcpath`` and write the result to ``destpath``
    as an EPUB.

    :param srcpath: Path of the book to import; passed to Plumber.
    :param destpath: Output path; must end in ``.epub`` (case insensitive).
    :param log: Logger handed to Plumber, the input plugin and Container.
    :raises ValueError: If ``destpath`` does not end in ``.epub``.
    '''
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' % (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        # Needed to handle the multiple levels of symlinks for /tmp on OS X
        tdir = os.path.abspath(os.path.realpath(tdir))
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts, plumber.input_fmt, log, {}, tdir)
            if hasattr(pathtoopf, 'manifest'):
                # The plugin returned an object with a manifest instead of a
                # path; write_oebbook() turns it into a path inside tdir
                from calibre.ebooks.oeb.iterator.book import write_oebbook
                pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)

        # Auto fix all HTML/CSS. The set of mime types is built once here
        # instead of once per manifest entry.
        fixable_types = set(OEB_DOCS) | set(OEB_STYLES)
        for name, mt in iteritems(c.mime_map):
            if mt in fixable_types:
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object; converted with metadata_to_opf() and used for
        the title and authors of the start page.
    :param path: Where the book is written.
    :param fmt: ``'azw3'`` goes through opf_to_azw3(); any other value
        produces a zip based (EPUB) file.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX file inside the book.
    '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Use the first non-empty language element, if there is one
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (start page + NCX) and a spine to the OPF
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)

    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')

    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)

    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # 0o755 is the octal spelling accepted by both Python 2.6+ and
            # Python 3; the bare 0755 form is a syntax error on Python 3
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def serialize_item(self, name):
    '''
    Return the serialized bytes for ``name``.

    Names whose lower-cased mime type is in OEB_DOCS are returned as compact
    UTF-8 encoded JSON of html_as_dict(); every other name is serialized by
    the base class.
    '''
    mime = (self.mime_map[name] or '').lower()
    if mime in OEB_DOCS:
        tree = self.parsed(name)
        as_json = json.dumps(
            html_as_dict(tree), ensure_ascii=False, separators=(',', ':'))
        return as_json.encode('utf-8')
    return ContainerBase.serialize_item(self, name)
def serialize_item(self, name):
    '''
    Return the serialized bytes for ``name``.

    Names whose mime type is not in OEB_DOCS are serialized by the base
    class. For the others, all comment nodes are removed from the parsed
    tree, escape_cdata() is applied and the tree is serialized as UTF-8 with
    an XML declaration and an HTML doctype.
    '''
    if self.mime_map[name] not in OEB_DOCS:
        return ContainerBase.serialize_item(self, name)

    # Normalize markup
    tree = self.parsed(name)
    # Collect the comments first so the tree is not modified while iterating
    comments = tuple(tree.iterdescendants(Comment))
    for node in comments:
        parent = node.getparent()
        parent.remove(node)
    escape_cdata(tree)
    return tostring(
        tree,
        encoding='utf-8',
        xml_declaration=True,
        with_tail=False,
        doctype='<!DOCTYPE html>',
    )
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
    '''
    Build the render container for a book and write its JSON manifest.

    extract_book() is run on ``path_to_ebook`` with ``tdir`` as its target
    and the base container is initialised on the OPF it reports. The render
    metadata (ToC, spine, landmarks, per-file data) is gathered into
    ``self.book_render_data`` and written to ``calibre-book-manifest.json``
    inside ``self.root``.

    :param path_to_ebook: Passed through to extract_book().
    :param tdir: Directory handed to extract_book() and to the base
        container initialiser.
    :param log: Logger to use; ``default_log`` when falsy.
    :param book_hash: Stored unchanged under the ``book_hash`` manifest key.
    '''
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)

    def should_exclude(name, mt):
        if name == self.opf_name or mt == guess_type('a.ncx'):
            return True
        if name.startswith('META-INF/') or name == 'mimetype':
            return True
        # We do not add zero byte sized files as the IndexedDB API in the
        # browser has no good way to distinguish between zero byte files and
        # load failures.
        return not self.has_name_and_is_not_empty(name)

    excluded_names = set()
    for name, mt in iteritems(self.mime_map):
        if should_exclude(name, mt):
            excluded_names.add(name)

    raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())
    toc = get_toc(self).to_dict(count())
    spine = []
    for name, is_linear in self.spine_names:
        spine.append(name)
    spineq = frozenset(spine)
    # Only landmarks that point into the spine are kept
    landmarks = []
    for landmark in get_landmarks(self):
        if landmark['dest'] in spineq:
            landmarks.append(landmark)

    data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }
    self.book_render_data = data

    # Mark the spine as dirty since we have to ensure it is normalized
    for spine_name in data['spine']:
        self.parsed(spine_name)
        self.dirty(spine_name)
    self.transform_css()
    self.virtualized_names = set()
    self.virtualize_resources()

    def manifest_data(name):
        # Describes one file; for HTML files it also accumulates the
        # book-wide length totals and the has_maths flag as a side effect.
        mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(self.name_path_map[name]),
            'is_virtualized': name in self.virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if not ans['is_html']:
            return ans
        root = self.parsed(name)
        length = get_length(root)
        ans['length'] = length
        self.book_render_data['total_length'] += length
        if name in data['spine']:
            self.book_render_data['spine_length'] += length
        has_maths = check_for_maths(root)
        ans['has_maths'] = has_maths
        if has_maths:
            self.book_render_data['has_maths'] = True
        ans['anchor_map'] = anchor_map(root)
        return ans

    files = {}
    for name in set(self.name_path_map) - excluded_names:
        files[name] = manifest_data(name)
    data['files'] = files

    self.commit()
    for name in excluded_names:
        os.remove(self.name_path_map[name])

    serialized = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(serialized, bytes):
        serialized = serialized.encode('utf-8')
    manifest_path = os.path.join(self.root, 'calibre-book-manifest.json')
    with lopen(manifest_path, 'wb') as f:
        f.write(serialized)
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_bookmark_data=False, book_metadata=None):
    '''
    Build the render container for a book and write its JSON manifest.

    extract_book() is run on ``path_to_ebook`` with ``tdir`` as its target
    and the base container is initialised on the OPF it reports. The render
    metadata (ToC, spine, landmarks, per-file data) is gathered into
    ``self.book_render_data`` and written to ``calibre-book-manifest.json``
    inside ``self.root``.

    :param path_to_ebook: Passed through to extract_book().
    :param tdir: Directory handed to extract_book() and to the base
        container initialiser.
    :param log: Logger to use; ``default_log`` when falsy.
    :param book_hash: Stored unchanged under the ``book_hash`` manifest key.
    :param save_bookmark_data: When true, ``self.bookmark_data`` is set to
        the contents of ``META-INF/calibre_bookmarks.txt`` or to None if
        that file does not exist.
    :param book_metadata: Stored unchanged as ``self.book_metadata``.
    '''
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)
    self.book_metadata = book_metadata

    if save_bookmark_data:
        # Must be read now: everything under META-INF/ is deleted below
        self.bookmark_data = None
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if self.exists(bm_file):
            with self.open(bm_file, 'rb') as f:
                self.bookmark_data = f.read()

    def should_exclude(name, mt):
        if name == self.opf_name or mt == guess_type('a.ncx'):
            return True
        if name.startswith('META-INF/') or name == 'mimetype':
            return True
        # We do not add zero byte sized files as the IndexedDB API in the
        # browser has no good way to distinguish between zero byte files and
        # load failures.
        return not self.has_name_and_is_not_empty(name)

    excluded_names = set()
    for name, mt in iteritems(self.mime_map):
        if should_exclude(name, mt):
            excluded_names.add(name)

    raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())

    toc = get_toc(self).to_dict(count())
    if not toc or not toc.get('children'):
        # No usable ToC entries: build one from the headings instead
        heading_xpaths = ['//h:h1', '//h:h2', '//h:h3']
        toc = from_xpaths(self, heading_xpaths).to_dict(count())

    spine = []
    for name, is_linear in self.spine_names:
        spine.append(name)
    spineq = frozenset(spine)
    # Only landmarks that point into the spine are kept
    landmarks = []
    for landmark in get_landmarks(self):
        if landmark['dest'] in spineq:
            landmarks.append(landmark)

    data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }
    self.book_render_data = data

    # Mark the spine as dirty since we have to ensure it is normalized
    for spine_name in data['spine']:
        self.parsed(spine_name)
        self.dirty(spine_name)
    self.transform_css()
    self.virtualized_names = set()
    self.virtualize_resources()

    def manifest_data(name):
        # Describes one file; for HTML files it also accumulates the
        # book-wide length totals and the has_maths flag as a side effect.
        mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(self.name_path_map[name]),
            'is_virtualized': name in self.virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if not ans['is_html']:
            return ans
        root = self.parsed(name)
        length = get_length(root)
        ans['length'] = length
        self.book_render_data['total_length'] += length
        if name in data['spine']:
            self.book_render_data['spine_length'] += length
        has_maths = check_for_maths(root)
        ans['has_maths'] = has_maths
        if has_maths:
            self.book_render_data['has_maths'] = True
        ans['anchor_map'] = anchor_map(root)
        return ans

    files = {}
    for name in set(self.name_path_map) - excluded_names:
        files[name] = manifest_data(name)
    data['files'] = files

    self.commit()
    for name in excluded_names:
        os.remove(self.name_path_map[name])

    serialized = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(serialized, bytes):
        serialized = serialized.encode('utf-8')
    manifest_path = os.path.join(self.root, 'calibre-book-manifest.json')
    with lopen(manifest_path, 'wb') as f:
        f.write(serialized)
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
    '''
    Build the render container for a book and write its JSON manifest.

    extract_book() is run on ``path_to_ebook`` with ``tdir`` as its target
    and the base container is initialised on the OPF it reports. The render
    metadata (ToC, spine, landmarks, per-file data) is gathered into
    ``self.book_render_data`` and written to ``calibre-book-manifest.json``
    inside ``self.root``.

    :param path_to_ebook: Passed through to extract_book().
    :param tdir: Directory handed to extract_book() and to the base
        container initialiser.
    :param log: Logger to use; ``default_log`` when falsy.
    :param book_hash: Stored unchanged under the ``book_hash`` manifest key.
    '''
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)

    # These names are left out of data['files'] and deleted from disk below.
    # items() is used rather than iteritems() because the latter only exists
    # on Python 2 mappings.
    ncx_mime = guess_type('a.ncx')
    excluded_names = {
        name for name, mt in self.mime_map.items()
        if name == self.opf_name or mt == ncx_mime or
        name.startswith('META-INF/') or name == 'mimetype'
    }

    raster_cover_name, titlepage_name = self.create_cover_page(
        input_fmt.lower())
    toc = get_toc(self).to_dict(count())
    spine = [name for name, is_linear in self.spine_names]
    spineq = frozenset(spine)
    # Only landmarks that point into the spine are kept
    landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]

    self.book_render_data = data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    # Mark the spine as dirty since we have to ensure it is normalized
    for name in data['spine']:
        self.parsed(name)
        self.dirty(name)

    self.transform_css()
    self.virtualized_names = set()
    self.virtualize_resources()

    def manifest_data(name):
        # Describes one file; for HTML files it also accumulates the
        # book-wide length totals and the has_maths flag as a side effect.
        mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(self.name_path_map[name]),
            'is_virtualized': name in self.virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            root = self.parsed(name)
            ans['length'] = l = get_length(root)
            self.book_render_data['total_length'] += l
            if name in data['spine']:
                self.book_render_data['spine_length'] += l
            ans['has_maths'] = hm = check_for_maths(root)
            if hm:
                self.book_render_data['has_maths'] = True
            ans['anchor_map'] = anchor_map(root)
        return ans

    data['files'] = {
        name: manifest_data(name)
        for name in set(self.name_path_map) - excluded_names
    }
    self.commit()
    for name in excluded_names:
        os.remove(self.name_path_map[name])

    # With ensure_ascii=False json.dumps() can return either text or, on
    # Python 2, an already encoded byte string; only text needs encoding.
    manifest = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(manifest, bytes):
        manifest = manifest.encode('utf-8')
    with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(manifest)
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object supplying the title, authors and OPF data.
    :param path: Where the book is written.
    :param fmt: Must be one of ``valid_empty_formats``. ``'txt'`` and
        ``'docx'`` are written directly, ``'azw3'`` goes through
        opf_to_azw3() and anything else produces a zip based (EPUB) file.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX file inside the book.
    :raises ValueError: If ``fmt`` is not in ``valid_empty_formats``.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)

    if fmt == 'txt':
        # A text book is just the title, or an empty file without one
        with open(path, 'wb') as out:
            if not mi.is_null('title'):
                out.write(as_bytes(mi.title))
        return

    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        plumber = Plumber('a.docx', 'b.docx', default_log)
        plumber.setup_options()
        # Use the word default of one inch page margins
        for side in ('left', 'right', 'top', 'bottom'):
            setattr(plumber.opts, 'margin_' + side, 72)
        writer = DOCX(plumber.opts, default_log)
        writer.write(path, mi, create_empty_document=True)
        return

    path = os.path.abspath(path)
    opf = metadata_to_opf(mi, as_string=False)

    # First non-empty language element wins, 'und' if there is none
    lang = next(
        (el.text for el in opf.xpath('//*[local-name()="language"]') if el.text),
        'und')
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (start page + NCX) and a spine to the OPF
    opfns = OPF_NAMESPACES['opf']
    item_tag = '{%s}item' % opfns
    manifest = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, manifest)
    for href, item_id, type_source in (
        (html_name, 'start', 'a.xhtml'),
        (toc_name, 'ncx', toc_name),
    ):
        item = manifest.makeelement(item_tag, href=href, id=item_id)
        item.set('media-type', guess_type(type_source))
        manifest.append(item)
    spine = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, spine)
    spine.append(spine.makeelement('{%s}itemref' % opfns, idref='start'))

    container_template = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''
    container_xml = container_template.format(
        prepare_string_for_xml(opf_name, True)).encode('utf-8')

    # Fill in the start page template
    markup = P('templates/new_book.html', data=True).decode('utf-8')
    markup = markup.replace('_LANGUAGE_', prepare_string_for_xml(lang, True))
    markup = markup.replace('_TITLE_', prepare_string_for_xml(mi.title))
    authors = authors_to_string(mi.authors)
    markup = markup.replace('_AUTHORS_', prepare_string_for_xml(authors))
    html_tree = parse(markup.encode('utf-8'))
    pretty_html_tree(None, html_tree)
    html_bytes = serialize(html_tree, 'text/html')

    ncx_bytes = etree.tostring(
        create_toc(mi, opf, html_name, lang),
        encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf_bytes = etree.tostring(
        opf, encoding='utf-8', xml_declaration=True, pretty_print=True)

    book_files = (
        (opf_name, opf_bytes),
        (html_name, html_bytes),
        (toc_name, ncx_bytes),
    )

    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in book_files:
                with open(name, 'wb') as out:
                    out.write(data)
            root = os.path.dirname(os.path.abspath(opf_name))
            container = Container(root, opf_name, DevNull())
            opf_to_azw3(opf_name, path, container)
        return

    with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
        zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
        zf.writestr('META-INF/', b'', 0o755)
        zf.writestr('META-INF/container.xml', container_xml)
        for name, data in book_files:
            zf.writestr(name, data)
def __init__(self, opf_path, log, root_dir=None):
    '''
    Initialise the base container for the OPF at ``opf_path``.

    :param opf_path: Path to the OPF file.
    :param log: Logger handed to the base container.
    :param root_dir: Container root; when falsy the directory containing
        ``opf_path`` is used instead.
    '''
    if root_dir:
        root = root_dir
    else:
        root = os.path.dirname(opf_path)
    ContainerBase.__init__(self, root, opf_path, log)