def create_from_xpath(self, xpaths, remove_duplicates=True):
    """Build a Table of Contents fragment from XPath matches and insert it.

    The fragment is produced by ``from_xpaths(self.ebook, xpaths)``. When it
    contains no entries an error dialog is shown and whatever
    ``error_dialog`` returns is returned to the caller. Otherwise the
    fragment is optionally de-duplicated and then handed to
    ``self.insert_toc_fragment``.

    :param xpaths: XPath expressions forwarded unchanged to ``from_xpaths``.
    :param remove_duplicates: When true, call ``remove_duplicates()`` on the
        generated fragment before inserting it.
    """
    fragment = from_xpaths(self.ebook, xpaths)

    # Nothing matched: tell the user instead of inserting an empty fragment.
    if len(fragment) == 0:
        title = _('No items found')
        message = _('No items were found that could be added to the Table of Contents.')
        return error_dialog(self, title, message, show=True)

    if remove_duplicates:
        fragment.remove_duplicates()
    self.insert_toc_fragment(fragment)
def create_from_xpath(self, xpaths, remove_duplicates=True, prefer_title=False):
    """Build a Table of Contents fragment from XPath matches and insert it.

    The fragment is produced by
    ``from_xpaths(self.ebook, xpaths, prefer_title=prefer_title)``. When it
    contains no entries an error dialog is shown and whatever
    ``error_dialog`` returns is returned to the caller. Otherwise the
    fragment is optionally de-duplicated and then handed to
    ``self.insert_toc_fragment``.

    :param xpaths: XPath expressions forwarded unchanged to ``from_xpaths``.
    :param remove_duplicates: When true, call ``remove_duplicates()`` on the
        generated fragment before inserting it.
    :param prefer_title: Forwarded as the ``prefer_title`` keyword of
        ``from_xpaths``; its exact effect is defined there (presumably it
        prefers an element's title over its text -- confirm against
        ``from_xpaths``).
    """
    fragment = from_xpaths(self.ebook, xpaths, prefer_title=prefer_title)

    # Nothing matched: tell the user instead of inserting an empty fragment.
    if len(fragment) == 0:
        title = _('No items found')
        message = _('No items were found that could be added to the Table of Contents.')
        return error_dialog(self, title, message, show=True)

    if remove_duplicates:
        fragment.remove_duplicates()
    self.insert_toc_fragment(fragment)
def create_from_xpath(self, xpaths):
    """Build a Table of Contents fragment from XPath matches and insert it.

    The fragment is produced by ``from_xpaths(self.ebook, xpaths)``. When it
    contains no entries an error dialog is shown and whatever
    ``error_dialog`` returns is returned to the caller; otherwise the
    fragment is handed to ``self.insert_toc_fragment``.

    :param xpaths: XPath expressions forwarded unchanged to ``from_xpaths``.
    """
    fragment = from_xpaths(self.ebook, xpaths)

    # Nothing matched: tell the user instead of inserting an empty fragment.
    if len(fragment) == 0:
        title = _('No items found')
        message = _('No items were found that could be added to the Table of Contents.')
        return error_dialog(self, title, message, show=True)

    self.insert_toc_fragment(fragment)
def process_exploded_book(
    book_fmt, opfpath, input_fmt, tdir, render_manager, log=None, book_hash=None,
    save_bookmark_data=False, book_metadata=None, virtualize_resources=True
):
    """Render an already exploded book and write its JSON manifest.

    A ``SimpleContainer`` is opened on *tdir*/*opfpath*, the stylesheets,
    HTML documents and SVG images are handed to *render_manager*, and the
    per-file results are merged into a ``book_render_data`` dictionary that
    is written to ``calibre-book-manifest.json`` in the container root.
    Files that are excluded from the manifest are deleted from disk after
    the container has been committed.

    :param book_fmt: Stored in the manifest under ``'book_format'``.
    :param opfpath: Path to the OPF, passed to ``SimpleContainer`` and to
        the render workers.
    :param input_fmt: Input format name; used to look up the input plugin
        and (lower-cased) passed to ``create_cover_page``.
    :param tdir: Directory holding the exploded book.
    :param render_manager: Object providing ``launch_workers(names, container)``
        and callable as ``render_manager(names, args, container)``; the call
        must yield ``(link_to_map, html_data, virtualized_names)`` triples.
    :param log: Logger; ``default_log`` is used when falsy.
    :param book_hash: Stored in the manifest under ``'book_hash'``.
    :param save_bookmark_data: When true, the contents of
        ``META-INF/calibre_bookmarks.txt`` are read (if the file exists) and
        returned.
    :param book_metadata: Passed through to ``create_cover_page``.
    :param virtualize_resources: Passed through to the render workers.
    :return: ``(container, bookmark_data)`` where ``bookmark_data`` is the
        raw bookmark file contents or ``None``.
    """
    log = log or default_log
    container = SimpleContainer(tdir, opfpath, log)
    input_plugin = plugin_for_input_format(input_fmt)
    is_comic = bool(getattr(input_plugin, 'is_image_collection', False))

    def needs_work(mt):
        # Only stylesheets, HTML documents and SVG images are rendered.
        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'

    def work_priority(name):
        # Sort key: HTML documents first, then ascending file size. The
        # original note says this is meant to ensure that workers with large
        # files or stylesheets receive fewer names; how names are split
        # between workers is decided by render_manager (not visible here).
        # The size is a plain integer: a stray trailing comma used to wrap
        # it in a one-element tuple (the resulting order is the same).
        size = os.path.getsize(container.name_path_map[name])
        is_html = container.mime_map.get(name) in OEB_DOCS
        return (0 if is_html else 1), size

    if not is_comic:
        render_manager.launch_workers(
            tuple(n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
            container)

    bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if container.exists(bm_file):
            with container.open(bm_file, 'rb') as f:
                bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    excluded_names = {
        name for name, mt in iteritems(container.mime_map)
        if name == container.opf_name
        or mt == guess_type('a.ncx')
        or name.startswith('META-INF/')
        or name == 'mimetype'
        or not container.has_name_and_is_not_empty(name)
    }
    raster_cover_name, titlepage_name = create_cover_page(
        container, input_fmt.lower(), is_comic, book_metadata)

    toc = get_toc(container, verify_destinations=False).to_dict(count())
    if not toc or not toc.get('children'):
        # No usable ToC in the book: fall back to one built from headings.
        toc = from_xpaths(container, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
    spineq = frozenset(spine)
    landmarks = [
        landmark for landmark in get_landmarks(container)
        if landmark['dest'] in spineq
    ]

    book_render_data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': is_comic,
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    names = sorted(
        (n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
        key=work_priority)

    results = render_manager(
        names,
        (tdir, opfpath, virtualize_resources, book_render_data['link_uid'],
         container.data_for_clone()),
        container)
    ltm = book_render_data['link_to_map']
    html_data = {}
    virtualized_names = set()

    def merge_ltm(dest, src):
        # Union the values of src into dest, key by key.
        for k, v in iteritems(src):
            if k in dest:
                dest[k] |= v
            else:
                dest[k] = v

    for link_to_map, hdata, vnames in results:
        html_data.update(hdata)
        virtualized_names |= vnames
        for k, v in iteritems(link_to_map):
            if k in ltm:
                merge_ltm(ltm[k], v)
            else:
                ltm[k] = v

    def manifest_data(name):
        # Build the manifest entry for one file and fold its length / maths
        # information into the book-wide totals.
        mt = (container.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(container.name_path_map[name]),
            'is_virtualized': name in virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            file_data = html_data[name]
            ans['length'] = length = file_data['length']
            book_render_data['total_length'] += length
            # spineq holds the same names as the 'spine' list, with O(1) lookup.
            if name in spineq:
                book_render_data['spine_length'] += length
            ans['has_maths'] = has_maths = file_data['has_maths']
            if has_maths:
                book_render_data['has_maths'] = True
            ans['anchor_map'] = file_data['anchor_map']
        return ans

    book_render_data['files'] = {
        name: manifest_data(name)
        for name in set(container.name_path_map) - excluded_names
    }
    container.commit()

    for name in excluded_names:
        os.remove(container.name_path_map[name])

    for amap in ltm.values():
        for k, v in tuple(iteritems(amap)):
            amap[k] = tuple(v)  # needed for JSON serialization

    data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
    with lopen(os.path.join(container.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(data)

    return container, bookmark_data
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_bookmark_data=False, book_metadata=None):
    """Extract *path_to_ebook* into *tdir*, render it and write its manifest.

    The book is extracted with ``extract_book``, the container base class is
    initialised on the result, the spine is normalised, CSS is transformed
    and resources are virtualized. A ``book_render_data`` dictionary
    describing the book is stored on the instance and written to
    ``calibre-book-manifest.json`` in the container root. Files excluded
    from the manifest are deleted from disk after the container has been
    committed.

    :param path_to_ebook: Path of the book file to extract.
    :param tdir: Directory the book is extracted into.
    :param log: Logger; ``default_log`` is used when falsy.
    :param book_hash: Stored in the manifest under ``'book_hash'``.
    :param save_bookmark_data: When true, the contents of
        ``META-INF/calibre_bookmarks.txt`` are read (if the file exists)
        into ``self.bookmark_data``.
    :param book_metadata: Stored as ``self.book_metadata``.
    """
    log = log or default_log
    book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
    ContainerBase.__init__(self, tdir, opfpath, log)
    self.book_metadata = book_metadata

    # Always define the attribute so reading it never raises AttributeError,
    # even when bookmark data was not requested.
    self.bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if self.exists(bm_file):
            with self.open(bm_file, 'rb') as f:
                self.bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    excluded_names = {
        name for name, mt in iteritems(self.mime_map)
        if name == self.opf_name
        or mt == guess_type('a.ncx')
        or name.startswith('META-INF/')
        or name == 'mimetype'
        or not self.has_name_and_is_not_empty(name)
    }
    raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())

    toc = get_toc(self).to_dict(count())
    if not toc or not toc.get('children'):
        # No usable ToC in the book: fall back to one built from headings.
        toc = from_xpaths(self, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in self.spine_names]
    spineq = frozenset(spine)
    landmarks = [
        landmark for landmark in get_landmarks(self)
        if landmark['dest'] in spineq
    ]

    self.book_render_data = render_data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    # Mark the spine as dirty since we have to ensure it is normalized
    for name in render_data['spine']:
        self.parsed(name)
        self.dirty(name)
    self.transform_css()
    self.virtualized_names = set()
    self.virtualize_resources()

    # Snapshot the spine only now, after the processing steps above, so the
    # O(1) membership test sees the same names the list lookup would have.
    spine_names = frozenset(render_data['spine'])

    def manifest_data(name):
        # Build the manifest entry for one file and fold its length / maths
        # information into the book-wide totals.
        mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(self.name_path_map[name]),
            'is_virtualized': name in self.virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            root = self.parsed(name)
            ans['length'] = length = get_length(root)
            self.book_render_data['total_length'] += length
            if name in spine_names:
                self.book_render_data['spine_length'] += length
            ans['has_maths'] = has_maths = check_for_maths(root)
            if has_maths:
                self.book_render_data['has_maths'] = True
            ans['anchor_map'] = anchor_map(root)
        return ans

    render_data['files'] = {
        name: manifest_data(name)
        for name in set(self.name_path_map) - excluded_names
    }
    self.commit()

    for name in excluded_names:
        os.remove(self.name_path_map[name])

    # A separate name is used for the serialized form so the dictionary
    # captured above is never re-bound to a string.
    payload = json.dumps(self.book_render_data, ensure_ascii=False)
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(payload)