def convert(self, stream, options, file_ext, log, accelerators): from ebook_converter.ebooks.metadata.toc import TOC from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.utils.zipfile import ZipFile self.options = options self.log = log pages, images = [], [] toc = TOC() if file_ext == 'pmlz': log.debug('De-compressing content to temporary directory...') with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) pmls = glob.glob(os.path.join(tdir, '*.pml')) for pml in pmls: html_name = os.path.splitext( os.path.basename(pml))[0] + '.html' html_path = os.path.join(os.getcwd(), html_name) pages.append(html_name) log.debug('Processing PML item %s...', pml) ttoc = self.process_pml(pml, html_path) toc += ttoc images = self.get_images(stream, tdir, True) else: toc = self.process_pml(stream, 'index.html') pages.append('index.html') if hasattr(stream, 'name'): images = self.get_images( stream, os.path.abspath(os.path.dirname(stream.name))) # We want pages to be orded alphabetically. pages.sort() manifest_items = [] for item in pages + images: manifest_items.append((item, None)) from ebook_converter.ebooks.metadata.meta import get_metadata log.debug('Reading metadata from input file...') mi = get_metadata(stream, 'pml') if 'images/cover.png' in images: mi.cover = 'images/cover.png' opf = OPFCreator(os.getcwd(), mi) log.debug('Generating manifest...') opf.create_manifest(manifest_items) opf.create_spine(pages) opf.set_toc(toc) with open('metadata.opf', 'wb') as opffile: with open('toc.ncx', 'wb') as tocfile: opf.render(opffile, tocfile, 'toc.ncx') return os.path.join(os.getcwd(), 'metadata.opf')
def write_opf(self, guide, toc, spine, resource_map): mi = self.header.exth.mi if (self.cover_offset is not None and self.cover_offset < len(resource_map)): mi.cover = resource_map[self.cover_offset] if len(list(toc)) < 2: self.log.warning('KF8 has no metadata Table of Contents') for ref in guide: if ref.type == 'toc': href = ref.href() href, frag = urllib.parse.urldefrag(href) if os.path.exists(href.replace('/', os.sep)): try: toc = self.read_inline_toc(href, frag) except Exception: self.log.exception('Failed to read inline ToC') opf = OPFCreator(os.getcwd(), mi) opf.guide = guide def exclude(path): return os.path.basename(path) == 'debug-raw.html' # If there are no images then the azw3 input plugin dumps all # binary records as .unknown images, remove them if (self.for_tweak and os.path.exists('images') and os.path.isdir('images')): files = os.listdir('images') unknown = [x for x in files if x.endswith('.unknown')] if len(files) == len(unknown): [os.remove('images/' + f) for f in files] if self.for_tweak: try: os.remove('debug-raw.html') except Exception: pass opf.create_manifest_from_files_in([os.getcwd()], exclude=exclude) for entry in opf.manifest: if entry.mime_type == 'text/html': entry.mime_type = 'application/xhtml+xml' opf.create_spine(spine) opf.set_toc(toc) ppd = getattr(self.header.exth, 'page_progression_direction', None) if ppd in {'ltr', 'rtl', 'default'}: opf.page_progression_direction = ppd pwm = getattr(self.header.exth, 'primary_writing_mode', None) if pwm is not None: opf.primary_writing_mode = pwm with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx: opf.render(of, ncx, 'toc.ncx') return 'metadata.opf'
def create_opf(self, output_dir, images, toc): with directory.CurrentDir(output_dir): if 'cover.png' in images: self.mi.cover = os.path.join('images', 'cover.png') opf = OPFCreator(output_dir, self.mi) manifest = [('index.html', None)] for i in images: manifest.append((os.path.join('images', i), None)) opf.create_manifest(manifest) opf.create_spine(['index.html']) opf.set_toc(toc) with open('metadata.opf', 'wb') as opffile: with open('toc.ncx', 'wb') as tocfile: opf.render(opffile, tocfile, 'toc.ncx') return os.path.join(output_dir, 'metadata.opf')
def convert(self, stream, opts, file_ext, log, accelerators): from ebook_converter.ebooks.metadata import MetaInformation from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.ebooks.metadata.toc import TOC self.opts, self.log = opts, log if file_ext == 'cbc': comics_ = self.get_comics_from_collection(stream) else: comics_ = [['Comic', os.path.abspath(stream.name)]] stream.close() comics = [] for i, x in enumerate(comics_): title, fname = x cdir = 'comic_%d' % (i + 1) if len(comics_) > 1 else '.' cdir = os.path.abspath(cdir) if not os.path.exists(cdir): os.makedirs(cdir) pages = self.get_pages(fname, cdir) if not pages: continue if self.for_viewer: comics.append( (title, pages, [self.create_viewer_wrapper(pages)])) else: wrappers = self.create_wrappers(pages) comics.append((title, pages, wrappers)) if not comics: raise ValueError('No comic pages found in %s' % stream.name) mi = MetaInformation( os.path.basename(stream.name).rpartition('.')[0], ['Unknown']) opf = OPFCreator(os.getcwd(), mi) entries = [] def href(x): if len(comics) == 1: return os.path.basename(x) return '/'.join(x.split(os.sep)[-2:]) cover_href = None for comic in comics: pages, wrappers = comic[1:] page_entries = [(x, None) for x in map(href, pages)] entries += [(w, None) for w in map(href, wrappers)] + page_entries if cover_href is None and page_entries: cover_href = page_entries[0][0] opf.create_manifest(entries) spine = [] for comic in comics: spine.extend(map(href, comic[2])) self._images = [] for comic in comics: self._images.extend(comic[1]) opf.create_spine(spine) if self.for_viewer and cover_href: opf.guide.set_cover(cover_href) toc = TOC() if len(comics) == 1: wrappers = comics[0][2] for i, x in enumerate(wrappers): toc.add_item(href(x), None, 'Page %d' % (i + 1), play_order=i) else: po = 0 for comic in comics: po += 1 wrappers = comic[2] stoc = toc.add_item(href(wrappers[0]), None, comic[0], play_order=po) if not opts.dont_add_comic_pages_to_toc: for i, x in enumerate(wrappers): stoc.add_item(href(x), None, 'Page %d' % (i + 1), play_order=po) po += 1 opf.set_toc(toc) with open('metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n: opf.render(m, n, 'toc.ncx') return os.path.abspath('metadata.opf')
def create_opf(self, htmlfile, guide=None, root=None): mi = getattr(self.book_header.exth, 'mi', self.embedded_mi) if mi is None: mi = MetaInformation(self.book_header.title, ['Unknown']) opf = OPFCreator(os.path.dirname(htmlfile), mi) if hasattr(self.book_header.exth, 'cover_offset'): opf.cover = 'images/%05d.jpg' % (self.book_header .exth.cover_offset + 1) elif mi.cover is not None: opf.cover = mi.cover else: opf.cover = 'images/%05d.jpg' % 1 if not os.path.exists(os.path.join(os.path.dirname(htmlfile), * opf.cover.split('/'))): opf.cover = None cover = opf.cover cover_copied = None if cover is not None: cover = cover.replace('/', os.sep) if os.path.exists(cover): ncover = 'images'+os.sep+'calibre_cover.jpg' if os.path.exists(ncover): os.remove(ncover) shutil.copyfile(cover, ncover) cover_copied = os.path.abspath(ncover) opf.cover = ncover.replace(os.sep, '/') manifest = [(htmlfile, 'application/xhtml+xml'), (os.path.abspath('styles.css'), 'text/css')] bp = os.path.dirname(htmlfile) added = set() for i in getattr(self, 'image_names', []): path = os.path.join(bp, 'images', i) added.add(path) manifest.append((path, mimetypes.guess_type(path)[0] or 'image/jpeg')) if cover_copied is not None: manifest.append((cover_copied, 'image/jpeg')) opf.create_manifest(manifest) opf.create_spine([os.path.basename(htmlfile)]) toc = None if guide is not None: opf.create_guide(guide) for ref in opf.guide: if ref.type.lower() == 'toc': toc = ref.href() ncx_manifest_entry = None if toc: ncx_manifest_entry = 'toc.ncx' elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1]) tocobj = None ent_pat = re.compile(r'&(\S+?);') if elems: tocobj = TOC() found = False reached = False for x in root.iter(): if x == elems[-1]: reached = True continue if reached and x.tag == 'a': href = x.get('href', '') if href and re.match(r'\w+://', href) is None: try: text = ' '.join([t.strip() for t in x.xpath('descendant:' ':text()')]) except Exception: text = '' text = ent_pat.sub(entities.entity_to_unicode, text) item = tocobj.add_item(toc.partition('#')[0], href[1:], text) item.left_space = int(self.get_left_whitespace(x)) found = True if (reached and found and x.get('class', None) == 'mbp_pagebreak'): break if tocobj is not None: tocobj = self.structure_toc(tocobj) opf.set_toc(tocobj) return opf, ncx_manifest_entry