def structure_toc(entries):
    """Build a nested TOC from flat entries, using indentation as depth.

    Each entry must expose ``indent``, ``anchor`` and ``text`` attributes.
    The distinct ``indent`` values, sorted ascending, define the nesting
    levels. Every item is added with the href ``'index.html'``.

    If there are more than six distinct indent values no hierarchy is
    inferred and all entries are added directly under the root.

    Returns:
        The root ``TOC`` object.
    """
    indent_vals = sorted({x.indent for x in entries})
    # last_found[n] is the most recently added item at level n, or None when
    # no item at that level is currently "open".
    last_found = [None] * len(indent_vals)
    newtoc = TOC()

    if len(indent_vals) > 6:
        # Too many levels to be a meaningful hierarchy: emit a flat TOC.
        for x in entries:
            newtoc.add_item('index.html', x.anchor, x.text)
        return newtoc

    def find_parent(level):
        # Nearest shallower level that still has an open item; else the root.
        for candidate in reversed(last_found[:level]):
            if candidate is not None:
                return candidate
        return newtoc

    for item in entries:
        level = indent_vals.index(item.indent)
        parent = find_parent(level)
        last_found[level] = parent.add_item('index.html', item.anchor, item.text)
        # Items deeper than this one belong to a previous branch and must not
        # be reused as parents. ``range`` replaces the Python 2-only
        # ``xrange`` so this runs on both Python 2 and 3.
        for i in range(level + 1, len(last_found)):
            last_found[i] = None
    return newtoc
def structure_toc(entries):
    """Turn a flat list of entries into a TOC nested by indentation.

    Entries provide ``indent``, ``anchor`` and ``text``. The sorted set of
    indent values gives the depth of each entry; every item links to
    ``'index.html'``. With more than six distinct indents the result is a
    flat TOC instead.

    Returns the root ``TOC``.
    """
    levels = sorted({entry.indent for entry in entries})
    root = TOC()

    if len(levels) > 6:
        for entry in entries:
            root.add_item('index.html', entry.anchor, entry.text)
        return root

    depth_of = {indent: depth for depth, indent in enumerate(levels)}
    # open_nodes[d] holds the latest item at depth d, or None if that depth
    # was closed by a shallower item.
    open_nodes = [None] * len(levels)

    for entry in entries:
        depth = depth_of[entry.indent]
        # Closest open ancestor, searching from depth-1 up to 0.
        parent = next(
            (node for node in reversed(open_nodes[:depth]) if node is not None),
            root,
        )
        open_nodes[depth] = parent.add_item('index.html', entry.anchor, entry.text)
        # Close every deeper level.
        open_nodes[depth + 1:] = [None] * (len(open_nodes) - depth - 1)
    return root
def structure_toc(self, toc):
    """Nest a flat TOC according to each item's ``left_space`` value.

    Args:
        toc: Iterable of items exposing ``left_space``, ``href``,
            ``fragment`` and ``text``.

    Returns:
        ``toc`` itself, unchanged, when it has fewer than two or more than
        six distinct ``left_space`` values; otherwise a new ``TOC`` in which
        each item is a child of the nearest preceding item with a smaller
        ``left_space``.
    """
    indent_vals = {item.left_space for item in toc}
    if len(indent_vals) > 6 or len(indent_vals) < 2:
        # Too many or too few levels, give up
        return toc
    indent_vals = sorted(indent_vals)

    # last_found[n] is the most recent item at level n that may still take
    # children, or None.
    last_found = [None] * len(indent_vals)
    newtoc = TOC()

    def find_parent(level):
        # Nearest shallower level with an open item; fall back to the root.
        for candidate in reversed(last_found[:level]):
            if candidate is not None:
                return candidate
        return newtoc

    for item in toc:
        level = indent_vals.index(item.left_space)
        parent = find_parent(level)
        last_found[level] = parent.add_item(item.href, item.fragment, item.text)
        # Forget deeper levels: they belong to the previous branch. Without
        # this, a later deep item that skips a level would be attached to a
        # stale node from an earlier branch instead of the current one.
        for i in range(level + 1, len(last_found)):
            last_found[i] = None
    return newtoc
def build_toc(index_entries):
    """Build a TOC tree from index entries that reference their parent.

    Args:
        index_entries: Iterable of mappings with the keys ``'hlvl'``
            (hierarchy level), ``'parent'`` (``'num'`` of the parent entry,
            ``-1`` for the root), ``'num'``, ``'href'``, ``'idtag'`` and
            ``'text'``.

    Returns:
        The root ``TOC`` with ``play_order`` set on every node in the order
        produced by ``flat()``.

    Raises:
        KeyError: If an entry names a parent that has not been added yet.
    """
    # os.getcwdu() only exists on Python 2; os.getcwd() is its Python 3
    # equivalent.
    getcwd = getattr(os, 'getcwdu', os.getcwd)
    ans = TOC(base_path=getcwd())

    # Group the entries by level in one pass, keeping their relative order.
    level_map = {}
    for entry in index_entries:
        level_map.setdefault(entry['hlvl'], []).append(entry)

    # Maps an entry's 'num' to the TOC node created for it; -1 is the root.
    num_map = {-1: ans}
    # Shallow levels first, so parents are registered before their children.
    for lvl in sorted(level_map):
        for item in level_map[lvl]:
            parent = num_map[item['parent']]
            child = parent.add_item(item['href'], item['idtag'], item['text'])
            num_map[item['num']] = child

    # Set play orders in depth first order
    for i, item in enumerate(ans.flat()):
        item.play_order = i

    return ans
def from_headings(body, log, namespace):
    """Create a TOC from the ``h1``/``h2``/``h3`` headings in the document.

    Args:
        body: Element tree searched for headings. Headings without an ``id``
            attribute are given a generated ``toc_id_N`` id, so the tree is
            modified in place.
        log: Callable invoked with a message when a TOC is produced.
        namespace: Object providing the ``XPath`` factory and the
            ``descendants`` iterator used to query ``body``.

    Returns:
        The root ``TOC`` if ``tocroot.flat()`` yields more than one node,
        otherwise ``None``.
    """
    XPath, descendants = namespace.XPath, namespace.descendants
    headings = ('h1', 'h2', 'h3')
    tocroot = TOC()
    xpaths = [XPath('//%s' % x) for x in headings]

    # level_prev[n] is the latest TOC node created at heading level n
    # (None when closed); level 0 is the root and is always available.
    # ``range`` / ``.items()`` replace the Python 2-only ``xrange`` /
    # ``.iteritems()``.
    level_prev = {i + 1: None for i in range(len(xpaths))}
    level_prev[0] = tocroot
    level_item_map = {i + 1: frozenset(xp(body)) for i, xp in enumerate(xpaths)}
    item_level_map = {e: i for i, elems in level_item_map.items() for e in elems}

    idcount = Count()

    def ensure_id(elem):
        # Return the element's id, generating and storing one if missing.
        ans = elem.get('id', None)
        if not ans:
            idcount.val += 1
            ans = 'toc_id_%d' % idcount.val
            elem.set('id', ans)
        return ans

    for item in descendants(body, *headings):
        lvl = plvl = item_level_map.get(item, None)
        if lvl is None:
            continue
        # Walk up to the nearest level that has an open node. The loop always
        # terminates because level 0 holds the root.
        parent = None
        while parent is None:
            plvl -= 1
            parent = level_prev[plvl]
        # The item's effective level is one below the parent actually found,
        # which may be shallower than its tag suggests.
        lvl = plvl + 1
        elem_id = ensure_id(item)
        text = elem_to_toc_text(item)
        toc = parent.add_item('index.html', elem_id, text)
        level_prev[lvl] = toc
        # Close all deeper levels.
        for i in range(lvl + 1, len(xpaths) + 1):
            level_prev[i] = None

    if len(tuple(tocroot.flat())) > 1:
        log('Generating Table of Contents from headings')
        return tocroot
    return None
def create_toc(self):
    """Create a TOC from the ``h1``/``h2``/``h3`` headings in ``self.body``.

    Headings without an ``id`` attribute receive a generated ``toc_id_N``
    id; the counter is kept in ``self.idcount``, which is reset to 0 on
    every call.

    Returns:
        The root ``TOC`` if ``tocroot.flat()`` yields more than one node,
        otherwise ``None``.
    """
    root = self.body
    headings = ('h1', 'h2', 'h3')
    tocroot = TOC()
    xpaths = [XPath('//%s' % x) for x in headings]

    # level_prev[n]: latest TOC node at heading level n (None when closed);
    # level 0 is the root. ``range`` / ``.items()`` replace the Python 2-only
    # ``xrange`` / ``.iteritems()``.
    level_prev = {i + 1: None for i in range(len(xpaths))}
    level_prev[0] = tocroot
    level_item_map = {i + 1: frozenset(xp(root)) for i, xp in enumerate(xpaths)}
    item_level_map = {e: i for i, elems in level_item_map.items() for e in elems}

    self.idcount = 0

    def ensure_id(elem):
        # Return the element's id, generating and storing one if missing.
        ans = elem.get('id', None)
        if not ans:
            self.idcount += 1
            ans = 'toc_id_%d' % self.idcount
            elem.set('id', ans)
        return ans

    for item in root.iterdescendants(*headings):
        lvl = plvl = item_level_map.get(item, None)
        if lvl is None:
            continue
        # Find the nearest shallower level with an open node; level 0 (the
        # root) guarantees termination.
        parent = None
        while parent is None:
            plvl -= 1
            parent = level_prev[plvl]
        # Effective level is one below the parent that was actually found.
        lvl = plvl + 1
        elem_id = ensure_id(item)
        text = elem_to_toc_text(item)
        toc = parent.add_item('index.html', elem_id, text)
        level_prev[lvl] = toc
        # Close all deeper levels.
        for i in range(lvl + 1, len(xpaths) + 1):
            level_prev[i] = None

    if len(tuple(tocroot.flat())) > 1:
        return tocroot
    return None
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a PML or PMLZ stream to HTML pages plus OPF/NCX metadata.

    For ``file_ext == 'pmlz'`` the archive is extracted to a temporary
    directory and every ``*.pml`` file in it is processed; otherwise the
    stream itself is processed into ``index.html``. ``metadata.opf`` and
    ``toc.ncx`` are written relative to the current working directory.

    Returns:
        The path of the generated ``metadata.opf`` joined to ``os.getcwd()``.
    """
    from calibre.ebooks.metadata.toc import TOC
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.utils.zipfile import ZipFile

    self.options = options
    self.log = log

    pages = []
    images = []
    toc = TOC()

    if file_ext != 'pmlz':
        toc = self.process_pml(stream, 'index.html')
        pages.append('index.html')
        if hasattr(stream, 'name'):
            source_dir = os.path.abspath(os.path.dirname(stream.name))
            images = self.get_images(stream, source_dir)
    else:
        log.debug('De-compressing content to temporary directory...')
        with TemporaryDirectory('_unpmlz') as tdir:
            archive = ZipFile(stream)
            archive.extractall(tdir)

            for pml in glob.glob(os.path.join(tdir, '*.pml')):
                stem = os.path.splitext(os.path.basename(pml))[0]
                html_name = stem + '.html'
                html_path = os.path.join(os.getcwd(), html_name)
                pages.append(html_name)
                log.debug('Processing PML item %s...' % pml)
                toc += self.process_pml(pml, html_path)
            # Images are collected while the extracted files still exist.
            images = self.get_images(stream, tdir, True)

    # We want pages to be ordered alphabetically.
    pages.sort()

    manifest_items = [(name, None) for name in pages + images]

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Reading metadata from input file...')
    mi = get_metadata(stream, 'pml')
    if 'images/cover.png' in images:
        mi.cover = 'images/cover.png'

    opf = OPFCreator(os.getcwd(), mi)
    log.debug('Generating manifest...')
    opf.create_manifest(manifest_items)
    opf.create_spine(pages)
    opf.set_toc(toc)
    with lopen('metadata.opf', 'wb') as opffile, lopen('toc.ncx', 'wb') as tocfile:
        opf.render(opffile, tocfile, 'toc.ncx')

    return os.path.join(os.getcwd(), 'metadata.opf')
def build_toc(index_entries):
    """Build a TOC tree from index entries, decoding entities in the text.

    Args:
        index_entries: Iterable of mappings with the keys ``'hlvl'``
            (hierarchy level), ``'parent'`` (``'num'`` of the parent entry,
            ``-1`` for the root), ``'num'``, ``'href'``, ``'idtag'`` and
            ``'text'``. The text is passed through
            ``replace_entities(..., encoding=None)`` before being stored.

    Returns:
        The root ``TOC`` with ``play_order`` set on every node in the order
        produced by ``flat()``.

    Raises:
        KeyError: If an entry names a parent that has not been added yet.
    """
    # os.getcwdu() only exists on Python 2; fall back to os.getcwd().
    base_path = getattr(os, 'getcwdu', os.getcwd)()
    ans = TOC(base_path=base_path)

    # Single pass grouping by level; order within a level is preserved.
    level_map = {}
    for entry in index_entries:
        level_map.setdefault(entry['hlvl'], []).append(entry)

    # 'num' -> TOC node; -1 stands for the root.
    num_map = {-1: ans}
    # Process shallow levels first so each parent exists before its children.
    for lvl in sorted(level_map):
        for item in level_map[lvl]:
            parent = num_map[item['parent']]
            child = parent.add_item(
                item['href'], item['idtag'],
                replace_entities(item['text'], encoding=None))
            num_map[item['num']] = child

    # Set play orders in depth first order
    for i, item in enumerate(ans.flat()):
        item.play_order = i

    return ans
def from_headings(body, log, namespace, num_levels=3):
    """Create a TOC from elements carrying a ``data-heading-level`` attribute.

    Only elements whose level is between 1 and ``num_levels`` are used.
    Elements lacking an ``id`` get a generated ``toc_id_N`` one, so ``body``
    is modified in place. Every TOC item points at ``'index.html'``.

    Returns the root ``TOC`` (after logging) when ``flat()`` yields more
    than one node; otherwise falls through and returns ``None``.
    """
    root_toc = TOC()
    heading_nodes = body.xpath('//*[@data-heading-level]')

    # latest[n] is the newest TOC node at level n, None when that level is
    # closed. Level 0 is the root and is never None.
    latest = {0: root_toc}
    for lvl in range(1, num_levels + 1):
        latest[lvl] = None

    # Map each usable heading element to its declared level.
    level_of = {}
    for lvl in range(1, num_levels + 1):
        for node in heading_nodes:
            if int(node.get('data-heading-level')) == lvl:
                level_of[node] = lvl

    ids = count()

    def ensure_id(elem):
        existing = elem.get('id', None)
        if existing:
            return existing
        fresh = 'toc_id_%d' % (next(ids) + 1)
        elem.set('id', fresh)
        return fresh

    for node in heading_nodes:
        declared = level_of.get(node, None)
        if declared is None:
            continue
        # Climb until a level with an open node is found; level 0 stops it.
        parent_level = declared - 1
        while latest[parent_level] is None:
            parent_level -= 1
        parent = latest[parent_level]
        effective = parent_level + 1
        elem_id = ensure_id(node)
        text = elem_to_toc_text(node)
        latest[effective] = parent.add_item('index.html', elem_id, text)
        # Anything deeper than the new node is no longer a valid parent.
        for deeper in range(effective + 1, num_levels + 1):
            latest[deeper] = None

    if len(tuple(root_toc.flat())) > 1:
        log('Generating Table of Contents from headings')
        return root_toc
def _parse_toc(self, ul, basedir=getattr(os, 'getcwdu', os.getcwd)()):
    """Recursively convert a ``<ul>`` of TOC entries into a ``TOC`` tree.

    Args:
        ul: Parsed ``<ul>`` node. Each direct ``<li>`` child must contain an
            ``<object>`` with ``param`` children named ``Local`` (the target
            href) and ``Name`` (the entry title).
        basedir: ``base_path`` for the created ``TOC`` nodes. The default is
            the working directory at the time this function was defined
            (``os.getcwdu`` on Python 2, ``os.getcwd`` otherwise).

    Returns:
        A ``TOC`` node holding one item per ``<li>``; nested ``<ul>``
        elements are appended as child ``TOC`` nodes. ``self._playorder`` is
        advanced for every node and item created.
    """
    toc = TOC(play_order=self._playorder, base_path=basedir, text='')
    self._playorder += 1
    for li in ul('li', recursive=False):
        href = li.object('param', {'name': 'Local'})[0]['value']
        # partition() splits on the first '#' only, so a target containing
        # several '#' characters no longer raises on unpacking.
        href, sep, frag = href.partition('#')
        if not sep:
            frag = None
        name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
        toc.add_item(href, frag, name, play_order=self._playorder)
        self._playorder += 1
        if li.ul:
            # Pass basedir down so nested levels share the caller's base path
            # instead of silently reverting to the default.
            child = self._parse_toc(li.ul, basedir)
            child.parent = toc
            toc.append(child)
    return toc
def from_headings(body, log, namespace):
    """Create a TOC from the ``h1``/``h2``/``h3`` headings in the document.

    Args:
        body: Element tree to scan. Headings that have no ``id`` attribute
            get a generated ``toc_id_N`` value written back to them.
        log: Callable invoked with a message when a TOC is produced.
        namespace: Provides ``XPath`` (expression factory) and
            ``descendants`` (tag-filtered iterator over ``body``).

    Returns:
        The root ``TOC`` when ``tocroot.flat()`` yields more than one node,
        else ``None``.
    """
    XPath, descendants = namespace.XPath, namespace.descendants
    headings = ('h1', 'h2', 'h3')
    num_levels = len(headings)
    tocroot = TOC()
    xpaths = [XPath('//%s' % x) for x in headings]

    # Latest TOC node per heading level; None means "closed". Level 0 is the
    # root. Uses range()/items() rather than the Python 2-only
    # xrange()/iteritems().
    level_prev = {lvl: None for lvl in range(1, num_levels + 1)}
    level_prev[0] = tocroot
    level_item_map = {i + 1: frozenset(xp(body)) for i, xp in enumerate(xpaths)}
    item_level_map = {e: i for i, elems in level_item_map.items() for e in elems}

    idcount = Count()

    def ensure_id(elem):
        # Reuse an existing id; otherwise mint the next toc_id_N and set it.
        ans = elem.get('id', None)
        if not ans:
            idcount.val += 1
            ans = 'toc_id_%d' % idcount.val
            elem.set('id', ans)
        return ans

    for item in descendants(body, *headings):
        lvl = plvl = item_level_map.get(item, None)
        if lvl is None:
            continue
        # Search upwards for an open parent; the root at level 0 ends it.
        parent = None
        while parent is None:
            plvl -= 1
            parent = level_prev[plvl]
        # A heading with no open parent at the level above is promoted to sit
        # directly below the parent that was found.
        lvl = plvl + 1
        elem_id = ensure_id(item)
        text = elem_to_toc_text(item)
        toc = parent.add_item('index.html', elem_id, text)
        level_prev[lvl] = toc
        # Close all deeper levels.
        for i in range(lvl + 1, num_levels + 1):
            level_prev[i] = None

    if len(tuple(tocroot.flat())) > 1:
        log('Generating Table of Contents from headings')
        return tocroot
    return None
def create_toc(self):
    """Create a TOC from the ``h1``/``h2``/``h3`` headings in ``self.body``.

    Headings lacking an ``id`` attribute are assigned ``toc_id_N``, with the
    counter stored in ``self.idcount`` (reset to 0 at the start of each
    call).

    Returns:
        The root ``TOC`` when ``tocroot.flat()`` yields more than one node,
        else ``None``.
    """
    root = self.body
    headings = ("h1", "h2", "h3")
    num_levels = len(headings)
    tocroot = TOC()
    xpaths = [XPath("//%s" % x) for x in headings]

    # Latest TOC node per heading level (None = closed); level 0 is the root.
    # range()/items() are used instead of the Python 2-only
    # xrange()/iteritems().
    level_prev = {lvl: None for lvl in range(1, num_levels + 1)}
    level_prev[0] = tocroot
    level_item_map = {i + 1: frozenset(xp(root)) for i, xp in enumerate(xpaths)}
    item_level_map = {e: i for i, elems in level_item_map.items() for e in elems}

    self.idcount = 0

    def ensure_id(elem):
        # Reuse an existing id; otherwise mint the next toc_id_N and set it.
        ans = elem.get("id", None)
        if not ans:
            self.idcount += 1
            ans = "toc_id_%d" % self.idcount
            elem.set("id", ans)
        return ans

    for item in root.iterdescendants(*headings):
        lvl = plvl = item_level_map.get(item, None)
        if lvl is None:
            continue
        # Search upwards for an open parent; the root at level 0 ends it.
        parent = None
        while parent is None:
            plvl -= 1
            parent = level_prev[plvl]
        # Effective level is one below the parent that was actually found.
        lvl = plvl + 1
        elem_id = ensure_id(item)
        text = elem_to_toc_text(item)
        toc = parent.add_item("index.html", elem_id, text)
        level_prev[lvl] = toc
        # Close all deeper levels.
        for i in range(lvl + 1, num_levels + 1):
            level_prev[i] = None

    if len(tuple(tocroot.flat())) > 1:
        return tocroot
    return None
def _parse_toc(self, ul, basedir=getcwd()):
    """Recursively convert a ``<ul>`` of TOC entries into a ``TOC`` tree.

    Args:
        ul: Parsed ``<ul>`` node. Each direct ``<li>`` child must contain an
            ``<object>`` with ``param`` children named ``Local`` (the target
            href) and ``Name`` (the entry title).
        basedir: ``base_path`` for the created ``TOC`` nodes. The default is
            the value ``getcwd()`` returned when this function was defined.

    Returns:
        A ``TOC`` node holding one item per ``<li>``; nested ``<ul>``
        elements are appended as child ``TOC`` nodes. ``self._playorder`` is
        advanced for every node and item created.
    """
    toc = TOC(play_order=self._playorder, base_path=basedir, text='')
    self._playorder += 1
    for li in ul('li', recursive=False):
        target = li.object('param', {'name': 'Local'})[0]['value']
        # Split on the first '#' only; split('#') would fail to unpack when
        # the target contains more than one '#'.
        href, sep, frag = target.partition('#')
        if not sep:
            frag = None
        name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
        toc.add_item(href, frag, name, play_order=self._playorder)
        self._playorder += 1
        if li.ul:
            # Forward basedir so sub-levels use the same base path as their
            # parent rather than falling back to the default.
            child = self._parse_toc(li.ul, basedir)
            child.parent = toc
            toc.append(child)
    return toc
def create_opf(self, htmlfile, guide=None, root=None):
    """Build the OPF for the extracted book and, if possible, its TOC.

    Args:
        htmlfile: Path of the generated HTML file. Its directory is the OPF
            base directory and the place where ``images/`` is looked up.
        guide: Optional guide passed to ``opf.create_guide``. A guide
            reference of type ``toc`` identifies the inline table of
            contents.
        root: Parsed HTML tree. It is dereferenced whenever the guide has a
            ``toc`` reference, so it must be supplied in that case.

    Returns:
        ``(opf, ncx_manifest_entry)`` where ``ncx_manifest_entry`` is
        ``'toc.ncx'`` when the guide has a ``toc`` reference and ``None``
        otherwise.
    """
    mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
    if mi is None:
        mi = MetaInformation(self.book_header.title, [_('Unknown')])
    opf = OPFCreator(os.path.dirname(htmlfile), mi)

    # Cover: prefer the EXTH cover offset, then the metadata cover, and as a
    # last resort the first image, provided it exists next to the HTML file.
    if hasattr(self.book_header.exth, 'cover_offset'):
        opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
    elif mi.cover is not None:
        opf.cover = mi.cover
    else:
        opf.cover = u'images/%05d.jpg' % 1
        if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                                           *opf.cover.split('/'))):
            opf.cover = None

    cover = opf.cover
    cover_copied = None
    if cover is not None:
        cover = cover.replace('/', os.sep)
        if os.path.exists(cover):
            # Copy the cover to a fixed name, replacing any earlier copy.
            ncover = u'images' + os.sep + u'calibre_cover.jpg'
            if os.path.exists(ncover):
                os.remove(ncover)
            shutil.copyfile(cover, ncover)
            cover_copied = os.path.abspath(ncover)
            opf.cover = ncover.replace(os.sep, '/')

    manifest = [(htmlfile, 'application/xhtml+xml'),
                (os.path.abspath(u'styles.css'), 'text/css')]
    bp = os.path.dirname(htmlfile)
    for i in getattr(self, 'image_names', []):
        manifest.append((os.path.join(bp, 'images', i), 'image/jpeg'))
    if cover_copied is not None:
        manifest.append((cover_copied, 'image/jpeg'))

    opf.create_manifest(manifest)
    opf.create_spine([os.path.basename(htmlfile)])

    toc = None
    if guide is not None:
        opf.create_guide(guide)
        for ref in opf.guide:
            if ref.type.lower() == 'toc':
                toc = ref.href()

    ncx_manifest_entry = None
    if toc:
        ncx_manifest_entry = 'toc.ncx'
        elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
        tocobj = None
        ent_pat = re.compile(r'&(\S+?);')
        # Raw string: '\w' in a normal string literal is an invalid escape.
        scheme_pat = re.compile(r'\w+://')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
                    # Links are only collected after the TOC anchor element.
                    reached = True
                    continue
                if reached and x.tag == 'a':
                    href = x.get('href', '')
                    # Skip empty hrefs and links that carry a URL scheme.
                    if href and scheme_pat.match(href) is None:
                        try:
                            text = u' '.join([t.strip() for t in
                                              x.xpath('descendant::text()')])
                        except Exception:
                            # Best effort: an unreadable link gets no label.
                            text = ''
                        text = ent_pat.sub(entity_to_unicode, text)
                        # href[1:] drops the first character of the link and
                        # uses the rest as the fragment.
                        item = tocobj.add_item(toc.partition('#')[0], href[1:], text)
                        item.left_space = int(self.get_left_whitespace(x))
                        found = True
                # Stop at the first page break after at least one link.
                if reached and found and x.get('class', None) == 'mbp_pagebreak':
                    break
        if tocobj is not None:
            tocobj = self.structure_toc(tocobj)
            opf.set_toc(tocobj)

    return opf, ncx_manifest_entry
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic (or a ``cbc`` collection of comics) to OPF + NCX.

    Pages are extracted into the working directory (one ``comic_N``
    sub-directory per comic when the source holds several), wrapped either
    per page or, when ``self.for_viewer`` is set, in a single viewer wrapper
    per comic. ``metadata.opf`` and ``toc.ncx`` are written to the current
    working directory.

    Returns the absolute path of ``metadata.opf``.

    Raises ValueError when no comic yields any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts = opts
    self.log = log

    if file_ext == 'cbc':
        sources = self.get_comics_from_collection(stream)
    else:
        sources = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    several_sources = len(sources) > 1
    comics = []
    page_counts = []
    for index, (title, fname) in enumerate(sources, 1):
        cdir = os.path.abspath('comic_%d' % index if several_sources else '.')
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        page_counts.append(len(pages))
        if self.for_viewer:
            wrappers = [self.create_viewer_wrapper(pages, cdir)]
        else:
            wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    book_title = os.path.basename(stream.name).rpartition('.')[0]
    mi = MetaInformation(book_title, [_('Unknown')])
    opf = OPFCreator(os.getcwd(), mi)

    single_comic = len(comics) == 1

    def href(x):
        # One comic: bare file name. Several: "<comic dir>/<file name>".
        if single_comic:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    def page_label(number):
        return _('Page') + ' %d' % number

    entries = []
    cover_href = None
    for _title, pages, wrappers in comics:
        page_entries = [(href(p), None) for p in pages]
        entries += [(href(w), None) for w in wrappers] + page_entries
        # The first page of the first comic that has pages is the cover.
        if cover_href is None and page_entries:
            cover_href = page_entries[0][0]
    opf.create_manifest(entries)

    spine = [href(w) for comic in comics for w in comic[2]]
    self._images = [page for comic in comics for page in comic[1]]
    opf.create_spine(spine)

    if self.for_viewer and cover_href:
        if os.path.isabs(cover_href):
            cover_href = os.path.relpath(cover_href).replace(os.sep, '/')
        opf.guide.set_cover(cover_href)

    toc = TOC()
    if single_comic:
        wrappers = comics[0][2]
        if self.for_viewer:
            viewer_href = href(wrappers[0])
            for i in range(page_counts[0]):
                toc.add_item('{}#page_{}'.format(viewer_href, i + 1), None,
                             page_label(i + 1), play_order=i)
        else:
            for i, wrapper in enumerate(wrappers):
                toc.add_item(href(wrapper), None, page_label(i + 1), play_order=i)
    else:
        po = 0
        for num_pages, comic in zip(page_counts, comics):
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0], play_order=po)
            if opts.dont_add_comic_pages_to_toc:
                continue
            if self.for_viewer:
                viewer_href = href(wrappers[0])
                for i in range(num_pages):
                    stoc.add_item('{}#page_{}'.format(viewer_href, i + 1), None,
                                  page_label(i + 1), play_order=po)
                    po += 1
            else:
                for i, wrapper in enumerate(wrappers):
                    stoc.add_item(href(wrapper), None, page_label(i + 1),
                                  play_order=po)
                    po += 1

    opf.set_toc(toc)
    with open('metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic (or a ``cbc`` collection of comics) to OPF + NCX.

    Args:
        stream: Open input file; it is closed once the comic list is known.
            ``stream.name`` supplies the source path and the book title.
        opts: Conversion options; ``dont_add_comic_pages_to_toc`` suppresses
            per-page TOC entries when there are several comics.
        file_ext: ``'cbc'`` selects collection handling; anything else is
            treated as a single comic.
        log: Logger, stored on ``self.log``.
        accelerators: Unused here.

    Returns:
        Absolute path of the generated ``metadata.opf``.

    Raises:
        ValueError: If no comic yields any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # Several comics each get their own directory; one uses the cwd.
        cdir = u'comic_%d' % (i + 1) if len(comics_) > 1 else u'.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    mi = MetaInformation(
        os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
    # os.getcwdu() only exists on Python 2; fall back to os.getcwd().
    opf = OPFCreator(getattr(os, 'getcwdu', os.getcwd)(), mi)

    def href(x):
        # One comic: bare file name. Several: "<comic dir>/<file name>".
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    entries = []
    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
            [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)

    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)

    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page') + ' %d' % (i + 1),
                         play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None, _('Page') + ' %d' % (i + 1),
                                  play_order=po)
                    po += 1
    opf.set_toc(toc)

    # Context managers guarantee both files are flushed and closed before the
    # path is handed back to the caller.
    with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, u'toc.ncx')
    return os.path.abspath(u'metadata.opf')
def read_inline_toc(self, href, frag):
    """Build a TOC from the links found in an inline (HTML) table of contents.

    The file named by ``href`` is read and decoded with
    ``self.header.codec``. Links are collected from the point where the
    start element is reached: the element with id ``frag`` if one is found,
    else the ``<body>`` element; if there is no body, collection starts
    immediately. Links are then nested according to how deep each ``<a>``
    element sits in the HTML tree.

    Returns the root ``TOC``.
    """
    ans = TOC()
    # Directory part of href; prepended to every link found in the file.
    base_href = '/'.join(href.split('/')[:-1])
    with open(href.replace('/', os.sep), 'rb') as f:
        raw = f.read().decode(self.header.codec)
    root = parse_html(raw, log=self.log)
    body = XPath('//h:body')(root)
    reached = False
    if body:
        start = body[0]
    else:
        # No body element: treat every link in the document as TOC content.
        start = None
        reached = True
    if frag:
        elems = XPath('//*[@id="%s"]' % frag)(root)
        if elems:
            start = elems[0]

    def node_depth(elem):
        # Number of ancestors of elem (0 for the tree root).
        ans = 0
        parent = elem.getparent()
        while parent is not None:
            parent = parent.getparent()
            ans += 1
        return ans

    # Layer the ToC based on nesting order in the source HTML
    current_depth = None
    parent = ans
    seen = set()
    links = []
    for elem in root.iterdescendants(etree.Element):
        if reached and elem.tag == XHTML('a') and elem.get('href', False):
            # NOTE: href and frag (the parameters) are rebound here to the
            # values of the current link.
            href = elem.get('href')
            href, frag = urldefrag(href)
            href = base_href + '/' + href
            text = xml2text(elem).strip()
            # Skip exact duplicates (same text, target and fragment).
            if (text, href, frag) in seen:
                continue
            seen.add((text, href, frag))
            links.append((text, href, frag, node_depth(elem)))
        elif elem is start:
            reached = True
    # Compress the raw tree depths to consecutive levels 0..n-1.
    depths = sorted(set(x[-1] for x in links))
    depth_map = {x: i for i, x in enumerate(depths)}
    for text, href, frag, depth in links:
        depth = depth_map[depth]
        if current_depth is None:
            # The first link always goes to the top level, whatever its depth.
            current_depth = 0
            parent.add_item(href, frag, text)
        elif current_depth == depth:
            parent.add_item(href, frag, text)
        elif current_depth < depth:
            # Descend one level only, under the most recently added item,
            # even if the link's depth is more than one level deeper.
            parent = parent[-1] if len(parent) > 0 else parent
            parent.add_item(href, frag, text)
            current_depth += 1
        else:
            # Climb back up as many levels as needed (stopping at the root).
            delta = current_depth - depth
            while delta > 0 and parent.parent is not None:
                parent = parent.parent
                delta -= 1
            parent.add_item(href, frag, text)
            current_depth = depth
    return ans
def get_toc(self):
    '''
    Build the TOC tree from ``self.toc``.

    ``self.toc`` yields ``(level, (href, id, text))`` tuples where level is
    one of the strings '0' to '3'; any other value is treated as level 4.
    Each item becomes a child of the most recent item on the nearest
    shallower level that is still open, or of the root when there is none.
    Adding an item closes every deeper level.
    '''
    root = TOC()

    # open_nodes[d] is the latest item at depth d (0-3), None when closed.
    # Depth 4 items never receive children, so they are not tracked.
    open_nodes = [None, None, None, None]
    tracked = ('0', '1', '2', '3')

    for level, (href, anchor, text) in self.toc:
        depth = tracked.index(level) if level in tracked else 4

        # Nearest open ancestor above this depth, defaulting to the root.
        parent = root
        for candidate in reversed(open_nodes[:depth]):
            if candidate is not None:
                parent = candidate
                break

        node = parent.add_item(href, anchor, text)
        if depth < 4:
            open_nodes[depth] = node
            for deeper in range(depth + 1, 4):
                open_nodes[deeper] = None

    return root
def get_toc(self):
    '''
    Assemble the TOC tree from the ``(level, (href, id, text))`` tuples in
    ``self.toc``.

    Levels u'0' to u'3' are honoured as given; every other level value is
    handled as level 4. An item whose direct parent level has no current
    node is attached to the closest shallower level that has one, falling
    back to the root.
    '''
    root = TOC()

    # depth -> most recent node at that depth; a missing key means the depth
    # has no current node.
    current = {}

    def nearest_parent(depth):
        for shallower in range(depth - 1, -1, -1):
            node = current.get(shallower)
            if node is not None:
                return node
        return root

    for level, (href, anchor, text) in self.toc:
        if level == u'0':
            depth = 0
        elif level == u'1':
            depth = 1
        elif level == u'2':
            depth = 2
        elif level == u'3':
            depth = 3
        else:
            # Level 4, or anything invalid, which is counted as level 4.
            depth = 4

        added = nearest_parent(depth).add_item(href, anchor, text)
        if depth == 4:
            # Deepest level: nothing can be nested below, so nothing to track.
            continue
        current[depth] = added
        # A new node invalidates everything nested under its predecessor.
        for deeper in range(depth + 1, 4):
            current.pop(deeper, None)

    return root
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic (or a ``cbc`` collection of comics) to OPF + NCX.

    Args:
        stream: Open input file, closed once the list of comics is known.
            ``stream.name`` provides the source path and the book title.
        opts: Conversion options; when ``dont_add_comic_pages_to_toc`` is
            set, multi-comic TOCs list only the comics, not their pages.
        file_ext: ``'cbc'`` means a collection of comics; any other value is
            handled as a single comic.
        log: Logger, stored on ``self.log``.
        accelerators: Unused here.

    Returns:
        Absolute path of the generated ``metadata.opf``.

    Raises:
        ValueError: If no comic yields any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # A collection extracts each comic into its own comic_N directory.
        cdir = u'comic_%d'%(i+1) if len(comics_) > 1 else u'.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s'%stream.name)

    mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
                         [_('Unknown')])
    # Portable replacement for the Python 2-only os.getcwdu().
    cwd = getattr(os, 'getcwdu', os.getcwd)()
    opf = OPFCreator(cwd, mi)

    def href(x):
        # One comic: bare file name. Several: "<comic dir>/<file name>".
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    entries = []
    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
            [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)

    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)

    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d'%(i+1), play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                                  play_order=po)
                    po += 1
    opf.set_toc(toc)

    # Previously both handles were left open; the with-block makes sure the
    # rendered files are flushed and closed before returning.
    with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, u'toc.ncx')
    return os.path.abspath(u'metadata.opf')
def create_opf(self, htmlfile, guide=None, root=None):
    """Build the OPF for the extracted book and, if possible, its TOC.

    htmlfile is the generated HTML file; its directory is the OPF base
    directory and where ``images/`` is looked up. guide, when given, is
    passed to ``opf.create_guide``; a guide reference of type ``toc`` marks
    the inline table of contents. root is the parsed HTML tree and is
    dereferenced whenever such a ``toc`` reference exists.

    Returns ``(opf, ncx_manifest_entry)``; ``ncx_manifest_entry`` is
    ``'toc.ncx'`` when the guide has a ``toc`` reference, else ``None``.
    """
    mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
    if mi is None:
        mi = MetaInformation(self.book_header.title, [_('Unknown')])
    opf = OPFCreator(os.path.dirname(htmlfile), mi)
    # Cover selection: EXTH cover offset first, then the metadata cover, and
    # finally the first image if it exists next to the HTML file.
    if hasattr(self.book_header.exth, 'cover_offset'):
        opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
    elif mi.cover is not None:
        opf.cover = mi.cover
    else:
        opf.cover = u'images/%05d.jpg' % 1
        if not os.path.exists(os.path.join(os.path.dirname(htmlfile), * opf.cover.split('/'))):
            opf.cover = None

    cover = opf.cover
    cover_copied = None
    if cover is not None:
        cover = cover.replace('/', os.sep)
        # NOTE(review): this existence check uses the path as given, not
        # joined to the HTML directory -- confirm the caller's working
        # directory makes that correct.
        if os.path.exists(cover):
            # Copy the cover to a fixed name, replacing any earlier copy.
            ncover = u'images'+os.sep+u'calibre_cover.jpg'
            if os.path.exists(ncover):
                os.remove(ncover)
            shutil.copyfile(cover, ncover)
            cover_copied = os.path.abspath(ncover)
            opf.cover = ncover.replace(os.sep, '/')

    manifest = [(htmlfile, 'application/xhtml+xml'), (os.path.abspath(u'styles.css'), 'text/css')]
    bp = os.path.dirname(htmlfile)
    # NOTE(review): 'added' is filled below but never read in this function.
    added = set([])
    for i in getattr(self, 'image_names', []):
        path = os.path.join(bp, 'images', i)
        added.add(path)
        manifest.append((path, 'image/jpeg'))
    if cover_copied is not None:
        manifest.append((cover_copied, 'image/jpeg'))

    opf.create_manifest(manifest)
    opf.create_spine([os.path.basename(htmlfile)])
    toc = None
    if guide is not None:
        opf.create_guide(guide)
        # If several guide references have type 'toc', the last one wins.
        for ref in opf.guide:
            if ref.type.lower() == 'toc':
                toc = ref.href()

    ncx_manifest_entry = None
    if toc:
        ncx_manifest_entry = 'toc.ncx'
        # Elements whose id equals the fragment of the TOC reference.
        elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
        tocobj = None
        ent_pat = re.compile(r'&(\S+?);')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
                    # Links are only collected after the TOC anchor element.
                    reached = True
                    continue
                if reached and x.tag == 'a':
                    href = x.get('href', '')
                    # Skip empty hrefs and links that carry a URL scheme.
                    if href and re.match('\\w+://', href) is None:
                        try:
                            text = u' '.join([t.strip() for t in x.xpath('descendant::text()')])
                        # NOTE(review): the bare except also swallows
                        # KeyboardInterrupt/SystemExit; consider narrowing it.
                        except:
                            text = ''
                        text = ent_pat.sub(entity_to_unicode, text)
                        # href[1:] drops the first character of the link and
                        # uses the rest as the fragment.
                        item = tocobj.add_item(toc.partition('#')[0], href[1:], text)
                        # Indentation recorded for structure_toc() below.
                        item.left_space = int(self.get_left_whitespace(x))
                        found = True
                # Stop at the first page break after at least one link.
                if reached and found and x.get('class', None) == 'mbp_pagebreak':
                    break
        if tocobj is not None:
            tocobj = self.structure_toc(tocobj)
            opf.set_toc(tocobj)

    return opf, ncx_manifest_entry