def upshift_markup(self):  # {{{
    """Upgrade markup to comply with XHTML 1.1 where possible.

    For every item in ``self.oeb.spine`` this:

    * copies the root element's plain ``lang`` attribute to the attribute
      named by ``XML('lang')`` when the latter is missing or empty;
    * skips the item when the ``//h:body`` query yields nothing usable;
    * retags every element matched by ``//h:u`` as ``span``;
    * removes ``id`` / ``name`` attributes whose value has already been
      seen on an earlier element of the same document, keeping the first
      occurrence of each value.
    """
    from calibre.ebooks.oeb.base import XPath, XML

    def drop_if_repeated(elem, attr, seen):
        # Keep the first element carrying a given value; strip the
        # attribute from any later element that repeats it.
        value = elem.get(attr, None)
        if not value:
            return
        if value in seen:
            del elem.attrib[attr]
        else:
            seen.add(value)

    for item in self.oeb.spine:
        root = item.data

        if not root.get(XML('lang')):
            plain_lang = root.get('lang')
            if plain_lang:
                root.set(XML('lang'), plain_lang)

        matches = XPath('//h:body')(root)
        body = matches[0] if matches else matches
        # An empty query result has no ``xpath`` attribute, so items
        # without a body element are left untouched.
        if not hasattr(body, 'xpath'):
            continue

        for underlined in XPath('//h:u')(root):
            underlined.tag = 'span'

        ids_seen = set()
        names_seen = set()
        for elem in XPath('//*[@id or @name]')(root):
            drop_if_repeated(elem, 'id', ids_seen)
            drop_if_repeated(elem, 'name', names_seen)
def upshift_markup(self):  # {{{
    """Upgrade markup to comply with XHTML 1.1 where possible.

    For every item in ``self.oeb.spine`` this:

    * copies the root element's plain ``lang`` attribute to the attribute
      named by ``XML('lang')`` when the latter is missing or empty;
    * skips the item when the ``//h:body`` query yields nothing usable;
    * retags every element matched by ``//h:u`` as ``span`` and adds an
      underline declaration to its inline ``style``.

    An inline style already present on a ``<u>`` element is preserved: the
    underline declaration is placed in front of it instead of replacing
    it, so the element's own declarations still win on conflict.
    """
    from calibre.ebooks.oeb.base import XPath, XML

    underline = 'text-decoration:underline'

    for x in self.oeb.spine:
        root = x.data
        if (not root.get(XML('lang'))) and (root.get('lang')):
            root.set(XML('lang'), root.get('lang'))

        body = XPath('//h:body')(root)
        if body:
            body = body[0]

        # An empty query result (a list) has no ``xpath`` attribute, so
        # items without a body element are skipped.
        if not hasattr(body, 'xpath'):
            continue

        for u in XPath('//h:u')(root):
            u.tag = 'span'
            # Merge with any existing inline style rather than clobbering
            # it; later declarations override earlier ones, so the
            # pre-existing ones keep priority.
            existing = (u.get('style') or '').strip()
            if existing:
                u.set('style', underline + '; ' + existing)
            else:
                u.set('style', underline)
def __call__(self, oeb, context):
    """Ensure the book has an in-line HTML table of contents in its spine.

    If the guide already has a ``toc`` entry pointing at a manifest item
    whose data contains links (``//h:a[@href]``), that item is added to
    the spine when it is not already there and nothing else is done.
    Otherwise the guide entry is removed (when its target is missing from
    the manifest, or when ``oeb.toc`` has nodes to generate from) and, if
    ``oeb.toc`` has nodes, a new stylesheet and ``contents.xhtml`` page are
    generated, added to the manifest and spine, and registered in the
    guide. ``context`` is not used.
    """
    def place_in_spine(entry):
        # ``self.position`` selects the end of the spine (non-linear) or
        # the very front (linear).
        if self.position == 'end':
            oeb.spine.add(entry, linear=False)
        else:
            oeb.spine.insert(0, entry, linear=True)

    has_toc = getattr(getattr(oeb, 'toc', False), 'nodes', False)

    if 'toc' in oeb.guide:
        # Ensure toc pointed to in <guide> is in spine
        from calibre.ebooks.oeb.base import urlnormalize
        guide_href = urlnormalize(oeb.guide['toc'].href)
        if guide_href not in oeb.manifest.hrefs:
            oeb.guide.remove('toc')
        else:
            existing = oeb.manifest.hrefs[guide_href]
            has_links = (hasattr(existing.data, 'xpath') and
                         XPath('//h:a[@href]')(existing.data))
            if has_links:
                if oeb.spine.index(existing) < 0:
                    place_in_spine(existing)
                return
            if has_toc:
                oeb.guide.remove('toc')

    if not has_toc:
        return

    oeb.logger.info('Generating in-line TOC...')
    title = self.title or oeb.translate(DEFAULT_TITLE)

    style = self.style
    if style not in STYLE_CSS:
        oeb.logger.error('Unknown TOC style %r' % style)
        style = 'nested'

    css_id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
    oeb.manifest.add(css_id, css_href, CSS_MIME, data=STYLE_CSS[style])

    language = unicode_type(oeb.metadata.language[0])
    contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
                       attrib={XML('lang'): language})

    head = element(contents, XHTML('head'))
    element(head, XHTML('title')).text = title
    element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
            href=css_href)

    body = element(contents, XHTML('body'),
                   attrib={'class': 'calibre_toc'})
    heading = element(body, XHTML('h2'),
                      attrib={'class': 'calibre_toc_header'})
    heading.text = title
    self.add_toc_level(body, oeb.toc)

    page_id, page_href = oeb.manifest.generate('contents', 'contents.xhtml')
    page = oeb.manifest.add(page_id, page_href, XHTML_MIME, data=contents)
    place_in_spine(page)
    oeb.guide.add('toc', 'Table of Contents', page_href)
def create_ncx(toc, to_href, btitle, lang, uid):
    """Build and return the root ``ncx`` element for a table of contents.

    ``toc`` is walked recursively (each node is iterated for its children
    and read for ``title``, ``dest`` and ``frag``); ``to_href`` converts a
    node's ``dest`` into the ``src`` of its ``content`` element. ``btitle``
    becomes the ``docTitle`` text, ``lang`` (with ``_`` replaced by ``-``)
    the root language attribute, and ``uid`` the ``dtb:uid`` meta value.
    """
    lang = lang.replace('_', '-')
    root_attrs = {'version': '2005-1', XML('lang'): lang}
    ncx = etree.Element(NCX('ncx'), attrib=root_attrs, nsmap={None: NCX_NS})

    head = etree.SubElement(ncx, NCX('head'))
    head_metas = (
        ('dtb:uid', unicode_type(uid)),
        ('dtb:depth', unicode_type(toc.depth)),
        ('dtb:generator', ''.join(['calibre (', __version__, ')'])),
        ('dtb:totalPageCount', '0'),
        ('dtb:maxPageNumber', '0'),
    )
    for meta_name, meta_content in head_metas:
        etree.SubElement(head, NCX('meta'),
                         name=meta_name, content=meta_content)

    doc_title = etree.SubElement(ncx, NCX('docTitle'))
    etree.SubElement(doc_title, NCX('text')).text = btitle

    navmap = etree.SubElement(ncx, NCX('navMap'))
    whitespace_run = re.compile(r'\s+')

    # Running navPoint number, shared by every recursion level; kept in a
    # mutable container so the nested function can update it.
    play_order = Counter()

    def process_node(xml_parent, toc_parent):
        for entry in toc_parent:
            play_order['c'] += 1
            seq = play_order['c']
            nav_point = etree.SubElement(
                xml_parent, NCX('navPoint'),
                id='num_%d' % seq, playOrder=unicode_type(seq))
            nav_label = etree.SubElement(nav_point, NCX('navLabel'))

            heading = entry.title
            if heading:
                # Collapse every run of whitespace to a single space.
                heading = whitespace_run.sub(' ', heading)
            etree.SubElement(nav_label, NCX('text')).text = heading

            if entry.dest:
                target = to_href(entry.dest)
                if entry.frag:
                    target += '#' + entry.frag
                etree.SubElement(nav_point, NCX('content'), src=target)

            process_node(nav_point, entry)

    process_node(navmap, toc)
    return ncx
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=None,
                   inhead=False, preserve=False):
    """Serialise ``elem`` and its subtree through ``self.write``.

    Parameters:
        elem: the element to emit. Elements whose ``tag`` is not a string
            are skipped entirely.
        nsrmap: mapping of namespace URI to prefix currently in effect. It
            is copied before being extended, so the caller's mapping is
            not modified.
        parents: list of buffer offsets of the enclosing elements' tags.
            It is appended to and popped from while recursing. When
            omitted, a fresh list is created for this call.
        inhead: true when an enclosing element (or ``elem`` itself) is
            ``head``.
        preserve: whether whitespace in text and tails is written
            verbatim instead of being collapsed with ``COLLAPSE``.

    Side effects: writes to the output via ``self.write``, appends to
    ``self.page_breaks`` and ``self.anchors``, and sets whitespace-only
    ``tail`` text of some children to ``None``.
    """
    if not isinstance(elem.tag, basestring):
        # Don't emit any comments or raw entities
        return
    if parents is None:
        # A fresh list per top-level call: a shared mutable default would
        # keep stale offsets if an exception escaped between the
        # append/pop pair below.
        parents = []
    nsrmap = copy.copy(nsrmap)
    attrib = dict(elem.attrib)
    style = self.stylizer.style(elem) if self.stylizer else None

    # Emit an xmlns attribute for every namespace binding that differs
    # from what is already in effect, and record the binding.
    for key, value in elem.nsmap.items():
        if value not in nsrmap or nsrmap[value] != key:
            xmlns = ('xmlns:' + key) if key else 'xmlns'
            attrib[xmlns] = value
        nsrmap[value] = key

    tag = prefixname(elem.tag, nsrmap)
    tag_offset = self.buf.tell()
    if tag == 'head':
        inhead = True

    flags = FLAG_OPENING
    if not elem.text and len(elem) == 0:
        # No text and no children: open and close in a single record.
        flags |= FLAG_CLOSING
    if inhead:
        flags |= FLAG_HEAD
    if style and self.is_block(style):
        flags |= FLAG_BLOCK
    self.write(0, flags)

    # Known tags are written by index and may have their own attribute
    # table; unknown tags are written by name.
    tattrs = self.tattrs[0]
    if tag in self.tags:
        index = self.tags[tag]
        self.write(index)
        if self.tattrs[index]:
            tattrs = self.tattrs[index]
    else:
        self.write(FLAG_CUSTOM, len(tag) + 1, tag)

    # Record a page break at this tag unless one was already recorded at
    # the very same offset.
    last_break = self.page_breaks[-1][0] if self.page_breaks else None
    if style and last_break != tag_offset \
            and style['page-break-before'] in PAGE_BREAKS:
        self.page_breaks.append((tag_offset, list(parents)))

    for attr, value in attrib.items():
        attr = prefixname(attr, nsrmap)
        if attr in ('href', 'src'):
            value = urlnormalize(value)
            path, frag = urldefrag(value)
            if self.item:
                path = self.item.abshref(path)
            # Links that resolve to a manifest item are rewritten to that
            # item's id (plus fragment) and get a different prefix
            # character from all other links.
            prefix = unichr(3)
            if path in self.manifest.hrefs:
                prefix = unichr(2)
                value = self.manifest.hrefs[path].id
                if frag:
                    value = '#'.join((value, frag))
            value = prefix + value
        elif attr in ('id', 'name'):
            self.anchors.append((value, tag_offset))
        elif attr.startswith('ms--'):
            attr = '%' + attr[4:]
        elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
            value = CSS_MIME

        if attr in tattrs:
            self.write(tattrs[attr])
        else:
            self.write(FLAG_CUSTOM, len(attr) + 1, attr)

        # Values that parse as integers are written in numeric form,
        # everything else as a length-prefixed string.
        try:
            self.write(ATTR_NUMBER, int(value) + 1)
        except ValueError:
            self.write(len(value) + 1, value)
    self.write(0)

    old_preserve = preserve
    if style:
        preserve = (style['white-space'] in ('pre', 'pre-wrap'))
    # An explicit space attribute on the element overrides the style.
    xml_space = elem.get(XML('space'))
    if xml_space == 'preserve':
        preserve = True
    elif xml_space == 'normal':
        preserve = False

    if elem.text:
        if preserve:
            self.write(elem.text)
        elif len(elem) == 0 or not elem.text.isspace():
            self.write(COLLAPSE.sub(' ', elem.text))
        # else: nothing to do

    parents.append(tag_offset)
    # Walk the children one step behind the iterator so that each child
    # is processed knowing the style of the sibling that follows it; the
    # trailing None flushes the last child.
    child = cstyle = nstyle = None
    for following in chain(elem, [None]):
        if self.stylizer:
            nstyle = None if following is None \
                else self.stylizer.style(following)
        if child is not None:
            if not preserve \
                    and (inhead or not nstyle
                         or self.is_block(cstyle)
                         or self.is_block(nstyle)) \
                    and child.tail and child.tail.isspace():
                child.tail = None
            self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
        child, cstyle = following, nstyle
    parents.pop()
    preserve = old_preserve

    if not flags & FLAG_CLOSING:
        self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)

    if elem.tail and tag != 'html':
        tail = elem.tail
        if not preserve:
            tail = COLLAPSE.sub(' ', tail)
        self.write(tail)

    if style and style['page-break-after'] not in ('avoid', 'auto'):
        self.page_breaks.append((self.buf.tell(), list(parents)))