def commit_nav_toc(container, toc, lang=None):
    '''
    Write ``toc`` into the container's navigation document as a
    ``<nav epub:type="toc">`` element holding nested ``<ol>`` lists.

    The document name comes from ``find_existing_nav_toc()``; when that
    returns None a new ``nav.xhtml`` manifest item carrying the ``nav``
    property is generated. If the container cannot supply a parsed tree for
    that name (KeyError), the ``templates/new_nav.html`` template is parsed
    instead. The finished tree is stored with ``container.replace()``.

    :param container: The book container that is modified
    :param toc: Root of the table of contents tree. Iterating a node yields
        its children; nodes provide ``title``, ``dest`` and ``frag``, and the
        root provides ``toc_title``
    :param lang: Accepted but not used by this function
    '''
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    nav_name = find_existing_nav_toc(container)
    if nav_name is None:
        manifest_item = container.generate_item('nav.xhtml', id_prefix='nav')
        manifest_item.set('properties', 'nav')
        nav_name = container.href_to_name(manifest_item.get('href'), base=container.opf_name)

    try:
        root = container.parsed(nav_name)
    except KeyError:
        template = P('templates/new_nav.html', data=True)
        root = container.parse_xhtml(template.decode('utf-8'))

    # Keep only the first toc <nav>; empty it while preserving its attributes
    # and tail text (clear() would otherwise discard both).
    type_attr = '{%s}type' % EPUB_NS
    toc_navs = [el for el in root.iterdescendants(XHTML('nav')) if el.get(type_attr) == 'toc']
    for duplicate in toc_navs[1:]:
        extract(duplicate)
    if not toc_navs:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    else:
        nav = toc_navs[0]
        saved_tail, saved_attrib = nav.tail, dict(nav.attrib)
        nav.clear()
        nav.attrib.update(saved_attrib)
        nav.tail = saved_tail
    nav.set(type_attr, 'toc')

    if toc.toc_title:
        heading = nav.makeelement(XHTML('h1'))
        nav.append(heading)
        heading.text = toc.toc_title

    top_list = nav.makeelement(XHTML('ol'))
    nav.append(top_list)
    href_for = partial(container.name_to_href, base=nav_name)
    whitespace_run = re.compile(r'\s+')

    def process_node(xml_parent, toc_parent):
        # One <li> per entry: <a href> when the entry has a destination,
        # otherwise a plain <span>; children go into a nested <ol>.
        for entry in toc_parent:
            item = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(item)
            label = item.makeelement(XHTML('a' if entry.dest else 'span'))
            label.text = whitespace_run.sub(' ', entry.title or '').strip()
            item.append(label)
            if entry.dest:
                target = href_for(entry.dest)
                if entry.frag:
                    target = target + '#' + entry.frag
                label.set('href', target)
            if len(entry):
                sub_list = item.makeelement(XHTML('ol'))
                item.append(sub_list)
                process_node(sub_list, entry)

    process_node(top_list, toc)
    pretty_xml_tree(top_list)
    # Leaf items keep their single child on the same line: drop the
    # whitespace the pretty printer put around it.
    for item in top_list.iterdescendants(XHTML('li')):
        if len(item) != 1:
            continue
        item.text = None
        item[0].tail = None
    container.replace(nav_name, root)
def ensure_head(root):
    '''
    Return the one ``<head>`` child of ``root``.

    When ``root`` has no ``<head>`` child a new empty one is inserted as the
    first child. When it has several, the element children of every extra
    ``<head>`` are appended to the first one and the extras are removed with
    ``extract()``.
    '''
    heads = list(root.iterchildren(XHTML('head')))
    if not heads:
        root.insert(0, root.makeelement(XHTML('head')))
        return root[0]

    primary = heads[0]
    # With exactly one <head> this loop does not run.
    for surplus in heads[1:]:
        for node in surplus.iterchildren('*'):
            primary.append(node)
        extract(surplus)
    return primary
def ensure_single_nav_of_type(root, ntype='toc'):
    '''
    Return an emptied ``<nav>`` element whose epub ``type`` attribute equals
    ``ntype``, making sure it is the only such element under ``root``.

    The first matching ``<nav>`` is reused: its children and text are cleared
    while its attributes and tail are kept. Any further matches are removed
    with ``extract()``. If nothing matches, a new ``<nav>`` is appended to the
    ``<body>`` located by ``first_child()``.
    '''
    type_attr = '{%s}type' % EPUB_NS
    matches = [el for el in root.iterdescendants(XHTML('nav')) if el.get(type_attr) == ntype]
    for duplicate in matches[1:]:
        extract(duplicate)

    if not matches:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    else:
        nav = matches[0]
        # clear() wipes attributes and tail as well, so save and restore them.
        kept_tail, kept_attrib = nav.tail, dict(nav.attrib)
        nav.clear()
        nav.attrib.update(kept_attrib)
        nav.tail = kept_tail

    nav.set(type_attr, ntype)
    return nav
def remove_links_to(container, predicate):
    """
    Remove every link in the container for which ``predicate`` returns True.

    predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed.

    Documents (``OEB_DOCS``) are scanned for element links, ``<style>`` tags
    and ``style`` attributes; stylesheets (``OEB_STYLES``) are handed to
    ``remove_links_in_sheet()``. Every file that was modified is marked dirty
    on the container.

    :return: The set of names of the files that were changed
    """
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    stylepath = XPath("//h:style")
    styleattrpath = XPath("//*[@style]")
    changed = set()
    # .items() rather than the Python 2 only .iteritems(), so this also runs
    # on Python 3 where dicts have no iteritems() method.
    for name, mt in container.mime_map.items():
        removed = False
        # Resolves hrefs relative to the file currently being processed.
        resolve = partial(container.href_to_name, base=name)
        if mt in OEB_DOCS:
            root = container.parsed(name)
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition("#")[-1]
                if predicate(hname, href, frag):
                    if attr is None:
                        # The link lives in the element's text, not an attribute.
                        el.text = None
                    elif el.tag == XHTML("link") or el.tag == XHTML("img"):
                        # These elements are dropped entirely rather than
                        # just losing the linking attribute.
                        extract(el)
                    else:
                        del el.attrib[attr]
                    removed = True
            for tag in stylepath(root):
                if tag.text and (tag.get("type") or "text/css").lower() == "text/css":
                    sheet = container.parse_css(tag.text)
                    if remove_links_in_sheet(resolve, sheet, predicate):
                        # NOTE(review): cssText may be bytes depending on the
                        # CSS parser in use -- confirm before relying on it.
                        tag.text = sheet.cssText
                        removed = True
            for tag in styleattrpath(root):
                style = tag.get("style")
                if style:
                    style = container.parse_css(style, is_declaration=True)
                    if remove_links_in_declaration(resolve, style, predicate):
                        removed = True
                        tag.set("style", style.cssText)
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(resolve, container.parsed(name), predicate)
        if removed:
            changed.add(name)
    # Plain loop: dirty() is called only for its side effect.
    for changed_name in changed:
        container.dirty(changed_name)
    return changed
def ensure_single_nav_of_type(root, ntype='toc'):
    '''
    Make sure ``root`` contains exactly one ``<nav>`` whose epub ``type``
    attribute is ``ntype`` and return it, emptied.

    An existing first match is reused with its attributes and tail intact but
    without children or text; later matches are removed with ``extract()``.
    Without any match a fresh ``<nav>`` is appended to the ``<body>`` found by
    ``first_child()``.
    '''
    attr_name = '{%s}type' % EPUB_NS
    found = []
    for candidate in root.iterdescendants(XHTML('nav')):
        if candidate.get(attr_name) == ntype:
            found.append(candidate)

    for extra in found[1:]:
        extract(extra)

    if found:
        nav = found[0]
        # Remember what clear() is about to throw away.
        old_tail = nav.tail
        old_attrib = dict(nav.attrib)
        nav.clear()
        nav.attrib.update(old_attrib)
        nav.tail = old_tail
    else:
        body = first_child(root, XHTML('body'))
        nav = root.makeelement(XHTML('nav'))
        body.append(nav)

    nav.set(attr_name, ntype)
    return nav
def remove_links_to(container, predicate):
    '''
    Strip all links accepted by ``predicate`` from the files in ``container``.

    predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed.

    Returns the set of file names that were modified; each of them is also
    marked dirty on the container.
    '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    find_style_tags = XPath('//h:style')
    find_styled_elements = XPath('//*[@style]')
    changed = set()

    for name, mt in iteritems(container.mime_map):
        removed = False
        if mt in OEB_DOCS:
            root = container.parsed(name)

            # Links carried by elements (attributes or element text)
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition('#')[-1]
                if not predicate(hname, href, frag):
                    continue
                if attr is None:
                    el.text = None
                elif el.tag == XHTML('link') or el.tag == XHTML('img'):
                    # <link> and <img> are removed as a whole
                    extract(el)
                else:
                    del el.attrib[attr]
                removed = True

            # Links inside <style> tags
            for tag in find_style_tags(root):
                if not tag.text:
                    continue
                if (tag.get('type') or 'text/css').lower() != 'text/css':
                    continue
                sheet = container.parse_css(tag.text)
                if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
                    tag.text = css_text(sheet)
                    removed = True

            # Links inside style="" attributes
            for tag in find_styled_elements(root):
                declaration = tag.get('style')
                if not declaration:
                    continue
                declaration = container.parse_css(declaration, is_declaration=True)
                if remove_links_in_declaration(partial(container.href_to_name, base=name), declaration, predicate):
                    removed = True
                    tag.set('style', css_text(declaration))
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(
                partial(container.href_to_name, base=name), container.parsed(name), predicate)

        if removed:
            changed.add(name)

    for changed_name in changed:
        container.dirty(changed_name)
    return changed
def remove_links_to(container, predicate):
    '''
    Remove the links selected by ``predicate`` from every document and
    stylesheet in ``container``.

    predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed.

    The names of all modified files are marked dirty on the container and
    returned as a set.
    '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    stylepath = XPath('//h:style')
    styleattrpath = XPath('//*[@style]')

    def clean_document(name):
        # Process one HTML document; report whether anything was removed.
        touched = False
        root = container.parsed(name)
        for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
            hname = container.href_to_name(href, name)
            frag = href.partition('#')[-1]
            if predicate(hname, href, frag):
                if attr is None:
                    el.text = None
                elif el.tag == XHTML('link') or el.tag == XHTML('img'):
                    extract(el)
                else:
                    del el.attrib[attr]
                touched = True
        for tag in stylepath(root):
            if tag.text and (tag.get('type') or 'text/css').lower() == 'text/css':
                sheet = container.parse_css(tag.text)
                if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
                    tag.text = css_text(sheet)
                    touched = True
        for tag in styleattrpath(root):
            inline = tag.get('style')
            if inline:
                inline = container.parse_css(inline, is_declaration=True)
                if remove_links_in_declaration(partial(container.href_to_name, base=name), inline, predicate):
                    touched = True
                    tag.set('style', css_text(inline))
        return touched

    changed = set()
    for name, mt in iteritems(container.mime_map):
        if mt in OEB_DOCS:
            removed = clean_document(name)
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(
                partial(container.href_to_name, base=name), container.parsed(name), predicate)
        else:
            removed = False
        if removed:
            changed.add(name)

    for modified in changed:
        container.dirty(modified)
    return changed
def transform_css(self):
    '''
    Run the CSS transforms over this container, then turn every inline
    ``<style type="text/css">`` tag in its HTML documents into a
    ``<link rel="stylesheet">`` that points at a newly added ``.css`` file.
    '''
    transform_css(self, transform_sheet=transform_sheet, transform_style=transform_declaration)
    # Firefox flakes out sometimes when dynamically creating <style> tags,
    # so convert them to external stylesheets to ensure they never fail
    style_xpath = XPath('//h:style')
    # .items() instead of the Python 2 only .iteritems(), so this also works
    # on Python 3. The tuple() snapshot is kept: add_file() is called inside
    # the loop and presumably registers new entries in mime_map.
    for name, mt in tuple(self.mime_map.items()):
        mt = mt.lower()
        if mt in OEB_DOCS:
            root = self.parsed(name)
            head = ensure_head(root)
            for style in style_xpath(root):
                if style.text and (style.get('type') or 'text/css').lower() == 'text/css':
                    in_head = has_ancestor(style, head)
                    if not in_head:
                        # Relocate stray <style> tags into <head>
                        extract(style)
                        head.append(style)
                    css = style.text
                    # Reuse the element itself, re-tagged as a <link>
                    style.clear()
                    style.tag = XHTML('link')
                    style.set('type', 'text/css')
                    style.set('rel', 'stylesheet')
                    sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
                    style.set('href', self.name_to_href(sname, name))
                    # NOTE(review): the modified document is not marked dirty
                    # here -- confirm the caller commits it some other way.
def transform_css(self):
    '''
    Apply the CSS transforms to this container and externalize inline
    ``<style type="text/css">`` tags: each one becomes a
    ``<link rel="stylesheet">`` in ``<head>`` referencing a new ``.css`` file
    that holds the former tag text.
    '''
    transform_css(self, transform_sheet=transform_sheet, transform_style=transform_declaration)

    # Firefox flakes out sometimes when dynamically creating <style> tags,
    # so convert them to external stylesheets to ensure they never fail
    find_styles = XPath('//h:style')
    # Iterate over a snapshot of the mime map (add_file() is called below).
    for name, mt in tuple(iteritems(self.mime_map)):
        if mt.lower() not in OEB_DOCS:
            continue
        head = ensure_head(self.parsed(name))
        for style in find_styles(self.parsed(name)):
            if not style.text:
                continue
            if (style.get('type') or 'text/css').lower() != 'text/css':
                continue
            if not has_ancestor(style, head):
                extract(style)
                head.append(style)
            sheet_text = style.text
            style.clear()
            style.tag = XHTML('link')
            style.set('type', 'text/css')
            style.set('rel', 'stylesheet')
            sheet_name = self.add_file(
                name + '.css', sheet_text.encode('utf-8'), modify_name_if_needed=True)
            style.set('href', self.name_to_href(sheet_name, name))
def transform_html(self, name, virtualize_resources):
    '''
    Rewrite the HTML document ``name`` in place and mark it dirty if anything
    changed.

    The steps are: externalize inline ``<style>`` tags, tag images with a
    ``data-calibre-src`` attribute, strip the attributes of ``<link>`` tags
    that are not CSS stylesheets, run the inline style transforms and, when
    ``virtualize_resources`` is false, replace ``<a href>`` targets that the
    link replacer maps to the book's link uid.
    '''
    root = self.parsed(name)
    head = ensure_head(root)
    changed = False

    # Firefox flakes out sometimes when dynamically creating <style> tags,
    # so convert them to external stylesheets to ensure they never fail
    for style in XPath('//h:style')(root):
        if not style.text:
            continue
        if (style.get('type') or 'text/css').lower() != 'text/css':
            continue
        if not has_ancestor(style, head):
            extract(style)
            head.append(style)
        sheet_text = style.text
        style.clear()
        style.tag = XHTML('link')
        style.set('type', 'text/css')
        style.set('rel', 'stylesheet')
        sheet_name = self.add_file(name + '.css', sheet_text.encode('utf-8'), modify_name_if_needed=True)
        style.set('href', self.name_to_href(sheet_name, name))
        changed = True

    # Used for viewing images
    for img in XPath('//h:img[@src]')(root):
        img_name = self.href_to_name(img.get('src'), name)
        if img_name:
            img.set('data-calibre-src', img_name)
            changed = True

    # Disable non-stylesheet link tags. Such a link will not be loaded by the
    # browser anyway and would cause the resource load check to hang
    for link in XPath('//h:link[@href]')(root):
        link_type = (link.get('type') or 'text/css').lower()
        link_rel = (link.get('rel') or 'stylesheet').lower()
        is_stylesheet = link_type == 'text/css' and link_rel == 'stylesheet'
        if not is_stylesheet:
            link.attrib.clear()
            changed = True

    # Transform <style> and style=""
    if transform_inline_styles(self, name, transform_sheet=transform_sheet, transform_style=transform_declaration):
        changed = True

    if not virtualize_resources:
        render_data = self.book_render_data
        link_uid = render_data['link_uid']
        link_replacer = create_link_replacer(self, link_uid, set())
        link_to_map = render_data['link_to_map']
        for anchor in XPath('//h:a[@href]')(root):
            href = link_replacer(name, anchor.get('href'))
            if not href or not href.startswith(link_uid):
                continue
            anchor.set('href', 'javascript:void(0)')
            parts = decode_url(href.split('|')[1])
            lname, lfrag = parts[0], parts[1]
            # Record that this document links to (lname, lfrag)
            by_frag = link_to_map.setdefault(lname, {})
            by_frag.setdefault(lfrag or '', set()).add(name)
            payload = json.dumps({'name': lname, 'frag': lfrag}, ensure_ascii=False)
            anchor.set('data-' + link_uid, payload)
            changed = True

    if changed:
        self.dirty(name)
def commit_nav_toc(container, toc, lang=None):
    '''
    Serialize ``toc`` as the ``<nav epub:type="toc">`` section of the
    container's navigation document and store the document with
    ``container.replace()``.

    An existing navigation document (as reported by
    ``find_existing_nav_toc()``) is reused; otherwise a ``nav.xhtml`` manifest
    item with the ``nav`` property is generated. A KeyError from
    ``container.parsed()`` makes the function start from the
    ``templates/new_nav.html`` template.

    :param container: The book container to update
    :param toc: Root table of contents node; iterable over child nodes that
        have ``title``, ``dest`` and ``frag``; the root also has ``toc_title``
    :param lang: Not used by this function
    '''
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    def add_child(parent, tag):
        # Create an XHTML element of the given tag and append it to parent.
        node = parent.makeelement(XHTML(tag))
        parent.append(node)
        return node

    tocname = find_existing_nav_toc(container)
    if tocname is None:
        new_item = container.generate_item('nav.xhtml', id_prefix='nav')
        new_item.set('properties', 'nav')
        tocname = container.href_to_name(new_item.get('href'), base=container.opf_name)
    try:
        root = container.parsed(tocname)
    except KeyError:
        root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))

    epub_type = '{%s}type' % EPUB_NS
    existing = []
    for candidate in root.iterdescendants(XHTML('nav')):
        if candidate.get(epub_type) == 'toc':
            existing.append(candidate)
    for surplus in existing[1:]:
        extract(surplus)
    if existing:
        nav = existing[0]
        # clear() also drops attributes and tail; put them back afterwards.
        prior_tail = nav.tail
        prior_attrib = dict(nav.attrib)
        nav.clear()
        nav.attrib.update(prior_attrib)
        nav.tail = prior_tail
    else:
        body = first_child(root, XHTML('body'))
        nav = root.makeelement(XHTML('nav'))
        body.append(nav)
    nav.set(epub_type, 'toc')

    if toc.toc_title:
        add_child(nav, 'h1').text = toc.toc_title
    rnode = add_child(nav, 'ol')

    to_href = partial(container.name_to_href, base=tocname)
    spat = re.compile(r'\s+')

    def process_node(xml_parent, toc_parent):
        # Recursively mirror the ToC tree as <li> items with nested <ol>s.
        for node in toc_parent:
            li = add_child(xml_parent, 'li')
            text = spat.sub(' ', node.title or '').strip()
            if node.dest:
                anchor = add_child(li, 'a')
                anchor.text = text
                href = to_href(node.dest)
                if node.frag:
                    href = href + '#' + node.frag
                anchor.set('href', href)
            else:
                add_child(li, 'span').text = text
            if len(node):
                process_node(add_child(li, 'ol'), node)

    process_node(rnode, toc)
    pretty_xml_tree(rnode)
    # Undo the pretty printer's whitespace around the sole child of leaf items.
    leaf_items = (li for li in rnode.iterdescendants(XHTML('li')) if len(li) == 1)
    for li in leaf_items:
        li.text = None
        li[0].tail = None
    container.replace(tocname, root)