def transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names):
    """Prepare the HTML file ``name`` for rendering and write it back.

    The parsed tree is modified in place: images are tagged with the
    container name of their source, ``<link>`` tags that are not CSS
    stylesheets are stripped of their attributes, ``<style>`` tags and
    ``style`` attributes are passed through ``transform_inline_styles``, and
    hyperlinks are either handed to ``virtualize_html`` or rewritten here.
    Finally the output of ``html_as_json`` for the tree is written over
    ``name`` in the container.

    :param container: Object providing ``parsed``, ``href_to_name`` and
        ``open`` for the book's files.
    :param name: Container name of the HTML file to process.
    :param virtualize_resources: If true, URLs in style sheets are passed
        through the link replacer and ``virtualize_html`` processes the
        document; otherwise ``<a>`` links (HTML and SVG) are rewritten here.
    :param link_uid: Prefix identifying URLs that the link replacer has
        rewritten; also used to build the ``data-<link_uid>`` attribute name.
    :param link_to_map: Mapping updated in place as
        ``{target name: {fragment: set of linking file names}}``.
    :param virtualized_names: Set that receives ``name`` when, after style
        sheet URLs were replaced, ``name`` is in the set of changed names
        given to the link replacer.
    """
    anchor_finder = XPath('//h:a[@href]')
    svg_anchor_finder = XPath('//svg:a')
    image_finder = XPath('//h:img[@src]')
    head_link_finder = XPath('//h:link[@href]')
    tree = container.parsed(name)
    changed_names = set()
    link_replacer = create_link_replacer(container, link_uid, changed_names)

    # Record the container name of every image; used for viewing images
    for image in image_finder(tree):
        source_name = container.href_to_name(image.get('src'), name)
        if source_name:
            image.set('data-calibre-src', source_name)

    # Disable <link> tags that are not stylesheets. Such a link will not be
    # loaded by the browser anyway and causes the resource load check to hang
    for link in head_link_finder(tree):
        mime = (link.get('type') or 'text/css').lower()
        relation = (link.get('rel') or 'stylesheet').lower()
        is_css_stylesheet = mime == 'text/css' and relation == 'stylesheet'
        if not is_css_stylesheet:
            link.attrib.clear()

    def process_sheet(sheet):
        # Returns whether the sheet was changed, either by transform_sheet
        # or because the link replacer recorded a change for this file.
        sheet_changed = transform_sheet(sheet)
        if not virtualize_resources:
            return sheet_changed
        replaceUrls(sheet, partial(link_replacer, name))
        if name not in changed_names:
            return sheet_changed
        virtualized_names.add(name)
        return True

    def internalize_link(anchor, attr):
        # Replace a link that the link replacer resolved to an internal
        # target with a no-op javascript URL, keeping the real target in a
        # data attribute and registering it in link_to_map.
        original = anchor.get(attr)
        if not original:
            return
        resolved = link_replacer(name, original)
        if not resolved or not resolved.startswith(link_uid):
            return
        anchor.set(attr, 'javascript:void(0)')
        decoded = decode_url(resolved.split('|')[1])
        target_name, target_frag = decoded[0], decoded[1]
        fragments = link_to_map.setdefault(target_name, {})
        linking_files = fragments.setdefault(target_frag or '', set())
        linking_files.add(name)
        payload = json.dumps({'name': target_name, 'frag': target_frag}, ensure_ascii=False)
        anchor.set('data-' + link_uid, payload)

    # Transform <style> and style=""
    transform_inline_styles(
        container, name,
        transform_sheet=process_sheet,
        transform_style=transform_declaration,
    )

    if virtualize_resources:
        virtualize_html(container, name, link_uid, link_to_map, virtualized_names)
    else:
        for anchor in anchor_finder(tree):
            internalize_link(anchor, 'href')
        # SVG anchors carry their target in the xlink:href attribute
        xlink_href = XLINK('href')
        for anchor in svg_anchor_finder(tree):
            internalize_link(anchor, xlink_href)

    serialized = html_as_json(tree)
    with container.open(name, 'wb') as stream:
        stream.write(serialized)
def transform_html(self, name, virtualize_resources):
    """Prepare the HTML file ``name`` for rendering, modifying it in place.

    Embedded ``<style>`` tags of type ``text/css`` become external
    stylesheets, images are tagged with the container name of their source,
    ``<link>`` tags that are not CSS stylesheets are stripped of their
    attributes, and ``<style>``/``style`` content is passed through
    ``transform_inline_styles``. When resources are not virtualized,
    ``<a href>`` links resolved by the link replacer are rewritten as well.
    The file is marked dirty if any of these steps changed it.

    :param name: Container name of the HTML file to process.
    :param virtualize_resources: If false, links are rewritten here using
        ``link_uid`` and ``link_to_map`` from ``self.book_render_data``.
    """
    style_finder = XPath('//h:style')
    anchor_finder = XPath('//h:a[@href]')
    image_finder = XPath('//h:img[@src]')
    head_link_finder = XPath('//h:link[@href]')
    tree = self.parsed(name)
    head = ensure_head(tree)
    changed = False

    def externalize(style_tag):
        # Turn the <style> element into a <link> pointing at a newly added
        # CSS file that holds the original style text.
        if not has_ancestor(style_tag, head):
            extract(style_tag)
            head.append(style_tag)
        css_text = style_tag.text
        style_tag.clear()
        style_tag.tag = XHTML('link')
        style_tag.set('type', 'text/css')
        style_tag.set('rel', 'stylesheet')
        sheet_name = self.add_file(
            name + '.css', css_text.encode('utf-8'), modify_name_if_needed=True)
        style_tag.set('href', self.name_to_href(sheet_name, name))

    # Firefox flakes out sometimes when dynamically creating <style> tags,
    # so convert them to external stylesheets to ensure they never fail
    for style_tag in style_finder(tree):
        if not style_tag.text:
            continue
        if (style_tag.get('type') or 'text/css').lower() != 'text/css':
            continue
        externalize(style_tag)
        changed = True

    # Record the container name of every image; used for viewing images
    for image in image_finder(tree):
        source_name = self.href_to_name(image.get('src'), name)
        if source_name:
            image.set('data-calibre-src', source_name)
            changed = True

    # Disable <link> tags that are not stylesheets. Such a link will not be
    # loaded by the browser anyway and causes the resource load check to hang
    for link in head_link_finder(tree):
        mime = (link.get('type') or 'text/css').lower()
        relation = (link.get('rel') or 'stylesheet').lower()
        is_css_stylesheet = mime == 'text/css' and relation == 'stylesheet'
        if not is_css_stylesheet:
            link.attrib.clear()
            changed = True

    # Transform <style> and style=""
    styles_changed = transform_inline_styles(
        self, name,
        transform_sheet=transform_sheet,
        transform_style=transform_declaration,
    )
    if styles_changed:
        changed = True

    if not virtualize_resources:
        link_uid = self.book_render_data['link_uid']
        link_replacer = create_link_replacer(self, link_uid, set())
        link_to_map = self.book_render_data['link_to_map']
        for anchor in anchor_finder(tree):
            resolved = link_replacer(name, anchor.get('href'))
            if not resolved or not resolved.startswith(link_uid):
                continue
            # Neutralize the link and keep its real target in a data
            # attribute, registering this file as a source of the link
            anchor.set('href', 'javascript:void(0)')
            decoded = decode_url(resolved.split('|')[1])
            target_name, target_frag = decoded[0], decoded[1]
            fragments = link_to_map.setdefault(target_name, {})
            linking_files = fragments.setdefault(target_frag or '', set())
            linking_files.add(name)
            payload = json.dumps({'name': target_name, 'frag': target_frag}, ensure_ascii=False)
            anchor.set('data-' + link_uid, payload)
            changed = True

    if changed:
        self.dirty(name)