Example #1
0
def transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names):
    """Rewrite the HTML file *name* for rendering and store it serialized.

    The parsed tree is annotated and cleaned up, its styles are transformed,
    its links are either virtualized or turned into inert anchors carrying
    their target as data, and the result of ``html_as_json`` is written back
    over the file through ``container.open(name, 'wb')``.

    :param container: Object providing ``parsed``, ``href_to_name`` and
        ``open`` for the book's files.
    :param name: Name of the HTML file inside the container.
    :param virtualize_resources: When true, stylesheet URLs are rewritten
        and link handling is delegated to ``virtualize_html``; otherwise the
        anchors are processed here.
    :param link_uid: Prefix identifying rewritten links; also used to build
        the ``data-<link_uid>`` attribute name.
    :param link_to_map: Mapping updated in place as
        ``link_to_map[target_name][fragment]`` -> set of file names that
        link to that target.
    :param virtualized_names: Set that receives *name* when *name* shows up
        in the change set handed to ``create_link_replacer`` after a sheet's
        URLs were processed.
    """
    find_anchors = XPath('//h:a[@href]')
    find_svg_anchors = XPath('//svg:a')
    find_images = XPath('//h:img[@src]')
    find_resource_links = XPath('//h:link[@href]')
    root = container.parsed(name)
    rewritten_names = set()
    rewrite_link = create_link_replacer(container, link_uid, rewritten_names)

    # Record the container name of every image so it can be viewed later
    for image in find_images(root):
        resolved = container.href_to_name(image.get('src'), name)
        if not resolved:
            continue
        image.set('data-calibre-src', resolved)

    # Strip the attributes of <link> tags that are not CSS stylesheets. The
    # browser will not load them anyway and they would cause the resource
    # load check to hang
    for tag in find_resource_links(root):
        mime = (tag.get('type') or 'text/css').lower()
        relation = (tag.get('rel') or 'stylesheet').lower()
        is_stylesheet = mime == 'text/css' and relation == 'stylesheet'
        if not is_stylesheet:
            tag.attrib.clear()

    def restyle_sheet(sheet):
        # Returns whether the sheet was modified, either by transform_sheet
        # or because URLs belonging to this file were rewritten.
        modified = transform_sheet(sheet)
        if not virtualize_resources:
            return modified
        replaceUrls(sheet, partial(rewrite_link, name))
        if name not in rewritten_names:
            return modified
        virtualized_names.add(name)
        return True

    def mark_internal_link(anchor, attr):
        # Turn a link into an inert anchor whose real target (file name and
        # fragment) is stored as JSON in the data-<link_uid> attribute.
        target = anchor.get(attr)
        if not target:
            return
        target = rewrite_link(name, target)
        if not target or not target.startswith(link_uid):
            return
        anchor.set(attr, 'javascript:void(0)')
        decoded = decode_url(target.split('|')[1])
        lname, lfrag = decoded[0], decoded[1]
        referrers = link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set())
        referrers.add(name)
        payload = json.dumps({'name': lname, 'frag': lfrag}, ensure_ascii=False)
        anchor.set('data-' + link_uid, payload)

    # Transform <style> and style=""
    transform_inline_styles(
        container,
        name,
        transform_sheet=restyle_sheet,
        transform_style=transform_declaration,
    )

    if virtualize_resources:
        virtualize_html(container, name, link_uid, link_to_map, virtualized_names)
    else:
        for anchor in find_anchors(root):
            mark_internal_link(anchor, 'href')
        xlink_href = XLINK('href')
        for anchor in find_svg_anchors(root):
            mark_internal_link(anchor, xlink_href)

    serialized = html_as_json(root)
    with container.open(name, 'wb') as out:
        out.write(serialized)
Example #2
0
    def transform_html(self, name, virtualize_resources):
        """Rewrite the parsed HTML file *name* in place for rendering.

        Inline CSS ``<style>`` tags become external stylesheets, images get
        a ``data-calibre-src`` attribute, non-stylesheet ``<link>`` tags are
        disabled and inline styles are transformed. When
        *virtualize_resources* is false, anchors recognised by the link
        replacer are additionally made inert and recorded in
        ``self.book_render_data['link_to_map']``. The file is marked dirty
        only if something was actually modified.
        """
        find_styles = XPath('//h:style')
        find_anchors = XPath('//h:a[@href]')
        find_images = XPath('//h:img[@src]')
        find_resource_links = XPath('//h:link[@href]')
        root = self.parsed(name)
        head = ensure_head(root)
        modified = False

        # Firefox sometimes flakes out when <style> tags are created
        # dynamically, so convert them into external stylesheets to make
        # sure they never fail
        for node in find_styles(root):
            if not node.text:
                continue
            if (node.get('type') or 'text/css').lower() != 'text/css':
                continue
            if not has_ancestor(node, head):
                extract(node)
                head.append(node)
            css_text = node.text
            node.clear()
            node.tag = XHTML('link')
            node.set('type', 'text/css')
            node.set('rel', 'stylesheet')
            sheet_name = self.add_file(
                name + '.css', css_text.encode('utf-8'), modify_name_if_needed=True)
            node.set('href', self.name_to_href(sheet_name, name))
            modified = True

        # Record the container name of every image so it can be viewed later
        for image in find_images(root):
            resolved = self.href_to_name(image.get('src'), name)
            if not resolved:
                continue
            image.set('data-calibre-src', resolved)
            modified = True

        # Strip the attributes of <link> tags that are not CSS stylesheets.
        # The browser will not load them anyway and they would cause the
        # resource load check to hang
        for tag in find_resource_links(root):
            mime = (tag.get('type') or 'text/css').lower()
            relation = (tag.get('rel') or 'stylesheet').lower()
            is_stylesheet = mime == 'text/css' and relation == 'stylesheet'
            if not is_stylesheet:
                tag.attrib.clear()
                modified = True

        # Transform <style> and style=""
        styles_touched = transform_inline_styles(
            self,
            name,
            transform_sheet=transform_sheet,
            transform_style=transform_declaration,
        )
        if styles_touched:
            modified = True

        if not virtualize_resources:
            link_uid = self.book_render_data['link_uid']
            rewrite_link = create_link_replacer(self, link_uid, set())
            link_to_map = self.book_render_data['link_to_map']
            for anchor in find_anchors(root):
                target = rewrite_link(name, anchor.get('href'))
                if not target or not target.startswith(link_uid):
                    continue
                # Make the anchor inert and keep its real target (file name
                # and fragment) as JSON in the data-<link_uid> attribute
                anchor.set('href', 'javascript:void(0)')
                decoded = decode_url(target.split('|')[1])
                lname, lfrag = decoded[0], decoded[1]
                referrers = link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set())
                referrers.add(name)
                payload = json.dumps({'name': lname, 'frag': lfrag}, ensure_ascii=False)
                anchor.set('data-' + link_uid, payload)
                modified = True

        if modified:
            self.dirty(name)