Python extract Examples, calibre.ebooks.oeb.polish.utils.extract Python Examples

Example #1

0

Show file

def commit_nav_toc(container, toc, lang=None):
    """Rebuild the ``toc`` nav element of the navigation document from ``toc``.

    The document named by ``find_existing_nav_toc`` is used; when that returns
    None a new ``nav.xhtml`` item with ``properties="nav"`` is generated. If
    ``container.parsed`` raises KeyError for that name, the document is
    created from the ``templates/new_nav.html`` resource instead.

    Only one ``<nav>`` whose ``{EPUB_NS}type`` is ``toc`` is kept: extra ones
    are removed with ``extract`` and the first one is emptied while keeping
    its attributes and tail. If none exists, a new one is appended to the
    element returned by ``first_child(root, XHTML('body'))``. The nav then
    receives an optional ``<h1>`` holding ``toc.toc_title`` and a nested
    ``<ol>`` mirroring the tree of ``toc``, after which the tree is handed to
    ``container.replace``.

    :param container: object providing the parsing and href helpers used here
    :param toc: iterable tree of entries exposing ``title``, ``dest`` and
        ``frag``; the root also exposes ``toc_title``
    :param lang: accepted but not used by this implementation
    """
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    nav_name = find_existing_nav_toc(container)
    if nav_name is None:
        manifest_item = container.generate_item('nav.xhtml', id_prefix='nav')
        manifest_item.set('properties', 'nav')
        nav_name = container.href_to_name(manifest_item.get('href'), base=container.opf_name)

    try:
        root = container.parsed(nav_name)
    except KeyError:
        template = P('templates/new_nav.html', data=True)
        root = container.parse_xhtml(template.decode('utf-8'))

    type_attr = '{%s}type' % EPUB_NS
    toc_navs = []
    for candidate in root.iterdescendants(XHTML('nav')):
        if candidate.get(type_attr) == 'toc':
            toc_navs.append(candidate)

    if toc_navs:
        nav = toc_navs[0]
        # Drop the duplicates first, then empty the survivor while keeping
        # its attributes and trailing text.
        for duplicate in toc_navs[1:]:
            extract(duplicate)
        saved_tail = nav.tail
        saved_attrs = dict(nav.attrib)
        nav.clear()
        nav.attrib.update(saved_attrs)
        nav.tail = saved_tail
    else:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    nav.set(type_attr, 'toc')

    if toc.toc_title:
        heading = nav.makeelement(XHTML('h1'))
        nav.append(heading)
        heading.text = toc.toc_title

    top_list = nav.makeelement(XHTML('ol'))
    nav.append(top_list)
    href_for = partial(container.name_to_href, base=nav_name)
    whitespace = re.compile(r'\s+')

    def emit_entries(list_el, toc_node):
        # One <li> per entry; entries with children get a nested <ol>.
        for entry in toc_node:
            item = list_el.makeelement(XHTML('li'))
            list_el.append(item)
            label = whitespace.sub(' ', entry.title or '').strip()
            # Entries without a destination are rendered as plain <span>s.
            tag = 'a' if entry.dest else 'span'
            label_el = item.makeelement(XHTML(tag))
            label_el.text = label
            item.append(label_el)
            if entry.dest:
                target = href_for(entry.dest)
                if entry.frag:
                    target = target + '#' + entry.frag
                label_el.set('href', target)
            if len(entry) > 0:
                sublist = item.makeelement(XHTML('ol'))
                item.append(sublist)
                emit_entries(sublist, entry)

    emit_entries(top_list, toc)
    pretty_xml_tree(top_list)
    # Leaf items: remove the whitespace around their only child.
    for item in top_list.iterdescendants(XHTML('li')):
        if len(item) != 1:
            continue
        item.text = None
        item[0].tail = None
    container.replace(nav_name, root)

Example #2

0

Show file

File: render_book.py Project: davidfor/calibre

def ensure_head(root):
    """Return the single ``<head>`` child of ``root``.

    When ``root`` has no head child, an empty one is inserted as the first
    child. When it has several, the element children of the extra heads are
    moved into the first one and the extra heads are removed with
    ``extract``.
    """
    found = list(root.iterchildren(XHTML('head')))
    if len(found) == 1:
        return found[0]
    if not found:
        root.insert(0, root.makeelement(XHTML('head')))
        return root[0]
    primary = found[0]
    for surplus in found[1:]:
        for node in surplus.iterchildren('*'):
            primary.append(node)
        extract(surplus)
    return primary

Example #3

0

Show file

File: toc.py Project: j-howell/calibre

def ensure_single_nav_of_type(root, ntype='toc'):
    """Return the one ``<nav>`` under ``root`` whose type is ``ntype``.

    The type is read from the ``{EPUB_NS}type`` attribute. Every matching nav
    after the first is removed with ``extract``. The first match is emptied
    with ``clear()`` and then gets its attributes and tail text back. When
    nothing matches, a new nav is appended to the element returned by
    ``first_child(root, XHTML('body'))``. In all cases the returned element
    has its type attribute set to ``ntype``.
    """
    type_attr = '{%s}type' % EPUB_NS
    matches = [el for el in root.iterdescendants(XHTML('nav')) if el.get(type_attr) == ntype]
    if matches:
        keeper = matches[0]
        for duplicate in matches[1:]:
            extract(duplicate)
        # Read tail/attributes only after the duplicates are gone.
        saved_tail = keeper.tail
        saved_attrs = dict(keeper.attrib)
        keeper.clear()
        keeper.attrib.update(saved_attrs)
        keeper.tail = saved_tail
    else:
        keeper = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(keeper)
    keeper.set(type_attr, ntype)
    return keeper

Example #4

0

Show file

File: replace.py Project: botmtl/calibre

def remove_links_to(container, predicate):
    """ predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed.

    For every entry of ``container.mime_map`` whose media type is in
    ``OEB_DOCS`` the links reported by ``iterlinks``, the contents of
    ``<style>`` tags of type text/css and all ``style`` attributes are
    examined; entries whose media type is in ``OEB_STYLES`` are processed as
    stylesheets. A matching link is removed by clearing the element text
    (when ``iterlinks`` reports no attribute), by extracting ``<link>`` and
    ``<img>`` elements, or by deleting the attribute on any other element.

    Returns the set of names that were changed; each of them is also passed
    to ``container.dirty``.
    """
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML

    stylepath = XPath("//h:style")
    styleattrpath = XPath("//*[@style]")
    changed = set()
    # items() exists on both Python 2 and 3, unlike the Python 2-only
    # iteritems() method.
    for name, mt in container.mime_map.items():
        removed = False
        if mt in OEB_DOCS:
            root = container.parsed(name)
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition("#")[-1]
                if predicate(hname, href, frag):
                    if attr is None:
                        # The link lives in the element text, not an attribute
                        el.text = None
                    else:
                        if el.tag == XHTML("link") or el.tag == XHTML("img"):
                            extract(el)
                        else:
                            del el.attrib[attr]
                    removed = True
            for tag in stylepath(root):
                # A missing type attribute is treated as text/css
                if tag.text and (tag.get("type") or "text/css").lower() == "text/css":
                    sheet = container.parse_css(tag.text)
                    if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
                        tag.text = sheet.cssText
                        removed = True
            for tag in styleattrpath(root):
                style = tag.get("style")
                if style:
                    style = container.parse_css(style, is_declaration=True)
                    if remove_links_in_declaration(partial(container.href_to_name, base=name), style, predicate):
                        removed = True
                        tag.set("style", style.cssText)
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(
                partial(container.href_to_name, base=name), container.parsed(name), predicate
            )
        if removed:
            changed.add(name)
    # Plain loop: dirty() is called only for its side effect
    for changed_name in changed:
        container.dirty(changed_name)
    return changed

Example #5

0

Show file

File: toc.py Project: buoyantair/calibre

def ensure_single_nav_of_type(root, ntype='toc'):
    """Make sure ``root`` holds exactly one ``<nav>`` of type ``ntype``.

    Navs are matched on their ``{EPUB_NS}type`` attribute. Surplus matches
    are removed with ``extract``; the first match is cleared but keeps its
    attributes and tail. Without any match, a fresh nav is appended to the
    element returned by ``first_child(root, XHTML('body'))``. The surviving
    or new nav, with its type attribute set to ``ntype``, is returned.
    """
    attr_name = '{%s}type' % EPUB_NS
    candidates = []
    for element in root.iterdescendants(XHTML('nav')):
        if element.get(attr_name) == ntype:
            candidates.append(element)

    for surplus in candidates[1:]:
        extract(surplus)

    if not candidates:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    else:
        nav = candidates[0]
        kept_tail, kept_attrs = nav.tail, dict(nav.attrib)
        nav.clear()
        nav.attrib.update(kept_attrs)
        nav.tail = kept_tail
    nav.set(attr_name, ntype)
    return nav

Example #6

0

Show file

def remove_links_to(container, predicate):
    ''' Remove every link for which ``predicate(name, href, fragment)`` is
    true, where ``name`` is what ``container.href_to_name`` returns for the
    link's href.

    Files whose media type is in OEB_DOCS have their links (as reported by
    ``iterlinks``), their text/css ``<style>`` tags and their ``style``
    attributes processed; files whose media type is in OEB_STYLES are
    processed as stylesheets. Returns the set of changed file names, each of
    which has also been passed to ``container.dirty``. '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    find_style_tags = XPath('//h:style')
    find_styled_elements = XPath('//*[@style]')
    extractable_tags = (XHTML('link'), XHTML('img'))
    changed = set()
    for name, mt in iteritems(container.mime_map):
        resolve = partial(container.href_to_name, base=name)
        touched = False
        if mt in OEB_DOCS:
            root = container.parsed(name)
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition('#')[-1]
                if not predicate(hname, href, frag):
                    continue
                if attr is None:
                    # No attribute reported: the link is in the element text
                    el.text = None
                elif el.tag in extractable_tags:
                    extract(el)
                else:
                    del el.attrib[attr]
                touched = True
            for tag in find_style_tags(root):
                # A missing type attribute counts as text/css
                if not tag.text or (tag.get('type') or 'text/css').lower() != 'text/css':
                    continue
                sheet = container.parse_css(tag.text)
                if remove_links_in_sheet(resolve, sheet, predicate):
                    tag.text = css_text(sheet)
                    touched = True
            for tag in find_styled_elements(root):
                raw = tag.get('style')
                if not raw:
                    continue
                declaration = container.parse_css(raw, is_declaration=True)
                if remove_links_in_declaration(resolve, declaration, predicate):
                    touched = True
                    tag.set('style', css_text(declaration))
        elif mt in OEB_STYLES:
            touched = remove_links_in_sheet(resolve, container.parsed(name), predicate)
        if touched:
            changed.add(name)
    for changed_name in changed:
        container.dirty(changed_name)
    return changed

Example #7

0

Show file

File: replace.py Project: j-howell/calibre

def remove_links_to(container, predicate):
    ''' Strip from the container all links accepted by ``predicate``.

    ``predicate`` is called as ``predicate(name, href, fragment)`` and must
    return True iff the link should be removed. Links found by ``iterlinks``,
    inside text/css ``<style>`` tags and inside ``style`` attributes of
    OEB_DOCS files are handled, as are links in OEB_STYLES files. The set of
    changed file names is returned after each has been marked via
    ``container.dirty``. '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    style_tags = XPath('//h:style')
    inline_styled = XPath('//*[@style]')
    changed = set()
    for name, mt in iteritems(container.mime_map):
        if mt in OEB_DOCS:
            dirty = False
            root = container.parsed(name)
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                target = container.href_to_name(href, name)
                fragment = href.partition('#')[-1]
                if predicate(target, href, fragment):
                    dirty = True
                    if attr is None:
                        el.text = None
                    elif el.tag == XHTML('link') or el.tag == XHTML('img'):
                        # These elements are useless without their target
                        extract(el)
                    else:
                        del el.attrib[attr]
            for tag in style_tags(root):
                if tag.text and (tag.get('type') or 'text/css').lower() == 'text/css':
                    sheet = container.parse_css(tag.text)
                    to_name = partial(container.href_to_name, base=name)
                    if remove_links_in_sheet(to_name, sheet, predicate):
                        tag.text = css_text(sheet)
                        dirty = True
            for tag in inline_styled(root):
                style = tag.get('style')
                if style:
                    declaration = container.parse_css(style, is_declaration=True)
                    to_name = partial(container.href_to_name, base=name)
                    if remove_links_in_declaration(to_name, declaration, predicate):
                        dirty = True
                        tag.set('style', css_text(declaration))
        elif mt in OEB_STYLES:
            to_name = partial(container.href_to_name, base=name)
            dirty = remove_links_in_sheet(to_name, container.parsed(name), predicate)
        else:
            dirty = False
        if dirty:
            changed.add(name)
    for changed_name in changed:
        container.dirty(changed_name)
    return changed

Example #8

0

Show file

File: render_book.py Project: davidfor/calibre

 def transform_css(self):
     """Transform the book's CSS and externalise inline ``<style>`` tags.

     First runs the module-level ``transform_css`` with ``transform_sheet``
     and ``transform_declaration``. Then, for every file whose lower-cased
     media type is in ``OEB_DOCS``, each non-empty ``<style>`` tag of type
     text/css is moved into the document head if it is not already there,
     its CSS is written to a new file via ``add_file`` and the tag itself is
     turned into a ``<link rel="stylesheet">`` pointing at that file.
     """
     transform_css(self, transform_sheet=transform_sheet, transform_style=transform_declaration)
     # Firefox flakes out sometimes when dynamically creating <style> tags,
     # so convert them to external stylesheets to ensure they never fail
     style_xpath = XPath('//h:style')
     # Iterate over a snapshot: add_file() is called inside the loop and
     # presumably adds entries to mime_map. items() is used instead of the
     # Python 2-only iteritems() so this also runs on Python 3.
     for name, mt in tuple(self.mime_map.items()):
         mt = mt.lower()
         if mt in OEB_DOCS:
             # Parse once and reuse the tree for both the head lookup and
             # the <style> search.
             root = self.parsed(name)
             head = ensure_head(root)
             for style in style_xpath(root):
                 # A missing type attribute is treated as text/css
                 if style.text and (style.get('type') or 'text/css').lower() == 'text/css':
                     in_head = has_ancestor(style, head)
                     if not in_head:
                         extract(style)
                         head.append(style)
                     css = style.text
                     # Reuse the element itself as the replacement <link>
                     style.clear()
                     style.tag = XHTML('link')
                     style.set('type', 'text/css')
                     style.set('rel', 'stylesheet')
                     sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
                     style.set('href', self.name_to_href(sname, name))

Example #9

0

Show file

 def transform_css(self):
     """Run the CSS transforms, then replace inline ``<style>`` tags by links.

     After calling the module-level ``transform_css``, every file whose
     lower-cased media type is in ``OEB_DOCS`` is scanned for non-empty
     ``<style>`` tags of type text/css. Each one is moved into the head when
     it is not a descendant of it, its text is stored in a new file created
     with ``add_file`` and the element becomes a stylesheet ``<link>``
     referring to that file.
     """
     transform_css(self, transform_sheet=transform_sheet, transform_style=transform_declaration)
     # Dynamically created <style> tags sometimes fail in Firefox, so they
     # are converted to external stylesheets, which never fail.
     find_styles = XPath('//h:style')
     for name, mt in tuple(iteritems(self.mime_map)):
         if mt.lower() not in OEB_DOCS:
             continue
         head = ensure_head(self.parsed(name))
         for style in find_styles(self.parsed(name)):
             # A missing type attribute counts as text/css
             if not style.text or (style.get('type') or 'text/css').lower() != 'text/css':
                 continue
             if not has_ancestor(style, head):
                 extract(style)
                 head.append(style)
             css = style.text
             # The same element is recycled as the <link>
             style.clear()
             style.tag = XHTML('link')
             for attr, value in (('type', 'text/css'), ('rel', 'stylesheet')):
                 style.set(attr, value)
             sheet_name = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
             style.set('href', self.name_to_href(sheet_name, name))

Example #10

0

Show file

File: render_book.py Project: snowlitha/calibre

    def transform_html(self, name, virtualize_resources):
        """Rewrite the HTML file ``name`` in place and mark it dirty if changed.

        In order: non-empty text/css ``<style>`` tags are moved to the head
        and converted to external stylesheet links; ``<img src>`` elements
        that resolve to a name get a ``data-calibre-src`` attribute;
        ``<link href>`` elements that are not text/css stylesheets lose all
        their attributes; inline styles are passed to
        ``transform_inline_styles``. When ``virtualize_resources`` is falsy,
        anchors whose replaced href starts with the book's ``link_uid`` are
        pointed at ``javascript:void(0)``, recorded in ``link_to_map`` and
        given a ``data-<link_uid>`` attribute holding the JSON-encoded
        target name and fragment.
        """
        find_styles = XPath('//h:style')
        find_anchors = XPath('//h:a[@href]')
        find_images = XPath('//h:img[@src]')
        find_res_links = XPath('//h:link[@href]')
        root = self.parsed(name)
        head = ensure_head(root)
        changed = False

        # Dynamically created <style> tags sometimes fail in Firefox, so
        # they are converted to external stylesheets, which never fail.
        for style in find_styles(root):
            if not style.text or (style.get('type') or 'text/css').lower() != 'text/css':
                continue
            if not has_ancestor(style, head):
                extract(style)
                head.append(style)
            css = style.text
            style.clear()
            style.tag = XHTML('link')
            style.set('type', 'text/css')
            style.set('rel', 'stylesheet')
            sheet_name = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
            style.set('href', self.name_to_href(sheet_name, name))
            changed = True

        # Used for viewing images
        for img in find_images(root):
            img_name = self.href_to_name(img.get('src'), name)
            if not img_name:
                continue
            img.set('data-calibre-src', img_name)
            changed = True

        # Disable non-stylesheet link tags. Such links will not be loaded by
        # the browser anyway and would cause the resource load check to hang.
        for link in find_res_links(root):
            link_type = (link.get('type') or 'text/css').lower()
            link_rel = (link.get('rel') or 'stylesheet').lower()
            if link_type == 'text/css' and link_rel == 'stylesheet':
                continue
            link.attrib.clear()
            changed = True

        # Transform <style> and style=""
        if transform_inline_styles(self, name, transform_sheet=transform_sheet, transform_style=transform_declaration):
            changed = True

        if not virtualize_resources:
            link_uid = self.book_render_data['link_uid']
            link_replacer = create_link_replacer(self, link_uid, set())
            link_to_map = self.book_render_data['link_to_map']
            for anchor in find_anchors(root):
                href = link_replacer(name, anchor.get('href'))
                if not (href and href.startswith(link_uid)):
                    continue
                anchor.set('href', 'javascript:void(0)')
                target = decode_url(href.split('|')[1])
                lname = target[0]
                lfrag = target[1]
                # link_to_map: target name -> fragment -> set of source names
                sources = link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set())
                sources.add(name)
                payload = json.dumps({'name': lname, 'frag': lfrag}, ensure_ascii=False)
                anchor.set('data-' + link_uid, payload)
                changed = True

        if changed:
            self.dirty(name)

Example #11

0

Show file

File: toc.py Project: typeoneerror/calibre

def commit_nav_toc(container, toc, lang=None):
    """Serialise ``toc`` into the ``toc`` nav of the navigation document.

    The navigation document is the one named by ``find_existing_nav_toc``,
    or a newly generated ``nav.xhtml`` item (``properties="nav"``) when that
    returns None. Should ``container.parsed`` raise KeyError for it, the
    tree is built from the ``templates/new_nav.html`` resource.

    Exactly one ``<nav>`` with ``{EPUB_NS}type`` equal to ``toc`` survives:
    additional ones are removed via ``extract``, the first is cleared while
    its attributes and tail are kept, and if there is none a new one is
    appended to the element returned by ``first_child(root, XHTML('body'))``.
    It is then filled with an ``<h1>`` for ``toc.toc_title`` (when set) and
    nested ``<ol>``/``<li>`` elements for the entries of ``toc``. Finally the
    tree is passed to ``container.replace``.

    :param container: object providing the parsing and href helpers used here
    :param toc: iterable tree of entries exposing ``title``, ``dest`` and
        ``frag``; the root also exposes ``toc_title``
    :param lang: accepted but not used by this implementation
    """
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    doc_name = find_existing_nav_toc(container)
    if doc_name is None:
        new_item = container.generate_item('nav.xhtml', id_prefix='nav')
        new_item.set('properties', 'nav')
        doc_name = container.href_to_name(new_item.get('href'), base=container.opf_name)

    try:
        root = container.parsed(doc_name)
    except KeyError:
        raw_template = P('templates/new_nav.html', data=True)
        root = container.parse_xhtml(raw_template.decode('utf-8'))

    epub_type = '{%s}type' % EPUB_NS
    existing = [el for el in root.iterdescendants(XHTML('nav')) if el.get(epub_type) == 'toc']
    for surplus in existing[1:]:
        extract(surplus)

    if not existing:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    else:
        nav = existing[0]
        # clear() also drops attributes and tail, so put them back afterwards
        kept_tail, kept_attrs = nav.tail, dict(nav.attrib)
        nav.clear()
        nav.attrib.update(kept_attrs)
        nav.tail = kept_tail
    nav.set(epub_type, 'toc')

    if toc.toc_title:
        title_el = nav.makeelement(XHTML('h1'))
        nav.append(title_el)
        title_el.text = toc.toc_title

    root_ol = nav.makeelement(XHTML('ol'))
    nav.append(root_ol)
    make_href = partial(container.name_to_href, base=doc_name)
    runs_of_space = re.compile(r'\s+')

    def build_list(ol, parent_entry):
        # Recursively mirror the children of parent_entry as <li> elements
        for entry in parent_entry:
            li = ol.makeelement(XHTML('li'))
            ol.append(li)
            text = runs_of_space.sub(' ', entry.title or '').strip()
            # Linked entries become <a>, unlinked ones <span>
            label = li.makeelement(XHTML('a' if entry.dest else 'span'))
            label.text = text
            li.append(label)
            if entry.dest:
                href = make_href(entry.dest)
                if entry.frag:
                    href = href + '#' + entry.frag
                label.set('href', href)
            if len(entry) > 0:
                nested = li.makeelement(XHTML('ol'))
                li.append(nested)
                build_list(nested, entry)

    build_list(root_ol, toc)
    pretty_xml_tree(root_ol)
    # Items with a single child lose the whitespace surrounding it
    for li in root_ol.iterdescendants(XHTML('li')):
        if len(li) != 1:
            continue
        li.text = None
        li[0].tail = None
    container.replace(doc_name, root)