Beispiel #1
0
def pretty_html(container, name, raw):
    """
    Return a pretty-printed serialization of the HTML held in ``raw``.

    The markup is parsed with ``container.parse_xhtml``, the resulting tree
    is handed to ``pretty_html_tree`` and then serialized as ``text/html``.
    ``name`` is accepted but not used by this function.
    """
    tree = container.parse_xhtml(raw)
    # The return value of pretty_html_tree is ignored; the same tree object
    # is what gets serialized below.
    pretty_html_tree(container, tree)
    return base.serialize(tree, 'text/html')
Beispiel #2
0
def fix_html(container, raw):
    """
    Repair parsing errors in the HTML string ``raw`` by round-tripping it
    through the parser: the markup is parsed with ``container.parse_xhtml``
    (the HTML5 parsing algorithm) and serialized again as ``text/html``.
    """
    return base.serialize(container.parse_xhtml(raw), 'text/html')
Beispiel #3
0
def create_inline_toc(container, title=None):
    """
    Build an inline (HTML) Table of Contents from the book's existing NCX
    Table of Contents and store it in the container.

    If ``find_inline_toc`` reports an existing inline ToC file, that file is
    overwritten. Otherwise a new file is added at spine index 0 under the
    first unused name among ``toc.xhtml``, ``toc1.xhtml``, ``toc2.xhtml``, ...
    In both cases the ``toc`` guide item is set to point at the file.

    :param title: The title for this table of contents. When falsy,
        ``'Table of Contents'`` is used.
    :return: The name of the file holding the inline ToC, or ``None`` when
        the existing Table of Contents has no entries.
    """
    language = get_book_language(container)
    if not title:
        title = 'Table of Contents'

    toc = get_toc(container)
    if len(toc) == 0:
        return None

    # ``name`` is None when no inline ToC exists yet; the HTML is generated
    # with that value before a file name is picked.
    name = find_inline_toc(container)
    markup = base.serialize(
        toc_to_html(toc, container, name, title, language), 'text/html')

    if name is not None:
        with container.open(name, 'wb') as stream:
            stream.write(markup)
    else:
        counter = 0
        name = 'toc.xhtml'
        while container.has_name(name):
            counter += 1
            name = 'toc%d.xhtml' % counter
        container.add_file(name, markup, spine_index=0)

    set_guide_item(container, 'toc', title, name,
                   frag='calibre_generated_inline_toc')
    return name
Beispiel #4
0
def pretty_xml(container, name, raw):
    """
    Return a pretty-printed serialization of the XML held in ``raw``.

    When ``name`` equals ``container.opf_name`` the tree is first passed
    through ``pretty_opf`` for OPF-specific prettying; ``pretty_xml_tree`` is
    applied in every case. The result is serialized as ``text/xml``.
    """
    tree = container.parse_xml(raw)
    is_opf = name == container.opf_name
    if is_opf:
        pretty_opf(tree)
    pretty_xml_tree(tree)
    return base.serialize(tree, 'text/xml')
Beispiel #5
0
def pretty_css(container, name, raw):
    """
    Return a pretty-printed serialization of the CSS held in ``raw``.

    The stylesheet is parsed with ``container.parse_css`` and serialized as
    ``text/css``. ``name`` is accepted but not used by this function.
    """
    return base.serialize(container.parse_css(raw), 'text/css')
Beispiel #6
0
    def convert_epub3_nav(self, nav_path, opf, log, opts):
        """
        Generate an NCX file from the navigation document at ``nav_path``.

        The document is parsed and searched for the first ``nav`` element
        whose ``type`` attribute (in the ``const.EPUB_NS`` namespace) equals
        ``'toc'`` and which has an ``ol`` child. Its nested ``ol``/``li``
        structure is mirrored as ``navPoint`` elements under the NCX
        ``navMap``. If no such element exists the method returns without
        changing anything.

        Otherwise the NCX is written to a new ``.ncx`` temporary file next to
        the navigation document, added to the OPF manifest, and referenced
        from the ``toc`` attribute of every ``spine`` element. The normalized
        href of the navigation document and its parsed tree are stored on
        ``opts`` as ``epub3_nav_href`` and ``epub3_nav_parsed``.

        If ``self.removed_cover`` is set, every element with an ``href``
        resolving to it is tagged with ``data-calibre-removed-titlepage="1"``
        and the navigation document is rewritten on disk.

        :param nav_path: Filesystem path of the navigation document.
        :param opf: OPF object providing ``create_manifest_item`` and ``root``.
        :param log: Logger forwarded to the parser.
        :param opts: Options object that receives the ``epub3_nav_*`` values.
        """
        from lxml import etree
        from ebook_converter.ebooks.chardet import xml_to_unicode
        from ebook_converter.ebooks.oeb.polish.parsing import parse
        from ebook_converter.ebooks.oeb.base import \
            serialize
        from ebook_converter.ebooks.oeb.polish.toc import first_child
        from tempfile import NamedTemporaryFile

        with open(nav_path, 'rb') as nav_file:
            nav_bytes = nav_file.read()
        nav_text = xml_to_unicode(nav_bytes, strip_encoding_pats=True,
                                  assume_utf8=True)[0]
        root = parse(nav_text, log=log)

        ncx = etree.fromstring(
            '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" '
            'version="2005-1" xml:lang="eng">'
            '<navMap/></ncx>')
        navmap = ncx[0]
        et = '{%s}type' % const.EPUB_NS
        bn = os.path.basename(nav_path)

        def add_from_li(li, parent):
            # Only the first <a>/<span> child of the list item supplies the
            # label and link target.
            label = target = None
            first = next(li.iterchildren(base.tag('xhtml', 'a'),
                                         base.tag('xhtml', 'span')), None)
            if first is not None:
                label = etree.tostring(first, method='text',
                                       encoding='unicode',
                                       with_tail=False).strip()
                if not label:
                    # No visible text: fall back to the title attributes.
                    titles = first.xpath('descendant-or-self::*/@title')
                    label = ' '.join(titles).strip()
                target = first.get('href')
                if target and target.startswith('#'):
                    # Fragment-only links refer to the nav document itself.
                    target = bn + target

            nav_point = parent.makeelement(base.tag('ncx', 'navPoint'))
            parent.append(nav_point)
            nav_label = nav_point.makeelement(base.tag('ncx', 'navLabel'))
            nav_point.append(nav_label)
            label_text = nav_point.makeelement(base.tag('ncx', 'text'))
            nav_label.append(label_text)
            label_text.text = label
            if target:
                content = nav_point.makeelement(base.tag('ncx', 'content'),
                                                attrib={'src': target})
                nav_point.append(content)
            return nav_point

        def process_nav_node(node, toc_parent):
            for item in node.iterchildren(base.tag('xhtml', 'li')):
                nav_point = add_from_li(item, toc_parent)
                nested = first_child(item, base.tag('xhtml', 'ol'))
                if nav_point is not None and nested is not None:
                    process_nav_node(nested, nav_point)

        for nav in root.iterdescendants(base.tag('xhtml', 'nav')):
            if nav.get(et) != 'toc':
                continue
            top_list = first_child(nav, base.tag('xhtml', 'ol'))
            if top_list is None:
                continue
            process_nav_node(top_list, navmap)
            break
        else:
            # No usable toc nav element was found.
            return

        # delete=False keeps the file on disk after the with block closes it.
        with NamedTemporaryFile(suffix='.ncx',
                                dir=os.path.dirname(nav_path),
                                delete=False) as ncx_file:
            ncx_file.write(etree.tostring(ncx, encoding='utf-8'))
        ncx_rel = os.path.relpath(ncx_file.name, os.getcwd())
        ncx_href = ncx_rel.replace(os.sep, '/')
        manifest_item = opf.create_manifest_item(ncx_href, base.NCX_MIME,
                                                 append=True)
        ncx_id = manifest_item.get('id')
        for spine in opf.root.xpath('//*[local-name()="spine"]'):
            spine.set('toc', ncx_id)

        nav_url = os.path.relpath(nav_path).replace(os.sep, '/')
        opts.epub3_nav_href = base.urlnormalize(nav_url)
        opts.epub3_nav_parsed = root

        if not getattr(self, 'removed_cover', None):
            return

        modified = False
        nav_dir = os.path.dirname(nav_path)
        for elem in root.xpath('//*[@href]'):
            # Drop any fragment; only the path part is compared.
            href = elem.get('href').partition('#')[0]
            joined = os.path.join(nav_dir, urllib.parse.unquote(href))
            abs_href = base.urlnormalize(os.path.relpath(joined, nav_dir))
            if abs_href == self.removed_cover:
                modified = True
                elem.set('data-calibre-removed-titlepage', '1')
        if modified:
            with open(nav_path, 'wb') as out:
                out.write(base.serialize(root, 'application/xhtml+xml'))