Example #1
0
File: toc.py Project: tokot/calibre
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from an existing NCX table of contents.

    :param title: The title for this table of contents.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    name = toc_name
    html = toc_to_html(toc, container, name, title, lang)
    raw = serialize(html, 'text/html')
    if name is None:
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = 'toc%d.xhtml' % c
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container,
                   'toc',
                   title,
                   name,
                   frag='calibre_generated_inline_toc')
    return name
Example #2
0
def create_inline_toc(container, title=None):
    title = title or _('Table of Contents')
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    def process_node(html_parent, toc, level=1, indent='  '):
        li = html_parent.makeelement(XHTML('li'))
        li.tail = '\n' + (indent * level)
        html_parent.append(li)
        name, frag = toc.dest, toc.frag
        href = '#'
        if name:
            href = container.name_to_href(name, toc_name)
            if frag:
                href += '#' + frag
        a = li.makeelement(XHTML('a'), href=href)
        a.text = toc.title
        li.append(a)
        if len(toc) > 0:
            parent = li.makeelement(XHTML('ul'))
            li.append(parent)
            a.tail = '\n\n' + (indent * (level + 2))
            parent.text = '\n' + (indent * (level + 3))
            parent.tail = '\n\n' + (indent * (level + 1))
            for child in toc:
                process_node(parent, child, level + 3)
            parent[-1].tail = '\n' + (indent * (level + 2))

    E = ElementMaker(namespace=XHTML_NS, nsmap={None: XHTML_NS})
    html = E.html(
        E.head(
            E.title(title),
            E.style('''
                li { list-style-type: none; padding-left: 2em; margin-left: 0}
                a { text-decoration: none }
                a:hover { color: red }''',
                    type='text/css'),
        ), E.body(
            E.h2(title),
            E.ul(),
            id="calibre_generated_inline_toc",
        ))

    name = toc_name
    for child in toc:
        process_node(html[1][1], child)
    pretty_html_tree(container, html)
    raw = serialize(html, 'text/html')
    if name is None:
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = 'toc%d.xhtml' % c
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    return name
Example #3
0
def pretty_xml(container, name, raw):
    ' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. '
    root = container.parse_xml(raw)
    if name == container.opf_name:
        pretty_opf(root)
    pretty_xml_tree(root)
    return serialize(root, 'text/xml')
Example #4
0
def pretty_xml(container, name, raw):
    ' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. '
    root = container.parse_xml(raw)
    if name == container.opf_name:
        pretty_opf(root)
    pretty_xml_tree(root)
    return serialize(root, 'text/xml')
Example #5
0
def beautify_text(raw, syntax):
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations
    if syntax == 'xml':
        root = etree.fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(root)
    elif syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_css_parser_serialization
        from css_parser import CSSParser, log
        setup_css_parser_serialization(tprefs['editor_tab_stop_width'])
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        parser = CSSParser(loglevel=logging.WARNING,
                           # We dont care about @import rules
                           fetcher=lambda x: (None, None), log=_css_logger)
        data = parser.parseString(raw, href='<string>', validate=False)
        return serialize(data, 'text/css')
    else:
        root = parse(raw, line_numbers=False)
        pretty_html_tree(None, root)
    return etree.tostring(root, encoding=unicode)
Example #6
0
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Example #7
0
def beautify_text(raw, syntax):
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations
    if syntax == 'xml':
        root = etree.fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(root)
    elif syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_cssutils_serialization
        from cssutils import CSSParser, log
        setup_cssutils_serialization(tprefs['editor_tab_stop_width'])
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        parser = CSSParser(loglevel=logging.WARNING,
                           # We dont care about @import rules
                           fetcher=lambda x: (None, None), log=_css_logger)
        data = parser.parseString(raw, href='<string>', validate=False)
        return serialize(data, 'text/css')
    else:
        root = parse(raw, line_numbers=False)
        pretty_html_tree(None, root)
    return etree.tostring(root, encoding=unicode)
Example #8
0
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from an existing NCX table of contents.

    :param title: The title for this table of contents.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    name = toc_name
    html = toc_to_html(toc, container, name, title, lang)
    raw = serialize(html, 'text/html')
    if name is None:
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = 'toc%d.xhtml' % c
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
Example #9
0
def parse_html(raw):
    root = parse(raw,
                 decoder=lambda x: x.decode('utf-8'),
                 line_numbers=True,
                 linenumber_attribute='data-lnum')
    ans = serialize(root, 'text/html')
    if not isinstance(ans, bytes):
        ans = ans.encode('utf-8')
    return ans
Example #10
0
 def serialize_item(self, name):
     data = self.parsed(name)
     if name == self.opf_name:
         self.format_opf()
     data = serialize(data, self.mime_map[name], pretty_print=name in self.pretty_print)
     if name == self.opf_name:
         # Needed as I can't get lxml to output opf:role and
         # not output <opf:metadata> as well
         data = re.sub(br"(<[/]{0,1})opf:", r"\1", data)
     return data
Example #11
0
 def serialize_item(self, name):
     data = self.parsed(name)
     if name == self.opf_name:
         self.format_opf()
     data = serialize(data, self.mime_map[name], pretty_print=name in
                      self.pretty_print)
     if name == self.opf_name:
         # Needed as I can't get lxml to output opf:role and
         # not output <opf:metadata> as well
         data = re.sub(br'(<[/]{0,1})opf:', r'\1', data)
     return data
Example #12
0
def parse_html(raw):
    root = parse(raw, decoder=lambda x:x.decode('utf-8'), line_numbers=True, linenumber_attribute='data-lnum')
    ans = serialize(root, 'text/html')
    if not isinstance(ans, bytes):
        ans = ans.encode('utf-8')
    return ans
Example #13
0
    def convert_epub3_nav(self, nav_path, opf, log, opts):
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.ebooks.oeb.polish.parsing import parse
        from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
        from calibre.ebooks.oeb.polish.toc import first_child
        from tempfile import NamedTemporaryFile
        with lopen(nav_path, 'rb') as f:
            raw = f.read()
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                             assume_utf8=True)[0]
        root = parse(raw, log=log)
        ncx = etree.fromstring(
            '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>'
        )
        navmap = ncx[0]
        et = '{%s}type' % EPUB_NS
        bn = os.path.basename(nav_path)

        def add_from_li(li, parent):
            href = text = None
            for x in li.iterchildren(XHTML('a'), XHTML('span')):
                text = etree.tostring(
                    x, method='text', encoding=unicode,
                    with_tail=False).strip() or ' '.join(
                        x.xpath('descendant-or-self::*/@title')).strip()
                href = x.get('href')
                if href:
                    if href.startswith('#'):
                        href = bn + href
                break
            np = parent.makeelement(NCX('navPoint'))
            parent.append(np)
            np.append(np.makeelement(NCX('navLabel')))
            np[0].append(np.makeelement(NCX('text')))
            np[0][0].text = text
            if href:
                np.append(np.makeelement(NCX('content'), attrib={'src': href}))
            return np

        def process_nav_node(node, toc_parent):
            for li in node.iterchildren(XHTML('li')):
                child = add_from_li(li, toc_parent)
                ol = first_child(li, XHTML('ol'))
                if child is not None and ol is not None:
                    process_nav_node(ol, child)

        for nav in root.iterdescendants(XHTML('nav')):
            if nav.get(et) == 'toc':
                ol = first_child(nav, XHTML('ol'))
                if ol is not None:
                    process_nav_node(ol, navmap)
                    break
        else:
            return

        with NamedTemporaryFile(suffix='.ncx',
                                dir=os.path.dirname(nav_path),
                                delete=False) as f:
            f.write(etree.tostring(ncx, encoding='utf-8'))
        ncx_href = os.path.relpath(f.name, os.getcwdu()).replace(os.sep, '/')
        ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME,
                                          append=True).get('id')
        for spine in opf.root.xpath('//*[local-name()="spine"]'):
            spine.set('toc', ncx_id)
        opts.epub3_nav_href = urlnormalize(
            os.path.relpath(nav_path).replace(os.sep, '/'))
        opts.epub3_nav_parsed = root
        if getattr(self, 'removed_cover', None):
            changed = False
            base_path = os.path.dirname(nav_path)
            for elem in root.xpath('//*[@href]'):
                href, frag = elem.get('href').partition('#')[::2]
                link_path = os.path.relpath(
                    os.path.join(base_path, urlunquote(href)), base_path)
                abs_href = urlnormalize(link_path)
                if abs_href == self.removed_cover:
                    changed = True
                    elem.set('data-calibre-removed-titlepage', '1')
            if changed:
                with open(nav_path, 'wb') as f:
                    f.write(serialize(root, 'application/xhtml+xml'))
Example #14
0
def create_inline_toc(container, title=None):
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    def process_node(html_parent, toc, level=1, indent='  ', style_level=2):
        li = html_parent.makeelement(XHTML('li'))
        li.tail = '\n'+ (indent*level)
        html_parent.append(li)
        name, frag = toc.dest, toc.frag
        href = '#'
        if name:
            href = container.name_to_href(name, toc_name)
            if frag:
                href += '#' + frag
        a = li.makeelement(XHTML('a'), href=href)
        a.text = toc.title
        li.append(a)
        if len(toc) > 0:
            parent = li.makeelement(XHTML('ul'))
            parent.set('class', 'level%d' % (style_level))
            li.append(parent)
            a.tail = '\n\n' + (indent*(level+2))
            parent.text = '\n'+(indent*(level+3))
            parent.tail = '\n\n' + (indent*(level+1))
            for child in toc:
                process_node(parent, child, level+3, style_level=style_level + 1)
            parent[-1].tail = '\n' + (indent*(level+2))

    E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS})
    html = E.html(
        E.head(
            E.title(title),
            E.style('''
                li { list-style-type: none; padding-left: 2em; margin-left: 0}
                a { text-decoration: none }
                a:hover { color: red }''', type='text/css'),
        ),
        E.body(
            E.h2(title),
            E.ul(),
            id="calibre_generated_inline_toc",
        )
    )

    name = toc_name
    ul = html[1][1]
    ul.set('class', 'level1')
    for child in toc:
        process_node(ul, child)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    raw = serialize(html, 'text/html')
    if name is None:
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = 'toc%d.xhtml' % c
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
Example #15
0
def pretty_css(container, name, raw):
    ' Pretty print the CSS represented as a string in raw '
    sheet = container.parse_css(raw)
    return serialize(sheet, 'text/css')
Example #16
0
def pretty_html(container, name, raw):
    root = container.parse_xhtml(raw)
    pretty_html_tree(container, root)
    return serialize(root, 'text/html')
Example #17
0
def pretty_xml(container, name, raw):
    root = container.parse_xml(raw)
    if name == container.opf_name:
        pretty_opf(root)
    pretty_xml_tree(root)
    return serialize(root, 'text/xml')
Example #18
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
            '_TITLE_', prepare_string_for_xml(mi.title)).replace(
                '_AUTHORS_',
                prepare_string_for_xml(authors_to_string(
                    mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang),
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf,
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name,
                                                                    ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype',
                        b'application/epub+zip',
                        compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Example #19
0
def fix_html(container, raw):
    ' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML5 parsing algorithm. '
    root = container.parse_xhtml(raw)
    return serialize(root, 'text/html')
Example #20
0
def pretty_css(container, name, raw):
    sheet = container.parse_css(raw)
    return serialize(sheet, 'text/css')
Example #21
0
def pretty_xml(container, name, raw):
    root = container.parse_xml(raw)
    if name == container.opf_name:
        pretty_opf(root)
    pretty_xml_tree(root)
    return serialize(root, 'text/xml')
Example #22
0
def pretty_html(container, name, raw):
    root = container.parse_xhtml(raw)
    pretty_html_tree(container, root)
    return serialize(root, 'text/html')
Example #23
0
def fix_html(container, raw):
    root = container.parse_xhtml(raw)
    return serialize(root, 'text/html')
Example #24
0
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from an existing NCX table of contents.

    :param title: The title for this table of contents.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    def process_node(html_parent, toc, level=1, indent='  ', style_level=2):
        li = html_parent.makeelement(XHTML('li'))
        li.tail = '\n' + (indent * level)
        html_parent.append(li)
        name, frag = toc.dest, toc.frag
        href = '#'
        if name:
            href = container.name_to_href(name, toc_name)
            if frag:
                href += '#' + frag
        a = li.makeelement(XHTML('a'), href=href)
        a.text = toc.title
        li.append(a)
        if len(toc) > 0:
            parent = li.makeelement(XHTML('ul'))
            parent.set('class', 'level%d' % (style_level))
            li.append(parent)
            a.tail = '\n\n' + (indent * (level + 2))
            parent.text = '\n' + (indent * (level + 3))
            parent.tail = '\n\n' + (indent * (level + 1))
            for child in toc:
                process_node(parent,
                             child,
                             level + 3,
                             style_level=style_level + 1)
            parent[-1].tail = '\n' + (indent * (level + 2))

    E = ElementMaker(namespace=XHTML_NS, nsmap={None: XHTML_NS})
    html = E.html(
        E.head(
            E.title(title),
            E.style('''
                li { list-style-type: none; padding-left: 2em; margin-left: 0}
                a { text-decoration: none }
                a:hover { color: red }''',
                    type='text/css'),
        ), E.body(
            E.h2(title),
            E.ul(),
            id="calibre_generated_inline_toc",
        ))

    name = toc_name
    ul = html[1][1]
    ul.set('class', 'level1')
    for child in toc:
        process_node(ul, child)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    raw = serialize(html, 'text/html')
    if name is None:
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = 'toc%d.xhtml' % c
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container,
                   'toc',
                   title,
                   name,
                   frag='calibre_generated_inline_toc')
    return name
Example #25
0
def pretty_css(container, name, raw):
    ' Pretty print the CSS represented as a string in raw '
    sheet = container.parse_css(raw)
    return serialize(sheet, 'text/css')
Example #26
0
def parse_html(raw):
    root = parse(raw, decoder=lambda x: x.decode("utf-8"), line_numbers=True, linenumber_attribute="data-lnum")
    return serialize(root, "text/html").encode("utf-8")
Example #27
0
def pretty_html(container, name, raw):
    ' Pretty print the HTML represented as a string in raw '
    root = container.parse_xhtml(raw)
    pretty_html_tree(container, root)
    return serialize(root, 'text/html')
Example #28
0
    def convert_epub3_nav(self, nav_path, opf, log, opts):
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.ebooks.oeb.polish.parsing import parse
        from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
        from calibre.ebooks.oeb.polish.toc import first_child
        from tempfile import NamedTemporaryFile
        with lopen(nav_path, 'rb') as f:
            raw = f.read()
        raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
        root = parse(raw, log=log)
        ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
        navmap = ncx[0]
        et = '{%s}type' % EPUB_NS
        bn = os.path.basename(nav_path)

        def add_from_li(li, parent):
            href = text = None
            for x in li.iterchildren(XHTML('a'), XHTML('span')):
                text = etree.tostring(x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
                href = x.get('href')
                if href:
                    if href.startswith('#'):
                        href = bn + href
                break
            np = parent.makeelement(NCX('navPoint'))
            parent.append(np)
            np.append(np.makeelement(NCX('navLabel')))
            np[0].append(np.makeelement(NCX('text')))
            np[0][0].text = text
            if href:
                np.append(np.makeelement(NCX('content'), attrib={'src':href}))
            return np

        def process_nav_node(node, toc_parent):
            for li in node.iterchildren(XHTML('li')):
                child = add_from_li(li, toc_parent)
                ol = first_child(li, XHTML('ol'))
                if child is not None and ol is not None:
                    process_nav_node(ol, child)

        for nav in root.iterdescendants(XHTML('nav')):
            if nav.get(et) == 'toc':
                ol = first_child(nav, XHTML('ol'))
                if ol is not None:
                    process_nav_node(ol, navmap)
                    break
        else:
            return

        with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
            f.write(etree.tostring(ncx, encoding='utf-8'))
        ncx_href = os.path.relpath(f.name, os.getcwdu()).replace(os.sep, '/')
        ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id')
        for spine in opf.root.xpath('//*[local-name()="spine"]'):
            spine.set('toc', ncx_id)
        opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
        opts.epub3_nav_parsed = root
        if getattr(self, 'removed_cover', None):
            changed = False
            base_path = os.path.dirname(nav_path)
            for elem in root.xpath('//*[@href]'):
                href, frag = elem.get('href').partition('#')[::2]
                link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
                abs_href = urlnormalize(link_path)
                if abs_href == self.removed_cover:
                    changed = True
                    elem.set('data-calibre-removed-titlepage', '1')
            if changed:
                with open(nav_path, 'wb') as f:
                    f.write(serialize(root, 'application/xhtml+xml'))
Example #29
0
def fix_html(container, raw):
    root = container.parse_xhtml(raw)
    return serialize(root, 'text/html')
Example #30
0
def fix_html(container, raw):
    ' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML5 parsing algorithm. '
    root = container.parse_xhtml(raw)
    return serialize(root, 'text/html')
Example #31
0
def pretty_css(container, name, raw):
    sheet = container.parse_css(raw)
    return serialize(sheet, 'text/css')
Example #32
0
def pretty_html(container, name, raw):
    ' Pretty print the HTML represented as a string in raw '
    root = container.parse_xhtml(raw)
    pretty_html_tree(container, root)
    return serialize(root, 'text/html')
Example #33
0
def fix_html(raw):
    root = parse(raw)
    return serialize(root, 'text/html').decode('utf-8')
Example #34
0
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(mi.title)
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Example #35
0
def parse_html(raw):
    root = parse(raw,
                 decoder=lambda x: x.decode('utf-8'),
                 line_numbers=True,
                 linenumber_attribute='data-lnum')
    return serialize(root, 'text/html').encode('utf-8')
Example #36
0
def parse_html(raw):
    root = parse(raw, decoder=lambda x:x.decode('utf-8'), line_numbers=True, linenumber_attribute='data-lnum')
    return serialize(root, 'text/html').encode('utf-8')