Esempio n. 1
0
    def upshift_markup(self):  # {{{
        """Upgrade markup to comply with XHTML 1.1 where possible.

        For every spine item this:

        * copies a plain ``lang`` attribute on the root element to
          ``xml:lang`` when the latter is missing or empty,
        * retags ``<u>`` elements as ``span``,
        * strips ``id`` / ``name`` attributes whose value was already used
          by an earlier element of the same document.

        Items whose data has no usable ``<body>`` element are skipped.
        """
        from calibre.ebooks.oeb.base import XPath, XML
        for item in self.oeb.spine:
            root = item.data
            if not root.get(XML('lang')):
                plain_lang = root.get('lang')
                if plain_lang:
                    root.set(XML('lang'), plain_lang)

            # The XPath result is a (possibly empty) sequence; only its first
            # match is expected to expose ``xpath``.
            bodies = XPath('//h:body')(root)
            body = bodies[0] if bodies else bodies
            if not hasattr(body, 'xpath'):
                continue

            for underline in XPath('//h:u')(root):
                underline.tag = 'span'

            # Values already encountered, tracked separately per attribute.
            seen = {'id': set(), 'name': set()}
            for elem in XPath('//*[@id or @name]')(root):
                for attr in ('id', 'name'):
                    value = elem.get(attr, None)
                    if not value:
                        continue
                    if value in seen[attr]:
                        # Repeated value: drop the attribute, keep the element.
                        del elem.attrib[attr]
                    else:
                        seen[attr].add(value)
    def upshift_markup(self):  # {{{
        """Upgrade markup to comply with XHTML 1.1 where possible.

        For every spine item, a plain ``lang`` attribute on the root element
        is copied to ``xml:lang`` when the latter is missing or empty, and
        each ``<u>`` element is retagged as ``span`` with an inline
        ``text-decoration:underline`` style. Items whose data has no usable
        ``<body>`` element are left without the ``<u>`` conversion.
        """
        from calibre.ebooks.oeb.base import XPath, XML
        for spine_item in self.oeb.spine:
            root = spine_item.data
            if not root.get(XML('lang')):
                fallback_lang = root.get('lang')
                if fallback_lang:
                    root.set(XML('lang'), fallback_lang)

            # Take the first <body> match, if any; an empty result has no
            # ``xpath`` attribute and so fails the check below.
            found = XPath('//h:body')(root)
            body = found[0] if found else found
            if hasattr(body, 'xpath'):
                for underline in XPath('//h:u')(root):
                    underline.tag = 'span'
                    underline.set('style', 'text-decoration:underline')
Esempio n. 3
0
    def __call__(self, oeb, context):
        """Make sure the book carries an in-line HTML table of contents.

        If the guide's ``toc`` reference points at a manifest item that
        contains links, that item is placed in the spine (when
        ``oeb.spine.index`` reports a negative position for it) and nothing
        is generated. A guide reference that is missing from the manifest is
        removed; one that points at a link-less item is removed only when the
        book has TOC nodes. When TOC nodes exist, a new ``contents.xhtml``
        plus ``tocstyle.css`` are then generated, added to the manifest and
        spine, and registered in the guide.

        ``context`` is not used by this method.
        """
        def place_in_spine(entry):
            # 'end' appends the item as non-linear; any other position puts
            # it first in the spine as a linear item.
            if self.position == 'end':
                oeb.spine.add(entry, linear=False)
            else:
                oeb.spine.insert(0, entry, linear=True)

        toc_has_nodes = getattr(getattr(oeb, 'toc', False), 'nodes', False)

        if 'toc' in oeb.guide:
            # Ensure toc pointed to in <guide> is in spine
            from calibre.ebooks.oeb.base import urlnormalize
            guide_href = urlnormalize(oeb.guide['toc'].href)
            if guide_href not in oeb.manifest.hrefs:
                oeb.guide.remove('toc')
            else:
                guide_item = oeb.manifest.hrefs[guide_href]
                usable = (hasattr(guide_item.data, 'xpath')
                          and XPath('//h:a[@href]')(guide_item.data))
                if usable:
                    if oeb.spine.index(guide_item) < 0:
                        place_in_spine(guide_item)
                    return
                if toc_has_nodes:
                    oeb.guide.remove('toc')

        if not toc_has_nodes:
            return

        oeb.logger.info('Generating in-line TOC...')
        heading_text = self.title or oeb.translate(DEFAULT_TITLE)
        css_style = self.style
        if css_style not in STYLE_CSS:
            # Report the bad setting, then fall back to the 'nested' style.
            oeb.logger.error('Unknown TOC style %r' % css_style)
            css_style = 'nested'

        # Stylesheet for the generated page.
        css_id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
        oeb.manifest.add(css_id, css_href, CSS_MIME, data=STYLE_CSS[css_style])

        # Skeleton document: <html><head><title/><link/></head><body/></html>
        language = unicode_type(oeb.metadata.language[0])
        document = element(None,
                           XHTML('html'),
                           nsmap={None: XHTML_NS},
                           attrib={XML('lang'): language})
        head = element(document, XHTML('head'))
        element(head, XHTML('title')).text = heading_text
        element(head,
                XHTML('link'),
                rel='stylesheet',
                type=CSS_MIME,
                href=css_href)
        body = element(document,
                       XHTML('body'),
                       attrib={'class': 'calibre_toc'})
        heading = element(body,
                          XHTML('h2'),
                          attrib={'class': 'calibre_toc_header'})
        heading.text = heading_text
        self.add_toc_level(body, oeb.toc)

        # Register the new page in manifest, spine and guide.
        page_id, page_href = oeb.manifest.generate('contents',
                                                   'contents.xhtml')
        page_item = oeb.manifest.add(page_id,
                                     page_href,
                                     XHTML_MIME,
                                     data=document)
        place_in_spine(page_item)
        oeb.guide.add('toc', 'Table of Contents', page_href)
Esempio n. 4
0
def create_ncx(toc, to_href, btitle, lang, uid):
    """Build and return an NCX ``<ncx>`` element tree for ``toc``.

    :param toc: root TOC node; it must be iterable over child nodes and
        expose ``depth``. Each node is read for ``title``, ``dest`` and
        ``frag``.
    :param to_href: callable turning a node's ``dest`` into the string used
        for the ``src`` attribute of ``<content>``.
    :param btitle: text placed in ``<docTitle>``.
    :param lang: language code; underscores are replaced by hyphens before
        it is stored in ``xml:lang``.
    :param uid: identifier stored (stringified) in the ``dtb:uid`` meta.
    :return: the root ``ncx`` element.
    """
    root = etree.Element(NCX('ncx'),
                         attrib={
                             'version': '2005-1',
                             XML('lang'): lang.replace('_', '-')
                         },
                         nsmap={None: NCX_NS})

    # <head> metadata, emitted in this exact order.
    head = etree.SubElement(root, NCX('head'))
    head_meta = (
        ('dtb:uid', unicode_type(uid)),
        ('dtb:depth', unicode_type(toc.depth)),
        ('dtb:generator', 'calibre (' + __version__ + ')'),
        ('dtb:totalPageCount', '0'),
        ('dtb:maxPageNumber', '0'),
    )
    for meta_name, meta_content in head_meta:
        etree.SubElement(head,
                         NCX('meta'),
                         name=meta_name,
                         content=meta_content)

    doc_title = etree.SubElement(root, NCX('docTitle'))
    etree.SubElement(doc_title, NCX('text')).text = btitle

    nav_map = etree.SubElement(root, NCX('navMap'))
    whitespace_run = re.compile(r'\s+')

    # Counter shared by the nested function so navPoints are numbered in
    # document order across all nesting levels.
    sequence = Counter()

    def emit_children(xml_parent, toc_parent):
        """Append one navPoint per child of ``toc_parent``, recursively."""
        for entry in toc_parent:
            sequence['c'] += 1
            order = sequence['c']
            nav_point = etree.SubElement(xml_parent,
                                         NCX('navPoint'),
                                         id='num_%d' % order,
                                         playOrder=unicode_type(order))
            nav_label = etree.SubElement(nav_point, NCX('navLabel'))
            label_text = entry.title
            if label_text:
                # Collapse every whitespace run to a single space.
                label_text = whitespace_run.sub(' ', label_text)
            etree.SubElement(nav_label, NCX('text')).text = label_text
            if entry.dest:
                target = to_href(entry.dest)
                if entry.frag:
                    target += '#' + entry.frag
                etree.SubElement(nav_point, NCX('content'), src=target)
            emit_children(nav_point, entry)

    emit_children(nav_map, toc)
    return root
Esempio n. 5
0
 def tree_to_binary(self,
                    elem,
                    nsrmap=NSRMAP,
                    parents=None,
                    inhead=False,
                    preserve=False):
     """Serialize ``elem`` and its subtree to the output via ``self.write``.

     For each element this writes an opening record (flags, tag, attributes),
     the element text, all children recursively, a closing record when the
     element was not self-closed, and the tail text. Anchors
     (``id``/``name`` attributes) and page-break positions are recorded in
     ``self.anchors`` and ``self.page_breaks`` along the way.

     :param elem: element to serialize; elements whose ``tag`` is not a
         string (comments, entities) are skipped.
     :param nsrmap: mapping of namespace URI to prefix inherited from the
         parent; it is copied before being extended.
     :param parents: list of buffer offsets of the enclosing elements'
         opening tags. ``None`` (the default) starts a fresh list; a list
         passed in is mutated during the walk and restored before return.
     :param inhead: True when serializing inside ``<head>``.
     :param preserve: True when whitespace must be kept verbatim.
     """
     if parents is None:
         # A new list per top-level call. A shared mutable default would
         # keep stale offsets if an exception interrupted a previous walk
         # between the append and pop below.
         parents = []
     if not isinstance(elem.tag, basestring):
         # Don't emit any comments or raw entities
         return
     nsrmap = copy.copy(nsrmap)
     attrib = dict(elem.attrib)
     style = self.stylizer.style(elem) if self.stylizer else None
     # Declare every namespace that is new, or bound to a different prefix,
     # relative to what the parent already declared.
     for key, value in elem.nsmap.items():
         if value not in nsrmap or nsrmap[value] != key:
             xmlns = ('xmlns:' + key) if key else 'xmlns'
             attrib[xmlns] = value
         nsrmap[value] = key
     tag = prefixname(elem.tag, nsrmap)
     tag_offset = self.buf.tell()
     if tag == 'head':
         inhead = True
     flags = FLAG_OPENING
     if not elem.text and len(elem) == 0:
         # No text and no children: open and close in a single record.
         flags |= FLAG_CLOSING
     if inhead:
         flags |= FLAG_HEAD
     if style and self.is_block(style):
         flags |= FLAG_BLOCK
     self.write(0, flags)
     # Attribute table: the generic one at index 0 unless the tag has a
     # non-empty table of its own.
     tattrs = self.tattrs[0]
     if tag in self.tags:
         index = self.tags[tag]
         self.write(index)
         if self.tattrs[index]:
             tattrs = self.tattrs[index]
     else:
         self.write(FLAG_CUSTOM, len(tag) + 1, tag)
     last_break = self.page_breaks[-1][0] if self.page_breaks else None
     # Skip recording a second break at the offset of the latest one.
     if style and last_break != tag_offset \
        and style['page-break-before'] in PAGE_BREAKS:
         self.page_breaks.append((tag_offset, list(parents)))
     for attr, value in attrib.items():
         attr = prefixname(attr, nsrmap)
         if attr in ('href', 'src'):
             value = urlnormalize(value)
             path, frag = urldefrag(value)
             if self.item:
                 path = self.item.abshref(path)
             # Prefix 2: target is in the manifest and the value becomes its
             # item id (plus fragment). Prefix 3: value kept as normalized.
             prefix = unichr(3)
             if path in self.manifest.hrefs:
                 prefix = unichr(2)
                 value = self.manifest.hrefs[path].id
                 if frag:
                     value = '#'.join((value, frag))
             value = prefix + value
         elif attr in ('id', 'name'):
             self.anchors.append((value, tag_offset))
         elif attr.startswith('ms--'):
             attr = '%' + attr[4:]
         elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
             value = CSS_MIME
         if attr in tattrs:
             self.write(tattrs[attr])
         else:
             self.write(FLAG_CUSTOM, len(attr) + 1, attr)
         # Values that parse as integers are written in numeric form;
         # everything else is written as a length-prefixed string.
         try:
             self.write(ATTR_NUMBER, int(value) + 1)
         except ValueError:
             self.write(len(value) + 1, value)
     self.write(0)
     old_preserve = preserve
     if style:
         preserve = (style['white-space'] in ('pre', 'pre-wrap'))
     # An explicit xml:space attribute overrides the CSS-derived setting.
     xml_space = elem.get(XML('space'))
     if xml_space == 'preserve':
         preserve = True
     elif xml_space == 'normal':
         preserve = False
     if elem.text:
         if preserve:
             self.write(elem.text)
         elif len(elem) == 0 or not elem.text.isspace():
             self.write(COLLAPSE.sub(' ', elem.text))
         # else: whitespace-only text ahead of child elements is dropped
     parents.append(tag_offset)
     child = cstyle = nstyle = None
     # Walk the children one step behind (``child`` trails ``nxt``) so the
     # style of the following sibling is known when deciding whether a
     # whitespace-only tail can be discarded. The trailing None flushes the
     # last child.
     for nxt in chain(elem, [None]):
         if self.stylizer:
             nstyle = None if nxt is None else self.stylizer.style(nxt)
         if child is not None:
             if not preserve \
                and (inhead or not nstyle or self.is_block(cstyle) or self.is_block(nstyle)) \
                and child.tail and child.tail.isspace():
                 child.tail = None
             self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
         child, cstyle = nxt, nstyle
     parents.pop()
     # The tail belongs to the parent's context, so use its whitespace mode.
     preserve = old_preserve
     if not flags & FLAG_CLOSING:
         self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
     if elem.tail and tag != 'html':
         tail = elem.tail
         if not preserve:
             tail = COLLAPSE.sub(' ', tail)
         self.write(tail)
     if style and style['page-break-after'] not in ('avoid', 'auto'):
         self.page_breaks.append((self.buf.tell(), list(parents)))