Example #1
0
def link_stylesheets(container, names, sheets, remove=False, mtype='text/css'):
    from calibre.ebooks.oeb.base import XPath, XHTML
    changed_names = set()
    snames = set(sheets)
    lp = XPath('//h:link[@href]')
    hp = XPath('//h:head')
    for name in names:
        root = container.parsed(name)
        if remove:
            for link in lp(root):
                if (link.get('type', mtype) or mtype) == mtype:
                    container.remove_from_xml(link)
                    changed_names.add(name)
                    container.dirty(name)
        existing = {container.href_to_name(l.get('href'), name) for l in lp(root) if (l.get('type', mtype) or mtype) == mtype}
        extra = snames - existing
        if extra:
            changed_names.add(name)
            try:
                parent = hp(root)[0]
            except (TypeError, IndexError):
                parent = XHTML('head')
                container.insert_into_xml(root, parent, index=0)
            for sheet in sheets:
                if sheet in extra:
                    container.insert_into_xml(
                        parent, parent.makeelement(XHTML('link'), rel='stylesheet', type=mtype,
                                                   href=container.name_to_href(sheet, name)))
            container.dirty(name)

    return changed_names
Example #2
0
def add_pagenum_toc(root, toc, opts, page_number_display_map):
    body = last_tag(root)
    indents = []
    for i in range(1, 7):
        indents.extend((i, 1.4 * i))

    css = '''
    .calibre-pdf-toc table { width: 100%% }

    .calibre-pdf-toc table tr td:last-of-type { text-align: right }

    .calibre-pdf-toc .level-0 {
        font-size: larger;
    }

    .calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
    .calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
    .calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
    .calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
    .calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
    .calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
    ''' % tuple(indents) + (opts.extra_css or '')
    style = body.makeelement(XHTML('style'), type='text/css')
    style.text = css
    body.append(style)
    body.set('class', 'calibre-pdf-toc')

    def E(tag, cls=None, text=None, tail=None, parent=None, **attrs):
        ans = body.makeelement(XHTML(tag), **attrs)
        ans.text, ans.tail = text, tail
        if cls is not None:
            ans.set('class', cls)
        if parent is not None:
            parent.append(ans)
        return ans

    E('h2', text=(opts.toc_title or _('Table of Contents')), parent=body)
    table = E('table', parent=body)
    for level, node in toc.iterdescendants(level=0):
        tr = E('tr', cls='level-%d' % level, parent=table)
        E('td', text=node.title or _('Unknown'), parent=tr)
        num = node.pdf_loc.pagenum
        num = page_number_display_map.get(num, num)
        E('td', text=f'{num}', parent=tr)
 def get_cover_page(self):
     from calibre.ebooks.oeb.stylizer import Stylizer
     from calibre.ebooks.oeb.base import XHTML
     output = u''
     if 'cover' in self.oeb_book.guide:
         if self.name_map.get(self.oeb_book.guide['cover'].href, None):
             output += '<IMG SRC="%s">' % self.name_map[
                 self.oeb_book.guide['cover'].href]
     if 'titlepage' in self.oeb_book.guide:
         self.log.debug('Generating cover page...')
         href = self.oeb_book.guide['titlepage'].href
         item = self.oeb_book.manifest.hrefs[href]
         if item.spine_position is None:
             stylizer = Stylizer(item.data, item.href, self.oeb_book,
                                 self.opts, self.opts.output_profile)
             output += ''.join(
                 self.dump_text(item.data.find(XHTML('body')), stylizer,
                                item))
     return output
Example #4
0
 def flatten_spine(self):
     names = defaultdict(int)
     styles = {}
     for item in self.oeb.spine:
         html = item.data
         stylizer = self.stylizers[item]
         if self.specializer is not None:
             self.specializer(item, stylizer)
         body = html.find(XHTML('body'))
         fsize = self.context.dest.fbase
         self.flatten_node(body, stylizer, names, styles, fsize, item.id)
     items = [(key, val) for (val, key) in styles.items()]
     items.sort()
     css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
     href = self.replace_css(css)
     global_css = self.collect_global_css()
     for item in self.oeb.spine:
         stylizer = self.stylizers[item]
         self.flatten_head(item, href, global_css[item])
Example #5
0
class RecodeCallbackDiv(RecodeCallbackBase):

    tag = XHTML('div')

    def get_begin(self, element):
        classes = self.get_classes(element)

        functions = []
        functions.extend(self.get_class_layout(classes))
        functions.extend(self.get_class_style(classes))

        # save the number of used functions, so we will close them properly
        self.push(len(functions))

        return "".join(functions)

    def get_end(self, element):
        # hence div is a block tag, add a new line at the end
        return "}" * self.pop() + "\n"
Example #6
0
 def mlize_spine(self, oeb_book):
     output = []
     for item in oeb_book.spine:
         self.log.debug('Converting %s to HTML...' % item.href)
         self.rewrite_ids(item.data, item)
         rewrite_links(item.data, partial(self.rewrite_link, page=item))
         stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
         output += self.dump_text(item.data.find(XHTML('body')), stylizer,
                                  item)
         output.append('\n\n')
     if self.opts.htmlz_class_style == 'external':
         css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
     else:
         css = u'<style type="text/css">' + self.get_css(
             oeb_book) + u'</style>'
     title = u'<title>%s</title>' % prepare_string_for_xml(self.book_title)
     output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
         [css] + [title, u'</head><body>'] + output + [u'</body></html>']
     return ''.join(output)
Example #7
0
    def get_cover_page(self):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML

        output = ''
        if 'cover' in self.oeb_book.guide:
            output += '\\m="cover.png"\n'
            self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.png'
        if 'titlepage' in self.oeb_book.guide:
            self.log.debug('Generating title page...')
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
                stylizer = Stylizer(item.data, item.href, self.oeb_book,
                                    self.opts, self.opts.output_profile)
                output += ''.join(
                    self.dump_text(item.data.find(XHTML('body')), stylizer,
                                   item))
        return output
Example #8
0
def merge_css(container, names, master):
    p = container.parsed
    msheet = p(master)
    master_base = os.path.dirname(master)
    merged = set()

    for name in names:
        if name == master:
            continue
        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        sheet = p(name)

        # Remove charset rules
        cr = [r for r in sheet.cssRules if r.type == r.CHARSET_RULE]
        [sheet.deleteRule(sheet.cssRules.index(r)) for r in cr]
        for rule in sheet.cssRules:
            msheet.add(rule)

        container.remove_item(name)
        merged.add(name)

    # Remove links to merged stylesheets in the html files, replacing with a
    # link to the master sheet
    for name, mt in container.mime_map.iteritems():
        if mt in OEB_DOCS:
            removed = False
            root = p(name)
            for link in XPath('//h:link[@href]')(root):
                q = container.href_to_name(link.get('href'), name)
                if q in merged:
                    container.remove_from_xml(link)
                    removed = True
            if removed:
                container.dirty(name)
            if removed and master not in set(all_stylesheets(container, name)):
                head = root.find('h:head', namespaces=XPNSMAP)
                if head is not None:
                    link = head.makeelement(XHTML('link'), type='text/css', rel='stylesheet', href=container.name_to_href(master, name))
                    container.insert_into_xml(head, link)
Example #9
0
    def extract_css_into_flows(self):
        inlines = defaultdict(list)  # Ensure identical <style>s not repeated
        sheets = {}

        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                if not self.opts.expand_css and hasattr(item.data, 'cssText'):
                    condense_sheet(self.data(item))
                data = self.data(item).cssText
                sheets[item.href] = len(self.flows)
                self.flows.append(force_unicode(data, 'utf-8'))

        for item in self.oeb.spine:
            root = self.data(item)

            for link in XPath('//h:link[@href]')(root):
                href = item.abshref(link.get('href'))
                idx = sheets.get(href, None)
                if idx is not None:
                    idx = to_ref(idx)
                    link.set('href', 'kindle:flow:%s?mime=text/css' % idx)

            for tag in XPath('//h:style')(root):
                p = tag.getparent()
                idx = p.index(tag)
                raw = tag.text
                if not raw or not raw.strip():
                    extract(tag)
                    continue
                repl = etree.Element(XHTML('link'),
                                     type='text/css',
                                     rel='stylesheet')
                repl.tail = '\n'
                p.insert(idx, repl)
                extract(tag)
                inlines[raw].append(repl)

        for raw, elems in inlines.iteritems():
            idx = to_ref(len(self.flows))
            self.flows.append(raw)
            for link in elems:
                link.set('href', 'kindle:flow:%s?mime=text/css' % idx)
Example #10
0
def get_length(root):
    strip_space = re.compile(r'\s+')
    ans = 0

    def count(elem):
        num = 0
        tname = elem.tag.rpartition('}')[-1].lower()
        if elem.text and tname not in 'script style':
            num += len(strip_space.sub(elem.text, ''))
        if elem.tail:
            num += len(strip_space.sub(elem.tail, ''))
        if tname in 'img svg':
            num += 2000
        return num

    for body in root.iterdescendants(XHTML('body')):
        ans += count(body)
        for elem in body.iterdescendants('*'):
            ans += count(elem)
    return ans
Example #11
0
    def mlize_spine(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        output = [u'']
        output.append(self.get_toc())
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to TXT...' % item.href)
            content = unicode(etree.tostring(item.data, encoding=unicode))
            content = self.remove_newlines(content)
            content = etree.fromstring(content)
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts,
                                self.opts.output_profile)
            output += self.dump_text(content.find(XHTML('body')), stylizer,
                                     item)
            output += '\n\n\n\n\n\n'
        output = u''.join(output)
        output = u'\n'.join(l.rstrip() for l in output.splitlines())
        output = self.cleanup_text(output)

        return output
Example #12
0
    def epubify_markup(self, root, log):
        from calibre.ebooks.oeb.base import XPath, XHTML
        # Fix empty title tags
        for t in XPath('//h:title')(root):
            if not t.text:
                t.text = u' '
        # Fix <p><div> constructs as the asinine epubchecker complains
        # about them
        pdiv = XPath('//h:p/h:div')
        for div in pdiv(root):
            div.getparent().tag = XHTML('div')

        # Remove the position:relative as it causes problems with some epub
        # renderers. Remove display: block on an image inside a div as it is
        # redundant and prevents text-align:center from working in ADE
        # Also ensure that the img is contained in its containing div
        imgpath = XPath('//h:div/h:img[@style]')
        for img in imgpath(root):
            div = img.getparent()
            if len(div) == 1:
                style = div.attrib.get('style', '')
                if style and not style.endswith(';'):
                    style = style + ';'
                style += 'position:static'  # Ensures position of containing
                # div is static
                # Ensure that the img is always contained in its frame
                div.attrib['style'] = style
                img.attrib['style'] = 'max-width: 100%; max-height: 100%'

        # A div/div/img construct causes text-align:center to not work in ADE
        # so set the display of the second div to inline. This should have no
        # effect (apart from minor vspace issues) in a compliant HTML renderer
        # but it fixes the centering of the image via a text-align:center on
        # the first div in ADE
        imgpath = XPath('descendant::h:div/h:div/h:img')
        for img in imgpath(root):
            div2 = img.getparent()
            div1 = div2.getparent()
            if len(div1) == len(div2) == 1:
                style = div2.attrib['style']
                div2.attrib['style'] = 'display:inline;' + style
Example #13
0
    def flatten_spine(self):
        names = defaultdict(int)
        styles, pseudo_styles = {}, defaultdict(dict)
        for item in self.items:
            html = item.data
            stylizer = self.stylizers[item]
            if self.specializer is not None:
                self.specializer(item, stylizer)
            fsize = self.context.dest.fbase
            self.flatten_node(html,
                              stylizer,
                              names,
                              styles,
                              pseudo_styles,
                              fsize,
                              item.id,
                              recurse=False)
            self.flatten_node(html.find(XHTML('body')), stylizer, names,
                              styles, pseudo_styles, fsize, item.id)
        items = sorted(((key, val) for (val, key) in iteritems(styles)),
                       key=lambda x: numeric_sort_key(x[0]))
        # :hover must come after link and :active must come after :hover
        psels = sorted(pseudo_styles,
                       key=lambda x: {
                           'hover': 1,
                           'active': 2
                       }.get(x, 0))
        for psel in psels:
            styles = pseudo_styles[psel]
            if not styles:
                continue
            x = sorted(((k + ':' + psel, v) for v, k in iteritems(styles)))
            items.extend(x)

        css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)

        href = self.replace_css(css)
        global_css = self.collect_global_css()
        for item in self.items:
            stylizer = self.stylizers[item]
            self.flatten_head(item, href, global_css[item])
Example #14
0
 def rasterize_external(self, elem, style, item, svgitem):
     width = style['width']
     height = style['height']
     width = (width / 72) * self.profile.dpi
     height = (height / 72) * self.profile.dpi
     data = QByteArray(str(svgitem))
     svg = QSvgRenderer(data)
     size = svg.defaultSize()
     size.scale(width, height, Qt.KeepAspectRatio)
     key = (svgitem.href, size.width(), size.height())
     if key in self.images:
         href = self.images[key]
     else:
         logger = self.oeb.logger
         logger.info('Rasterizing %r to %dx%d' %
                     (svgitem.href, size.width(), size.height()))
         image = QImage(size, QImage.Format_ARGB32_Premultiplied)
         image.fill(QColor("white").rgb())
         painter = QPainter(image)
         svg.render(painter)
         painter.end()
         array = QByteArray()
         buffer = QBuffer(array)
         buffer.open(QIODevice.WriteOnly)
         image.save(buffer, 'PNG')
         data = str(array)
         manifest = self.oeb.manifest
         href = os.path.splitext(svgitem.href)[0] + '.png'
         id, href = manifest.generate(svgitem.id, href)
         manifest.add(id, href, PNG_MIME, data=data)
         self.images[key] = href
     elem.tag = XHTML('img')
     for attr in elem.attrib:
         if attr not in KEEP_ATTRS:
             del elem.attrib[attr]
     elem.attrib['src'] = item.relhref(href)
     if elem.text:
         elem.attrib['alt'] = elem.text
         elem.text = None
     for child in elem:
         elem.remove(child)
Example #15
0
    def split_on_page_breaks(self, orig_tree):
        ordered_ids = OrderedDict()
        all_page_break_ids = frozenset(self.page_break_ids)
        for elem_id in orig_tree.xpath('//*/@id'):
            if elem_id in all_page_break_ids:
                ordered_ids[elem_id] = self.page_breaks[
                    self.page_break_ids.index(elem_id)]

        self.trees = [orig_tree]
        while ordered_ids:
            pb_id, (pattern, before) = next(iteritems(ordered_ids))
            del ordered_ids[pb_id]
            for i in range(len(self.trees)-1, -1, -1):
                tree = self.trees[i]
                elem = pattern(tree)
                if elem:
                    self.log.debug('\t\tSplitting on page-break at id=%s'%
                                elem[0].get('id'))
                    before_tree, after_tree = self.do_split(tree, elem[0], before)
                    self.trees[i:i+1] = [before_tree, after_tree]
                    break

        trees, ids = [], set()
        for tree in self.trees:
            root = tree.getroot()
            if self.is_page_empty(root):
                discarded_ids = root.xpath('//*[@id]')
                for x in discarded_ids:
                    x = x.get('id')
                    if not x.startswith('calibre_'):
                        ids.add(x)
            else:
                if ids:
                    body = self.get_body(root)
                    if body is not None:
                        existing_ids = frozenset(body.xpath('//*/@id'))
                        for x in ids - existing_ids:
                            body.insert(0, body.makeelement(XHTML('div'), id=x, style='height:0pt'))
                ids = set()
                trees.append(tree)
        self.trees = trees
Example #16
0
def get_length(root):
    strip_space = re.compile(r'\s+')
    ans = 0
    ignore_tags = frozenset('script style title noscript'.split())

    def count(elem):
        num = 0
        tname = elem.tag.rpartition('}')[-1].lower()
        if elem.text and tname not in ignore_tags:
            num += len(strip_space.sub('', elem.text))
        if elem.tail:
            num += len(strip_space.sub('', elem.tail))
        if tname in 'img svg':
            num += 2000
        return num

    for body in root.iterdescendants(XHTML('body')):
        ans += count(body)
        for elem in body.iterdescendants('*'):
            ans += count(elem)
    return ans
Example #17
0
 def get_page_sheet(self):
     if self.page_sheet is None:
         manifest = self.oeb.manifest
         id_, href = manifest.generate('page_css', 'page_styles.css')
         self.page_sheet = manifest.add(id_,
                                        href,
                                        CSS_MIME,
                                        data=self.parser.parseString(
                                            '', validate=False))
         head = self.current_item.xpath('//*[local-name()="head"][1]')
         if head:
             href = self.current_item.relhref(href)
             l = etree.SubElement(head[0],
                                  XHTML('link'),
                                  rel='stylesheet',
                                  type=CSS_MIME,
                                  href=href)
             l.tail = '\n'
         else:
             self.log.warn('No <head> cannot embed font rules')
     return self.page_sheet
Example #18
0
 def transform_css(self):
     transform_css(self, transform_sheet=transform_sheet, transform_style=transform_declaration)
     # Firefox flakes out sometimes when dynamically creating <style> tags,
     # so convert them to external stylesheets to ensure they never fail
     style_xpath = XPath('//h:style')
     for name, mt in tuple(iteritems(self.mime_map)):
         mt = mt.lower()
         if mt in OEB_DOCS:
             head = ensure_head(self.parsed(name))
             for style in style_xpath(self.parsed(name)):
                 if style.text and (style.get('type') or 'text/css').lower() == 'text/css':
                     in_head = has_ancestor(style, head)
                     if not in_head:
                         extract(style)
                         head.append(style)
                     css = style.text
                     style.clear()
                     style.tag = XHTML('link')
                     style.set('type', 'text/css')
                     style.set('rel', 'stylesheet')
                     sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
                     style.set('href', self.name_to_href(sname, name))
Example #19
0
 def smallcaps_elem(self, elem, attr):
     texts = self.split_text(getattr(elem, attr))
     setattr(elem, attr, None)
     last = elem if attr == 'tail' else None
     attrib = {'class': 'calibre_lowercase'}
     for text in texts:
         if text.isupper():
             if last is None:
                 elem.text = text
             else:
                 last.tail = text
         else:
             child = elem.makeelement(XHTML('span'), attrib=attrib)
             child.text = text.upper()
             if last is None:
                 elem.insert(0, child)
             else:
                 # addnext() moves the tail for some reason
                 tail = last.tail
                 last.addnext(child)
                 last.tail = tail
                 child.tail = None
             last = child
Example #20
0
def add_anchors_markup(root, uuid, anchors):
    body = last_tag(root)
    div = body.makeelement(
        XHTML('div'),
        id=uuid,
        style=
        'display:block !important; page-break-before: always !important; break-before: always !important; white-space: pre-wrap !important'
    )
    div.text = '\n\n'
    body.append(div)

    def a(anchor):
        a = div.makeelement(
            XHTML('a'),
            href='#' + anchor,
            style=
            'min-width: 10px !important; min-height: 10px !important; border: solid 1px !important;'
        )
        a.text = a.tail = ' '
        div.append(a)

    tuple(map(a, anchors))
    a(uuid)
Example #21
0
 def serialize_item(self, item):
     '''
     Serialize an individual item from the spine of the input document.
     A reference to this item is stored in self.href_offsets
     '''
     buf = self.buf
     if not item.linear:
         self.breaks.append(buf.tell() - 1)
     self.id_offsets[urlnormalize(item.href)] = buf.tell()
     if item.is_section_start:
         buf.write(b'<a ></a> ')
     if item.is_article_start:
         buf.write(b'<a ></a> <a ></a>')
     for elem in item.data.find(XHTML('body')):
         self.serialize_elem(elem, item)
     if self.write_page_breaks_after_item:
         buf.write(b'<mbp:pagebreak/>')
     if item.is_article_end:
         # Kindle periodical article end marker
         buf.write(b'<a ></a> <a ></a>')
     if item.is_section_end:
         buf.write(b' <a ></a>')
     self.anchor_offset = None
Example #22
0
    def get_text(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        text = ['<body>']

        # Create main section if there are no others to create
        if self.opts.sectionize == 'nothing':
            text.append('<section>')
            self.section_level += 1

        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts,
                                self.opts.output_profile)

            # Start a <section> if we must sectionize each file or if the TOC references this page
            page_section_open = False
            if self.opts.sectionize == 'files' or None in self.toc.get(
                    item.href, ()):
                text.append('<section>')
                page_section_open = True
                self.section_level += 1

            text += self.dump_text(item.data.find(XHTML('body')), stylizer,
                                   item)

            if page_section_open:
                text.append('</section>')
                self.section_level -= 1

        # Close any open sections
        while self.section_level > 0:
            text.append('</section>')
            self.section_level -= 1

        text.append('</body>')
        return ''.join(text)
Example #23
0
    def extract_svg_into_flows(self):
        images = {}

        for item in self.oeb.manifest:
            if item.media_type == SVG_MIME:
                data = self.data(item)
                images[item.href] = len(self.flows)
                self.flows.append(
                    etree.tostring(data,
                                   encoding='UTF-8',
                                   with_tail=True,
                                   xml_declaration=True))

        for item in self.oeb.spine:
            root = self.data(item)

            for svg in XPath('//svg:svg')(root):
                raw = etree.tostring(svg,
                                     encoding=unicode_type,
                                     with_tail=False)
                idx = len(self.flows)
                self.flows.append(raw)
                p = svg.getparent()
                pos = p.index(svg)
                img = etree.Element(XHTML('img'),
                                    src="kindle:flow:%s?mime=image/svg+xml" %
                                    to_ref(idx))
                p.insert(pos, img)
                extract(svg)

            for img in XPath('//h:img[@src]')(root):
                src = img.get('src')
                abshref = item.abshref(src)
                idx = images.get(abshref, None)
                if idx is not None:
                    img.set('src',
                            'kindle:flow:%s?mime=image/svg+xml' % to_ref(idx))
Example #24
0
def get_length(root):
    ans = 0

    fast = getattr(speedup, 'get_element_char_length', None)
    if fast is None:
        ignore_tags = frozenset('script style title noscript'.split())
        img_tags = ('img', 'svg')
        strip_space = re.compile(r'\s+')

        def count(elem):
            tag = getattr(elem, 'tag', count)
            if callable(tag):
                return len(
                    strip_space.sub('',
                                    getattr(elem, 'tail', None) or ''))
            num = 0
            tname = tag.rpartition('}')[-1].lower()
            if elem.text and tname not in ignore_tags:
                num += len(strip_space.sub('', elem.text))
            if elem.tail:
                num += len(strip_space.sub('', elem.tail))
            if tname in img_tags:
                num += 1000
            return num
    else:

        def count(elem):
            tag = getattr(elem, 'tag', count)
            if callable(tag):
                return fast('', None, getattr(elem, 'tail', None))
            return fast(tag, elem.text, elem.tail)

    for body in root.iterchildren(XHTML('body')):
        ans += count(body)
        for elem in body.iterdescendants():
            ans += count(elem)
    return ans
Example #25
0
    def mlize_spine(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.utils.xml_parse import safe_xml_fromstring
        output = [u'']
        output.append(self.get_toc())
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to TXT...' % item.href)
            for x in item.data.iterdescendants(etree.Comment):
                if x.text and '--' in x.text:
                    x.text = x.text.replace('--', '__')
            content = etree.tostring(item.data, encoding='unicode')
            content = self.remove_newlines(content)
            content = safe_xml_fromstring(content)
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts,
                                self.opts.output_profile)
            output += self.dump_text(content.find(XHTML('body')), stylizer,
                                     item)
            output += '\n\n\n\n\n\n'
        output = ''.join(output)
        output = '\n'.join(l.rstrip() for l in output.splitlines())
        output = self.cleanup_text(output)

        return output
Example #26
0
    def split_on_page_breaks(self, orig_tree):
        ordered_ids = []
        for elem in orig_tree.xpath('//*[@id]'):
            id = elem.get('id')
            if id in self.page_break_ids:
                ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])

        self.trees = []
        tree = orig_tree
        for pattern, before in ordered_ids:
            elem = pattern(tree)
            if elem:
                self.log.debug('\t\tSplitting on page-break')
                before, after = self.do_split(tree, elem[0], before)
                self.trees.append(before)
                tree = after
        self.trees.append(tree)
        trees, ids = [], set([])
        for tree in self.trees:
            root = tree.getroot()
            if self.is_page_empty(root):
                discarded_ids = root.xpath('//*[@id]')
                for x in discarded_ids:
                    x = x.get('id')
                    if not x.startswith('calibre_'):
                        ids.add(x)
            else:
                if ids:
                    body = self.get_body(root)
                    if body is not None:
                        for x in ids:
                            body.insert(0, body.makeelement(XHTML('div'),
                                id=x, style='height:0pt'))
                ids = set([])
                trees.append(tree)
        self.trees = trees
Example #27
0
 def stylize_spine(self):
     self.stylizers = {}
     profile = self.context.source
     css = ''
     for item in self.oeb.spine:
         html = item.data
         body = html.find(XHTML('body'))
         if 'style' in html.attrib:
             b = body.attrib.get('style', '')
             body.set('style', html.get('style') + ';' + b)
             del html.attrib['style']
         bs = body.get('style', '').split(';')
         bs.append('margin-top: 0pt')
         bs.append('margin-bottom: 0pt')
         if float(self.context.margin_left) >= 0:
             bs.append('margin-left : %gpt' %
                       float(self.context.margin_left))
         if float(self.context.margin_right) >= 0:
             bs.append('margin-right : %gpt' %
                       float(self.context.margin_right))
         bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
         if self.page_break_on_body:
             bs.extend(['page-break-before: always'])
         if self.context.change_justification != 'original':
             bs.append('text-align: ' + self.context.change_justification)
         if self.body_font_family:
             bs.append(u'font-family: ' + self.body_font_family)
         body.set('style', '; '.join(bs))
         stylizer = Stylizer(html,
                             item.href,
                             self.oeb,
                             self.context,
                             profile,
                             user_css=self.context.extra_css,
                             extra_css=css)
         self.stylizers[item] = stylizer
Example #28
0
def merge_html(container, names, master):
    p = container.parsed
    root = p(master)

    # Ensure master has a <head>
    head = root.find('h:head', namespaces=XPNSMAP)
    if head is None:
        head = root.makeelement(XHTML('head'))
        container.insert_into_xml(root, head, 0)

    seen_anchors = all_anchors(root)
    seen_stylesheets = set(all_stylesheets(container, master))
    master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1]
    master_base = os.path.dirname(master)
    anchor_map = {n: {} for n in names if n != master}

    for name in names:
        if name == master:
            continue
        # Insert new stylesheets into master
        for sheet in all_stylesheets(container, name):
            if sheet not in seen_stylesheets:
                seen_stylesheets.add(sheet)
                link = head.makeelement(XHTML('link'),
                                        rel='stylesheet',
                                        type='text/css',
                                        href=container.name_to_href(
                                            sheet, master))
                container.insert_into_xml(head, link)

        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        root = p(name)
        children = []
        for body in p(name).findall('h:body', namespaces=XPNSMAP):
            children.append(
                body.text if body.text and body.text.strip() else '\n\n')
            children.extend(body)

        first_child = ''
        for first_child in children:
            if not isinstance(first_child, string_or_bytes):
                break
        if isinstance(first_child, string_or_bytes):
            # body contained only text, no tags
            first_child = body.makeelement(XHTML('p'))
            first_child.text, children[0] = children[0], first_child

        amap = anchor_map[name]
        remove_name_attributes(root)

        for elem in root.xpath('//*[@id]'):
            val = elem.get('id')
            if not val:
                continue
            if val in seen_anchors:
                nval = unique_anchor(seen_anchors, val)
                elem.set('id', nval)
                amap[val] = nval
            else:
                seen_anchors.add(val)

        if 'id' not in first_child.attrib:
            first_child.set('id', unique_anchor(seen_anchors, 'top'))
            seen_anchors.add(first_child.get('id'))

        amap[''] = first_child.get('id')

        # Fix links that point to local changed anchors
        for a in XPath('//h:a[starts-with(@href, "#")]')(root):
            q = a.get('href')[1:]
            if q in amap:
                a.set('href', '#' + amap[q])

        for child in children:
            if isinstance(child, string_or_bytes):
                add_text(master_body, child)
            else:
                master_body.append(copy.deepcopy(child))

        container.remove_item(name, remove_from_guide=False)

    # Fix all links in the container that point to merged files
    for fname, media_type in iteritems(container.mime_map):
        repl = MergeLinkReplacer(fname, anchor_map, master, container)
        container.replace_links(fname, repl)
Example #29
0
    def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
        if not isinstance(node.tag, string_or_bytes) \
           or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        try:
            font_size = style['font-size']
        except:
            font_size = self.sbase if self.sbase is not None else \
                self.context.source.fbase
        if tag == 'body' and isinstance(font_size, numbers.Number):
            stylizer.body_font_size = font_size
        if 'align' in node.attrib:
            if tag != 'img':
                cssdict['text-align'] = node.attrib['align']
                if cssdict['text-align'] == 'center':
                    # align=center causes tables to be center aligned,
                    # which text-align does not. And the ever trustworthy Word
                    # uses this construct in its HTML output. See
                    # https://bugs.launchpad.net/bugs/1569583
                    if tag == 'table':
                        if 'margin-left' not in cssdict and 'margin-right' not in cssdict:
                            cssdict['margin-left'] = cssdict['margin-right'] = 'auto'
                    else:
                        for table in node.iterchildren(XHTML("table")):
                            ts = stylizer.style(table)
                            if ts.get('margin-left') is None and ts.get('margin-right') is None:
                                ts.set('margin-left', 'auto')
                                ts.set('margin-right', 'auto')
            else:
                val = node.attrib['align']
                if val in ('middle', 'bottom', 'top'):
                    cssdict['vertical-align'] = val
                elif val in ('left', 'right'):
                    cssdict['float'] = val
            del node.attrib['align']
        if 'valign' in node.attrib and tag == 'td':
            if cssdict.get('vertical-align') == 'inherit':
                cssdict['vertical-align'] = node.attrib['valign']
            del node.attrib['valign']
        if node.tag == XHTML('font'):
            tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
                'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
            tag = 'div' if XPath('|'.join(tags))(node) else 'span'
            node.tag = XHTML(tag)
            if 'size' in node.attrib:
                def force_int(raw):
                    return int(re.search(r'([0-9+-]+)', raw).group(1))
                size = node.attrib['size'].strip()
                if size:
                    fnums = self.context.source.fnums
                    if size[0] in ('+', '-'):
                        # Oh, the warcrimes
                        try:
                            esize = 3 + force_int(size)
                        except:
                            esize = 3
                        if esize < 1:
                            esize = 1
                        if esize > 7:
                            esize = 7
                        font_size = fnums[esize]
                    else:
                        try:
                            font_size = fnums[force_int(size)]
                        except:
                            font_size = fnums[3]
                    cssdict['font-size'] = '%.1fpt'%font_size
                del node.attrib['size']
            if 'face' in node.attrib:
                cssdict['font-family'] = node.attrib['face']
                del node.attrib['face']
        if 'color' in node.attrib:
            try:
                cssdict['color'] = Property('color', node.attrib['color']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['color']
        if 'bgcolor' in node.attrib:
            try:
                cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
            except (ValueError, SyntaxErr):
                pass
            del node.attrib['bgcolor']
        if tag == 'ol' and 'type' in node.attrib:
            del node.attrib['type']
        if cssdict.get('font-weight', '').lower() == 'medium':
            cssdict['font-weight'] = 'normal'  # ADE chokes on font-weight medium

        fsize = font_size
        is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in cssdict and len(node) == 0 and node.text and (
            len(node.text) == 1 or (len(node.text) == 2 and 0x2000 <= ord(node.text[0]) <= 0x206f)))
        # Detect drop caps generated by the docx input plugin
        if node.tag and node.tag.endswith('}p') and len(node) == 0 and node.text and len(node.text.strip()) == 1 and \
                not node.tail and 'line-height' in cssdict and 'font-size' in cssdict:
            dp = node.getparent()
            if dp.tag and dp.tag.endswith('}div') and len(dp) == 1 and not dp.text:
                if stylizer.style(dp).cssdict().get('float', None) == 'left':
                    is_drop_cap = True
        if not self.context.disable_font_rescaling and not is_drop_cap:
            _sbase = self.sbase if self.sbase is not None else \
                self.context.source.fbase
            dyn_rescale = dynamic_rescale_factor(node)
            if dyn_rescale is not None:
                fsize = self.fmap[_sbase]
                fsize *= dyn_rescale
                cssdict['font-size'] = '%0.5fem'%(fsize/psize)
                psize = fsize
            elif 'font-size' in cssdict or tag == 'body':
                fsize = self.fmap[font_size]
                try:
                    cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                except ZeroDivisionError:
                    cssdict['font-size'] = '%.1fpt'%fsize
                psize = fsize

        try:
            minlh = self.context.minimum_line_height / 100.
            if not is_drop_cap and style['line-height'] < minlh * fsize:
                cssdict['line-height'] = str(minlh)
        except:
            self.oeb.logger.exception('Failed to set minimum line-height')

        if cssdict:
            for x in self.filter_css:
                popval = cssdict.pop(x, None)
                if self.body_font_family and popval and x == 'font-family' \
                    and popval.partition(',')[0][1:-1] == self.body_font_family.partition(',')[0][1:-1]:
                    cssdict[x] = popval

        if cssdict:
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            if 'display' in cssdict and cssdict['display'] == 'in-line':
                cssdict['display'] = 'inline'
            if self.unfloat and 'float' in cssdict \
               and cssdict.get('display', 'none') != 'none':
                del cssdict['display']
            if self.untable and 'display' in cssdict \
               and cssdict['display'].startswith('table'):
                display = cssdict['display']
                if display == 'table-cell':
                    cssdict['display'] = 'inline'
                else:
                    cssdict['display'] = 'block'
            if 'vertical-align' in cssdict \
               and cssdict['vertical-align'] == 'sup':
                cssdict['vertical-align'] = 'super'
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh

        if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'):
            if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
                for prop in ('margin', 'padding', 'border'):
                    for edge in ('top', 'bottom'):
                        cssdict['%s-%s'%(prop, edge)] = '0pt'
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = \
                    '%fem'%self.context.insert_blank_line_size
            indent_size = self.context.remove_paragraph_spacing_indent_size
            keep_indents = indent_size < 0.0
            if (self.context.remove_paragraph_spacing and not keep_indents and cssdict.get('text-align', None) not in ('center', 'right')):
                cssdict['text-indent'] =  "%1.1fem" % indent_size

        pseudo_classes = style.pseudo_classes(self.filter_css)
        if cssdict or pseudo_classes:
            keep_classes = set()

            if cssdict:
                items = sorted(iteritems(cssdict))
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                classes = node.get('class', '').strip() or 'calibre'
                # lower() because otherwise if the document uses the same class
                # name with different case, both cases will apply, leading
                # to incorrect results.
                klass = ascii_text(STRIPNUM.sub('', classes.split()[0])).lower().strip().replace(' ', '_')
                if css in styles:
                    match = styles[css]
                else:
                    match = klass + str(names[klass] or '')
                    styles[css] = match
                    names[klass] += 1
                node.attrib['class'] = match
                keep_classes.add(match)

            for psel, cssdict in iteritems(pseudo_classes):
                items = sorted(iteritems(cssdict))
                css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
                pstyles = pseudo_styles[psel]
                if css in pstyles:
                    match = pstyles[css]
                else:
                    # We have to use a different class for each psel as
                    # otherwise you can have incorrect styles for a situation
                    # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                    # If the pcalibre class for a:hover and a:link is the same,
                    # then the class attribute for a.x tags will contain both
                    # that class and the class for a.x:hover, which is wrong.
                    klass = 'pcalibre'
                    match = klass + str(names[klass] or '')
                    pstyles[css] = match
                    names[klass] += 1
                keep_classes.add(match)
                node.attrib['class'] = ' '.join(keep_classes)

        elif 'class' in node.attrib:
            del node.attrib['class']
        if 'style' in node.attrib:
            del node.attrib['style']
        for child in node:
            self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
Example #30
0
    def extract_css_into_flows(self):
        inlines = defaultdict(list)  # Ensure identical <style>s not repeated
        sheets = {}
        passthrough = getattr(self.opts, 'mobi_passthrough', False)

        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                sheet = self.data(item)
                if not passthrough and not self.opts.expand_css and hasattr(
                        item.data, 'cssText'):
                    condense_sheet(sheet)
                sheets[item.href] = len(self.flows)
                self.flows.append(sheet)

        def fix_import_rules(sheet):
            changed = False
            for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
                if rule.href:
                    href = item.abshref(rule.href)
                    idx = sheets.get(href, None)
                    if idx is not None:
                        idx = to_ref(idx)
                        rule.href = 'kindle:flow:%s?mime=text/css' % idx
                        changed = True
            return changed

        for item in self.oeb.spine:
            root = self.data(item)

            for link in XPath('//h:link[@href]')(root):
                href = item.abshref(link.get('href'))
                idx = sheets.get(href, None)
                if idx is not None:
                    idx = to_ref(idx)
                    link.set('href', 'kindle:flow:%s?mime=text/css' % idx)

            for tag in XPath('//h:style')(root):
                p = tag.getparent()
                idx = p.index(tag)
                raw = tag.text
                if not raw or not raw.strip():
                    extract(tag)
                    continue
                sheet = cssutils.parseString(raw, validate=False)
                if fix_import_rules(sheet):
                    raw = force_unicode(sheet.cssText, 'utf-8')

                repl = etree.Element(XHTML('link'),
                                     type='text/css',
                                     rel='stylesheet')
                repl.tail = '\n'
                p.insert(idx, repl)
                extract(tag)
                inlines[raw].append(repl)

        for raw, elems in inlines.iteritems():
            idx = to_ref(len(self.flows))
            self.flows.append(raw)
            for link in elems:
                link.set('href', 'kindle:flow:%s?mime=text/css' % idx)

        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                sheet = self.data(item)
                if hasattr(sheet, 'cssRules'):
                    fix_import_rules(sheet)

        for i, sheet in enumerate(tuple(self.flows)):
            if hasattr(sheet, 'cssText'):
                self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
Example #31
0
    def mobimlize_elem(self,
                       elem,
                       stylizer,
                       bstate,
                       istates,
                       ignore_valign=False):
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            id_ = elem.get('id', None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set('id', id_)
                elem.tail = tail
                elem.tag = XHTML('a')
            else:
                return
        tag = barename(elem.tag)
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == 'ol' and 'start' in elem.attrib:
            try:
                istate.list_num = int(elem.attrib['start']) - 1
            except:
                pass
        istates.append(istate)
        left = 0
        display = style['display']
        if display == 'table-cell':
            display = 'inline'
        elif display.startswith('table'):
            display = 'block'
        isblock = (not display.startswith('inline')
                   and style['display'] != 'none')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            istate.halign = style['text-align']
            istate.indent = style['text-indent']
            if style['margin-left'] == 'auto' \
               and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            if tag != 'body':
                left = margin + padding
            istate.left += left
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = (u'\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + (u'\xa0' * spaces)
                else:
                    last = elem[-1]
                    last.text = (last.text or '') + (u'\xa0' * spaces)
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = True if style['font-style'] == 'italic' else False
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        istate.bgcolor = style['background-color']
        istate.fgcolor = style['color']
        istate.strikethrough = style.effective_text_decoration == 'line-through'
        istate.underline = style.effective_text_decoration == 'underline'
        ff = style['font-family'].lower() if style['font-family'] else ''
        if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
            istate.family = 'monospace'
        elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff
              or 'arial' in ff or 'helvetica' in ff):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            cssdict = style.cssdict()
            valign = cssdict.get('vertical-align', None)
            if valign in ('top', 'bottom', 'middle'):
                istate.attrib['align'] = valign
            for prop in ('width', 'height'):
                if cssdict[prop] != 'auto':
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
                            pixs = int(
                                round(float(value) / (72. / self.profile.dpi)))
                        except:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                href = self.current_spine_item.abshref(elem.attrib['src'])
                try:
                    item = self.oeb.manifest.hrefs[urlnormalize(href)]
                except:
                    self.oeb.logger.warn('Failed to find image:', href)
                else:
                    try:
                        width, height = identify_data(item.data)[:2]
                    except:
                        self.oeb.logger.warn('Invalid image:', href)
                    else:
                        if 'width' not in istate.attrib and 'height' not in \
                                    istate.attrib:
                            istate.attrib['width'] = str(width)
                            istate.attrib['height'] = str(height)
                        else:
                            ar = float(width) / float(height)
                            if 'width' not in istate.attrib:
                                try:
                                    width = int(istate.attrib['height']) * ar
                                except:
                                    pass
                                istate.attrib['width'] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib['width']) / ar
                                except:
                                    pass
                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == 'hr' and asfloat(style['width']) > 0:
            prop = style['width'] / self.profile.width
            istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        elif display == 'table':
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = 'span' if tag == 'td' else 'div'

        if tag in ('table', 'td', 'tr'):
            col = style.backgroundColor
            if col:
                elem.set('bgcolor', col)
            css = style.cssdict()
            if 'border' in css or 'border-width' in css:
                elem.set('border', '1')
        if tag in TABLE_TAGS:
            for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                         'bgcolor'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == 'q':
            t = elem.text
            if not t:
                t = ''
            elem.text = u'\u201c' + t
            t = elem.tail
            if not t:
                t = ''
            elem.tail = u'\u201d' + t
        text = None
        if elem.text:
            if istate.preserve:
                text = elem.text
            elif (len(elem) > 0 and isspace(elem.text)
                  and hasattr(elem[0].tag, 'rpartition')
                  and elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom',
                                  'top', 'bottom') or (isinstance(
                                      valign,
                                      (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top',
                           'top') or (isinstance(valign,
                                                 (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot, XHTML('body')))
            vbstate.para = etree.SubElement(vbstate.body, XHTML('p'))
            self.mobimlize_elem(elem,
                                stylizer,
                                vbstate,
                                istates,
                                ignore_valign=True)
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            at_start = bstate.para is None
            if at_start:
                self.mobimlize_content('span', '', bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    if vbstate.para.text:
                        vtag.text = vbstate.para.text
                    for child in vbstate.para:
                        vtag.append(child)
                return

        if tag == 'blockquote':
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
                # We have an id but no text and no children, the id should still
                # be added.
                istate.ids and tag in ('a', 'span', 'i', 'b', 'u')
                and len(elem) == 0)):
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            if para is not None and para.text == u'\xa0' and len(para) < 1:
                if style.height > 2:
                    para.getparent().replace(para, etree.Element(XHTML('br')))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            bstate.para = None
            bstate.istate = None
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()